Add StringPool and BumpStringPool classes

pull/3588/head
Stenzek 2 weeks ago
parent 84806d3055
commit ac4634c289
No known key found for this signature in database

@ -2,8 +2,10 @@
// SPDX-License-Identifier: CC-BY-NC-ND-4.0
#include "common/string_util.h"
#include "common/string_pool.h"
#include <gtest/gtest.h>
#include <tuple>
TEST(StringUtil, Ellipsise)
{
@ -1019,3 +1021,477 @@ TEST(StringUtil, WideStringToUTF8String)
ASSERT_EQ(dest, "Hello");
}
#endif
// ============================================================================
// BumpStringPool Tests
// ============================================================================
// Test fixture shared by the BumpStringPool cases below; each TEST_F reaches the pool through the `pool` member.
class BumpStringPoolTest : public ::testing::Test
{
protected:
// Pool under test, default-constructed.
BumpStringPool pool;
};
// A default-constructed pool reports zero bytes and an empty state.
TEST_F(BumpStringPoolTest, InitialState)
{
  EXPECT_EQ(pool.GetSize(), 0u);
  EXPECT_TRUE(pool.IsEmpty());
}
// Adding a non-empty string yields a valid offset and grows the pool by the string plus its terminator.
TEST_F(BumpStringPoolTest, AddString_ValidString)
{
  const std::string_view input = "test";
  const BumpStringPool::Offset where = pool.AddString(input);

  EXPECT_NE(where, BumpStringPool::InvalidOffset);
  EXPECT_FALSE(pool.IsEmpty());

  // One extra byte is accounted for the null terminator.
  const size_t expected_bytes = input.size() + 1;
  EXPECT_EQ(pool.GetSize(), expected_bytes);
}
// Empty strings are rejected: the sentinel offset comes back and the pool stays untouched.
TEST_F(BumpStringPoolTest, AddString_EmptyString)
{
  const BumpStringPool::Offset where = pool.AddString("");

  EXPECT_EQ(where, BumpStringPool::InvalidOffset);
  EXPECT_EQ(pool.GetSize(), 0u);
  EXPECT_TRUE(pool.IsEmpty());
}
// Strings are laid out back to back, each followed by a single terminator byte.
TEST_F(BumpStringPoolTest, AddString_MultipleStrings)
{
  const std::string_view first = "first";
  const std::string_view second = "second";
  const std::string_view third = "third";

  const auto first_at = pool.AddString(first);
  const auto second_at = pool.AddString(second);
  const auto third_at = pool.AddString(third);

  EXPECT_NE(first_at, BumpStringPool::InvalidOffset);
  EXPECT_NE(second_at, BumpStringPool::InvalidOffset);
  EXPECT_NE(third_at, BumpStringPool::InvalidOffset);

  // Running end positions; the "+ 1" on each line is that string's null terminator.
  const size_t first_end = first.size() + 1;
  const size_t second_end = first_end + second.size() + 1;
  const size_t third_end = second_end + third.size() + 1;

  EXPECT_EQ(first_at, 0u);
  EXPECT_EQ(second_at, first_end);
  EXPECT_EQ(third_at, second_end);
  EXPECT_EQ(pool.GetSize(), third_end);
}
// BumpStringPool does NOT deduplicate: the same text added twice occupies two distinct slots.
TEST_F(BumpStringPoolTest, AddString_DuplicateStrings)
{
  const std::string_view repeated = "duplicate";

  const auto first_copy = pool.AddString(repeated);
  const auto second_copy = pool.AddString(repeated);

  EXPECT_NE(first_copy, second_copy);

  // Two full copies, each with its own terminator.
  const size_t bytes_per_copy = repeated.size() + 1;
  EXPECT_EQ(pool.GetSize(), bytes_per_copy * 2);
}
// A string read back through its offset matches what was added.
TEST_F(BumpStringPoolTest, GetString_ValidOffset)
{
  const std::string_view expected = "hello world";
  const auto where = pool.AddString(expected);

  EXPECT_EQ(pool.GetString(where), expected);
}
// Looking up the sentinel offset produces an empty view.
TEST_F(BumpStringPoolTest, GetString_InvalidOffset)
{
  const std::string_view looked_up = pool.GetString(BumpStringPool::InvalidOffset);
  EXPECT_TRUE(looked_up.empty());
}
// An offset past the end of a non-empty pool produces an empty view.
TEST_F(BumpStringPoolTest, GetString_OutOfBoundsOffset)
{
  std::ignore = pool.AddString("test");

  const BumpStringPool::Offset far_past_end = 9999;
  EXPECT_TRUE(pool.GetString(far_past_end).empty());
}
// Every string stays individually retrievable after several have been added.
TEST_F(BumpStringPoolTest, GetString_MultipleStrings)
{
  constexpr size_t count = 3;
  const std::string_view inputs[count] = {"alpha", "beta", "gamma"};
  BumpStringPool::Offset offsets[count] = {};

  // Add everything first so the lookups run against the fully populated pool.
  for (size_t i = 0; i < count; ++i)
    offsets[i] = pool.AddString(inputs[i]);

  for (size_t i = 0; i < count; ++i)
    EXPECT_EQ(pool.GetString(offsets[i]), inputs[i]);
}
// Clear() returns a populated pool to the empty state.
TEST_F(BumpStringPoolTest, Clear)
{
  for (const char* text : {"test1", "test2", "test3"})
    std::ignore = pool.AddString(text);

  EXPECT_GT(pool.GetSize(), 0u);
  EXPECT_FALSE(pool.IsEmpty());

  pool.Clear();

  EXPECT_EQ(pool.GetSize(), 0u);
  EXPECT_TRUE(pool.IsEmpty());
}
// Reserve() leaves the reported size and empty state unchanged, and adding still works afterwards.
TEST_F(BumpStringPoolTest, Reserve)
{
  pool.Reserve(1024);

  // Logical contents are unchanged by the reservation.
  EXPECT_EQ(pool.GetSize(), 0u);
  EXPECT_TRUE(pool.IsEmpty());

  // The pool still accepts strings afterwards.
  const BumpStringPool::Offset where = pool.AddString("test");
  EXPECT_NE(where, BumpStringPool::InvalidOffset);
}
// Control characters and punctuation round-trip unchanged.
TEST_F(BumpStringPoolTest, AddString_SpecialCharacters)
{
  const std::string_view input = "Hello\nWorld\t!@#$%^&*()";
  const auto where = pool.AddString(input);

  EXPECT_NE(where, BumpStringPool::InvalidOffset);
  EXPECT_EQ(pool.GetString(where), input);
}
// Multi-byte (non-ASCII) text round-trips unchanged.
TEST_F(BumpStringPoolTest, AddString_UnicodeCharacters)
{
  const std::string_view input = "Hello 世界 🌍";
  const auto where = pool.AddString(input);

  EXPECT_NE(where, BumpStringPool::InvalidOffset);
  EXPECT_EQ(pool.GetString(where), input);
}
// A 10000-character string is stored whole and accounted for with one terminator byte.
TEST_F(BumpStringPoolTest, AddString_LongString)
{
  const std::string big(10000, 'x');
  const auto where = pool.AddString(big);

  EXPECT_NE(where, BumpStringPool::InvalidOffset);
  EXPECT_EQ(pool.GetString(where), big);

  const size_t expected_bytes = big.size() + 1;
  EXPECT_EQ(pool.GetSize(), expected_bytes);
}
// ============================================================================
// StringPool Tests
// ============================================================================
// Test fixture shared by the StringPool cases below; each TEST_F reaches the pool through the `pool` member.
class StringPoolTest : public ::testing::Test
{
protected:
// Pool under test, default-constructed.
StringPool pool;
};
// A default-constructed pool has no bytes and no entries.
TEST_F(StringPoolTest, InitialState)
{
  EXPECT_EQ(pool.GetCount(), 0u);
  EXPECT_EQ(pool.GetSize(), 0u);
  EXPECT_TRUE(pool.IsEmpty());
}
// Adding one non-empty string yields a valid offset, one entry, and size = length + terminator.
TEST_F(StringPoolTest, AddString_ValidString)
{
  const std::string_view input = "test";
  const StringPool::Offset where = pool.AddString(input);

  EXPECT_NE(where, StringPool::InvalidOffset);
  EXPECT_FALSE(pool.IsEmpty());
  EXPECT_EQ(pool.GetCount(), 1u);

  const size_t expected_bytes = input.size() + 1;
  EXPECT_EQ(pool.GetSize(), expected_bytes);
}
// Empty strings are rejected: the sentinel offset comes back and nothing is recorded.
TEST_F(StringPoolTest, AddString_EmptyString)
{
  const StringPool::Offset where = pool.AddString("");

  EXPECT_EQ(where, StringPool::InvalidOffset);
  EXPECT_EQ(pool.GetCount(), 0u);
  EXPECT_EQ(pool.GetSize(), 0u);
  EXPECT_TRUE(pool.IsEmpty());
}
// Three distinct strings give three entries and a size equal to all lengths plus one terminator each.
TEST_F(StringPoolTest, AddString_MultipleStrings)
{
  const std::string_view first = "first";
  const std::string_view second = "second";
  const std::string_view third = "third";

  const auto first_at = pool.AddString(first);
  const auto second_at = pool.AddString(second);
  const auto third_at = pool.AddString(third);

  EXPECT_NE(first_at, StringPool::InvalidOffset);
  EXPECT_NE(second_at, StringPool::InvalidOffset);
  EXPECT_NE(third_at, StringPool::InvalidOffset);
  EXPECT_EQ(pool.GetCount(), 3u);

  // Sum of (length + terminator) over the three strings.
  const size_t expected_bytes = (first.size() + 1) + (second.size() + 1) + (third.size() + 1);
  EXPECT_EQ(pool.GetSize(), expected_bytes);
}
// StringPool DOES deduplicate: adding the same text twice returns the same offset and stores it once.
TEST_F(StringPoolTest, AddString_DuplicateStrings)
{
  const std::string_view repeated = "duplicate";

  const auto first_add = pool.AddString(repeated);
  const auto second_add = pool.AddString(repeated);

  EXPECT_EQ(first_add, second_add);
  EXPECT_EQ(pool.GetCount(), 1u);

  const size_t single_copy_bytes = repeated.size() + 1;
  EXPECT_EQ(pool.GetSize(), single_copy_bytes);
}
// Interleaved repeats of two strings collapse to exactly two stored entries.
TEST_F(StringPoolTest, AddString_MultipleDuplicates)
{
  const std::string_view a = "test";
  const std::string_view b = "hello";

  // Insertion order: a, b, a, b, a.
  const auto a_first = pool.AddString(a);
  const auto b_first = pool.AddString(b);
  const auto a_second = pool.AddString(a);
  const auto b_second = pool.AddString(b);
  const auto a_third = pool.AddString(a);

  // Each repeat resolves to the offset of its first insertion.
  EXPECT_EQ(a_first, a_second);
  EXPECT_EQ(a_first, a_third);
  EXPECT_EQ(b_first, b_second);

  // The two different strings live at different offsets.
  EXPECT_NE(a_first, b_first);

  EXPECT_EQ(pool.GetCount(), 2u);
  EXPECT_EQ(pool.GetSize(), a.size() + b.size() + 2);
}
// A string read back through its offset matches what was added.
TEST_F(StringPoolTest, GetString_ValidOffset)
{
  const std::string_view expected = "hello world";
  const auto where = pool.AddString(expected);

  EXPECT_EQ(pool.GetString(where), expected);
}
// Looking up the sentinel offset produces an empty view.
TEST_F(StringPoolTest, GetString_InvalidOffset)
{
  const std::string_view looked_up = pool.GetString(StringPool::InvalidOffset);
  EXPECT_TRUE(looked_up.empty());
}
// An offset past the end of a non-empty pool produces an empty view.
TEST_F(StringPoolTest, GetString_OutOfBoundsOffset)
{
  std::ignore = pool.AddString("test");

  const StringPool::Offset far_past_end = 9999;
  EXPECT_TRUE(pool.GetString(far_past_end).empty());
}
// Every string stays individually retrievable after several have been added.
TEST_F(StringPoolTest, GetString_MultipleStrings)
{
  constexpr size_t count = 3;
  const std::string_view inputs[count] = {"alpha", "beta", "gamma"};
  StringPool::Offset offsets[count] = {};

  // Add everything first so the lookups run against the fully populated pool.
  for (size_t i = 0; i < count; ++i)
    offsets[i] = pool.AddString(inputs[i]);

  for (size_t i = 0; i < count; ++i)
    EXPECT_EQ(pool.GetString(offsets[i]), inputs[i]);
}
// Clear() drops both the stored bytes and the entry count.
TEST_F(StringPoolTest, Clear)
{
  for (const char* text : {"test1", "test2", "test3"})
    std::ignore = pool.AddString(text);

  EXPECT_EQ(pool.GetCount(), 3u);
  EXPECT_GT(pool.GetSize(), 0u);
  EXPECT_FALSE(pool.IsEmpty());

  pool.Clear();

  EXPECT_EQ(pool.GetCount(), 0u);
  EXPECT_EQ(pool.GetSize(), 0u);
  EXPECT_TRUE(pool.IsEmpty());
}
// Clearing a pool that saw repeated strings leaves it empty with a zero count.
TEST_F(StringPoolTest, Clear_WithDuplicates)
{
  // "test" is added twice on purpose; only two unique strings should be counted.
  for (const char* text : {"test", "test", "hello"})
    std::ignore = pool.AddString(text);

  EXPECT_EQ(pool.GetCount(), 2u);

  pool.Clear();

  EXPECT_EQ(pool.GetCount(), 0u);
  EXPECT_TRUE(pool.IsEmpty());
}
// Reserve() leaves size, count and empty state unchanged, and adding still works afterwards.
TEST_F(StringPoolTest, Reserve)
{
  pool.Reserve(1024);

  // Logical state is unchanged by the reservation.
  EXPECT_EQ(pool.GetCount(), 0u);
  EXPECT_EQ(pool.GetSize(), 0u);
  EXPECT_TRUE(pool.IsEmpty());

  // The pool still accepts strings afterwards.
  const StringPool::Offset where = pool.AddString("test");
  EXPECT_NE(where, StringPool::InvalidOffset);
}
// Control characters and punctuation round-trip unchanged.
TEST_F(StringPoolTest, AddString_SpecialCharacters)
{
  const std::string_view input = "Hello\nWorld\t!@#$%^&*()";
  const auto where = pool.AddString(input);

  EXPECT_NE(where, StringPool::InvalidOffset);
  EXPECT_EQ(pool.GetString(where), input);
}
// Multi-byte (non-ASCII) text round-trips unchanged.
TEST_F(StringPoolTest, AddString_UnicodeCharacters)
{
  const std::string_view input = "Hello 世界 🌍";
  const auto where = pool.AddString(input);

  EXPECT_NE(where, StringPool::InvalidOffset);
  EXPECT_EQ(pool.GetString(where), input);
}
// A 10000-character string is stored whole, counted once, and sized with one terminator byte.
TEST_F(StringPoolTest, AddString_LongString)
{
  const std::string big(10000, 'x');
  const auto where = pool.AddString(big);

  EXPECT_NE(where, StringPool::InvalidOffset);
  EXPECT_EQ(pool.GetString(where), big);
  EXPECT_EQ(pool.GetCount(), 1u);

  const size_t expected_bytes = big.size() + 1;
  EXPECT_EQ(pool.GetSize(), expected_bytes);
}
// Strings sharing a common prefix are treated as distinct entries, not as duplicates.
TEST_F(StringPoolTest, AddString_SimilarStrings)
{
  const std::string_view base = "test";
  const std::string_view with_digit = "test1";
  const std::string_view with_suffix = "testing";

  const auto base_at = pool.AddString(base);
  const auto with_digit_at = pool.AddString(with_digit);
  const auto with_suffix_at = pool.AddString(with_suffix);

  // All three offsets are pairwise different.
  EXPECT_NE(base_at, with_digit_at);
  EXPECT_NE(base_at, with_suffix_at);
  EXPECT_NE(with_digit_at, with_suffix_at);
  EXPECT_EQ(pool.GetCount(), 3u);

  // Each offset resolves to exactly its own string.
  EXPECT_EQ(pool.GetString(base_at), base);
  EXPECT_EQ(pool.GetString(with_digit_at), with_digit);
  EXPECT_EQ(pool.GetString(with_suffix_at), with_suffix);
}
// GetCount() rises only when a previously unseen string is added.
TEST_F(StringPoolTest, GetCount_TracksUniqueStrings)
{
  // One row per AddString() call: the text added and the count expected right after it.
  struct Step
  {
    const char* text;
    size_t count_after;
  };
  const Step steps[] = {
    {"unique1", 1u},
    {"unique2", 2u},
    {"unique1", 2u}, // Duplicate
    {"unique3", 3u},
  };

  EXPECT_EQ(pool.GetCount(), 0u);

  for (const Step& step : steps)
  {
    std::ignore = pool.AddString(step.text);
    EXPECT_EQ(pool.GetCount(), step.count_after);
  }
}
// After Clear() the pool starts over: the next string lands at offset 0 and is counted afresh.
TEST_F(StringPoolTest, ReuseAfterClear)
{
  const std::string_view text = "reuse";

  const auto before_clear = pool.AddString(text);
  EXPECT_EQ(before_clear, 0u);
  EXPECT_EQ(pool.GetCount(), 1u);

  pool.Clear();

  const auto after_clear = pool.AddString(text);
  EXPECT_EQ(pool.GetCount(), 1u);

  // The same text is stored again from the start of the buffer.
  EXPECT_EQ(after_clear, 0u);
  EXPECT_EQ(pool.GetString(after_clear), text);
}
// ============================================================================
// Comparison Tests: BumpStringPool vs StringPool
// ============================================================================
// Side-by-side check of how the two pools treat the same string added twice.
TEST(StringPoolComparison, DuplicationBehavior)
{
  BumpStringPool bump;
  StringPool dedup;
  const std::string_view repeated = "duplicate";
  const size_t bytes_per_copy = repeated.size() + 1;

  const auto bump_first = bump.AddString(repeated);
  const auto bump_second = bump.AddString(repeated);
  const auto dedup_first = dedup.AddString(repeated);
  const auto dedup_second = dedup.AddString(repeated);

  // BumpStringPool keeps both copies at different offsets.
  EXPECT_NE(bump_first, bump_second);
  EXPECT_EQ(bump.GetSize(), bytes_per_copy * 2);

  // StringPool hands back the first offset and keeps a single copy.
  EXPECT_EQ(dedup_first, dedup_second);
  EXPECT_EQ(dedup.GetSize(), bytes_per_copy);
}
// Adding one string 100 times: the bump pool grows linearly, the deduplicating pool stays at one copy.
TEST(StringPoolComparison, MemoryEfficiency)
{
  BumpStringPool bump;
  StringPool dedup;
  const std::string_view text = "test";
  constexpr size_t repeats = 100;
  const size_t bytes_per_copy = text.size() + 1;

  for (size_t round = 0; round < repeats; ++round)
  {
    std::ignore = bump.AddString(text);
    std::ignore = dedup.AddString(text);
  }

  // BumpStringPool holds every one of the copies.
  EXPECT_EQ(bump.GetSize(), bytes_per_copy * repeats);

  // StringPool holds exactly one.
  EXPECT_EQ(dedup.GetSize(), bytes_per_copy);
  EXPECT_EQ(dedup.GetCount(), 1u);
}

@ -56,6 +56,8 @@ add_library(common
small_string.h
string_util.cpp
string_util.h
string_pool.cpp
string_pool.h
time_helpers.h
thirdparty/SmallVector.cpp
thirdparty/SmallVector.h

@ -23,6 +23,7 @@
<ClInclude Include="heap_array.h" />
<ClInclude Include="intrin.h" />
<ClInclude Include="layered_settings_interface.h" />
<ClInclude Include="string_pool.h" />
<ClInclude Include="time_helpers.h" />
<ClInclude Include="log.h" />
<ClInclude Include="log_channels.h" />
@ -73,6 +74,7 @@
<ClCompile Include="sha256_digest.cpp" />
<ClCompile Include="small_string.cpp" />
<ClCompile Include="binary_reader_writer.cpp" />
<ClCompile Include="string_pool.cpp" />
<ClCompile Include="string_util.cpp" />
<ClCompile Include="thirdparty\aes.cpp" />
<ClCompile Include="thirdparty\SmallVector.cpp" />

@ -58,6 +58,7 @@
<Filter>thirdparty</Filter>
</ClInclude>
<ClInclude Include="time_helpers.h" />
<ClInclude Include="string_pool.h" />
</ItemGroup>
<ItemGroup>
<ClCompile Include="small_string.cpp" />
@ -89,6 +90,7 @@
<ClCompile Include="sha256_digest.cpp" />
<ClCompile Include="thirdparty\aes.cpp" />
<ClCompile Include="task_queue.cpp" />
<ClCompile Include="string_pool.cpp" />
</ItemGroup>
<ItemGroup>
<Natvis Include="bitfield.natvis" />

@ -0,0 +1,116 @@
// SPDX-FileCopyrightText: 2019-2025 Connor McLaughlin <stenzek@gmail.com>
// SPDX-License-Identifier: CC-BY-NC-ND-4.0
#include "string_pool.h"
// Constructor and destructor are compiler-generated; they are defined here, out of line, rather than in the header.
BumpStringPool::BumpStringPool() = default;
BumpStringPool::~BumpStringPool() = default;
// Returns the null-terminated string beginning at `offset`.
// An empty view is returned when `offset` is InvalidOffset or is not inside the buffer.
// The view points directly into the pool's buffer rather than at a copy.
std::string_view BumpStringPool::GetString(Offset offset) const
{
  const bool usable = (offset != InvalidOffset) && (offset < m_buffer.size());
  if (!usable)
    return std::string_view();

  // The const char* constructor measures the length up to the first '\0' at or after `offset`.
  const char* const first_char = m_buffer.data() + offset;
  return std::string_view(first_char);
}
// Removes every stored string; offsets handed out earlier no longer refer to anything.
// Only the contents are dropped: std::vector::clear() keeps the allocated capacity for reuse.
void BumpStringPool::Clear()
{
m_buffer.clear();
}
// Returns the number of bytes currently stored in the buffer (not its reserved capacity).
size_t BumpStringPool::GetSize() const
{
return m_buffer.size();
}
// Returns true when the buffer holds no bytes at all.
bool BumpStringPool::IsEmpty() const
{
return m_buffer.empty();
}
// Asks the buffer to hold capacity for at least `size` bytes in total.
// This forwards to std::vector::reserve, so GetSize()/IsEmpty() are unaffected.
void BumpStringPool::Reserve(size_t size)
{
m_buffer.reserve(size);
}
// Appends `str` followed by a null terminator to the end of the buffer.
// Returns the offset of the string's first byte, or InvalidOffset when `str` is empty (nothing is stored).
// No deduplication is done: adding the same text again stores another copy at a new offset.
// NOTE: a string containing an embedded '\0' is stored in full, but GetString() stops at the first '\0'.
BumpStringPool::Offset BumpStringPool::AddString(std::string_view str)
{
  if (str.empty())
    return InvalidOffset;

  // The new string starts where the buffer currently ends.
  const Offset offset = static_cast<Offset>(m_buffer.size());

  // Deliberately no reserve(size() + n) here. Requesting an exact capacity on every call makes the
  // capacity grow linearly instead of geometrically, so the whole pool can be reallocated and copied
  // on each add. The vector's own growth policy keeps appends amortized O(1); callers that know the
  // final size up front can call Reserve() once.
  m_buffer.insert(m_buffer.end(), str.begin(), str.end());
  m_buffer.push_back('\0');
  return offset;
}
// Looks up the null-terminated string stored at `offset`.
// Yields an empty view for InvalidOffset and for any offset at or beyond the end of the buffer.
// The result refers to the pool's own storage; no copy is made.
std::string_view StringPool::GetString(Offset offset) const
{
  if (offset != InvalidOffset && offset < m_buffer.size())
  {
    // Length is taken from the first '\0' found at or after `offset`.
    return std::string_view(m_buffer.data() + offset);
  }

  return std::string_view();
}
// Removes every stored string and forgets all deduplication entries, so GetSize() and GetCount() both drop to zero.
void StringPool::Clear()
{
m_buffer.clear();
// The lookup map must be emptied too, otherwise it would keep returning offsets into the cleared buffer.
m_string_map.clear();
}
// Returns the number of bytes currently stored in the buffer (not its reserved capacity).
size_t StringPool::GetSize() const
{
return m_buffer.size();
}
// Returns true when the buffer holds no bytes at all.
bool StringPool::IsEmpty() const
{
return m_buffer.empty();
}
// Returns the number of entries in the deduplication map, i.e. how many distinct strings have been added.
size_t StringPool::GetCount() const
{
return m_string_map.size();
}
// Asks the character buffer to hold capacity for at least `size` bytes in total.
// Only m_buffer is reserved; the deduplication map is not touched.
void StringPool::Reserve(size_t size)
{
m_buffer.reserve(size);
}
// Constructor and destructor are compiler-generated; they are defined here, out of line, rather than in the header.
StringPool::StringPool() = default;
StringPool::~StringPool() = default;
// Adds `str` to the pool unless an identical string is already stored.
// Returns the offset of the stored string: the existing offset for a repeat, a fresh one otherwise.
// Returns InvalidOffset when `str` is empty (nothing is stored or counted).
// NOTE: a string containing an embedded '\0' is stored in full, but GetString() stops at the first '\0'.
StringPool::Offset StringPool::AddString(std::string_view str)
{
  if (str.empty())
    return InvalidOffset;

  // Reuse the earlier copy when this exact string has been added before.
  const auto it = m_string_map.find(str);
  if (it != m_string_map.end())
    return it->second;

  // The new string starts where the buffer currently ends.
  const Offset offset = static_cast<Offset>(m_buffer.size());

  // Deliberately no reserve(size() + n) here. Requesting an exact capacity on every call makes the
  // capacity grow linearly instead of geometrically, so the whole pool can be reallocated and copied
  // on each add. The vector's own growth policy keeps appends amortized O(1); callers that know the
  // final size up front can call Reserve() once.
  m_buffer.insert(m_buffer.end(), str.begin(), str.end());
  m_buffer.push_back('\0');

  // Record the new string so later adds of the same text resolve to this offset.
  // NOTE(review): the key is built from a view of the bytes just written to m_buffer. That is only
  // safe if the map copies its key into owning storage; a key type that merely views m_buffer would
  // dangle on the next reallocation. Presumably PreferUnorderedStringMap owns its keys - confirm
  // against heterogeneous_containers.h.
  const std::string_view stored_str(m_buffer.data() + offset, str.size());
  m_string_map.emplace(stored_str, offset);
  return offset;
}

@ -0,0 +1,79 @@
// SPDX-FileCopyrightText: 2019-2025 Connor McLaughlin <stenzek@gmail.com>
// SPDX-License-Identifier: CC-BY-NC-ND-4.0
#pragma once
#include "heterogeneous_containers.h"
#include <cstddef>
#include <cstring>
#include <string>
#include <string_view>
#include <unordered_map>
#include <vector>
// Append-only string pool: strings are packed back to back in a single char buffer, each followed by
// a null terminator, and are identified by the byte offset of their first character.
// Identical strings are NOT merged; every AddString() call stores a new copy.
class BumpStringPool
{
public:
// Byte position of a string's first character within the pool.
using Offset = size_t;
// Sentinel offset: returned by AddString() for an empty string, and mapped to an empty view by GetString().
static constexpr Offset InvalidOffset = static_cast<Offset>(-1);
BumpStringPool();
~BumpStringPool();
// Adds a string to the pool and returns its offset.
// Empty strings are not stored and yield InvalidOffset.
[[nodiscard]] Offset AddString(std::string_view str);
// Retrieves a string view from the pool using an offset.
// Returns an empty view for InvalidOffset or for an offset at/after the end of the pool.
// The view points into the pool's buffer, so it may be invalidated by a later AddString(), Reserve() or Clear().
[[nodiscard]] std::string_view GetString(Offset offset) const;
// Clears all strings from the pool
void Clear();
// Returns the total size of the pool in bytes, including one null terminator per stored string
[[nodiscard]] size_t GetSize() const;
// Returns whether the pool is empty
[[nodiscard]] bool IsEmpty() const;
// Reserves space in the buffer to avoid frequent reallocations; does not change GetSize()
void Reserve(size_t size);
private:
// Backing storage: the stored strings, each followed by '\0'.
std::vector<char> m_buffer;
};
// Deduplicating string pool: strings are packed back to back in a single char buffer, each followed by
// a null terminator, and are identified by the byte offset of their first character.
// A lookup map remembers every string already stored so that repeats return the original offset.
class StringPool
{
public:
// Byte position of a string's first character within the pool.
using Offset = size_t;
// Sentinel offset: returned by AddString() for an empty string, and mapped to an empty view by GetString().
static constexpr Offset InvalidOffset = static_cast<Offset>(-1);
StringPool();
~StringPool();
// Adds a string to the pool and returns its offset. If the string already exists, returns the existing offset.
// Empty strings are not stored and yield InvalidOffset.
[[nodiscard]] Offset AddString(std::string_view str);
// Retrieves a string view from the pool using an offset.
// Returns an empty view for InvalidOffset or for an offset at/after the end of the pool.
// The view points into the pool's buffer, so it may be invalidated by a later AddString(), Reserve() or Clear().
[[nodiscard]] std::string_view GetString(Offset offset) const;
// Clears all strings from the pool, including the record of which strings were already stored
void Clear();
// Returns the total size of the pool in bytes, including one null terminator per stored string
[[nodiscard]] size_t GetSize() const;
// Returns whether the pool is empty
[[nodiscard]] bool IsEmpty() const;
// Returns the number of unique strings in the pool
[[nodiscard]] size_t GetCount() const;
// Reserves space in the buffer to avoid frequent reallocations; does not change GetSize() or GetCount()
void Reserve(size_t size);
private:
// Backing storage: the stored strings, each followed by '\0'.
std::vector<char> m_buffer;
// Maps each stored string to its offset in m_buffer; consulted by AddString() to detect repeats.
PreferUnorderedStringMap<Offset> m_string_map;
};
Loading…
Cancel
Save