漫谈C++ string(3):FBString的实现

FBStringfolly中关于std::string的实现,它更加精细地控制了内存的使用,在64位X86平台上,它的实现方式如下:

  • 对于小字符串(0~23字节),直接将数据存储在栈内,不需在对上开辟空间,因为栈的访问效率较高
  • 中等长度的字符串(24~254字节),在堆上开辟空间,发生拷贝时直接复制整个字符串
  • 大长度的字符串(大于254字节),在堆上开辟空间,采用COW机制

定义

fbstring的定义如下:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
typedef basic_fbstring<char> fbstring;

template <typename E, class T, class A, class Storage>
#else
template <
typename E,
class T = std::char_traits<E>,
class A = std::allocator<E>,
class Storage = fbstring_core<E>>
class basic_fbstring {}
class fbstring_core() {
struct MediumLarge {
Char* data_;
size_t size_;
size_t capacity_;

size_t capacity() const {
return kIsLittleEndian ? capacity_ & capacityExtractMask : capacity_ >> 2;
}

//
void setCapacity(size_t cap, Category cat) {
capacity_ = kIsLittleEndian
? cap | (static_cast<size_t>(cat) << kCategoryShift)
: (cap << 2) | static_cast<size_t>(cat);
}
};

//将短字符串存储在栈区,用union存储,可以有效节省内存
union {
uint8_t bytes_[sizeof(MediumLarge)]; // 存储所有字节
Char small_[sizeof(MediumLarge) / sizeof(Char)];
MediumLarge ml_;
};

constexpr static size_t lastChar = sizeof(MediumLarge) - 1;
constexpr static size_t maxSmallSize = lastChar / sizeof(Char);//64为x86平台为23个(宽)字节
constexpr static size_t maxMediumSize = 254 / sizeof(Char);
constexpr static uint8_t categoryExtractMask = kIsLittleEndian ? 0xC0 : 0x3;
constexpr static size_t kCategoryShift = (sizeof(size_t) - 1) * 8;
constexpr static size_t capacityExtractMask = kIsLittleEndian
? ~(size_t(categoryExtractMask) << kCategoryShift)
: 0x0 /* unused */;
}

下面来看fbstring_core的实现。

构造

还是以最常见的std::string(“xxx”)这种构造方式为例来看它对应的构造函数:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
fbstring_core(
const Char* const data,
const size_t size,
bool disableSSO = FBSTRING_DISABLE_SSO) {
if (!disableSSO && size <= maxSmallSize) {
//SSO(Small String Optiomition),即短字符串优化
initSmall(data, size);
} else if (size <= maxMediumSize) {
initMedium(data, size);
} else {
initLarge(data, size);
}
FBSTRING_ASSERT(this->size() == size);
FBSTRING_ASSERT(
size == 0 || memcmp(this->data(), data, size * sizeof(Char)) == 0);
}

如前文所述,对不不同的size,它的实现方式是不一样的,下面来逐个分析。

  • Small
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
template <class Char>
inline void fbstring_core<Char>::initSmall(
const Char* const data,
const size_t size) {
// Layout is: Char* data_, size_t size_, size_t capacity_

//如果数据是内存对齐的,则用字长快速复制,否则的话就用保守的memcpy
#ifndef FBSTRING_SANITIZE_ADDRESS
if ((reinterpret_cast<size_t>(data) & (sizeof(size_t) - 1)) == 0) {
const size_t byteSize = size * sizeof(Char);
constexpr size_t wordWidth = sizeof(size_t);
switch ((byteSize + wordWidth - 1) / wordWidth) { // Number of words.
case 3:
ml_.capacity_ = reinterpret_cast<const size_t*>(data)[2];
FOLLY_FALLTHROUGH;
case 2:
ml_.size_ = reinterpret_cast<const size_t*>(data)[1];
FOLLY_FALLTHROUGH;
case 1:
ml_.data_ = *reinterpret_cast<Char**>(const_cast<Char*>(data));
FOLLY_FALLTHROUGH;
case 0:
break;
}
} else
#endif
{
if (size != 0) {
//调用memcpy将data复制至small_
fbstring_detail::podCopy(data, data + size, small_);
}
}
setSmallSize(size);
}

void setSmallSize(size_t s) {
// Warning: this should work with uninitialized strings too,
// so don't assume anything about the previous value of
// small_[maxSmallSize].
//string未经初始化时也要能调用,此时small_中可能没有数据
FBSTRING_ASSERT(s <= maxSmallSize);
constexpr auto shift = kIsLittleEndian ? 0 : 2;
//用small_中的最后一个字节来存储字符串大小的信息
small_[maxSmallSize] = char((maxSmallSize - s) << shift);
small_[s] = '\0';
FBSTRING_ASSERT(category() == Category::isSmall && size() == s);
}

由此可见,短字符串的内存结构如下所示:

由于需要一个字节存储size,另外需要一个字节存储结束符,所以small_中最多能存22个字节。

另外还有个问题就是字符串的类型(small、medium、large)是存储在哪的?答案就是同上图的size存在一起,即存在bytes_的最后一个字节中,如下所示:

1
2


因为前面也说到,短字符串的长度最多只有22个字节,存储这个长度不需要占用整个字节,所以可以用最后一个字节的高两位来存储字符串的类型,由此可见FBString对内存控制的精细度。

  • Medium
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
template <class Char>
FOLLY_MALLOC_NOINLINE inline void fbstring_core<Char>::initMedium(
const Char* const data,
const size_t size) {
// 多分配一个字符用以存储结束符,将data指向分配的首地址
auto const allocSize = goodMallocSize((1 + size) * sizeof(Char));
ml_.data_ = static_cast<Char*>(checkedMalloc(allocSize));
//调用memcpy
if (FBSTRING_LIKELY(size > 0)) {
fbstring_detail::podCopy(data, data + size, ml_.data_);
}
ml_.size_ = size;
//设置容量及存储类别
ml_.setCapacity(allocSize / sizeof(Char) - 1, Category::isMedium);
ml_.data_[size] = '\0';
}

由此可见,中型字符串的内存结构如下所示:

  • Large
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
template <class Char>
FOLLY_MALLOC_NOINLINE inline void fbstring_core<Char>::initLarge(
const Char* const data,
const size_t size) {
// 大字符串启用COW
size_t effectiveCapacity = size;
auto const newRC = RefCounted::create(data, &effectiveCapacity);
ml_.data_ = newRC->data_;
ml_.size_ = size;
ml_.setCapacity(effectiveCapacity, Category::isLarge);
ml_.data_[size] = '\0';
}

struct RefCounted {
//使用atomic存储引用计数,data存在随后的连续内存中,提高内存使用效率
std::atomic<size_t> refCount_;
Char data_[1];

static RefCounted* create(size_t* size) {
//通过goodMallocSize将size对齐,提高内存分配效率
const size_t allocSize =
goodMallocSize(getDataOffset() + (*size + 1) * sizeof(Char));
auto result = static_cast<RefCounted*>(checkedMalloc(allocSize));
result->refCount_.store(1, std::memory_order_release);
*size = (allocSize - getDataOffset()) / sizeof(Char) - 1;
return result;
}

static RefCounted* create(const Char* data, size_t* size) {
const size_t effectiveSize = *size;
auto result = create(size);
if (FBSTRING_LIKELY(effectiveSize > 0)) {
//调用memcpy
fbstring_detail::podCopy(data, data + effectiveSize, result->data_);
}
return result;
}
};

大型字符串的内存分布同中型字符串基本相同,仅仅多了个引用计数需要存储,如下所示:

到此,还有个问题就是字符串的类别是如何存储的,代码如下所示:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
typedef uint8_t category_type;
enum class Category : category_type {
isSmall = 0,
isMedium = kIsLittleEndian ? 0x80 : 0x2,
isLarge = kIsLittleEndian ? 0x40 : 0x1,
};

//categoryExtractMask = kIsLittleEndian ? 0xC0 : 0x3;
Category category() const {
// works for both big-endian and little-endian
return static_cast<Category>(bytes_[lastChar] & categoryExtractMask);
}

struct MediumLarge {
Char* data_;
size_t size_;
size_t capacity_;

// 小端序情况下,capacityExtractMask = ~(size_t(categoryExtractMask) << kCategoryShift)
size_t capacity() const {
return kIsLittleEndian ? capacity_ & capacityExtractMask : capacity_ >> 2;
}
// kCategoryShift = (sizeof(size_t) - 1) * 8;
void setCapacity(size_t cap, Category cat) {
capacity_ = kIsLittleEndian
? cap | (static_cast<size_t>(cat) << kCategoryShift)
: (cap << 2) | static_cast<size_t>(cat);
}
};

由此可见,basic_string栈中最后一个字节的高两位被用来存储字符串类型,在短字符串中,该字节存储的是字符串的size,由于此时size最多为22,所以高两位是不需要的,所以可以用来存类别,而在中大型字符串中,这两位对应于capacity中的最高两位,现有主机的内存没有这么大,所以这高两位也不可能用到,因此可以用来存储字符串类型。由此可见,FBString对内存的利用控制得多精细。

拷贝

看完了构造函数,下面来看拷贝构造函数:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
fbstring_core(const fbstring_core& rhs) {
FBSTRING_ASSERT(&rhs != this);
switch (rhs.category()) {
case Category::isSmall:
copySmall(rhs);
break;
case Category::isMedium:
copyMedium(rhs);
break;
case Category::isLarge:
copyLarge(rhs);
break;
default:
folly::assume_unreachable();
}
FBSTRING_ASSERT(size() == rhs.size());
FBSTRING_ASSERT(memcmp(data(), rhs.data(), size() * sizeof(Char)) == 0);
}

下面来分别看不同尺寸下的拷贝构造函数是如何实现的。

  • Small
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
template <class Char>
inline void fbstring_core<Char>::copySmall(const fbstring_core& rhs) {
static_assert(offsetof(MediumLarge, data_) == 0, "fbstring layout failure");
static_assert(
offsetof(MediumLarge, size_) == sizeof(ml_.data_),
"fbstring layout failure");
static_assert(
offsetof(MediumLarge, capacity_) == 2 * sizeof(ml_.data_),
"fbstring layout failure");
// Just write the whole thing, don't look at details. In
// particular we need to copy capacity anyway because we want
// to set the size (don't forget that the last character,
// which stores a short string's length, is shared with the
// ml_.capacity field).
ml_ = rhs.ml_;
FBSTRING_ASSERT(
category() == Category::isSmall && this->size() == rhs.size());
}
  • Medium
1
2
3
4
5
6
7
8
9
10
11
12
template <class Char>
FOLLY_MALLOC_NOINLINE inline void fbstring_core<Char>::copyMedium(
const fbstring_core& rhs) {
//中型字符串总是采用eager-copy的方式,即字符串的拷贝总会重新分配内存并拷贝内容
auto const allocSize = goodMallocSize((1 + rhs.ml_.size_) * sizeof(Char));
ml_.data_ = static_cast<Char*>(checkedMalloc(allocSize));
fbstring_detail::podCopy(
rhs.ml_.data_, rhs.ml_.data_ + rhs.ml_.size_ + 1, ml_.data_);
ml_.size_ = rhs.ml_.size_;
ml_.setCapacity(allocSize / sizeof(Char) - 1, Category::isMedium);
FBSTRING_ASSERT(category() == Category::isMedium);
}
  • Large
1
2
3
4
5
6
7
8
template <class Char>
FOLLY_MALLOC_NOINLINE inline void fbstring_core<Char>::copyLarge(
const fbstring_core& rhs) {
//大型字符串采用COW
ml_ = rhs.ml_;
RefCounted::incrementRefs(ml_.data_);
FBSTRING_ASSERT(category() == Category::isLarge && size() == rhs.size());
}

下面来看字符串的修改是如何实现的,显而易见的是,Large的修改会导致内存的重新分配,Small及Medium字符串由于是采用的eager-copy的方式,修改的时候直接返回对应的内存地址即可,以basic_string为例来看:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
//basic_string
reference operator[](size_type pos) {
return *(begin() + pos);
}

iterator begin() {
//此处store即是fbstring_core
return store_.mutableData();
}

//fbstring_core
Char* mutableData() {
switch (category()) {
case Category::isSmall:
return small_;
case Category::isMedium:
return ml_.data_;
case Category::isLarge:
return mutableDataLarge();
}
folly::assume_unreachable();
}

template <class Char>
inline Char* fbstring_core<Char>::mutableDataLarge() {
FBSTRING_ASSERT(category() == Category::isLarge);
if (RefCounted::refs(ml_.data_) > 1) { //如果无其他字符串引用,则直接返回即可,否则需要重新分配内存
unshare();
}
return ml_.data_;
}

template <class Char>
FOLLY_MALLOC_NOINLINE inline void fbstring_core<Char>::unshare(
size_t minCapacity) {
FBSTRING_ASSERT(category() == Category::isLarge);
size_t effectiveCapacity = std::max(minCapacity, ml_.capacity());
//重新分配内存
auto const newRC = RefCounted::create(&effectiveCapacity);
FBSTRING_ASSERT(effectiveCapacity >= ml_.capacity());
//拷贝数据
fbstring_detail::podCopy(ml_.data_, ml_.data_ + ml_.size_ + 1, newRC->data_);
RefCounted::decrementRefs(ml_.data_);
ml_.data_ = newRC->data_;
ml_.setCapacity(effectiveCapacity, Category::isLarge);
}

总结

  • fbstring在std::string的基础上对于不同尺寸的string采用了不同类型的实现方式,对内存的使用控制非常精细。
  • 短字符串直接存储在栈中从而避免内存分配
  • 中型字符串采用了eager copy的实现方式,因为folly鼓励使用jemalloc来替代glibc下默认的ptmalloc,内存使用使用很高效,开辟这样尺寸的内存可以认为是非常高效的
  • 长字符串则采用了COW的实现,减少内存拷贝

Reference