C++ class memory layout
11 min read

C++ class memory layout

最近在用 eBPF uprobe 探针获取 C++ 程序运行时的内存,这需要对 C++(特别是 C++ class 继承)的内存布局有所了解。这篇文章通过测试代码和内存地址分析,验证 C++ class 多继承、虚继承场景下的内存布局。
C++ class memory layout
Photo by Liam Briese / Unsplash

C++ 虚函数表简介

C++ 中当一个类在实现的时候,如果存在一个或以上的虚函数,那么这个类便会包含一张虚函数表。而当一个子类继承并重写(override)了基类的虚函数时,它也会有自己的一张虚函数表。当我们在设计类的时候,如果把某个函数设置成虚函数,也就表明我们希望子类在继承的时候能够有自己的实现方式;如果我们明确这个类不会被继承,那么就不应该有虚函数的出现。

类 A 的实现如下:

// Example class for the vtable discussion: two virtual functions, two
// ordinary member functions and two int data members.
class A {
public:
    virtual void vfunc1();  // virtual: resolved through the vtable at run time
    virtual void vfunc2();  // virtual: resolved through the vtable at run time
            void func1();   // non-virtual: bound at compile time
            void func2();   // non-virtual: bound at compile time
private:
    // Two distinct data members (declaring the same name twice is ill-formed).
    int m_data1, m_data2;
};

类 A 的虚函数表的内存布局如下图所示:

虚函数表内存布局 by https://zhuanlan.zhihu.com/p/380147337

如果有如下代码所示的继承关系:

// Root of the A -> B -> C example hierarchy: two virtual functions, two
// ordinary member functions and two int data members.
class A {
public:
    virtual void vfunc1();  // virtual: subclasses may supply their own version
    virtual void vfunc2();  // virtual: not redefined by B or C below
            void func1();   // non-virtual: bound at compile time
            void func2();   // non-virtual: bound at compile time
private:
    // Two distinct data members (declaring the same name twice is ill-formed).
    int m_data1, m_data2;
};

// First-level subclass of A in the vtable example.
class B : public A {
public:
    // Same name and signature as the virtual A::vfunc1, so B provides its
    // own implementation of that virtual function.
    virtual void vfunc1();

    // Non-virtual; shares only its name with A::func2.
    void func2();

private:
    int m_data3;
};

// Second-level subclass (A -> B -> C) in the vtable example.
class C : public B {
public:
    // Same name and signature as the inherited virtual vfunc1, so C provides
    // its own implementation of that virtual function.
    virtual void vfunc1();

    // Non-virtual; shares only its name with the func2 of A and B.
    void func2();

private:
    int m_data1;  // C's own member; it only shares a name with A's m_data1
    int m_data4;
};

类 A B C 的虚函数表的内存布局如下所示:

多层继承虚函数内存布局 by https://zhuanlan.zhihu.com/p/380147337
  • 对于非虚函数,三个类中虽然都有一个叫 func2 的函数,但他们彼此互不关联,因此都是各自独立的,不存在重写(override)一说(子类的同名函数只是隐藏了父类的版本),在调用的时候也不需要进行查表的操作,直接调用即可。
  • 由于子类B和子类C都是继承于基类A,因此他们都会存在一个虚指针用于指向虚函数表。注意,假如子类B和子类C中不存在虚函数,那么这时他们将共用基类A的一张虚函数表,在B和C中用虚指针指向该虚函数表即可。但是,上面的代码设计时子类B和子类C中都有一个虚函数 vfunc1,因此他们就需要各自产生一张虚函数表,并用各自的虚指针指向该表。由于子类B和子类C都对 vfunc1 作了重写(override),因此 vfunc1 共有三种不同的实现,函数地址也不尽相同,在使用的时候需要从各自类的虚函数表中去查找对应的 vfunc1 地址。
  • 对于虚函数 vfunc2,两个子类都没有进行重写(override),所以基类A、子类B和子类C将共用一个 vfunc2,该虚函数的地址会分别保存在三个类的虚函数表中,但他们的地址是相同的。
  • 从上图可以发现,在类对象的头部存放着一个虚指针,该虚指针指向了各自类所维护的虚函数表,再通过查找虚函数表中的地址来找到对应的虚函数。
  • 对于类中的数据而言,子类中都会包含父类的信息。如上例中的子类C,它自己拥有一个变量 m_data1,似乎是和基类中的 m_data1 重名了,但其实他们并不存在联系,从存放的位置便可知晓。

C++ class 内存布局

普通继承 class 的内存布局

如下代码所示类继承关系:

// Base class of the single-inheritance layout demo.
//
// Data members are deliberately spread over several protected/private
// sections, and mem_addr() exposes the address of each one so that callers
// can print where every member lives inside an object. The declaration order
// of the data members is part of the experiment and must not be changed.
class Polygon
{
protected:
  // Zero-initialised so that an object that never had set_values() called on
  // it still holds well-defined values.
  int width = 0;
  int height = 0;
  long init_area = 0;

private:
  int _private = 111;

public:
  // Stores both dimensions and caches their product in init_area.
  // The operands are widened to long before multiplying.
  void set_values(int a, int b)
  {
    width = a;
    height = b;
    init_area = static_cast<long>(width) * static_cast<long>(height);
  }

  // Area of the shape; this base implementation always reports 0.
  virtual int area()
  {
    return 0;
  }

  // Returns the address of the i-th data member in declaration order:
  //   0 -> width, 1 -> height, 2 -> init_area,
  //   3 -> _private, 4 -> _private_2, 5 -> test2.
  // Any other index yields nullptr.
  void *mem_addr(int i)
  {
    switch (i)
    {
    case 0:
      return &width;
    case 1:
      return &height;
    case 2:
      return &init_area;
    case 3:
      return &_private;
    case 4:
      return &_private_2;
    case 5:
      return &test2;
    default:
      return nullptr;
    }
  }

private:
  int _private_2 = 222;

protected:
  int test2 = 333;
};

// Single-inheritance subclass of Polygon used by the layout demo. It adds two
// data members of its own and an accessor that exposes their addresses.
class Rectangle : public Polygon
{
public:
  // Prints a diagnostic line via foo(), then returns width * height.
  int area()
  {
    foo();
    return width * height;
  }

  // Prints the value obtained by casting the pointer-to-member-function
  // &Rectangle::mem_addr_rect to void *.
  // NOTE(review): casting a pointer-to-member-function to void * is not
  // standard C++; this appears to rely on a compiler extension -- confirm the
  // toolchain in use.
  void foo()
  {
    printf("Rectangle->mem_addr_rect addr: %p\n", (void *)(&Rectangle::mem_addr_rect));
  }

  // Returns the address of test3 (i == 0) or _private_3 (i == 1);
  // any other index yields a null pointer.
  void *mem_addr_rect(int i)
  {
    switch (i)
    {
    case 0:
      return &test3;
    case 1:
      return &_private_3;
    default:
      return nullptr;
    }
  }

protected:
  int test3;

private:
  int _private_3 = 0;
};

// Second, unrelated polymorphic base used to demonstrate multiple
// inheritance: two public ints, one virtual function and an accessor that
// exposes the addresses of the data members.
class Base
{
public:
  // Zero-initialised so that a default-constructed object holds well-defined
  // values.
  int a = 0;
  int b = 0;

  // Writes "function 1" followed by a newline to std::cout.
  virtual void function_1()
  {
    std::cout << "function 1" << std::endl;
  }

  // Returns the address of a (i == 0) or b (i == 1); any other index yields
  // nullptr.
  void *mem_addr_base(int i)
  {
    switch (i)
    {
    case 0:
      return &a;
    case 1:
      return &b;
    default:
      return nullptr;
    }
  }
};

// Multiple-inheritance example: derives from Rectangle first and Base second,
// and adds two data members of its own together with an address accessor.
class MultiRectangle : public Rectangle,
                       public Base
{
public:
  // Returns the address of test4 (i == 0) or _private_4 (i == 1);
  // any other index yields a null pointer.
  void *mem_addr_multi(int i)
  {
    switch (i)
    {
    case 0:
      return &test4;
    case 1:
      return &_private_4;
    default:
      return nullptr;
    }
  }

private:
  int _private_4 = 0;

protected:
  int test4;
};

通过代码探测类的内存布局,测试代码如下所示:

  Rectangle rect;
  Polygon *ppoly = &rect;
  ppoly->set_values(4, 5);
  ppoly->area();

  std::cout << "***********************************" << std::endl;
  std::cout << "[single inherit memory layout]" << std::endl;
  std::cout << "Rectangle addr: " << &rect << std::endl;
  std::cout << "Polygon addr: " << (Polygon *)(&rect) << std::endl;
  std::cout << "Rectangle.width addr: " << rect.mem_addr(0) << std::endl;
  std::cout << "Rectangle.height addr: " << rect.mem_addr(1) << std::endl;
  std::cout << "Rectangle.init_area addr: " << rect.mem_addr(2) << std::endl;
  std::cout << "Rectangle._private addr: " << rect.mem_addr(3) << std::endl;
  std::cout << "Rectangle._private_2 addr: " << rect.mem_addr(4) << std::endl;
  std::cout << "Rectangle.test2 addr: " << rect.mem_addr(5) << std::endl;
  std::cout << "Rectangle.test3 addr: " << rect.mem_addr_rect(0) << std::endl;
  std::cout << "Rectangle._private_3 addr: " << rect.mem_addr_rect(1) << std::endl;

  // Multiple-inheritance probe: prints the address of a MultiRectangle, of
  // each base-class subobject, of every data member, and of the member
  // functions.
  //
  // The "vptr function" lines read the first pointer-sized word of a
  // subobject as the vptr and then the first word it points to. They use
  // `long` as the pointer-sized integer.
  // NOTE(review): that assumes sizeof(long) == sizeof(void *) -- confirm for
  // the target platform.
  // NOTE(review): the (void *)(&Class::member_function) casts are not
  // standard C++ and appear to rely on a compiler extension -- confirm the
  // toolchain in use.
  MultiRectangle multi_rect;
  std::cout << "***********************************" << std::endl;
  std::cout << "[multiple inherit memory layout]" << std::endl;
  std::cout << "MultiRectangle addr: " << &multi_rect << std::endl;
  std::cout << "Rectangle addr: " << (Rectangle *)(&multi_rect) << std::endl;
  std::cout << "Base addr: " << (Base *)(&multi_rect) << std::endl;

  std::cout << "MultiRectangle->Rectangle vptr addr: " << (long *)((Rectangle *)(&multi_rect)) << std::endl;
  std::cout << "MultiRectangle->Rectangle vptr function: " << (void *)(*(long *)*((long *)((Rectangle *)(&multi_rect)))) << std::endl;
  std::cout << "MultiRectangle->Rectangle->Polygon vptr addr: " << (long *)((Polygon *)(&multi_rect)) << std::endl;
  std::cout << "MultiRectangle->Rectangle->Polygon vptr function: " << (void *)(*(long *)*((long *)((Polygon *)(&multi_rect)))) << std::endl;
  std::cout << "MultiRectangle->Rectangle->Polygon->width addr: " << multi_rect.mem_addr(0) << std::endl;
  std::cout << "MultiRectangle->Rectangle->Polygon->height addr: " << multi_rect.mem_addr(1) << std::endl;
  std::cout << "MultiRectangle->Rectangle->Polygon->init_area addr: " << multi_rect.mem_addr(2) << std::endl;
  std::cout << "MultiRectangle->Rectangle->Polygon->_private addr: " << multi_rect.mem_addr(3) << std::endl;
  std::cout << "MultiRectangle->Rectangle->Polygon->_private_2 addr: " << multi_rect.mem_addr(4) << std::endl;
  std::cout << "MultiRectangle->Rectangle->Polygon->test2 addr: " << multi_rect.mem_addr(5) << std::endl;
  std::cout << "MultiRectangle->Rectangle->Polygon->set_values[FUNC] addr: " << (void *)(&Polygon::set_values) << std::endl;
  std::cout << "MultiRectangle->Rectangle->Polygon->area addr[FUNC]: " << (void *)(&Polygon::area) << std::endl;
  std::cout << "MultiRectangle->Rectangle->test3 addr: " << multi_rect.mem_addr_rect(0) << std::endl;
  std::cout << "MultiRectangle->Rectangle->_private_3 addr: " << multi_rect.mem_addr_rect(1) << std::endl;
  std::cout << "MultiRectangle->Rectangle->mem_addr_rect[FUNC] addr: " << (void *)(&Rectangle::mem_addr_rect) << std::endl;
  std::cout << "MultiRectangle->Rectangle->area[FUNC] addr: " << (void *)(&Rectangle::area) << std::endl;
  std::cout << "MultiRectangle->Rectangle->foo[FUNC] addr: " << (void *)(&Rectangle::foo) << std::endl;
  std::cout << "MultiRectangle->mem_addr_multi[FUNC] addr: " << (void *)(&MultiRectangle::mem_addr_multi) << std::endl;
  std::cout << "MultiRectangle->Base vptr addr: " << (Base *)(&multi_rect) << std::endl;
  std::cout << "MultiRectangle->Base vptr function: " << (void *)(*(long *)*((long *)((Base *)(&multi_rect)))) << std::endl;

  // Inspect the bytes that follow Rectangle::_private_3. The byte offsets are
  // applied through char * because arithmetic on void * is not valid standard
  // C++, and the result is cast back to void * so that it is printed as an
  // address rather than as a C string.
  char *private_3_addr = static_cast<char *>(multi_rect.mem_addr_rect(1));
  std::cout << "????? Base: " << static_cast<void *>(private_3_addr + 4) << std::endl;
  std::cout << "????? Base: " << *((int *)(private_3_addr + 4)) << std::endl;
  std::cout << "????? Base: " << (void *)(*(long *)*((long *)(private_3_addr + 8))) << std::endl;
  std::cout << "MultiRectangle->Base->a addr: " << multi_rect.mem_addr_base(0) << std::endl;
  std::cout << "MultiRectangle->Base->b addr: " << multi_rect.mem_addr_base(1) << std::endl;
  std::cout << "MultiRectangle->Base->function_1[FUNC] addr: " << (void *)(&Base::function_1) << std::endl;
  std::cout << "MultiRectangle->Base->mem_addr_base[FUNC] addr: " << (void *)(&Base::mem_addr_base) << std::endl;
  std::cout << "MultiRectangle vptr addr: " << (long *)(&multi_rect) << std::endl;
  std::cout << "MultiRectangle vptr function: " << (void *)(*(long *)*((long *)(&multi_rect))) << std::endl;
  // mem_addr_multi(0) is the address of test4 and mem_addr_multi(1) is the
  // address of _private_4, so each label names the member actually printed.
  std::cout << "MultiRectangle->test4 addr: " << multi_rect.mem_addr_multi(0) << std::endl;
  std::cout << "MultiRectangle->_private_4 addr: " << multi_rect.mem_addr_multi(1) << std::endl;
  std::cout << "MultiRectangle->mem_addr_multi[FUNC] addr: " << (void *)(&MultiRectangle::mem_addr_multi) << std::endl;

测试结果分析如下图所示:

普通继承 class 内存布局分析表

虚继承 class 的内存布局

如下代码所示类虚继承关系:

// Shared base of the virtual-inheritance example: one public int and two
// virtual functions that print their own qualified name.
class B
{
public:
  int ib;

  // Also serves as the default constructor (ib defaults to 1).
  B(int i = 1) : ib{i} {}

  virtual void f() { std::cout << "B::f()" << std::endl; }

  virtual void Bf() { std::cout << "B::Bf()" << std::endl; }
};
// First intermediate class of the diamond: inherits B virtually, adds one
// public int and three virtual functions, each printing its qualified name.
class B1 : virtual public B
{
public:
  int ib1;

  // Also serves as the default constructor (ib1 defaults to 100).
  B1(int i = 100) : ib1{i} {}

  // Same name and signature as the virtual B::f.
  virtual void f() { std::cout << "B1::f()" << std::endl; }

  virtual void f1() { std::cout << "B1::f1()" << std::endl; }

  virtual void Bf1() { std::cout << "B1::Bf1()" << std::endl; }
};
// Second intermediate class of the diamond: inherits B virtually, adds one
// public int and three virtual functions, each printing its qualified name.
class B2 : virtual public B
{
public:
  int ib2;

  // Also serves as the default constructor (ib2 defaults to 1000).
  B2(int i = 1000) : ib2{i} {}

  // Same name and signature as the virtual B::f.
  virtual void f() { std::cout << "B2::f()" << std::endl; }

  virtual void f2() { std::cout << "B2::f2()" << std::endl; }

  virtual void Bf2() { std::cout << "B2::Bf2()" << std::endl; }
};
// Bottom of the diamond: derives from B1 first and B2 second (both of which
// inherit B virtually), adds one public int and four virtual functions, each
// printing its qualified name.
class D : public B1, public B2
{
public:
  int id;

  // Also serves as the default constructor (id defaults to 10000).
  D(int i = 10000) : id{i} {}

  // Same name and signature as f() in B, B1 and B2.
  virtual void f() { std::cout << "D::f()" << std::endl; }

  // Same name and signature as B1::f1.
  virtual void f1() { std::cout << "D::f1()" << std::endl; }

  // Same name and signature as B2::f2.
  virtual void f2() { std::cout << "D::f2()" << std::endl; }

  virtual void Df() { std::cout << "D::Df()" << std::endl; }
};

通过代码探测类的内存布局,测试代码如下所示:

  // Virtual-inheritance probe: prints the address of a D object, of each of
  // its base-class subobjects (B1, B2 and the virtual base B), of every data
  // member, and of the member functions.
  // NOTE(review): the (void *)&Class::member_function casts are not standard
  // C++ and appear to rely on a compiler extension -- confirm the toolchain
  // in use.
  D d;
  std::cout << "***********************************" << std::endl;
  std::cout << "[multiple virtual inherit memory layout]" << std::endl;
  cout << "D->vptr addr: " << &d << endl;
  cout << "D->B1->vptr addr: " << (B1 *)(&d) << endl;
  cout << "D->B1->ib1 addr: " << &d.ib1 << endl;
  cout << "D->B1->f[FUNC] addr: " << (void *)&B1::f << endl;
  cout << "D->B1->f1[FUNC] addr: " << (void *)&B1::f1 << endl;
  cout << "D->B1->Bf1[FUNC] addr: " << (void *)&B1::Bf1 << endl;
  cout << "D->B2->vptr addr: " << (B2 *)(&d) << endl;
  cout << "D->B2->ib2 addr: " << &d.ib2 << endl;
  cout << "D->B2->f[FUNC] addr: " << (void *)&B2::f << endl;
  // Labels name the functions actually printed: B2::f2 and B2::Bf2.
  cout << "D->B2->f2[FUNC] addr: " << (void *)&B2::f2 << endl;
  cout << "D->B2->Bf2[FUNC] addr: " << (void *)&B2::Bf2 << endl;
  cout << "D->id addr: " << &d.id << endl;
  cout << "D->f[FUNC] addr: " << (void *)&D::f << endl;
  cout << "D->f1[FUNC] addr: " << (void *)&D::f1 << endl;
  cout << "D->f2[FUNC] addr: " << (void *)&D::f2 << endl;
  cout << "D->Df[FUNC] addr: " << (void *)&D::Df << endl;
  cout << "D->B1->B vptr addr: " << (B *)(&d) << endl;
  cout << "D->B1->B->ib addr: " << &d.ib << endl;
  cout << "D->B1->B->f[FUNC] addr: " << (void *)&B::f << endl;
  cout << "D->B1->B->Bf[FUNC] addr: " << (void *)&B::Bf << endl;

测试结果分析如下图所示:

虚继承 class 内存布局分析表格

总结

  • class member 是按照定义的顺序排列的;
  • 含有虚函数的 class,其对象首地址处存放的是虚函数表指针即 vptr,在 64 位平台上占 8 字节;
  • 紧接着 vptr 的是 class 数据成员,按照各自的 size 和对齐要求做地址偏移;

References

  1. 浅析C++类的内存布局 - 知乎
  2. 图说C++对象模型:对象内存布局详解 - melonstreet - 博客园
  3. C++对象内存模型 - Tango的博客 | Tango's Blog
  4. C++中的虚指针与虚函数表 - 知乎

Public discussion