0%

C++ Virtual(虚函数)

C++ Virtual

Virtual Function

what’s virtual

The virtual specifier specifies that a non-static member function is virtual and supports dynamic dispatch. It may only appear in the decl-specifier-seq of the initial declaration of a non-static member function (i.e., when it is declared in the class definition).

Explanation

Virtual functions are member functions whose behavior can be overridden in derived classes. As opposed to non-virtual functions, the overridden behavior is preserved even if there is no compile-time information about the actual type of the class. If a derived class is handled using pointer or reference to the base class, a call to an overridden virtual function would invoke the behavior defined in the derived class. This behavior is suppressed if the function is selected using qualified name lookup (that is, if the function’s name appears to the right of the scope resolution operator ::).

1
#include <iostream>
2
struct Base {
3
   virtual void f() {
4
       std::cout << "base\n";
5
   }
6
};
7
struct Derived : Base {
8
    void f() override { // 'override' is optional
9
        std::cout << "derived\n";
10
    }
11
};
12
int main()
13
{
14
    Base b;
15
    Derived d;
16
 
17
    // virtual function call through reference
18
    Base& br = b; // the type of br is Base&
19
    Base& dr = d; // the type of dr is Base& as  well
20
    br.f(); // prints "base"
21
    dr.f(); // prints "derived"
22
 
23
    // virtual function call through pointer
24
    Base* bp = &b; // the type of bp is Base*
25
    Base* dp = &d; // the type of dp is Base* as  well
26
    bp->f(); // prints "base"
27
    dp->f(); // prints "derived"
28
 
29
    // non-virtual function call
30
    br.Base::f(); // prints "base"
31
    dr.Base::f(); // prints "base"
32
}

C++中使用 classname::functionname 可以直接获取成员函数的地址指针

1
// non-virtual function call
2
br.Base::f(); // prints "base"
3
dr.Base::f(); // prints "base"

不支持多重继承的语言不需要提供virtual词法,内部提供了类似virtual的机制,且默认使用

another example (why virtual required)

1
class Animal
2
{
3
    public:
4
        void eat() { std::cout << "I'm eating generic food."; }
5
};
6
7
class Cat : public Animal
8
{
9
    public:
10
        void eat() { std::cout << "I'm eating a rat."; }
11
};

In your main function:

1
Animal *animal = new Animal;
2
Cat *cat = new Cat;
3
4
animal->eat(); // Outputs: "I'm eating generic food."
5
cat->eat();    // Outputs: "I'm eating a rat."

So far so good, right? Animals eat generic food, cats eat rats, all without virtual.

Let’s change it a little now so that eat() is called via an intermediate function (a trivial function just for this example):

1
// This can go at the top of the main.cpp file
2
void func(Animal *xyz) { xyz->eat(); }

Now our main function is:

1
Animal *animal = new Animal;
2
Cat *cat = new Cat;
3
4
func(animal); // Outputs: "I'm eating generic food."
5
func(cat);    // Outputs: "I'm eating generic food."

Uh oh… we passed a Cat into func(), but it won’t eat rats. Should you overload func() so it takes a Cat*? If you have to derive more animals from Animal they would all need their own func().

The solution is to make eat() from the Animal class a virtual function:

1
class Animal
2
{
3
    public:
4
        virtual void eat() { std::cout << "I'm eating generic food."; }
5
};
6
7
class Cat : public Animal
8
{
9
    public:
10
        void eat() { std::cout << "I'm eating a rat."; }
11
};

Main:

1
func(animal); // Outputs: "I'm eating generic food."
2
func(cat);    // Outputs: "I'm eating a rat."

In detail

If some member function vf is declared as virtual in a class Base, and some class Derived, which is derived, directly or indirectly, from Base, has a declaration for member function with the same

  • name
  • parameter type list (but not the return type)
  • cv-qualifiers
  • ref-qualifiers

Then this function in the class Derived is also virtual (whether or not the keyword virtual is used in its declaration) and overrides Base::vf (whether or not the word override is used in its declaration).

Base::vf does not need to be visible (can be declared private, or inherited using private inheritance) to be overridden.

1
class B 
2
{
3
    virtual void do_f(); // private member
4
 public:
5
    void f() { do_f(); } // public interface
6
};
7
struct D : public B 
8
{
9
    void do_f() override; // overrides B::do_f
10
};
11
 
12
int main()
13
{
14
    D d;
15
    B* bp = &d;
16
    bp->f(); // internally calls D::do_f();
17
}

For every virtual function, there is the final overrider, which is executed when a virtual function call is made. A virtual member function vf of a base class Base is the final overrider unless the derived class declares or inherits (through multiple inheritance) another function that overrides vf.

1
struct A { virtual void f(); };     // A::f is virtual
2
struct B : A { void f(); };         // B::f overrides A::f in B
3
struct C : virtual B { void f(); }; // C::f overrides A::f in C
4
struct D : virtual B {}; // D does not introduce an overrider, B::f is final in D
5
struct E : C, D  {       // E does not introduce an overrider, C::f is final in E
6
    using A::f; // not a function declaration, just makes A::f visible to lookup
7
};
8
int main() {
9
   E e;
10
   e.f();    // virtual call calls C::f, the final overrider in e
11
   e.E::f(); // non-virtual call calls A::f, which is visible in E
12
}

A function with the same name but different parameter list does not override the base function of the same name, but hides it: when unqualified name lookup examines the scope of the derived class, the lookup finds the declaration and does not examine the base class.

1
struct B {
2
    virtual void f();
3
};
4
struct D : B {
5
    void f(int); // D::f hides B::f (wrong parameter list)
6
};
7
struct D2 : D {
8
    void f(); // D2::f overrides B::f (doesn't matter that it's not visible)
9
};
10
 
11
int main()
12
{
13
    B b;   B& b_as_b   = b;
14
    D d;   B& d_as_b   = d;    D& d_as_d = d;
15
    D2 d2; B& d2_as_b  = d2;   D& d2_as_d = d2;
16
 
17
    b_as_b.f(); // calls B::f()
18
    d_as_b.f(); // calls B::f()
19
    d2_as_b.f(); // calls D2::f()
20
 
21
    d_as_d.f(); // Error: lookup in D finds only f(int)
22
    d2_as_d.f(); // Error: lookup in D finds only f(int)
23
}

If a function is declared with the specifier override, but does not override a virtual function, the program is ill-formed:

1
struct B {
2
    virtual void f(int);
3
};
4
struct D : B {
5
    virtual void f(int) override; // OK, D::f(int) overrides B::f(int)
6
    virtual void f(long) override; // Error: f(long) does not override B::f(int)
7
};

总结:

  • Non-virtual method ⇒ static binding
  • virtual method => dynamic runtime binding
  • Non-member functions and static member functions cannot be virtual.
  • 若基类中的成员函数声明为虚函数,派生类中重写的virtual函数自动成为虚函数

Covariant return types

If the function Derived::f overrides a function Base::f, their return types must either be the same or be covariant. Two types are covariant if they satisfy all of the following requirements:

  • both types are pointers or references (lvalue or rvalue) to classes. Multi-level pointers or references are not allowed.
  • the referenced/pointed-to class in the return type of Base::f() must be an unambiguous and accessible direct or indirect base class of the referenced/pointed-to class of the return type of Derived::f().
  • the return type of Derived::f() must be equally or less cv-qualified than the return type of Base::f().

The class in the return type of Derived::f must be either Derived itself, or must be a complete type at the point of declaration of Derived::f.

When a virtual function call is made, the type returned by the final overrider is implicitly converted to the return type of the overridden function that was called:

1
class B {};
2
 
3
struct Base {
4
    virtual void vf1();
5
    virtual void vf2();
6
    virtual void vf3();
7
    virtual B* vf4();
8
    virtual B* vf5();
9
};
10
 
11
class D : private B {
12
    friend struct Derived; // in Derived, B is an accessible base of D
13
};
14
 
15
class A; // forward-declared class is an incomplete type
16
 
17
struct Derived : public Base {
18
    void vf1();    // virtual, overrides Base::vf1()
19
    void vf2(int); // non-virtual, hides Base::vf2()
20
//  char vf3();    // Error: overrides Base::vf3, but has different
21
                   // and non-covariant return type
22
    D* vf4();      // overrides Base::vf4() and has covariant return type
23
//  A* vf5();      // Error: A is incomplete type
24
};
25
 
26
int main()
27
{
28
    Derived d;
29
    Base& br = d;
30
    Derived& dr = d;
31
 
32
    br.vf1(); // calls Derived::vf1()
33
    br.vf2(); // calls Base::vf2()
34
//  dr.vf2(); // Error: vf2(int) hides vf2()
35
 
36
    B* p = br.vf4(); // calls Derived::vf4() and converts the result to B*
37
    D* q = dr.vf4(); // calls Derived::vf4() and does not convert
38
                     //  the result to B*
39
}

Virtual destructor

父类有virtual修饰的方法,且被子类覆盖,需要注意必须使用virtual声明父类的析构函数。

上述场景一般使用工厂设计模式,定义一个父类的指针,指向子类对象,而在delete 父类指针时,期望释放对象。但父类析构函数不加Virtual修饰,则只会调用父类析构函数,而不调用子类析构函数,导致只释放了对象的父类部分,而子类部分没有释放。Virtual含义就是有一个函数映射表,调用时会去查映射表调用对应的方法。

Even though destructors are not inherited, if a base class declares its destructor virtual, the derived destructor always overrides it. This makes it possible to delete dynamically allocated objects of polymorphic type through pointers to base.

1
class Base {
2
 public:
3
    virtual ~Base() { /* releases Base's resources */ }
4
};
5
 
6
class Derived : public Base {
7
    ~Derived() { /* releases Derived's resources */ }
8
};
9
 
10
int main()
11
{
12
    Base* b = new Derived;
13
    delete b; // Makes a virtual function call to Base::~Base()
14
              // since it is virtual, it calls Derived::~Derived() which can
15
              // release resources of the derived class, and then calls
16
              // Base::~Base() following the usual order of destruction
17
}

Moreover, if a class is polymorphic (declares or inherits at least one virtual function), and its destructor is not virtual, deleting an object of a derived class through a pointer to that base class is undefined behavior regardless of whether there are resources that would be leaked if the derived destructor is not invoked.

virtual function table

what do vtables have to do with all this?

Well, there are cases where it is not possible for the compiler to know which routine to execute at compile time. This is the case, for instance, when we declare virtual functions:

1
#include <iostream>
2
class B
3
{
4
public:
5
  virtual void bar();
6
  virtual void qux();
7
};
8
9
void B::bar()
10
{
11
  std::cout << "This is B's implementation of bar" << std::endl;
12
}
13
14
void B::qux()
15
{
16
  std::cout << "This is B's implementation of qux" << std::endl;
17
}

The thing about virtual functions is that they can be overridden by subclasses:

1
class C : public B
2
{
3
public:
4
  void bar() override;
5
};
6
7
void C::bar()
8
{
9
  std::cout << "This is C's implementation of bar" << std::endl;
10
}

Now consider the following call to bar():

1
B* b = new C();
2
b->bar();

If we use static dispatch as above, the call b->bar() would execute B::bar(), since (from the point of view of the compiler) b points to an object of type B. This would be horribly wrong, of course, because b actually points to an object of type C and C::bar() should be called instead.

Hopefully you can see the problem by now: given that virtual functions can be redefined in subclasses, calls via pointers (or references) to a base type can not be dispatched at compile time. The compiler has to find the right function definition (i.e. the most specific one) at runtime. This process is called dynamic dispatch or late method binding.

how do we implement dynamic dispatch?

For every class that contains virtual functions, the compiler constructs a virtual table, a.k.a vtable. The vtable contains an entry for each virtual function accessible by the class and stores a pointer to its definition. Only the most specific function definition callable by the class is stored in the vtable. Entries in the vtable can point to either functions declared in the class itself (e.g. C::bar()), or virtual functions inherited from a base class (e.g. C::qux()).

In our example, the compiler will create the following virtual tables:
vtables

The vtable of class B has two entries, one for each of the two virtual functions declared in B’s scope: bar() and qux(). Additionally, the vtable of B points to the local definition of functions, since they are the most specific (and only) from B’s point of view.

More interesting is C’s vtable. In this case, the entry for bar() points to C’s own implementation, given that it is more specific than B::bar(). Since C doesn’t override qux(), its entry in the vtable points to B’s definition (the most specific definition).

Note that vtables exist at the class level, meaning there exists a single vtable per class, and is shared by all instances.

Vpointers

You might be thinking: vtables are cool and all, but how exactly do they solve the problem? When the compiler sees b->bar() in the example above, it will lookup B’s vtable for bar’s entry and follow the corresponding function pointer, right? We would still be calling B::bar() and not C::bar()…

Very true, I still need to tell the second part of the story: vpointers. Every time the compiler creates a vtable for a class, it adds an extra member to the class: a pointer to the corresponding virtual table, called the vpointer.

Note that the vpointer is just another class member added by the compiler and increases the size of every object that has a vtable by sizeof(vpointer).

Hopefully you have grasped how dynamic function dispatch can be implemented by using vtables: when a call to a virtual function on an object is performed, the vpointer of the object is used to find the corresponding vtable of the class. Next, the function name is used as index to the vtable to find the correct (most specific) routine to be executed. Done!

Virtual Destructors

By now it should also be clear why it is always a good idea to make destructors of base classes virtual. Since derived classes are often handled via base class pointers or references, a non-virtual destructor will be dispatched statically, so the destructor of the derived class is bypassed:

1
#include <iostream>
2
3
class Base
4
{
5
public:
6
  ~Base()
7
  {
8
    std::cout << "Destroying base" << std::endl;
9
  }
10
};
11
12
class Derived : public Base
13
{
14
public:
15
  Derived(int number)
16
  {
17
    some_resource_ = new int(number);
18
  }
19
20
  ~Derived()
21
  {
22
    std::cout << "Destroying derived" << std::endl;
23
    delete some_resource_;
24
  }
25
26
private:
27
  int* some_resource_;
28
};
29
30
int main()
31
{
32
  Base* p = new Derived(5);
33
  delete p;
34
}

This will output:

1
Destroying base

Making Base’s destructor virtual will result in the expected behavior:

1
Destroying derived
2
Destroying base

Wrapping up

  1. Function overriding makes it impossible to dispatch virtual functions statically (at compile time)
  2. Dispatching of virtual functions needs to happen at runtime
  3. The virtual table method is a popular implementation of dynamic dispatch
  4. For every class that defines or inherits virtual functions the compiler creates a virtual table
  5. The virtual table stores a pointer to the most specific definition of each virtual function
  6. For every class that has a vtable, the compiler adds an extra member to the class: the vpointer
  7. The vpointer points to the corresponding vtable of the class
  8. Always declare destructors of base classes as virtual

Virtual inherit

通过合理的重构和设计,避开使用虚继承

扩展阅读

C++的编译器应该是保证虚函数表的指针存在于对象实例中最前面的位置(这是为了保证取到虚函数表的有最高的性能——如果有多层继承或是多重继承的情况下)。 这意味着我们通过对象实例的地址得到这张虚函数表,然后就可以遍历其中函数指针,并调用相应的函数。

1
class Base {
2
     public:
3
            virtual void f() { cout << "Base::f" << endl; }
4
            virtual void g() { cout << "Base::g" << endl; }
5
            virtual void h() { cout << "Base::h" << endl; }
6
};
7
8
//Base b;
9
//sizeof (b); //outputs 8

按照上面的说法,我们可以通过Base的实例来得到虚函数表。 下面是实际例程:

1
typedef void(*Fun)(void);
2
Base b;
3
Fun pFun = NULL;
4
5
cout << "虚函数表地址:" << (int*)(&b) << endl;
6
cout << "虚函数表 — 第一个函数地址:" << (int*)*(int*)(&b) << endl;
7
8
// Invoke the first virtual function 
9
pFun = (Fun)*((int*)*(int*)(&b));
10
pFun();
11
12
//运行结果
13
虚函数表地址:0x7ffdc351ecf0
14
虚函数表 — 第一个函数地址:0x400e50
15
Base::f

可以看到,通过强行把 &b 转成int*,取得虚函数表的地址,然后,再次取址就可以得到第一个虚函数的地址了,也就是Base::f(),这在上面的程序中得到了验证(把int* 强制转成了函数指针)。通过这个示例,我们就可以知道如果要调用Base::g()和Base::h()

1
(Fun)*((int*)*(int*)(&b)+0);  // Base::f()
2
(Fun)*((int*)*(int*)(&b)+1);  // Base::g()
3
(Fun)*((int*)*(int*)(&b)+2);  // Base::h()

virtual table display

上图中,虚函数表的最后多加了一个结点,这是虚函数表的结束结点,就像字符串的结束符“\0”一样,其标志了虚函数表的结束。这个结束标志的值在不同的编译器下是不同的。在WinXP+VS2003下,这个值是NULL。而在Ubuntu 7.10 + Linux 2.6.22 + GCC 4.1.3下,如果这个值是1,表示还有下一个虚函数表;如果值是0,表示是最后一个虚函数表。

下面将分别说明“无覆盖”和“有覆盖”时的虚函数表的样子。没有覆盖父类的虚函数是毫无意义的。作为对比,先说明没有覆盖的情况。在比较之下,我们可以更加清楚地知道其内部的具体实现。

一般继承(无虚函数覆盖)

假设有如下所示的一个继承关系
inherit

注意,在这个继承关系中,子类没有覆盖任何父类的函数。那么,在派生类的实例中,
对于实例:Derive d; 的虚函数表如下:
vtable

显然可见:

  1. 虚函数按照其声明顺序放于表中。
  2. 父类的虚函数在子类的虚函数前面。

一般继承(有虚函数覆盖)

假设有如下所示的一个继承关系
inherit

在这个类的设计中,我只覆盖了父类的一个函数:f()。那么,对于派生类的实例,其虚函数表会是下面的一个样子
vtable

我们从表中可以看到下面几点,

  1. 覆盖的f()函数被放到了虚表中原来父类虚函数的位置。
  2. 没有被覆盖的函数依旧。

下面由b所指的内存中的虚函数表的f()的位置已经被Derive::f()函数地址所取代,于是在实际调用发生时,是Derive::f()被调用了。这就实现了多态。

1
Base *b = new Derive();
2
b->f();

多重继承(无虚函数覆盖)

设有下面这样一个类的继承关系。注意:子类并没有覆盖父类的函数。
inherit
子类实例中的虚函数表,是下面这个样子:
vtable

我们可以看到:

  1. 每个父类都有自己的虚表。
  2. 子类的成员函数被放到了第一个父类的表中。(所谓的第一个父类是按照声明顺序来判断的)

这样做就是为了解决不同的父类类型的指针指向同一个子类实例,而能够调用到实际的函数。

多重继承(有虚函数覆盖)

设有下面这样一个类的继承关系。 注意:我们在子类中覆盖了父类的f()函数。
inherit
子类实例中的虚函数表,是下面这个样子:
vtable

我们可以看见,三个父类虚函数表中的f()的位置被替换成了子类的函数指针。这样,我们就可以用任一静态类型的父类指针来指向子类,并调用子类的f()了。如:

1
Derive d;
2
3
Base1 *b1 = &d;
4
Base2 *b2 = &d;
5
Base3 *b3 = &d;
6
7
b1->f(); //Derive::f()
8
b2->f(); //Derive::f()
9
b3->f(); //Derive::f()
10
 
11
b1->g(); //Base1::g()
12
b2->g(); //Base2::g()
13
b3->g(); //Base3::g()

安全性

通过父类型的指针访问子类自己的虚函数

我们知道,子类没有覆盖父类的虚函数是一件毫无意义的事情。因为多态也是要基于函数覆盖的。虽然在上面的图中我们可以看到Base1的虚表中有Derive的虚函数,但我们根本不可能使用下面的语句来调用子类的自有虚函数:

1
Base1 *b1 = new Derive();
2
b1->f1();  //编译出错

任何妄图使用父类指针想调用子类中的未覆盖父类的成员函数的行为都会被编译器视为非法,所以,这样的程序根本无法编译通过。

但在运行时,我们可以通过指针的方式访问虚函数表来达到违反C++语义的行为。

访问non-public的虚函数

如果父类的虚函数是private或是protected的,但这些非public的虚函数同样会存在于虚函数表中,所以,我们同样可以使用访问虚函数表的方式来访问这些non-public的虚函数,这是很容易做到的。

1
class Base {
2
	private:
3
	    virtual void f() { cout << "Base::f" << endl; }
4
};
5
 
6
class Derive : public Base{
7
};
8
 
9
typedef void(*Fun)(void);
10
 
11
void main() {
12
    Derive d;
13
    Fun  pFun = (Fun)*((int*)*(int*)(&d)+0);
14
    pFun();
15
}
附录:多重继承运行时环境父类型的指针访问子类自己的虚函数
1
#include <iostream>
2
using namespace std;
3
 
4
class Base1 {
5
public:
6
      virtual void f() { cout << "Base1::f" << endl; }
7
      virtual void g() { cout << "Base1::g" << endl; }
8
      virtual void h() { cout << "Base1::h" << endl; }
9
};
10
 
11
class Base2 {
12
public:
13
      virtual void f() { cout << "Base2::f" << endl; }
14
      virtual void g() { cout << "Base2::g" << endl; }
15
      virtual void h() { cout << "Base2::h" << endl; }
16
};
17
 
18
class Base3 {
19
public:
20
      virtual void f() { cout << "Base3::f" << endl; }
21
      virtual void g() { cout << "Base3::g" << endl; }
22
      virtual void h() { cout << "Base3::h" << endl; }
23
};
24
 
25
 
26
class Derive : public Base1, public Base2, public Base3 {
27
public:
28
      virtual void f() { cout << "Derive::f" << endl; }
29
      virtual void g1() { cout << "Derive::g1" << endl; }
30
};
31
 
32
typedef void(*Fun)(void);
33
 
34
int main() 
35
{
36
      Fun pFun = NULL;
37
38
      Derive d;
39
      int** pVtab = (int**)&d;
40
41
      //Base1's vtable
42
      //pFun = (Fun)*((int*)*(int*)((int*)&d+0)+0);
43
      pFun = (Fun)pVtab[0][0];
44
      pFun();
45
46
      //pFun = (Fun)*((int*)*(int*)((int*)&d+0)+1);
47
      pFun = (Fun)pVtab[0][1];
48
      pFun();
49
50
      //pFun = (Fun)*((int*)*(int*)((int*)&d+0)+2);
51
      pFun = (Fun)pVtab[0][2];
52
      pFun();
53
54
      //Derive's vtable
55
      //pFun = (Fun)*((int*)*(int*)((int*)&d+0)+3);
56
      pFun = (Fun)pVtab[0][3];
57
      pFun();
58
59
      //The tail of the vtable
60
      pFun = (Fun)pVtab[0][4];
61
      cout<<pFun<<endl;
62
63
64
      //Base2's vtable
65
      //pFun = (Fun)*((int*)*(int*)((int*)&d+1)+0);
66
      pFun = (Fun)pVtab[1][0];
67
      pFun();
68
69
      //pFun = (Fun)*((int*)*(int*)((int*)&d+1)+1);
70
      pFun = (Fun)pVtab[1][1];
71
      pFun();
72
73
      pFun = (Fun)pVtab[1][2];
74
      pFun(); 
75
76
      //The tail of the vtable
77
      pFun = (Fun)pVtab[1][3];
78
      cout<<pFun<<endl;
79
80
81
82
      //Base3's vtable
83
      //pFun = (Fun)*((int*)*(int*)((int*)&d+1)+0);
84
      pFun = (Fun)pVtab[2][0];
85
      pFun();
86
87
      //pFun = (Fun)*((int*)*(int*)((int*)&d+1)+1);
88
      pFun = (Fun)pVtab[2][1];
89
      pFun();
90
91
      pFun = (Fun)pVtab[2][2];
92
      pFun(); 
93
94
      //The tail of the vtable
95
      pFun = (Fun)pVtab[2][3];
96
      cout<<pFun<<endl;
97
98
      return 0;
99
}

@Reference