objc-runtime梳理二杂七杂八的笔记

jiezi

5 年前

本文是学习 runtime 过程中的笔记，主要是对象初始化和对象结构这块的，比较细碎，emmm，基本上不太是给人看的。

Class 和 Object 本质上都是结构体。

其定义如下：

// `id` is a pointer to an objc_object; `Class` is a pointer to an objc_class.
typedef struct objc_object *id;
typedef struct objc_class *Class;

// Every object consists of (at least) a single `isa` field.
struct objc_object {
private:
    isa_t isa;
};

// A class is itself an object: objc_class derives from objc_object, so the
// inherited `isa` comes first, followed by the three members below.
struct objc_class : objc_object {
    // Class ISA;
    Class superclass;
    cache_t cache;             // formerly cache pointer and vtable
    class_data_bits_t bits;    // class_rw_t * plus custom rr/alloc flags
};

对于一个 Objc 的类，运行时会有一个唯一的 objc_class 与之对应，这个类的每个实例就是个objc_object。

objc_object有个成员变量 isa，可以先简单理解成指向它对应的Class 的指针，具体内容下面再讲。

objc_class继承自objc_object，它有三个成员变量：

superclass，指向父类，显然。
cache用来缓存实例方法，提高执行效率。
bits 里存放的是指向类数据（class_rw_t）的指针以及若干标记位，类的实例方法列表等信息都要通过它取到。

此外，它还从 objc_object 继承了 isa，那么，class 的isa指向什么？指向的是 metaclass。metaclass 也是 objc_class 类型的变量，它主要用来存放一个类的类方法。

有以上基本了解后，我们来看这张经典的图：

这张图清晰地展现了 objc 对象的运行时结构：

对象实例的 isa 指向 class
class 的 isa 指向 meta class，class 的 superclass 指向父类
meta class 的 isa 指向 root meta class，meta class 的 superclass 指向父类的 meta class
root class 的 isa 指向 root meta class，root class 的 superclass 指向 nil
root meta class 的 super class 指向 root class，root meta class 的 isa 指向自己

首先来看 objc_object，它只显式声明了一个成员变量isa，早年，isa 直接就是个 Class 类型的变量，指向它的类，而 64 位机器出现后，由于虚拟地址并不需要 64 位这么多的空间，为了提高空间的使用率，使用了 isa_t 这个 union 类型。

这里贴出其在 arm64 下的定义：

// isa_t overlays three views of the same storage: a Class pointer (cls),
// a raw integer (bits), and the packed bit-fields of the anonymous struct.
union isa_t 
{isa_t() { }
    // Initializes the union through its raw integer view.
    isa_t(uintptr_t value) : bits(value) { }

    Class cls;
    uintptr_t bits;
    // The bit-field widths below add up to 64 (1+1+1+33+6+1+1+1+19).
    struct {
        uintptr_t nonpointer        : 1;
        uintptr_t has_assoc         : 1;
        uintptr_t has_cxx_dtor      : 1;
        uintptr_t shiftcls          : 33; // MACH_VM_MAX_ADDRESS 0x1000000000
        uintptr_t magic             : 6;
        uintptr_t weakly_referenced : 1;
        uintptr_t deallocating      : 1;
        uintptr_t has_sidetable_rc  : 1;
        uintptr_t extra_rc          : 19;
    };
};

运行时，有一些底层的特殊的类，由于向前兼容的需要，使用了 isa.cls，这就跟 32 位时代的用法是一致的了，这种形式的 isa 称为 raw isa。而通常情况下，isa 使用的是上面 union 中的那个匿名结构体（位域），其中 shiftcls 字段存储了 Class 指针，其它字段记录了一些额外信息。

// objc_class derives from objc_object, so the inherited `isa` comes first,
// followed by the superclass pointer, the cache and the data bits.
struct objc_class : objc_object {
    // Class ISA;
    Class superclass;
    cache_t cache;             // formerly cache pointer and vtable
    class_data_bits_t bits;    // class_rw_t * plus custom rr/alloc flags
};

没什么可说的，单纯地指向父类。

cache 先放一边，我们先看class_data_bits_t bits。

// Wrapper around a single pointer-sized integer that packs the FAST_ flag
// bits together with the class data pointer (see FAST_DATA_MASK).
struct class_data_bits_t {

    // Values are the FAST_ flags above.
    uintptr_t bits;
};

bits里面是个 uintptr_t 类型的 bits，uintptr_t 其实就是 unsigned long，我们知道 unsigned long 的长度是平台相关的，在 32 位下是 32 位，在 64 位下是 64 位。

注释中体贴地告诉我们，这个 bits 跟前面的 FAST 标记有关。

以 arm64 为例，来看一下 bits 里面都存了什么：

/* Flag: the class is a Swift class. */
#define FAST_IS_SWIFT           (1UL << 0)
/* Flag: the class has the default Retain/Release etc. implementations. */
#define FAST_HAS_DEFAULT_RR     (1UL << 1)
/* Flag: the class requires a raw isa. */
#define FAST_REQUIRES_RAW_ISA   (1UL << 2)
/* Mask selecting the pointer to the data portion. */
#define FAST_DATA_MASK          0x00007ffffffffff8UL

可以看到，FAST_DATA_MASK存了个数据指针，其它的都是 class 相关的几个标记位。我们来逐一看看这几个字段是如何读写的。

2.1 标记位读写

以 isSwift 这个位为例，来看一下其读写过程：

#define FAST_IS_SWIFT           (1UL<<0)

// Reports whether the FAST_IS_SWIFT flag is set in `bits`.
bool isSwift()
{
    return getBit(FAST_IS_SWIFT);
}

// Turns the FAST_IS_SWIFT flag on in `bits`.
void setIsSwift()
{
    setBits(FAST_IS_SWIFT);
}

// True when any of the bits selected by `bit` is set in `bits`.
bool getBit(uintptr_t bit)
{
    return (bits & bit) != 0;
}

// ORs `set` into `bits`. The new value is passed through updateFastAlloc and
// written with StoreReleaseExclusive; the loop repeats until the store succeeds.
void setBits(uintptr_t set)
{
    uintptr_t current;
    uintptr_t updated;
    do {
        current = LoadExclusive(&bits);
        updated = updateFastAlloc(current | set, set);
    } while (!StoreReleaseExclusive(&bits, current, updated));
}

可以看到下层调用的是 getBit 和setBits。getBit比较简单，一个基本的位运算。

setBits看起来就复杂多了。

LoadExclusive是原子读操作，看代码：

// LoadExclusive(src): returns the current value of *src.
// One definition per architecture; any other architecture is a compile error.
#if __arm64__

// arm64: reads *src with an `ldxr` instruction issued through inline asm.
// NOTE(review): presumably paired with an exclusive store on arm64 — the
// matching arm64 store routine is not shown here; confirm.
static ALWAYS_INLINE
uintptr_t 
LoadExclusive(uintptr_t *src)
{
    uintptr_t result;
    asm("ldxr %x0, [%x1]" 
        : "=r" (result) 
        : "r" (src), "m" (*src));
    return result;
}
#elif __arm__  

// 32-bit arm: plain dereference.
static ALWAYS_INLINE
uintptr_t 
LoadExclusive(uintptr_t *src)
{return *src;}
#elif __x86_64__  ||  __i386__

// x86_64 / i386: plain dereference.
static ALWAYS_INLINE
uintptr_t 
LoadExclusive(uintptr_t *src)
{return *src;}
#else 
#   error unknown architecture
#endif

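可以看到，在 arm64 下，LoadExclusive 使用了汇编指令 ldxr（独占加载），在其它平台下都是直接读出对应的值。ldxr 与普通的 ldr 的区别在于：它在读取的同时会把这个地址标记为“独占访问”，需要和后面的 StoreExclusive 配对使用——如果在读和写之间有别的线程改写了这个地址，写入就会失败，setBits 中的循环就会重试。整个“读—改—写”的原子性是靠这一对操作加重试来保证的。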

参考：ARM Compiler armasm Reference Guide 和对 int 变量赋值的操作是原子的吗？

然后看updateFastAlloc，

#if FAST_ALLOC
    // Recomputes the FAST_ALLOC summary bit, but only when `change` touches
    // FAST_ALLOC_MASK: the bit is set when the masked bits equal
    // FAST_ALLOC_VALUE and the shifted size is non-zero, and cleared otherwise.
    static uintptr_t updateFastAlloc(uintptr_t oldBits, uintptr_t change)
    {
        if (!(change & FAST_ALLOC_MASK)) {
            return oldBits;
        }

        bool maskMatches = (oldBits & FAST_ALLOC_MASK) == FAST_ALLOC_VALUE;
        bool hasShiftedSize = (oldBits >> FAST_SHIFTED_SIZE_SHIFT) != 0;
        if (maskMatches && hasShiftedSize) {
            oldBits |= FAST_ALLOC;
        } else {
            oldBits &= ~FAST_ALLOC;
        }
        return oldBits;
    }
#else
    // FAST_ALLOC disabled: the bits pass through unchanged.
    static uintptr_t updateFastAlloc(uintptr_t oldBits, uintptr_t change)
    {
        return oldBits;
    }
#endif

注意 FAST_ALLOC 这个宏，其实是常关的。

当它关闭时 updateFastAlloc 不做任何处理。

当它打开时，其实是判断是否是修改 FAST_ALLOC_MASK 这个位，如果是的话，需要满足一定的条件才能改，否则不许改。

再看下面的StoreReleaseExclusive

// Writes `value` to *dst only if *dst still equals `oldvalue`, as one atomic
// compare-and-swap. Returns true when the write happened, false otherwise.
// Defined ahead of StoreReleaseExclusive so the call there refers to an
// already-declared function.
static ALWAYS_INLINE
bool 
StoreExclusive(uintptr_t *dst, uintptr_t oldvalue, uintptr_t value)
{
    return __sync_bool_compare_and_swap((void **)dst, (void *)oldvalue, (void *)value);
}

// Forwards to StoreExclusive with the same arguments and return value.
static ALWAYS_INLINE
bool 
StoreReleaseExclusive(uintptr_t *dst, uintptr_t oldvalue, uintptr_t value)
{
    return StoreExclusive(dst, oldvalue, value);
}

这里使用的 __sync_bool_compare_and_swap，提供了原子的比较和交换，如果*dst == oldValue，就将value 写入*dst。这个函数返回写入成功 / 失败。

参考 gcc 原子操作函数

到这里，前面的 setBits 就完全清楚了：

原子读当前bits
FastAlloc逻辑处理
原子写，如果失败，重试。

2.2 data 部分

// Returns the class_rw_t pointer held in the FAST_DATA_MASK portion of `bits`.
class_rw_t* data()
{
    return (class_rw_t *)(bits & FAST_DATA_MASK);
}

可以看到，从 bits 中取出 FAST_DATA_MASK 对应的部分，即第 3 到第 46 位（区间 [3, 47)）。取出的是 class_rw_t 类型的指针。

// Read-write class data: flags and version, a pointer to the read-only part
// (`ro`), the method / property / protocol arrays, links to the first
// subclass and next sibling class, and the demangled name.
struct class_rw_t {
    // Be warned that Symbolication knows the layout of this structure.
    uint32_t flags;
    uint32_t version;

    const class_ro_t *ro;

    method_array_t methods;
    property_array_t properties;
    protocol_array_t protocols;

    Class firstSubclass;
    Class nextSiblingClass;

    char *demangledName;
};
// Read-only class data: flags, instance start/size, ivar layouts, the class
// name, and the base method / protocol / ivar / property lists.
struct class_ro_t {
    uint32_t flags;
    uint32_t instanceStart;
    uint32_t instanceSize;
#ifdef __LP64__
    // Present only on LP64 builds. NOTE(review): presumably padding — confirm.
    uint32_t reserved;
#endif

    const uint8_t * ivarLayout;
    
    const char * name;
    method_list_t * baseMethodList;
    protocol_list_t * baseProtocols;
    const ivar_list_t * ivars;

    const uint8_t * weakIvarLayout;
    property_list_t *baseProperties;
};

rw是 read-write，ro 是read-only。class_ro_t存放的是一个类在编译阶段已经完全确定的信息，因此是只读的；class_rw_t存放的则是在运行时仍可以修改的信息，因此是可读写的。

data 部分的 set 很有意思

// Replaces the FAST_DATA_MASK portion of `bits` with `newData`, keeping all
// other bits. Asserts that either no data is set yet or that newData carries
// RW_REALIZING or RW_FUTURE.
void setData(class_rw_t *newData)
{assert(!data()  ||  (newData->flags & (RW_REALIZING | RW_FUTURE)));
    // Set during realization or construction only. No locking needed.
    // Use a store-release fence because there may be concurrent
    // readers of data and data's contents.
    uintptr_t newBits = (bits & ~FAST_DATA_MASK) | (uintptr_t)newData;
    // The release fence is issued before the new value is stored to `bits`.
    atomic_thread_fence(memory_order_release);
    bits = newBits;
}

参考：如何理解 C++11 的六种 memory order？– zlilegion 的回答 – 知乎

理解 C++ 的 Memory Order

ARM64: LDXR/STXR vs LDAXR/STLXR

简而言之，memory-order 是一种保证线程间控制执行顺序的手段，弱于锁但消耗也更小。一般的应用开发中，比较少见。

这里似乎是为了保证 get 操作和 set 操作不被重排。（不是很确定）

cache 里其实是实例方法的缓存，cache_t 这个结构本质上是个哈希表。

这里也算是个比较简单的性能优化手段。在 class_rw_t 中，有存放方法列表，但那是个数组，我们知道数组的查询效率是 O(n)的。因此，把部分常用方法放到一个比较小的哈希表中，就可以大大提高查询效率。

以下笔记基于 objc-750 版本。

首先看 NSObject 的初始化方法，alloc 和 new，最终都会走到 callAlloc 这个函数中。

// +alloc hands the receiving class to _objc_rootAlloc.
+ (id)alloc
{
    return _objc_rootAlloc(self);
}

// +new is callAlloc (without the nil check) followed by -init.
+ (id)new
{
    id allocated = callAlloc(self, false/*checkNil*/);
    return [allocated init];
}

// Root implementation of +alloc: callAlloc without the nil check and with
// allocWithZone enabled.
id
_objc_rootAlloc(Class cls)
{
    return callAlloc(cls, false/*checkNil*/, true/*allocWithZone*/);
}

callAlloc这个函数比较长，一点点来看。

// Shared allocation entry point for +alloc and +new.
//   cls           class to instantiate
//   checkNil      when true, a nil cls yields nil
//   allocWithZone selects which message is sent when the class has a custom
//                 alloc/allocWithZone implementation
// Returns the new object, or the result of callBadAllocHandler when
// allocation fails.
static ALWAYS_INLINE id
callAlloc(Class cls, bool checkNil, bool allocWithZone=false)
{
    if (slowpath(checkNil && !cls)) return nil;

    if (slowpath(cls->ISA()->hasCustomAWZ())) {
        // No shortcuts available.
        return allocWithZone ? [cls allocWithZone:nil] : [cls alloc];
    }

    // No alloc/allocWithZone implementation. Go straight to the allocator.
    // fixme store hasCustomAWZ in the non-meta class and 
    // add it to canAllocFast's summary
    if (fastpath(cls->canAllocFast())) {
        // No ctors, raw isa, etc. Go straight to the metal.
        bool hasDtor = cls->hasCxxDtor();
        id instance = (id)calloc(1, cls->bits.fastInstanceSize());
        if (slowpath(!instance)) return callBadAllocHandler(cls);
        instance->initInstanceIsa(cls, hasDtor);
        return instance;
    }

    // Has ctor or raw isa or something. Use the slower path.
    id instance = class_createInstance(cls, 0);
    if (slowpath(!instance)) return callBadAllocHandler(cls);
    return instance;
}

slowpath和fastpath，可以看到这两个宏是：

// Branch-prediction hints built on __builtin_expect: fastpath(x) marks x as
// expected to be true (1), slowpath(x) as expected to be false (0).
// Both evaluate to bool(x).
#define fastpath(x) (__builtin_expect(bool(x), 1))
#define slowpath(x) (__builtin_expect(bool(x), 0))

__builtin_expect可以参考__builtin_expect 说明，简而言之，它通过预测其中的值进行非常底层的性能优化，不影响逻辑。

cls->ISA()->hasCustomAWZ()，AWZ 是 ”AllocWithZone” 的缩写，可知这里是判断当前 class 是否有自定义的 allocWithZone 方法。当然，通常没有人会去干预对象的内存分配。

如果有自定义的 allocWithZone，则调用 class 的allocWithZone 或alloc。

当没有自定义的 allocWithZone 时，cls->canAllocFast()看起来是判断是否能够快速初始化的。点进去发现这个功能目前是关闭的。移除无关代码后逻辑如下：

#if !__LP64__
/* non-LP64 branch: nothing defined in this excerpt */
#elif 1
/* always taken on LP64, so the #else branch below is never reached */
#else
// summary bit for fast alloc path: !hasCxxCtor and 
//   !instancesRequireRawIsa and instanceSize fits into shiftedSize
#define FAST_ALLOC              (1UL << 2)
#endif

#if FAST_ALLOC
/* FAST_ALLOC branch: nothing defined in this excerpt */
#else
    /* FAST_ALLOC is not defined above, so this variant is the one compiled. */
    bool canAllocFast()
    {
        return false;
    }
#endif

那么，剩下的部分就很明了了：通过 class_createInstance 创建 obj 并返回，如果创建失败就走callBadAllocHandler。

// Creates an instance of `cls` with `extraBytes` additional bytes by
// forwarding to _class_createInstanceFromZone with a nil zone.
id
class_createInstance(Class cls, size_t extraBytes)
{
    return _class_createInstanceFromZone(cls, extraBytes, nil);
}

// Allocates and initializes one instance of `cls`.
//   extraBytes        passed to cls->instanceSize() when computing the size
//   zone              when non-nil, memory comes from malloc_zone_calloc on
//                     that zone; otherwise from calloc
//   cxxConstruct      when true and the class has a C++ constructor,
//                     _objc_constructOrFree is run on the new object
//   outAllocatedSize  when non-nil, receives the computed size
// Returns nil when cls is nil or the allocation fails.
static __attribute__((always_inline)) 
id
_class_createInstanceFromZone(Class cls, size_t extraBytes, void *zone, 
                              bool cxxConstruct = true, 
                              size_t *outAllocatedSize = nil)
{
    if (!cls) return nil;

    assert(cls->isRealized());

    // Read class's info bits all at once for performance
    bool needsCtor = cls->hasCxxCtor();
    bool needsDtor = cls->hasCxxDtor();
    bool nonpointerOK = cls->canAllocNonpointer();

    size_t byteCount = cls->instanceSize(extraBytes);
    if (outAllocatedSize) *outAllocatedSize = byteCount;

    // Zero-filled storage: from the caller's zone if one was given,
    // from calloc otherwise.
    id instance = zone
        ? (id)malloc_zone_calloc ((malloc_zone_t *)zone, 1, byteCount)
        : (id)calloc(1, byteCount);
    if (!instance) return nil;

    if (!zone  &&  nonpointerOK) {
        instance->initInstanceIsa(cls, needsDtor);
    } else {
        // Use raw pointer isa on the assumption that they might be 
        // doing something weird with the zone or RR.
        instance->initIsa(cls);
    }

    if (cxxConstruct && needsCtor) {
        instance = _objc_constructOrFree(instance, cls);
    }

    return instance;
}

两个关键的变量，hasCxxCtor和hasCxxDtor，其定义如下：

/* class or superclass has .cxx_destruct implementation */
#define RW_HAS_CXX_DTOR       (1 << 17)
/* class or superclass has .cxx_construct implementation */
#define RW_HAS_CXX_CTOR       (1 << 18)

参考 iOS :“.cxx_destruct”– a hidden selector in my class，gcc – -fobjc-call-cxx-cdtors 这两个玩意儿一开始是 objc++ 中用来处理 c ++ 成员变量的构造和析构的，后来 .cxx_destruct 也用来处理 ARC 下的内存释放。

下一句，bool fast = cls->canAllocNonpointer();

isa这个变量应该熟悉，它是 objc_object 的成员，早些年，它是个单纯的 Class 类型的变量，指向这个对象的 Class。后来为了节省 64 位机器上的空间，它被赋予了更多的内容，即isa_t 类型。

isa_t是个 union，其定义如下：(这里取了 arm64 下的定义)

// isa_t: the same storage seen as a Class pointer (cls), a raw integer
// (bits), or the packed bit-fields of the anonymous struct.
union isa_t 
{isa_t() { }
    isa_t(uintptr_t value) : bits(value) { }

    Class cls;
    uintptr_t bits;
    // Bit-field widths total 64.
    struct {
        uintptr_t nonpointer        : 1;
        uintptr_t has_assoc         : 1;
        uintptr_t has_cxx_dtor      : 1;
        uintptr_t shiftcls          : 33; // MACH_VM_MAX_ADDRESS 0x1000000000
        uintptr_t magic             : 6;
        uintptr_t weakly_referenced : 1;
        uintptr_t deallocating      : 1;
        uintptr_t has_sidetable_rc  : 1;
        uintptr_t extra_rc          : 19;
    };
};

可以看到，新的isa_t，如果填充的是isa.cls，就跟原来一样，如果填充的是其中的 struct，则是新的方式了。

这里，旧的 isa 被称为 raw isa，新的 isa 被称为 nonpointer isa。

后面的逻辑比较清晰，根据 cls 中记录的 size 申请内存，然后调用 initIsa 初始化 isa。注意这里的 size 其实是 isa 和成员变量所需空间的总和。

最后，如果存在 c ++ 构造函数，调用之。

这里看一下 initIsa 的过程

// Initializes this object's isa for class `cls`.
//   nonpointer  false: store the class pointer directly in isa.cls;
//               true:  build a packed isa from the magic value, the
//                      has_cxx_dtor flag and the class (index or shifted
//                      pointer, depending on SUPPORT_INDEXED_ISA)
//   hasCxxDtor  copied into the has_cxx_dtor field of a packed isa
inline void 
objc_object::initIsa(Class cls, bool nonpointer, bool hasCxxDtor) 
{
    assert(!isTaggedPointer()); 

    if (!nonpointer) {
        isa.cls = cls;
        return;
    }

    assert(!DisableNonpointerIsa);
    assert(!cls->instancesRequireRawIsa());

    isa_t packed(0);

#if SUPPORT_INDEXED_ISA
    assert(cls->classArrayIndex() > 0);
    packed.bits = ISA_INDEX_MAGIC_VALUE;
    // isa.magic is part of ISA_MAGIC_VALUE
    // isa.nonpointer is part of ISA_MAGIC_VALUE
    packed.has_cxx_dtor = hasCxxDtor;
    packed.indexcls = (uintptr_t)cls->classArrayIndex();
#else
    packed.bits = ISA_MAGIC_VALUE;
    // isa.magic is part of ISA_MAGIC_VALUE
    // isa.nonpointer is part of ISA_MAGIC_VALUE
    packed.has_cxx_dtor = hasCxxDtor;
    packed.shiftcls = (uintptr_t)cls >> 3;
#endif

    // This write must be performed in a single store in some cases
    // (for example when realizing a class because other threads
    // may simultaneously try to use the class).
    // fixme use atomics here to guarantee single-store and to
    // guarantee memory order w.r.t. the class index table
    // ...but not too atomic because we don't want to hurt instantiation
    isa = packed;
}

有个 SUPPORT_INDEXED_ISA 的宏，上面已经说过了 raw isa 和 nonpointer isa，其中，nonpointer isa 在不同平台的结构体是不太一样的，主要又分为 indexed isa 和 packed isa，indexed isa 是用在 Apple Watch 上的，Apple Watch 情况比较特殊，为了节省内存，大体上类似在 64 位 CPU 上跑 32 位程序。

isa 的初始化看起来也很简单，直接写死了一个 Magic number 进行初始化，可以参考一下对象是如何初始化的（iOS），对应到 isa 的 struct 上，其实就是给 nonpointer（旧版本中这个字段叫 indexed）和 magic 两个字段赋值。nonpointer 上面已经讲了，magic 则是用来标记当前的 isa 是否已经初始化了。

isa 经过 magic number 初始化后，写入了两个变量：hasCxxDtor 和 shiftcls。

hasCxxDtor 用于标记是否需要处理析构函数，而 shiftcls 则真正存储了 class 的地址。这里右移 3 位的原因是，这里指针是按照 8 字节对齐的，低 3 位必然是 0。

性能优化手段__builtin_expect，runtime 中包装了fastpath、slowpath
hasCxxCtor和hasCxxDtor，跟 objc++ 和 ARC 有关，编译器插入的构造和析构函数
raw isa 和 nonpointer isa
- raw isa 就是个 class 指针
- nonpointer isa 则赋值为结构体，其中 class 指针存在 shiftcls，还存了别的信息
主要流程：
1. 申请内存空间
2. 初始化 isa
3. 执行构造函数

objc-runtime梳理二杂七杂八的笔记

对象基本结构

具体内容

objc_object

objc_class

1. superclass

2.bits

2.1 标记位读写

2.2 data 部分

3. cache

Objc 对象初始化学习笔记

小结