本文使用的 runtime 版本为 objc4-706

Runtime 的入口,是可以在 objc-os.mm 找到的函数 _objc_init,它被 libSystem 调用,进行初始化和加载 image,所谓 image,在 macOS 和 iOS 上来说,就是 Mach-O 文件。

void _objc_init(void)
{
    static bool initialized = false;
    if (initialized) return;
    initialized = true;
    
    // fixme defer initialization until an objc-using image is found?
    environ_init();
    tls_init();
    static_init();
    lock_init();
    exception_init();

    _dyld_objc_notify_register(&map_2_images, load_images, unmap_image);
}

在上面 _objc_init 的代码中,除去初始化函数的调用,最后一行是对 _dyld_objc_notify_register 的调用,这个函数接收三个函数当作回调,传递 image 信息让其自行处理。

其中 map_2_images 函数就是 image 中的 Objective-C 类的信息进入 runtime 发生的地方:

void
map_2_images(unsigned count, const char * const paths[],
             const struct mach_header * const mhdrs[])
{
    rwlock_writer_t lock(runtimeLock);
    return map_images_nolock(count, paths, mhdrs);
}

map_2_images 函数在加锁后就转向了 map_images_nolock 函数:

void 
map_images_nolock(unsigned mhCount, const char * const mhPaths[],
                  const struct mach_header * const mhdrs[])
{    
    ...
    
    if (hCount > 0) {
        _read_images(hList, hCount, totalClasses, unoptimizedTotalClasses);
    }
    
    ...
}

map_images_nolock 函数中,它检查传入的每个 image,如果 image 有 runtime 需要的信息,就将它记录在 hList 中,并将 hCount 加一,最终判断 hCount 来决定继续读取 image 中的数据,即 _read_images 函数:

void _read_images(header_info **hList, uint32_t hCount, int totalClasses, int unoptimizedTotalClasses)
{
    ...
    
#define EACH_HEADER \
    hIndex = 0;         \
    hIndex < hCount && (hi = hList[hIndex]); \
    hIndex++

    ...
    
    // Discover classes. Fix up unresolved future classes. Mark bundle classes.

    for (EACH_HEADER) {
        if (! mustReadClasses(hi)) {
            // Image is sufficiently optimized that we need not call readClass()
            continue;
        }

        bool headerIsBundle = hi->isBundle();
        bool headerIsPreoptimized = hi->isPreoptimized();

        classref_t *classlist = _getObjc2ClassList(hi, &count);
        for (i = 0; i < count; i++) {
            Class cls = (Class)classlist[i];
            Class newCls = readClass(cls, headerIsBundle, headerIsPreoptimized);

            if (newCls != cls  &&  newCls) {
                // Class was moved but not deleted. Currently this occurs 
                // only when the new class resolved a future class.
                // Non-lazily realize the class below
                resolvedFutureClasses = (Class *)
                    realloc(resolvedFutureClasses, 
                            (resolvedFutureClassCount+1) * sizeof(Class));
                resolvedFutureClasses[resolvedFutureClassCount++] = newCls;
            }
        }
    }
    
    ...
    
    // Realize non-lazy classes (for +load methods and static instances)
    for (EACH_HEADER) {
        classref_t *classlist = 
            _getObjc2NonlazyClassList(hi, &count);
        for (i = 0; i < count; i++) {
            Class cls = remapClass(classlist[i]);
            if (!cls) continue;
            realizeClass(cls);
        }
    }

    ...
    
#undef EACH_HEADER
}

_read_images 函数中有两段关于类加载的部分。第一部分将每个 image 里的类信息都经过 readClass 函数处理一遍,将类的名字和类本身存到一个表里,这样就可以使用 objc_getClass 函数通过名字来获取类了。第二部分让 image 里的类经 realizedClass 函数处理一遍,先回顾一下类的定义:

struct class_ro_t {
    uint32_t flags;
    uint32_t instanceStart;
    uint32_t instanceSize;
#ifdef __LP64__
    uint32_t reserved;
#endif

    const uint8_t * ivarLayout;
    
    const char * name;
    method_list_t * baseMethodList;
    protocol_list_t * baseProtocols;
    const ivar_list_t * ivars;

    const uint8_t * weakIvarLayout;
    property_list_t *baseProperties;

    method_list_t *baseMethods() const {
        return baseMethodList;
    }
};

struct class_rw_t {
    uint32_t flags;
    uint32_t version;

    const class_ro_t *ro;

    method_array_t methods;
    property_array_t properties;
    protocol_array_t protocols;

    Class firstSubclass;
    Class nextSiblingClass;

    char *demangledName;
    
    ...
    
};

struct objc_class : objc_object {
    // Class ISA;
    Class superclass;
    cache_t cache;             // formerly cache pointer and vtable
    class_data_bits_t bits;    // class_rw_t * plus custom rr/alloc flags

    class_rw_t *data() { 
        return bits.data();
    }
    
    ...

};

可以看到定义中,objc_classdata 方法返回类型是 class_rw_t *,但是在 image 中的类信息,data 返回的实际上是 class_ro_t *,这就需要 realizeClass 函数进行一些适当的操作将这些数据整对:

static Class realizeClass(Class cls)
{
    ...

    ro = (const class_ro_t *)cls->data();
    if (ro->flags & RO_FUTURE) {
        // This was a future class. rw data is already allocated.
        rw = cls->data();
        ro = cls->data()->ro;
        cls->changeInfo(RW_REALIZED|RW_REALIZING, RW_FUTURE);
    } else {
        // Normal class. Allocate writeable class data.
        rw = (class_rw_t *)calloc(sizeof(class_rw_t), 1);
        rw->ro = ro;
        rw->flags = RW_REALIZED|RW_REALIZING;
        cls->setData(rw);
    }
    
    ...

    supercls = realizeClass(remapClass(cls->superclass));
    metacls = realizeClass(remapClass(cls->ISA()));

    // Update superclass and metaclass in case of remapping
    cls->superclass = supercls;
    cls->initClassIsa(metacls);

    // Reconcile instance variable offsets / layout.
    // This may reallocate class_ro_t, updating our ro variable.
    if (supercls  &&  !isMeta) reconcileInstanceVariables(cls, supercls, ro);

    // Set fastInstanceSize if it wasn't set already.
    cls->setInstanceSize(ro->instanceSize);

    // Copy some flags from ro to rw
    if (ro->flags & RO_HAS_CXX_STRUCTORS) {
        cls->setHasCxxDtor();
        if (! (ro->flags & RO_HAS_CXX_DTOR_ONLY)) {
            cls->setHasCxxCtor();
        }
    }

    // Connect this class to its superclass's subclass lists
    if (supercls) {
        addSubclass(supercls, cls);
    } else {
        addRootClass(cls);
    }

    // Attach categories
    methodizeClass(cls);

    return cls;
}

realizeClass 函数会给类创建 class_rw_t * 的数据,并将 ro 里的一些东西拷贝过来,顺带给父类和元类也 realizeClass 一下。最后使用 methodizeClass 函数将 ro 中方法、属性这些信息也拷贝到类的 class_rw_t * 中来。

总结

总的来说,类需要先从 Mach-O 文件中 map 过来,再进行 realize。但其中环绕太多细节,这篇虎头蛇尾的文章就算一个随意的记录了。

参考