Hc1m1

IoT固件Fuzz：从Harness编写到QEMU适配

Thu, 13 Nov 2025 09:04:25 GMT

本文旨在探讨一种针对 IoT 设备的 AFL++ Fuzz 新方案。

Harness 编写

目前大部分 Fuzz 工具仅支持标准输入或命令行参数作为输入，而 IoT 设备 Fuzz 的主要对象为网络程序，需通过 Socket 进行输入输出，这构成了技术难点。

在掌握 Harness 编写技术后，可利用该方案对 IoT 设备的 Socket 通信程序进行 Fuzz 测试。

本文以 ASUS RT-N56U 设备为例进行阐述。目标 Fuzz 程序为 httpd，通过逆向分析可知，处理 HTTP 流程的代码位于 handle_request 函数中，该函数的第一个参数是 Socket 文件描述符，第二个参数是一个包含连接信息的结构体。

在 handle_request 函数中，通过 fgets 函数获取 HTTP 请求，如下所示：

  if ( !fgets(v95, 4096, a1) )
  {
    v4 = "Bad Request";
    v5 = "No request found.";
LABEL_14:
    v6 = 400;
LABEL_15:
    v7 = 0;
    return send_error(v6, v4, v7, v5, a1);
  }

基于此，可以构建如下思路：

Hook httpd 的 main 函数。
将 HTTP 请求置于文件中，文件名作为 httpd 参数输入。
伪造一个可读写的文件描述符，将输入的 HTTP 请求写入该描述符，供 httpd 读取。
伪造 handle_request 所需的参数。

基于上述逻辑，编写 Fuzz 函数如下：

/*
 * Copy everything readable from `stream`, starting at offset 0, to stdout.
 * Only used for the DEBUG dump of the buffer handle_request() worked on.
 * Errors are reported with perror() and end the dump early.
 */
static void dump_stream_to_stdout(FILE *stream)
{
    if (fseek(stream, 0, SEEK_SET) != 0) {
        perror("fseek single_handle before dump");
        return;
    }

    char buf[4096];
    for (;;) {
        size_t r = fread(buf, 1, sizeof(buf), stream);
        if (r == 0) {
            if (ferror(stream)) {
                perror("fread single_handle dump");
                clearerr(stream);
            }
            return;
        }
        // write() may be partial, so loop until the whole chunk is out.
        size_t off = 0;
        while (off < r) {
            ssize_t w = write(STDOUT_FILENO, buf + off, r - off);
            if (w < 0) {
                perror("write stdout dump");
                clearerr(stream);
                return;
            }
            off += (size_t)w;
        }
    }
}

/*
 * Run one fuzz iteration: load the HTTP request stored in `filename` into an
 * in-memory file, wrap it in a FILE stream, and hand it to handle_request()
 * together with a fabricated connection record.
 *
 * filename: path of the file holding the request; resolved relative to the
 *           directory saved in currentPWD.
 *
 * When the DEBUG environment variable is set, the content of the stream is
 * dumped to stdout after handle_request() returns.
 */
void fuzz(const char *filename)
{
    printf("do fuzz(%s)\n", filename);
    // Best effort: go back to the start-up directory so a relative filename
    // still resolves after the previous iteration's chdir("/www").
    if (currentPWD != NULL && chdir(currentPWD) != 0) {
        perror("chdir currentPWD");
    }

    int memfd = create_memfd_from_file(filename);
    if (memfd == -1) {
        perror("create_memfd_from_file error.");
        return;
    }

    FILE *single_handle = fdopen(memfd, "r+");
    if (!single_handle) {
        perror("fdopen error.");
        close(memfd);
        return;
    }

    if (chdir("/www") != 0) {
        perror("chdir /www");
    }

    // Zero the whole record first so that the queue links and any field not
    // set below never carry stack garbage into handle_request().
    conn_item_t fake_item = {0};
    fake_item.fd = memfd;
    fake_item.usa.sa_in.sin_family = AF_INET;
    fake_item.usa.sa_in.sin_port = htons(12345);
    inet_pton(AF_INET, "192.168.1.10", &fake_item.usa.sa_in.sin_addr);
    handle_request(single_handle, &fake_item);

    if (getenv("DEBUG")) {
        dump_stream_to_stdout(single_handle);
    }

    // fclose() also closes memfd; without it every call leaks the stream
    // and its descriptor.
    if (fclose(single_handle) != 0) {
        perror("fclose single_handle");
    }
}

首先，Fuzz 函数的参数源于命令行参数。接着，编写 create_memfd_from_file 函数，将文件内容转换为可读写的文件描述符，实现逻辑如下：

// Compatibility wrapper: issue the memfd_create system call when a syscall
// number is available at compile time; otherwise fall back to an anonymous
// temporary file.
static int memfd_create_compat(const char *name, unsigned int flags)
{
#if defined(SYS_memfd_create)
    long rc = syscall(SYS_memfd_create, name, flags);
    return (int)rc;
#elif defined(__NR_memfd_create)
    long rc = syscall(__NR_memfd_create, name, flags);
    return (int)rc;
#else
    // Fallback: not a real memfd, but a readable/writable descriptor backed
    // by tmpfile(). `name` and `flags` are not used on this path.
    FILE *tmp_stream = tmpfile();
    if (tmp_stream == NULL) {
        return -1;
    }
    // Duplicate the descriptor so it survives closing the FILE wrapper.
    int dup_fd = dup(fileno(tmp_stream));
    fclose(tmp_stream);
    return dup_fd;
#endif
}

// Copy the whole content of `filename` into an anonymous in-memory file
// obtained from memfd_create_compat(), rewind it to offset 0 and return the
// descriptor. Returns -1 after reporting the failing step with perror().
static int create_memfd_from_file(const char *filename)
{
    int mem_fd = memfd_create_compat("harness_mem", MFD_CLOEXEC);
    if (mem_fd < 0) {
        perror("memfd_create");
        return -1;
    }

    int src_fd = open(filename, O_RDONLY);
    if (src_fd < 0) {
        perror("open file");
        close(mem_fd);
        return -1;
    }

    char chunk[8192];
    ssize_t got;
    // read() returning 0 means end of file.
    while ((got = read(src_fd, chunk, sizeof(chunk))) != 0) {
        if (got < 0) {
            if (errno == EINTR) {
                continue; // interrupted by a signal: retry the read
            }
            perror("read file");
            goto fail;
        }

        // Push the chunk out completely; write() may accept only part of it.
        ssize_t done = 0;
        while (done < got) {
            ssize_t put = write(mem_fd, chunk + done, got - done);
            if (put >= 0) {
                done += put;
                continue;
            }
            if (errno == EINTR) {
                continue; // interrupted by a signal: retry the write
            }
            perror("write memfd");
            goto fail;
        }
    }
    close(src_fd);

    if (lseek(mem_fd, 0, SEEK_SET) < 0) {
        perror("lseek memfd");
        close(mem_fd);
        return -1;
    }
    return mem_fd;

fail:
    close(src_fd);
    close(mem_fd);
    return -1;
}

随后，使用 fdopen 函数将 int 类型的文件描述符转换为 FILE* 类型。最后构造 fake_item 结构体，作为 handle_request 的第二个参数，fake_item 结构体定义如下：

/*
 * Trace record embedded in every queue entry by TRACEBUF.
 * NOTE(review): presumably mirrors the debug variant of BSD <sys/queue.h>;
 * confirm against the headers the target httpd was built with.
 */
struct qm_trace {
    char *lastfile;
    int lastline;
    char *prevfile;
    int prevline;
};

/* Member declaration appended to each TAILQ_ENTRY. */
#define TRACEBUF struct qm_trace trace;

/* Link fields of a tail-queue element of type `struct type`. */
#define TAILQ_ENTRY(type) \
struct { \
    struct type *tqe_next;  /* next element */ \
    struct type **tqe_prev; /* address of previous next element */ \
    TRACEBUF \
}

/*
 * Socket address storage: the same bytes viewed as a generic sockaddr, an
 * IPv4 sockaddr_in, or (only when USE_IPV6 is defined) an IPv6 sockaddr_in6.
 */
typedef union {
    struct sockaddr sa;
    struct sockaddr_in sa_in;
#if defined (USE_IPV6)
    struct sockaddr_in6 sa_in6;
#endif
} usockaddr;

/*
 * Connection record; a pointer to one is the second argument of
 * handle_request().
 * NOTE(review): field order and the SUPPORT_HTTPS / USE_IPV6 settings must
 * match the build of the target httpd binary, otherwise the offsets it reads
 * will be wrong -- confirm against the firmware.
 */
typedef struct conn_item {
    TAILQ_ENTRY(conn_item) entry; /* queue links (plus trace record) */
    int fd;                       /* connection file descriptor */
#if defined (SUPPORT_HTTPS)
    int ssl;
#endif
    usockaddr usa;                /* peer address */
} conn_item_t;

此外，在调试模式下，还需支持输出 HTTP 请求结果。

Hook main 函数的方法如下：

int __uClibc_main(
    int (*main)(int, char **, char **),
    int argc,
    char **argv,
    void (*app_init)(void),
    void (*app_fini)(void),
    void (*rtld_fini)(void),
    void *stack_end) {

    // debug
    printf("do __uClibc_main(argc=%d, argv=%p)\n", argc, argv);

    if (!uClibc_main_orig) {
        LOG("dlsym(RTLD_NEXT, __uClibc_main_orig) failed: %s\n", dlerror());
        _exit(1);
    }
    LOG("uClibc_main_orig = %p\n", uClibc_main_orig);
    // ... and call it with our custom main function
    return uClibc_main_orig(main_hook, argc, argv, app_init, app_fini, rtld_fini, stack_end);
}

// Constructor: runs when harness.so is loaded and looks up the next
// definition of __uClibc_main, storing it in uClibc_main_orig.
__attribute__((constructor))
static void harness_init(void)
{
    LOG("constructor executed: harness.so loaded\n");

    uClibc_main_orig = dlsym(RTLD_NEXT, "__uClibc_main");
    if (uClibc_main_orig) {
        LOG("dlsym(RTLD_NEXT, __uClibc_main_orig) success: %p\n", uClibc_main_orig);
        return;
    }

    // Lookup failed: only report it; the pointer stays NULL.
    LOG("dlsym(RTLD_NEXT, __uClibc_main_orig) failed: %s\n", dlerror());
}

由于大多数 IoT 设备使用 uClibc 库，因此需要 Hook __uClibc_main 函数。

针对不同设备，需根据具体情况和架构进行差异化处理。在本例中，目标 httpd 程序在监听 Web 端口之前，也会执行初始化操作，如设置管理员账号密码等，因此需执行同样的初始化动作。如下所示：

// Function-pointer types and entry points for two routines that init() calls
// before any request is handled.
// NOTE(review): 0x402ED8 and 0x402DE0 are absolute addresses, presumably taken
// from reverse engineering this particular httpd build -- re-check them for
// any other firmware version or load address.
typedef void* (*HANDLE_RESET_LOGIN_DATA) (void);
HANDLE_RESET_LOGIN_DATA handle_reset_login_data = (HANDLE_RESET_LOGIN_DATA) 0x402ED8;
typedef void* (*HANDLE_LOAD_NVRAM_AUTH) (void);
HANDLE_LOAD_NVRAM_AUTH handle_load_nvram_auth = (HANDLE_LOAD_NVRAM_AUTH) 0x402DE0;

/*
 * One-time harness setup: run the initialisation routines httpd performs
 * before handling requests, then record the current working directory in
 * currentPWD.
 *
 * currentPWD is left NULL when the buffer cannot be allocated, and set to
 * "." when the working directory cannot be determined, so it is never left
 * pointing at uninitialised memory.
 */
void init() {
    // Initialisation httpd performs before it handles requests
    handle_reset_login_data();
    handle_load_nvram_auth();

    currentPWD = malloc(MAX_PATH);
    if (currentPWD == NULL) {
        perror("Error allocating working directory buffer");
        return;
    }
    if (getcwd(currentPWD, MAX_PATH) != NULL) {
        printf("Current working directory: %s\n", currentPWD);
    } else {
        perror("Error getting current working directory");
        // Keep a valid string: a later chdir(currentPWD) then stays in place
        // instead of reading garbage.
        currentPWD[0] = '.';
        currentPWD[1] = '\0';
    }
}

Harness 编写思路至此结束，后续需针对具体 IoT 程序进行差异化操作，以确保程序正常运行。

处理 Fuzz 程序依赖问题

本例中，httpd 仅需解决 NVRAM 依赖问题。IoT 设备通常使用 NVRAM 存储配置信息，但运行 Fuzz 的主机通常不包含 NVRAM 驱动，不过部分操作系统可能存在 NVRAM 驱动包，可自行安装。为使 httpd 正常使用 NVRAM，存在以下三种方案：

若操作系统存在 NVRAM 包，可直接安装并尝试适配使用，该方案最为简便。
当默认 NVRAM 驱动与 IoT 设备不匹配时，可通过逆向 IoT 固件中的 NVRAM 驱动，参考其代码，借助 AI 编写适配当前机器的 NVRAM 驱动。
可通过逆向 IoT 固件中的 libnvram 共享库，参考其代码，Hook 核心函数，如：nvram_get, nvram_set 等。

接下来，需获取设备的配置文件，或导出设备上的 NVRAM 数据并导入当前机器，以实现更真实的仿真。

鉴于代码篇幅较长，此处仅展示用法，如下所示：

$ hexdump -C /var/lib/soft_nvram.bin |tail
00001fd0  74 73 70 3d 30 00 6e 66  5f 61 6c 67 5f 73 69 70  |tsp=0.nf_alg_sip|
00001fe0  3d 30 00 70 72 65 66 65  72 72 65 64 5f 6c 61 6e  |=0.preferred_lan|
00001ff0  67 3d 45 4e 00 6c 6f 67  69 6e 5f 74 69 6d 65 73  |g=EN.login_times|
00002000  74 61 6d 70 3d 33 33 30  38 38 35 39 00 00 00 00  |tamp=3308859....|
00002010  00 00 00 00 00 00 00 00  00 00 00 00 00 00 00 00  |................|
*
00010000
$ make
make -C /lib/modules/6.1.0-37-amd64/build M=/home/debian/nvram_driver modules
make[1]: Entering directory '/usr/src/linux-headers-6.1.0-37-amd64'
  CC [M]  /home/debian/nvram_driver/soft_nvram.o
  MODPOST /home/debian/nvram_driver/Module.symvers
  CC [M]  /home/debian/nvram_driver/soft_nvram.mod.o
  LD [M]  /home/debian/nvram_driver/soft_nvram.ko
  BTF [M] /home/debian/nvram_driver/soft_nvram.ko
Skipping BTF generation for /home/debian/nvram_driver/soft_nvram.ko due to unavailability of vmlinux
make[1]: Leaving directory '/usr/src/linux-headers-6.1.0-37-amd64'
$ sudo insmod soft_nvram.ko backing_path=/var/lib/soft_nvram.bin
$ cd romfs
$ export QEMU_LD_PREFIX="."
$ afl-qemu-trace ./usr/sbin/nvram get http_username
./usr/sbin/nvram: cache '/etc/ld.so.cache' is corrupt
admin

Patch QEMU

通常情况下，上述 NVRAM 程序尚无法正常运行，因仍缺关键一步。该架构下的 NVRAM 驱动调用需使用 ioctl，而 QEMU 对 ioctl 调用有独立处理逻辑，并非默认使用系统调用。QEMU 默认无法识别 NVRAM 的 ioctl 调用方法。因此，需对 QEMU 进行相应修改，经研究，较为简便的修改方案如下：

diff --git a/linux-user/ioctls.h b/linux-user/ioctls.h
index 3b41128fd7..e8b636badc 100644
--- a/linux-user/ioctls.h
+++ b/linux-user/ioctls.h
@@ -758,3 +758,17 @@
 #ifdef TUNGETDEVNETNS
   IOCTL(TUNGETDEVNETNS,  IOC_R, TYPE_NULL)
 #endif
+
+// nvram
+#define NVRAM_IOCTL_CLEAR 0x14
+#define TARGET_NVRAM_IOCTL_CLEAR NVRAM_IOCTL_CLEAR
+#define NVRAM_IOCTL_COMMIT  0xA
+#define TARGET_NVRAM_IOCTL_COMMIT NVRAM_IOCTL_COMMIT
+#define NVRAM_IOCTL_GET   0x28
+#define TARGET_NVRAM_IOCTL_GET NVRAM_IOCTL_GET
+#define NVRAM_IOCTL_SET   0x1e
+#define TARGET_NVRAM_IOCTL_SET NVRAM_IOCTL_SET
+IOCTL(NVRAM_IOCTL_COMMIT, 0, TYPE_NULL)
+IOCTL(NVRAM_IOCTL_CLEAR,  0, TYPE_NULL)
+IOCTL(NVRAM_IOCTL_SET, IOC_W, MK_PTR(MK_STRUCT(STRUCT_anvram_ioctl_t)))
+IOCTL(NVRAM_IOCTL_GET, IOC_RW, MK_PTR(MK_STRUCT(STRUCT_anvram_ioctl_t)))

diff --git a/linux-user/syscall_types.h b/linux-user/syscall_types.h
index 6dd7a80ce5..e82b654df3 100644
--- a/linux-user/syscall_types.h
+++ b/linux-user/syscall_types.h
@@ -642,3 +642,11 @@ STRUCT(usbdevfs_disconnect_claim,
         TYPE_INT, /* flags */
         MK_ARRAY(TYPE_CHAR, USBDEVFS_MAXDRIVERNAME + 1)) /* driver */
 #endif /* CONFIG_USBFS */
+
+STRUCT(anvram_ioctl_t,
+       TYPE_INT, // size
+       TYPE_INT, // is_temp
+       TYPE_INT, // len_param
+       TYPE_INT, // len_value
+       TYPE_PTRVOID, // param
+       TYPE_PTRVOID) // value

然而，测试发现 QEMU 5.X 版本中 NVRAM 读写操作会因未知原因失败，而 QEMU 10.X 版本则能成功运行。鉴于 QEMU 代码库庞大，排查难度较高，因此考虑采用 QEMU 10.X 进行 AFL Fuzz。

目前公开的 QEMUAFL 支持的最高版本为 QEMU 5.X。若要使用 QEMU 10.X，需自行进行 Patch 适配。相关的 Patch 方案及过程将在后续文章中进行分享。

QEMU v10 适配 AFL：架构变更与 MIPS 延迟槽 Bug 分析

Thu, 13 Nov 2025 09:04:25 GMT

本文将探讨将 qemuafl 的补丁应用到 QEMU v10.x 版本时可能遇到的困难及解决方案。

前文分析了 AFL++ 对 QEMU 5.X 版本的补丁内容，以及在 QEMU 5.X 版本中通过 ioctl 操作 nvram 时可能出现的未知 Bug。鉴于此，计划将 qemuafl 的补丁迁移至 QEMU 的最新版本中。

QEMU 当前最新版本为：v10.1.3。

在尝试将 qemuafl 的补丁迁移至该版本时，遇到了因 QEMU 架构变更导致的严重问题。

在新版本的 QEMU 中，为了加速文件的编译速度，对代码结构进行了区分。例如，一部分代码的编译只依赖主机架构，而另一部分代码的编译则依赖目标架构。

以 accel/tcg 目录下的代码为例，在最新版的 QEMU 中，该部分被认为仅需考虑主机架构，因此只需编译一次。例如在首次编译 mips 架构的 QEMU 后，再次编译 arm 架构时，这部分代码无需重新编译，仅需编译与架构相关的代码。

然而，qemuafl 的补丁在仅需考虑主机架构的目录代码中引入了对目标架构参数的依赖，从而导致编译失败。

研究发现，自 v10.1 版本起，QEMU 已完全变更为新架构。若在此版本中添加 AFL 相关补丁，需要进行大量修改。进一步分析发现，v10.0.6 版本的 QEMU 架构尚未完全变更，若在该架构中进行相关补丁适配，可节省大量时间。

以下是需要修改的代码部分说明。

首先是 accel/tcg 目录下的代码，accel/tcg/meson.build 文件的部分代码如下所示：

common_ss.add(when: 'CONFIG_TCG', if_true: files(
  'cpu-exec-common.c',
  'tcg-runtime.c',
  'tcg-runtime-gvec.c',
))
tcg_specific_ss = ss.source_set()
tcg_specific_ss.add(files(
  'tcg-all.c',
  'cpu-exec.c',
  'tb-maint.c',
  'translate-all.c',
  'translator.c',
))

common_ss 集合中的文件表明该部分代码是通用代码，无需考虑目标架构。tcg_specific_ss 集合中的代码为需要考虑目标架构的 tcg 代码。

在 tcg-runtime.h 的代码中，补丁内容大量使用了 tl 类型，如下所示：

DEF_HELPER_FLAGS_2(qasan_load1, TCG_CALL_NO_RWG, void, env, tl)

将宏展开后可以发现，tl 类型的定义位于 include/exec/helper-head.h.inc 文件中，如下所示：

#define dh_alias(t) glue(dh_alias_, t)

#ifdef COMPILING_PER_TARGET
# ifdef TARGET_LONG_BITS
#  if TARGET_LONG_BITS == 32
#   define dh_alias_tl i32
#   define dh_typecode_tl dh_typecode_i32
#  else
#   define dh_alias_tl i64
#   define dh_typecode_tl dh_typecode_i64
#  endif
# endif
# define dh_ctype_tl target_ulong
#endif /* COMPILING_PER_TARGET */

dh_alias(t) => dh_alias_tl，该类型只有当存在 COMPILING_PER_TARGET 定义时，才会被声明。

通过最外层的 meson.build 代码可以看到，只有在编译需要考虑目标架构的代码时，才会设置该值，代码如下所示：

...
foreach target : target_dirs
  config_target = config_target_mak[target]
  target_name = config_target['TARGET_NAME']
  target_base_arch = config_target['TARGET_BASE_ARCH']
  arch_srcs = [config_target_h[target]]
  arch_deps = []
  c_args = ['-DCOMPILING_PER_TARGET',
            '-DCONFIG_TARGET="@0@-config-target.h"'.format(target),
  ]
...

因此，需要将 tcg-runtime.c 和 tcg-runtime.h 文件中 AFL 补丁的代码提取出来，放入 tcg-runtime_afl.c 和 tcg-runtime_afl.h 文件中。

在这部分的补丁代码中，受影响最大的是 TCG Helper 函数，该部分代码的实现逻辑在前文中已说明，此处不再赘述。

在 tcg-runtime_afl.c 文件中，代码主要是 Helper 函数的实现，为了防止 missing-prototypes 错误，需要添加以下头文件：

#define HELPER_H  "accel/tcg/tcg-runtime_afl.h"
#include "exec/helper-proto.h.inc"
#undef  HELPER_H

经测试，需包含的最简头文件如下所示：

#include "qemu/osdep.h"
#include "qemu/host-utils.h"
#include "exec/cpu-common.h"
#include "user/page-protection.h"
#include "accel/tcg/getpc.h"
#include "user/abitypes.h"
#include "exec/cpu-all.h"

#include "qemuafl/common.h"
#include "tcg/tcg-op.h"
#include "qemuafl/qemu-ijon-support.h"

#define HELPER_H  "accel/tcg/tcg-runtime_afl.h"
#include "exec/helper-proto.h.inc"
#include "exec/helper-info.c.inc"
#undef  HELPER_H

最后修改 accel/tcg/meson.build，如下所示：

tcg_specific_ss.add(files(
  'tcg-all.c',
  'cpu-exec.c',
  'tb-maint.c',
  'translate-all.c',
  'translator.c',
  'tcg-runtime_afl.c'
))

QASAN核心代码位于tcg目录下，该部分代码也发生了变化，代码从tcg/tcg-op.c文件迁移到了tcg/tcg-op-ldst.c文件中。并且tcg目录下的代码都为通用代码，不考虑目标架构，但是AFL patch的头文件需要考虑目标架构，因此还需要对头文件进行修改。

build_qemu_support.sh 兼容性修改

QEMU v10.0.6 不存在以下配置，需从 build_qemu_support.sh 文件中删除：

--disable-blobs
--disable-live-block-migration
--disable-sheepdog
--disable-vhost-scsi
--disable-vhost-vsock
--disable-vnc-png
--disable-xfsctl

qemuafl/asan-giovese-inl.h

在 qemuafl/asan-giovese-inl.h 文件中需要添加一个声明，如下所示：

void queue_signal(CPUArchState *env, int sig, int si_type, target_siginfo_t *info);

函数变化

tcg_const_tl -> tcg_constant_tl
tcg_const_ptr->tcg_constant_ptr
tcg_constant_tl 得到的值不再需要使用 tcg_temp_free 释放，该 free 函数也已不再存在。因此需要搜索所有 tcg_temp_free 调用并将其注释。考虑到 AFL 补丁代码中该部分可能仍需释放，因此进行了以下修改：

// include/tcg/tcg-op.h
#include "tcg/tcg-temp-internal.h"
#if TARGET_LONG_BITS == 32
typedef TCGv_i32 TCGv;
#define tcg_temp_new() tcg_temp_new_i32()
#define tcg_temp_free(v) tcg_temp_free_i32(v)    // 添加32位的free
#define tcg_global_mem_new tcg_global_mem_new_i32
#define tcgv_tl_temp tcgv_i32_temp
#define tcg_gen_qemu_ld_tl tcg_gen_qemu_ld_i32
#define tcg_gen_qemu_st_tl tcg_gen_qemu_st_i32
#elif TARGET_LONG_BITS == 64
typedef TCGv_i64 TCGv;
#define tcg_temp_new() tcg_temp_new_i64()
#define tcg_temp_free(v) tcg_temp_free_i64(v)  // 添加64位的free
#define tcg_global_mem_new tcg_global_mem_new_i64
#define tcgv_tl_temp tcgv_i64_temp
#define tcg_gen_qemu_ld_tl tcg_gen_qemu_ld_i64
#define tcg_gen_qemu_st_tl tcg_gen_qemu_st_i64
#else
#error Unhandled TARGET_LONG_BITS value
#endif

修复编译警告

针对一些编译警告的修复如下：

../accel/tcg/cpu-exec.c:586:10: warning: no previous prototype for ‘ijon_simple_hash’ [-Wmissing-prototypes]
  586 | uint64_t ijon_simple_hash(uint64_t x) {
      |          ^~~~~~~~~~~~~~~~
// 在accel/tcg/cpu-exec.c实现的一些函数缺少声明
// 在qemuafl/common.h添加以下声明
uint64_t ijon_simple_hash(uint64_t x);
uint32_t ijon_hashint(uint32_t old, uint32_t val);
uint32_t ijon_hashstr(uint32_t old, char *val);
void ijon_max_variadic(uint32_t addr, ...);
void ijon_min_variadic(uint32_t addr, ...);
uint32_t ijon_strdist(char *a, char *b);
-----------
../accel/tcg/cpu-exec.c:1342:42: warning: implicit declaration of function ‘open_self_maps’; did you mean ‘free_self_maps’? [-Wimplicit-function-declaration]
 1342 |       if (getenv("AFL_QEMU_DEBUG_MAPS")) open_self_maps(env, 1);
      |                                          ^~~~~~~~~~~~~~
      |                                          free_self_maps
// 在accel/tcg/cpu-exec.c中patch代码使用到open_self_maps函数，该函数声明的头文件未包含。
// open_self_maps函数的实现位于linux-user/syscall.c，并且被声明为静态函数，因此我们需要把该函数的静态声明去除。
// linux-user/syscall.c修改内容：
-static int open_self_maps(CPUArchState *cpu_env, int fd)
+int open_self_maps(CPUArchState *cpu_env, int fd)
// 然后在qemuafl/common.h中添加函数声明
int open_self_maps(CPUArchState *cpu_env, int fd);
------------
./qemuafl/imported/snapshot-inl.h:110:13: warning: ‘afl_snapshot_clean’ defined but not used [-Wunused-function]
  110 | static void afl_snapshot_clean(void) {
      |             ^~~~~~~~~~~~~~~~~~
./qemuafl/imported/snapshot-inl.h:98:12: warning: ‘afl_snapshot_do’ defined but not used [-Wunused-function]
   98 | static int afl_snapshot_do(void) {
      |            ^~~~~~~~~~~~~~~
./qemuafl/imported/snapshot-inl.h:76:13: warning: ‘afl_snapshot_exclude_vmrange’ defined but not used [-Wunused-function]
   76 | static void afl_snapshot_exclude_vmrange(void *start, void *end) {
      |             ^~~~~~~~~~~~~~~~~~~~~~~~~~~~
// 有几个静态函数定义后未被使用，暂时将其注释掉
--------------
../linux-user/syscall.c: In function ‘do_execv’:
../linux-user/syscall.c:8649:23: warning: declaration of ‘p’ shadows a previous local [-Wshadow=local]
 8649 |                 char *p, *q, *r;
      |                       ^
../linux-user/syscall.c:8612:11: note: shadowed declaration is here
 8612 |     void *p;
      |           ^
../linux-user/syscall.c:8649:27: warning: declaration of ‘q’ shadows a previous local [-Wshadow=local]
 8649 |                 char *p, *q, *r;
      |                           ^
../linux-user/syscall.c:8611:12: note: shadowed declaration is here
 8611 |     char **q;
      |            ^
// 还有内层局部变量遮蔽（shadow）外层同名变量的问题，给内层变量改个名字即可

QEMU V10.1.3 修改方案

主要查看 QEMU V10.1.3 版本的三个配置文件，如下所示：

accel/tcg/meson.build 代码如下所示：

if not have_tcg
   subdir_done()
endif

tcg_ss = ss.source_set()

tcg_ss.add(files(
  'cpu-exec.c',
  'cpu-exec-common.c',
  'tcg-runtime.c',
  'tcg-runtime-gvec.c',
  'tb-maint.c',
  'tcg-all.c',
  'tcg-stats.c',
  'translate-all.c',
  'translator.c',
))
if get_option('plugins')
  tcg_ss.add(files('plugin-gen.c'))
endif

user_ss.add_all(tcg_ss)
system_ss.add_all(tcg_ss)

user_ss.add(files(
  'user-exec.c',
  'user-exec-stub.c',
))

system_ss.add(files(
  'cputlb.c',
  'icount-common.c',
  'monitor.c',
  'tcg-accel-ops.c',
  'tcg-accel-ops-icount.c',
  'tcg-accel-ops-mttcg.c',
  'tcg-accel-ops-rr.c',
  'watchpoint.c',
))

从上述代码可以看出，在当前架构中，配置文件不再区分 common_ss（通用代码）和 tcg_specific_ss（依赖目标架构的 tcg 代码），而是分成 user_ss（user 模式编译的代码）和 system_ss（system 模式编译的代码）。

accel/meson.build 代码如下所示：

common_ss.add(files('accel-common.c'))
specific_ss.add(files('accel-target.c'))
system_ss.add(files('accel-system.c', 'accel-blocker.c', 'accel-qmp.c'))
user_ss.add(files('accel-user.c'))

subdir('tcg')
if have_system
  subdir('hvf')
  subdir('qtest')
  subdir('kvm')
  subdir('xen')
  subdir('stubs')
endif

# qtest
system_ss.add(files('dummy-cpus.c'))

meson.build 部分代码如下所示：

user_ss = ss.source_set()
......
libuser = static_library('user',
                         user_ss.all_sources() + genh,
                         c_args: ['-DCONFIG_USER_ONLY',
                                  '-DCOMPILING_SYSTEM_VS_USER'],
                         include_directories: common_user_inc,
                         dependencies: user_ss.all_dependencies(),
                         build_by_default: false)

在当前架构中，accel/tcg/ 目录下的代码在第一次编译的时候都会被编译到 libuser 库中，无需考虑目标架构。但是该目录下有大量 AFL 补丁的代码，且 AFL 的代码有大量需要考虑目标架构。这就导致 AFL 的补丁代码无法直接迁移到该版本中。

不过，查看accel/meson.build的编译代码可以发现，仍然存在 specific_ss 配置，只需将 AFL 需要依赖目标架构的代码，迁移到 accel 目录下，然后把文件加入到 specific_ss 当中，即可在 QEMU V10.1.3 版本中成功编译 AFL 补丁版本。

QEMU V10.0.6 版本存在的 BUG

测试版本为 QEMU V10.0.6 linux-user 模式，目标架构为 mips。（V10.1.3 版本仍然存在）

成功编译 V10.0.6 版本的 qemuafl 后，开始尝试运行 afl fuzz。

前文曾提及 AFL 的 qemu 模式运行逻辑，默认情况下采用 forkserver 模式。在 fuzz 程序首次运行时，程序能正常运行，但在第二次执行到某个指令时，却会重新跳转回 main 函数，导致执行失败。

最终通过 QEMU 的 exec_tb、translate_block 日志信息定位到了问题根源。

测试脚本如下所示：

#!/usr/bin/env python3
import os
import sys
import struct
import time
import subprocess
import multiprocessing

# Pipe file descriptors standing in for the forkserver channels, created with os.pipe()
ctl_read, ctl_write = os.pipe()   # parent -> child control channel
st_read, st_write = os.pipe()     # child -> parent status channel

FORKSRV_FD = 198   # fixed FD number, mimicking the one AFL uses
FUZZ_BIN = "/home/debian/fuzz/AFLplusplus/afl-qemu-trace"
MAGIC = 0x41464c01

def run_target(exec_bin):
    """Fork and exec `exec_bin` with the pipes installed on the forkserver FDs.

    In the child, the control-read end is duplicated onto FORKSRV_FD and the
    status-write end onto FORKSRV_FD + 1, then `exec_bin` is executed with
    "./usr/sbin/httpd_patch" followed by this script's own arguments.

    In the parent, the two pipe ends that belong to the child are closed, so
    that a read on st_read returns EOF instead of blocking forever if the
    target dies.

    Returns the child's PID (in the parent only).
    """
    pid = os.fork()
    if pid == 0:  # child
        try:
            # Put the pipes on the FD numbers the target expects
            os.dup2(ctl_read, FORKSRV_FD)
            os.dup2(st_write, FORKSRV_FD + 1)
            # Close the ends only the parent uses
            os.close(ctl_write)
            os.close(st_read)
            os.execve(exec_bin, [exec_bin, "./usr/sbin/httpd_patch"] + sys.argv[1:], os.environ)
        except OSError as exc:
            print(f"failed to start {exec_bin}: {exc}", file=sys.stderr)
        # Only reached when exec failed: never fall back into the parent's code.
        os._exit(127)

    # parent: drop the child's ends of both pipes
    os.close(ctl_read)
    os.close(st_write)
    return pid


def parent_process():
    """Drive the target over the two pipes, one run per loop iteration.

    Starts the target, reads one 4-byte word and answers with
    MAGIC ^ 0xFFFFFFFF, then reads three more words that are printed as
    option, map_size and version. After that, each iteration writes a zero
    word, prints the child pid and status words read back, and stops once the
    user's answer contains "q".
    """
    def recv_word():
        # Every message on the status channel is read as 4 bytes.
        return os.read(st_read, 4)

    def send_word(value):
        os.write(ctl_write, value.to_bytes(4, "little"))

    run_target(FUZZ_BIN)

    status = recv_word()
    print(f"status = {status}")
    send_word(MAGIC ^ 0xFFFFFFFF)

    for label in ("option", "map_size", "version"):
        word = recv_word()
        print(f"{label} = {word}")

    answer = ""
    while "q" not in answer:
        send_word(0)  # was_killed = 0
        child_pid = recv_word()
        print(f"child pid = {int.from_bytes(child_pid, 'little')}")
        status = recv_word()
        print(f"status = {status}")
        answer = input("continue?")

# Run the handshake/run loop only when executed as a script.
if __name__ == "__main__":
    parent_process()

调试命令如下所示：

QEMU_LOG_FILENAME="/tmp/debug.txt" QEMU_LOG="in_asm,out_asm" python3 afl-py.py

出现错误的流程大致如下：

qemu 第一次 fork 出子进程，正常执行完整个流程。
父进程在第一个子进程执行的过程中，通过 afl_wait_tsl 函数，在父进程的内存空间中同步翻译指令块。这样，在下次 fork 出的子进程执行到同样的块时，就无需再翻译一次，可以节省代码执行时间。
第二个 fork 出的子进程按照父进程翻译的结果执行，在某个代码处开始出错。

经定位，错误代码位于 libuClibc 库的 strchr 函数中，关键代码如下所示：

.text:00038FA0 03 00 43 30                 andi    $v1, $v0, 3
.text:00038FA4 F9 FF 60 54                 bnezl   $v1, loc_38F8C
.text:00038FA8 00 00 43 90                 lbu     $v1, 0($v0)
.text:00038FAC 00 1A 05 00                 sll     $v1, $a1, 8
.text:00038FB0 25 18 65 00                 or      $v1, $a1
.text:00038FB4 00 5C 03 00                 sll     $t3, $v1, 16
.text:00038FB8 FE 7E 06 3C                 lui     $a2, 0x7EFE
.text:00038FBC 01 81 0A 3C                 lui     $t2, 0x8101
.text:00038FC0 25 58 63 01                 or      $t3, $v1
.text:00038FC4 FF FE C6 34                 li      $a2, 0x7EFEFEFF
.text:00038FC8 21 18 40 00                 move    $v1, $v0
.text:00038FCC 00 01 4A 35                 li      $t2, 0x81010100
.text:00038FD0
.text:00038FD0             loc_38FD0:                               # CODE XREF: index+7C↓j
.text:00038FD0 00 00 64 8C                 lw      $a0, 0($v1)
.text:00038FD4
.text:00038FD4             loc_38FD4:                               # CODE XREF: index+D8↓j
.text:00038FD4 04 00 63 24                 addiu   $v1, 4
.text:00038FD8 26 38 8B 00                 xor     $a3, $a0, $t3
.text:00038FDC 21 40 E6 00                 addu    $t0, $a3, $a2
.text:00038FE0 21 10 86 00                 addu    $v0, $a0, $a2
.text:00038FE4 27 38 07 00                 nor     $a3, $zero, $a3
.text:00038FE8 27 20 04 00                 nor     $a0, $zero, $a0
.text:00038FEC 26 38 E8 00                 xor     $a3, $t0
.text:00038FF0 26 20 82 00                 xor     $a0, $v0
.text:00038FF4 25 20 E4 00                 or      $a0, $a3, $a0
.text:00038FF8 24 20 8A 00                 and     $a0, $t2
.text:00038FFC F4 FF 80 10                 beqz    $a0, loc_38FD0
.text:00039000 FC FF 69 24                 addiu   $t1, $v1, -4
.text:00039004 FC FF 64 90                 lbu     $a0, -4($v1)
.text:00039008 FD FF 62 24                 addiu   $v0, $v1, -3
.text:0003900C 03 00 85 14                 bne     $a0, $a1, loc_3901C
.text:00039010 FE FF 68 24                 addiu   $t0, $v1, -2
.text:00039014 08 00 E0 03                 jr      $ra
.text:00039018 21 10 20 01                 move    $v0, $t1

按照 QEMU 分块的逻辑，0x00038FAC - 0x00039000 算一个代码块（以分支指令 beqz 及其 delay slot 指令结尾）。

但是在 target/mips/tcg/translate.c 文件的 mips_tr_translate_insn 函数中，有以下代码：

static void mips_tr_translate_insn(DisasContextBase *dcbase, CPUState *cs)
{
......
    /*
     * End the TB on (most) page crossings.
     * See mips_tr_init_disas_context about single-stepping a branch
     * together with its delay slot.
     */
    if (ctx->base.pc_next - ctx->page_start >= TARGET_PAGE_SIZE
        && !(tb_cflags(ctx->base.tb) & CF_SINGLE_STEP)) {
        ctx->base.is_jmp = DISAS_TOO_MANY;
    }
}

在上面的代码中 TARGET_PAGE_SIZE=4096=0x1000，通过注释也能知道，该部分代码是代码块按照页进行分割，一页的大小就是 0x1000。

MIPS 指令集中存在 delay slot（延迟槽）机制，即在执行跳转指令之前，会先执行下一条指令。

比如以下指令
.text:00038FFC F4 FF 80 10                 beqz    $a0, loc_38FD0
.text:00039000 FC FF 69 24                 addiu   $t1, $v1, -4
实际执行过程如下：
$t1 = $v1-4
if $a0 == 0:
  jump
else
  no jump

但根据 QEMU 的分页机制，0x00038FAC - 0x00039000 块将会被分成：0x00038FAC - 0x00038FFC 和 0x00039000 两个代码块。

不过，QEMU 的开发者也考虑到了这种情况，因此有以下代码：

/*
 * Emit the code that ends the current translation block; which ending is
 * generated depends on ctx->base.is_jmp.
 */
static void mips_tr_tb_stop(DisasContextBase *dcbase, CPUState *cs)
{
    DisasContext *ctx = container_of(dcbase, DisasContext, base);

    switch (ctx->base.is_jmp) {
    case DISAS_STOP:
        gen_save_pc(ctx->base.pc_next);
        tcg_gen_lookup_and_goto_ptr();
        break;
    case DISAS_NEXT:
    case DISAS_TOO_MANY:
        /*
         * Let save_cpu_state() emit any pending hflags/btarget updates
         * (do_save_pc is 0 here), then continue at pc_next.
         */
        save_cpu_state(ctx, 0);
        gen_goto_tb(ctx, 0, ctx->base.pc_next);
        break;
    case DISAS_EXIT:
        tcg_gen_exit_tb(NULL, 0);
        break;
    case DISAS_NORETURN:
        /* Nothing is emitted for this case. */
        break;
    default:
        g_assert_not_reached();
    }
}

当 is_jmp == DISAS_TOO_MANY 时，首先保存上下文的 cpu 状态信息，然后设置当前块的下一跳为 delay slot 指令。

从这点看，QEMU 本身逻辑并无问题。分页是为了减少内存开销，同时也处理了被分开的代码块。

但是和 AFL 结合后，就会导致兼容性问题，产生 bug。下面将通过 QEMU 的日志信息，来展示该 BUG。

首先，第一个子进程在翻译代码块时的日志如下所示：

IN(子进程):
0x2b506fac:  sll        v1,a1,0x8
0x2b506fb0:  or v1,v1,a1
0x2b506fb4:  sll        t3,v1,0x10
0x2b506fb8:  lui        a2,0x7efe
0x2b506fbc:  lui        t2,0x8101
0x2b506fc0:  or t3,t3,v1
0x2b506fc4:  ori        a2,a2,0xfeff
0x2b506fc8:  move       v1,v0
0x2b506fcc:  ori        t2,t2,0x100
0x2b506fd0:  lw a0,0(v1)
0x2b506fd4:  addiu      v1,v1,4
0x2b506fd8:  xor        a3,a0,t3
0x2b506fdc:  addu       t0,a3,a2
0x2b506fe0:  addu       v0,a0,a2
0x2b506fe4:  nor        a3,zero,a3
0x2b506fe8:  nor        a0,zero,a0
0x2b506fec:  xor        a3,a3,t0
0x2b506ff0:  xor        a0,a0,v0
0x2b506ff4:  or a0,a3,a0
0x2b506ff8:  and        a0,a0,t2
0x2b506ffc:  beqz       a0,0x2b506fd0

  -- guest addr 0x000000002b506ffc
0x7f0cd4028655:  41 83 fc 01              cmpl     $1, %r12d
0x7f0cd4028659:  1b db                    sbbl     %ebx, %ebx
0x7f0cd402865b:  f7 db                    negl     %ebx
0x7f0cd402865d:  89 9d 24 1b 00 00        movl     %ebx, 0x1b24(%rbp)
0x7f0cd4028663:  c7 85 1c 1b 00 00 a2 10  movl     $0x110a2, 0x1b1c(%rbp)
0x7f0cd402866b:  01 00
0x7f0cd402866d:  c7 85 20 1b 00 00 d0 6f  movl     $0x2b506fd0, 0x1b20(%rbp)
0x7f0cd4028675:  50 2b
0x7f0cd4028677:  c7 85 80 00 00 00 00 70  movl     $0x2b507000, 0x80(%rbp)
0x7f0cd402867f:  50 2b
0x7f0cd4028681:  48 8b fd                 movq     %rbp, %rdi
0x7f0cd4028684:  ff 15 2e 00 00 00        callq    *0x2e(%rip)
0x7f0cd402868a:  ff e0                    jmpq     *%rax
0x7f0cd402868c:  48 8d 05 70 fe ff ff     leaq     -0x190(%rip), %rax
0x7f0cd4028693:  e9 80 79 fd ff           jmp      0x7f0cd4000018

IN(子进程):
0x2b507000:  addiu      t1,v1,-4

OUT: [size=120]
-- guest addr 0x000000002b507000 + tb prologue
0x7f0cd4028800:  8b 5d f8                 movl     -8(%rbp), %ebx
0x7f0cd4028803:  85 db                    testl    %ebx, %ebx
0x7f0cd4028805:  0f 8c 58 00 00 00        jl       0x7f0cd4028863
0x7f0cd402880b:  c6 45 fc 01              movb     $1, -4(%rbp)
0x7f0cd402880f:  8b 5d 0c                 movl     0xc(%rbp), %ebx
0x7f0cd4028812:  83 c3 fc                 addl     $-4, %ebx
0x7f0cd4028815:  89 5d 24                 movl     %ebx, 0x24(%rbp)
0x7f0cd4028818:  c7 85 1c 1b 00 00 a2 00  movl     $0xa2, 0x1b1c(%rbp)
0x7f0cd4028820:  00 00
0x7f0cd4028822:  8b 9d 24 1b 00 00        movl     0x1b24(%rbp), %ebx
0x7f0cd4028828:  85 db                    testl    %ebx, %ebx
0x7f0cd402882a:  0f 85 1e 00 00 00        jne      0x7f0cd402884e
0x7f0cd4028830:  66 66 90                 nop
0x7f0cd4028833:  e9 00 00 00 00           jmp      0x7f0cd4028838
0x7f0cd4028838:  c7 85 80 00 00 00 04 70  movl     $0x2b507004, 0x80(%rbp)
0x7f0cd4028840:  50 2b
0x7f0cd4028842:  48 8d 05 f8 fe ff ff     leaq     -0x108(%rip), %rax
0x7f0cd4028849:  e9 ca 77 fd ff           jmp      0x7f0cd4000018
0x7f0cd402884e:  c7 85 80 00 00 00 d0 6f  movl     $0x2b506fd0, 0x80(%rbp)
0x7f0cd4028856:  50 2b
0x7f0cd4028858:  48 8b fd                 movq     %rbp, %rdi
0x7f0cd402885b:  ff 15 0f 00 00 00        callq    *0xf(%rip)
0x7f0cd4028861:  ff e0                    jmpq     *%rax
0x7f0cd4028863:  48 8d 05 d9 fe ff ff     leaq     -0x127(%rip), %rax
0x7f0cd402886a:  e9 a9 77 fd ff           jmp      0x7f0cd4000018

而观察父进程翻译同一代码块（0x2b507000）时的日志可以发现，QEMU 翻译出的跳转目标地址出现了偏差：上面子进程日志中正确的 movl $0x2b506fd0, 0x80(%rbp)，在父进程中变成了 movl $0x402ac0, 0x80(%rbp)。0x402ac0 地址为 httpd 程序的 _start 函数起始地址。

经过详细分析与调试，明确了该 Bug 的成因。

将原本的分支指令块设为 TB_A，把被分片的 delay slot 指令块设为 TB_B。

当 QEMU 翻译完 TB_A 代码块后，将会执行 mips_tr_tb_stop 函数，然后调用 save_cpu_state 函数：代码如下所示：

/*
 * Emit TCG ops that write translation-time state that differs from what was
 * last saved: the PC (only when do_save_pc is non-zero), hflags, and -- for
 * the BC/BL/B branch kinds -- the branch target.
 * NOTE(review): these are generated-code stores, so the values presumably
 * reach the CPU state only when the generated block executes -- confirm.
 */
static inline void save_cpu_state(DisasContext *ctx, int do_save_pc)
{
    LOG_DISAS("hflags %08x saved %08x\n", ctx->hflags, ctx->saved_hflags);
    if (do_save_pc && ctx->base.pc_next != ctx->saved_pc) {
        gen_save_pc(ctx->base.pc_next);
        ctx->saved_pc = ctx->base.pc_next;
    }
    /* hflags (and possibly btarget) are emitted only when hflags changed. */
    if (ctx->hflags != ctx->saved_hflags) {
        tcg_gen_movi_i32(hflags, ctx->hflags);
        ctx->saved_hflags = ctx->hflags;
        switch (ctx->hflags & MIPS_HFLAG_BMASK_BASE) {
        case MIPS_HFLAG_BR:
            break;
        case MIPS_HFLAG_BC:
        case MIPS_HFLAG_BL:
        case MIPS_HFLAG_B:
            tcg_gen_movi_tl(btarget, ctx->btarget);
            break;
        }
    }
}

因为在 mips_tr_tb_stop 函数中调用的是 save_cpu_state(ctx, 0);，因此 do_save_pc=0。最终只执行了三句指令：

tcg_gen_movi_i32(hflags, ctx->hflags);
ctx->saved_hflags = ctx->hflags;
tcg_gen_movi_tl(btarget, ctx->btarget);

这三句指令可以对应到之前 QEMU 日志中的指令，如下所示：

# 设置hflags
tcg_gen_movi_i32(hflags, ctx->hflags); 
-> 0x7f0cd4028663: movl     $0x110a2, 0x1b1c(%rbp)
# 设置btarget
tcg_gen_movi_tl(btarget, ctx->btarget);
-> 0x7f0cd402866d: movl     $0x2b506fd0, 0x1b20(%rbp)

当前代码块翻译完成后，接下来翻译 TB_B 块。在翻译块的开头调用 mips_tr_init_disas_context 函数进行上下文变量初始化，将会调用 restore_cpu_state 函数，恢复 btarget 值，相关代码如下所示：

/*
 * Reload translation-time branch state: mark the current hflags as saved
 * and, for the BC/BL/B branch kinds, take the branch target from
 * env->btarget (a value read at translation time, not a generated op).
 */
static inline void restore_cpu_state(CPUMIPSState *env, DisasContext *ctx)
{
    ctx->saved_hflags = ctx->hflags;
    switch (ctx->hflags & MIPS_HFLAG_BMASK_BASE) {
    case MIPS_HFLAG_BR:
        break;
    case MIPS_HFLAG_BC:
    case MIPS_HFLAG_BL:
    case MIPS_HFLAG_B:
        ctx->btarget = env->btarget;
        break;
    }
}

上述代码是导致本次 Bug 的另一个核心点，下文将详细说明。在初始化完上下文信息后，将会调用 mips_tr_translate_insn 翻译代码，首先翻译 delay slot 中的 addiu 指令。翻译完成后，因为 is_slot=True，所以将会调用 gen_branch(ctx, insn_bytes); 函数生成分支跳转代码。关键代码如下所示：

static void mips_tr_translate_insn(DisasContextBase *dcbase, CPUState *cs)
{
......
    if (is_slot) {
        gen_branch(ctx, insn_bytes);
    }
......
}
static void gen_branch(DisasContext *ctx, int insn_bytes)
{
    if (ctx->hflags & MIPS_HFLAG_BMASK) {
        int proc_hflags = ctx->hflags & MIPS_HFLAG_BMASK;
        /* Branches completion */
        clear_branch_hflags(ctx);
        ctx->base.is_jmp = DISAS_NORETURN;
        switch (proc_hflags & MIPS_HFLAG_BMASK_BASE) {
......
        case MIPS_HFLAG_BC:
            /* Conditional branch */
            {
                TCGLabel *l1 = gen_new_label();

                tcg_gen_brcondi_tl(TCG_COND_NE, bcond, 0, l1);
                gen_goto_tb(ctx, 1, ctx->base.pc_next + insn_bytes);
                gen_set_label(l1);
                gen_goto_tb(ctx, 0, ctx->btarget);
            }
            break;
......
}

该部分代码可以和上面 QEMU 日志信息进行一一对应，如下所示：

tcg_gen_brcondi_tl(TCG_COND_NE, bcond, 0, l1);
-> 
0x7f0cd4028822:  movl     0x1b24(%rbp), %ebx
0x7f0cd4028828:  testl    %ebx, %ebx
0x7f0cd402882a:  jne      0x7f0cd402884e (l1地址)
gen_goto_tb(ctx, 1, ctx->base.pc_next + insn_bytes);
-> 
0x7f0cd4028833:  jmp      0x7f0cd4028838
0x7f0cd4028838:  movl     $0x2b507004, 0x80(%rbp)
gen_set_label(l1);
gen_goto_tb(ctx, 0, ctx->btarget);
->
l1:
0x7f0cd402884e:  c7 85 80 00 00 00 d0 6f  movl     $0x2b506fd0, 0x80(%rbp)

至此，相关流程已梳理完毕。该 Bug 情况总体概括如下：AFL 在翻译 TB_B 代码块时，由于 ctx->btarget 的值错误地等于 0x402ac0，导致翻译出来的指令为：movl $0x402ac0, 0x80(%rbp)。在代码执行到该分支后，将会错误地跳转回程序的 _start 函数，最终导致程序崩溃。

总体梳理一下该 BUG 的成因：

QEMU 会根据 0x1000 大小对代码块进行分片。这可能会导致分支跳转指令和其 delay slot 指令被分成两块：TB_A 和 TB_B。
QEMU 考虑到分片会影响 delay slot 的情况，因此在翻译完 TB_A 后，会调用 save_cpu_state 函数保存跳转地址信息。
save_cpu_state 函数保存 btarget 的方案是翻译成 TCG 代码（movl $0x2b506fd0, 0x1b20(%rbp)）。
restore_cpu_state 函数恢复 btarget 的方案是从 env->btarget 上下文中获取。

上述的 3, 4 两点就产生了冲突，最终导致 BUG 的产生。

在原版的 QEMU 中，因为性能考虑，不会一次性把所有代码都翻译成 TCG 指令。而是运行到哪，翻译到哪。因此在翻译完 TB_A 指令后，将会执行 TB_A 指令。TB_A 翻译出的 TCG 指令最终会跳转到 TB_B 地址，因此下一步将会翻译 TB_B 指令。由于已经执行了 TB_A TCG 指令中的 movl $0x2b506fd0, 0x1b20(%rbp) 指令，因此 env->btarget 已经被成功设置成正确的 btarget 地址了。所以在后面翻译 TB_B 指令的流程中没有出错。

在 qemuafl 中，第一个子进程的翻译过程就是按照 QEMU 原版的逻辑来运行，因此不会出错。但是，随后父进程将会跟着开始翻译代码块，却不执行，这就导致 env->btarget 无法被正确设置，最终代码出错，程序崩溃。

简单来说，原版 QEMU 的流程为：翻译TB_A -> 执行TB_A -> 翻译TB_B -> 执行TB_B。
qemuafl 父进程的流程为：翻译TB_A -> 翻译TB_B -> ...。

随后对原版 qemuafl（QEMU V5 版本）进行的简要分析显示，该 Bug 仍然存在，如下所示：

OUT: [size=114]
  -- guest addr 0x3fd87000 + tb prologue
0x7fe6dc01ce9b:  44 8b e3                 movl     %ebx, %r12d
0x7fe6dc01ce9e:  41 83 e4 01              andl     $1, %r12d
0x7fe6dc01cea2:  44 8b ad 9c 1a 00 00     movl     0x1a9c(%rbp), %r13d
0x7fe6dc01cea9:  41 81 e5 ff fb ff ff     andl     $0xfffffbff, %r13d
0x7fe6dc01ceb0:  41 c1 e4 0a              shll     $0xa, %r12d
0x7fe6dc01c240:  8b 5d f8                 movl     -8(%rbp), %ebx
0x7fe6dc01ceb4:  45 0b ec                 orl      %r12d, %r13d
0x7fe6dc01c243:  85 db                    testl    %ebx, %ebx
0x7fe6dc01ceb7:  44 89 ad 9c 1a 00 00     movl     %r13d, 0x1a9c(%rbp)
0x7fe6dc01c245:  0f 8c 5b 00 00 00        jl       0x7fe6dc01c2a6
0x7fe6dc01cebe:  83 e3 fe                 andl     $0xfffffffe, %ebx
0x7fe6dc01c24b:  8b 5d 0c                 movl     0xc(%rbp), %ebx
0x7fe6dc01cec1:  89 9d 80 00 00 00        movl     %ebx, 0x80(%rbp)
0x7fe6dc01c24e:  83 c3 fc                 addl     $-4, %ebx
0x7fe6dc01cec7:  48 8b fd                 movq     %rbp, %rdi
0x7fe6dc01c251:  89 5d 24                 movl     %ebx, 0x24(%rbp)
0x7fe6dc01ceca:  ff 15 10 00 00 00        callq    *0x10(%rip)
0x7fe6dc01c254:  c7 85 9c 1a 00 00 a2 00  movl     $0xa2, 0x1a9c(%rbp)
0x7fe6dc01ced0:  ff e0                    jmpq     *%rax
0x7fe6dc01c25c:  00 00
0x7fe6dc01ced2:  48 8d 05 2a ff ff ff     leaq     -0xd6(%rip), %rax
0x7fe6dc01c25e:  8b 9d a4 1a 00 00        movl     0x1aa4(%rbp), %ebx
0x7fe6dc01ced9:  e9 3a 31 fe ff           jmp      0x7fe6dc000018
0x7fe6dc01c264:  85 db                    testl    %ebx, %ebx
  -- tb slow paths + alignment
0x7fe6dc01c266:  0f 85 1e 00 00 00        jne      0x7fe6dc01c28a
0x7fe6dc01c26c:  66 66 90                 nop
0x7fe6dc01c26f:  e9 00 00 00 00           jmp      0x7fe6dc01c274
0x7fe6dc01c274:  c7 85 80 00 00 00 04 70  movl     $0x3fd87004, 0x80(%rbp)
0x7fe6dc01c27c:  d8 3f
0x7fe6dc01cede:  90                       nop
0x7fe6dc01c27e:  48 8d 05 3c ff ff ff     leaq     -0xc4(%rip), %rax
0x7fe6dc01cedf:  90                       nop
0x7fe6dc01c285:  e9 8e 3d fe ff           jmp      0x7fe6dc000018
  data: [size=8]
0x7fe6dc01c28a:  90                       nop
0x7fe6dc01cee0:  .quad  0x55ad16a506f0
0x7fe6dc01c28b:  e9 00 00 00 00           jmp      0x7fe6dc01c290

0x7fe6dc01c290:  c7 85 80 00 00 00 c0 2a  movl     $0x402ac0, 0x80(%rbp)
0x7fe6dc01c298:  40 00
0x7fe6dc01c29a:  48 8d 05 1f ff ff ff     leaq     -0xe1(%rip), %rax
0x7fe6dc01c2a1:  e9 72 3d fe ff           jmp      0x7fe6dc000018
0x7fe6dc01c2a6:  48 8d 05 16 ff ff ff     leaq     -0xea(%rip), %rax
0x7fe6dc01c2ad:  e9 66 3d fe ff           jmp      0x7fe6dc000018

0x7fe6dc01c290 地址的指令仍然出错，至于为何未能触发该 Bug，需要进一步调试分析，感兴趣的读者可自行探索。

修复方案

不把 delay slot 进行分片，patch 代码如下所示：

/*
 * End the TB on (most) page crossings.
 * See mips_tr_init_disas_context about single-stepping a branch
 * together with its delay slot.
 */
if (ctx->base.pc_next - ctx->page_start >= TARGET_PAGE_SIZE
    && !(tb_cflags(ctx->base.tb) & CF_SINGLE_STEP)
    && !(ctx->hflags & MIPS_HFLAG_BMASK) // patch代码
) {
    ctx->base.is_jmp = DISAS_TOO_MANY;
}

该修复方案优点在于快捷简单，缺点是可能会影响代码分片，从而对 QEMU 执行效率产生一定影响。

修改gen_branch函数的逻辑，如下所示：

gen_goto_tb(ctx, 0, ctx->btarget);
修改成->
tcg_gen_mov_tl(cpu_PC, btarget);
tcg_gen_lookup_and_goto_ptr();

该修复方案同样快捷简单，把所有分支跳转的 btarget 都修改成从内存中获取。缺点是性能开销大，这一步骤需要进行内存寻址操作，性能开销远大于硬编码。MIPS 代码中肯定会存在大量分支跳转指令，这会大大影响程序的执行效率。

只针对该BUG，修改gen_branch函数的逻辑，如下所示：

target/mips/tcg/translate.h 中：
typedef struct DisasContext {
    DisasContextBase base;
    // ... 原有字段 ...
    bool mi;
    int gi;
    // 新增字段
    bool btarget_from_env; 
} DisasContext;

target/mips/tcg/translate.c 
static void mips_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs)
{
    // ... 原有代码 ...
    restore_cpu_state(env, ctx); // 这里 ctx->btarget 被赋值为 env->btarget

    // 新增代码：如果当前处于 Branch Mask 状态，说明是在 delay slot 中开始的 TB
    ctx->btarget_from_env = (ctx->hflags & MIPS_HFLAG_BMASK) != 0;

    // ... 原有代码 ...
}
...
static void gen_branch(DisasContext *ctx, int insn_bytes)
{
    if (ctx->hflags & MIPS_HFLAG_BMASK) {
        int proc_hflags = ctx->hflags & MIPS_HFLAG_BMASK;
        // ... 原有代码 ...
        switch (proc_hflags & MIPS_HFLAG_BMASK_BASE) {
        // ...
        case MIPS_HFLAG_B:
            /* unconditional branch */
           if (proc_hflags & MIPS_HFLAG_BX) {
               tcg_gen_xori_i32(hflags, hflags, MIPS_HFLAG_M16);
           }
           // 修改开始
           if (ctx->btarget_from_env) {
               tcg_gen_mov_tl(cpu_PC, btarget);
               tcg_gen_lookup_and_goto_ptr();
           } else {
               gen_goto_tb(ctx, 0, ctx->btarget);
           }
           // 修改结束
           break;
       case MIPS_HFLAG_BL:
           /* blikely taken case */
           // 修改开始
           if (ctx->btarget_from_env) {
               tcg_gen_mov_tl(cpu_PC, btarget);
               tcg_gen_lookup_and_goto_ptr();
           } else {
               gen_goto_tb(ctx, 0, ctx->btarget);
           }
           // 修改结束
           break;
       case MIPS_HFLAG_BC:
           /* Conditional branch */
           {
               TCGLabel *l1 = gen_new_label();

               tcg_gen_brcondi_tl(TCG_COND_NE, bcond, 0, l1);
               gen_goto_tb(ctx, 1, ctx->base.pc_next + insn_bytes);
               gen_set_label(l1);
               // 修改开始
               if (ctx->btarget_from_env) {
                   tcg_gen_mov_tl(cpu_PC, btarget);
                   tcg_gen_lookup_and_goto_ptr();
               } else {
                   gen_goto_tb(ctx, 0, ctx->btarget);
               }
               // 修改结束
           }
           break;
       // ... MIPS_HFLAG_BR 本身已经是动态跳转了，无需修改 ...
       }
   }
}

该方案由AI生成，修改起来稍微复杂一点，但是性能损失小。

AFL Fuzz QEMU新版适配：深度解析 Patch 细节

Thu, 13 Nov 2025 09:04:25 GMT

本文将深度解析 AFL++ 对 QEMU 的 patch 细节。

AFL对 QEMU 的修改

首先，下面列出 AFL对 QEMU 的修改目录：

accel/tcg/cpu-exec.c                          | 1582 +++++++++++++++++
accel/tcg/tcg-runtime.c                       |  824 +++++++++
accel/tcg/tcg-runtime.h                       |   28 +
accel/tcg/translate-all.c                     |  212 +++
accel/tcg/translator.c                        |   30 +
linux-user/elfload.c                          |   55 +
linux-user/main.c                             |  120 ++
linux-user/mips/cpu_loop.c                    |   15 +
linux-user/mmap.c                             |   63 +
linux-user/signal.c                           |   42 +-
linux-user/syscall.c                          |   52 +-
qemuafl/api.h                                 |  215 +++
qemuafl/asan-giovese-inl.h                    | 1536 ++++++++++++++++
qemuafl/asan-giovese.h                        |  155 ++
qemuafl/common.h                              |  200 +++
qemuafl/cpu-translate.h                       |  177 ++
qemuafl/imported/afl_hash.h                   |   74 +
qemuafl/imported/cmplog.h                     |  106 ++
qemuafl/imported/config.h                     |  591 ++++++
qemuafl/imported/snapshot-inl.h               |  115 ++
qemuafl/imported/types.h                      |  253 +++
qemuafl/interval-tree/.gitignore              |    3 +
qemuafl/interval-tree/COPYING                 |   20 +
qemuafl/interval-tree/compiler.h              |   17 +
qemuafl/interval-tree/interval-tree.inl       |    2 +
qemuafl/interval-tree/interval_tree_generic.h |  193 ++
qemuafl/interval-tree/rbtree.h                |  108 ++
qemuafl/interval-tree/rbtree.inl              |  549 ++++++
qemuafl/interval-tree/rbtree_augmented.h      |  245 +++
qemuafl/qasan-qemu.h                          |  143 ++
qemuafl/qasan.h                               |  264 +++
qemuafl/qemu-ijon-support.h                   |   65 +
target/mips/tcg/translate.c                   |  146 ++
tcg/tcg-op.c                                  |   19 +
tcg/tcg.c                                     |   13 +

不过上面的目录并不完全，只针对了目标架构为 mips 的情况，不同架构的以下文件不会一样：

linux-user/{arch}/cpu_loop.c
target/{arch}/tcg/translate.c

首先，qemuafl目录下的文件为 AFL 相关的头文件，包含相关全局变量结构体类型声明等等。若需查看 AFL 对 QEMU 进行了哪些修改，可以通过检查该文件代码是否包含qemuafl目录下的头文件。

由于 AFL 的 QEMU 使用的是user-mode，因此应从 linux-user 目录下的代码开始分析。

linux-user/main.c文件的代码，patch 内容如下所示：

diff --git a/linux-user/main.c b/linux-user/main.c
index 2cd867491b..b0172d86fb 100644
--- a/linux-user/main.c
+++ b/linux-user/main.c
@@ -68,6 +68,9 @@
 #define AT_FLAGS_PRESERVE_ARGV0 (1 << AT_FLAGS_PRESERVE_ARGV0_BIT)
 #endif
 
+#include "tcg/tcg-op.h"
+#include "qemuafl/qasan-qemu.h"
+
 char *exec_path;
 char real_exec_path[PATH_MAX];
 
@@ -267,6 +270,73 @@ CPUArchState *cpu_copy(CPUArchState *env)
     return new_env;
 }
 
+/* A shorthand way to suppress the warnings that you are ignoring the return value of asprintf() */
+static inline void ignore_result(long long int unused_result)
+{
+    (void) unused_result;
+}
+
+/* Get libqasan path. */
+#ifndef AFL_PATH
+  #define AFL_PATH "/usr/local/lib/afl/"
+#endif
+static char *get_libqasan_path(char *own_loc)
+{
+    if (!unlikely(own_loc)) {
+        fprintf(stderr, "BUG: param own_loc is NULL\n");
+        exit(EXIT_FAILURE);
+    }
+
+    char *tmp, *cp = NULL, *rsl, *own_copy;
+
+    tmp = getenv("AFL_PATH");
+    if (tmp) {
+        ignore_result(asprintf(&cp, "%s/libqasan.so", tmp));
+        if (access(cp, X_OK)) {
+            fprintf(stderr, "Unable to find '%s'\n", tmp);
+            exit(EXIT_FAILURE);
+        }
+
+        return cp;
+    }
+
+    own_copy = strdup(own_loc);
+    rsl = strrchr(own_copy, '/');
+    if (rsl) {
+        *rsl = 0;
+
+        ignore_result(asprintf(&cp, "%s/libqasan.so", own_copy));
+        free(own_copy);
+
+        if (!access(cp, X_OK)) { return cp; }
+
+    } else {
+        free(own_copy);
+    }
+
+    if (!access(AFL_PATH "/libqasan.so", X_OK)) {
+        if (cp) { free(cp); }
+
+        return strdup(AFL_PATH "/libqasan.so");
+    }
+
+    /* This is an AFL error message, but since it is in QEMU it can't
+       have all the pretty formatting of AFL without importing
+       a bunch of AFL pieces. */
+    fprintf(stderr, "\n" "" "[-] " ""
+        "Oops, unable to find the 'libqasan.so' binary. The binary must be "
+        "built\n"
+        "    separately by following the instructions in "
+        "qemu_mode/libqasan/README.md. "
+        "If you\n"
+        "    already have the binary installed, you may need to specify "
+        "AFL_PATH in the\n"
+        "    environment.\n");
+
+    fprintf(stderr, "Failed to locate 'libqasan.so'.\n");
+    exit(EXIT_FAILURE);
+}
+
 static void handle_arg_help(const char *arg)
 {
     usage(EXIT_SUCCESS);
@@ -713,6 +783,18 @@ int main(int argc, char **argv, char **envp)
     unsigned long max_reserved_va;
     bool preserve_argv0;
 
+    use_qasan = !!getenv("AFL_USE_QASAN");
+
+    if (getenv("QASAN_MAX_CALL_STACK"))
+      qasan_max_call_stack = atoi(getenv("QASAN_MAX_CALL_STACK"));
+    if (getenv("QASAN_SYMBOLIZE"))
+      qasan_symbolize = atoi(getenv("QASAN_SYMBOLIZE"));
+
+#if defined(ASAN_GIOVESE) && !defined(DO_NOT_USE_QASAN)
+    if (use_qasan)
+      asan_giovese_init();
+#endif
+
     error_init(argv[0]);
     module_call_init(MODULE_INIT_TRACE);
     qemu_init_cpu_list();
@@ -733,6 +815,45 @@ int main(int argc, char **argv, char **envp)
         (void) envlist_setenv(envlist, *wrk);
     }
 
+    /* Add AFL_PRELOAD for qasan if it is enabled */
+    if(use_qasan) {
+        char *preload = getenv("AFL_PRELOAD");
+        char *libqasan = get_libqasan_path(argv[0]);
+
+        if (!preload) {
+            setenv("AFL_PRELOAD", libqasan, 0);
+        } else {
+            /* NOTE: If there is more than one in the list, LD_PRELOAD allows spaces or colons
+                     as separators (but no escaping provided), but DYLD_INSERT_LIBRARIES allows only colons.
+                     Prefer colons for maximum compatibility, but use space if the string already has any. */
+            char * afl_preload;
+            if (strchr(preload, ' ')) {
+                ignore_result(asprintf(&afl_preload, "%s %s", libqasan, preload));
+            } else {
+                ignore_result(asprintf(&afl_preload, "%s:%s", libqasan, preload));
+            }
+
+            setenv("AFL_PRELOAD", afl_preload, 1);
+            free(afl_preload);
+        }
+        free(libqasan);
+    }
+
+    /* Expand AFL_PRELOAD to append preload libraries */
+    char *afl_preload = getenv("AFL_PRELOAD");
+    if (afl_preload) {
+        /* NOTE: If there is more than one in the list, LD_PRELOAD allows spaces or colons
+                 as separators, but DYLD_INSERT_LIBRARIES allows only colons.
+                 Maybe we should attempt to normalize the list here before we assign it? */
+        char * ld_preload;
+        ignore_result(asprintf(&ld_preload, "LD_PRELOAD=%s", afl_preload));
+        envlist_setenv(envlist, ld_preload);
+
+        char * dyld_insert;
+        ignore_result(asprintf(&dyld_insert, "DYLD_INSERT_LIBRARIES=%s", afl_preload));
+        envlist_setenv(envlist, dyld_insert);
+    }
+
     /* Read the stack limit from the kernel.  If it's "unlimited",
        then we can do little else besides use the default.  */
     {

main.c 代码中添加的内容旨在让 QEMU 支持 QASan。关于 QASan，ChatGPT 的解释如下：

QASan（QEMU Address Sanitizer）
是 AFL++ 专为 QEMU 模式实现的一种轻量级 Address Sanitizer（内存错误检测）。
在 QEMU 的 linux-user 模式中，通过 LD_PRELOAD + runtime hook 去检测目标程序的内存错误。
QASan 可以检测：
✔️ 堆缓冲区越界（heap OOB）
例如 malloc 100 字节，但写到 100 以外的区域。
✔️ Use-after-free（UAF）
free 之后继续使用。
✔️ Double-free
同一个指针重复释放。
✔️ Invalid free
释放不是 malloc 得到的地址。
✔️ 一些栈溢出触发崩溃行为
（栈上的 shadow memory 不完整，所以能力有限，但比没有强。）
✔️ 内存泄漏检测（部分）
它覆盖的是 heap 和全局区内存错误。
栈检测有限，但也能帮助 fuzzing 提升覆盖率和能检测更多 bug。

linux-user/elfload.c文件patch 内容如下所示：

diff --git a/linux-user/elfload.c b/linux-user/elfload.c
index fa83d78667..e8b1a946fb 100644
--- a/linux-user/elfload.c
+++ b/linux-user/elfload.c
@@ -33,6 +33,8 @@
 #include "target/arm/cpu-features.h"
 #endif
 
+#include "qemuafl/common.h"
+
 #ifdef _ARCH_PPC64
 #undef ARCH_DLINFO
 #undef ELF_PLATFORM
@@ -3463,9 +3465,11 @@ static void load_elf_image(const char *image_name, const ImageSource *src,
             if (elf_prot & PROT_EXEC) {
                 if (vaddr < info->start_code) {
                     info->start_code = vaddr;
+                    if (!afl_start_code) afl_start_code = vaddr;
                 }
                 if (vaddr_ef > info->end_code) {
                     info->end_code = vaddr_ef;
+                    if (!afl_end_code) afl_end_code = vaddr_ef;
                 }
             }
             if (elf_prot & PROT_WRITE) {
@@ -3499,6 +3503,57 @@ static void load_elf_image(const char *image_name, const ImageSource *src,
         load_symbols(ehdr, src, load_bias);
     }
 
+    if (getenv("AFL_QEMU_BLOCK_COV")) {
+      block_cov = 1;
+      block_id = 5;
+    }
+
+    if (!afl_exit_point) {
+      char *ptr;
+      if ((ptr = getenv("AFL_EXITPOINT")) != NULL) {
+        afl_exit_point = strtoul(ptr, NULL, 16);
+#ifdef TARGET_ARM
+      /* The least significant bit indicates Thumb mode. */
+        afl_exit_point = afl_exit_point & ~(target_ulong)1;
+#endif
+        if (getenv("AFL_DEBUG") != NULL)
+          fprintf(stderr, "AFL exitpoint: 0x%lx\n",
+                  (unsigned long)afl_exit_point);
+      }
+    }
+
+    if (!afl_entry_point) {
+      char *ptr;
+      if ((ptr = getenv("AFL_ENTRYPOINT")) != NULL) {
+        afl_entry_point = strtoul(ptr, NULL, 16);
+      } else {
+        // On PowerPC64 the entry point is the _function descriptor_
+        // of the entry function. For AFL to properly initialize,
+        // afl_entry_point needs to be set to the actual first instruction
+        // as opposed executed by the target program. This as opposed to
+        // where the function's descriptor sits in memory.
+        // copied from PPC init_thread
+#if defined(TARGET_PPC64) && !defined(TARGET_ABI32)
+        if (get_ppc64_abi(info) < 2) {
+            uint64_t val;
+            get_user_u64(val, info->entry);
+            afl_entry_point = val + info->load_bias;
+        } else {
+            afl_entry_point = info->entry;
+        }
+#else
+        afl_entry_point = info->entry;
+#endif
+      }
+#ifdef TARGET_ARM
+      /* The least significant bit indicates Thumb mode. */
+      afl_entry_point = afl_entry_point & ~(target_ulong)1;
+#endif
+    }
+    if (getenv("AFL_DEBUG") != NULL)
+      fprintf(stderr, "AFL forkserver entrypoint: 0x%lx\n",
+              (unsigned long)afl_entry_point);
+
     debuginfo_report_elf(image_name, src->fd, load_bias);
 
     mmap_unlock();

elfload.c 文件主要用于读取目标 ELF 文件结构信息。AFL 在该文件中进行全局变量的初始化，比如获取该 ELF 文件的代码段范围（afl_start_code / afl_end_code）。还可以通过 AFL_EXITPOINT 环境变量设置 fuzz 的结束地址，通过 AFL_ENTRYPOINT 环境变量设置 fuzz 的入口地址。如果没有设置 AFL_ENTRYPOINT，默认使用 ELF 程序的入口地址（info->entry）。

linux-user/mmap.c文件patch 内容如下所示：

diff --git a/linux-user/mmap.c b/linux-user/mmap.c
index d1f36e6f16..c080a739bf 100644
--- a/linux-user/mmap.c
+++ b/linux-user/mmap.c
@@ -34,6 +34,25 @@
 #include "target/arm/cpu-features.h"
 #endif
 
+#include "qemuafl/common.h"
+#include "qemuafl/interval-tree/interval-tree.inl"
+
+struct mmap_tree_node {
+
+  struct rb_node rb;
+  abi_long start, end;
+  abi_long __subtree_last;
+
+};
+
+#define MMAP_TREE_START(node) ((node)->start)
+#define MMAP_TREE_LAST(node) ((node)->end)
+
+INTERVAL_TREE_DEFINE(struct mmap_tree_node, rb, abi_long, __subtree_last,
+                     MMAP_TREE_START, MMAP_TREE_LAST, static, mmap_tree)
+
+static struct rb_root mmap_tree_root = RB_ROOT;
+
 static pthread_mutex_t mmap_mutex = PTHREAD_MUTEX_INITIALIZER;
 static __thread int mmap_lock_count;
 
@@ -585,6 +604,12 @@ static abi_long mmap_end(abi_ulong start, abi_ulong last,
             qemu_log_unlock(f);
         }
     }
+    if (afl_fork_child && persistent_memory) {
+        struct mmap_tree_node* node = calloc(sizeof(struct mmap_tree_node), 1);
+        node->start = start;
+        node->end = last;
+        mmap_tree_insert(node, &mmap_tree_root);
+    }
     return start;
 }
 
@@ -1095,6 +1120,17 @@ int target_munmap(abi_ulong start, abi_ulong len)
     if (likely(ret == 0)) {
         page_set_flags(start, start + len - 1, 0);
         shm_region_rm_complete(start, start + len - 1);
+
+        if (afl_fork_child && persistent_memory) {
+            struct mmap_tree_node* node = mmap_tree_iter_first(&mmap_tree_root,
+                                            start, start + len - 1);
+            while (node) {
+                struct mmap_tree_node* next = mmap_tree_iter_next(node, start,
+                                                start + len - 1);
+                mmap_tree_remove(node, &mmap_tree_root);
+                node = next;
+            }
+        }
     }
     mmap_unlock();
 
@@ -1189,6 +1225,21 @@ abi_long target_mremap(abi_ulong old_addr, abi_ulong old_size,
         page_set_flags(new_addr, new_addr + new_size - 1,
                        prot | PAGE_VALID | PAGE_RESET);
         shm_region_rm_complete(new_addr, new_addr + new_size - 1);
+        if (afl_fork_child && persistent_memory) {
+            struct mmap_tree_node* node = mmap_tree_iter_first(&mmap_tree_root,
+                                            old_addr, old_addr + old_size - 1);
+            while (node) {
+                struct mmap_tree_node* next = mmap_tree_iter_next(node, old_addr,
+                                                old_addr + old_size - 1);
+                mmap_tree_remove(node, &mmap_tree_root);
+                node = next;
+            }
+            
+            node = calloc(sizeof(struct mmap_tree_node), 1);
+            node->start = new_addr;
+            node->end = new_addr + new_size - 1;
+            mmap_tree_insert(node, &mmap_tree_root);
+        }
     }
     mmap_unlock();
     return new_addr;
@@ -1486,3 +1537,15 @@ abi_long target_shmdt(abi_ulong shmaddr)
     }
     return rv;
 }
+
+void afl_target_unmap_trackeds(void) {
+
+    struct mmap_tree_node* node = mmap_tree_iter_first(&mmap_tree_root, 0,
+                                                        (abi_ulong)-1);
+    while (node) {
+        struct mmap_tree_node* next = mmap_tree_iter_next(node, 0, (abi_ulong)-1);
+        target_munmap(node->start, node->end - node->start);
+        node = next;
+    }
+
+}

mmap.c 文件涉及内存管理。在 persistent 模式下，AFL 需要自动跟踪目标应用的所有 mmap 内存区域。

linux-user/signal.c文件patch 内容如下所示：

diff --git a/linux-user/signal.c b/linux-user/signal.c
index 4dafc2c3a2..8c99829023 100644
--- a/linux-user/signal.c
+++ b/linux-user/signal.c
@@ -39,6 +39,9 @@
 #include "user/signal.h"
 #include "tcg/tcg.h"
 
+#include "tcg/tcg-op.h"
+#include "qemuafl/qasan-qemu.h"
+
 /* target_siginfo_t must fit in gdbstub's siginfo save area. */
 QEMU_BUILD_BUG_ON(sizeof(target_siginfo_t) > MAX_SIGINFO_LENGTH);
 
@@ -1293,19 +1296,55 @@ static void handle_pending_signal(CPUArchState *cpu_env, int sig,
         print_taken_signal(sig, &unswapped);
     }
 
-    if (handler == TARGET_SIG_DFL) {
-        /* default handler : ignore some signal. The other are job control or fatal */
+    int ignore_handling = !!getenv("AFL_QEMU_FORCE_DFL");
+
+    if (handler == TARGET_SIG_DFL || ignore_handling) {
+    /* default handler : ignore some signal. The other are job control or fatal */
         if (sig == TARGET_SIGTSTP || sig == TARGET_SIGTTIN || sig == TARGET_SIGTTOU) {
             kill(getpid(),SIGSTOP);
         } else if (sig != TARGET_SIGCHLD &&
                    sig != TARGET_SIGURG &&
                    sig != TARGET_SIGWINCH &&
                    sig != TARGET_SIGCONT) {
+#if defined(ASAN_GIOVESE) && !defined(DO_NOT_USE_QASAN)
+            if (use_qasan) {
+              if (sig == TARGET_SIGILL ||
+                  sig == TARGET_SIGFPE ||
+                  sig == TARGET_SIGSEGV ||
+                  sig == TARGET_SIGBUS)
+                asan_giovese_deadly_signal(target_to_host_signal(sig),
+                                           k->info._sifields._sigfault._addr,
+                                           PC_GET(cpu_env), BP_GET(cpu_env),
+                                           SP_GET(cpu_env));
+              else
+                asan_giovese_deadly_signal(target_to_host_signal(sig),
+                                           PC_GET(cpu_env),
+                                           PC_GET(cpu_env), BP_GET(cpu_env),
+                                           SP_GET(cpu_env));
+            }
+#endif
             dump_core_and_abort(cpu_env, sig);
         }
     } else if (handler == TARGET_SIG_IGN) {
         /* ignore sig */
     } else if (handler == TARGET_SIG_ERR) {
+#if defined(ASAN_GIOVESE) && !defined(DO_NOT_USE_QASAN)
+      if (use_qasan) {
+        if (sig == TARGET_SIGILL ||
+            sig == TARGET_SIGFPE ||
+            sig == TARGET_SIGSEGV ||
+            sig == TARGET_SIGBUS)
+          asan_giovese_deadly_signal(target_to_host_signal(sig),
+                                     k->info._sifields._sigfault._addr,
+                                     PC_GET(cpu_env), BP_GET(cpu_env),
+                                     SP_GET(cpu_env));
+        else
+          asan_giovese_deadly_signal(target_to_host_signal(sig),
+                                     PC_GET(cpu_env),
+                                     PC_GET(cpu_env), BP_GET(cpu_env),
+                                     SP_GET(cpu_env));
+      }
+#endif
         dump_core_and_abort(cpu_env, sig);

signal.c 文件主要处理信号相关内容。AFL 主要通过信号来判断目标程序是否 crash，比如：SIGILL, SIGFPE, SIGSEGV, SIGBUS。

然而，当目标程序自行编写信号处理函数时，AFL 可能无法捕获相关信号。因此，可通过设置 AFL_QEMU_FORCE_DFL 环境变量，强制 QEMU 忽略目标程序注册的处理函数，按默认方式（TARGET_SIG_DFL）处理信号。

除此之外，还有添加跟QASan相关的代码，在 fatal signals 之前调用 QASan。

linux-user/syscall.c文件patch 内容如下所示：

diff --git a/linux-user/syscall.c b/linux-user/syscall.c
index 3a25abfaca..d6612ace5a 100644
--- a/linux-user/syscall.c
+++ b/linux-user/syscall.c
@@ -148,6 +148,9 @@
 #include "fd-trans.h"
 #include "user/cpu_loop.h"
 
+#include "qemuafl/common.h"
+#include "qemuafl/qasan-qemu.h"
+
 #ifndef CLONE_IO
 #define CLONE_IO                0x80000000      /* Clone io context */
 #endif
@@ -847,6 +850,15 @@ void target_set_brk(abi_ulong new_brk)
     initial_target_brk = target_brk;
 }
 
+abi_ulong afl_get_brk(void) {
+  return target_brk;
+}
+abi_ulong afl_set_brk(abi_ulong new_brk) {
+  abi_ulong old_brk = target_brk;
+  target_brk = new_brk;
+  return old_brk;
+}
+
 /* do_brk() must return target values and target errnos. */
 abi_long do_brk(abi_ulong brk_val)
 {
@@ -8590,7 +8602,7 @@ static int do_execv(CPUArchState *cpu_env, int dirfd,
                     abi_long guest_envp, int flags, bool is_execveat)
 {
     int ret;
-    char **argp, **envp;
+    char **argp = NULL, **envp = NULL;
     int argc, envc;
     abi_ulong gp;
     abi_ulong addr;
@@ -8616,6 +8628,35 @@ static int do_execv(CPUArchState *cpu_env, int dirfd,
         if (!addr) {
             break;
         }
+        /* QASAN: remove preloaded library */
+        if (use_qasan && !getenv("QASAN_PRESERVE_EXECVE")) {
+            /*
+            * If we need to clear the LD_PRELOAD list, run the memory
+            * lock and unlock methods to inspect the contents within
+            * the strings.
+            */
+            abi_long len = target_strlen(gp);
+            if (len < 0) {
+                return -TARGET_EFAULT;
+            }
+            char *env = lock_user(VERIFY_WRITE, gp, (long)(len + 1), 0);
+            if (!env)
+                goto execve_efault;
+            if (!strncmp("LD_PRELOAD=", env, 11)) {
+                char *p, *q, *r;
+                if ((q = r = strstr(env +11, "libqasan.so")) != NULL) {
+                    size_t mlen = strlen("libqasan.so");
+                    while ((r = strstr(p = r + mlen, "libqasan.so")) != NULL) {
+                        while (p < r)
+                            *q++ = *p++;
+                    }
+                    while ((*q++ = *p++) != '\0')
+                        continue;
+                }
+
+            }
+            unlock_user(env, gp, (long)(len + 1));
+        }
         envc++;
     }
 
@@ -13864,6 +13905,15 @@ static abi_long do_syscall1(CPUArchState *cpu_env, int num, abi_long arg1,
         return ret;
 #endif
 
+    case QASAN_FAKESYS_NR:
+        /* QASAN syscall */
+        if (use_qasan) {
+          return qasan_actions_dispatcher(cpu_env, arg1, arg2, arg3, arg4);
+        } else {
+          fprintf(stderr, "QAsan syscall unsupported without enabling QASan mode (AFL_USE_QASAN)\n");
+          return -TARGET_ENOSYS;
+        }
+
 #if defined(TARGET_NR_pivot_root)

syscall.c 文件主要处理系统调用相关指令。该补丁主要进行了三类修改：

新增 brk 相关的 AFL/QASAN 辅助函数
execve 环境变量处理增强：自动移除 LD_PRELOAD 中的 libqasan.so
新增 QASAN 的 “fake syscall” 接口，用于与 QASAN 交互

linux-user/mips/cpu_loop.c文件patch 内容如下所示：

diff --git a/linux-user/mips/cpu_loop.c b/linux-user/mips/cpu_loop.c
index 6405806eb0..ce6a620b7f 100644
--- a/linux-user/mips/cpu_loop.c
+++ b/linux-user/mips/cpu_loop.c
@@ -26,6 +26,9 @@
 #include "internal.h"
 #include "fpu_helper.h"
 
+/* MIPS_PATCH */
+#include "qemuafl/common.h"
+
 # ifdef TARGET_ABI_MIPSO32
 #  define MIPS_SYSCALL_NUMBER_UNUSED -1
 static const int8_t mips_syscall_args[] = {
@@ -78,6 +81,18 @@ void cpu_loop(CPUMIPSState *env)
 
         switch(trapnr) {
         case EXCP_SYSCALL:
+            if (
+                persistent_exits &&
+                (
+                     env->active_tc.gpr[2] == TARGET_NR_exit_group ||
+                     // uclibc may use the following signal instead of
+                     // exit_group:
+                     env->active_tc.gpr[2] == TARGET_NR_exit
+                )
+            ) {
+              env->active_tc.PC = afl_persistent_addr;
+              continue;
+            }
             env->active_tc.PC += 4;
 # ifdef TARGET_ABI_MIPSO32
             syscall_num = env->active_tc.gpr[2] - 4000;

linux-user/{arch}/cpu_loop.c 文件包含 QEMU 执行客户端代码的主要函数。AFL 在该文件中添加的内容为：在 persistent 模式下（persistent_exits 已启用时），如果遇到 exit 或 exit_group 系统调用，并不会真正退出，而是跳转到 afl_persistent_addr 地址。

接下来是accel/tcg目录下的代码，在默认情况下，QEMU 仿真的流程为：

Guest CPU 指令 → TCG IR（中间指令） → Host 机器码 → 执行

TCG IR 翻译为主机指令的操作即在该目录下完成。

accel/tcg/tcg-runtime.h文件patch 内容如下所示：

diff --git a/accel/tcg/tcg-runtime.h b/accel/tcg/tcg-runtime.h
index c23b5e66c4..e71cf0ca52 100644
--- a/accel/tcg/tcg-runtime.h
+++ b/accel/tcg/tcg-runtime.h
@@ -323,3 +323,31 @@ DEF_HELPER_FLAGS_4(gvec_leus32, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
 DEF_HELPER_FLAGS_4(gvec_leus64, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
 
 DEF_HELPER_FLAGS_5(gvec_bitsel, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_1(afl_entry_routine, TCG_CALL_NO_RWG, void, env)
+DEF_HELPER_FLAGS_1(afl_persistent_routine, TCG_CALL_NO_RWG, void, env)
+DEF_HELPER_FLAGS_1(afl_maybe_log, TCG_CALL_NO_RWG, void, tl)
+DEF_HELPER_FLAGS_1(afl_maybe_log2, TCG_CALL_NO_RWG, void, tl)
+DEF_HELPER_FLAGS_1(afl_maybe_log_trace, TCG_CALL_NO_RWG, void, tl)
+DEF_HELPER_FLAGS_3(afl_compcov_16, TCG_CALL_NO_RWG, void, tl, tl, tl)
+DEF_HELPER_FLAGS_3(afl_compcov_32, TCG_CALL_NO_RWG, void, tl, tl, tl)
+DEF_HELPER_FLAGS_3(afl_compcov_64, TCG_CALL_NO_RWG, void, tl, tl, tl)
+DEF_HELPER_FLAGS_3(afl_cmplog_8, TCG_CALL_NO_RWG, void, tl, tl, tl)
+DEF_HELPER_FLAGS_3(afl_cmplog_16, TCG_CALL_NO_RWG, void, tl, tl, tl)
+DEF_HELPER_FLAGS_3(afl_cmplog_32, TCG_CALL_NO_RWG, void, tl, tl, tl)
+DEF_HELPER_FLAGS_3(afl_cmplog_64, TCG_CALL_NO_RWG, void, tl, tl, tl)
+DEF_HELPER_FLAGS_1(afl_cmplog_rtn, TCG_CALL_NO_RWG, void, env)
+
+DEF_HELPER_FLAGS_5(qasan_fake_instr, TCG_CALL_NO_RWG, tl, env, tl, tl, tl, tl)
+DEF_HELPER_FLAGS_2(qasan_load1, TCG_CALL_NO_RWG, void, env, tl)
+DEF_HELPER_FLAGS_2(qasan_load2, TCG_CALL_NO_RWG, void, env, tl)
+DEF_HELPER_FLAGS_2(qasan_load4, TCG_CALL_NO_RWG, void, env, tl)
+DEF_HELPER_FLAGS_2(qasan_load8, TCG_CALL_NO_RWG, void, env, tl)
+DEF_HELPER_FLAGS_2(qasan_store1, TCG_CALL_NO_RWG, void, env, tl)
+DEF_HELPER_FLAGS_2(qasan_store2, TCG_CALL_NO_RWG, void, env, tl)
+DEF_HELPER_FLAGS_2(qasan_store4, TCG_CALL_NO_RWG, void, env, tl)
+DEF_HELPER_FLAGS_2(qasan_store8, TCG_CALL_NO_RWG, void, env, tl)
+DEF_HELPER_FLAGS_1(qasan_shadow_stack_push, TCG_CALL_NO_RWG, void, tl)
+DEF_HELPER_FLAGS_1(qasan_shadow_stack_pop, TCG_CALL_NO_RWG, void, tl)
+
+DEF_HELPER_FLAGS_4(ijon_func_call, TCG_CALL_NO_RWG, void, tl, tl, tl, tl)

accel/tcg/tcg-runtime.h头文件主要是用来声明 TCG Helper函数，AFL 在该文件中新增了一些 AFL 相关的Helper 函数。

下面介绍 TCG Helper 函数的结构。

1). 在 include/exec/helper-proto.h.inc 文件中定义了 Helper 函数声明的宏。

DEF_HELPER_FLAGS_x 为 Helper 函数宏，其中 x 表示有几个参数，比如：DEF_HELPER_FLAGS_2 表示该函数有两个参数。

在helper-proto.h.inc文件中的宏为：

#define DEF_HELPER_FLAGS_2(name, flags, ret, t1, t2) \
dh_ctype(ret) HELPER(name) (dh_ctype(t1), dh_ctype(t2)) DEF_HELPER_ATTR;

若将 DEF_HELPER_FLAGS_2(qasan_load2, TCG_CALL_NO_RWG, void, env, tl) 展开，函数声明为：void helper_qasan_load2(CPUArchState *, target_ulong) __attribute__((noinline));。

2). 在 include/exec/helper-info.h.inc 文件中定义了 Helper 函数相关的结构体宏。

在helper-info.h.inc文件中的宏为：

#define DEF_HELPER_FLAGS_2(NAME, FLAGS, RET, T1, T2)                    \
    TCGHelperInfo glue(helper_info_, NAME) = {                          \
        .func = HELPER(NAME), .name = str(NAME),                        \
        .flags = FLAGS | dh_callflag(RET),                              \
        .typemask = dh_typemask(RET, 0) | dh_typemask(T1, 1)            \
                  | dh_typemask(T2, 2)                                  \
    };

把该宏展开，如下所示：

TCGHelperInfo helper_info_qasan_load2 = {
.func = helper_qasan_load2, .name = "qasan_load2",
.flags = TCG_CALL_NO_RWG | dh_callflag_void,
.typemask = ......
}

3). 在 include/exec/helper-gen.h.inc 文件中定义了生成 Helper 函数调用的宏。

在helper-gen.h.inc文件中的宏为：

#define DEF_HELPER_FLAGS_2(name, flags, ret, t1, t2)                    \
extern TCGHelperInfo glue(helper_info_, name);                          \
static inline void glue(gen_helper_, name)(dh_retvar_decl(ret)          \
    dh_arg_decl(t1, 1), dh_arg_decl(t2, 2))                             \
{                                                                       \
    tcg_gen_call2(glue(helper_info_,name).func,                         \
                  &glue(helper_info_,name), dh_retvar(ret),             \
                  dh_arg(t1, 1), dh_arg(t2, 2));                        \
}

把该宏展开，如下所示：

extern TCGHelperInfo helper_info_qasan_load2;
static inline void gen_helper_qasan_load2(TCGv_ptr arg1, TCGv_i32 arg2)
{
tcg_gen_call2(helper_info_qasan_load2.func,
&helper_info_qasan_load2, NULL,
tcgv_ptr_temp(arg1), tcgv_i32_temp(arg2));
}

最后就是HELPER 函数的实现，如下所示：

void HELPER(qasan_load2)(CPUArchState *env, target_ulong addr) {
......
}

宏展开后变为：
void helper_qasan_load2(CPUArchState *env, target_ulong addr) {
......
}

当需要在翻译阶段插入对 helper 函数的调用时，调用的不是 helper_qasan_load2(x, x)，而是 gen_helper_qasan_load2(x, x)：后者通过 tcg_gen_call2 在 TCG IR 中生成一条对 helper_qasan_load2 的调用，真正的 helper 函数在翻译后的代码执行时才会运行。

accel/tcg/tcg-runtime.c文件patch 内容如下所示：

diff --git a/accel/tcg/tcg-runtime.c b/accel/tcg/tcg-runtime.c
index fa7ed9739c..56ebdb1261 100644
--- a/accel/tcg/tcg-runtime.c
+++ b/accel/tcg/tcg-runtime.c
@@ -31,6 +31,292 @@
 #include "exec/helper-info.c.inc"
 #undef  HELPER_H
 
+#include "qemuafl/common.h"
+#include "qemuafl/qemu-ijon-support.h"
+
+uint32_t afl_hash_ip(uint64_t);
+
+/*
+ * IJON helper: copies at most sizeof(uint64_t) bytes from var_addr into a
+ * zero-initialised 64-bit value, passes (itype, idx, value) to
+ * ijon_dispatch(), and logs the event to stderr.
+ */
+void HELPER(ijon_func_call)(target_ulong var_addr, target_ulong var_len, target_ulong itype, target_ulong idx)
+{
+  uint64_t buf = 0;
+  size_t copy_len = var_len;
+
+  /* buf holds only 8 bytes; an unchecked var_len would overflow the stack. */
+  if (copy_len > sizeof(buf)) copy_len = sizeof(buf);
+
+  /*
+   * NOTE(review): var_addr is used as a host pointer unchanged, exactly as
+   * before. If guest_base can be non-zero this presumably needs a
+   * guest-to-host translation first -- confirm.
+   */
+  memcpy(&buf, (const void *)(uintptr_t)var_addr, copy_len);
+  ijon_dispatch(itype, idx, buf);
+
+  /* Explicit casts: target_ulong may be 32-bit and buf is not a long. */
+  fprintf(stderr, "trigger ijon: addr=0x%016" PRIx64 " tag=%s value %" PRId64 "\n",
+          (uint64_t)var_addr, ijon_to_str(itype), (int64_t)buf);
+}
+
+/* TCG helper: hands the CPUState that owns env to afl_forkserver(). */
+void HELPER(afl_entry_routine)(CPUArchState *env)
+{
+  CPUState *cs = env_cpu(env);
+  afl_forkserver(cs);
+}
+
...
+
+#include <sys/mman.h>
+#include "linux-user/qemu.h" /* access_ok decls. */
+
+/*
+static int area_is_mapped(void *ptr, size_t len) {
+
+  char *p = ptr;
+  char *page = (char *)((uintptr_t)p & ~(sysconf(_SC_PAGE_SIZE) - 1));
+
+  int r = msync(page, (p - page) + len, MS_ASYNC);
+  if (r < 0) return errno != ENOMEM;
+  return 1;
+
+}
+*/
+
... 
+/////////////////////////////////////////////////
+//                   QASAN
+/////////////////////////////////////////////////
+
+#include "qemuafl/qasan-qemu.h"
+
+// options
+int qasan_max_call_stack = 16; // QASAN_MAX_CALL_STACK
+int qasan_symbolize = 1; // QASAN_SYMBOLIZE
+int use_qasan = 0;
+
+__thread int qasan_disabled;
+
+__thread struct shadow_stack qasan_shadow_stack;
+
+#ifdef ASAN_GIOVESE
+
+#ifndef DO_NOT_USE_QASAN
+
+#include "qemuafl/asan-giovese-inl.h"
+
+#include 
+#include 
+
...
+
 int32_t HELPER(rem_i32)(int32_t arg1, int32_t arg2)

tcg-runtime.c 原本用于实现 TCG Helper 函数。AFL 在该文件中增加了相关 Helper 函数的实现代码。

accel/tcg/translator.c文件patch 内容如下所示：

diff --git a/accel/tcg/translator.c b/accel/tcg/translator.c
index ef1538b4fc..f33048f522 100644
--- a/accel/tcg/translator.c
+++ b/accel/tcg/translator.c
@@ -21,6 +21,8 @@
 #include "disas/disas.h"
 #include "tb-internal.h"
 
+#include "qemuafl/common.h"
+
 static void set_can_do_io(DisasContextBase *db, bool val)
 {
     QEMU_BUILD_BUG_ON(sizeof_field(CPUState, neg.can_do_io) != 1);
@@ -167,6 +169,34 @@ void translator_loop(CPUState *cpu, TranslationBlock *tb, int *max_insns,
             plugin_gen_insn_start(cpu, db);
         }
 
+        if (db->pc_next == afl_entry_point) {
+            static bool first = true;
+            /*
+             * We guard this section since we flush the translation cache after
+             * we load the configuration, which in turn means we will need to
+             * re-translate our block. If we were to perform this flush every
+             * time (rather than just when our configuration is first loaded),
+             * we would just end up translation this block repeatedly.
+             */
+            if (first) {
+                afl_setup();
+                /*
+                 * We flush the translation cache here since we may already have
+                 * translated some blocks and included instrumentation in them
+                 * before we have processed the configuration from the
+                 * environment variables which configures which ranges to
+                 * include and exclude. Therefore we may have some blocks in our
+                 * cache which are incorrectly instrumented and cause some
+                 * fuzzing stability or performance problems.
+                 */
+                tb_flush(cpu);
+                first = false;
+            }
+            gen_helper_afl_entry_routine(cpu_env);
+        } else if (db->pc_next == afl_exit_point) {
+            _exit(0);
+        }
+
         /*
          * Disassemble one instruction.  The translate_insn hook should
          * update db->pc_next and db->is_jmp to indicate what should be

QEMU 进行 Guest CPU 指令 → TCG IR（中间指令） 翻译的主要流程代码位于 translator.c 的 translator_loop 函数中。

AFL 在该函数中添加了入口流程的代码，当翻译的地址为afl_entry_point时，则调用gen_helper_afl_entry_routine函数，而该函数就是 AFL forkserver 模式通信的核心函数。

AFL forkserver 工作模式如下：

AFL 首先创建两个管道，一个负责输入，一个负责输出，随后 fork 出一个子进程。在子进程中，这两个管道的对应端默认被映射到描述符：FORKSRV_FD, FORKSRV_FD+1。

在qemuafl/imported/config.h中定义了：#define FORKSRV_FD 198。

其中FORKSRV_FD负责 AFL->QEMU 通信，FORKSRV_FD+1负责QEMU->AFL 通信。

该子进程随后使用 execve 执行 QEMU，QEMU 进程将继承这两个管道描述符，从而实现 AFL 和 QEMU 的进程间通信。

gen_helper_afl_entry_routine 所对应的 Helper 函数的实现代码为：

void HELPER(afl_entry_routine)(CPUArchState *env) {
  afl_forkserver(env_cpu(env));
}

实际上调用的是 afl_forkserver 函数，该函数用于与 AFL 进行握手。若握手失败（向 FORKSRV_FD+1 写入状态失败），则函数直接返回，继续原本的 QEMU 流程；若握手成功，则该进程保持与 AFL 通信，并在每次收到 AFL 的指令后 fork 出一个子进程以继续 QEMU 的后续流程。

由于是 fork 出的子进程，将完全拷贝一份内存数据，且不影响父进程的内存空间结构。AFL 可借此快速执行每次 fuzz 流程。

在 afl_forkserver 函数中，还会通过 afl_wait_tsl 函数接收子进程指令翻译的情况，并同步至父进程。这样父进程下次 fork 的子进程可直接执行 Host 机器码，无需重复指令翻译流程，从而大幅提升 QEMU 仿真速度。

accel/tcg/translate-all.c文件patch 内容如下所示：

diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
index a497c54b80..7a6554b730 100644
--- a/accel/tcg/translate-all.c
+++ b/accel/tcg/translate-all.c
@@ -67,6 +67,104 @@
 #include "tcg/perf.h"
 #include "tcg/insn-start-words.h"
 
+#include "qemuafl/common.h"
+#include "tcg/tcg-op.h"
+#include "qemuafl/imported/afl_hash.h"
+
+#include <math.h>
+
+__thread int cur_block_is_good;
+
+/*
+ * Returns the descriptor of the unstable-block log named by the
+ * AFL_QEMU_TRACK_UNSTABLE environment variable, or -1 when the variable is
+ * unset or the file cannot be opened. The file is opened on the first call
+ * only; later calls return the cached result.
+ */
+static int afl_track_unstable_log_fd(void) {
+    static bool initialized = false;
+    static int track_fd = -1;
+    if (unlikely(!initialized)) {
+        char * fname = getenv("AFL_QEMU_TRACK_UNSTABLE");
+        if (fname != NULL) {
+            /* The owner needs write permission as well: with S_IRUSR alone
+               the file is created read-only and a later run cannot reopen
+               it with O_WRONLY. */
+            track_fd = open(fname, O_WRONLY | O_APPEND | O_CREAT,
+                            S_IRUSR | S_IWUSR);
+        }
+        initialized = true;
+        /* 0 is a valid descriptor, and callers test for >= 0 too. */
+        if (track_fd >= 0) dprintf(track_fd, "QEMU UNSTABLE TRACKING ENABLED\n");
+    }
+    return track_fd;
+}
+
+/*
+ * TCG helper for edge coverage: passes cur_loc ^ afl_prev_loc to
+ * INC_AFL_AREA (presumably bumping that coverage-map entry -- the macro is
+ * defined elsewhere), then stores cur_loc >> 1 as the new afl_prev_loc.
+ */
+void HELPER(afl_maybe_log)(target_ulong cur_loc) {
+  register uintptr_t afl_idx = cur_loc ^ afl_prev_loc;
+
+  INC_AFL_AREA(afl_idx);
+
+  // afl_prev_loc = ((cur_loc & (MAP_SIZE - 1) >> 1)) |
+  //                ((cur_loc & 1) << ((int)ceil(log2(MAP_SIZE)) -1));
+  afl_prev_loc = cur_loc >> 1;
+}
+
+/*
+ * TCG helper used when block_cov is set: passes cur_loc itself to
+ * INC_AFL_AREA and leaves afl_prev_loc untouched.
+ */
+void HELPER(afl_maybe_log2)(target_ulong cur_loc) {
+  register uintptr_t afl_idx = cur_loc;
+  INC_AFL_AREA(afl_idx);
+}
+
+/*
+ * TCG helper selected by afl_gen_trace() when the unstable-block log is
+ * open: passes cur_loc itself to INC_AFL_AREA, without mixing in
+ * afl_prev_loc.
+ */
+void HELPER(afl_maybe_log_trace)(target_ulong cur_loc) {
+  register uintptr_t afl_idx = cur_loc;
+  INC_AFL_AREA(afl_idx);
+}
+
+/* Integer mixer: two xor-shift/multiply rounds, then a final xor-shift. */
+static target_ulong pc_hash(target_ulong x) {
+    for (int round = 0; round < 2; round++) {
+        x ^= x >> 16;
+        x *= 0x45d9f3b;
+    }
+    x ^= x >> 16;
+    return x;
+}
+
+/*
+ * Generates TCG code for AFL's tracing instrumentation.
+ *
+ * Records in cur_block_is_good whether afl_must_instrument() accepts
+ * cur_loc and emits nothing when it does not. Otherwise emits one helper
+ * call carrying a constant location id:
+ *   - block_cov set: the id is the running block_id counter, and
+ *     afl_maybe_log2 is called;
+ *   - otherwise: the id is afl_hash_ip(cur_loc) masked to MAP_SIZE, ids at or
+ *     above afl_inst_rms are skipped, and afl_maybe_log_trace or
+ *     afl_maybe_log is called depending on whether the unstable-block log
+ *     is open.
+ */
+static void afl_gen_trace(target_ulong cur_loc) {
+
+  /* Optimize for cur_loc > afl_end_code, which is the most likely case on
+     Linux systems. */
+
+  cur_block_is_good = afl_must_instrument(cur_loc);
+
+  if (!cur_block_is_good)
+    return;
+
+  /* Looks like QEMU always maps to fixed locations, so ASLR is not a
+     concern. Phew. But instruction addresses may be aligned. Let's mangle
+     the value to get something quasi-uniform. */
+
+  if (block_cov) {
+
+    /* Sequential ids; the counter restarts at 5 once it reaches MAP_SIZE. */
+    cur_loc = block_id;
+    ++block_id;
+    if (block_id >= MAP_SIZE) block_id = 5;
+
+    TCGv cur_loc_v = tcg_const_tl(cur_loc);
+    gen_helper_afl_maybe_log2(cur_loc_v);
+    tcg_temp_free(cur_loc_v);
+
+  } else {
+
+    // cur_loc = (cur_loc >> 4) ^ (cur_loc << 8);
+    // cur_loc &= MAP_SIZE - 1;
+    cur_loc = (uintptr_t)(afl_hash_ip((uint64_t)cur_loc));
+    cur_loc &= (MAP_SIZE - 1);
+
+    /* Implement probabilistic instrumentation by looking at scrambled block
+       address. This keeps the instrumented locations stable across runs. */
+
+    if (cur_loc >= afl_inst_rms) return;
+
+    TCGv cur_loc_v = tcg_const_tl(cur_loc);
+    if (unlikely(afl_track_unstable_log_fd() >= 0)) {
+      gen_helper_afl_maybe_log_trace(cur_loc_v);
+    } else {
+      gen_helper_afl_maybe_log(cur_loc_v);
+    }
+    tcg_temp_free(cur_loc_v);
+
+  }
+
+}
+
 TBContext tb_ctx;
 
 /*
@@ -276,6 +374,7 @@ static int setjmp_gen_code(CPUArchState *env, TranslationBlock *tb,
     tcg_func_start(tcg_ctx);
 
     CPUState *cs = env_cpu(env);
+    afl_gen_trace(pc);
     tcg_ctx->cpu = cs;
     cs->cc->tcg_ops->translate_code(cs, tb, max_insns, pc, host_pc);
 
@@ -283,9 +382,122 @@ static int setjmp_gen_code(CPUArchState *env, TranslationBlock *tb,
     tcg_ctx->cpu = NULL;
     *max_insns = tb->icount;
 
+    /* If we are tracking block instability, then since afl-fuzz will log the ids
+       of the unstable blocks, in fuzzer_stats, we must log these alongside the
+       instruction pointer so that the user can associate these back with the
+       actual binary */
+    int track_fd = afl_track_unstable_log_fd();
+    if (unlikely(track_fd >= 0)) {
+      uint64_t  ip = (uint64_t)pc;
+      uintptr_t block_id = (uintptr_t)(afl_hash_ip(ip));
+      block_id &= (MAP_SIZE - 1);
+      dprintf(track_fd, "BLOCK ID: 0x%016" PRIx64 ", PC: 0x%016zx-0x%016zx\n",
+              block_id, ip, ip + tb->size);
+    }
+
     return tcg_gen_code(tcg_ctx, tb, pc);
 }
 
+/*
+ * Builds a stand-alone TranslationBlock whose only work is one coverage
+ * helper call for afl_id (masked to MAP_SIZE) followed by goto_tb/exit_tb,
+ * and returns it. If no TB can be allocated the translation cache is
+ * flushed and the CPU loop is exited instead of returning.
+ *
+ * Called with mmap_lock held for user mode emulation.
+ */
+TranslationBlock *afl_gen_edge(CPUState *cpu, unsigned long afl_id)
+{
+    CPUArchState *env = cpu->env_ptr;
+    TranslationBlock *tb;
+    tcg_insn_unit *gen_code_buf;
+    int gen_code_size, search_size;
+
+    assert_memory_lock();
+
+ buffer_overflow1:
+    tb = tcg_tb_alloc(tcg_ctx);
+    if (unlikely(!tb)) {
+        /* flush must be done */
+        tb_flush(cpu);
+        mmap_unlock();
+        /* Make the execution loop process the flush as soon as possible.  */
+        cpu->exception_index = EXCP_INTERRUPT;
+        cpu_loop_exit(cpu);
+    }
+
+    /* The TB does not correspond to guest code: pc, cs_base and flags are 0. */
+    gen_code_buf = tcg_ctx->code_gen_ptr;
+    tb->tc.ptr = gen_code_buf;
+    tb->pc = 0;
+    tb->cs_base = 0;
+    tb->flags = 0;
+    tb->cflags = 0;
+    tb->trace_vcpu_dstate = *cpu->trace_dstate;
+    tcg_ctx->tb_cflags = 0;
+
+    tcg_func_start(tcg_ctx);
+
+    tcg_ctx->cpu = env_cpu(env);
+
+    /* Emit the coverage helper call, picking the same helper pair that
+       afl_gen_trace() chooses between on block_cov. */
+    target_ulong afl_loc = afl_id & (MAP_SIZE -1);
+    //*afl_dynamic_size = MAX(*afl_dynamic_size, afl_loc);
+    TCGv tmp0 = tcg_const_tl(afl_loc);
+    if (block_cov) 
+      gen_helper_afl_maybe_log2(tmp0);
+    else
+      gen_helper_afl_maybe_log(tmp0);
+    tcg_temp_free(tmp0);
+    tcg_gen_goto_tb(0);
+    tcg_gen_exit_tb(tb, 0);
+
+    tcg_ctx->cpu = NULL;
+
+    trace_translate_block(tb, tb->pc, tb->tc.ptr);
+
+    /* generate machine code */
+    tb->jmp_reset_offset[0] = TB_JMP_RESET_OFFSET_INVALID;
+    tb->jmp_reset_offset[1] = TB_JMP_RESET_OFFSET_INVALID;
+    tcg_ctx->tb_jmp_reset_offset = tb->jmp_reset_offset;
+    if (TCG_TARGET_HAS_direct_jump) {
+        tcg_ctx->tb_jmp_insn_offset = tb->jmp_target_arg;
+        tcg_ctx->tb_jmp_target_addr = NULL;
+    } else {
+        tcg_ctx->tb_jmp_insn_offset = NULL;
+        tcg_ctx->tb_jmp_target_addr = tb->jmp_target_arg;
+    }
+
+    /* ??? Overflow could be handled better here.  In particular, we
+       don't need to re-do gen_intermediate_code, nor should we re-do
+       the tcg optimization currently hidden inside tcg_gen_code.  All
+       that should be required is to flush the TBs, allocate a new TB,
+       re-initialize it per above, and re-do the actual code generation.  */
+    /* NOTE(review): tcg_gen_code() takes two arguments here, but
+       setjmp_gen_code() in this same patch calls it with three
+       (tcg_ctx, tb, pc) -- confirm this function matches the QEMU version
+       being patched. */
+    gen_code_size = tcg_gen_code(tcg_ctx, tb);
+    if (unlikely(gen_code_size < 0)) {
+        goto buffer_overflow1;
+    }
+    search_size = encode_search(tb, (void *)gen_code_buf + gen_code_size);
+    if (unlikely(search_size < 0)) {
+        goto buffer_overflow1;
+    }
+    tb->tc.size = gen_code_size;
+
+    /* Advance the code buffer pointer past the generated code and the
+       search data, rounded up to CODE_GEN_ALIGN. */
+    qatomic_set(&tcg_ctx->code_gen_ptr, (void *)
+        ROUND_UP((uintptr_t)gen_code_buf + gen_code_size + search_size,
+                 CODE_GEN_ALIGN));
+
+    /* init jump list */
+    qemu_spin_init(&tb->jmp_lock);
+    tb->jmp_list_head = (uintptr_t)NULL;
+    tb->jmp_list_next[0] = (uintptr_t)NULL;
+    tb->jmp_list_next[1] = (uintptr_t)NULL;
+    tb->jmp_dest[0] = (uintptr_t)NULL;
+    tb->jmp_dest[1] = (uintptr_t)NULL;
+
+    /* init original jump addresses which have been set during tcg_gen_code() */
+    if (tb->jmp_reset_offset[0] != TB_JMP_RESET_OFFSET_INVALID) {
+        tb_reset_jump(tb, 0);
+    }
+    if (tb->jmp_reset_offset[1] != TB_JMP_RESET_OFFSET_INVALID) {
+        tb_reset_jump(tb, 1);
+    }
+
+    return tb;
+}
+
+
 /* Called with mmap_lock held for user mode emulation.  */
 TranslationBlock *tb_gen_code(CPUState *cpu,

translate-all.c 文件主要用于将 TCG IR 翻译成 Host 机器码。

该 patch 将 AFL 的 qemu-mode 插桩深度整合进 TCG 代码，使 QEMU 在翻译 TB（Translation Block，翻译块）时自动生成各种覆盖记录，并添加了 edge TB、按比例的概率插桩（AFL_INST_RATIO）、不稳定 block 追踪等 AFL 功能。

accel/tcg/cpu-exec.c文件patch 内容如下所示：

diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c
index ef3d967e3a..50435b8135 100644
--- a/accel/tcg/cpu-exec.c
+++ b/accel/tcg/cpu-exec.c
@@ -45,6 +45,1581 @@
 #include "internal-common.h"
 #include "internal-target.h"
 
+#include "qemuafl/common.h"
+#include "qemuafl/imported/snapshot-inl.h"
+#include "qemuafl/qemu-ijon-support.h"
+
+#include <string.h>
+#include <sys/shm.h>
+#ifndef AFL_QEMU_STATIC_BUILD
+  #include <dlfcn.h>
+#endif
+
+/***************************
+ * VARIOUS AUXILIARY STUFF *
+ ***************************/
+
+/* This is equivalent to afl-as.h: */
+
+static unsigned char
+               dummy[MAP_SIZE]; /* costs MAP_SIZE but saves a few instructions */
+unsigned char *afl_area_ptr = dummy;          /* Exported for afl_gen_trace */
+
+/* Exported variables populated by the code patched into elfload.c: */
+
+abi_ulong afl_entry_point,                      /* ELF entry point (_start) */
+    afl_exit_point,                             /* ELF exit point           */
+    afl_start_code,                             /* .text start pointer      */
+    afl_end_code;                               /* .text end pointer        */
+
+struct vmrange* afl_instr_code;
+
+abi_ulong    afl_persistent_addr, afl_persistent_ret_addr;
+unsigned int afl_persistent_cnt;
+
+unsigned int block_id = 5;
+
+u8 afl_compcov_level, block_cov;
+
+__thread abi_ulong afl_prev_loc;
+
+struct cmp_map *__afl_cmp_map;
+
+/* Set in the child process in forkserver mode: */
+
+static int forkserver_installed = 0;
+static int disable_caching = 0;
+
+unsigned char afl_fork_child;
+unsigned int  afl_forksrv_pid;
+unsigned char is_persistent;
+target_long   persistent_stack_offset;
+unsigned char persistent_first_pass = 1;
+unsigned char persistent_exits;
+unsigned char persistent_save_gpr;
+unsigned char persistent_memory;
+int           persisent_retaddr_offset;
+
+struct api_regs saved_regs;
+
+u8 * shared_buf;
+u32 *shared_buf_len;
+u8   sharedmem_fuzzing;
+
+afl_persistent_hook_fn afl_persistent_hook_ptr;
+
+/* Instrumentation ratio: */
+
+unsigned int afl_inst_rms = MAP_SIZE;         /* Exported for afl_gen_trace */
+
+/* Function declarations. */
+
+static void afl_wait_tsl(CPUState *, int);
+static void afl_request_tsl(target_ulong, target_ulong, uint32_t, uint32_t,
+                            TranslationBlock *, int);
+
+/* Data structures passed around by the translate handlers: */
+
+/* The (pc, cs_base, flags, cf_mask) tuple that afl_request_tsl() fills in
+   to name one translation block. */
+struct afl_tb {
+
+  target_ulong pc;
+  target_ulong cs_base;
+  uint32_t     flags;
+  uint32_t     cf_mask;
+
+};
+
+/* Extra data carried by a translation request when is_chain is set:
+   last_tb is filled from the previous TranslationBlock. The meaning of
+   cf_mask and tb_exit is not visible here -- presumably the arguments for
+   re-doing the chaining; confirm against afl_wait_tsl(). */
+struct afl_chain {
+
+  struct afl_tb last_tb;
+  uint32_t      cf_mask;
+  int           tb_exit;
+
+};
+
+/* One message written to TSL_FD. afl_request_tsl() sets is_chain when it
+   was given a previous TB; afl_persistent_iter() sends a zeroed message
+   with tb.pc == (target_ulong)-1 as its "exit" command. */
+struct afl_tsl {
+
+  struct afl_tb tb;
+  struct afl_chain chain;
+  char is_chain;
+
+};
+
+/* Some forward decls: */
+
+static inline TranslationBlock *tb_find(CPUState *, TranslationBlock *, int,
+                                        uint32_t);
+static inline void              tb_add_jump(TranslationBlock *tb, int n,
+                                            TranslationBlock *tb_next);
+static void                     afl_map_shm_fuzz(void);
+
+/*************************
+ * ACTUAL IMPLEMENTATION *
+ *************************/
+
+/* Snapshot memory */
+
+struct saved_region {
+
+  void* addr;
+  size_t size;
+  void* saved;
+
+};
+
+abi_ulong saved_brk;
+int lkm_snapshot;
+struct saved_region* memory_snapshot;
+size_t memory_snapshot_len;
+
...
+/*
+ * One-time AFL configuration, driven entirely by environment variables.
+ * In order, it:
+ *   - derives afl_inst_rms from AFL_INST_RATIO (clamped to 1..100 percent);
+ *   - attaches the coverage map named by SHM_ENV_VAR (exit(1) on failure);
+ *   - reads AFL_QEMU_DISABLE_CACHE;
+ *   - attaches the CmpLog map when ___AFL_EINS_ZWEI_POLIZEI___ is set;
+ *   - sets the instrumented code range (AFL_INST_LIBS, AFL_CODE_START,
+ *     AFL_CODE_END) and the include/exclude lists (AFL_QEMU_INST_RANGES,
+ *     AFL_QEMU_EXCLUDE_RANGES), resolving name entries via the process maps;
+ *   - reads the CompCov level, disables the RCU atfork handlers and loads
+ *     the optional persistent hook library;
+ *   - unless a CmpLog map is attached, configures persistent/snapshot mode
+ *     and calls qemu_ijon_init().
+ */
+void afl_setup(void) {
+
+  char *id_str = getenv(SHM_ENV_VAR), *inst_r = getenv("AFL_INST_RATIO");
+
+  int shm_id;
+
+  if (inst_r) {
+
+    unsigned int r;
+
+    r = atoi(inst_r);
+
+    /* Clamp the percentage to 1..100. */
+    if (r > 100) r = 100;
+    if (!r) r = 1;
+
+    afl_inst_rms = MAP_SIZE * r / 100;
+
+  }
+
+  if (id_str) {
+
+    shm_id = atoi(id_str);
+    afl_area_ptr = shmat(shm_id, NULL, 0);
+
+    if (afl_area_ptr == (void *)-1) exit(1);
+
+    /* With AFL_INST_RATIO set to a low value, we want to touch the bitmap
+       so that the parent doesn't give up on us. */
+
+    if (inst_r) afl_area_ptr[0] = 1;
+
+  }
+
+  disable_caching = getenv("AFL_QEMU_DISABLE_CACHE") != NULL;
+
+  if (getenv("___AFL_EINS_ZWEI_POLIZEI___")) {  // CmpLog forkserver
+
+    id_str = getenv(CMPLOG_SHM_ENV_VAR);
+
+    if (id_str) {
+
+      u32 shm_id = atoi(id_str);
+
+      __afl_cmp_map = shmat(shm_id, NULL, 0);
+
+      if (__afl_cmp_map == (void *)-1) exit(1);
+
+    }
+
+  }
+
+  if (getenv("AFL_INST_LIBS")) {
+
+    /* Widen the instrumented range to the whole guest address space. */
+    afl_start_code = 0;
+    afl_end_code = (abi_ulong)-1;
+
+  }
+
+  if (getenv("AFL_CODE_START"))
+    afl_start_code = strtoll(getenv("AFL_CODE_START"), NULL, 16);
+  if (getenv("AFL_CODE_END"))
+    afl_end_code = strtoll(getenv("AFL_CODE_END"), NULL, 16);
+
+  /* Set when at least one list entry is a name rather than a hex range. */
+  int have_names = 0;
+  if (getenv("AFL_QEMU_INST_RANGES")) {
+    char *str = getenv("AFL_QEMU_INST_RANGES");
+    char *saveptr1, *saveptr2 = NULL, *save_pt1 = NULL;
+    char *pt1, *pt2, *pt3 = NULL;
+
+    /* Comma-separated entries, each either "start-end" (hex) or a name. */
+    while (1) {
+
+      pt1 = strtok_r(str, ",", &saveptr1);
+      if (pt1 == NULL) break;
+      str = NULL;
+      /* Keep an unsplit copy: the inner strtok_r below cuts pt1 at '-'. */
+      save_pt1 = strdup(pt1);
+
+      pt2 = strtok_r(pt1, "-", &saveptr2);
+      pt3 = strtok_r(NULL, "-", &saveptr2);
+
+      struct vmrange* n = calloc(1, sizeof(struct vmrange));
+      n->next = afl_instr_code;
+
+      if (pt3 == NULL) { // filename
+        have_names = 1;
+        /* Empty interval; filled in later from the process maps. */
+        n->start = (target_ulong)-1;
+        n->end = 0;
+        n->name = save_pt1;
+      } else {
+        n->start = strtoull(pt2, NULL, 16);
+        n->end = strtoull(pt3, NULL, 16);
+        if (n->start && n->end) {
+          n->name = NULL;
+          free(save_pt1);
+        } else {
+          /* Either side parsed as 0: treat the whole entry as a name. */
+          have_names = 1;
+          n->start = (target_ulong)-1;
+          n->end = 0;
+          n->name = save_pt1;
+        }
+      }
+
+      afl_instr_code = n;
+
+    }
+  }
+
+  /* Same parsing as above, but the entries are marked as exclusions. */
+  if (getenv("AFL_QEMU_EXCLUDE_RANGES")) {
+    char *str = getenv("AFL_QEMU_EXCLUDE_RANGES");
+    char *saveptr1, *saveptr2 = NULL, *save_pt1;
+    char *pt1, *pt2, *pt3 = NULL;
+
+    while (1) {
+
+      pt1 = strtok_r(str, ",", &saveptr1);
+      if (pt1 == NULL) break;
+      str = NULL;
+      save_pt1 = strdup(pt1);
+
+      pt2 = strtok_r(pt1, "-", &saveptr2);
+      pt3 = strtok_r(NULL, "-", &saveptr2);
+
+      struct vmrange* n = calloc(1, sizeof(struct vmrange));
+      n->exclude = true; // These are "exclusion" regions.
+      n->next = afl_instr_code;
+
+      if (pt3 == NULL) { // filename
+        have_names = 1;
+        n->start = (target_ulong)-1;
+        n->end = 0;
+        n->name = save_pt1;
+      } else {
+        n->start = strtoull(pt2, NULL, 16);
+        n->end = strtoull(pt3, NULL, 16);
+        if (n->start && n->end) {
+          n->name = NULL;
+          free(save_pt1);
+        } else {
+          have_names = 1;
+          n->start = (target_ulong)-1;
+          n->end = 0;
+          n->name = save_pt1;
+        }
+      }
+
+      afl_instr_code = n;
+
+    }
+  }
+
+  /* Resolve name entries: grow each named range to cover every mapping
+     whose path contains the name. */
+  if (have_names) {
+    GSList *map_info = read_self_maps();
+    for (GSList *s = map_info; s; s = g_slist_next(s)) {
+      MapInfo *e = (MapInfo *) s->data;
+
+      if (h2g_valid(e->start)) {
+        unsigned long min = e->start;
+        unsigned long max = e->end;
+        int flags = page_get_flags(h2g(min));
+
+        max = h2g_valid(max - 1) ? max : (uintptr_t) AFL_G2H(GUEST_ADDR_MAX) + 1;
+
+        if (page_check_range(h2g(min), max - min, flags) == -1) {
+          continue;
+        }
+
+        // Now that we have a valid guest address region, compare its
+        // name against the names we care about:
+        target_ulong gmin = h2g(min);
+        target_ulong gmax = h2g(max);
+
+        /* NOTE(review): e->path is passed to strstr() unchecked --
+           confirm it cannot be NULL for unnamed mappings. */
+        struct vmrange* n = afl_instr_code;
+        while (n) {
+          if (n->name && strstr(e->path, n->name)) {
+            if (gmin < n->start) n->start = gmin;
+            if (gmax > n->end) n->end = gmax;
+            break;
+          }
+          n = n->next;
+        }
+      }
+    }
+    free_self_maps(map_info);
+  }
+
+  if (getenv("AFL_DEBUG") && afl_instr_code) {
+    struct vmrange* n = afl_instr_code;
+    while (n) {
+      if (n->exclude) {
+        fprintf(stderr, "Exclude range: 0x%lx-0x%lx (%s)\n",
+                (unsigned long)n->start, (unsigned long)n->end,
+                n->name ? n->name : "");
+      } else {
+        fprintf(stderr, "Instrument range: 0x%lx-0x%lx (%s)\n",
+                (unsigned long)n->start, (unsigned long)n->end,
+                n->name ? n->name : "");
+      }
+      n = n->next;
+    }
+  }
+
+  /* Maintain for compatibility */
+  if (getenv("AFL_QEMU_COMPCOV")) { afl_compcov_level = 1; }
+  if (getenv("AFL_COMPCOV_LEVEL")) {
+
+    afl_compcov_level = atoi(getenv("AFL_COMPCOV_LEVEL"));
+
+  }
+
+  /* pthread_atfork() seems somewhat broken in util/rcu.c, and I'm
+     not entirely sure what is the cause. This disables that
+     behaviour, and seems to work alright? */
+
+  rcu_disable_atfork();
+
+  if (getenv("AFL_QEMU_PERSISTENT_HOOK")) {
+
+#ifdef AFL_QEMU_STATIC_BUILD
+
+    fprintf(stderr,
+            "[AFL] ERROR: you cannot use AFL_QEMU_PERSISTENT_HOOK when "
+            "afl-qemu-trace is static\n");
+    exit(1);
+
+#else
+
+    persistent_save_gpr = 1;
+
+    void *plib = dlopen(getenv("AFL_QEMU_PERSISTENT_HOOK"), RTLD_NOW);
+    if (!plib) {
+
+      fprintf(stderr, "[AFL] ERROR: invalid AFL_QEMU_PERSISTENT_HOOK=%s - %s\n",
+              getenv("AFL_QEMU_PERSISTENT_HOOK"),
+              dlerror());
+      exit(1);
+
+    }
+
+    /* afl_persistent_hook_init is optional; its return value turns on
+       shared-memory fuzzing. afl_persistent_hook is mandatory. */
+    int (*afl_persistent_hook_init_ptr)(void) =
+        dlsym(plib, "afl_persistent_hook_init");
+    if (afl_persistent_hook_init_ptr)
+      sharedmem_fuzzing = afl_persistent_hook_init_ptr();
+
+    afl_persistent_hook_ptr = dlsym(plib, "afl_persistent_hook");
+    if (!afl_persistent_hook_ptr) {
+
+      fprintf(stderr,
+              "[AFL] ERROR: failed to find the function "
+              "\"afl_persistent_hook\" in %s\n",
+              getenv("AFL_QEMU_PERSISTENT_HOOK"));
+      exit(1);
+
+    }
+
+#endif
+
+  }
+
+  /* NOTE(review): this early return also skips qemu_ijon_init() at the end
+     of the function whenever a CmpLog map is attached -- confirm that is
+     intended. */
+  if (__afl_cmp_map) return; // no persistent for cmplog
+
+  is_persistent = getenv("AFL_QEMU_PERSISTENT_ADDR") != NULL;
+
+  if (is_persistent)
+    afl_persistent_addr = strtoll(getenv("AFL_QEMU_PERSISTENT_ADDR"), NULL, 0);
+
+  if (getenv("AFL_QEMU_PERSISTENT_RET"))
+    afl_persistent_ret_addr =
+        strtoll(getenv("AFL_QEMU_PERSISTENT_RET"), NULL, 0);
+  /* If AFL_QEMU_PERSISTENT_RET is not specified patch the return addr */
+
+  if (getenv("AFL_QEMU_PERSISTENT_GPR")) persistent_save_gpr = 1;
+  if (getenv("AFL_QEMU_PERSISTENT_MEM"))
+    persistent_memory = 1;
+
+  if (getenv("AFL_QEMU_PERSISTENT_RETADDR_OFFSET"))
+    persisent_retaddr_offset =
+        strtoll(getenv("AFL_QEMU_PERSISTENT_RETADDR_OFFSET"), NULL, 0);
+
+  if (getenv("AFL_QEMU_PERSISTENT_CNT"))
+    afl_persistent_cnt = strtoll(getenv("AFL_QEMU_PERSISTENT_CNT"), NULL, 0);
+  else
+    afl_persistent_cnt = 0;
+
+  if (getenv("AFL_QEMU_PERSISTENT_EXITS")) persistent_exits = 1;
+
+  // TODO persistent exits for other archs not x86
+  // TODO persistent mode for other archs not x86
+  // TODO cmplog rtn for arm
+
+  if (getenv("AFL_QEMU_SNAPSHOT")) {
+
+    /* Snapshot mode switches on every persistent option at once. */
+    is_persistent = 1;
+    persistent_save_gpr = 1;
+    persistent_memory = 1;
+    persistent_exits = 1;
+
+    /* AFL_QEMU_PERSISTENT_ADDR, when given, takes precedence. */
+    if (afl_persistent_addr == 0)
+      afl_persistent_addr = strtoll(getenv("AFL_QEMU_SNAPSHOT"), NULL, 0);
+
+  }
+
+  if (persistent_memory && afl_snapshot_init() >= 0)
+    lkm_snapshot = 1;
+
+  if (getenv("AFL_DEBUG")) {
+    if (is_persistent)
+      fprintf(stderr, "Persistent: 0x%lx [0x%lx] %s%s%s\n",
+              (unsigned long)afl_persistent_addr,
+              (unsigned long)afl_persistent_ret_addr,
+              (persistent_save_gpr ? "gpr ": ""),
+              (persistent_memory ? "mem ": ""),
+              (persistent_exits ? "exits ": ""));
+  }
+
+  qemu_ijon_init();
+
+}
+
+/*
+ * Fork server logic, invoked once we hit _start.
+ *
+ * Runs at most once per process (guarded by forkserver_installed). Writes a
+ * 4-byte status to FORKSRV_FD + 1; if that write fails the function simply
+ * returns and emulation continues without a fork server. Otherwise, unless
+ * AFL_OLD_FORKSERVER is set, it completes the versioned handshake (reply
+ * check, option flags, map size, final version word) and then loops
+ * forever: read a command from FORKSRV_FD, fork a child (or SIGCONT a
+ * stopped persistent child), report the child pid, mirror the child's
+ * translation requests via afl_wait_tsl(), and report the wait status.
+ * Only the forked child ever returns from this function after the
+ * handshake.
+ */
+
+void afl_forkserver(CPUState *cpu) {
+
+  if (forkserver_installed == 1) return;
+  forkserver_installed = 1;
+
+  if (getenv("AFL_QEMU_DEBUG_MAPS")) open_self_maps(cpu->env_ptr, 1);
+
+  u32 __afl_old_forkserver = 0;
+  pid_t child_pid;
+  int   t_fd[2];
+  u8    child_stopped = 0;
+  u32   was_killed;
+  u32 version = 0x41464c00 + FS_NEW_VERSION_MAX;
+  /* tmp is the reply expected from the fuzzer: the version word inverted. */
+  u32 tmp = version ^ 0xffffffff, status2, status = version;
+  /* msg and reply alias status and status2, so every write of msg sends
+     whatever status currently holds. */
+  u8 *msg = (u8 *)&status;
+  u8 *reply = (u8 *)&status2;
+
+  if (getenv("AFL_DEBUG"))
+    fprintf(stderr, "Debug: Sending status 0x%08x\n", status);
+
+  if (getenv("AFL_OLD_FORKSERVER")) {
+
+    __afl_old_forkserver = 1;
+    status = 0;
+
+    fprintf(stderr, "The current version of afl++ qemu mode "
+      "supports forkserver v1, but afl-fuzz still retains "
+      "support for the old forkserver (qemu) version\n");
+
+  }
+
+  /* Tell the parent that we're alive. If the parent doesn't want
+     to talk, assume that we're not running in forkserver mode. */
+
+  if (write(FORKSRV_FD + 1, msg, 4) != 4) return;
+
+  afl_forksrv_pid = getpid();
+
+  int first_run = 1;
+
+  if (!__afl_old_forkserver) {
+
+    if (read(FORKSRV_FD, reply, 4) != 4) { _exit(1); }
+    if (tmp != status2) {
+
+      fprintf(stderr, "wrong forkserver message from AFL++ tool");
+      _exit(1);
+
+    }
+
+    // send the set/requested options to forkserver
+    status = FS_NEW_OPT_MAPSIZE;  // we always send the map size
+    if (lkm_snapshot) status |= FS_OPT_SNAPSHOT;
+    if (sharedmem_fuzzing) status |= FS_NEW_OPT_SHDMEM_FUZZ;
+
+    u32 __afl_map_size = MAP_SIZE;
+
+    if (use_ijon) {
+
+      /* Round the map size up to a multiple of 64, then append the IJON
+         areas; the IJON pointers are placed right after the first MAP_SIZE
+         bytes of the coverage map. */
+      __afl_map_size = (((__afl_map_size + 63) >> 6) << 6);
+      __afl_map_size += MAP_SIZE_IJON_MAP + MAP_SIZE_IJON_BYTES;
+
+      ijon_map_ptr = afl_area_ptr + MAP_SIZE;
+      ijon_max_ptr = (uint64_t*)(ijon_map_ptr + MAP_SIZE_IJON_MAP);
+
+      status |= FS_OPT_IJON;
+
+    }
+
+    if (write(FORKSRV_FD + 1, msg, 4) != 4) {
+
+      errno = 0;
+      _exit(1);
+
+    }
+
+    // Now send the parameters for the set options, increasing by option number
+
+    // FS_NEW_OPT_MAPSIZE - we always send the map size
+    status = __afl_map_size;
+    if (write(FORKSRV_FD + 1, msg, 4) != 4) { _exit(1); }
+
+    // send welcome message as final message
+    status = version;
+    if (write(FORKSRV_FD + 1, msg, 4) != 4) { _exit(1); }
+
+  }
+
+  // END forkserver handshake
+
+  if (sharedmem_fuzzing) { afl_map_shm_fuzz(); }
+
+  /* All right, let's await orders... */
+
+  while (1) {
+
+    /* Whoops, parent dead? */
+
+    if (read(FORKSRV_FD, &was_killed, 4) != 4) exit(2);
+
+    /* If we stopped the child in persistent mode, but there was a race
+       condition and afl-fuzz already issued SIGKILL, write off the old
+       process. */
+
+    if (child_stopped && was_killed) {
+
+      child_stopped = 0;
+      if (waitpid(child_pid, &status, 0) < 0) exit(8);
+
+    }
+
+    if (!child_stopped) {
+
+      /* Establish a channel with child to grab translation commands. We'll
+       read from t_fd[0], child will write to TSL_FD. */
+
+      if (pipe(t_fd) || dup2(t_fd[1], TSL_FD) < 0) exit(3);
+      close(t_fd[1]);
+
+      child_pid = fork();
+      if (child_pid < 0) exit(4);
+
+      if (!child_pid) {
+
+        /* Child process. Close descriptors and run free. */
+
+        afl_fork_child = 1;
+        close(FORKSRV_FD);
+        close(FORKSRV_FD + 1);
+        close(t_fd[0]);
+        return;
+
+      }
+
+      /* Parent. */
+
+      close(TSL_FD);
+
+    } else {
+
+      /* Special handling for persistent mode: if the child is alive but
+         currently stopped, simply restart it with SIGCONT. */
+
+      kill(child_pid, SIGCONT);
+      child_stopped = 0;
+
+    }
+
+    /* Parent. */
+
+    if (write(FORKSRV_FD + 1, &child_pid, 4) != 4) exit(5);
+
+    /* Collect translation requests until child dies and closes the pipe. */
+
+    afl_wait_tsl(cpu, t_fd[0]);
+
+    /* Get and relay exit status to parent. */
+
+    if (waitpid(child_pid, &status, is_persistent ? WUNTRACED : 0) < 0) exit(6);
+
+    /* In persistent mode, the child stops itself with SIGSTOP to indicate
+       a successful run. In this case, we want to wake it up without forking
+       again. */
+
+    if (WIFSTOPPED(status))
+      child_stopped = 1;
+    else if (unlikely(first_run && is_persistent)) {
+
+      fprintf(stderr, "[AFL] ERROR: no persistent iteration executed\n");
+      exit(12);  // Persistent is wrong
+
+    }
+
+    first_run = 0;
+
+    if (write(FORKSRV_FD + 1, &status, 4) != 4) exit(7);
+
+  }
+
+}
+
+/* A simplified persistent mode handler, used as explained in
+ * llvm_mode/README.md. */
+
+static u32 cycle_cnt;
+
+/*
+ * Ends one persistent iteration. While iterations remain (always, when
+ * afl_persistent_cnt is 0) it restores memory and registers as configured,
+ * sends the "exit" translation message on TSL_FD unless caching is
+ * disabled, stops itself with SIGSTOP, and after being resumed runs the
+ * persistent hook (if any) and resets the per-run coverage state.
+ * Otherwise it points afl_area_ptr at the dummy map and exits the process.
+ */
+void afl_persistent_iter(CPUArchState *env) {
+
+  static struct afl_tsl exit_cmd_tsl;
+
+  /* cycle_cnt is only decremented when a count limit is configured. */
+  if (!afl_persistent_cnt || --cycle_cnt) {
+
+    if (persistent_memory) restore_memory_snapshot();
+
+    /* With a hook installed, registers are restored after the hook runs
+       (below) instead of here. */
+    if (persistent_save_gpr && !afl_persistent_hook_ptr) {
+      afl_restore_regs(&saved_regs, env);
+    }
+
+    if (!disable_caching) {
+
+      /* A zeroed message with tb.pc == -1 serves as the exit command. */
+      memset(&exit_cmd_tsl, 0, sizeof(struct afl_tsl));
+      exit_cmd_tsl.tb.pc = (target_ulong)(-1);
+
+      if (write(TSL_FD, &exit_cmd_tsl, sizeof(struct afl_tsl)) !=
+          sizeof(struct afl_tsl)) {
+
+        /* Exit the persistent loop on pipe error */
+        afl_area_ptr = dummy;
+        exit(0);
+
+      }
+
+    }
+
+    // TODO use only pipe
+    raise(SIGSTOP);
+
+
+    // now we have shared_buf updated and ready to use
+    if (persistent_save_gpr && afl_persistent_hook_ptr) {
+
+      /* The hook works on a copy so saved_regs stays pristine. */
+      struct api_regs hook_regs = saved_regs;
+      afl_persistent_hook_ptr(&hook_regs, guest_base, shared_buf,
+                              *shared_buf_len);
+      afl_restore_regs(&hook_regs, env);
+
+    }
+
+    afl_area_ptr[0] = 1;
+    afl_prev_loc = 0;
+
+  } else {
+
+    afl_area_ptr = dummy;
+    exit(0);
+
+  }
+
+}
+
+void afl_persistent_loop(CPUArchState *env) {
+  /* First call records the baseline state; later calls run afl_persistent_iter() when is_persistent is set. */
+  if (!afl_fork_child) return;
+
+  if (persistent_first_pass) {
+
+    /* Make sure that every iteration of __AFL_LOOP() starts with a clean slate.
+       On subsequent calls, the parent will take care of that, but on the first
+       iteration, it's our job to erase any trace of whatever happened
+       before the loop. */
+
+    if (is_persistent) {
+
+      memset(afl_area_ptr, 0, MAP_SIZE);
+      afl_area_ptr[0] = 1;
+      afl_prev_loc = 0;
+
+    }
+    /* Baseline memory snapshot; afl_persistent_iter() restores it on later iterations. */
+    if (persistent_memory) collect_memory_snapshot();
+    /* Save registers; an optional hook may edit a copy, which is then loaded back into env. */
+    if (persistent_save_gpr) {
+
+      afl_save_regs(&saved_regs, env);
+
+      if (afl_persistent_hook_ptr) {
+
+        struct api_regs hook_regs = saved_regs;
+        afl_persistent_hook_ptr(&hook_regs, guest_base, shared_buf,
+                                *shared_buf_len);
+        afl_restore_regs(&hook_regs, env);
+
+      }
+
+    }
+    /* Arm the iteration counter and leave first-pass mode. */
+    cycle_cnt = afl_persistent_cnt;
+    persistent_first_pass = 0;
+    persistent_stack_offset = TARGET_LONG_BITS / 8;
+
+    return;
+
+  }
+
+  if (is_persistent) {
+
+    afl_persistent_iter(env);
+
+  }
+
+}
+
+/* This code is invoked whenever QEMU decides that it doesn't have a
+   translation of a particular block and needs to compute it, or when it
+   decides to chain two TBs together. When this happens, we tell the parent to
+   mirror the operation, so that the next fork() has a cached copy. */
+
+static void afl_request_tsl(target_ulong pc, target_ulong cb, uint32_t flags,
+                            uint32_t cf_mask, TranslationBlock *last_tb,
+                            int tb_exit) {
+
+  if (disable_caching) return;
+
+  struct afl_tsl t; /* NOTE(review): not zero-initialised; t.chain stays unset when !is_chain */
+
+  if (!afl_fork_child) return;
+
+  t.tb.pc = pc;
+  t.tb.cs_base = cb;
+  t.tb.flags = flags;
+  t.tb.cf_mask = cf_mask;
+  t.is_chain = (last_tb != NULL);
+  /* A non-NULL last_tb makes this a chaining request: also describe the source TB. */
+  if (t.is_chain) {
+
+    t.chain.last_tb.pc = last_tb->pc;
+    t.chain.last_tb.cs_base = last_tb->cs_base;
+    t.chain.last_tb.flags = last_tb->flags;
+    t.chain.cf_mask = cf_mask;
+    t.chain.tb_exit = tb_exit;
+
+  }
+  /* Fire-and-forget: a short or failed write is not reported to the caller. */
+  if (write(TSL_FD, &t, sizeof(struct afl_tsl)) != sizeof(struct afl_tsl))
+    return;
+
+}
+
+static inline TranslationBlock *
+afl_tb_lookup(CPUState *cpu, target_ulong pc, target_ulong cs_base,
+                     uint32_t flags, uint32_t cf_mask)
+{
+    TranslationBlock *tb;
+    uint32_t hash;
+    /* Fast path: probe this CPU's tb_jmp_cache slot for pc. */
+    hash = tb_jmp_cache_hash_func(pc);
+    tb = qatomic_rcu_read(&cpu->tb_jmp_cache[hash]);
+    /* Replace the cluster bits of cf_mask with this CPU's cluster index. */
+    cf_mask &= ~CF_CLUSTER_MASK;
+    cf_mask |= cpu->cluster_index << CF_CLUSTER_SHIFT;
+    /* Cache hit only if every identifying field matches; otherwise fall back to the hash table. */
+    if (likely(tb &&
+               tb->pc == pc &&
+               tb->cs_base == cs_base &&
+               tb->flags == flags &&
+               tb->trace_vcpu_dstate == *cpu->trace_dstate &&
+               (tb_cflags(tb) & (CF_HASH_MASK | CF_INVALID)) == cf_mask)) {
+        return tb;
+    }
+    tb = tb_htable_lookup(cpu, pc, cs_base, flags, cf_mask);
+    if (tb == NULL) {
+        return NULL;
+    }
+    qatomic_set(&cpu->tb_jmp_cache[hash], tb);
+    return tb;
+}
+
+/* This is the other side of the same channel. Since timeouts are handled by
+   afl-fuzz simply killing the child, we can just wait until the pipe breaks. */
+
+static void afl_wait_tsl(CPUState *cpu, int fd) {
+
+  struct afl_tsl t;
+  TranslationBlock *tb, *last_tb;
+
+  if (disable_caching) return;
+
+  while (1) {
+
+    u8 invalid_pc = 0;
+
+    /* Broken pipe means it's time to return to the fork server routine. */
+
+    if (read(fd, &t, sizeof(struct afl_tsl)) != sizeof(struct afl_tsl)) break;
+
+    /* Exit command for persistent (fd is left open on this path) */
+
+    if (t.tb.pc == (target_ulong)(-1)) return;
+    /* Reuse an existing TB for this request if there is one. */
+    tb = afl_tb_lookup(cpu, t.tb.pc, t.tb.cs_base, t.tb.flags, t.tb.cf_mask);
+
+    if (!tb) {
+
+      /* The child may request to translate a block of memory that is not
+         mapped in the parent (e.g. jitted code or dlopened code).
+         This causes a SIGSEGV in gen_intermediate_code() and associated
+         subroutines. We simply avoid caching of such blocks. */
+
+      if (is_valid_addr(t.tb.pc)) {
+
+        mmap_lock();
+        tb = tb_gen_code(cpu, t.tb.pc, t.tb.cs_base, t.tb.flags, t.tb.cf_mask);
+        mmap_unlock();
+
+      } else {
+
+        invalid_pc = 1;
+
+      }
+
+    }
+    /* Mirror the child's TB chaining unless the target block was skipped above. */
+    if (t.is_chain && !invalid_pc) {
+
+      last_tb = afl_tb_lookup(cpu, t.chain.last_tb.pc,
+                                 t.chain.last_tb.cs_base,
+                                 t.chain.last_tb.flags,
+                                 t.chain.cf_mask);
+#define TB_JMP_RESET_OFFSET_INVALID 0xffff
+        if (last_tb && (last_tb->jmp_reset_offset[t.chain.tb_exit] !=
+                        TB_JMP_RESET_OFFSET_INVALID)) {
+
+          tb_add_jump(last_tb, t.chain.tb_exit, tb);
+
+        }
+
+    }
+
+  }
+  /* Reached only via break (short read): close our end of the pipe. */
+  close(fd);
+
+}
+
 /* -icount align implementation. */
 
 typedef struct SyncClocks {
@@ -946,6 +2521,7 @@ static int __attribute__((noinline))
 cpu_exec_loop(CPUState *cpu, SyncClocks *sc)
 {
     int ret;
+    bool was_translated = false, was_chained = false;
 
     /* if an exception is pending, we execute it here */
     while (!cpu_handle_exception(cpu, &ret)) {
@@ -985,6 +2561,7 @@ cpu_exec_loop(CPUState *cpu, SyncClocks *sc)
 
                 mmap_lock();
                 tb = tb_gen_code(cpu, pc, cs_base, flags, cflags);
+                was_translated = true;
                 mmap_unlock();
 
                 /*
@@ -1011,6 +2588,11 @@ cpu_exec_loop(CPUState *cpu, SyncClocks *sc)
             /* See if we can patch the calling TB. */
             if (last_tb) {
                 tb_add_jump(last_tb, tb_exit, tb);
+                was_chained = true;
+            }
+            if (was_translated || was_chained) {
+                afl_request_tsl(s.pc, s.cs_base, s.flags, s.cf_mask,
+                                was_chained ? last_tb : NULL, tb_exit);
             }
 
             cpu_loop_exec_tb(cpu, tb, pc, &last_tb, &tb_exit);

cpu-exec.c 中的 cpu_exec_loop 函数为 QEMU 仿真的核心函数，调用流程大致如下：

linux-user/main.c 的 main 函数初始化结束后，调用cpu_loop，开始执行目标程序代码。
cpu_loop 函数位于linux-user/{arch}/cpu_loop.c 文件中。
cpu_loop调用accel/tcg/cpu-exec.c 文件中的cpu_exec函数，进行一些初始化后，执行到cpu_exec_setjmp函数。
cpu_exec_setjmp 函数的作用相当于使用 try … catch … 执行 cpu_exec_loop 函数。
cpu_exec_loop 函数负责执行翻译块（TB）中的 Host 机器码；若当前 pc 尚无对应的翻译块，则先进行翻译。
在cpu_exec_loop 函数中，调用tb_lookup根据当前 pc 地址去搜索对应的代码块。如果存在，则直接调用cpu_loop_exec_tb去执行 Host 机器码。
如果不存在对应的代码块，则调用tb_gen_code开始翻译。
在tb_gen_code中调用到setjmp_gen_code函数。
在setjmp_gen_code函数中，首先使用translate_code函数将Guest 指令翻译成 TCG IR。
translate_code函数位于target/{arch}/tcg/cpu.c 文件中。
在翻译成 IR 指令后，再调用tcg_gen_code函数，把 IR 指令翻译成 Host 机器码。

AFL 在该文件中的 patch 是将IJON、snapshot、persistent 模式等功能集成进 QEMU 当中，如下所示：

加入覆盖率共享内存（SHM）映射
加入 AFL forkserver 支持
加入持久化模式（persistent mode）支持
加入持久化模式下 snapshot（快照）/ restore（恢复）功能
加入 IJON（高级指导型 fuzzing）支持
加入比较覆盖（CmpCov）支持
加入 AFL TB 链跟踪结构（afl_tb, afl_chain 等）
加入 sharedmem fuzzing 模式支持
加入对 shared memory / registers 的 IJON hook 支持
扫描并保存 /proc/self/maps 的内存区域
在翻译完代码块或完成 TB 链接后，子进程会通过 afl_request_tsl 将本次翻译/链接请求发送给父进程 forkserver，由父进程同步执行相同的操作，使之后 fork 出的子进程可以直接复用已缓存的翻译结果。

接下来是tcg目录下的代码。

tcg/tcg.c文件patch 内容如下所示：

diff --git a/tcg/tcg.c b/tcg/tcg.c
index b1a7465df2..964e91ee32 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -61,6 +61,8 @@
 #include "user/guest-base.h"
 #endif
 
+#include "qemuafl/common.h"
+
 /* Forward declarations for functions declared in tcg-target.c.inc and
    used here. */
 static void tcg_target_init(TCGContext *s);
@@ -2435,6 +2437,17 @@ bool tcg_op_deposit_valid(TCGType type, unsigned ofs, unsigned len)
     return TCG_TARGET_deposit_valid(type, ofs, len);
 }
 
+void afl_gen_tcg_plain_call(void *func)
+{
+    TCGOp *op = tcg_emit_op(INDEX_op_call);
+    /* Emit a bare INDEX_op_call to func with CALLO/CALLI both 0 (presumably: no outputs, no inputs). */
+    TCGOP_CALLO(op) = 0;
+    /* args[0] carries the callee address; args[1] is left at 0. */
+    op->args[0] = (uintptr_t)func;
+    op->args[1] = 0;
+    TCGOP_CALLI(op) = 0;
+}
+
 static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs);

该部分 AFL patch 实现了一个简单接口 afl_gen_tcg_plain_call，允许在 TCG 翻译过程中插入一条对指定 Host 函数的“裸”调用：该调用的输入、输出操作数个数均被设为 0，即不传递参数，也不接收返回值。

tcg/tcg-op.c文件patch 内容如下所示：

diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c
index e2e25ebf7d..e42766c20d 100644
--- a/tcg/tcg-op.c
+++ b/tcg/tcg-op.c
@@ -32,6 +32,25 @@
 #include "trace/mem.h"
 #include "exec/plugin-gen.h"
 
+#include "qemuafl/qasan-qemu.h"
+
+#define GEN_QASAN_OP(OP) \
+void qasan_gen_##OP(TCGv addr, int off) { \
+  /* Defines qasan_gen_<OP>(); the helper call is emitted only when use_qasan && cur_block_is_good. */ \
+  if (use_qasan && cur_block_is_good) \
+    gen_helper_qasan_##OP(cpu_env, addr); \
+ /* The off parameter is accepted but never read in this body. */ \
+}
+
+GEN_QASAN_OP(load1)
+GEN_QASAN_OP(load2)
+GEN_QASAN_OP(load4)
+GEN_QASAN_OP(load8)
+GEN_QASAN_OP(store1)
+GEN_QASAN_OP(store2)
+GEN_QASAN_OP(store4)
+GEN_QASAN_OP(store8)
+

@@ -2836,9 +2857,18 @@ void tcg_gen_qemu_ld_i32(TCGv_i32 val, TCGv addr, TCGArg idx, MemOp memop)
     }
 
     addr = plugin_prep_mem_callbacks(addr);
+
+    switch (memop & MO_SIZE) {
+        case MO_64: qasan_gen_load8(addr, idx); break;
+        case MO_32: qasan_gen_load4(addr, idx); break;
+        case MO_16: qasan_gen_load2(addr, idx); break;
+        case MO_8:  qasan_gen_load1(addr, idx); break;
+        default: qasan_gen_load4(addr, idx); break;
+    }
+    
     gen_ldst_i32(INDEX_op_qemu_ld_i32, val, addr, memop, idx);
     plugin_gen_mem_callbacks(addr, info);
-
+    
     if ((orig_memop ^ memop) & MO_BSWAP) {
         switch (orig_memop & MO_SIZE) {
         case MO_16:
@@ -2883,7 +2913,20 @@ void tcg_gen_qemu_st_i32(TCGv_i32 val, TCGv addr, TCGArg idx, MemOp memop)
     }
 
     addr = plugin_prep_mem_callbacks(addr);
-    gen_ldst_i32(INDEX_op_qemu_st_i32, val, addr, memop, idx);
+
+    switch (memop & MO_SIZE) {
+        case MO_64: qasan_gen_store8(addr, idx); break;
+        case MO_32: qasan_gen_store4(addr, idx); break;
+        case MO_16: qasan_gen_store2(addr, idx); break;
+        case MO_8:  qasan_gen_store1(addr, idx); break;
+        default: qasan_gen_store4(addr, idx); break;
+    }
+
+    if (TCG_TARGET_HAS_qemu_st8_i32 && (memop & MO_SIZE) == MO_8) {
+        gen_ldst_i32(INDEX_op_qemu_st8_i32, val, addr, memop, idx);
+    } else {
+        gen_ldst_i32(INDEX_op_qemu_st_i32, val, addr, memop, idx);
+    }
     plugin_gen_mem_callbacks(addr, info);
 
     if (swap) {
@@ -2921,6 +2964,15 @@ void tcg_gen_qemu_ld_i64(TCGv_i64 val, TCGv addr, TCGArg idx, MemOp memop)
     }
 
     addr = plugin_prep_mem_callbacks(addr);
+
+    switch (memop & MO_SIZE) {
+        case MO_64: qasan_gen_load8(addr, idx); break;
+        case MO_32: qasan_gen_load4(addr, idx); break;
+        case MO_16: qasan_gen_load2(addr, idx); break;
+        case MO_8:  qasan_gen_load1(addr, idx); break;
+        default: qasan_gen_load8(addr, idx); break;
+    }
+    
     gen_ldst_i64(INDEX_op_qemu_ld_i64, val, addr, memop, idx);
     plugin_gen_mem_callbacks(addr, info);
 
@@ -2984,6 +3036,15 @@ void tcg_gen_qemu_st_i64(TCGv_i64 val, TCGv addr, TCGArg idx, MemOp memop)
     }
 
     addr = plugin_prep_mem_callbacks(addr);
+
+    switch (memop & MO_SIZE) {
+        case MO_64: qasan_gen_store8(addr, idx); break;
+        case MO_32: qasan_gen_store4(addr, idx); break;
+        case MO_16: qasan_gen_store2(addr, idx); break;
+        case MO_8:  qasan_gen_store1(addr, idx); break;
+        default: qasan_gen_store8(addr, idx); break;
+    }
+
     gen_ldst_i64(INDEX_op_qemu_st_i64, val, addr, memop, idx);
     plugin_gen_mem_callbacks(addr, info);

这段代码是 QASan 实现的核心部分。

简单来说，它的作用是：在 QEMU 将 Guest（客户机）的内存读写指令翻译成中间码（TCG Ops）时，按访问宽度（1/2/4/8 字节）在真正的访存操作之前插入一段“检查代码”（即对应的 qasan helper 调用）；该检查仅在 use_qasan 开启且 cur_block_is_good 为真时才会生成。

最后就是target/{arch}/tcg目录下的代码。

target/mips/tcg/translate.c文件patch 内容如下所示：

diff --git a/target/mips/tcg/translate.c b/target/mips/tcg/translate.c
index 78b848a6d9..c4d42da965 100644
--- a/target/mips/tcg/translate.c
+++ b/target/mips/tcg/translate.c
@@ -49,6 +49,27 @@
 STUB_HELPER(cache, TCGv_env env, TCGv val, TCGv_i32 reg)
 #endif
 
+/* MIPS_PATCH */
+#include "qemuafl/cpu-translate.h"
+
+/* MIPS_PATCH: expanded at the top of mips_tr_translate_insn(); a no-op unless is_persistent is set. */
+#define AFL_QEMU_TARGET_MIPS_SNIPPET                                          \
+  if (is_persistent) {                                                        \
+    if (ctx->base.pc_next == afl_persistent_addr) {                           \
+      gen_helper_afl_persistent_routine(cpu_env);                             \
+      /* no ret addr / no exits: set gpr[31] ($ra) to afl_persistent_addr */  \
+      if (afl_persistent_ret_addr == 0 && !persistent_exits) {                \
+        tcg_gen_movi_tl(cpu_gpr[31], afl_persistent_addr);                    \
+      }                                                                       \
+                                                                              \
+      if (!persistent_save_gpr) afl_gen_tcg_plain_call(&afl_persistent_loop); \
+                                                                              \
+    } else if (afl_persistent_ret_addr &&                                     \
+               ctx->base.pc_next == afl_persistent_ret_addr) {                \
+      gen_goto_tb(ctx, 0, afl_persistent_addr);                               \
+    }                                                                         \
+  }
+
 enum {
     /* indirect opcode tables */
     OPC_SPECIAL  = (0x00 << 26),
@@ -1187,6 +1208,128 @@ static const char regnames_LO[][4] = {
     "LO0", "LO1", "LO2", "LO3",
 };
 
+/* MIPS_PATCH: copy the guest register state from env into the snapshot r. */
+void afl_save_regs(struct api_regs* r, CPUArchState *env) {
+    int i = 0;
+    int j = 0;
+    /* GP registers saving: gpr[0..31] go into the named fields, followed by PC */
+    r->r0 = env->active_tc.gpr[0];
+    r->at = env->active_tc.gpr[1];
+    r->v0 = env->active_tc.gpr[2];
+    r->v1 = env->active_tc.gpr[3];
+    r->a0 = env->active_tc.gpr[4];
+    r->a1 = env->active_tc.gpr[5];
+    r->a2 = env->active_tc.gpr[6];
+    r->a3 = env->active_tc.gpr[7];
+    r->t0 = env->active_tc.gpr[8];
+    r->t1 = env->active_tc.gpr[9];
+    r->t2 = env->active_tc.gpr[10];
+    r->t3 = env->active_tc.gpr[11];
+    r->t4 = env->active_tc.gpr[12];
+    r->t5 = env->active_tc.gpr[13];
+    r->t6 = env->active_tc.gpr[14];
+    r->t7 = env->active_tc.gpr[15];
+    r->s0 = env->active_tc.gpr[16];
+    r->s1 = env->active_tc.gpr[17];
+    r->s2 = env->active_tc.gpr[18];
+    r->s3 = env->active_tc.gpr[19];
+    r->s4 = env->active_tc.gpr[20];
+    r->s5 = env->active_tc.gpr[21];
+    r->s6 = env->active_tc.gpr[22];
+    r->s7 = env->active_tc.gpr[23];
+    r->t8 = env->active_tc.gpr[24];
+    r->t9 = env->active_tc.gpr[25];
+    r->k0 = env->active_tc.gpr[26];
+    r->k1 = env->active_tc.gpr[27];
+    r->gp = env->active_tc.gpr[28];
+    r->sp = env->active_tc.gpr[29];
+    r->fp = env->active_tc.gpr[30];
+    r->ra = env->active_tc.gpr[31];
+    r->PC = env->active_tc.PC;
+#if defined(TARGET_MIPS64)
+    memcpy(r->gpr_hi, env->active_tc.gpr_hi, sizeof(r->gpr_hi));
+#endif
+    for (i = 0; i < MIPS_DSP_ACC; i++) { /* MIPS_DSP_ACC entries of HI[] and LO[] */
+        r->HI[i] = env->active_tc.HI[i];
+        r->LO[i] = env->active_tc.LO[i];
+    }
+    /* FP registers saving, field by field (if fpr[i] is a union these copies overlap - TODO confirm) */
+    for (i = 0; i < 32; i++) {
+        r->fpr[i].fd = env->active_fpu.fpr[i].fd;
+        for (j = 0; j < 2; j++) {
+            r->fpr[i].fs[j] = env->active_fpu.fpr[i].fs[j];
+        }
+        r->fpr[i].d = env->active_fpu.fpr[i].d;
+        for (j = 0; j < 2; j++) {
+            r->fpr[i].w[j] = env->active_fpu.fpr[i].w[j];
+        }
+        for (j = 0; j < MSA_WRLEN / 8; j++) {
+            r->fpr[i].wr.b[j] = env->active_fpu.fpr[i].wr.b[j];
+        }
+    }
+}
+
+/* MIPS_PATCH: inverse of afl_save_regs(); load the snapshot r back into env. */
+void afl_restore_regs(struct api_regs* r, CPUArchState *env) {
+    int i = 0;
+    int j = 0;
+    /* GP registers restoring: named fields go back into gpr[0..31], followed by PC */
+    env->active_tc.gpr[0] = r->r0;
+    env->active_tc.gpr[1] = r->at;
+    env->active_tc.gpr[2] = r->v0;
+    env->active_tc.gpr[3] = r->v1;
+    env->active_tc.gpr[4] = r->a0;
+    env->active_tc.gpr[5] = r->a1;
+    env->active_tc.gpr[6] = r->a2;
+    env->active_tc.gpr[7] = r->a3;
+    env->active_tc.gpr[8] = r->t0;
+    env->active_tc.gpr[9] = r->t1;
+    env->active_tc.gpr[10] = r->t2;
+    env->active_tc.gpr[11] = r->t3;
+    env->active_tc.gpr[12] = r->t4;
+    env->active_tc.gpr[13] = r->t5;
+    env->active_tc.gpr[14] = r->t6;
+    env->active_tc.gpr[15] = r->t7;
+    env->active_tc.gpr[16] = r->s0;
+    env->active_tc.gpr[17] = r->s1;
+    env->active_tc.gpr[18] = r->s2;
+    env->active_tc.gpr[19] = r->s3;
+    env->active_tc.gpr[20] = r->s4;
+    env->active_tc.gpr[21] = r->s5;
+    env->active_tc.gpr[22] = r->s6;
+    env->active_tc.gpr[23] = r->s7;
+    env->active_tc.gpr[24] = r->t8;
+    env->active_tc.gpr[25] = r->t9;
+    env->active_tc.gpr[26] = r->k0;
+    env->active_tc.gpr[27] = r->k1;
+    env->active_tc.gpr[28] = r->gp;
+    env->active_tc.gpr[29] = r->sp;
+    env->active_tc.gpr[30] = r->fp;
+    env->active_tc.gpr[31] = r->ra;
+    env->active_tc.PC = r->PC;
+#if defined(TARGET_MIPS64)
+    memcpy(env->active_tc.gpr_hi, r->gpr_hi, sizeof(r->gpr_hi));
+#endif
+    for (i = 0; i < MIPS_DSP_ACC; i++) { /* MIPS_DSP_ACC entries of HI[] and LO[] */
+        env->active_tc.HI[i] = r->HI[i];
+        env->active_tc.LO[i] = r->LO[i];
+    }
+    /* FP registers restoring, field by field (if fpr[i] is a union these copies overlap - TODO confirm) */
+    for (i = 0; i < 32; i++) {
+        env->active_fpu.fpr[i].fd = r->fpr[i].fd;
+        for (j = 0; j < 2; j++) {
+            env->active_fpu.fpr[i].fs[j] = r->fpr[i].fs[j];
+        }
+        env->active_fpu.fpr[i].d = r->fpr[i].d;
+        for (j = 0; j < 2; j++) {
+            env->active_fpu.fpr[i].w[j] = r->fpr[i].w[j];
+        }
+        for (j = 0; j < MSA_WRLEN / 8; j++) {
+            env->active_fpu.fpr[i].wr.b[j] = r->fpr[i].wr.b[j];
+        }
+    }
+}
+
 /* General purpose registers moves. */
 void gen_load_gpr(TCGv t, int reg)
 {
@@ -15138,6 +15281,9 @@ static void mips_tr_translate_insn(DisasContextBase *dcbase, CPUState *cs)
     int insn_bytes;
     int is_slot;
 
+    /* MIPS_PATCH */
+    AFL_QEMU_TARGET_MIPS_SNIPPET
+
     is_slot = ctx->hflags & MIPS_HFLAG_BMASK;
     if (ctx->insn_flags & ISA_NANOMIPS32) {
         ctx->opcode = translator_lduw(env, &ctx->base, ctx->base.pc_next);

target/{arch}/tcg/translate.c 主要将目标程序的指令翻译成 TCG IR。AFL 对该文件的 patch 主要添加 AFL 持久化模式支持，包括持久化 fuzz 逻辑注入及 MIPS 完整寄存器状态的保存与恢复，使 AFL 能以高效循环方式 fuzz MIPS 程序。

由于测试案例使用 MIPS 架构，因此上述 patch 内容仅包含 MIPS 架构相关部分，其他架构暂未 patch。此外，由于暂未使用 QASan 功能，许多 QASan 相关 patch 亦暂未加入。