afl-gcc

ldrx30

2024-03-09

CSE

Fuzz

模糊测试插桩

想要读得更清楚，可以手动编译 AFL，然后用 gdb 单步调试，这样能更清晰地了解每一步的结果。

afl gcc && as

如何进行静态插桩

afl-gcc 是 gcc, g++, clang, clang++ 的包装器。它的作用是设置一些编译参数，然后调用这些编译器。事实上，编译出来的 afl-clang, afl-g++ 等文件都是指向 afl-gcc 的软链接。

afl-gcc 需要知道 afl-as 的路径。afl-as 是插桩器，我们将会在后文分析它的逻辑。可以通过 AFL_PATH 指定。

afl-gcc

main

afl-gcc 的入口点

/* Entry point of the afl-gcc wrapper: print a banner or the usage text,
   locate the directory that holds the 'as' wrapper, build the argument list
   for the real compiler, then replace this process with that compiler. */
int main(int argc, char** argv) {

  // isatty(2) is non-zero only when fd 2 (stderr) refers to a terminal and
  // 0 otherwise. The banner is printed only on a terminal and only when
  // AFL_QUIET is unset; in every other case be_quiet is switched on.
  if (isatty(2) && !getenv("AFL_QUIET")) {

    SAYF(cCYA "afl-cc " cBRI VERSION cRST " by <lcamtuf@google.com>\n");

  } else be_quiet = 1;

  // No arguments besides the program name: print usage and exit with 1.
  if (argc < 2) {

    SAYF("\n"
         "This is a helper application for afl-fuzz. It serves as a drop-in replacement\n"
         "for gcc or clang, letting you recompile third-party code with the required\n"
         "runtime instrumentation. A common use pattern would be one of the following:\n\n"

         "  CC=%s/afl-gcc ./configure\n"
         "  CXX=%s/afl-g++ ./configure\n\n"

         "You can specify custom next-stage toolchain via AFL_CC, AFL_CXX, and AFL_AS.\n"
         "Setting AFL_HARDEN enables hardening optimizations in the compiled code.\n\n",
         BIN_PATH, BIN_PATH);

    exit(1);

  }

  // Work out as_path, the directory later handed to the compiler via -B.
  find_as(argv[0]);

  // Fill cc_params with the real compiler name plus the edited arguments.
  edit_params(argc, argv);

  // execvp() replaces the current process image with the real compiler and
  // does not return on success. The FATAL below is therefore reached only
  // when execvp() itself failed.
  execvp(cc_params[0], (char**)cc_params);

  FATAL("Oops, failed to execute '%s' - check your PATH", cc_params[0]);

  return 0;
}

find_as

寻找 afl-as 路径

/* Locate the directory containing the assembler wrapper and store it in
   as_path. argv0 is the path this program was invoked with (e.g. afl-gcc).
   Three places are tried in order: $AFL_PATH, the directory part of argv0,
   and the compile-time AFL_PATH macro. Aborts via FATAL if none matches. */
static void find_as(u8* argv0) {

  // Run-time override taken from the AFL_PATH environment variable.
  u8 *afl_path = getenv("AFL_PATH");
  u8 *slash, *tmp;

  // 1) If $AFL_PATH is set, look for an executable named "as" inside it.
  if (afl_path) {

    tmp = alloc_printf("%s/as", afl_path);

    // access() returns 0 when the path exists and is executable (X_OK).
    if (!access(tmp, X_OK)) {
      as_path = afl_path;
      ck_free(tmp);
      return;
    }

    ck_free(tmp);

  }

  // 2) Find the last '/' in argv0,
  //    e.g. for /usr/bin/afl-gcc slash points at "/afl-gcc".
  slash = strrchr(argv0, '/');

  if (slash) {

    u8 *dir;

    // Temporarily overwrite that '/' with NUL so argv0 reads as the
    // directory only ("/usr/bin\0afl-gcc"), duplicate it, then put the
    // '/' back so argv0 is left unchanged.
    *slash = 0;
    dir = ck_strdup(argv0);
    *slash = '/';

    // Note the different file name: here the probe is "afl-as", whereas the
    // AFL_PATH probes above and below look for "as".
    tmp = alloc_printf("%s/afl-as", dir);

    if (!access(tmp, X_OK)) {
      // dir is intentionally not freed here: as_path keeps pointing at it.
      as_path = dir;
      ck_free(tmp);
      return;
    }

    ck_free(tmp);
    ck_free(dir);

  }

  // 3) Fallback: the AFL_PATH macro baked in at build time. The Makefile
  //    excerpt shown after this function passes it as -DAFL_PATH, which is
  //    $(PREFIX)/lib/afl, i.e. /usr/local/lib/afl with the default PREFIX.
  if (!access(AFL_PATH "/as", X_OK)) {
    as_path = AFL_PATH;
    return;
  }
  FATAL("Unable to find AFL wrapper binary for 'as'. Please set AFL_PATH");
}

Makefile 中通过 gcc 的 -D 选项把这些路径定义成编译期宏（AFL_PATH、DOC_PATH、BIN_PATH），注意它们是宏而不是环境变量：

PREFIX     ?= /usr/local
BIN_PATH    = $(PREFIX)/bin
HELPER_PATH = $(PREFIX)/lib/afl
DOC_PATH    = $(PREFIX)/share/doc/afl
MISC_PATH   = $(PREFIX)/share/afl

# PROGS intentionally omit afl-as, which gets installed elsewhere.

PROGS       = afl-gcc afl-fuzz afl-showmap afl-tmin afl-gotcpu afl-analyze
SH_PROGS    = afl-plot afl-cmin afl-whatsup

CFLAGS     ?= -O3 -funroll-loops
CFLAGS     += -Wall -D_FORTIFY_SOURCE=2 -g -Wno-pointer-sign \
	      -DAFL_PATH=\"$(HELPER_PATH)\" -DDOC_PATH=\"$(DOC_PATH)\" \
	      -DBIN_PATH=\"$(BIN_PATH)\"

edit_param

编译器参数设置，忽略掉 __APPLE__

/* Copy argv to cc_params, making the necessary edits.

   cc_params[0] becomes the real compiler, chosen from the name this wrapper
   was invoked as and the AFL_CC / AFL_CXX / AFL_GCJ overrides. The caller's
   arguments are copied with a few options dropped, then "-B as_path" and
   the hardening / sanitizer / optimization flags are appended. The array is
   NULL-terminated so it can be passed to execvp(). */

static void edit_params(u32 argc, char** argv) {

  u8 fortify_set = 0, asan_set = 0;
  u8 *name;

#if defined(__FreeBSD__) && defined(__x86_64__)
  u8 m32_set = 0;
#endif

  cc_params = ck_alloc((argc + 128) * sizeof(u8*));

  name = strrchr(argv[0], '/');
  if (!name) name = argv[0]; else name++;

  // Pick the real compiler from the wrapper's own base name.
  // Any name starting with "afl-clang" selects clang mode; exactly
  // "afl-clang++" selects clang++ (or $AFL_CXX), anything else in this
  // branch selects clang (or $AFL_CC).
  // NOTE(review): the original annotation says afl-clang-fast is handled in
  // llvm_mode instead; that is not visible in this excerpt.
  if (!strncmp(name, "afl-clang", 9)) {

    clang_mode = 1;

    // Exported so that afl-as can detect clang mode; its main() reads
    // CLANG_ENV_VAR back with getenv().
    setenv(CLANG_ENV_VAR, "1", 1);

    if (!strcmp(name, "afl-clang++")) {
      u8* alt_cxx = getenv("AFL_CXX");
      cc_params[0] = alt_cxx ? alt_cxx : (u8*)"clang++";
    } else {
      u8* alt_cc = getenv("AFL_CC");
      cc_params[0] = alt_cc ? alt_cc : (u8*)"clang";
    }

  } else {    // every other name: afl-g++, afl-gcj, or plain afl-gcc

    /* With GCJ and Eclipse installed, you can actually compile Java! The
       instrumentation will work (amazingly). Alas, unhandled exceptions do
       not call abort(), so afl-fuzz would need to be modified to equate
       non-zero exit codes with crash conditions when working with Java
       binaries. Meh. */

#ifdef __APPLE__

    if (!strcmp(name, "afl-g++")) cc_params[0] = getenv("AFL_CXX");
    else if (!strcmp(name, "afl-gcj")) cc_params[0] = getenv("AFL_GCJ");
    else cc_params[0] = getenv("AFL_CC");

    if (!cc_params[0]) {

      SAYF("\n" cLRD "[-] " cRST
           "On Apple systems, 'gcc' is usually just a wrapper for clang. Please use the\n"
           "    'afl-clang' utility instead of 'afl-gcc'. If you really have GCC installed,\n"
           "    set AFL_CC or AFL_CXX to specify the correct path to that compiler.\n");

      FATAL("AFL_CC or AFL_CXX required on MacOS X");

    }

#else

    if (!strcmp(name, "afl-g++")) {
      u8* alt_cxx = getenv("AFL_CXX");
      cc_params[0] = alt_cxx ? alt_cxx : (u8*)"g++";
    } else if (!strcmp(name, "afl-gcj")) {
      u8* alt_cc = getenv("AFL_GCJ");
      cc_params[0] = alt_cc ? alt_cc : (u8*)"gcj";
    } else {
      u8* alt_cc = getenv("AFL_CC");
      cc_params[0] = alt_cc ? alt_cc : (u8*)"gcc";
    }

#endif /* __APPLE__ */

  }

  // Walk the caller's arguments (argv[1] onwards) and copy them into
  // cc_params, dropping or recording a few of them on the way.
  while (--argc) {
    u8* cur = *(++argv);

    // Any caller-supplied -B is dropped, because our own -B is appended
    // below. A bare "-B" also swallows the following argument (its value).
    if (!strncmp(cur, "-B", 2)) {

      if (!be_quiet) WARNF("-B is already set, overriding");

      if (!cur[2] && argc > 1) { argc--; argv++; }
      continue;

    }

    // These two options are not forwarded to the real compiler.
    if (!strcmp(cur, "-integrated-as")) continue;

    if (!strcmp(cur, "-pipe")) continue;

#if defined(__FreeBSD__) && defined(__x86_64__)
    if (!strcmp(cur, "-m32")) m32_set = 1;
#endif

    // Remember whether the caller already asked for ASAN or MSAN ...
    if (!strcmp(cur, "-fsanitize=address") ||
        !strcmp(cur, "-fsanitize=memory")) asan_set = 1;

    // ... or already mentions FORTIFY_SOURCE anywhere in an argument.
    if (strstr(cur, "FORTIFY_SOURCE")) fortify_set = 1;

    cc_params[cc_par_cnt++] = cur;

  }

  // Point the compiler's -B search path at as_path (set by find_as), which
  // is how the wrapper found there ends up being used as the assembler.
  cc_params[cc_par_cnt++] = "-B";
  cc_params[cc_par_cnt++] = as_path;

  // In clang mode, additionally pass -no-integrated-as.
  if (clang_mode)
    cc_params[cc_par_cnt++] = "-no-integrated-as";

  // AFL_HARDEN: add -fstack-protector-all and, unless the caller already
  // mentioned FORTIFY_SOURCE, -D_FORTIFY_SOURCE=2.
  if (getenv("AFL_HARDEN")) {

    cc_params[cc_par_cnt++] = "-fstack-protector-all";

    if (!fortify_set)
      cc_params[cc_par_cnt++] = "-D_FORTIFY_SOURCE=2";

  }

  // Sanitizer handling. If the caller passed -fsanitize=address/memory on
  // the command line, only the AFL_USE_ASAN environment flag is set.
  // Otherwise AFL_USE_ASAN / AFL_USE_MSAN in the environment make us add
  // the matching -fsanitize flag; each is rejected together with the other
  // one or with AFL_HARDEN.
  if (asan_set) {

    /* Pass this on to afl-as to adjust map density. */

    setenv("AFL_USE_ASAN", "1", 1);

  } else if (getenv("AFL_USE_ASAN")) {

    if (getenv("AFL_USE_MSAN"))
      FATAL("ASAN and MSAN are mutually exclusive");

    if (getenv("AFL_HARDEN"))
      FATAL("ASAN and AFL_HARDEN are mutually exclusive");

    cc_params[cc_par_cnt++] = "-U_FORTIFY_SOURCE";
    cc_params[cc_par_cnt++] = "-fsanitize=address";

  } else if (getenv("AFL_USE_MSAN")) {

    if (getenv("AFL_USE_ASAN"))
      FATAL("ASAN and MSAN are mutually exclusive");

    if (getenv("AFL_HARDEN"))
      FATAL("MSAN and AFL_HARDEN are mutually exclusive");

    cc_params[cc_par_cnt++] = "-U_FORTIFY_SOURCE";
    cc_params[cc_par_cnt++] = "-fsanitize=memory";


  }

  // Unless AFL_DONT_OPTIMIZE is set, add -g -O3 -funroll-loops and the two
  // "built for fuzzing" preprocessor defines.
  if (!getenv("AFL_DONT_OPTIMIZE")) {

#if defined(__FreeBSD__) && defined(__x86_64__)

    /* On 64-bit FreeBSD systems, clang -g -m32 is broken, but -m32 itself
       works OK. This has nothing to do with us, but let's avoid triggering
       that bug. */

    if (!clang_mode || !m32_set)
      cc_params[cc_par_cnt++] = "-g";

#else

      cc_params[cc_par_cnt++] = "-g";

#endif

    cc_params[cc_par_cnt++] = "-O3";
    cc_params[cc_par_cnt++] = "-funroll-loops";

    /* Two indicators that you're building for fuzzing; one of them is
       AFL-specific, the other is shared with libfuzzer. */

    cc_params[cc_par_cnt++] = "-D__AFL_COMPILER=1";
    cc_params[cc_par_cnt++] = "-DFUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION=1";

  }

  // AFL_NO_BUILTIN: pass -fno-builtin-<fn> for the string / memory
  // comparison functions listed below (not the blanket -fno-builtin).
  if (getenv("AFL_NO_BUILTIN")) {

    cc_params[cc_par_cnt++] = "-fno-builtin-strcmp";
    cc_params[cc_par_cnt++] = "-fno-builtin-strncmp";
    cc_params[cc_par_cnt++] = "-fno-builtin-strcasecmp";
    cc_params[cc_par_cnt++] = "-fno-builtin-strncasecmp";
    cc_params[cc_par_cnt++] = "-fno-builtin-memcmp";
    cc_params[cc_par_cnt++] = "-fno-builtin-strstr";
    cc_params[cc_par_cnt++] = "-fno-builtin-strcasestr";

  }

  // execvp() needs a NULL-terminated vector.
  cc_params[cc_par_cnt] = NULL;

}

设置完参数后，gcc开始编译。一个c文件到可执行文件分为 预处理，编译，汇编，链接阶段

afl-as 静态插桩

-B 的含义： Add <directory> to the compiler’s search paths，也就是让编译器优先在该目录下寻找汇编器 as 等子程序。afl-gcc 传入的目录是 as_path，因此编译器在汇编阶段调用到的 as 实际上是 AFL 的包装器 afl-as。

C语言编译

$ gcc tmp.c -S tmp.S -O0 -fno-asynchronous-unwind-tables
cc1: warning: tmp.S is shorter than expected
# 0 "tmp.S"
# 0 "<built-in>"
# 0 "<command-line>"
# 1 "/usr/include/stdc-predef.h" 1 3 4
# 0 "<command-line>" 2
# 1 "tmp.S"
 .file "tmp.c"
 .text
 .section .rodata
.LC0:
 .string "hello"
 .text
 .globl main
 .type main, @function
main:
 pushq %rbp
 movq %rsp, %rbp
 leaq .LC0(%rip), %rax
 movq %rax, %rdi
 movl $0, %eax
 call printf@PLT
 movl $0, %eax
 popq %rbp
 ret
 .size main, .-main
 .ident "GCC: (Debian 13.2.0-13) 13.2.0"
 .section .note.GNU-stack,"",@progbits

在汇编过程插桩，得到不一样的汇编代码

afl-clang和afl-gcc存在一定的区别，但是原理类似

llvm-mode

main

afl-as的入口，几乎和afl-gcc差不多

/* Entry point of afl-as: build the argument list for the real assembler,
   write an instrumented copy of the input assembly, run the real assembler
   on that copy in a child process, and exit with the child's exit status. */
int main(int argc, char** argv) {

  s32 pid;
  u32 rand_seed;
  int status;
  u8* inst_ratio_str = getenv("AFL_INST_RATIO");

  struct timeval tv;
  struct timezone tz;

  // Non-zero when CLANG_ENV_VAR is present in the environment
  // (afl-gcc's edit_params sets it in clang mode).
  clang_mode = !!getenv(CLANG_ENV_VAR);

  if (isatty(2) && !getenv("AFL_QUIET")) {

    SAYF(cCYA "afl-as " cBRI VERSION cRST " by <lcamtuf@google.com>\n");
 
  } else be_quiet = 1;

  if (argc < 2) {

    SAYF("\n"
         "This is a helper application for afl-fuzz. It is a wrapper around GNU 'as',\n"
         "executed by the toolchain whenever using afl-gcc or afl-clang. You probably\n"
         "don't want to run this program directly.\n\n"

         "Rarely, when dealing with extremely complex projects, it may be advisable to\n"
         "set AFL_INST_RATIO to a value less than 100 in order to reduce the odds of\n"
         "instrumenting every discovered branch.\n\n");

    exit(1);

  }

  // Seed random() from the current time (seconds ^ microseconds) xor the
  // PID; the seed feeds the R() calls made while instrumenting.
  gettimeofday(&tv, &tz);

  rand_seed = tv.tv_sec ^ tv.tv_usec ^ getpid();

  srandom(rand_seed);

  // Build as_params and decide input_file / modified_file / pass_thru.
  edit_params(argc, argv);

  // AFL_INST_RATIO, when present, must parse as an unsigned value <= 100.
  if (inst_ratio_str) {

    if (sscanf(inst_ratio_str, "%u", &inst_ratio) != 1 || inst_ratio > 100) 
      FATAL("Bad value of AFL_INST_RATIO (must be between 0 and 100)");

  }

  // Recursion guard: if the marker is already in the environment, the "as"
  // being executed further down resolved back to this wrapper.
  if (getenv(AS_LOOP_ENV_VAR))
    FATAL("Endless loop when calling 'as' (remove '.' from your PATH)");

  setenv(AS_LOOP_ENV_VAR, "1", 1);

  /* When compiling with ASAN, we don't have a particularly elegant way to skip
     ASAN-specific branches. But we can probabilistically compensate for
     that... */
  // With ASAN or MSAN requested via the environment, the instrumentation
  // ratio is simply divided by 3.
  if (getenv("AFL_USE_ASAN") || getenv("AFL_USE_MSAN")) {
    sanitizer = 1;
    inst_ratio /= 3;
  }

  // Core step: write the instrumented assembly to modified_file. Skipped
  // when edit_params saw "--version" as the last argument.
  if (!just_version) add_instrumentation();

  // Child process: exec the real assembler named by as_params[0] (this is
  // not afl-as again) on the arguments prepared by edit_params.
  if (!(pid = fork())) {

    execvp(as_params[0], (char**)as_params);
    FATAL("Oops, failed to execute '%s' - check your PATH", as_params[0]);

  }

  if (pid < 0) PFATAL("fork() failed");

  if (waitpid(pid, &status, 0) <= 0) PFATAL("waitpid() failed");

  // Delete the instrumented temporary file unless AFL_KEEP_ASSEMBLY is set.
  if (!getenv("AFL_KEEP_ASSEMBLY")) unlink(modified_file);

  exit(WEXITSTATUS(status));
}

edit param

还是afl-as参数的设置，这里已经被 afl-gcc 设置过一次

/* Build as_params, the argument vector for the real assembler.

   as_params[0] is $AFL_AS or "as". Every argument except the last one is
   copied through. The last argument is taken to be the input file and is
   replaced by modified_file, the path of the instrumented copy that
   add_instrumentation() writes. Also sets use_64bit, input_file,
   just_version and pass_thru. */
static void edit_params(int argc, char** argv) {

  // Environment overrides: temp directory and the assembler to run.
  u8 *tmp_dir = getenv("TMPDIR"), *afl_as = getenv("AFL_AS");
  u32 i;

#ifdef __APPLE__

  u8 use_clang_as = 0;

  /* On MacOS X, the Xcode cctool 'as' driver is a bit stale and does not work
     with the code generated by newer versions of clang that are hand-built
     by the user. See the thread here: http://goo.gl/HBWDtn.

     To work around this, when using clang and running without AFL_AS
     specified, we will actually call 'clang -c' instead of 'as -q' to
     compile the assembly file.

     The tools aren't cmdline-compatible, but at least for now, we can
     seemingly get away with this by making only very minor tweaks. Thanks
     to Nico Weber for the idea. */

  if (clang_mode && !afl_as) {

    use_clang_as = 1;

    afl_as = getenv("AFL_CC");
    if (!afl_as) afl_as = getenv("AFL_CXX");
    if (!afl_as) afl_as = "clang";

  }

#endif /* __APPLE__ */

  /* Although this is not documented, GCC also uses TEMP and TMP when TMPDIR
     is not set. We need to check these non-standard variables to properly
     handle the pass_thru logic later on. */

  // Temp directory for the instrumented file: TMPDIR, TEMP, TMP, or /tmp.
  if (!tmp_dir) tmp_dir = getenv("TEMP");
  if (!tmp_dir) tmp_dir = getenv("TMP");
  if (!tmp_dir) tmp_dir = "/tmp";

  as_params = ck_alloc((argc + 32) * sizeof(u8*));

  as_params[0] = afl_as ? afl_as : (u8*)"as";

  as_params[argc] = 0;

  // Copy the assembler options (everything except argv[0] and the last
  // argument), noting the target word size on the way.
  for (i = 1; i < argc - 1; i++) {
	// --64 / --32 select 64-bit or 32-bit instrumentation.
    if (!strcmp(argv[i], "--64")) use_64bit = 1;
    else if (!strcmp(argv[i], "--32")) use_64bit = 0;

#ifdef __APPLE__

    /* The Apple case is a bit different... */

    if (!strcmp(argv[i], "-arch") && i + 1 < argc) {

      if (!strcmp(argv[i + 1], "x86_64")) use_64bit = 1;
      else if (!strcmp(argv[i + 1], "i386"))
        FATAL("Sorry, 32-bit Apple platforms are not supported.");

    }

    /* Strip options that set the preference for a particular upstream
       assembler in Xcode. */

    if (clang_mode && (!strcmp(argv[i], "-q") || !strcmp(argv[i], "-Q")))
      continue;

#endif /* __APPLE__ */

    as_params[as_par_cnt++] = argv[i];

  }

#ifdef __APPLE__

  /* When calling clang as the upstream assembler, append -c -x assembler
     and hope for the best. */

  if (use_clang_as) {

    as_params[as_par_cnt++] = "-c";
    as_params[as_par_cnt++] = "-x";
    as_params[as_par_cnt++] = "assembler";

  }

#endif /* __APPLE__ */
  // The last argument is normally the input file name, but it may also be
  // an option, which is handled next.
  input_file = argv[argc - 1];
  // Last argument starts with '-': "--version" is forwarded unchanged and
  // disables instrumentation; a bare "-" sets input_file to NULL (the
  // instrumenter then reads stdin); anything else is a usage error.
  if (input_file[0] == '-') {

    if (!strcmp(input_file + 1, "-version")) {
      just_version = 1;
      modified_file = input_file;
      goto wrap_things_up;
    }

    if (input_file[1]) FATAL("Incorrect use (not called through afl-gcc?)");
      else input_file = NULL;

  } else {

    /* Check if this looks like a standard invocation as a part of an attempt
       to compile a program, rather than using gcc on an ad-hoc .s file in
       a format we may not understand. This works around an issue compiling
       NSS. */

    // Input not located under tmp_dir, /var/tmp/ or /tmp/: copy it through
    // without instrumenting.
    if (strncmp(input_file, tmp_dir, strlen(tmp_dir)) &&
        strncmp(input_file, "/var/tmp/", 9) &&
        strncmp(input_file, "/tmp/", 5)) pass_thru = 1;

  }

  // Name of the instrumented copy: <tmp_dir>/.afl-<pid>-<time>.s. main()
  // unlinks it afterwards unless AFL_KEEP_ASSEMBLY is set.
  modified_file = alloc_printf("%s/.afl-%u-%u.s", tmp_dir, getpid(),
                               (u32)time(NULL));

wrap_things_up:
  // The final argument handed to the real assembler is modified_file
  // (e.g. /tmp/.afl-<pid>-<time>.s) instead of the original input.
  as_params[as_par_cnt++] = modified_file;
  as_params[as_par_cnt]   = NULL;

}

add_instrumentation

插桩代码，如果想要比较详细的理解，需要使用afl-gcc编译一下，使用AFL_KEEP_ASSEMBLY将汇编代码保存，或者使用IDA这样的反汇编工具打开二进制文件

/* Copy the input assembly (input_file, or stdin when it is NULL) to
   modified_file line by line, inserting a trampoline in front of selected
   instructions and after conditional jumps. If at least one trampoline was
   written, the main payload is appended at the end of the output. */
static void add_instrumentation(void) {

  static u8 line[MAX_LINE];

  FILE* inf;
  FILE* outf;
  s32 outfd;
  u32 ins_lines = 0;

  u8  instr_ok = 0, skip_csect = 0, skip_next_label = 0,
      skip_intel = 0, skip_app = 0, instrument_next = 0;

#ifdef __APPLE__

  u8* colon_pos;

#endif /* __APPLE__ */
  
  if (input_file) {

    inf = fopen(input_file, "r");
    if (!inf) PFATAL("Unable to read '%s'", input_file);

  } else inf = stdin;

  // modified_file is the temp path chosen in edit_params
  // (<tmp_dir>/.afl-<pid>-<time>.s). O_EXCL makes open() fail if the file
  // already exists.
  outfd = open(modified_file, O_WRONLY | O_EXCL | O_CREAT, 0600);

  if (outfd < 0) PFATAL("Unable to write to '%s'", modified_file);

  outf = fdopen(outfd, "w");

  if (!outf) PFATAL("fdopen() failed");  

  while (fgets(line, MAX_LINE, inf)) {

    /* In some cases, we want to defer writing the instrumentation trampoline
       until after all the labels, macros, comments, etc. If we're in this
       mode, and if the line starts with a tab followed by a character, dump
       the trampoline now. */

    /*
        instr_ok:        set while inside a section recognised as code
                         (.text and the other variants matched below)
        skip_csect:      set while a .code32 / .code64 directive disagrees
                         with use_64bit
        skip_next_label: set by the p2align check below; the next branch
                         label is then not instrumented
        skip_intel:      set between .intel_syntax and .att_syntax
        skip_app:        set between #APP and #NO_APP (inline asm)
        instrument_next: a suitable label was seen; the trampoline is
                         written just before the next instruction line
    */

    if (!pass_thru && !skip_intel && !skip_app && !skip_csect && instr_ok &&
        instrument_next && line[0] == '\t' && isalpha(line[1])) {

      fprintf(outf, use_64bit ? trampoline_fmt_64 : trampoline_fmt_32,
              R(MAP_SIZE));

      instrument_next = 0;
      ins_lines++;

    }

    /* Output the actual line, call it a day in pass-thru mode. */

    fputs(line, outf);

    if (pass_thru) continue;

    /* All right, this is where the actual fun begins. For one, we only want to
       instrument the .text section. So, let's keep track of that in processed
       files - and let's set instr_ok accordingly. */

    if (line[0] == '\t' && line[1] == '.') {

      /* OpenBSD puts jump tables directly inline with the code, which is
         a bit annoying. They use a specific format of p2align directives
         around them, so we use that as a signal. */
      if (!clang_mode && instr_ok && !strncmp(line + 2, "p2align ", 8) &&
          isdigit(line[10]) && line[11] == '\n') skip_next_label = 1;

      if (!strncmp(line + 2, "text\n", 5) ||
          !strncmp(line + 2, "section\t.text", 13) ||
          !strncmp(line + 2, "section\t__TEXT,__text", 21) ||
          !strncmp(line + 2, "section __TEXT,__text", 21)) {
        instr_ok = 1;
        continue; 
      }
      if (!strncmp(line + 2, "section\t", 8) ||
          !strncmp(line + 2, "section ", 8) ||
          !strncmp(line + 2, "bss\n", 4) ||
          !strncmp(line + 2, "data\n", 5)) {
        instr_ok = 0;
        continue;
      }

    }

    /* Detect off-flavor assembly (rare, happens in gdb). When this is
       encountered, we set skip_csect until the opposite directive is
       seen, and we do not instrument. */

    // .code32 / .code64: skip while the directive's word size differs from
    // the one we are instrumenting for (use_64bit).
    if (strstr(line, ".code")) {

      if (strstr(line, ".code32")) skip_csect = use_64bit;
      if (strstr(line, ".code64")) skip_csect = !use_64bit;

    }

    /* Detect syntax changes, as could happen with hand-written assembly.
       Skip Intel blocks, resume instrumentation when back to AT&T. */

    // Two assembly syntaxes exist, Intel and AT&T; Intel-syntax regions are
    // not instrumented.
    if (strstr(line, ".intel_syntax")) skip_intel = 1;
    if (strstr(line, ".att_syntax")) skip_intel = 0;

    /* Detect and skip ad-hoc __asm__ blocks, likewise skipping them. */

    // Only lines with '#' as first or second character are examined:
    // "#APP" turns skipping on. Because "#NO_APP" does not contain "#APP"
    // as a substring, that line only clears the flag.
    if (line[0] == '#' || line[1] == '#') {

      if (strstr(line, "#APP")) skip_app = 1;
      if (strstr(line, "#NO_APP")) skip_app = 0;

    }

    /* If we're in the right mood for instrumenting, check for function
       names or conditional labels. This is a bit messy, but in essence,
       we want to catch:

         ^main:      - function entry point (always instrumented)
         ^.L0:       - GCC branch label
         ^.LBB0_0:   - clang branch label (but only in clang mode)
         ^\tjnz foo  - conditional branches

       ...but not:

         ^# BB#0:    - clang comments
         ^ # BB#0:   - ditto
         ^.Ltmp0:    - clang non-branch labels
         ^.LC0       - GCC non-branch labels
         ^.LBB0_0:   - ditto (when in GCC mode)
         ^\tjmp foo  - non-conditional jumps

       Additionally, clang and GCC on MacOS X follow a different convention
       with no leading dots on labels, hence the weird maze of #ifdefs
       later on.

     */

    if (skip_intel || skip_app || skip_csect || !instr_ok ||
        line[0] == '#' || line[0] == ' ') continue;

    /* Conditional branch instruction (jnz, etc). We append the instrumentation
       right after the branch (to instrument the not-taken path) and at the
       branch destination label (handled later on). */

    // Instruction line (starts with a tab). A mnemonic beginning with 'j'
    // whose second letter is not 'm' is treated as a conditional jump
    // (this excludes jmp) and gets a trampoline right after it, subject to
    // inst_ratio.
    if (line[0] == '\t') {

      if (line[1] == 'j' && line[2] != 'm' && R(100) < inst_ratio) {

        fprintf(outf, use_64bit ? trampoline_fmt_64 : trampoline_fmt_32,
                R(MAP_SIZE));

        ins_lines++;

      }

      continue;

    }

    /* Label of some sort. This may be a branch destination, but we need to
       tread carefully and account for several different formatting
       conventions. */

// The Apple variants below can be ignored on a first reading. Note that
// each #ifdef / #else pair opens the same number of braces, which are then
// closed by the shared code after the final #endif.
#ifdef __APPLE__

    /* Apple: L<whatever><digit>: */

    if ((colon_pos = strstr(line, ":"))) {

      if (line[0] == 'L' && isdigit(*(colon_pos - 1))) {

#else

    /* Everybody else: .L<whatever>: */

    if (strstr(line, ":")) {

      if (line[0] == '.') {

#endif /* __APPLE__ */

        /* .L0: or LBB0_0: style jump destination */

#ifdef __APPLE__

        /* Apple: L<num> / LBB<num> */

        if ((isdigit(line[1]) || (clang_mode && !strncmp(line, "LBB", 3)))
            && R(100) < inst_ratio) {

#else

        /* Everybody else: .L<num> / .LBB<num> */

        if ((isdigit(line[2]) || (clang_mode && !strncmp(line + 1, "LBB", 3)))
            && R(100) < inst_ratio) {

#endif /* __APPLE__ */

          /* An optimization is possible here by adding the code only if the
             label is mentioned in the code in contexts other than call / jmp.
             That said, this complicates the code by requiring two-pass
             processing (messy with stdin), and results in a speed gain
             typically under 10%, because compilers are generally pretty good
             about not generating spurious intra-function jumps.

             We use deferred output chiefly to avoid disrupting
             .Lfunc_begin0-style exception handling calculations (a problem on
             MacOS X). */

          if (!skip_next_label) instrument_next = 1; else skip_next_label = 0;

        }

      } else {

        /* Function label (always instrumented, deferred mode). */

        instrument_next = 1;
    
      }

    }

  }

  // Append the main payload only if at least one trampoline was written.
  if (ins_lines)
    fputs(use_64bit ? main_payload_64 : main_payload_32, outf);

  if (input_file) fclose(inf);
  fclose(outf);

  if (!be_quiet) {

    if (!ins_lines) WARNF("No instrumentation targets found%s.",
                          pass_thru ? " (pass-thru mode)" : "");
    else OKF("Instrumented %u locations (%s-bit, %s mode, ratio %u%%).",
             ins_lines, use_64bit ? "64" : "32",
             getenv("AFL_HARDEN") ? "hardened" : 
             (sanitizer ? "ASAN/MSAN" : "non-hardened"),
             inst_ratio);
 
  }

}

主要存在两种插桩：在每一个基本块入口，afl-as 插入了一段代码。除此之外，在整个程序的末尾，插入了一段 300 多行的 AFL main payload

基本块：在 IDA 的 graph 视图里可以直接看到代码被分成的一个个块。afl-as 的判断依据是：函数的开头；每一条以 j 开头、但第二个字母不是 m 的指令（也就是除 jmp 以外的条件跳转指令）之后；以及编译器生成的分支 label 的开头。这些位置都会被插入一段桩代码。

检查函数名或者条件跳转标签, 判断是否正确插桩. 我们希望捕获下列情况, (^表示行开头)
    1. ^main        - 函数入口点
    2. ^.L0         - GCC跳转标签
    3. ^.LBB0_0     - clang跳转标签
    4. ^\tjnz foo   - 条件跳转指令
    ...
但不希望捕获下列标签
    1. ^# BB#0      - clang注释
    2. ^ # BB#0     - clang注释
    3. ^.Ltmp0      - clang非分支标签
    4. ^.LC0        - GCC非分支标签
    5. ^.LBB0_0     - GCC非分支标签
    6. ^\tjmp foo   - 非条件跳转

trampoline

基本块入口，跳板

将 rsp 下降一段距离
将 rdx, rcx, rax 的值存放到栈上
将 rcx 设为一个立即数（由 afl-as 随机生成）
调用 __afl_maybe_log
恢复 rdx, rcx, rax 和 rsp

fprintf(outf, use_64bit ? trampoline_fmt_64 : trampoline_fmt_32, R(MAP_SIZE));
// fprintf 的格式串是 trampoline_fmt_64 / trampoline_fmt_32，
// 其中 R(MAP_SIZE) 填入格式串里的 %08x，也就是要写入 ecx/rcx 的立即数。
// MAP_SIZE 定义为 64K，R(x) 定义为 (random() % (x))，故 R(MAP_SIZE) 是 [0, 64K) 范围内的一个随机数。

/* printf-style template of the 64-bit trampoline. Because it goes through
   fprintf(), every literal '%' of a register name is written as "%%". The
   single "%08x" receives the value passed by the caller, R(MAP_SIZE), which
   is loaded into %rcx before calling __afl_maybe_log.

   The emitted code moves %rsp down by 128+24 bytes, saves %rdx, %rcx and
   %rax in the lowest 24 bytes, makes the call, then restores the three
   registers and %rsp.
   NOTE(review): the extra 128 bytes presumably skip the x86-64 red zone -
   confirm against the SysV ABI. */
static const u8* trampoline_fmt_64 =
  "\n"
  "/* --- AFL TRAMPOLINE (64-BIT) --- */\n"
  "\n"
  ".align 4\n"
  "\n"
  "leaq -(128+24)(%%rsp), %%rsp\n"
  "movq %%rdx,  0(%%rsp)\n"
  "movq %%rcx,  8(%%rsp)\n"
  "movq %%rax, 16(%%rsp)\n"
  "movq $0x%08x, %%rcx\n"   
  "call __afl_maybe_log\n"
  "movq 16(%%rsp), %%rax\n"
  "movq  8(%%rsp), %%rcx\n"
  "movq  0(%%rsp), %%rdx\n"
  "leaq (128+24)(%%rsp), %%rsp\n"
  "\n"
  "/* --- END --- */\n"
  "\n";

main_payload

程序末尾插入main_payload

比较长，推荐在IDA等工具里看一下CFG

__afl_maybe_log

afl_maybe_log

lahf：load ah with flags，将eflags寄存器的值加载到ah
seto %al记录此时OF(溢出标志)的状态，当标志寄存器中的此标志位置位时，将AL寄存器置位
从注释可以看出来：__afl_maybe_log 先检查共享内存区域是否已经映射。
- 如果还未映射，则跳转到 __afl_setup 进行初始化；
- 否则继续执行 __afl_store 逻辑，rdx 寄存器指向共享内存区块。

首先判断 __afl_area_ptr 是否已经初始化：如果还没有，就跳转到 __afl_setup；下面先假设它已经初始化，来看 __afl_store 的逻辑。

/* Called by every trampoline with the location ID in %rcx. Saves the flags,
   makes sure the SHM map pointer is loaded into %rdx (jumping to
   __afl_setup if it is still zero), bumps the hit counter, restores the
   flags and returns. */
__afl_maybe_log:

  /* Save flags: lahf loads flags into %ah, seto records OF in %al. */
  lahf
  seto  %al

  /* Check if SHM region is already mapped. */

  movq  __afl_area_ptr(%rip), %rdx
  testq %rdx, %rdx
  je    __afl_setup

__afl_store:

  /* Calculate and store hit for the code location specified in rcx. */
  /* rcx = cur_loc ^ prev_loc (the map index). */
  xorq __afl_prev_loc(%rip), %rcx
  /* prev_loc ^= rcx, which leaves prev_loc == cur_loc. */
  xorq %rcx, __afl_prev_loc(%rip)
  /* prev_loc = cur_loc >> 1. */
  shrq $1, __afl_prev_loc(%rip)

  /* Increment the one-byte counter at area_ptr[rcx]. */
  incb (%rdx, %rcx, 1)

__afl_return:

  /* Restore flags: %al is 0 or 1 from seto, so adding 127 overflows exactly
     when OF had been set; sahf then reloads the flags kept in %ah. */
  addb $127, %al
  sahf
  ret

__afl_maybe_log 先检查共享内存区域是否已经映射。如果还未映射，则跳转到 __afl_setup 进行初始化；否则继续执行 __afl_store 逻辑，rdx 寄存器指向共享内存区块。

__afl_store 执行过程为：

将目前存储着 cur_location 的 rcx 寄存器异或上 prev_loc
将 prev_loc 设为 cur_loc （这里利用了异或运算的自反性）
将 prev_loc 右移一位
增加 hit count

非常巧妙的代码覆盖率测量：Coverage measurements (1)

cur_location = <COMPILE_TIME_RANDOM>;
shared_mem[cur_location ^ prev_location]++; 
prev_location = cur_location >> 1;

这些过程执行完后，恢复 eflags 并返回。现在，我们弄清了插入到基本块起始处的桩代码的逻辑。

__afl_setup

没有创建共享内存，会跳转到这里创建一块共享内存

/* Reached from __afl_maybe_log when this object's __afl_area_ptr is still
   zero. Reuses the pointer in __afl_global_area_ptr if it is already set;
   otherwise attaches the shared memory named by the __AFL_SHM_ID
   environment variable and records its address in both variables. */
__afl_setup:

  /* Do not retry setup if we had previous failures. */

  cmpb $0, __afl_setup_failure(%rip)
  jne __afl_return

  /* Check out if we have a global pointer on file. */

  movq  __afl_global_area_ptr@GOTPCREL(%rip), %rdx
  movq  (%rdx), %rdx
  /* Zero means the shared memory has not been attached yet. */
  testq %rdx, %rdx
  je    __afl_setup_first

  movq %rdx, __afl_area_ptr(%rip)
  jmp  __afl_store

/* First-time setup. The registers listed below are saved on the stack so
   that the abort and resume paths can restore them afterwards. */
__afl_setup_first:

  /* Save everything that is not yet saved and that may be touched by
     getenv() and several other libcalls we'll be relying on. */

  /* Reserve 352 bytes, then store rax, rcx, rdi, rsi, r8-r11 and
     xmm0-xmm15 (not every register: rdx, for example, is not stored here). */
  leaq -352(%rsp), %rsp

  movq %rax,   0(%rsp)
  movq %rcx,   8(%rsp)
  movq %rdi,  16(%rsp)
  movq %rsi,  32(%rsp)
  movq %r8,   40(%rsp)
  movq %r9,   48(%rsp)
  movq %r10,  56(%rsp)
  movq %r11,  64(%rsp)

  movq %xmm0,  96(%rsp)
  movq %xmm1,  112(%rsp)
  movq %xmm2,  128(%rsp)
  movq %xmm3,  144(%rsp)
  movq %xmm4,  160(%rsp)
  movq %xmm5,  176(%rsp)
  movq %xmm6,  192(%rsp)
  movq %xmm7,  208(%rsp)
  movq %xmm8,  224(%rsp)
  movq %xmm9,  240(%rsp)
  movq %xmm10, 256(%rsp)
  movq %xmm11, 272(%rsp)
  movq %xmm12, 288(%rsp)
  movq %xmm13, 304(%rsp)
  movq %xmm14, 320(%rsp)
  movq %xmm15, 336(%rsp)

  /* Map SHM, jumping to __afl_setup_abort if something goes wrong. */

  /* The 64-bit ABI requires 16-byte stack alignment. We'll keep the
     original stack ptr in the callee-saved r12. */

  pushq %r12
  movq  %rsp, %r12
  subq  $16, %rsp
  andq  $0xfffffffffffffff0, %rsp

  /* getenv("__AFL_SHM_ID"); a NULL result means the variable is unset. */
  leaq .AFL_SHM_ENV(%rip), %rdi
call getenv@PLT

  testq %rax, %rax
  je    __afl_setup_abort

  /* Convert the string value to an integer ID. */
  movq  %rax, %rdi
call atoi@PLT

  xorq %rdx, %rdx   /* shmat flags    */
  xorq %rsi, %rsi   /* requested addr */
  movq %rax, %rdi   /* SHM ID: the value parsed from the environment variable */
call shmat@PLT

  cmpq $-1, %rax
  je   __afl_setup_abort

  /* Store the address of the SHM region. */

  movq %rax, %rdx
  movq %rax, __afl_area_ptr(%rip)

  /* Also publish it through the shared __afl_global_area_ptr, then leave
     the region address in %rdx for the code that follows. */
  movq __afl_global_area_ptr@GOTPCREL(%rip), %rdx
  movq %rax, (%rdx)
  movq %rax, %rdx

调用 getenv("__AFL_SHM_ID")，如果返回 NULL（环境变量不存在）则进入 __afl_setup_abort 错误处理流程，否则往下继续执行。我们先去看错误处理流程

就是把 __afl_setup_failure 变量自增，还原所有寄存器并返回。
如果环境变量 __AFL_SHM_ID 不存在，则共享内存的初始化会失败
但整个桩代码对于目标程序是透明的——无非是保存了一些寄存器、执行了一些无副作用的代码、最后恢复寄存器——因此目标程序可以正常执行。
这片共享内存是什么时候创建的？答案是 afl-fuzz 在 setup_shm() 流程中调用 shmget() 创建了共享内存，并将 shm id 写入 __AFL_SHM_ID 环境变量。

/* Error path of the first-time setup: mark setup as failed, undo the stack
   adjustments made in __afl_setup_first, reload every register saved there
   and return through __afl_return. */
__afl_setup_abort:

  /* Record setup failure so that we don't keep calling
     shmget() / shmat() over and over again. */

  incb __afl_setup_failure(%rip)

  /* Undo the stack alignment: %r12 holds the pre-alignment stack pointer. */
  movq %r12, %rsp
  popq %r12

  /* Reload the registers from the same offsets they were stored at. */
  movq  0(%rsp), %rax
  movq  8(%rsp), %rcx
  movq 16(%rsp), %rdi
  movq 32(%rsp), %rsi
  movq 40(%rsp), %r8
  movq 48(%rsp), %r9
  movq 56(%rsp), %r10
  movq 64(%rsp), %r11

  movq  96(%rsp), %xmm0
  movq 112(%rsp), %xmm1
  movq 128(%rsp), %xmm2
  movq 144(%rsp), %xmm3
  movq 160(%rsp), %xmm4
  movq 176(%rsp), %xmm5
  movq 192(%rsp), %xmm6
  movq 208(%rsp), %xmm7
  movq 224(%rsp), %xmm8
  movq 240(%rsp), %xmm9
  movq 256(%rsp), %xmm10
  movq 272(%rsp), %xmm11
  movq 288(%rsp), %xmm12
  movq 304(%rsp), %xmm13
  movq 320(%rsp), %xmm14
  movq 336(%rsp), %xmm15

  /* Release the 352-byte save area. */
  leaq 352(%rsp), %rsp

  jmp __afl_return

如果存在环境变量，就会调用 shmat(shmid, 0, 0)，将共享内存区域的地址存进 __afl_area_ptr，同时通过 GOT 取得 __afl_global_area_ptr 的地址，把同一个值也写进这个全局变量。

__afl_area_ptr 是一个 lcomm，而 __afl_global_area_ptr 是一个 comm 。
lcomm 有点类似于全局 static 变量，不同文件中的重名 lcomm 是指向不同的地址；但 comm 有点类似于全局变量，不同文件中的重名 comm 指向同一个地址
假如目标程序是多份代码链接形成的，那么，每份汇编文件都拥有 AFL main payload；对于编译出的每个代码片段，都会有自己对应的 __afl_area_ptr 。
但 __afl_global_area_ptr 只有一份，只需要优先参考 __afl_global_area_ptr ，就能让所有桩代码访问的 shm 保持一致。

/* Storage used by the main payload. Symbols declared with .lcomm are local
   to the object file, while .comm declares a common symbol that same-named
   declarations in other object files share. */
.AFL_VARS:

  .lcomm   __afl_area_ptr, 8          /* this object's pointer to the SHM map   */
  .lcomm   __afl_prev_loc, 8          /* previous location, already shifted     */
  .lcomm   __afl_fork_pid, 4          /* PID returned by fork() in the server   */
  .lcomm   __afl_temp, 4              /* 4-byte buffer for pipe reads / writes  */
  .lcomm   __afl_setup_failure, 1     /* non-zero once SHM setup has failed     */
  .comm    __afl_global_area_ptr, 8, 8 /* SHM pointer shared by all objects     */

/* Name of the environment variable that carries the SHM ID. */
.AFL_SHM_ENV:
  .asciz "__AFL_SHM_ID"

最后，用 rdx 寄存器存放共享内存地址，进入 fork server。

fork server

execve() 的效率比较低。fuzzer 需要高频率地执行目标程序，显然不宜在 execve() 上浪费过多时间。

AFL 的解决方案是使用 fork server：

让程序在第一个基本块处停下，等待 fuzzer 发送指令；收到指令后继续执行程序；

执行完毕后，恢复 fork 时的状态。AFL高效的实现这一需求。

把 shm 地址连续压栈两次（以保证 rsp 仍然是 16 的倍数），然后调用 write(FORKSRV_FD+1, __afl_temp, 4)。
- config.h 中的 FORKSRV_FD 常量，fork server 使用 198、199 这两个 fd 传递指令。
- __afl_temp 是位于 bss 段、长度为 4 字节的变量。
- 使用pipe在进程间通信，这个pipe在afl-fuzz时进行创建
如果这四个字节写入失败（write() 的返回不等于 4），则直接进入 __afl_fork_resume 逻辑；否则，进入 __afl_fork_wait_loop 逻辑。

afl-fuzz 初始化 pipe：

// st_pipe: status pipe
if (dup2(ctl_pipe[0], FORKSRV_FD) < 0) PFATAL("dup2() failed");
if (dup2(st_pipe[1], FORKSRV_FD + 1) < 0) PFATAL("dup2() failed");

进入forkserver

/* Fork server entry: announce ourselves to the fuzzer by writing 4 bytes to
   the status pipe. If that write fails, run the program normally by jumping
   to __afl_fork_resume; otherwise fall into the wait loop. */
__afl_forkserver:

  /* Enter the fork server mode to avoid the overhead of execve() calls. We
     push rdx (area ptr) twice to keep stack alignment neat. */

  /* %rdx holds the address of the SHM region at this point. */
  pushq %rdx
  pushq %rdx

  /* Phone home and tell the parent that we're OK. (Note that signals with
     no SA_RESTART will mess it up). If this fails, assume that the fd is
     closed because we were execve()d from an instrumented binary, or because
     the parent doesn't want to use the fork server. */

  /* Write 4 bytes to the status pipe (FORKSRV_FD + 1) to tell the fuzzer
     that the fork server started. The " STRINGIFY(...) " fragment is how
     the C source splices the fd number into the payload string; the other
     excerpts show it already expanded as (198 + 1). */
  movq $4, %rdx               /* length    */
  leaq __afl_temp(%rip), %rsi /* data      */
  movq $" STRINGIFY((FORKSRV_FD + 1)) ", %rdi       /* file desc */
call write@PLT

  /* Anything other than a full 4-byte write counts as "no fork server". */
  cmpq $4, %rax
  jne  __afl_fork_resume

__afl_fork_wait_loop:

写入成功，会进入 __afl_fork_wait_loop

read读取ctl_pipe，如果没成功进入__afl_die
成功了，调用fork

/* One iteration of the fork server: block until the fuzzer sends 4 bytes on
   the control pipe, then fork. The child continues at __afl_fork_resume,
   while the parent falls through to the code that follows. */
__afl_fork_wait_loop:

  /* Wait for parent by reading from the pipe. Abort if read fails. */

  /* Read 4 bytes from the control pipe (FORKSRV_FD), i.e. from the fuzzer. */
  movq $4, %rdx               /* length    */
  leaq __afl_temp(%rip), %rsi /* data      */
  movq $" STRINGIFY(FORKSRV_FD) ", %rdi             /* file desc */
call read@PLT
  cmpq $4, %rax
  jne  __afl_die
call fork@PLT
  /* fork() result: negative = error, zero = child, positive = parent. */
  cmpq $0, %rax
  jl   __afl_die
  je   __afl_fork_resume

对于父进程，则继续执行以下逻辑：

  /* In parent process: write PID to pipe, then wait for child. */

  movl %eax, __afl_fork_pid(%rip)

  /* Write the child's PID (4 bytes) to the status pipe for the fuzzer. */
  movq $4, %rdx                   /* length    */
  leaq __afl_fork_pid(%rip), %rsi /* data      */
  movq $(198 + 1), %rdi             /* file desc */
call write@PLT
  /* waitpid() for the child; its wait status lands in __afl_temp. */
  movq $0, %rdx                   /* no flags  */
  leaq __afl_temp(%rip), %rsi     /* status    */
  movq __afl_fork_pid(%rip), %rdi /* PID       */
call waitpid@PLT
  cmpq $0, %rax
  jle  __afl_die

  /* Relay wait status to pipe, then loop back. */
  /* The 4-byte status written here tells the fuzzer the child has ended. */
  movq $4, %rdx               /* length    */
  leaq __afl_temp(%rip), %rsi /* data      */
  movq $(198 + 1), %rdi         /* file desc */
call write@PLT

  jmp  __afl_fork_wait_loop

对于子进程，跳转到 __afl_fork_resume

关闭描述符，恢复寄存器
调整sp指针，让程序继续执行

/* Child side of the fork server (also reached when no fork server is in
   use): close both pipe descriptors, undo the stack adjustments made during
   setup, reload the saved registers and continue at __afl_store. */
__afl_fork_resume:

  /* In child process: close fds, resume execution. */

  movq $198, %rdi
call close@PLT

  movq $(198 + 1), %rdi
call close@PLT

  /* Pop the two copies of the area pointer pushed in __afl_forkserver;
     %rdx ends up holding the SHM address again. */
  popq %rdx
  popq %rdx

  /* Undo the stack alignment: %r12 holds the pre-alignment stack pointer. */
  movq %r12, %rsp
  popq %r12

  /* Reload the registers stored by __afl_setup_first. */
  movq  0(%rsp), %rax
  movq  8(%rsp), %rcx
  movq 16(%rsp), %rdi
  movq 32(%rsp), %rsi
  movq 40(%rsp), %r8
  movq 48(%rsp), %r9
  movq 56(%rsp), %r10
  movq 64(%rsp), %r11

  movq  96(%rsp), %xmm0
  movq 112(%rsp), %xmm1
  movq 128(%rsp), %xmm2
  movq 144(%rsp), %xmm3
  movq 160(%rsp), %xmm4
  movq 176(%rsp), %xmm5
  movq 192(%rsp), %xmm6
  movq 208(%rsp), %xmm7
  movq 224(%rsp), %xmm8
  movq 240(%rsp), %xmm9
  movq 256(%rsp), %xmm10
  movq 272(%rsp), %xmm11
  movq 288(%rsp), %xmm12
  movq 304(%rsp), %xmm13
  movq 320(%rsp), %xmm14
  movq 336(%rsp), %xmm15

  /* Release the 352-byte save area. */
  leaq 352(%rsp), %rsp

  /* Record the hit for the location still held in %rcx, then return. */
  jmp  __afl_store

__afl_store 在增加 hit count 之后，跳转回 AFL 往基本块头部插入的桩代码——到此为止子进程恢复所有状态，开始了程序中第一个基本块的执行。

AFL 通过 fork server，得以避免频繁的 execve 调用，从而提升了 fuzz 效率。

代码覆盖率检测

AFL用了一块64KB的共享内存来存放tuple的信息，而且是采用byte来记录tuple的信息，之所以采用byte不是bit是因为还要记录命中数。使用这块有限的共享内存存在碰撞，会导致边覆盖率不准确，这是AFL的一个缺点。

桩函数（__afl_store）所完成的任务可以概括为：

cur_location = <COMPILE_TIME_RANDOM>;        //不同桩独有的随机数, 对应插桩时的R(MAP_SIZE)
shared_mem[cur_location ^ prev_location]++;  //prev_location类似全局变量, 表示上一个分支. shared_mem是共享内存
prev_location = cur_location >> 1;           //更新prev_location

cur_location 是一个随机数，这样 cur_location ^ prev_location 也近似随机，可以均匀地分布在 shared_mem 中，从而减少（但不能完全避免）不同桩之间的冲突

shared_mem[] 数组是一个调用者 (caller) 传给被插桩的二进制程序的64kb的共享空间。其中的每一字节表示元组(branch_src, branch_dst)的命中次数.

选择这个数组大小的原因是让冲突(collisions)尽可能减少。这样通常能处理2k到10k的分支点。同时，它的大小也足以达到毫秒级的分析。

而设置prev_location = cur_location >> 1则可以让元组具有方向性, 区分A->B与B->A两种情况.

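若prev_location 没有 >> 1
    则对于A->B有:
        prev_location = A;    //进入A时设置的
        cur_location = B;
        shared_mem[A^B]++;
    对于B->A有:
        prev_location = B;    //进入B时设置的
        cur_location = A;
        shared_mem[B^A]++;
    由于异或满足交换律, A^B == B^A, 这两条方向相反的边会落在同一个计数器上, 无法区分;
    同理, A->A 与 B->B 的下标都是 0, 也无法区分.
    右移一位之后, (A>>1)^B 与 (B>>1)^A 一般不再相等, 元组因此具有了方向性.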

参考

AFL 白皮书翻译与读书笔记