llama2.c を解析してみよう

本当はllama3を解析しようかと思っていたのですが、まずはC言語で書かれたllama2.cを解析してみたいと思います。

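関数の呼び出し構成（コールツリー）はこんな感じですね。左端の「run.c 行番号」はrun.c内での定義位置を示し、「(null)」はrun.c内に定義がないもの（標準ライブラリ関数や if / for などの構文）です。また、末尾に「・・・」が付いている関数は、すでに上で展開済みのため配下を省略しています。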

run.c  906 <0>main()
(null)         |-<1>if()
run.c  891     |-<1>error_usage()
(null)         |     |-<2>fprintf()
(null)         |     +-<2>exit()
(null)         |-<1>for()
(null)         |-<1>strlen()
(null)         |-<1>atof()
(null)         |-<1>atoi()
(null)         |-<1>time()
run.c  164     |-<1>build_transformer()
run.c  142     |     |-<2>read_checkpoint()
(null)         |     |     |-<3>fopen()
(null)         |     |     |-<3>if()
(null)         |     |     |-<3>fprintf()
(null)         |     |     |-<3>exit()
(null)         |     |     |-<3>fread()
(null)         |     |     |-<3>sizeof()
(null)         |     |     |-<3>abs()
(null)         |     |     |-<3>fseek()
(null)         |     |     |-<3>ftell()
(null)         |     |     |-<3>fclose()
(null)         |     |     |-<3>open()
(null)         |     |     |-<3>mmap()
(null)         |     |     |-<3>ssizeof()
run.c  111     |     |     +-<3>memory_map_weights()
run.c   77     |     +-<2>malloc_run_state()
(null)         |           |-<3>calloc()
(null)         |           |-<3>sizeof()
(null)         |           |-<3>if()
(null)         |           |-<3>fprintf()
(null)         |           +-<3>exit()
run.c  385     |-<1>build_tokenizer()
(null)         |     |-<2>malloc()
(null)         |     |-<2>sizeof()
(null)         |     |-<2>for()
(null)         |     |-<2>fopen()
(null)         |     |-<2>if()
(null)         |     |-<2>fprintf()
(null)         |     |-<2>exit()
(null)         |     |-<2>fread()
(null)         |     +-<2>fclose()
run.c  667     |-<1>build_sampler()
(null)         |     |-<2>malloc()
(null)         |     +-<2>sizeof()
(null)         |-<1>strcmp()
run.c  729     |-<1>generate()
(null)         |     |-<2>if()
(null)         |     |-<2>malloc()
(null)         |     |-<2>strlen()
(null)         |     |-<2>sizeof()
run.c  452     |     |-<2>encode()
(null)         |     |     |-<3>if()
(null)         |     |     |-<3>fprintf()
(null)         |     |     |-<3>exit()
(null)         |     |     |-<3>malloc()
(null)         |     |     |-<3>sizeof()
(null)         |     |     |-<3>for()
(null)         |     |     |-<3>qsort()
run.c  445     |     |     |-<3>str_lookup()
(null)         |     |     |     |-<4>bsearch()
(null)         |     |     |     +-<4>sizeof()
(null)         |     |     |-<3>while()
(null)         |     |     |-<3>sprintf()
(null)         |     |     +-<3>free()
(null)         |     |-<2>fprintf()
(null)         |     |-<2>exit()
(null)         |     |-<2>while()
run.c  231     |     |-<2>forward()
(null)         |     |     |-<3>memcpy()
(null)         |     |     |-<3>sizeof()
(null)         |     |     |-<3>for()
run.c  182     |     |     |-<3>rmsnorm()
(null)         |     |     |     |-<4>for()
(null)         |     |     |     +-<4>sqrtf()
run.c  217     |     |     |-<3>matmul()
(null)         |     |     |     +-<4>for()
(null)         |     |     |-<3>powf()
(null)         |     |     |-<3>head_dim()
(null)         |     |     |-<3>cosf()
(null)         |     |     |-<3>sinf()
(null)         |     |     |-<3>sqrtf()
run.c  197     |     |     |-<3>softmax()
(null)         |     |     |     |-<4>for()
(null)         |     |     |     |-<4>if()
(null)         |     |     |     +-<4>expf()
(null)         |     |     |-<3>memset()
(null)         |     |     |-<3>f()
(null)         |     |     +-<3>expf()
run.c  691     |     |-<2>sample()
(null)         |     |     |-<3>if()
run.c  590     |     |     |-<3>sample_argmax()
(null)         |     |     |     |-<4>for()
(null)         |     |     |     +-<4>if()
(null)         |     |     |-<3>for()
run.c  197     |     |     |-<3>softmax() ・・・
run.c  687     |     |     |-<3>random_f32()
(null)         |     |     |     |-<4>return()
run.c  680     |     |     |     +-<4>random_u32()
(null)         |     |     |           +-<5>return()
run.c  603     |     |     |-<3>sample_mult()
(null)         |     |     |     |-<4>for()
(null)         |     |     |     +-<4>if()
run.c  624     |     |     +-<3>sample_topp()
(null)         |     |           |-<4>for()
(null)         |     |           |-<4>if()
(null)         |     |           |-<4>qsort()
(null)         |     |           +-<4>sizeof()
run.c  418     |     |-<2>decode()
(null)         |     |     |-<3>if()
(null)         |     |     +-<3>sscanf()
run.c  431     |     |-<2>safe_printf()
(null)         |     |     |-<3>if()
(null)         |     |     |-<3>isprint()
(null)         |     |     |-<3>isspace()
(null)         |     |     +-<3>printf()
(null)         |     |-<2>fflush()
run.c  719     |     |-<2>time_in_ms()
(null)         |     |     +-<3>clock_gettime()
(null)         |     |-<2>printf()
(null)         |     +-<2>free()
run.c  802     |-<1>chat()
(null)         |     |-<2>malloc()
(null)         |     |-<2>sizeof()
(null)         |     |-<2>while()
(null)         |     |-<2>if()
run.c  785     |     |-<2>read_stdin()
(null)         |     |     |-<3>printf()
(null)         |     |     |-<3>if()
(null)         |     |     |-<3>fgets()
(null)         |     |     +-<3>strlen()
(null)         |     |-<2>strcpy()
(null)         |     |-<2>sprintf()
run.c  452     |     |-<2>encode() ・・・
(null)         |     |-<2>printf()
run.c  231     |     |-<2>forward() ・・・
run.c  691     |     |-<2>sample() ・・・
run.c  418     |     |-<2>decode() ・・・
run.c  431     |     |-<2>safe_printf() ・・・
(null)         |     |-<2>fflush()
(null)         |     +-<2>free()
(null)         |-<1>fprintf()
run.c  676     |-<1>free_sampler()
(null)         |     +-<2>free()
run.c  411     |-<1>free_tokenizer()
(null)         |     |-<2>for()
(null)         |     +-<2>free()
run.c  171     +-<1>free_transformer()
(null)               |-<2>if()
(null)               |-<2>munmap()
(null)               |-<2>close()
run.c   98           +-<2>free_run_state()
(null)                     +-<3>free()
run.c  381 <0>compare_tokens()
(null)         +-<1>strcmp()
run.c  616 <0>compare()
(null)         +-<1>if()