@@ -32,6 +32,12 @@ default: run-native
# Run the GPT-2 test binary. The recipe first executes $(LIBSPEC) (defined
# outside this section; presumably library-path setup -- confirm) and then, in
# the same shell, launches the first prerequisite.
run_llm.c: ./build/test_gpt2 dawnlib
	$(LIBSPEC) && $(firstword $^)
3434
# Run the profiler-enabled GPT-2 test binary with METAL_CAPTURE_ENABLED=1 in
# its environment. $(LIBSPEC) (defined outside this section) runs first in the
# same shell.
run_llm.c_with_metal_profiler: ./build/test_gpt2_with_metal_profiler dawnlib
	$(LIBSPEC) && METAL_CAPTURE_ENABLED=1 $<
37+
# Record an Instruments trace of the profiler-enabled GPT-2 test binary using
# xctrace's "Time Profiler" template. The binary's own rule compiles it with
# -g. $(LIBSPEC) (defined outside this section) runs first in the same shell.
# The template name must match exactly: no leading or trailing whitespace
# inside the quotes.
run_llm.c_with_time_profiler: ./build/test_gpt2_with_metal_profiler dawnlib
	$(LIBSPEC) && xcrun xctrace record --template 'Time Profiler' --launch -- $<
40+
3541run_llm.c_train : ./build/train_gpt2 dawnlib
3642 if [ ! -d dev ] ; then ln -s $( GPUCPP) /third_party/llm.c/dev ; fi
3743 if [ ! -f gpt2_tokenizer.bin ] ; then ln -s $( GPUCPP) /third_party/llm.c/gpt2_tokenizer.bin ; fi
@@ -48,8 +54,9 @@ gpt2_124M.bin: llm.c
4854 ln -s ./llm.c/gpt2_tokenizer.bin ; \
4955 fi
5056
51- build/test_gpt2 : llm.c build/unittest_kernels.o gpt2_124M.bin
52- mkdir -p build
57+ define preprocess_file
58+ sed -i -e 's/int main(/int MAIN(/g' llm.c/test_gpt2.c
59+ sed -i -e 's/int main(/int MAIN(/g' llm.c/train_gpt2.c
5360 sed -i -e 's/void encoder_forward(/void ENCODER_FORWARD_CPU(/g' llm.c/train_gpt2.c
5461 sed -i -e 's/void layernorm_forward(/void LAYERNORM_FORWARD_CPU(/g' llm.c/train_gpt2.c
5562 sed -i -e 's/void matmul_forward(/void MATMUL_FORWARD_CPU(/g' llm.c/train_gpt2.c
@@ -67,26 +74,22 @@ build/test_gpt2: llm.c build/unittest_kernels.o gpt2_124M.bin
6774 sed -i -e 's/void crossentropy_softmax_backward(/void CROSSENTROPY_SOFTMAX_BACKWARD_CPU(/g' llm.c/train_gpt2.c
6875 grep -q "^# include \"unittest_kernels.h\"" llm.c/train_gpt2.c || \
6976 printf '1i\n#include "unittest_kernels.h"\n.\nw\nq\n' | ed -s llm.c/train_gpt2.c
77+ endef
78+
# Build the GPT-2 test executable from llm.c/test_gpt2.c and the kernel object.
# preprocess_file patches the llm.c sources in place (renames main and the CPU
# kernel functions, and adds the unittest_kernels.h include) before compiling.
build/test_gpt2: llm.c build/unittest_kernels.o gpt2_124M.bin
	mkdir -p $(@D)
	$(preprocess_file)
	$(CC) $(CFLAGS) $(LDFLAGS) llm.c/test_gpt2.c build/unittest_kernels.o -o $@
7183
# Build the GPT-2 test executable with the Metal profiler compiled in.
# Differences from build/test_gpt2:
#   - adds $(GPUCPP)/experimental/profiler/metal.mm and -I$(GPUCPP),
#   - links the Metal and Foundation frameworks,
#   - defines METAL_PROFILER and keeps debug info (-g).
# Framework names are spelled with their real case ("Metal") so the link does
# not depend on a case-insensitive filesystem.
build/test_gpt2_with_metal_profiler: llm.c build/unittest_kernels.o gpt2_124M.bin
	mkdir -p build
	$(call preprocess_file)
	$(CC) $(CFLAGS) $(LDFLAGS) -o $@ llm.c/test_gpt2.c build/unittest_kernels.o \
		-I$(GPUCPP) $(GPUCPP)/experimental/profiler/metal.mm \
		-framework Metal -framework Foundation -DMETAL_PROFILER -g
	# Rewrite the @rpath reference to libdawn.dylib into the path under
	# $(GPUCPP)/third_party/lib.
	install_name_tool -change @rpath/libdawn.dylib $(GPUCPP)/third_party/lib/libdawn.dylib $@
89+
# Build the GPT-2 training executable from llm.c/train_gpt2.c and the kernel
# object. The in-place source patching (function renames and the
# unittest_kernels.h include) is done by the shared preprocess_file recipe
# fragment instead of a per-rule list of sed commands.
build/train_gpt2: llm.c build/unittest_kernels.o gpt2_124M.bin
	mkdir -p $(@D)
	$(preprocess_file)
	$(CC) $(CFLAGS) $(LDFLAGS) llm.c/train_gpt2.c build/unittest_kernels.o -o $@
9194
9295build/ops.o : ops.cpp ops.hpp kernels.h llm.c
0 commit comments