-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathMakefile
More file actions
102 lines (82 loc) · 2.98 KB
/
Makefile
File metadata and controls
102 lines (82 loc) · 2.98 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
# Test programs; each one has a rule further down that builds and runs it.
TESTS_TARGETS = test_gevm test_gemm
# Model executables; each one has a rule further down that builds it.
MODELS = gpt2 llama

CXX = g++

# Baseline compiler flags, assembled group by group (the resulting order is
# significant only for readability of the build banner):
#   1. language standard and include path
#   2. warnings
#   3. code-generation options
CXXFLAGS = -std=c++17 -I./include/
CXXFLAGS += -Wall -Wextra -Wshadow -Wformat -Wnoexcept -Wcast-qual -Wunused -Wdouble-promotion
CXXFLAGS += -Wlogical-op -Wcast-align
CXXFLAGS += -fno-exceptions -fno-rtti -pthread

# Libraries appended to every link command.
LDFLAGS = -lm
# Machine hardware name as reported by `uname -m`. Simply expanded (:=) so the
# shell command runs once at parse time instead of on every reference.
UNAME_M := $(shell uname -m)
# Use all available CPU extensions, x86 only.
# The filter result is empty for any other architecture and also when uname
# produced no output, so the native flags are added only for x86_64.
ifneq ($(filter x86_64,$(UNAME_M)),)
CXXFLAGS += -march=native -mtune=native
endif
# Optimisation profile, selected by the YAMI_DEBUG make variable:
#   unset -> defines YAMI_FAST, optimises with -Ofast and enables LTO
#   set   -> defines YAMI_DEBUG, disables optimisation and emits debug info
# At some point "levels" should be introduced; for example, logging each time
# a tensor is created could be enabled only at the highest debug level.
# Note that -fsanitize=address can't be used when compiling a shared object.
ifndef YAMI_DEBUG
CXXFLAGS += -DYAMI_FAST -Ofast -ffp-contract=fast -funroll-loops -flto=auto -fuse-linker-plugin
else
CXXFLAGS += -DYAMI_DEBUG -O0 -g
endif
# Pass -DYAMI_TRACE to the compiler when the YAMI_TRACE make variable has a
# non-empty value (e.g. `make YAMI_TRACE=1 <goal>`).
ifdef YAMI_TRACE
CXXFLAGS += -DYAMI_TRACE
endif
# The pyyami goal links a shared object (-shared), which needs
# position-independent code.
# filter is used so that -fPIC is also added when pyyami is requested together
# with other goals (e.g. `make clean pyyami`); a plain string comparison with
# MAKECMDGOALS would match only when pyyami is the sole goal.
ifneq ($(filter pyyami,$(MAKECMDGOALS)),)
CXXFLAGS += -fPIC
endif
# Link OpenBLAS when compiling the tests.
# The condition holds as soon as any requested goal is one of TESTS_TARGETS.
# Comparing the whole MAKECMDGOALS string instead would also match an empty
# goal list and would miss invocations such as `make clean test_gemm` or
# `make test_gemm test_gevm` (goals mixed with others, or listed in a
# different order than TESTS_TARGETS).
ifneq ($(filter $(TESTS_TARGETS),$(MAKECMDGOALS)),)
LDFLAGS += -lopenblas
endif
# Build banner, printed while the makefile is parsed (before any recipe runs).
# Both values below come from shell commands and are captured once.
UNAME_S := $(shell uname -s)
# First line of the compiler's --version output.
cxx_version := $(shell $(CXX) --version | head -n 1)

$(info YAMI build info: )
$(info OS: $(UNAME_S))
$(info ARCH: $(UNAME_M))
$(info CXXFLAGS: $(CXXFLAGS))
$(info LDFLAGS: $(LDFLAGS))
$(info CXX: $(cxx_version))
$(info )
# None of these goals is tracked as an up-to-date file, so their recipes run
# every time they are requested.
# NOTE(review): clean appears to be the first target in the file, which makes
# it the default goal of a bare `make` -- confirm this is intended.
.PHONY: clean pyyami
.PHONY: $(MODELS) $(TESTS_TARGETS)

# Remove object files, shared objects, *.old files and the built executables.
clean:
	rm -rf *.o *.so *.old $(MODELS) $(TESTS_TARGETS)
# Build the shared library yami.so from src/yami.cpp (the first prerequisite)
# and every object file among the prerequisites, i.e. yami_blas.o.
# include/yami.h is listed only so that the goal depends on it.
pyyami: src/yami.cpp include/yami.h yami_blas.o
	$(CXX) $(CXXFLAGS) -shared $< -o yami.so $(filter %.o,$^)
# Compile tests/test_gevm.cpp together with yami_blas.o into ./test_gevm, then
# run the binary with OMP_NUM_THREADS=1.
test_gevm: tests/test_gevm.cpp yami_blas.o
	$(CXX) $(CXXFLAGS) $< -o $@ $(filter %.o,$^) $(LDFLAGS)
	OMP_NUM_THREADS=1 ./$@
# Profiling recipe kept for reference (not executed):
#   perf record -e cycles,cache-misses,cache-references --call-graph dwarf ./test_gevm
#   hotspot
# Compile tests/test_gemm.cpp together with yami_blas.o into ./test_gemm, then
# run the binary with OMP_NUM_THREADS=1.
test_gemm: tests/test_gemm.cpp yami_blas.o
	$(CXX) $(CXXFLAGS) $< -o $@ $(filter %.o,$^) $(LDFLAGS)
	OMP_NUM_THREADS=1 ./$@
# Profiling recipe kept for reference (not executed):
#   perf record -e cycles,cache-misses,cache-references --call-graph dwarf ./test_gemm
#   hotspot
#test: $(TESTS_TARGETS)
# @fail=0; \
# total_tests=0; \
# for t in $(TESTS_TARGETS); do \
# echo "======================================"; \
# echo "Running $$t"; \
# echo "======================================"; \
# total_tests=$$((total_tests + 1)); \
# ./$$t; \
# if [ $$? -ne 0 ]; then \
# echo "Test $$t failed!"; \
# fail=$$((fail + 1)); \
# fi; \
# echo "======================================"; \
# done; \
# if [ $${fail} -gt 0 ]; then \
# echo "Failed $$fail/$$total_tests tests!"; \
# else \
# echo "All tests passed!"; \
# fi;
# Library objects: <name>.o is compiled from src/<name>.cpp and is rebuilt
# whenever that source file or include/<name>.h is newer than the object.
# A static pattern rule keeps the list of objects explicit.
yami.o yami_utils.o yami_blas.o: %.o: src/%.cpp include/%.h
	$(CXX) $(CXXFLAGS) -c $< -o $@
# Model executables: each name in MODELS is built from models/<name>.cpp and
# linked with the three library objects, followed by the LDFLAGS libraries.
$(MODELS): %: models/%.cpp yami.o yami_utils.o yami_blas.o
	$(CXX) $(CXXFLAGS) $< -o $@ $(filter %.o,$^) $(LDFLAGS)