forked from dkaschek/dMod
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathconfigure
More file actions
executable file
·166 lines (142 loc) · 7.45 KB
/
configure
File metadata and controls
executable file
·166 lines (142 loc) · 7.45 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
#!/bin/sh
# Configure script, step 1: ask R for its compiler and BLAS/LAPACK settings.
# The variables set here (CC, CFLAGS, CPPFLAGS, PKG_CPPFLAGS, PKG_LIBS) are
# consumed by the later steps of this script.

# Path to R. R_HOME is used when it is set and non-empty; otherwise the R found
# on PATH is asked for its home directory.
R_HOME_DIR="${R_HOME:-$(R RHOME)}"
if [ -z "${R_HOME_DIR}" ]; then
  # Without this guard R_BIN would silently become "/bin/R" and every query
  # below would yield an empty string.
  echo "configure: could not determine R_HOME" >&2
  exit 1
fi
R_BIN="${R_HOME_DIR}/bin/R"

# Compiler and flags from R. R_BIN is quoted so that an installation path
# containing spaces is still invoked as a single command word.
CC="$("$R_BIN" CMD config CC)"
CFLAGS="$("$R_BIN" CMD config CFLAGS)"
CPPFLAGS="$("$R_BIN" CMD config CPPFLAGS)"
BLAS_LIBS="$("$R_BIN" CMD config BLAS_LIBS)"
LAPACK_LIBS="$("$R_BIN" CMD config LAPACK_LIBS)"
FLIBS="$("$R_BIN" CMD config FLIBS)"

# Package-level flags: no extra preprocessor flags yet; link against
# LAPACK, BLAS and the Fortran runtime libraries reported by R, in that order.
PKG_CPPFLAGS=""
PKG_LIBS="${LAPACK_LIBS} ${BLAS_LIBS} ${FLIBS}"
# Optional MKL flags if an MKL module is loaded (HPC).
# The ":+" expansion yields the flag string only when MKLROOT is set and
# non-empty; otherwise MKL_FLAGS is the empty string.
MKL_FLAGS="${MKLROOT:+-I${MKLROOT}/include -L${MKLROOT}/lib/intel64 -lmkl_rt -lpthread -lm -ldl}"
# Announce the check, then write the probe program to conftest.c.
# The probe declares both batched DGEMM entry points by hand, calls each on
# 1x1 matrices, and exits 0 only if every product has the exact expected value.
printf '%s\n' "* Checking for batched BLAS (cblas_dgemm_batch + cblas_dgemm_batch_strided) ..."
cat > conftest.c <<'CONFTEST_EOF'
#include <stdio.h>

/* Probe for cblas_dgemm_batch (pointer-array API) and cblas_dgemm_batch_strided. */
extern void cblas_dgemm_batch(
    const int layout,
    const int *transa_array, const int *transb_array,
    const int *m_array, const int *n_array, const int *k_array,
    const double *alpha_array,
    const double *const *a_array, const int *lda_array,
    const double *const *b_array, const int *ldb_array,
    const double *beta_array,
    double *const *c_array, const int *ldc_array,
    const int group_count, const int *group_size);

extern void cblas_dgemm_batch_strided(
    const int layout,
    const int transa, const int transb,
    const int m, const int n, const int k,
    const double alpha,
    const double *a, const int lda, const int stridea,
    const double *b, const int ldb, const int strideb,
    const double beta,
    double *c, const int ldc, const int stridec,
    const int batch_size);

enum {
    PROBE_COL_MAJOR = 102, /* CblasColMajor */
    PROBE_NO_TRANS = 111   /* CblasNoTrans */
};

int main(void) {
    /* Pointer API: one group holding two 1x1 * 1x1 products; both share one B. */
    double lhs0 = 2.0, lhs1 = 3.0;
    double rhs = 4.0;
    double out0 = 0.0, out1 = 0.0;
    const double *lhs_list[] = { &lhs0, &lhs1 };
    const double *rhs_list[] = { &rhs, &rhs }; /* same B for both products */
    double *out_list[] = { &out0, &out1 };
    int trans_a = PROBE_NO_TRANS, trans_b = PROBE_NO_TRANS;
    int rows = 1, cols = 1, inner = 1;
    int ld_a = 1, ld_b = 1, ld_c = 1;
    double scale_ab = 1.0, scale_c = 0.0;
    int n_groups = 1, per_group = 2;

    cblas_dgemm_batch(PROBE_COL_MAJOR,
                      &trans_a, &trans_b,
                      &rows, &cols, &inner, &scale_ab,
                      lhs_list, &ld_a,
                      rhs_list, &ld_b,
                      &scale_c,
                      out_list, &ld_c,
                      n_groups, &per_group);

    /* Expected: 2*4 = 8 and 3*4 = 12. */
    if (!(out0 == 8.0 && out1 == 12.0)) return 1;

    /* Strided API: three 1x1 products stored back to back (stride 1). */
    double lhs_run[] = { 2.0, 3.0, 5.0 };
    double rhs_run[] = { 4.0, 7.0, 2.0 };
    double out_run[] = { 0.0, 0.0, 0.0 };

    cblas_dgemm_batch_strided(PROBE_COL_MAJOR, PROBE_NO_TRANS, PROBE_NO_TRANS,
                              1, 1, 1,
                              1.0,
                              lhs_run, 1, 1,
                              rhs_run, 1, 1,
                              0.0,
                              out_run, 1, 1,
                              3);

    /* Expected: 2*4 = 8, 3*7 = 21, 5*2 = 10. */
    if (!(out_run[0] == 8.0 && out_run[1] == 21.0 && out_run[2] == 10.0)) return 1;

    return 0;
}
CONFTEST_EOF
# Compile, link and run conftest.c against each candidate set of link flags.
# On the first candidate that both links and runs successfully, have_batch is
# set to "yes" and PKG_LIBS is replaced by that candidate.
have_batch=no
# Try R's BLAS first, then add MKL flags if available. The second candidate
# expands to the empty string when MKL_FLAGS is empty, so the identical link
# line is not compiled (and possibly warned about) a second time.
for libs in "$PKG_LIBS" "${MKL_FLAGS:+$MKL_FLAGS $PKG_LIBS}"; do
  [ -z "$libs" ] && continue
  # CC, CFLAGS, CPPFLAGS and libs are intentionally unquoted: each may hold
  # several space-separated words that must reach the compiler separately.
  if ${CC} ${CFLAGS} -o conftest conftest.c ${CPPFLAGS} ${libs} >/dev/null 2>&1; then
    # Symbol found - verify it actually works at runtime
    if ./conftest >/dev/null 2>&1; then
      have_batch=yes
      PKG_LIBS="${libs}"
      break
    else
      echo " ⚠ batched BLAS found but not functional at runtime - skipping"
    fi
  fi
done
# Remove probe artifacts. conftest.dSYM is a directory that Apple toolchains
# may leave behind when compiling with -g, so it needs rm -rf.
rm -f conftest conftest.c conftest.o
rm -rf conftest.dSYM
# Report the probe result. On success, define HAVE_BATCH_GEMM for the package
# sources; otherwise print installation guidance for a BLAS that provides the
# batched routines.
case "${have_batch}" in
  yes)
    PKG_CPPFLAGS="${PKG_CPPFLAGS} -DHAVE_BATCH_GEMM"
    echo " ✓ cblas_dgemm_batch and cblas_dgemm_batch_strided found and verified"
    ;;
  *)
    # Quoted delimiter: the text below is printed verbatim, without expansion.
    cat <<'GUIDANCE_EOF'
 ✗ batched BLAS not available - using fallback implementation

 ┌────────────────────────────────────────────────────────────────────────────────────────────┐
 │ For optimized batched matrix multiplication, a BLAS with cblas_dgemm_batch and │
 │ cblas_dgemm_batch_strided is needed. │
 │ │
 │ Recommended: Intel oneAPI Math Kernel Library (MKL) │
 │ All MKL versions include both batched BLAS functions. │
 │ │
 │ Ubuntu/Debian: sudo apt install intel-mkl │
 │ Fedora/RHEL: sudo dnf install intel-oneapi-mkl-devel │
 │ macOS: conda install mkl mkl-devel (in conda env) │
 │ │
 │ Alternative: OpenBLAS (>= 0.3.28) │
 │ NOTE: Batched BLAS was added in OpenBLAS 0.3.28 and may be unstable in early │
 │ versions. Some Linux distributions ship OpenBLAS without it or with a │
 │ non-functional implementation. │
 │ │
 │ Make sure your chosen BLAS is set as R's backend. On Linux, ALL FOUR │
 │ alternatives must be switched consistently: │
 │ │
 │ sudo update-alternatives --config libblas.so.3-x86_64-linux-gnu │
 │ sudo update-alternatives --config libblas.so-x86_64-linux-gnu │
 │ sudo update-alternatives --config liblapack.so.3-x86_64-linux-gnu │
 │ sudo update-alternatives --config liblapack.so-x86_64-linux-gnu │
 │ │
 │ macOS: R links against the BLAS found at install time; reinstall R or set │
 │ DYLD_LIBRARY_PATH to point to your preferred BLAS │
 │ │
 │ Reinstall this package to pick up the optimized backend. │
 └────────────────────────────────────────────────────────────────────────────────────────────┘

GUIDANCE_EOF
    ;;
esac
# Write src/Makevars by substituting the @PKG_CPPFLAGS@ and @PKG_LIBS@
# placeholders in src/Makevars.in. Any failure aborts with a non-zero status
# so the script cannot report success without a usable src/Makevars.
echo "* Generating src/Makevars ..."
if [ ! -f src/Makevars.in ]; then
  echo "configure: src/Makevars.in not found" >&2
  exit 1
fi

# Escape the characters that are special in a sed replacement: backslash,
# '&' (the matched text) and '|' (the delimiter used below).
escape_sed_replacement() {
  printf '%s\n' "$1" | sed -e 's/[\\|&]/\\&/g'
}

cppflags_repl="$(escape_sed_replacement "${PKG_CPPFLAGS}")"
libs_repl="$(escape_sed_replacement "${PKG_LIBS}")"

if ! sed -e "s|@PKG_CPPFLAGS@|${cppflags_repl}|" \
         -e "s|@PKG_LIBS@|${libs_repl}|" \
         src/Makevars.in > src/Makevars; then
  echo "configure: failed to generate src/Makevars" >&2
  # Do not leave a truncated Makevars behind for the build to pick up.
  rm -f src/Makevars
  exit 1
fi

echo " PKG_CPPFLAGS: ${PKG_CPPFLAGS}"
echo " PKG_LIBS: ${PKG_LIBS}"
echo "* Configuration complete."