-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathinit.sh
More file actions
92 lines (84 loc) · 2.52 KB
/
init.sh
File metadata and controls
92 lines (84 loc) · 2.52 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
#!/bin/bash
# Initialize the recommendation demo environment:
#  1. create the Milvus "item_embedding" collection via the v2 REST API,
#  2. render and run the Hive/Thrift init SQL,
#  3. generate mock data and upload it to the HDFS warehouse,
#  4. make sure the wrk benchmark tool is installed.
# Requires ../deploy/env.sh to provide NODE_IP, MILVUS_PORT, SQLREC_THRIFT_PORT.
shopt -s expand_aliases
# Pull in user aliases/PATH (e.g. hdfs/hive wrappers) before strict mode,
# so a missing ~/.bash_profile does not abort the script.
source ~/.bash_profile
set -ex
# Absolute directory containing this script; quoted so paths with spaces work.
dir=$(dirname "$(realpath "$0")")
# Split assignment from export so a failure in the command substitution
# is not masked by 'export' (SC2155).
BASE_DIR=$(dirname "${dir}")/deploy
export BASE_DIR
source "${dir}/../deploy/env.sh"
# Milvus collection schema for the create-collection request below:
# an Int64 primary key "id", an 8-dimensional FloatVector "embedding",
# and a VarChar "name" capped at 512 characters.
# Quoted heredoc delimiter keeps the JSON literal (no expansion); exported
# separately so child processes can read it.
schema=$(cat <<'JSON'
{
"autoId": false,
"enabledDynamicField": false,
"fields": [
{
"fieldName": "id",
"dataType": "Int64",
"isPrimary": true
},
{
"fieldName": "embedding",
"dataType": "FloatVector",
"elementTypeParams": {
"dim": "8"
}
},
{
"fieldName": "name",
"dataType": "VarChar",
"elementTypeParams": {
"max_length": 512
}
}
]
}
JSON
)
export schema
# Index definitions for the collection: AUTOINDEX on the vector field with
# COSINE similarity, and a scalar AUTOINDEX on the primary key.
# Quoted heredoc delimiter keeps the JSON literal; exported separately.
indexParams=$(cat <<'JSON'
[
{
"fieldName": "embedding",
"metricType": "COSINE",
"indexName": "embedding",
"indexType": "AUTOINDEX"
},
{
"fieldName": "id",
"indexName": "id",
"indexType": "AUTOINDEX"
}
]
JSON
)
export indexParams
# Milvus REST endpoint and credentials.
# NOTE(review): "root:Milvus" is the factory-default credential — acceptable
# for a local demo, but parameterize it for any shared deployment.
export CLUSTER_ENDPOINT="http://${NODE_IP}:${MILVUS_PORT}"
export TOKEN="root:Milvus"
# Create the item_embedding collection. --fail makes curl exit non-zero on
# HTTP-level errors so 'set -e' aborts the script; -sS hides the progress
# meter but keeps error output. NOTE(review): Milvus may still report errors
# in a 200 response's JSON "code" field — TODO: inspect the payload.
curl -sS --fail --request POST \
--url "${CLUSTER_ENDPOINT}/v2/vectordb/collections/create" \
--header "Authorization: Bearer ${TOKEN}" \
--header "Content-Type: application/json" \
-d "{
\"collectionName\": \"item_embedding\",
\"schema\": $schema,
\"indexParams\": $indexParams
}"
# Render init.sql with the current environment and run it against the
# Thrift SQL server. All path expansions quoted (SC2086).
envsubst < "${dir}/init.sql" > "${dir}/init.sql.tmp"
beeline -u "jdbc:hive2://${NODE_IP}:${SQLREC_THRIFT_PORT}/default;auth=noSasl" -f "${dir}/init.sql.tmp"
# Isolated virtualenv for the mock-data generator so its dependencies
# don't pollute the system Python.
python3 -m venv "${dir}/.venv"
source "${dir}/.venv/bin/activate"
pip install -r "${dir}/requirements.txt"
python "${dir}/mock_data.py"
# Upload the parquet sample produced by mock_data.py into the Hive warehouse
# partition, then create/register the Hive tables.
PARQUET_FILE="${dir}/behavior_sample.parquet"
HDFS_WAREHOUSE_DIR="/user/hive/warehouse"
TABLE_NAME="behavior_sample"
PARTITION_DATE="dt=2024-01-01"
if [ -f "$PARQUET_FILE" ]; then
  echo "Uploading parquet file to HDFS..."
  # Quoted expansions (SC2086); -f overwrites any previous upload.
  hdfs dfs -mkdir -p "${HDFS_WAREHOUSE_DIR}/${TABLE_NAME}/${PARTITION_DATE}"
  hdfs dfs -put -f "${PARQUET_FILE}" "${HDFS_WAREHOUSE_DIR}/${TABLE_NAME}/${PARTITION_DATE}/"
  echo "Parquet file uploaded successfully"
else
  # Fatal: mock_data.py should have produced this file. Diagnostic to stderr.
  echo "Warning: Parquet file not found: ${PARQUET_FILE}" >&2
  exit 1
fi
hive -f "${dir}/init_hive.sql"
# Ensure the wrk HTTP benchmarking tool is available; install it via apt
# if missing (Debian/Ubuntu). 'command -v' is the portable, builtin way to
# probe for a tool ('which' is an external command and non-standard).
if ! command -v wrk > /dev/null 2>&1; then
  echo "wrk not found, installing..."
  sudo apt-get update
  sudo apt-get install -y wrk
else
  echo "wrk is already installed"
fi