#!/usr/bin/env bash
#
# GPU Information - Interactive Demo Script
# Run: ./gpu.sh [section]
# Note: Requires NVIDIA GPU and nvidia-smi
#
set -euo pipefail
readonly NC='\033[0m'
readonly BOLD='\033[1m'
readonly GREEN='\033[0;32m'
readonly CYAN='\033[0;36m'
readonly YELLOW='\033[0;33m'
readonly RED='\033[0;31m'
section() { echo -e "\n${BOLD}${GREEN}=== $1 ===${NC}\n"; }
code() { echo -e "${CYAN}$ $1${NC}"; }
output() { echo -e "${YELLOW}$1${NC}"; }
warn() { echo -e "${RED}$1${NC}"; }
check_cmd() { command -v "$1" &>/dev/null; }
# -----------------------------------------------------------------------------
demo_basic() {
section "Basic GPU Information"
if check_cmd lspci; then
code 'lspci | grep -i vga'
lspci | grep -i vga || echo "(no VGA device)"
echo
code 'lspci | grep -i nvidia'
lspci | grep -i nvidia || echo "(no NVIDIA device)"
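    echo
    # Many datacenter GPUs enumerate as a "3D controller" rather than VGA,
    # so an empty VGA match does not rule out a GPU
    code 'lspci | grep -i "3d controller"'
    lspci | grep -i '3d controller' || echo "(no 3D controller)"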
  fi

  if check_cmd nvidia-smi; then
    echo
    code 'nvidia-smi -L'
    nvidia-smi -L
  else
    warn "nvidia-smi not found - NVIDIA driver not installed"
  fi
}
# -----------------------------------------------------------------------------
demo_status() {
section "GPU Status"
if ! check_cmd nvidia-smi; then
warn "nvidia-smi not found"
return
fi
code 'nvidia-smi'
nvidia-smi
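  echo
  # nvidia-smi also has a detailed query mode; -d limits output to named
  # sections (e.g. MEMORY, TEMPERATURE, POWER) to keep it readable
  code 'nvidia-smi -q -d MEMORY   # Full memory details only'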
}
# -----------------------------------------------------------------------------
demo_query() {
section "Query Format Examples"
if ! check_cmd nvidia-smi; then
warn "nvidia-smi not found"
echo
echo "Example query commands:"
code 'nvidia-smi --query-gpu=name,memory.total,memory.used --format=csv'
code 'nvidia-smi --query-gpu=utilization.gpu,temperature.gpu --format=csv'
return
fi
code 'nvidia-smi --query-gpu=name,driver_version --format=csv'
nvidia-smi --query-gpu=name,driver_version --format=csv
echo
code 'nvidia-smi --query-gpu=memory.total,memory.used,memory.free --format=csv'
nvidia-smi --query-gpu=memory.total,memory.used,memory.free --format=csv
echo
code 'nvidia-smi --query-gpu=utilization.gpu,utilization.memory --format=csv'
nvidia-smi --query-gpu=utilization.gpu,utilization.memory --format=csv
echo
code 'nvidia-smi --query-gpu=temperature.gpu,power.draw,power.limit --format=csv'
nvidia-smi --query-gpu=temperature.gpu,power.draw,power.limit --format=csv
echo
code 'nvidia-smi --query-gpu=clocks.gr,clocks.mem --format=csv'
nvidia-smi --query-gpu=clocks.gr,clocks.mem --format=csv
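  echo
  # For scripting, noheader/nounits strip the CSV header row and unit
  # suffixes, leaving bare values that are easy to parse
  code 'nvidia-smi --query-gpu=memory.used --format=csv,noheader,nounits'
  nvidia-smi --query-gpu=memory.used --format=csv,noheader,nounits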
}
# -----------------------------------------------------------------------------
demo_processes() {
section "GPU Processes"
if ! check_cmd nvidia-smi; then
warn "nvidia-smi not found"
return
fi
code 'nvidia-smi --query-compute-apps=pid,process_name,used_memory --format=csv'
nvidia-smi --query-compute-apps=pid,process_name,used_memory --format=csv 2>/dev/null || echo "No compute processes running"
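  echo
  # --query-compute-apps lists only CUDA/compute processes; graphics clients
  # (e.g. Xorg) appear in the process table of plain nvidia-smi output
  echo "Graphics processes are not listed here - see plain 'nvidia-smi' output."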
}
# -----------------------------------------------------------------------------
demo_monitoring() {
section "Monitoring Commands"
if ! check_cmd nvidia-smi; then
warn "nvidia-smi not found"
echo
echo "Example monitoring commands:"
code 'nvidia-smi -l 1 # Update every second'
code 'nvidia-smi dmon -s u # Utilization monitoring'
code 'nvidia-smi pmon -s m # Process memory monitoring'
return
fi
echo "Monitoring commands (Ctrl+C to stop):"
code 'nvidia-smi -l 1 # Update every second'
code 'nvidia-smi dmon -s u # Device utilization'
code 'nvidia-smi dmon -s p # Power monitoring'
code 'nvidia-smi dmon -s t # Temperature monitoring'
code 'nvidia-smi pmon -s u # Process utilization'
echo
code 'watch -n 1 nvidia-smi --query-gpu=utilization.gpu,memory.used,temperature.gpu --format=csv'
echo
echo "Running dmon for 3 seconds..."
timeout 3 nvidia-smi dmon -s u 2>/dev/null || true
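  # dmon -s u columns are sm/mem/enc/dec utilization percentages; the exact
  # metric set varies by GPU model and driver version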
}
# -----------------------------------------------------------------------------
demo_topology() {
section "Multi-GPU Topology"
if ! check_cmd nvidia-smi; then
warn "nvidia-smi not found"
return
fi
code 'nvidia-smi topo -m'
nvidia-smi topo -m 2>/dev/null || echo "(topology not available)"
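  # Reading the matrix: NV# = NVLink, PIX/PXB/PHB = PCIe paths of increasing
  # distance, SYS = traffic crosses the inter-socket interconnect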
}
# -----------------------------------------------------------------------------
demo_cuda() {
section "CUDA Environment"
if check_cmd nvcc; then
code 'nvcc --version'
nvcc --version
else
echo "nvcc not found - CUDA toolkit not installed"
fi
echo
echo "CUDA environment variables:"
code 'export CUDA_VISIBLE_DEVICES=0 # Use only GPU 0'
code 'export CUDA_VISIBLE_DEVICES=0,1 # Use GPUs 0 and 1'
code 'export CUDA_VISIBLE_DEVICES="" # Hide all GPUs'
  if [[ -n "${CUDA_VISIBLE_DEVICES:-}" ]]; then
    echo
    output "Current CUDA_VISIBLE_DEVICES=$CUDA_VISIBLE_DEVICES"
  fi
}
# -----------------------------------------------------------------------------
demo_management() {
section "Management Commands (require root)"
echo "These commands typically require root privileges:"
echo
code 'nvidia-smi -pm 1 # Enable persistence mode'
code 'nvidia-smi -pl 250 # Set power limit to 250W'
code 'nvidia-smi -lgc 1200,1800 # Lock graphics clock range'
code 'nvidia-smi -rgc # Reset graphics clocks'
code 'nvidia-smi -c 0 # Set compute mode (default)'
code 'nvidia-smi -r # Reset GPU'
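  echo
  # -pl must stay within the Min/Max Power Limit reported by -q -d POWER,
  # and a GPU reset fails while any process still holds the GPU
  echo "Check valid power limits first: nvidia-smi -q -d POWER"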
}
# -----------------------------------------------------------------------------
run_all() {
  demo_basic
  demo_status
  demo_query
  demo_processes
  demo_monitoring
  demo_topology
  demo_cuda
  demo_management
}
# -----------------------------------------------------------------------------
usage() {
  cat <<EOF
GPU Information - Interactive Demo

Usage: $0 [section]

Sections:
  basic      Basic GPU information
  status     nvidia-smi status
  query      Query format examples
  processes  GPU processes
  monitor    Monitoring commands
  topology   Multi-GPU topology
  cuda       CUDA environment
  manage     Management commands
  all        Run all demos (default)

Note: Requires NVIDIA GPU and nvidia-smi for most features.
EOF
}
# Main
case "${1:-all}" in
  basic)      demo_basic ;;
  status)     demo_status ;;
  query)      demo_query ;;
  processes)  demo_processes ;;
  monitor)    demo_monitoring ;;
  topology)   demo_topology ;;
  cuda)       demo_cuda ;;
  manage)     demo_management ;;
  all)        run_all ;;
  -h|--help)  usage ;;
  *)          echo "Unknown section: $1" >&2; usage; exit 1 ;;
esac