-
Notifications
You must be signed in to change notification settings - Fork 29
Expand file tree
/
Copy pathconcurrent-inferences.f90
More file actions
188 lines (155 loc) · 7.48 KB
/
concurrent-inferences.f90
File metadata and controls
188 lines (155 loc) · 7.48 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
! Copyright (c) 2023-2025, The Regents of the University of California
! Terms of use are as specified in LICENSE.txt
program concurrent_inferences
  !! Load a neural network from the JSON file named by the "--network" flag and
  !! time batches of inferences over a lat x lev x lon grid of input tensors.
  !! Each optional strategy flag selects one timing strategy; when no strategy
  !! flag is present, every strategy is timed.  Each selected strategy is timed
  !! once per trial and its mean/spread are reported at the end.
  use fiats_m, only : neural_network_t, tensor_t, double_precision, double_precision_file_t
  use julienne_m, only : string_t, command_line_t, file_t
  use iso_fortran_env, only : int64, real64
  use omp_lib
  implicit none

  integer, parameter :: lat=263, lev=15, lon=317 ! latitudes, levels (elevations), longitudes
  character(len=*), parameter :: nl = new_line('')
  type(command_line_t) :: command_line
  type(string_t) :: network_file_name
  type(neural_network_t) :: neural_network
  type(tensor_t), allocatable :: inputs(:,:,:), outputs(:,:,:)
  integer :: trial, num_trials

  network_file_name = string_t(command_line%flag_value("--network"))

  ! Without a network file there is nothing to run: print usage and stop.
  if (len(network_file_name%string()) == 0) then
    error stop nl // nl &
      // 'Usage:' // nl &
      // ' fpm run \' // nl &
      // ' --example concurrent-inferences \' // nl &
      // ' --compiler flang-new \' // nl &
      // ' --flag -O3 \' // nl &
      // ' -- --network "<file-name>" \' // nl &
      // ' [--do-concurrent] [--openmp] [--elemental] [--double-precision] [--trials <integer>]' // nl &
      // 'where <> indicates user input and [] indicates an optional argument.'
  end if

  ! random_inputs() also constructs neural_network as a side effect.
  inputs = random_inputs()
  allocate(outputs, mold=inputs)

  associate( &
    want_dc   => command_line%argument_present(["--do-concurrent"]), &
    want_omp  => command_line%argument_present(["--openmp"]), &
    want_elem => command_line%argument_present(["--elemental"]), &
    want_dp   => command_line%argument_present(["--double-precision"]) &
  )
    num_trials = trials()

    timings: block
      real(real64) :: t_dc(num_trials), t_omp(num_trials), t_elem(num_trials), t_dp_dc(num_trials)
      logical :: none_requested, time_dc, time_omp, time_elem, time_dp_dc

      ! With no strategy flag on the command line, every strategy is timed.
      none_requested = .not. any([want_dc, want_omp, want_elem, want_dp])
      time_dc    = none_requested .or. want_dc
      time_omp   = none_requested .or. want_omp
      time_elem  = none_requested .or. want_elem
      time_dp_dc = none_requested .or. want_dp

      do trial = 1, num_trials
        if (time_dc)    t_dc(trial)    = do_concurrent_time()
        if (time_omp)   t_omp(trial)   = openmp_time()
        if (time_elem)  t_elem(trial)  = elemental_time()
        if (time_dp_dc) t_dp_dc(trial) = double_precision_do_concurrent_time()
      end do

      print *,"variable mean stdev"
      if (time_dc)    call print_stats("t_dc ", t_dc)
      if (time_omp)   call print_stats("t_omp ", t_omp)
      if (time_elem)  call print_stats("t_elem ", t_elem)
      if (time_dp_dc) call print_stats("t_dp_dc ", t_dp_dc)
    end block timings
  end associate
contains
subroutine print_stats(label, x)
  !! Print one line containing the label followed by the arithmetic mean and
  !! the population standard deviation of the samples in x.
  !!
  !! label: text written at the start of the line
  !! x:     samples to summarize (the timings collected by the main program)
  character(len=*), intent(in) :: label
  real(real64), intent(in) :: x(:)

  ! Convert the sample count at the working precision of the samples.
  associate(n => real(size(x), real64))
    associate(mean => sum(x)/n)
      ! The standard deviation is the square root of the mean squared deviation
      ! (the variance); without the sqrt the printed value would be the variance.
      associate(stdev => sqrt(sum((x-mean)**2)/n))
        print '(1x,a,2(en10.2,:,", "))', label, mean, stdev
      end associate
    end associate
  end associate
end subroutine
integer function trials()
  !! Number of timing trials requested through the "--trials" command-line flag.
  !!
  !! Result: 1 when the flag value is an empty string (presumably when the flag
  !! is absent -- confirm against command_line_t%flag_value); otherwise the
  !! integer read from the flag value.
  !! Terminates with `error stop` when the value cannot be read as an integer
  !! or is less than 1, because the caller sizes its timing arrays with the
  !! result and divides by that size when computing statistics.
  integer :: read_status

  associate(trials_string => command_line%flag_value("--trials"))
    if (len(trials_string)==0) then
      trials = 1
    else
      ! iostat= turns a malformed value into a diagnosable condition instead of
      ! an unexplained runtime I/O abort.
      read(trials_string, *, iostat=read_status) trials
      if (read_status /= 0) error stop "concurrent_inferences: the --trials value must be an integer"
      if (trials < 1) error stop "concurrent_inferences: the --trials value must be at least 1"
    end if
  end associate
end function
function random_inputs() result(tensors)
  !! Construct the host program's neural_network from the file named by
  !! network_file_name, then return a lat x lev x lon array of input tensors.
  !! Each tensor holds neural_network%num_inputs() components filled by the
  !! intrinsic random_number.
  type(tensor_t), allocatable :: tensors(:,:,:)
  real, allocatable :: components(:,:,:,:)
  integer :: ilat, ilev, ilon

  print *, "Constructing a new neural_network_t object from the file " // network_file_name%string()
  ! Side effect: this assigns the host-associated network used by the timing functions.
  neural_network = neural_network_t(file_t(network_file_name))

  print *,"Defining an array of tensor_t input objects with random normalized components"
  allocate(tensors(lat,lev,lon))
  allocate(components(lat,lev,lon,neural_network%num_inputs()))
  call random_number(components)

  ! The trailing dimension of components supplies the values for one grid point.
  do concurrent(ilat=1:lat, ilev=1:lev, ilon=1:lon)
    tensors(ilat,ilev,ilon) = tensor_t(components(ilat,ilev,ilon,:))
  end do
end function
function do_concurrent_time() result(elapsed)
  !! Run one inference per grid point inside a `do concurrent` construct and
  !! return the elapsed system_clock time in seconds.  Results are stored in
  !! the host-associated outputs array.
  real(real64) :: elapsed
  integer(int64) :: tick_begin, tick_end, ticks_per_second
  integer :: ilat, ilev, ilon

  print *,"Performing",lat*lev*lon," inferences inside `do concurrent`."

  call system_clock(tick_begin, ticks_per_second)
  do concurrent(ilat=1:lat, ilev=1:lev, ilon=1:lon)
    outputs(ilat,ilev,ilon) = neural_network%infer(inputs(ilat,ilev,ilon))
  end do
  call system_clock(tick_end)

  ! Clock ticks divided by ticks per second gives seconds.
  elapsed = real(tick_end - tick_begin, real64)/real(ticks_per_second, real64)
  print '(1x,a,en10.2)',"Elapsed system clock during `do concurrent` inference: ", elapsed
end function
function openmp_time() result(elapsed)
  !! Run one inference per grid point inside an OpenMP `parallel do` over the
  !! three collapsed grid loops and return the elapsed system_clock time in
  !! seconds.  Results are stored in the host-associated outputs array.
  real(real64) :: elapsed
  integer(int64) :: tick_begin, tick_end, ticks_per_second
  integer :: ilat, ilev, ilon

  print *,"Performing",lat*lev*lon," inferences inside `omp parallel do`."

  call system_clock(tick_begin, ticks_per_second)
  ! The first index varies fastest in the innermost loop, matching the array storage order.
  !$omp parallel do default(none) shared(neural_network,inputs,outputs) collapse(3)
  do ilon = 1, lon
    do ilev = 1, lev
      do ilat = 1, lat
        outputs(ilat,ilev,ilon) = neural_network%infer(inputs(ilat,ilev,ilon))
      end do
    end do
  end do
  !$omp end parallel do
  call system_clock(tick_end)

  ! Clock ticks divided by ticks per second gives seconds.
  elapsed = real(tick_end - tick_begin, real64)/real(ticks_per_second, real64)
  print '(1x,a,en10.2)',"Elapsed system clock during `OpenMP` inference: ", elapsed
end function
real(real64) function elemental_time()
  !! Perform all inferences with a single whole-array assignment that applies
  !! neural_network%infer to the inputs array, and return the elapsed
  !! system_clock time in seconds.  Results are stored in the host-associated
  !! outputs array.
  integer(int64) t_start, t_finish, clock_rate

  print *,"Performing elemental inferences inside `omp workshare`"
  call system_clock(t_start, clock_rate)
  ! A workshare construct divides work among the threads of the enclosing
  ! parallel region.  Used on its own it binds to a team of one thread and runs
  ! serially, so the combined `parallel workshare` form is required to create
  ! the thread team whose work is being timed.
  !$omp parallel workshare
  outputs = neural_network%infer(inputs)
  !$omp end parallel workshare
  call system_clock(t_finish)
  ! Clock ticks divided by ticks per second gives seconds.
  elemental_time = real(t_finish - t_start, real64)/real(clock_rate, real64)
  print '(1x,a,en10.2)',"Elapsed system clock during `elemental` inference: ", elemental_time
end function
function double_precision_do_concurrent_time() result(elapsed)
  !! Build a double-precision network and double-precision random inputs
  !! (all local to this function), run one inference per grid point inside a
  !! `do concurrent` construct, and return the elapsed system_clock time in
  !! seconds.  Only the inference loop is timed; construction of the network
  !! and inputs happens before the clock starts.
  real(real64) :: elapsed
  integer(int64) :: tick_begin, tick_end, ticks_per_second
  integer :: ilat, ilev, ilon
  type(neural_network_t(double_precision)) :: dp_network
  type(tensor_t(double_precision)), allocatable :: dp_inputs(:,:,:), dp_outputs(:,:,:)
  double precision, allocatable :: components(:,:,:,:)

  print *, "Constructing a new neural_network_t object from the file " // network_file_name%string()
  dp_network = neural_network_t(double_precision_file_t(network_file_name))

  print *,"Defining an array of tensor_t input objects with random normalized components"
  allocate(dp_outputs(lat,lev,lon))
  allocate(dp_inputs(lat,lev,lon))
  allocate(components(lat,lev,lon,dp_network%num_inputs()))
  call random_number(components)

  ! The trailing dimension of components supplies the values for one grid point.
  do concurrent(ilat=1:lat, ilev=1:lev, ilon=1:lon)
    dp_inputs(ilat,ilev,ilon) = tensor_t(components(ilat,ilev,ilon,:))
  end do

  print *,"Performing double-precision inference inside `do concurrent`"
  call system_clock(tick_begin, ticks_per_second)
  do concurrent(ilat=1:lat, ilev=1:lev, ilon=1:lon)
    dp_outputs(ilat,ilev,ilon) = dp_network%infer(dp_inputs(ilat,ilev,ilon))
  end do
  call system_clock(tick_end)

  ! Clock ticks divided by ticks per second gives seconds.
  elapsed = real(tick_end - tick_begin, real64)/real(ticks_per_second, real64)
  print '(1x,a,en10.2)',"Elapsed system clock during double precision concurrent inference: ", elapsed
end function
end program