-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathscript.py
More file actions
57 lines (47 loc) · 1.61 KB
/
script.py
File metadata and controls
57 lines (47 loc) · 1.61 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
import sys
import pandas as pd
import matplotlib.pyplot as plt
def read_data(file_path, chunk_size=2):
    """Read comma-separated numbers from a file and average them in chunks.

    The file is parsed line by line, so values may be spread over several
    lines as well as separated by commas. Empty or whitespace-only tokens
    (for example from a trailing comma or a blank line) are ignored.

    Args:
        file_path: Path of the text file holding the numbers.
        chunk_size: How many consecutive values are averaged together.
            Defaults to 2, the original fixed grouping.

    Returns:
        A list with one float per chunk, in file order. If the number of
        values is not a multiple of ``chunk_size``, the final chunk is
        shorter and is averaged over the values it actually contains.
        An empty file yields an empty list.

    Raises:
        ValueError: If ``chunk_size`` is less than 1, or if a non-empty
            token cannot be converted to ``float``.
        OSError: If the file cannot be opened.
    """
    if chunk_size < 1:
        raise ValueError("chunk_size must be at least 1")

    values = []
    with open(file_path, 'r', encoding='utf-8') as file:
        # Splitting each line separately keeps a line break from gluing the
        # last value of one line to the first value of the next.
        for line in file:
            values.extend(
                float(token) for token in line.split(',') if token.strip()
            )

    # Stepping by chunk_size from 0 never produces an empty slice, so no
    # emptiness check (and no division by zero) is needed here.
    chunks = (
        values[start:start + chunk_size]
        for start in range(0, len(values), chunk_size)
    )
    return [sum(chunk) / len(chunk) for chunk in chunks]
def plot_data(data1, data2):
    """Plot two series of averages on one line chart and display it.

    Args:
        data1: Sequence of numbers drawn as the 'Desql' line.
        data2: Sequence of numbers drawn as the 'Vanilla Spark' line.

    The two sequences may have different lengths; the shorter one is padded
    with NaN, so its line simply ends earlier on the chart. The x axis is
    the positional index of each value.
    """
    # Wrapping each input in a Series lets pandas align the columns by index
    # and pad the shorter one with NaN. Passing raw lists of unequal length
    # straight to the DataFrame constructor raises ValueError instead.
    # dtype=float keeps an empty input from producing an object column.
    df = pd.DataFrame({
        'Desql Average': pd.Series(data1, dtype=float),
        'Vanilla Spark Average': pd.Series(data2, dtype=float),
    })

    # Draw both series on the same axes with distinct markers.
    plt.figure(figsize=(10, 5))
    plt.plot(df['Desql Average'], label='Desql', marker='o')
    plt.plot(df['Vanilla Spark Average'], label='Vanilla Spark', marker='x')

    # Title, axis labels and legend.
    plt.title('Comparison of Desql Overhead with Vanilla Spark')
    plt.xlabel('Query #')
    plt.ylabel('Average Time (s)')
    plt.legend()

    # Enable the grid, then show the finished figure.
    plt.grid(True)
    plt.show()
if __name__ == "__main__":
    # Two positional arguments are required: one data file per series.
    if len(sys.argv) < 3:
        print("Usage: python script.py <datafile1> <datafile2>")
        sys.exit(1)

    # Only the first two arguments are used; any extras are ignored.
    data1_path, data2_path = sys.argv[1:3]

    # Load the chunk averages from each file, first file first.
    data1, data2 = read_data(data1_path), read_data(data2_path)

    # Draw both series on a single chart.
    plot_data(data1, data2)