cetinca committed on
Commit
dade0d0
1 Parent(s): 1aebac7

Update scaling files

Browse files
test_api.py → api_scaling.py RENAMED
File without changes
test_api.sh → api_scaling.sh RENAMED
File without changes
plot_calls.py CHANGED
@@ -1,3 +1,4 @@
 
1
  from datetime import datetime
2
 
3
  import matplotlib.pyplot as plt
@@ -14,14 +15,44 @@ log_files = [
14
  for log_file in log_files:
15
  path_ = f"./data/{log_file}"
16
  df = pd.read_csv(filepath_or_buffer=path_, sep=";")
17
- df["elapsed"] = df["finished"].apply(lambda x: datetime.strptime(x, "%Y-%m-%d %H:%M:%S.%f")) - df["started"].apply(
18
- lambda x: datetime.strptime(x, "%Y-%m-%d %H:%M:%S.%f"))
19
- df["elapsed"] = df["elapsed"].apply(lambda x: x.total_seconds())
 
 
 
20
  df["success"] = df["outputs"].apply(lambda x: 0 if "Time-out" in x else 1)
21
- df.to_csv(f"./data/processed_{log_file}", index=False, sep=";")
22
 
23
  student_numbers = sorted(df['active_students'].unique())
24
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
  result = df.groupby(['active_students', 'success']) \
26
  .agg({
27
  'elapsed': ['mean', 'median', 'min', 'max'],
@@ -38,6 +69,7 @@ for log_file in log_files:
38
  title = "API result for 'text2int' endpoint"
39
 
40
  for student_number in student_numbers:
 
41
  try:
42
  failed_calls = result.loc[(student_number, 0), 'success'][0]
43
  except:
@@ -48,25 +80,17 @@ for log_file in log_files:
48
 
49
  rows = len(student_numbers)
50
 
51
- # plt.figure(figsize=(16, 10))
52
- # for index, student_number in enumerate(student_numbers, 1):
53
- # data = df[df["active_students"] == student_number]
54
- # fig = plt.subplot(rows, 2, 2 * index - 1)
55
- # plt.title("y=seconds, x=active students", x=0.75, y=0.75)
56
- # plt.boxplot(x=data["elapsed"], labels=[student_number])
57
- # plt.subplot(rows, 2, 2 * index)
58
- # plt.title("y=count of seconds, x=seconds", x=0.75, y=0.75)
59
- # plt.hist(x=data["elapsed"], bins=25, edgecolor='white')
60
-
61
  fig, axs = plt.subplots(rows, 2) # (rows, columns)
62
 
63
  for index, student_number in enumerate(student_numbers):
 
64
  data = df[df["active_students"] == student_number]
65
  axs[index][0].boxplot(x=data["elapsed"]) # axs[row][column]
66
  # axs[index][0].set_title(f'Boxplot for {student_number} students')
67
  axs[index][0].set_xlabel(f'student number {student_number}')
68
  axs[index][0].set_ylabel('Elapsed time (s)')
69
 
 
70
  axs[index][1].hist(x=data["elapsed"], bins=25) # axs[row][column]
71
  # axs[index][1].set_title(f'Histogram for {student_number} students')
72
  axs[index][1].set_xlabel('seconds')
@@ -74,4 +98,19 @@ for log_file in log_files:
74
 
75
  fig.suptitle(title, fontsize=16)
76
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
77
  plt.show()
 
1
+ import math
2
  from datetime import datetime
3
 
4
  import matplotlib.pyplot as plt
 
15
  for log_file in log_files:
16
  path_ = f"./data/{log_file}"
17
  df = pd.read_csv(filepath_or_buffer=path_, sep=";")
18
+ df["finished_ts"] = df["finished"].apply(
19
+ lambda x: datetime.strptime(x, "%Y-%m-%d %H:%M:%S.%f").timestamp())
20
+ df["started_ts"] = df["started"].apply(
21
+ lambda x: datetime.strptime(x, "%Y-%m-%d %H:%M:%S.%f").timestamp())
22
+ df["elapsed"] = df["finished_ts"] - df["started_ts"]
23
+
24
  df["success"] = df["outputs"].apply(lambda x: 0 if "Time-out" in x else 1)
 
25
 
26
  student_numbers = sorted(df['active_students'].unique())
27
 
28
+ bins_dict = dict() # bins size for each group
29
+ min_finished_dict = dict() # zero time for each group
30
+
31
+ for student_number in student_numbers:
32
+ # for each student group calculates bins size and zero time
33
+ min_finished = df["finished_ts"][df["active_students"] == student_number].min()
34
+ max_finished = df["finished_ts"][df["active_students"] == student_number].max()
35
+ bins = math.ceil(max_finished - min_finished)
36
+ bins_dict.update({student_number: bins})
37
+ min_finished_dict.update({student_number: min_finished})
38
+ print(f"student number: {student_number}")
39
+ print(f"min finished: {min_finished}")
40
+ print(f"max finished: {max_finished}")
41
+ print(f"bins finished seconds: {bins}, minutes: {bins / 60}")
42
+
43
+ df["time_line"] = None
44
+ for student_number in student_numbers:
45
+ # calculates time-line for each student group
46
+ df["time_line"] = df.apply(
47
+ lambda x: x["finished_ts"] - min_finished_dict[student_number]
48
+ if x["active_students"] == student_number
49
+ else x["time_line"],
50
+ axis=1
51
+ )
52
+
53
+ # creates a '.csv' from the dataframe
54
+ df.to_csv(f"./data/processed_{log_file}", index=False, sep=";")
55
+
56
  result = df.groupby(['active_students', 'success']) \
57
  .agg({
58
  'elapsed': ['mean', 'median', 'min', 'max'],
 
69
  title = "API result for 'text2int' endpoint"
70
 
71
  for student_number in student_numbers:
72
+ # Prints percentage of the successful and failed calls
73
  try:
74
  failed_calls = result.loc[(student_number, 0), 'success'][0]
75
  except:
 
80
 
81
  rows = len(student_numbers)
82
 
 
 
 
 
 
 
 
 
 
 
83
  fig, axs = plt.subplots(rows, 2) # (rows, columns)
84
 
85
  for index, student_number in enumerate(student_numbers):
86
+ # creates a boxplot for each test group
87
  data = df[df["active_students"] == student_number]
88
  axs[index][0].boxplot(x=data["elapsed"]) # axs[row][column]
89
  # axs[index][0].set_title(f'Boxplot for {student_number} students')
90
  axs[index][0].set_xlabel(f'student number {student_number}')
91
  axs[index][0].set_ylabel('Elapsed time (s)')
92
 
93
+ # creates a histogram for each test group
94
  axs[index][1].hist(x=data["elapsed"], bins=25) # axs[row][column]
95
  # axs[index][1].set_title(f'Histogram for {student_number} students')
96
  axs[index][1].set_xlabel('seconds')
 
98
 
99
  fig.suptitle(title, fontsize=16)
100
 
101
+ fig, axs = plt.subplots(rows, 1) # (rows, columns)
102
+
103
+ for index, student_number in enumerate(student_numbers):
104
+ # creates a histogram and shows API calls on a timeline for each test group
105
+ data = df[df["active_students"] == student_number]
106
+
107
+ print(data["time_line"].head(10))
108
+
109
+ axs[index].hist(x=data["time_line"], bins=bins_dict[student_number]) # axs[row][column]
110
+ # axs[index][1].set_title(f'Histogram for {student_number} students')
111
+ axs[index].set_xlabel('seconds')
112
+ axs[index].set_ylabel('Count of API calls')
113
+
114
+ fig.suptitle(title, fontsize=16)
115
+
116
  plt.show()
requirements.txt CHANGED
@@ -6,3 +6,4 @@ python-dotenv
6
  transformers
7
  torch
8
  httpx
 
 
6
  transformers
7
  torch
8
  httpx
9
+ matplotlib