espejelomar committed on
Commit
151eb1b
•
1 Parent(s): 549c95d

Upload folder using huggingface_hub

Browse files
data/source/all_networks_developer_classification.csv CHANGED
The diff for this file is too large to render. See raw diff
 
data/source/all_networks_developer_classification_updated_february.csv ADDED
The diff for this file is too large to render. See raw diff
 
debug.csv CHANGED
The diff for this file is too large to render. See raw diff
 
github_metrics/__pycache__/utils.cpython-311.pyc CHANGED
Binary files a/github_metrics/__pycache__/utils.cpython-311.pyc and b/github_metrics/__pycache__/utils.cpython-311.pyc differ
 
github_metrics/developer_survival_plot.py CHANGED
@@ -4,7 +4,6 @@ import pandas as pd
4
  import seaborn as sns
5
  from lifelines import KaplanMeierFitter
6
  from matplotlib.colors import LinearSegmentedColormap
7
-
8
  from utils import save_plot
9
 
10
 
 
4
  import seaborn as sns
5
  from lifelines import KaplanMeierFitter
6
  from matplotlib.colors import LinearSegmentedColormap
 
7
  from utils import save_plot
8
 
9
 
github_metrics/main.py CHANGED
@@ -2,60 +2,131 @@ import gradio as gr
2
  import pandas as pd
3
  import plotly.express as px
4
  import plotly.graph_objects as go
5
- from termcolor import colored
6
  from scipy.stats import mannwhitneyu
 
7
  from utils import load_all_developers_dataset
8
 
 
9
  def process_input(input_text, uploaded_file, program_end_date=None, event_name=None):
10
  try:
11
  print(colored("Processing input...", "blue"))
12
  if uploaded_file is not None:
13
  print(colored("Reading from uploaded file...", "blue"))
14
  file_content = uploaded_file.decode("utf-8")
15
- github_handles = [handle.strip() for handle in file_content.split("\n") if handle.strip()]
 
 
16
  else:
17
  github_handles = [handle.strip() for handle in input_text.split(",")]
18
  print(colored(f"GitHub handles: {github_handles}", "blue"))
19
 
 
 
 
20
  df = load_all_developers_dataset()
21
  print(colored("Filtering dataset...", "blue"))
22
  one_year_ago = pd.Timestamp.now() - pd.DateOffset(years=1)
23
- filtered_df = df[(df["developer"].isin(github_handles)) & (df["month_year"] >= one_year_ago)]
 
 
24
  filtered_df = filtered_df.sort_values(by=["developer", "month_year"])
25
  filtered_df.loc[:, "month_year"] = pd.to_datetime(filtered_df["month_year"])
26
 
27
  line_fig = create_line_plot(filtered_df, github_handles, program_end_date)
28
- analysis_result = perform_statistical_analysis(filtered_df, github_handles, program_end_date)
29
- new_developers_count = count_new_developers(filtered_df, github_handles, program_end_date)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
30
 
31
  last_3_months = pd.Timestamp.now() - pd.DateOffset(months=3)
32
  recent_activity_user = filtered_df[filtered_df["month_year"] >= last_3_months]
33
  all_devs_df = load_all_developers_dataset()
34
  all_devs_filtered_df = all_devs_df[(all_devs_df["month_year"] >= last_3_months)]
35
- other_devs_recent_activity = all_devs_filtered_df[~all_devs_filtered_df["developer"].isin(github_handles)]
 
 
 
 
 
 
 
 
 
36
 
37
- user_specified_active = recent_activity_user[recent_activity_user["total_commits"] > 0]
38
- other_developers_active = other_devs_recent_activity[other_devs_recent_activity["total_commits"] > 0]
39
  box_fig = create_box_plot(user_specified_active, other_developers_active)
40
 
41
  print(colored("Classifying developers...", "blue"))
42
  classification_df = classify_developers(github_handles, recent_activity_user)
43
  print(colored("Classification completed.", "blue"))
44
 
45
- comparison_result = compare_user_developers_to_others(user_specified_active, other_developers_active, df, program_end_date)
46
- growth_rate_result = compare_growth_rate(user_specified_active, other_developers_active, df)
 
 
 
 
 
47
 
48
- tldr_summary = generate_tldr_summary(github_handles, classification_df, analysis_result, new_developers_count, comparison_result, growth_rate_result, event_name)
 
 
 
 
 
 
 
 
49
 
50
- return line_fig, box_fig, classification_df, analysis_result, new_developers_count, comparison_result, growth_rate_result, tldr_summary
 
 
 
 
 
 
 
 
 
51
  except Exception as e:
52
  print(colored(f"Error processing input: {e}", "red"))
53
- return None, None, None, None, "Error in processing input.", None, None, "Error in processing input."
 
 
 
 
 
 
 
 
 
 
54
 
55
  def create_line_plot(filtered_df, github_handles, program_end_date):
56
- all_developers = pd.DataFrame({"developer": github_handles, "month_year": pd.Timestamp.now(), "total_commits": 0})
 
 
 
 
 
 
57
  plot_df = pd.concat([filtered_df, all_developers])
58
- plot_df = plot_df.groupby(["developer", "month_year"])["total_commits"].sum().reset_index()
 
 
 
 
59
  line_fig = px.line(
60
  plot_df,
61
  x="month_year",
@@ -66,13 +137,22 @@ def create_line_plot(filtered_df, github_handles, program_end_date):
66
  )
67
  if program_end_date:
68
  program_end_date = pd.to_datetime(program_end_date)
69
- line_fig.add_vline(x=program_end_date, line_width=2, line_dash="dash", line_color="red")
 
 
70
  return line_fig
71
 
 
72
  def create_box_plot(user_specified_active, other_developers_active):
73
  box_fig = go.Figure()
74
- box_fig.add_trace(go.Box(y=user_specified_active["total_commits"], name="User Specified Developers"))
75
- box_fig.add_trace(go.Box(y=other_developers_active["total_commits"], name="Other Developers"))
 
 
 
 
 
 
76
  box_fig.update_layout(
77
  title="Comparison of Monthly Commits in the Last 3 Months: User Specified vs. Other Developers (Active Only)",
78
  yaxis_title="Total Monthly Commits",
@@ -80,6 +160,7 @@ def create_box_plot(user_specified_active, other_developers_active):
80
  )
81
  return box_fig
82
 
 
83
  def classify_developers(github_handles, recent_activity_user):
84
  classification = []
85
  for handle in github_handles:
@@ -99,12 +180,17 @@ def classify_developers(github_handles, recent_activity_user):
99
  "Previously active but no longer": 3,
100
  "Always been inactive": 4,
101
  }
102
- classification_df = pd.DataFrame(classification, columns=["Developer", "Classification", "Total Recent Commits"])
 
 
103
  classification_df["Sort Key"] = classification_df["Classification"].map(sort_keys)
104
- classification_df.sort_values(by=["Sort Key", "Total Recent Commits"], ascending=[True, False], inplace=True)
 
 
105
  classification_df.drop(["Sort Key", "Total Recent Commits"], axis=1, inplace=True)
106
  return classification_df
107
 
 
108
  def perform_statistical_analysis(filtered_df, github_handles, program_end_date_str):
109
  if program_end_date_str is None:
110
  return "Program end date not provided. Unable to perform statistical analysis."
@@ -120,33 +206,53 @@ def perform_statistical_analysis(filtered_df, github_handles, program_end_date_s
120
  before_counts = before_counts.reindex(all_developers.index, fill_value=0)
121
  after_counts = after_counts.reindex(all_developers.index, fill_value=0)
122
 
123
- if len(before_counts) < 2 or len(after_counts) < 2:
124
- return "Not enough data for statistical analysis."
125
 
126
  stat, p_value = mannwhitneyu(after_counts, before_counts)
127
- analysis_result = f"Mann-Whitney U test statistic: {stat:.3f}, P-value: {p_value:.3f}\n"
 
 
128
 
129
  if p_value < 0.2:
130
  if stat > 0:
131
- analysis_result += "Difference in commit activity before and after the program is considered significant. " \
132
- "The commit activity is higher after the program."
 
 
133
  else:
134
- analysis_result += "Difference in commit activity before and after the program is considered significant. " \
135
- "The commit activity is lower after the program."
 
 
136
  else:
137
- analysis_result += "No significant difference in commit activity before and after the program."
 
 
138
 
139
  return analysis_result
140
 
 
141
  def count_new_developers(filtered_df, github_handles, program_end_date_str):
142
  if program_end_date_str is None:
143
- return "Program end date not provided. Unable to count new developers."
 
 
 
 
 
 
 
 
144
 
145
  program_end_date = pd.to_datetime(program_end_date_str)
146
  two_months_after_program = program_end_date + pd.DateOffset(months=2)
147
 
148
  before_program = filtered_df[filtered_df["month_year"] < program_end_date]
149
- after_program = filtered_df[(filtered_df["month_year"] >= program_end_date) & (filtered_df["month_year"] <= two_months_after_program)]
 
 
 
150
 
151
  before_developers = before_program["developer"].unique()
152
  after_developers = after_program["developer"].unique()
@@ -156,17 +262,41 @@ def count_new_developers(filtered_df, github_handles, program_end_date_str):
156
 
157
  return f"Number of new developers committing code within 2 months after the program: {len(new_developers)}\nNew developers: {new_developers_str}"
158
 
159
- def compare_user_developers_to_others(user_specified_active, other_developers_active, df, program_end_date_str):
 
 
 
160
  if program_end_date_str is None:
161
- return "Program end date not provided. Unable to compare user-specified developers to others."
 
 
 
 
 
 
162
 
163
  program_end_date = pd.to_datetime(program_end_date_str)
164
-
165
- user_commits = df[(df["developer"].isin(user_specified_active["developer"])) & (df["month_year"] >= program_end_date)]["total_commits"]
166
- other_commits = df[(df["developer"].isin(other_developers_active["developer"])) & (df["month_year"] >= program_end_date)]["total_commits"]
 
 
 
 
 
 
 
 
 
 
 
 
 
167
 
168
  stat, p_value = mannwhitneyu(user_commits, other_commits)
169
- comparison_result = f"Mann-Whitney U test statistic: {stat:.3f}, P-value: {p_value:.3f}\n"
 
 
170
 
171
  if p_value < 0.25:
172
  if stat > 0:
@@ -178,6 +308,7 @@ def compare_user_developers_to_others(user_specified_active, other_developers_ac
178
 
179
  return comparison_result
180
 
 
181
  def compare_growth_rate(user_specified_active, other_developers_active, df):
182
  user_growth_rates = []
183
  other_growth_rates = []
@@ -197,7 +328,9 @@ def compare_growth_rate(user_specified_active, other_developers_active, df):
197
  other_growth_rates.append(other_growth_rate)
198
 
199
  stat, p_value = mannwhitneyu(user_growth_rates, other_growth_rates)
200
- comparison_result = f"Mann-Whitney U test statistic: {stat:.3f}, P-value: {p_value:.3f}\n"
 
 
201
 
202
  if p_value < 0.25:
203
  if stat > 0:
@@ -209,6 +342,7 @@ def compare_growth_rate(user_specified_active, other_developers_active, df):
209
 
210
  return comparison_result
211
 
 
212
  def calculate_average_growth_rate(commits):
213
  growth_rates = []
214
  for i in range(1, len(commits)):
@@ -220,10 +354,21 @@ def calculate_average_growth_rate(commits):
220
  else:
221
  return 0
222
 
223
- def generate_tldr_summary(github_handles, classification_df, analysis_result, new_developers_count, comparison_result, growth_rate_result, event_name):
 
 
 
 
 
 
 
 
 
224
  summary = f"### 📝 TLDR Summary for {', '.join(github_handles)}\n\n"
225
 
226
- highly_involved_devs = classification_df[classification_df["Classification"] == "Highly involved"]["Developer"].tolist()
 
 
227
  if highly_involved_devs:
228
  summary += f"**🌟 High Performers:** {', '.join(highly_involved_devs)}\n\n"
229
 
@@ -235,7 +380,9 @@ def generate_tldr_summary(github_handles, classification_df, analysis_result, ne
235
  summary += "**🔄 Commit Activity:** No significant change after the program.\n\n"
236
 
237
  if new_developers_count.startswith("Number of new developers"):
238
- summary += f"**🆕 New Developers:** {new_developers_count.split(':')[1].strip()}\n\n"
 
 
239
 
240
  if "significantly higher number of commits" in comparison_result:
241
  summary += "**🔍 Comparison with Other Developers:** User-specified developers have a significantly higher number of commits.\n\n"
@@ -266,6 +413,11 @@ with gr.Blocks() as app:
266
  to see their monthly commit activity, involvement classification, and comparisons with other developers.
267
  """
268
  )
 
 
 
 
 
269
  with gr.Row():
270
  with gr.Column():
271
  text_input = gr.Textbox(
@@ -282,8 +434,14 @@ with gr.Blocks() as app:
282
  """
283
  )
284
  with gr.Row():
285
- program_end_date_input = gr.Textbox(label="Program End Date (YYYY-MM-DD)", placeholder="e.g., 2023-06-30")
286
- event_name_input = gr.Textbox(label="Event Name (optional)", placeholder="e.g., Basecamp, Hackathon")
 
 
 
 
 
 
287
  gr.Markdown(
288
  """
289
  💡 *Tip: Specifying a program end date allows you to analyze the impact of events like Basecamp or Hackathons on developer activity. Leave it blank to analyze overall activity.*
@@ -360,11 +518,20 @@ with gr.Blocks() as app:
360
  btn.click(
361
  process_input,
362
  inputs=[text_input, file_input, program_end_date_input, event_name_input],
363
- outputs=[plot_output, box_plot_output, table_output, stat_analysis_output, new_developers_output, comparison_output, growth_rate_output, tldr_output],
 
 
 
 
 
 
 
 
 
364
  )
365
 
366
  print(colored("Gradio app initialized.", "blue"))
367
 
368
  if __name__ == "__main__":
369
  print(colored("Launching app...", "blue"))
370
- app.launch(share=True)
 
2
  import pandas as pd
3
  import plotly.express as px
4
  import plotly.graph_objects as go
 
5
  from scipy.stats import mannwhitneyu
6
+ from termcolor import colored
7
  from utils import load_all_developers_dataset
8
 
9
+
10
  def process_input(input_text, uploaded_file, program_end_date=None, event_name=None):
11
  try:
12
  print(colored("Processing input...", "blue"))
13
  if uploaded_file is not None:
14
  print(colored("Reading from uploaded file...", "blue"))
15
  file_content = uploaded_file.decode("utf-8")
16
+ github_handles = [
17
+ handle.strip() for handle in file_content.split("\n") if handle.strip()
18
+ ]
19
  else:
20
  github_handles = [handle.strip() for handle in input_text.split(",")]
21
  print(colored(f"GitHub handles: {github_handles}", "blue"))
22
 
23
+ if program_end_date == "":
24
+ program_end_date = None
25
+
26
  df = load_all_developers_dataset()
27
  print(colored("Filtering dataset...", "blue"))
28
  one_year_ago = pd.Timestamp.now() - pd.DateOffset(years=1)
29
+ filtered_df = df[
30
+ (df["developer"].isin(github_handles)) & (df["month_year"] >= one_year_ago)
31
+ ]
32
  filtered_df = filtered_df.sort_values(by=["developer", "month_year"])
33
  filtered_df.loc[:, "month_year"] = pd.to_datetime(filtered_df["month_year"])
34
 
35
  line_fig = create_line_plot(filtered_df, github_handles, program_end_date)
36
+
37
+ # Debug
38
+ # print(colored("Debugging filtered dataset and github handles...", "blue"))
39
+ # print(filtered_df.head(100))
40
+ # print(filtered_df["developer"].unique())
41
+ # print(github_handles)
42
+ filtered_df.to_csv("debug.csv", index=False)
43
+ # Debug
44
+
45
+ analysis_result = perform_statistical_analysis(
46
+ filtered_df, github_handles, program_end_date
47
+ )
48
+
49
+ new_developers_count = count_new_developers(
50
+ filtered_df, github_handles, program_end_date
51
+ )
52
 
53
  last_3_months = pd.Timestamp.now() - pd.DateOffset(months=3)
54
  recent_activity_user = filtered_df[filtered_df["month_year"] >= last_3_months]
55
  all_devs_df = load_all_developers_dataset()
56
  all_devs_filtered_df = all_devs_df[(all_devs_df["month_year"] >= last_3_months)]
57
+ other_devs_recent_activity = all_devs_filtered_df[
58
+ ~all_devs_filtered_df["developer"].isin(github_handles)
59
+ ]
60
+
61
+ user_specified_active = recent_activity_user[
62
+ recent_activity_user["total_commits"] > 0
63
+ ]
64
+ other_developers_active = other_devs_recent_activity[
65
+ other_devs_recent_activity["total_commits"] > 0
66
+ ]
67
 
 
 
68
  box_fig = create_box_plot(user_specified_active, other_developers_active)
69
 
70
  print(colored("Classifying developers...", "blue"))
71
  classification_df = classify_developers(github_handles, recent_activity_user)
72
  print(colored("Classification completed.", "blue"))
73
 
74
+ comparison_result = compare_user_developers_to_others(
75
+ user_specified_active, other_developers_active, df, program_end_date
76
+ )
77
+
78
+ growth_rate_result = compare_growth_rate(
79
+ user_specified_active, other_developers_active, df
80
+ )
81
 
82
+ tldr_summary = generate_tldr_summary(
83
+ github_handles,
84
+ classification_df,
85
+ analysis_result,
86
+ new_developers_count,
87
+ comparison_result,
88
+ growth_rate_result,
89
+ event_name,
90
+ )
91
 
92
+ return (
93
+ line_fig,
94
+ box_fig,
95
+ classification_df,
96
+ analysis_result,
97
+ new_developers_count,
98
+ comparison_result,
99
+ growth_rate_result,
100
+ tldr_summary,
101
+ )
102
  except Exception as e:
103
  print(colored(f"Error processing input: {e}", "red"))
104
+ return (
105
+ None,
106
+ None,
107
+ None,
108
+ None,
109
+ "Error in processing input. Check logs for more details on the error",
110
+ None,
111
+ None,
112
+ "Error in processing input. Check logs for more details on the error",
113
+ )
114
+
115
 
116
  def create_line_plot(filtered_df, github_handles, program_end_date):
117
+ all_developers = pd.DataFrame(
118
+ {
119
+ "developer": github_handles,
120
+ "month_year": pd.Timestamp.now(),
121
+ "total_commits": 0,
122
+ }
123
+ )
124
  plot_df = pd.concat([filtered_df, all_developers])
125
+ plot_df = (
126
+ plot_df.groupby(["developer", "month_year"])["total_commits"]
127
+ .sum()
128
+ .reset_index()
129
+ )
130
  line_fig = px.line(
131
  plot_df,
132
  x="month_year",
 
137
  )
138
  if program_end_date:
139
  program_end_date = pd.to_datetime(program_end_date)
140
+ line_fig.add_vline(
141
+ x=program_end_date, line_width=2, line_dash="dash", line_color="red"
142
+ )
143
  return line_fig
144
 
145
+
146
  def create_box_plot(user_specified_active, other_developers_active):
147
  box_fig = go.Figure()
148
+ box_fig.add_trace(
149
+ go.Box(
150
+ y=user_specified_active["total_commits"], name="User Specified Developers"
151
+ )
152
+ )
153
+ box_fig.add_trace(
154
+ go.Box(y=other_developers_active["total_commits"], name="Other Developers")
155
+ )
156
  box_fig.update_layout(
157
  title="Comparison of Monthly Commits in the Last 3 Months: User Specified vs. Other Developers (Active Only)",
158
  yaxis_title="Total Monthly Commits",
 
160
  )
161
  return box_fig
162
 
163
+
164
  def classify_developers(github_handles, recent_activity_user):
165
  classification = []
166
  for handle in github_handles:
 
180
  "Previously active but no longer": 3,
181
  "Always been inactive": 4,
182
  }
183
+ classification_df = pd.DataFrame(
184
+ classification, columns=["Developer", "Classification", "Total Recent Commits"]
185
+ )
186
  classification_df["Sort Key"] = classification_df["Classification"].map(sort_keys)
187
+ classification_df.sort_values(
188
+ by=["Sort Key", "Total Recent Commits"], ascending=[True, False], inplace=True
189
+ )
190
  classification_df.drop(["Sort Key", "Total Recent Commits"], axis=1, inplace=True)
191
  return classification_df
192
 
193
+
194
  def perform_statistical_analysis(filtered_df, github_handles, program_end_date_str):
195
  if program_end_date_str is None:
196
  return "Program end date not provided. Unable to perform statistical analysis."
 
206
  before_counts = before_counts.reindex(all_developers.index, fill_value=0)
207
  after_counts = after_counts.reindex(all_developers.index, fill_value=0)
208
 
209
+ if (before_counts == 0).all() or (after_counts == 0).all():
210
+ return "Not enough data for statistical analysis. All values are zero in either before or after counts."
211
 
212
  stat, p_value = mannwhitneyu(after_counts, before_counts)
213
+ analysis_result = (
214
+ f"Mann-Whitney U test statistic: {stat:.3f}, P-value: {p_value:.3f}\n"
215
+ )
216
 
217
  if p_value < 0.2:
218
  if stat > 0:
219
+ analysis_result += (
220
+ "Difference in commit activity before and after the program is considered significant. "
221
+ "The commit activity is higher after the program."
222
+ )
223
  else:
224
+ analysis_result += (
225
+ "Difference in commit activity before and after the program is considered significant. "
226
+ "The commit activity is lower after the program."
227
+ )
228
  else:
229
+ analysis_result += (
230
+ "No significant difference in commit activity before and after the program."
231
+ )
232
 
233
  return analysis_result
234
 
235
+
236
  def count_new_developers(filtered_df, github_handles, program_end_date_str):
237
  if program_end_date_str is None:
238
+ print(
239
+ colored(
240
+ "Program end date not provided. Unable to count new developers. No problem.",
241
+ "yellow",
242
+ )
243
+ )
244
+ return (
245
+ "Program end date not provided. Unable to count new developers. No problem."
246
+ )
247
 
248
  program_end_date = pd.to_datetime(program_end_date_str)
249
  two_months_after_program = program_end_date + pd.DateOffset(months=2)
250
 
251
  before_program = filtered_df[filtered_df["month_year"] < program_end_date]
252
+ after_program = filtered_df[
253
+ (filtered_df["month_year"] >= program_end_date)
254
+ & (filtered_df["month_year"] <= two_months_after_program)
255
+ ]
256
 
257
  before_developers = before_program["developer"].unique()
258
  after_developers = after_program["developer"].unique()
 
262
 
263
  return f"Number of new developers committing code within 2 months after the program: {len(new_developers)}\nNew developers: {new_developers_str}"
264
 
265
+
266
+ def compare_user_developers_to_others(
267
+ user_specified_active, other_developers_active, df, program_end_date_str
268
+ ):
269
  if program_end_date_str is None:
270
+ print(
271
+ colored(
272
+ "Program end date not provided. Unable to compare user-specified developers to others. No problem.",
273
+ "yellow",
274
+ )
275
+ )
276
+ return "Program end date not provided. Unable to compare user-specified developers to others. No problem."
277
 
278
  program_end_date = pd.to_datetime(program_end_date_str)
279
+ user_commits = df[
280
+ (df["developer"].isin(user_specified_active["developer"]))
281
+ & (df["month_year"] >= program_end_date)
282
+ ]["total_commits"]
283
+ other_commits = df[
284
+ (df["developer"].isin(other_developers_active["developer"]))
285
+ & (df["month_year"] >= program_end_date)
286
+ ]["total_commits"]
287
+
288
+ if len(user_commits) == 0 or len(other_commits) == 0:
289
+ print(
290
+ colored(
291
+ "Not enough data for comparison. Either user-specified developers or developers in the database have no commits after the program end date. Update database",
292
+ "red",
293
+ )
294
+ )
295
 
296
  stat, p_value = mannwhitneyu(user_commits, other_commits)
297
+ comparison_result = (
298
+ f"Mann-Whitney U test statistic: {stat:.3f}, P-value: {p_value:.3f}\n"
299
+ )
300
 
301
  if p_value < 0.25:
302
  if stat > 0:
 
308
 
309
  return comparison_result
310
 
311
+
312
  def compare_growth_rate(user_specified_active, other_developers_active, df):
313
  user_growth_rates = []
314
  other_growth_rates = []
 
328
  other_growth_rates.append(other_growth_rate)
329
 
330
  stat, p_value = mannwhitneyu(user_growth_rates, other_growth_rates)
331
+ comparison_result = (
332
+ f"Mann-Whitney U test statistic: {stat:.3f}, P-value: {p_value:.3f}\n"
333
+ )
334
 
335
  if p_value < 0.25:
336
  if stat > 0:
 
342
 
343
  return comparison_result
344
 
345
+
346
  def calculate_average_growth_rate(commits):
347
  growth_rates = []
348
  for i in range(1, len(commits)):
 
354
  else:
355
  return 0
356
 
357
+
358
+ def generate_tldr_summary(
359
+ github_handles,
360
+ classification_df,
361
+ analysis_result,
362
+ new_developers_count,
363
+ comparison_result,
364
+ growth_rate_result,
365
+ event_name,
366
+ ):
367
  summary = f"### 📝 TLDR Summary for {', '.join(github_handles)}\n\n"
368
 
369
+ highly_involved_devs = classification_df[
370
+ classification_df["Classification"] == "Highly involved"
371
+ ]["Developer"].tolist()
372
  if highly_involved_devs:
373
  summary += f"**🌟 High Performers:** {', '.join(highly_involved_devs)}\n\n"
374
 
 
380
  summary += "**🔄 Commit Activity:** No significant change after the program.\n\n"
381
 
382
  if new_developers_count.startswith("Number of new developers"):
383
+ summary += (
384
+ f"**🆕 New Developers:** {new_developers_count.split(':')[1].strip()}\n\n"
385
+ )
386
 
387
  if "significantly higher number of commits" in comparison_result:
388
  summary += "**🔍 Comparison with Other Developers:** User-specified developers have a significantly higher number of commits.\n\n"
 
413
  to see their monthly commit activity, involvement classification, and comparisons with other developers.
414
  """
415
  )
416
+ gr.Markdown(
417
+ """
418
+ 📺 **Video Tutorial:** Please watch this [5-minute video tutorial](https://www.loom.com/share/b60e7f1bd1ee473b97e9c84c74df692a) examining an African Bootcamp and the Basecamp bootcamp as examples to start using the app effectively.
419
+ """
420
+ )
421
  with gr.Row():
422
  with gr.Column():
423
  text_input = gr.Textbox(
 
434
  """
435
  )
436
  with gr.Row():
437
+ program_end_date_input = gr.Textbox(
438
+ label="Program End Date (YYYY-MM-DD)",
439
+ placeholder="e.g., 2023-06-30",
440
+ )
441
+ event_name_input = gr.Textbox(
442
+ label="Event Name (optional)",
443
+ placeholder="e.g., Basecamp, Hackathon",
444
+ )
445
  gr.Markdown(
446
  """
447
  💡 *Tip: Specifying a program end date allows you to analyze the impact of events like Basecamp or Hackathons on developer activity. Leave it blank to analyze overall activity.*
 
518
  btn.click(
519
  process_input,
520
  inputs=[text_input, file_input, program_end_date_input, event_name_input],
521
+ outputs=[
522
+ plot_output,
523
+ box_plot_output,
524
+ table_output,
525
+ stat_analysis_output,
526
+ new_developers_output,
527
+ comparison_output,
528
+ growth_rate_output,
529
+ tldr_output,
530
+ ],
531
  )
532
 
533
  print(colored("Gradio app initialized.", "blue"))
534
 
535
  if __name__ == "__main__":
536
  print(colored("Launching app...", "blue"))
537
+ app.launch(share=True)
github_metrics/utils.py CHANGED
@@ -1,6 +1,7 @@
1
  from datetime import datetime
2
- from termcolor import colored
3
  import pandas as pd
 
4
 
5
 
6
  def load_all_developers_dataset():
 
1
  from datetime import datetime
2
+
3
  import pandas as pd
4
+ from termcolor import colored
5
 
6
 
7
  def load_all_developers_dataset():