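# Scraped page header (Hugging Face file view), kept as a comment so the module parses:
#   uploader avatar: espejelomar's picture
#   commit 0faf251 (verified): "Upload folder using huggingface_hub"
#   page links: raw / history / blame; file size: 4.45 kB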
import gradio as gr
import pandas as pd
import matplotlib.pyplot as plt
from io import StringIO
from termcolor import colored
# Load the dataset with debug prints
def load_dataset(
    path: str = "data/source/all_networks_developer_classification.csv",
) -> pd.DataFrame:
    """Read the developer-activity CSV and parse its ``month_year`` column.

    Args:
        path: Location of the CSV file to read. Defaults to the path the
            app has always used, so existing zero-argument calls behave
            exactly as before.

    Returns:
        The CSV contents as a DataFrame whose ``month_year`` column has been
        converted to pandas datetimes.

    Raises:
        Exception: Whatever ``pd.read_csv`` / ``pd.to_datetime`` raised (for
            example a missing file, a missing ``month_year`` column, or a
            value that does not match the expected format). The error is
            printed in red and then re-raised unchanged.
    """
    try:
        print(colored("Loading dataset...", "blue"))
        df = pd.read_csv(path)
        # "%B_%Y" means full month name, underscore, four-digit year
        # (e.g. "January_2024"). Adjust the format if the data changes.
        df["month_year"] = pd.to_datetime(df["month_year"], format="%B_%Y")
        return df
    except Exception as e:
        print(colored(f"Error loading dataset: {e}", "red"))
        raise
# Process input and generate plot and classification with debug prints
def _read_upload_text(uploaded_file):
    """Return the textual content of an uploaded file.

    Accepts raw ``bytes`` (what ``gr.File(type="binary")`` delivers according
    to the Gradio File documentation), any object with a ``read()`` method,
    or a filesystem path.
    """
    if isinstance(uploaded_file, (bytes, bytearray)):
        raw = bytes(uploaded_file)
    elif hasattr(uploaded_file, "read"):
        raw = uploaded_file.read()
    else:
        # NOTE(review): assumed to be a path (str / os.PathLike), as other
        # gr.File ``type`` settings are documented to provide - confirm.
        with open(uploaded_file, "rb") as fh:
            raw = fh.read()
    if isinstance(raw, str):
        return raw
    # "utf-8-sig" drops a leading byte-order mark, which would otherwise
    # stay attached to the first handle and prevent it from matching.
    return raw.decode("utf-8-sig")


def _parse_handles(raw_text):
    """Split text into unique, non-empty handles, preserving first-seen order.

    Handles may be separated by commas and/or line breaks, so both the
    "user1,user2" textbox form and a one-handle-per-line CSV are accepted.
    """
    if not raw_text:
        return []
    cells = (
        cell.strip()
        for line in raw_text.splitlines()
        for cell in line.split(",")
    )
    # dict.fromkeys de-duplicates while keeping insertion order.
    return list(dict.fromkeys(cell for cell in cells if cell))


def _classify_developer(dev_df, cutoff, low_activity_threshold=20):
    """Map one developer's rows to an involvement label.

    ``cutoff`` is the earliest ``month_year`` that still counts as recent;
    ``low_activity_threshold`` is the recent-commit total below which a
    developer is considered only lightly active.
    """
    if dev_df.empty:
        return "Always been inactive"
    recent_activity = dev_df[dev_df["month_year"] >= cutoff]
    if recent_activity.empty:
        return "Previously active but no longer"
    if recent_activity["total_commits"].sum() < low_activity_threshold:
        return "Low-level active"
    return "Highly involved"


def process_input(input_text, uploaded_file):
    """Plot monthly commits and classify involvement for the given handles.

    Args:
        input_text: GitHub handles separated by commas and/or newlines.
            Ignored when ``uploaded_file`` is provided.
        uploaded_file: Optional upload containing the handles (raw bytes,
            a file-like object, or a path). ``None`` means "use input_text".

    Returns:
        A ``(figure, classification_df)`` tuple: a matplotlib figure with one
        commits-per-month line per handle, and a DataFrame with columns
        ``Developer`` and ``Classification`` sorted by classification in
        descending order.

    Raises:
        gr.Error: If no handle could be extracted from the input.
        Exception: Any other failure is printed in red and re-raised.
    """
    try:
        print(colored("Processing input...", "blue"))

        # Read GitHub handles from input text or uploaded file
        if uploaded_file is not None:
            print(colored("Reading from uploaded file...", "blue"))
            input_text = _read_upload_text(uploaded_file)
        github_handles = _parse_handles(input_text)
        print(colored(f"GitHub handles: {github_handles}", "blue"))

        if not github_handles:
            # gr.Error is Gradio's way to show a readable message in the UI
            # instead of analysing a phantom empty handle.
            raise gr.Error(
                "Please enter at least one GitHub handle or upload a CSV file."
            )

        # Load dataset
        df = load_dataset()

        # Filter dataset for the provided GitHub handles
        print(colored("Filtering dataset...", "blue"))
        filtered_df = df[df["developer"].isin(github_handles)]
        # Slice once per developer; both the plot and the table reuse it.
        per_developer = {
            handle: filtered_df[filtered_df["developer"] == handle].sort_values(
                "month_year"
            )
            for handle in github_handles
        }

        # Generate plot
        print(colored("Generating plot...", "blue"))
        # NOTE(review): figures created through pyplot stay registered until
        # closed; confirm whether Gradio closes them after rendering,
        # otherwise a long-running app accumulates open figures.
        fig, ax = plt.subplots()
        for handle, dev_df in per_developer.items():
            ax.plot(dev_df["month_year"], dev_df["total_commits"], label=handle)
        ax.set_xlabel("Month")
        ax.set_ylabel("Number of Commits")
        ax.legend()
        # Use the figure/axes objects directly rather than pyplot's global
        # "current figure", which may belong to another request.
        ax.tick_params(axis="x", labelrotation=45)
        fig.tight_layout()

        # Generate classification table
        print(colored("Classifying developers...", "blue"))
        # The cutoff is the same for every developer, so compute it once.
        last_3_months = pd.Timestamp.now() - pd.DateOffset(months=3)
        classification = [
            (handle, _classify_developer(dev_df, last_3_months))
            for handle, dev_df in per_developer.items()
        ]
        classification_df = pd.DataFrame(
            classification, columns=["Developer", "Classification"]
        ).sort_values("Classification", ascending=False)
        print(colored("Classification completed.", "blue"))

        # Return plot and classification table
        return fig, classification_df
    except Exception as e:
        print(colored(f"Error processing input: {e}", "red"))
        raise
# Gradio interface with descriptions and debug prints
# Layout, top to bottom: title, intro text, a row holding the two inputs,
# a note about the CSV format, the Analyze button, then the plot and table.
with gr.Blocks() as app:
    gr.Markdown("## GitHub Starknet Developer Insights")
    gr.Markdown(
        """
This tool allows you to analyze the GitHub activity of developers within the Starknet ecosystem.
Enter GitHub handles separated by commas or upload a CSV file with GitHub handles in a single column
to see their monthly commit activity and their current involvement classification.
"""
    )

    # Two alternative ways of supplying handles, shown side by side.
    with gr.Row():
        text_input = gr.Textbox(
            label="Enter GitHub handles separated by commas",
            placeholder="e.g., user1,user2,user3",
        )
        file_input = gr.File(
            label="Or upload a CSV file with GitHub handles in a single column",
            type="binary",
        )

    gr.Markdown(
        """
*Note:* When uploading a CSV, ensure it contains a single column of GitHub handles without a header row.
"""
    )

    btn = gr.Button("Analyze")
    plot_output = gr.Plot(label="Commits per Month")
    table_output = gr.Dataframe(label="Developer Classification")

    # Clicking the button feeds both inputs to process_input and routes its
    # (figure, dataframe) result to the two output components.
    btn.click(
        process_input,
        inputs=[text_input, file_input],
        outputs=[plot_output, table_output],
    )

print(colored("Gradio app initialized.", "blue"))

# Start the server only when this file is executed as a script.
if __name__ == "__main__":
    print(colored("Launching app...", "blue"))
    app.launch(share=True)