# Spaces: Sleeping (Hugging Face Space status banner captured during page export)
# --- Setup: imports and GitHub client --------------------------------------
# (Removed the " | |" extraction artifacts that made the file invalid Python.)
import matplotlib.pyplot as plt
import pandas as pd
import streamlit as st
from github import Github
from wordcloud import WordCloud

# Authenticate against the GitHub API with the token from Streamlit secrets,
# then resolve the repository this dashboard reports on.
g = Github(st.secrets["ACCESS_TOKEN"])
repo = g.get_repo(st.secrets["REPO_NAME"])
def fetch_data():
    """Fetch every issue (open and closed) of ``repo`` as a DataFrame.

    Returns:
        pd.DataFrame: one row per issue with display-ready columns.
        ``Labels`` holds a list of label names; ``Reactions`` is the total
        reaction count; ``URL`` is the issue's HTML page.

    NOTE(review): PyGithub's ``get_issues`` typically includes pull requests
    as well — confirm whether PRs should be filtered out here.
    """
    issues = repo.get_issues(state="all")
    # Build the rows in one comprehension instead of a loop of appends.
    records = [
        {
            'Issue': f"{issue.number} - {issue.title}",
            'State': issue.state,
            'Created at': issue.created_at,
            'Closed at': issue.closed_at,
            'Last update': issue.updated_at,
            'Labels': [label.name for label in issue.labels],
            'Reactions': issue.reactions['total_count'],
            'Comments': issue.comments,
            'URL': issue.html_url,
        }
        for issue in issues
    ]
    return pd.DataFrame(records)
def save_data(df):
    """Persist the issues DataFrame to ``issues.json`` in the working dir.

    Written with ``orient="records"`` (one JSON object per issue). The index
    is never part of that layout, so ``index=False`` is not passed: on
    pandas < 2.0 it raises ``ValueError`` for this orient, and on newer
    versions it is a no-op.

    Args:
        df: the DataFrame produced by ``fetch_data``.
    """
    df.to_json("issues.json", orient="records", indent=4)
st.title(f"GitHub Issues Dashboard for {repo.name}")
status = st.status(label="Loading data...", state="running")
# Serve the cached snapshot when it exists; fall back to a live fetch only on
# a missing or corrupt cache. The original bare ``except:`` would also have
# hidden programming errors (including KeyboardInterrupt/SystemExit).
try:
    df = pd.read_json("issues.json", convert_dates=["Created at", "Closed at", "Last update"], date_unit="ms")
except (FileNotFoundError, ValueError):
    df = fetch_data()
    save_data(df)
# Section 1: Issue activity metrics
st.header("Issue activity metrics")
col1, col2, col3 = st.columns(3)
state_counts = df['State'].value_counts()
open_issues = df.loc[df['State'] == 'open']
# Copy before adding the derived column so we mutate an independent frame,
# not a view of ``df`` (avoids pandas' SettingWithCopyWarning).
closed_issues = df.loc[df['State'] == 'closed'].copy()
closed_issues['Time to Close'] = closed_issues['Closed at'] - closed_issues['Created at']
with col1:
    # ``.get`` keeps the metric at 0 instead of raising KeyError when the
    # repository has no issues in that state.
    st.metric(label="Open Issues", value=int(state_counts.get('open', 0)))
with col2:
    st.metric(label="Closed Issues", value=int(state_counts.get('closed', 0)))
with col3:
    mean_time_to_close = closed_issues['Time to Close'].mean()
    # ``mean()`` returns NaT when there are no closed issues; ``.days`` on
    # NaT would raise, so guard with a 0 fallback.
    average_time_to_close = mean_time_to_close.days if pd.notna(mean_time_to_close) else 0
    st.metric(label="Avg. Days to Close", value=average_time_to_close)
# TODO Plot: number of open vs closed issues by date
# TODO Dataframe: Unresolved conversations
## Issues with new comments (or updates?). Sorted by number of new comments (based on timeframe above) and/or date of last comment.
st.subheader("Latest bugs π")
# Open issues carrying the "type: bug" label, newest first.
has_bug_label = open_issues["Labels"].apply(lambda labels: "type: bug" in labels)
bug_issues = open_issues.loc[has_bug_label, ["Issue", "Labels", "Created at", "URL"]]
bug_columns = {
    "Issue": st.column_config.TextColumn("Issue", width=400),
    "Labels": st.column_config.TextColumn("Labels"),
    "Created at": st.column_config.DatetimeColumn("Created at"),
    "URL": st.column_config.LinkColumn("π", display_text="π"),
}
st.dataframe(
    bug_issues.sort_values(by="Created at", ascending=False),
    hide_index=True,
    column_config=bug_columns,
)
st.subheader("Latest updates π")
# Ten most recently touched open issues, newest activity first.
recent_updates = (
    open_issues[["Issue", "Last update", "URL"]]
    .sort_values(by="Last update", ascending=False)
    .head(10)
)
st.dataframe(
    recent_updates,
    hide_index=True,
    column_config={
        "Issue": st.column_config.TextColumn("Issue", width=400),
        "Last update": st.column_config.DatetimeColumn("Last update"),
        "URL": st.column_config.LinkColumn("π", display_text="π"),
    },
)
# Section 2: Issue classification
st.header("Issue classification")
col1, col2 = st.columns(2)
## Dataframe: Number of open issues by label.
with col1:
    st.subheader("Top ten labels π")
    open_issues_exploded = open_issues.explode("Labels")
    label_counts = open_issues_exploded.value_counts("Labels").to_frame()

    def generate_labels_link(labels):
        """Build one GitHub issue-search URL per label, filtered to open issues.

        Uses the secrets-configured repository (``repo.full_name``) instead of
        the previously hard-coded ``argilla-io/argilla``, so the links stay
        correct for whatever ``REPO_NAME`` points at.
        """
        links = []
        for label in labels:
            label = label.replace(" ", "+")  # crude URL encoding for spaces
            links.append(
                f"https://github.com/{repo.full_name}/issues?q=is:open+is:issue+label:%22{label}%22"
            )
        return links

    label_counts['Link'] = generate_labels_link(label_counts.index)
    st.dataframe(
        label_counts.head(10),
        column_config={
            "Labels": st.column_config.TextColumn("Labels"),
            "count": st.column_config.NumberColumn("Count"),
            "Link": st.column_config.LinkColumn("Link", display_text="π"),
        },
    )
## Dataframe: Number of open bugs ordered by date
## Cloud of words: Issue titles
with col2:
    st.subheader("Word cloud of issue titles")
    # Join every open-issue title into one text blob and render it as a cloud.
    titles_text = " ".join(open_issues["Issue"])
    cloud = WordCloud(width=800, height=400, background_color="white").generate(titles_text)
    plt.figure(figsize=(10, 5))
    plt.imshow(cloud, interpolation="bilinear")
    plt.axis("off")
    st.pyplot(plt)
# # Community engagement
st.header("Community engagement")
# ## Dataframe: Latest issues open by the community
# ## Dataframe: issues sorted by number of comments
st.subheader("Top engaging issues π¬")
# Ten open issues with the most reactions; ties broken by comment count.
engagement_df = (
    open_issues[["Issue", "Reactions", "Comments", "URL"]]
    .sort_values(by=["Reactions", "Comments"], ascending=False)
    .head(10)
)
st.dataframe(
    engagement_df,
    hide_index=True,
    use_container_width=True,
    column_config={
        "Issue": st.column_config.TextColumn("Issue", width=400),
        "Reactions": st.column_config.NumberColumn("Reactions", format="%d π"),
        "Comments": st.column_config.NumberColumn("Comments", format="%d π¬"),
        "URL": st.column_config.LinkColumn("π", display_text="π"),
    },
)
# ## Cloud of words: Comments??
# ## Dataframe: Contributor leaderboard.
# # Issue dependencies
# st.header("Issue dependencies")
# ## Map: dependencies between issues. Network of issue mentions.x
status.update(label="Checking for updated data...", state="running")
# Re-fetch after rendering and refresh the on-disk cache when anything changed.
# NOTE(review): ``DataFrame.equals`` also compares dtypes — a JSON round-trip
# can alter datetime dtypes vs. a fresh fetch, which may make this always
# report a change; confirm in practice.
updated_data = fetch_data()
if not df.equals(updated_data):
    save_data(updated_data)
    status.update(label="Refresh for updated data!", state="complete")
else:
    status.update(label="Data is up to date!", state="complete")