nataliaElv
Cloud of words
fe06679
raw
history blame
5.71 kB
import streamlit as st
import pandas as pd
from github import Github
from wordcloud import WordCloud
import matplotlib.pyplot as plt
# Authenticate against the GitHub API and open the target repository;
# both the token and the repo name come from Streamlit secrets.
token = st.secrets["ACCESS_TOKEN"]
g = Github(token)
repo = g.get_repo(st.secrets["REPO_NAME"])
def fetch_data():
    """Pull every issue (open and closed) from the repo into a DataFrame.

    Returns one row per issue with its state, timestamps, labels,
    reaction/comment counts and HTML URL.
    """

    def _to_row(issue):
        # Flatten a PyGithub Issue object into one plain dict row.
        return {
            'Issue': f"{issue.number} - {issue.title}",
            'State': issue.state,
            'Created at': issue.created_at,
            'Closed at': issue.closed_at,
            'Last update': issue.updated_at,
            'Labels': [label.name for label in issue.labels],
            'Reactions': issue.reactions['total_count'],
            'Comments': issue.comments,
            'URL': issue.html_url
        }

    rows = [_to_row(issue) for issue in repo.get_issues(state="all")]
    return pd.DataFrame(rows)
def save_data(df):
    """Persist the issues DataFrame to ``issues.json`` as a list of records.

    The index is intentionally not written: ``orient="records"`` never
    includes it, and passing ``index=False`` alongside this orient raises
    ``ValueError`` on pandas versions before 2.0.
    """
    df.to_json("issues.json", orient="records", indent=4)
st.title(f"GitHub Issues Dashboard for {repo.name}")
status = st.status(label="Loading data...", state="running")
# Serve cached data from disk when available; fall back to a fresh fetch.
try:
    df = pd.read_json("issues.json", convert_dates=["Created at", "Closed at", "Last update"], date_unit="ms")
except (FileNotFoundError, ValueError):
    # No cache yet, or the cached file is unreadable/corrupt: rebuild it.
    # (Narrowed from a bare `except:`, which also swallowed KeyboardInterrupt
    # and masked real bugs.)
    df = fetch_data()
    save_data(df)
# Section 1: Issue activity metrics
st.header("Issue activity metrics")
col1, col2, col3 = st.columns(3)
state_counts = df['State'].value_counts()
open_issues = df.loc[df['State'] == 'open']
# .copy() so the derived column below is written to an independent frame
# instead of a view of `df` (avoids SettingWithCopyWarning / silent no-op).
closed_issues = df.loc[df['State'] == 'closed'].copy()
closed_issues['Time to Close'] = closed_issues['Closed at'] - closed_issues['Created at']
with col1:
    # .get(...) keeps the dashboard alive when no issue has this state yet.
    st.metric(label="Open Issues", value=state_counts.get('open', 0))
with col2:
    st.metric(label="Closed Issues", value=state_counts.get('closed', 0))
with col3:
    if closed_issues.empty:
        # .mean() of an empty timedelta column is NaT; show a placeholder.
        st.metric(label="Avg. Days to Close", value="N/A")
    else:
        average_time_to_close = closed_issues['Time to Close'].mean().days
        st.metric(label="Avg. Days to Close", value=average_time_to_close)
# TODO Plot: number of open vs closed issues by date
# TODO Dataframe: Unresolved conversations
## Issues with new comments (or updates?). Sorted by number of new comments (based on timeframe above) and/or date of last comment.
st.subheader("Latest bugs 🐞")
# Open issues carrying the bug label, newest first.
has_bug_label = open_issues["Labels"].apply(lambda labels: "type: bug" in labels)
bug_issues = open_issues.loc[has_bug_label, ["Issue", "Labels", "Created at", "URL"]]
bug_issues = bug_issues.sort_values(by="Created at", ascending=False)
st.dataframe(
    bug_issues,
    hide_index=True,
    column_config={
        "Issue": st.column_config.TextColumn("Issue", width=400),
        "Labels": st.column_config.TextColumn("Labels"),
        "Created at": st.column_config.DatetimeColumn("Created at"),
        "URL": st.column_config.LinkColumn("🔗", display_text="🔗")
    }
)
st.subheader("Latest updates 📝")
# Ten most recently touched open issues.
recent = open_issues[["Issue", "Last update", "URL"]]
recent = recent.sort_values(by="Last update", ascending=False).head(10)
st.dataframe(
    recent,
    hide_index=True,
    column_config={
        "Issue": st.column_config.TextColumn("Issue", width=400),
        "Last update": st.column_config.DatetimeColumn("Last update"),
        "URL": st.column_config.LinkColumn("🔗", display_text="🔗")
    }
)
# Section 2: Issue classification
st.header("Issue classification")
col1, col2 = st.columns(2)
## Dataframe: Number of open issues by label.
with col1:
    st.subheader("Top ten labels 🔖")
    # One row per (issue, label) pair, then count occurrences of each label.
    per_label = open_issues.explode("Labels")
    label_counts = per_label.value_counts("Labels").to_frame()
def generate_labels_link(labels):
    """Build a GitHub issue-search URL for each label.

    Args:
        labels: iterable of label names (e.g. the index of a label-count frame).

    Returns:
        list[str]: one "open issues with this label" search URL per label,
        in input order.
    """
    from urllib.parse import quote_plus

    # quote_plus percent-encodes every URL-reserved character (spaces
    # become '+'), so labels containing ':', '&', '#', etc. still produce
    # working links — the old spaces-only replacement broke those.
    return [
        f"https://github.com/argilla-io/argilla/issues?q=is:open+is:issue+label:%22{quote_plus(label)}%22"
        for label in labels
    ]
# Attach a clickable GitHub search link to every label row, then show the top ten.
label_counts['Link'] = generate_labels_link(label_counts.index)
top_labels = label_counts.head(10)
st.dataframe(
    top_labels,
    column_config={
        "Labels": st.column_config.TextColumn("Labels"),
        "count": st.column_config.NumberColumn("Count"),
        "Link": st.column_config.LinkColumn("Link", display_text="🔗")
    }
)
## Dataframe: Number of open bugs ordered by date
## Cloud of words: Issue titles
with col2:
    st.subheader("Word cloud of issue titles")
    # Word cloud built from all open-issue titles joined into one text blob.
    titles_text = " ".join(open_issues["Issue"])
    wordcloud = WordCloud(width=800, height=400, background_color="white").generate(titles_text)
    # Render into an explicit Figure: st.pyplot() with the global pyplot
    # module is deprecated (removed in newer Streamlit), and global pyplot
    # state leaks stale figures across Streamlit reruns.
    fig, ax = plt.subplots(figsize=(10, 5))
    ax.imshow(wordcloud, interpolation="bilinear")
    ax.axis("off")
    st.pyplot(fig)
    plt.close(fig)  # free the figure; Streamlit has already captured it
# # Community engagement
st.header("Community engagement")
# ## Dataframe: Latest issues open by the community
# ## Dataframe: issues sorted by number of comments
st.subheader("Top engaging issues 💬")
# Ten open issues with the most reactions, ties broken by comment count.
engagement_df = (
    open_issues[["Issue", "Reactions", "Comments", "URL"]]
    .sort_values(by=["Reactions", "Comments"], ascending=False)
    .head(10)
)
st.dataframe(
    engagement_df,
    hide_index=True,
    use_container_width=True,
    column_config={
        "Issue": st.column_config.TextColumn("Issue", width=400),
        "Reactions": st.column_config.NumberColumn("Reactions", format="%d 👍"),
        "Comments": st.column_config.NumberColumn("Comments", format="%d 💬"),
        "URL": st.column_config.LinkColumn("🔗", display_text="🔗")
    }
)
# ## Cloud of words: Comments??
# ## Dataframe: Contributor leaderboard.
# # Issue dependencies
# st.header("Issue dependencies")
# ## Map: dependencies between issues. Network of issue mentions.x
status.update(label="Checking for updated data...", state="running")
# Re-fetch at the end of the run; persist only when something actually changed
# so the user is told a refresh is worthwhile.
updated_data = fetch_data()
if not df.equals(updated_data):
    save_data(updated_data)
    status.update(label="Refresh for updated data!", state="complete")
else:
    status.update(label="Data is up to date!", state="complete")