# File-viewer residue (not part of the program): nataliaElv · Links first · 842bf87 · raw · history blame · 7.86 kB
import streamlit as st
import pandas as pd
from github import Github
from wordcloud import WordCloud
import matplotlib.pyplot as plt
import re
import datetime
# GitHub client authenticated with the token kept in the Streamlit secrets.
g = Github(st.secrets["ACCESS_TOKEN"])

# REPO_NAME holds a comma-separated list of repository identifiers that are
# handed to `get_repo`. Surrounding whitespace and empty entries (as in
# "a/b, c/d" or a trailing comma) are dropped so they are never sent to the
# API as bogus repository names.
repos = [
    g.get_repo(repo_name.strip())
    for repo_name in st.secrets["REPO_NAME"].split(",")
    if repo_name.strip()
]
@st.cache_data
def fetch_data():
    """Collect the issues of every configured repository into one DataFrame.

    Issues are requested with ``state="all"``, so both open and closed ones
    are included. The result is memoised by ``st.cache_data``.

    Returns:
        pandas.DataFrame: one row per issue with the columns ``Issue``,
        ``State``, ``Created at``, ``Closed at``, ``Last update``, ``Labels``
        (list of label names), ``Reactions``, ``Comments``, ``URL`` and
        ``Repository``. The columns are present even when no issue was found.
    """
    # Declared up front so that an empty result still has the schema the rest
    # of the dashboard indexes into (e.g. df['State']).
    columns = [
        'Issue',
        'State',
        'Created at',
        'Closed at',
        'Last update',
        'Labels',
        'Reactions',
        'Comments',
        'URL',
        'Repository',
    ]

    issues_data = []
    for repo in repos:
        # NOTE(review): GitHub's issues endpoint also returns pull requests;
        # confirm whether they should be filtered out for this dashboard.
        for issue in repo.get_issues(state="all"):
            issues_data.append(
                {
                    'Issue': f"{issue.number} - {issue.title}",
                    'State': issue.state,
                    'Created at': issue.created_at,
                    'Closed at': issue.closed_at,
                    'Last update': issue.updated_at,
                    'Labels': [label.name for label in issue.labels],
                    'Reactions': issue.reactions['total_count'],
                    'Comments': issue.comments,
                    'URL': issue.html_url,
                    'Repository': repo.name,
                }
            )
    return pd.DataFrame(issues_data, columns=columns)
# def save_data(df):
# df.to_json("issues.json", orient="records", indent=4, index=False)
# @st.cache_data
# def load_data():
# try:
# df = pd.read_json("issues.json", convert_dates=["Created at", "Closed at", "Last update"], date_unit="ms")
# except:
# df = fetch_data()
# save_data(df)
# return df
# Page title.
st.title("GitHub Issues Dashboard")

# Show a status widget while the issues are fetched, then mark it complete.
with st.status(label="Loading data...", state="running") as status:
    df = fetch_data()
    status.update(label="Data loaded!", state="complete")

# Reference date for the default values of the date pickers below.
today = datetime.date.today()
# Section 1: Issue activity metrics
st.header("Issue activity metrics")
col1, col2, col3 = st.columns(3)

# Number of issues per state ('open' / 'closed').
state_counts = df['State'].value_counts()
open_issues = df.loc[df['State'] == 'open']
# Work on a copy: a column is added below, and assigning into a filtered
# slice of `df` triggers pandas' SettingWithCopyWarning.
closed_issues = df.loc[df['State'] == 'closed'].copy()
closed_issues['Time to Close'] = closed_issues['Closed at'] - closed_issues['Created at']

with col1:
    # .get(..., 0) avoids a KeyError when no issue is in that state.
    st.metric(label="Open issues", value=int(state_counts.get('open', 0)))
with col2:
    st.metric(label="Closed issues", value=int(state_counts.get('closed', 0)))
with col3:
    # With no closed issues there is nothing to average; show a placeholder
    # instead of NaN.
    mean_time_to_close = closed_issues['Time to Close'].mean() if not closed_issues.empty else pd.NaT
    average_time_to_close = "N/A" if pd.isna(mean_time_to_close) else mean_time_to_close.days
    st.metric(label="Avg. days to close", value=average_time_to_close)
# TODO Plot: number of open vs closed issues by date
# st.subheader("Latest bugs 🐞")
# bug_issues = open_issues[open_issues["Labels"].apply(lambda labels: "type: bug" in labels)]
# bug_issues = bug_issues[["Issue","Labels","Created at","URL"]]
# st.dataframe(
# bug_issues.sort_values(by="Created at", ascending=False),
# hide_index=True,
# column_config={
# "Issue": st.column_config.TextColumn("Issue", width=400),
# "Labels": st.column_config.TextColumn("Labels"),
# "Created at": st.column_config.DatetimeColumn("Created at"),
# "URL": st.column_config.LinkColumn("πŸ”—", display_text="πŸ”—")
# }
# )
# Open issues updated after a user-selected date (default: one week ago).
# NOTE(review): the emoji literals in this section look mis-encoded — confirm
# the file is stored and read as UTF-8.
st.subheader("Latest updates πŸ“")
col1, col2 = st.columns(2)

with col1:
    last_update_date = st.date_input(
        "Last updated after:",
        value=today - datetime.timedelta(days=7),
        format="DD-MM-YYYY",
    )
    # Turn the picked date into a midnight datetime so it can be compared
    # with the update timestamps.
    last_update_date = datetime.datetime.combine(last_update_date, datetime.time.min)

with col2:
    # Drop any timezone from the timestamps before comparing with the naive
    # cut-off.
    updated_at_naive = pd.to_datetime(open_issues["Last update"]).dt.tz_localize(None)
    updated_issues = open_issues[updated_at_naive > pd.Timestamp(last_update_date)]
    st.metric("Results:", len(updated_issues))

# Most recently updated issues first.
latest_updates_columns = {
    "Issue": st.column_config.TextColumn("Issue", width="large"),
    "Labels": st.column_config.ListColumn("Labels", width="large"),
    "Last update": st.column_config.DatetimeColumn("Last update", width="medium"),
    "URL": st.column_config.LinkColumn("πŸ”—", display_text="πŸ”—", width="small"),
}
st.dataframe(
    updated_issues.sort_values(by="Last update", ascending=False)[
        ["URL", "Issue", "Labels", "Repository", "Last update"]
    ],
    hide_index=True,
    # use_container_width=True,
    column_config=latest_updates_columns,
)
# Open issues with no update since a user-selected date (default: 90 days ago).
# NOTE(review): the emoji literals in this section look mis-encoded — confirm
# the file is stored and read as UTF-8.
st.subheader("Stale issues? πŸ•ΈοΈ")
col1, col2 = st.columns(2)

with col1:
    not_updated_since = st.date_input(
        "Not updated since:",
        value=today - datetime.timedelta(days=90),
        format="DD-MM-YYYY",
    )
    # Turn the picked date into a midnight datetime so it can be compared
    # with the update timestamps.
    not_updated_since = datetime.datetime.combine(not_updated_since, datetime.time.min)

with col2:
    # Drop any timezone from the timestamps before comparing with the naive
    # cut-off.
    updated_at_naive = pd.to_datetime(open_issues["Last update"]).dt.tz_localize(None)
    stale_issues = open_issues[updated_at_naive < pd.Timestamp(not_updated_since)]
    st.metric("Results:", len(stale_issues))

# Longest-untouched issues first.
stale_issues_columns = {
    "Issue": st.column_config.TextColumn("Issue", width="large"),
    "Labels": st.column_config.ListColumn("Labels", width="large"),
    "Last update": st.column_config.DatetimeColumn("Last update", width="medium"),
    "URL": st.column_config.LinkColumn("πŸ”—", display_text="πŸ”—", width="small"),
}
st.dataframe(
    stale_issues.sort_values(by="Last update", ascending=True)[
        ["URL", "Issue", "Labels", "Repository", "Last update"]
    ],
    hide_index=True,
    # use_container_width=True,
    column_config=stale_issues_columns,
)
# Section 2: Issue classification
st.header("Issue classification")
col1, col2 = st.columns(2)

## Dataframe: Number of open issues by label.
with col1:
    # NOTE(review): the emoji literal looks mis-encoded — confirm the file is
    # stored and read as UTF-8.
    st.subheader("Top ten labels πŸ”–")
    # For each repository: one row per (issue, label), then the number of
    # occurrences of every label. The outer reset_index turns the repository
    # group key back into a regular column.
    issues_by_repo = open_issues.groupby("Repository")
    label_counts = issues_by_repo.apply(
        lambda repo_issues: repo_issues.explode("Labels")
        .value_counts("Labels")
        .to_frame()
        .reset_index()
    ).reset_index()
def generate_labels_link(labels, repos, owner="argilla-io"):
    """Build GitHub search URLs listing the open issues that carry each label.

    Args:
        labels: Iterable of label names.
        repos: Iterable of repository names, paired positionally with
            ``labels``.
        owner: GitHub account that owns the repositories.

    Returns:
        list[str]: one URL per ``(label, repo)`` pair, in input order.
    """
    from urllib.parse import quote_plus

    # Percent-encode the label so characters such as '#', '&', '%' or '"'
    # cannot break the query string. Spaces still become '+' and ':' is kept
    # literal, so ordinary labels yield the same URL as before.
    return [
        f"https://github.com/{owner}/{repo}/issues"
        f"?q=is:open+is:issue+label:%22{quote_plus(label, safe=':')}%22"
        for label, repo in zip(labels, repos)
    ]
# Rendered in the left-hand column of the classification section.
with col1:
    label_counts['Link'] = generate_labels_link(label_counts['Labels'], label_counts['Repository'])
    # label_counts is ordered repository by repository, so a plain head(10)
    # would only show the first repository's labels. Rank by count across all
    # repositories first; the stable sort keeps the existing order for ties.
    top_labels = label_counts.sort_values(by="count", ascending=False, kind="stable").head(10)
    # NOTE(review): the emoji literals look mis-encoded — confirm the file is
    # stored and read as UTF-8.
    st.dataframe(
        top_labels[["Link", "Labels", "Repository", "count"]],
        hide_index=True,
        column_config={
            "Labels": st.column_config.TextColumn("Labels"),
            "count": st.column_config.NumberColumn("Count"),
            "Link": st.column_config.LinkColumn("πŸ”—", display_text="πŸ”—")
        }
    )
## Cloud of words: Issue titles
with col2:
    st.subheader("Cloud of words ☁️")
    titles = " ".join(open_issues["Issue"])
    # Strip bracketed tags such as "[BUG]" so they do not dominate the cloud.
    titles = re.sub(r'\[.*?\]', '', titles)
    try:
        wordcloud = WordCloud(width=800, height=400, background_color="black").generate(titles)
    except ValueError:
        # WordCloud raises ValueError when the text contains no usable word
        # (for example when there are no open issues).
        st.info("Not enough issue titles to build a word cloud.")
    else:
        # Draw on an explicit figure, not pyplot's global state, and close
        # it after rendering so figures do not pile up across script reruns.
        fig, ax = plt.subplots(figsize=(10, 5))
        ax.imshow(wordcloud, interpolation="bilinear")
        ax.axis("off")
        st.pyplot(fig, use_container_width=True)
        plt.close(fig)
# # Community engagement
st.header("Community engagement")
# ## Dataframe: Latest issues open by the community
# ## Dataframe: issues sorted by number of comments
# NOTE(review): the emoji literals in this section look mis-encoded — confirm
# the file is stored and read as UTF-8.
st.subheader("Top engaging issues πŸ’¬")

# Ten open issues with the most reactions; comment count breaks ties.
most_engaging = open_issues.sort_values(by=["Reactions", "Comments"], ascending=False).head(10)
engagement_df = most_engaging[["URL", "Issue", "Repository", "Created at", "Reactions", "Comments"]]

engagement_columns = {
    "Issue": st.column_config.TextColumn("Issue", width="large"),
    "Reactions": st.column_config.NumberColumn("Reactions", format="%d πŸ‘", width="small"),
    "Comments": st.column_config.NumberColumn("Comments", format="%d πŸ’¬", width="small"),
    "URL": st.column_config.LinkColumn("πŸ”—", display_text="πŸ”—", width="small"),
}
st.dataframe(
    engagement_df,
    hide_index=True,
    # use_container_width=True,
    column_config=engagement_columns,
)
# ## Cloud of words: Comments??
# ## Dataframe: Contributor leaderboard.
# # Issue dependencies
# st.header("Issue dependencies")
# ## Map: dependencies between issues. Network of issue mentions.x
# status.update(label="Checking for updated data...", state="running")
# updated_data = fetch_data()
# if df.equals(updated_data):
# status.update(label="Data is up to date!", state="complete")
# else:
# save_data(updated_data)
# status.update(label="Refresh for updated data!", state="complete")