Spaces:

nataliaElv
/

github_issues_dashboard

Sleeping

File size: 7,863 Bytes

1e08812
0dca33a
 
fe06679
 
99e4eea
0b2c5d2
1e08812
b1dec7c
9b1266b
 
0dca33a
58bba8c
b1dec7c
0dca33a
b1dec7c
 
9b1266b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b1dec7c
0dca33a
086fa02
 
b1dec7c
086fa02
 
 
 
 
 
 
 
58bba8c
b1dec7c
9b1266b
086fa02
 
 
0b2c5d2
 
b1dec7c
 
 
ef22b70
b1dec7c
 
 
ef22b70
e8df381
b1dec7c
 
99e4eea
b1dec7c
 
99e4eea
b1dec7c
e8df381
 
99e4eea
ef22b70
 
 
b1dec7c
0b2c5d2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
58bba8c
086fa02
0b2c5d2
086fa02
0b2c5d2
 
cc871a7
842bf87
b410ebc
58bba8c
cc871a7
58bba8c
 
 
 
cc871a7
 
b1dec7c
0b2c5d2
 
 
58bba8c
086fa02
0b2c5d2
086fa02
0b2c5d2
ef22b70
842bf87
ef22b70
58bba8c
ef22b70
58bba8c
 
 
 
ef22b70
 
 
b410ebc
 
 
cc871a7
b410ebc
 
 
9b1266b
b410ebc
9b1266b
b410ebc
9b1266b
b410ebc
9b1266b
b410ebc
 
9b1266b
b410ebc
 
842bf87
9b1266b
b410ebc
 
 
842bf87
b410ebc
 
 
fe06679
 
99e4eea
 
 
58bba8c
fe06679
 
 
58bba8c
b1dec7c
 
 
 
 
b410ebc
842bf87
b1dec7c
 
 
58bba8c
b1dec7c
58bba8c
 
 
 
b1dec7c
 
 
 
 
 
 
 
 
 
086fa02

import streamlit as st
import pandas as pd
from github import Github
from wordcloud import WordCloud
import matplotlib.pyplot as plt
import re
import datetime

# GitHub client authenticated with the token stored in the Streamlit secrets.
g = Github(st.secrets["ACCESS_TOKEN"])
# REPO_NAME is a comma-separated list of repository identifiers. Surrounding
# whitespace and empty entries (e.g. "a/b, c/d" or a trailing comma) are
# dropped so they are not passed to `get_repo` as malformed names.
repo_names = [name.strip() for name in st.secrets["REPO_NAME"].split(",") if name.strip()]
repos = [g.get_repo(name) for name in repo_names]

@st.cache_data
def fetch_data(include_pull_requests: bool = False) -> pd.DataFrame:
    """Collect the issues of every configured repository into one DataFrame.

    Iterates over the module-level ``repos`` list and requests the issues in
    every state (open and closed). The result is cached by ``st.cache_data``.

    GitHub's issues listing also returns pull requests. They are skipped by
    default so that the dashboard counts real issues only.

    Args:
        include_pull_requests: When True, pull requests are kept in the
            result, which reproduces the unfiltered listing.

    Returns:
        A DataFrame with one row per issue and the columns ``Issue``,
        ``State``, ``Created at``, ``Closed at``, ``Last update``, ``Labels``,
        ``Reactions``, ``Comments``, ``URL`` and ``Repository``. The columns
        are present even when no issue was found.
    """
    columns = [
        'Issue',
        'State',
        'Created at',
        'Closed at',
        'Last update',
        'Labels',
        'Reactions',
        'Comments',
        'URL',
        'Repository',
    ]
    issues_data = []

    for repo in repos:
        for issue in repo.get_issues(state="all"):
            # Pull requests have an html_url of the form ".../pull/<number>".
            # The URL is inspected instead of `issue.pull_request`.
            # NOTE(review): that attribute may cost an extra API request per
            # issue in PyGithub - confirm before switching to it.
            is_pull_request = issue.html_url.rsplit("/", 2)[-2] == "pull"
            if is_pull_request and not include_pull_requests:
                continue

            issues_data.append(
                {
                    'Issue': f"{issue.number} - {issue.title}",
                    'State': issue.state,
                    'Created at': issue.created_at,
                    'Closed at': issue.closed_at,
                    'Last update': issue.updated_at,
                    'Labels': [label.name for label in issue.labels],
                    'Reactions': issue.reactions['total_count'],
                    'Comments': issue.comments,
                    'URL': issue.html_url,
                    'Repository': repo.name,
                }
            )

    # Passing the column list keeps the schema stable for an empty result, so
    # callers can still select columns such as 'State'.
    return pd.DataFrame(issues_data, columns=columns)

# def save_data(df):
#     df.to_json("issues.json", orient="records", indent=4, index=False)

# @st.cache_data
# def load_data():
#     try:
#         df = pd.read_json("issues.json", convert_dates=["Created at", "Closed at", "Last update"], date_unit="ms")
#     except:
#         df = fetch_data()
#         save_data(df)
#     return df


# Page title, followed by a status widget that reports progress while the
# issue data is fetched.
st.title("GitHub Issues Dashboard")
with st.status(label="Loading data...", state="running") as status:
    df = fetch_data()
    status.update(label="Data loaded!", state="complete")

# Reference date for the default values of the date pickers below.
today = datetime.date.today()

# Section 1: Issue activity metrics
st.header("Issue activity metrics")

col1, col2, col3 = st.columns(3)

state_counts = df['State'].value_counts()
open_issues = df.loc[df['State'] == 'open']
# Work on a copy: a new column is added below, and assigning it on a slice of
# `df` would trigger pandas' SettingWithCopyWarning.
closed_issues = df.loc[df['State'] == 'closed'].copy()

if closed_issues.empty:
    # No closed issue means there is nothing to average. `None` is passed to
    # the metric instead of computing a mean over an empty column.
    average_time_to_close = None
else:
    closed_issues['Time to Close'] = closed_issues['Closed at'] - closed_issues['Created at']
    # Whole days of the mean open-to-close duration.
    average_time_to_close = closed_issues['Time to Close'].mean().days

with col1:
    # `.get` avoids a KeyError when no issue is in the given state.
    st.metric(label="Open issues", value=int(state_counts.get('open', 0)))

with col2:
    st.metric(label="Closed issues", value=int(state_counts.get('closed', 0)))

with col3:
    st.metric(label="Avg. days to close", value=average_time_to_close)


# TODO Plot: number of open vs closed issues by date


# st.subheader("Latest bugs 🐞")
# bug_issues = open_issues[open_issues["Labels"].apply(lambda labels: "type: bug" in labels)]
# bug_issues = bug_issues[["Issue","Labels","Created at","URL"]]
# st.dataframe(
#     bug_issues.sort_values(by="Created at", ascending=False),
#     hide_index=True,
#     column_config={
#         "Issue": st.column_config.TextColumn("Issue", width=400),
#         "Labels": st.column_config.TextColumn("Labels"),
#         "Created at": st.column_config.DatetimeColumn("Created at"),
#         "URL": st.column_config.LinkColumn("🔗", display_text="🔗")
#     }
# )

# Open issues whose last update is more recent than a user-selected date
# (defaults to one week ago).
st.subheader("Latest updates 📝")
col1, col2 = st.columns(2)
with col1:
    default_updated_after = today - datetime.timedelta(days=7)
    last_update_date = st.date_input("Last updated after:", value=default_updated_after, format="DD-MM-YYYY")
    # Turn the picked date into a datetime at midnight so it can be compared
    # with the update timestamps.
    last_update_date = datetime.datetime.combine(last_update_date, datetime.time.min)
with col2:
    # Time zone information is dropped so the timestamps compare against the
    # naive cutoff.
    open_last_update = pd.to_datetime(open_issues["Last update"]).dt.tz_localize(None)
    updated_after_cutoff = open_last_update > pd.to_datetime(last_update_date)
    updated_issues = open_issues[updated_after_cutoff]
    st.metric("Results:", len(updated_issues))

# Most recently updated issues first.
updated_columns = ["URL", "Issue", "Labels", "Repository", "Last update"]
updated_table = updated_issues[updated_columns].sort_values(by="Last update", ascending=False)
st.dataframe(
    updated_table,
    hide_index=True,
    column_config={
        "Issue": st.column_config.TextColumn("Issue", width="large"),
        "Labels": st.column_config.ListColumn("Labels", width="large"),
        "Last update": st.column_config.DatetimeColumn("Last update", width="medium"),
        "URL": st.column_config.LinkColumn("🔗", display_text="🔗", width="small"),
    },
)

# Open issues that have not been updated since a user-selected date
# (defaults to 90 days ago).
st.subheader("Stale issues? 🕸️")
col1, col2 = st.columns(2)
with col1:
    default_stale_before = today - datetime.timedelta(days=90)
    not_updated_since = st.date_input("Not updated since:", value=default_stale_before, format="DD-MM-YYYY")
    # Turn the picked date into a datetime at midnight so it can be compared
    # with the update timestamps.
    not_updated_since = datetime.datetime.combine(not_updated_since, datetime.time.min)
with col2:
    # Time zone information is dropped so the timestamps compare against the
    # naive cutoff.
    open_last_update = pd.to_datetime(open_issues["Last update"]).dt.tz_localize(None)
    updated_before_cutoff = open_last_update < pd.to_datetime(not_updated_since)
    stale_issues = open_issues[updated_before_cutoff]
    st.metric("Results:", len(stale_issues))

# Issues that have gone longest without an update first.
stale_columns = ["URL", "Issue", "Labels", "Repository", "Last update"]
stale_table = stale_issues[stale_columns].sort_values(by="Last update", ascending=True)
st.dataframe(
    stale_table,
    hide_index=True,
    column_config={
        "Issue": st.column_config.TextColumn("Issue", width="large"),
        "Labels": st.column_config.ListColumn("Labels", width="large"),
        "Last update": st.column_config.DatetimeColumn("Last update", width="medium"),
        "URL": st.column_config.LinkColumn("🔗", display_text="🔗", width="small"),
    },
)

# Section 2: Issue classification
st.header("Issue classification")
col1, col2 = st.columns(2)

## Dataframe: Number of open issues by label.
with col1:
    st.subheader("Top ten labels 🔖")

    def _count_labels(repo_issues):
        # One row per (issue, label) pair, then the number of rows per label.
        return repo_issues.explode("Labels").value_counts("Labels").to_frame().reset_index()

    # Label counts are computed separately for each repository. The final
    # reset_index turns the group key into a regular "Repository" column.
    label_counts = open_issues.groupby("Repository").apply(_count_labels).reset_index()

    def generate_labels_link(labels, repos, owner="argilla-io"):
        """Build GitHub search links listing the open issues that carry a label.

        Args:
            labels: Iterable of label names.
            repos: Iterable of repository names, paired with ``labels`` by
                position.
            owner: Account that owns the repositories; used as the first path
                segment of every link.

        Returns:
            A list with one URL per (label, repo) pair, in input order.
        """
        from urllib.parse import quote_plus

        links = []
        for label, repo in zip(labels, repos):
            # Percent-encode the label so characters such as "&", "#" or "+"
            # cannot break the query string. Spaces become "+" and ":" is
            # left readable, so links for plain labels are unchanged.
            encoded_label = quote_plus(str(label), safe=":")
            links.append(f"https://github.com/{owner}/{repo}/issues?q=is:open+is:issue+label:%22{encoded_label}%22")
        return links

    label_counts['Link'] = generate_labels_link(label_counts['Labels'],label_counts['Repository'])

    # `label_counts` is grouped by repository, so taking the first ten rows
    # as-is would only show labels of the first repository. Rank every
    # (label, repository) pair by its count before keeping the top ten. The
    # stable sort keeps the existing order among equal counts.
    top_labels = label_counts.sort_values(by="count", ascending=False, kind="stable").head(10)

    st.dataframe(
        top_labels[["Link", "Labels", "Repository", "count"]],
        hide_index=True,
        column_config={
            "Labels": st.column_config.TextColumn("Labels"),
            "count": st.column_config.NumberColumn("Count"),
            "Link": st.column_config.LinkColumn("🔗", display_text="🔗")
        }
    )
    
## Cloud of words: Issue titles
with col2:
    st.subheader("Cloud of words ☁️")
    titles = " ".join(open_issues["Issue"])
    # Remove bracketed tags such as "[BUG]" so they do not dominate the cloud.
    titles = re.sub(r'\[.*?\]', '', titles)
    try:
        wordcloud = WordCloud(width=800, height=400, background_color="black").generate(titles)
    except ValueError:
        # WordCloud raises ValueError when the text contains no usable words
        # (for example when there are no open issues).
        st.info("Not enough issue titles to build a word cloud.")
    else:
        # Draw on an explicit figure instead of pyplot's global state, and
        # close it after rendering. Otherwise every rerun of the script
        # leaves another open figure behind.
        fig, ax = plt.subplots(figsize=(10, 5))
        ax.imshow(wordcloud, interpolation="bilinear")
        ax.axis("off")
        st.pyplot(fig, use_container_width=True)
        plt.close(fig)

# Section 3: Community engagement
st.header("Community engagement")
# Ideas not implemented yet:
# - Dataframe: latest issues opened by the community
# - Dataframe: issues sorted by number of comments
st.subheader("Top engaging issues 💬")

# Ten open issues with the most reactions; comments are the secondary sort key.
engagement_columns = ["URL", "Issue", "Repository", "Created at", "Reactions", "Comments"]
engagement_df = (
    open_issues[engagement_columns]
    .sort_values(by=["Reactions", "Comments"], ascending=False)
    .head(10)
)
st.dataframe(
    engagement_df,
    hide_index=True,
    column_config={
        "Issue": st.column_config.TextColumn("Issue", width="large"),
        "Reactions": st.column_config.NumberColumn("Reactions", format="%d 👍", width="small"),
        "Comments": st.column_config.NumberColumn("Comments", format="%d 💬", width="small"),
        "URL": st.column_config.LinkColumn("🔗", display_text="🔗", width="small"),
    },
)

# ## Cloud of words: Comments??
# ## Dataframe: Contributor leaderboard.

# # Issue dependencies
# st.header("Issue dependencies")
# ## Map: dependencies between issues. Network of issue mentions.

# status.update(label="Checking for updated data...", state="running")
# updated_data = fetch_data()
# if df.equals(updated_data):
#     status.update(label="Data is up to date!", state="complete")
# else:
#     save_data(updated_data)
#     status.update(label="Refresh for updated data!", state="complete")