File size: 7,863 Bytes
1e08812
0dca33a
 
fe06679
 
99e4eea
0b2c5d2
1e08812
b1dec7c
9b1266b
 
0dca33a
58bba8c
b1dec7c
0dca33a
b1dec7c
 
9b1266b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b1dec7c
0dca33a
086fa02
 
b1dec7c
086fa02
 
 
 
 
 
 
 
58bba8c
b1dec7c
9b1266b
086fa02
 
 
0b2c5d2
 
b1dec7c
 
 
ef22b70
b1dec7c
 
 
ef22b70
e8df381
b1dec7c
 
99e4eea
b1dec7c
 
99e4eea
b1dec7c
e8df381
 
99e4eea
ef22b70
 
 
b1dec7c
0b2c5d2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
58bba8c
086fa02
0b2c5d2
086fa02
0b2c5d2
 
cc871a7
842bf87
b410ebc
58bba8c
cc871a7
58bba8c
 
 
 
cc871a7
 
b1dec7c
0b2c5d2
 
 
58bba8c
086fa02
0b2c5d2
086fa02
0b2c5d2
ef22b70
842bf87
ef22b70
58bba8c
ef22b70
58bba8c
 
 
 
ef22b70
 
 
b410ebc
 
 
cc871a7
b410ebc
 
 
9b1266b
b410ebc
9b1266b
b410ebc
9b1266b
b410ebc
9b1266b
b410ebc
 
9b1266b
b410ebc
 
842bf87
9b1266b
b410ebc
 
 
842bf87
b410ebc
 
 
fe06679
 
99e4eea
 
 
58bba8c
fe06679
 
 
58bba8c
b1dec7c
 
 
 
 
b410ebc
842bf87
b1dec7c
 
 
58bba8c
b1dec7c
58bba8c
 
 
 
b1dec7c
 
 
 
 
 
 
 
 
 
086fa02
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
import streamlit as st
import pandas as pd
from github import Github
from wordcloud import WordCloud
import matplotlib.pyplot as plt
import re
import datetime

# Authenticated GitHub client; the access token is read from Streamlit secrets.
g = Github(st.secrets["ACCESS_TOKEN"])
# REPO_NAME is a comma-separated list of repository identifiers. Surrounding
# whitespace and empty entries (e.g. "org/a, org/b,") are dropped so they are
# not passed to `get_repo` as invalid names.
repo_names = [name.strip() for name in st.secrets["REPO_NAME"].split(",") if name.strip()]
repos = [g.get_repo(name) for name in repo_names]

@st.cache_data
def fetch_data():
    """Collect the issues of every configured repository into one DataFrame.

    Iterates over the module-level ``repos`` list and requests issues in all
    states (open and closed). The function is wrapped in ``st.cache_data``.

    NOTE(review): GitHub's issues endpoint also returns pull requests, so
    they are presumably included in the result; confirm this is intended.

    Returns:
        pd.DataFrame: one row per issue with the columns listed in
        ``columns`` below. The columns are present even when no issue was
        found, so downstream column lookups do not fail on an empty result.
    """
    columns = [
        'Issue',
        'State',
        'Created at',
        'Closed at',
        'Last update',
        'Labels',
        'Reactions',
        'Comments',
        'URL',
        'Repository',
    ]

    issues_data = []

    for repo in repos:
        for issue in repo.get_issues(state="all"):
            issues_data.append(
                {
                    'Issue': f"{issue.number} - {issue.title}",
                    'State': issue.state,
                    'Created at': issue.created_at,
                    'Closed at': issue.closed_at,
                    'Last update': issue.updated_at,
                    'Labels': [label.name for label in issue.labels],
                    'Reactions': issue.reactions['total_count'],
                    'Comments': issue.comments,
                    'URL': issue.html_url,
                    'Repository': repo.name,
                }
            )

    # Explicit columns keep the schema stable when `issues_data` is empty.
    return pd.DataFrame(issues_data, columns=columns)

# def save_data(df):
#     df.to_json("issues.json", orient="records", indent=4, index=False)

# @st.cache_data
# def load_data():
#     try:
#         df = pd.read_json("issues.json", convert_dates=["Created at", "Closed at", "Last update"], date_unit="ms")
#     except:
#         df = fetch_data()
#         save_data(df)
#     return df


st.title("GitHub Issues Dashboard")
with st.status(label="Loading data...", state="running") as status:
    df = fetch_data()
    status.update(label="Data loaded!", state="complete")
today = datetime.date.today()

# Nothing below can be computed without issues; stop the script early instead
# of failing on missing columns or empty aggregations.
if df.empty:
    st.info("No issues found in the configured repositories.")
    st.stop()

# Section 1: Issue activity metrics
st.header("Issue activity metrics")

col1, col2, col3 = st.columns(3)

state_counts = df['State'].value_counts()
open_issues = df.loc[df['State'] == 'open']
# Copy before adding a column so the assignment does not target a slice of df.
closed_issues = df.loc[df['State'] == 'closed'].copy()
closed_issues['Time to Close'] = closed_issues['Closed at'] - closed_issues['Created at']

with col1:
    # `.get` avoids a KeyError when no issue is in the given state.
    st.metric(label="Open issues", value=int(state_counts.get('open', 0)))

with col2:
    st.metric(label="Closed issues", value=int(state_counts.get('closed', 0)))

with col3:
    if closed_issues.empty:
        # No closed issue means there is no average; st.metric renders None as a dash.
        average_time_to_close = None
    else:
        # Whole days of the mean open-to-close duration.
        average_time_to_close = closed_issues['Time to Close'].mean().days
    st.metric(label="Avg. days to close", value=average_time_to_close)


# TODO Plot: number of open vs closed issues by date


# st.subheader("Latest bugs 🐞")
# bug_issues = open_issues[open_issues["Labels"].apply(lambda labels: "type: bug" in labels)]
# bug_issues = bug_issues[["Issue","Labels","Created at","URL"]]
# st.dataframe(
#     bug_issues.sort_values(by="Created at", ascending=False),
#     hide_index=True,
#     column_config={
#         "Issue": st.column_config.TextColumn("Issue", width=400),
#         "Labels": st.column_config.TextColumn("Labels"),
#         "Created at": st.column_config.DatetimeColumn("Created at"),
#         "URL": st.column_config.LinkColumn("🔗", display_text="🔗")
#     }
# )

# Open issues whose last update is more recent than a user-selected date
# (defaults to one week ago).
st.subheader("Latest updates 📝")
col1, col2 = st.columns(2)
with col1:
    last_update_date = st.date_input("Last updated after:", value=today - datetime.timedelta(days=7), format="DD-MM-YYYY")
    # Widen the selected date to a datetime at midnight for the comparison below.
    last_update_date = datetime.datetime.combine(last_update_date, datetime.datetime.min.time())
with col2:
    # Strip any timezone so the timestamps compare against the naive cutoff.
    last_update_naive = pd.to_datetime(open_issues["Last update"]).dt.tz_localize(None)
    updated_issues = open_issues[last_update_naive > pd.to_datetime(last_update_date)]
    st.metric("Results:", updated_issues.shape[0])

st.dataframe(
    # Most recently updated first.
    updated_issues[["URL", "Issue", "Labels", "Repository", "Last update"]].sort_values(by="Last update", ascending=False),
    hide_index=True,
    # use_container_width=True,
    column_config={
        "Issue": st.column_config.TextColumn("Issue", width="large"),
        "Labels": st.column_config.ListColumn("Labels", width="large"),
        "Last update": st.column_config.DatetimeColumn("Last update", width="medium"),
        "URL": st.column_config.LinkColumn("🔗", display_text="🔗", width="small")
    }
)

# Open issues that have not been updated since a user-selected date
# (defaults to 90 days ago).
st.subheader("Stale issues? 🕸️")
col1, col2 = st.columns(2)
with col1:
    not_updated_since = st.date_input("Not updated since:", value=today - datetime.timedelta(days=90), format="DD-MM-YYYY")
    # Widen the selected date to a datetime at midnight for the comparison below.
    not_updated_since = datetime.datetime.combine(not_updated_since, datetime.datetime.min.time())
with col2:
    # Strip any timezone so the timestamps compare against the naive cutoff.
    stale_update_naive = pd.to_datetime(open_issues["Last update"]).dt.tz_localize(None)
    stale_issues = open_issues[stale_update_naive < pd.to_datetime(not_updated_since)]
    st.metric("Results:", stale_issues.shape[0])
st.dataframe(
    # Longest-untouched issues first.
    stale_issues[["URL", "Issue", "Labels", "Repository", "Last update"]].sort_values(by="Last update", ascending=True),
    hide_index=True,
    # use_container_width=True,
    column_config={
        "Issue": st.column_config.TextColumn("Issue", width="large"),
        "Labels": st.column_config.ListColumn("Labels", width="large"),
        "Last update": st.column_config.DatetimeColumn("Last update", width="medium"),
        "URL": st.column_config.LinkColumn("🔗", display_text="🔗", width="small")
    }
)

# Section 2: Issue classification
st.header("Issue classification")
col1, col2 = st.columns(2)

## Dataframe: Number of open issues by label.
with col1:
    st.subheader("Top ten labels 🔖")
    # One row per (repository, label) pair with the number of open issues
    # carrying that label. Issues without labels explode to NaN and are
    # dropped by groupby. Sorting by count across all repositories makes the
    # `head(10)` below a real top ten rather than the first repository's labels.
    label_counts = (
        open_issues.explode("Labels")
        .groupby(["Repository", "Labels"])
        .size()
        .reset_index(name="count")
        .sort_values("count", ascending=False, ignore_index=True)
    )

    def generate_labels_link(labels, repos, owner="argilla-io"):
        """Build GitHub search URLs listing the open issues for each label.

        Args:
            labels: iterable of label names.
            repos: iterable of repository names, paired with ``labels``.
            owner: GitHub account that owns the repositories.

        Returns:
            list[str]: one URL per (label, repo) pair, in input order.
        """
        from urllib.parse import quote_plus

        # quote_plus turns spaces into "+" and percent-encodes characters
        # that would otherwise break the query string (e.g. "&", "#").
        return [
            f"https://github.com/{owner}/{repo}/issues?q=is:open+is:issue+label:%22{quote_plus(label)}%22"
            for label, repo in zip(labels, repos)
        ]

    label_counts['Link'] = generate_labels_link(label_counts['Labels'], label_counts['Repository'])

    st.dataframe(
        label_counts[["Link", "Labels", "Repository", "count"]].head(10),
        hide_index=True,
        column_config={
            "Labels": st.column_config.TextColumn("Labels"),
            "count": st.column_config.NumberColumn("Count"),
            "Link": st.column_config.LinkColumn("🔗", display_text="🔗")
        }
    )
    
## Cloud of words: Issue titles
with col2:
    st.subheader("Cloud of words ☁️")
    titles = " ".join(open_issues["Issue"])
    # Drop bracketed tags such as "[BUG]" so they do not dominate the cloud.
    titles = re.sub(r'\[.*?\]', '', titles)
    try:
        wordcloud = WordCloud(width=800, height=400, background_color="black").generate(titles)
    except ValueError:
        # WordCloud rejects text that yields no words (e.g. no open issues).
        wordcloud = None

    if wordcloud is None:
        st.info("Not enough text in open issue titles to build a word cloud.")
    else:
        # Draw on an explicit figure instead of pyplot's global state, and
        # close it afterwards so figures do not accumulate across reruns.
        fig, ax = plt.subplots(figsize=(10, 5))
        ax.imshow(wordcloud, interpolation="bilinear")
        ax.axis("off")
        st.pyplot(fig, use_container_width=True)
        plt.close(fig)

# # Community engagement
st.header("Community engagement")
# ## Dataframe: Latest issues open by the community
# ## Dataframe: issues sorted by number of comments
st.subheader("Top engaging issues 💬")
# Ten open issues with the most reactions; ties are broken by comment count.
engagement_df = open_issues[["URL", "Issue", "Repository", "Created at", "Reactions", "Comments"]].sort_values(by=["Reactions", "Comments"], ascending=False).head(10)
st.dataframe(
    engagement_df,
    hide_index=True,
    # use_container_width=True,
    column_config={
        "Issue": st.column_config.TextColumn("Issue", width="large"),
        "Reactions": st.column_config.NumberColumn("Reactions", format="%d 👍", width="small"),
        "Comments": st.column_config.NumberColumn("Comments", format="%d 💬", width="small"),
        "URL": st.column_config.LinkColumn("🔗", display_text="🔗", width="small")
    }
)

# ## Cloud of words: Comments??
# ## Dataframe: Contributor leaderboard.

# # Issue dependencies
# st.header("Issue dependencies")
# ## Map: dependencies between issues. Network of issue mentions.x

# status.update(label="Checking for updated data...", state="running")
# updated_data = fetch_data()
# if df.equals(updated_data):
#     status.update(label="Data is up to date!", state="complete")
# else:
#     save_data(updated_data)
#     status.update(label="Refresh for updated data!", state="complete")