Spaces:

nataliaElv
/

github_issues_dashboard

Sleeping

nataliaElv

Links first

842bf87 10 months ago

7.86 kB

	import streamlit as st
	import pandas as pd
	from github import Github
	from wordcloud import WordCloud
	import matplotlib.pyplot as plt
	import re
	import datetime

	# Authenticate against the GitHub API with the token stored in Streamlit secrets.
	g = Github(st.secrets["ACCESS_TOKEN"])
	# REPO_NAME is a comma-separated list of repository identifiers. Strip the
	# whitespace around each entry and skip empty ones so that values such as
	# "org/a, org/b" or a trailing comma do not produce an invalid lookup.
	repo_names = [name.strip() for name in st.secrets["REPO_NAME"].split(",") if name.strip()]
	# Resolve every identifier to a repository object once, at start-up.
	repos = [g.get_repo(name) for name in repo_names]

	@st.cache_data
	def fetch_data():
	    """Collect the issues of every configured repository into one DataFrame.

	    Reads the module-level ``repos`` list and requests the issues in every
	    state for each repository. The result is memoised by ``st.cache_data``.

	    Returns:
	        pandas.DataFrame: one row per issue with the columns ``Issue``,
	        ``State``, ``Created at``, ``Closed at``, ``Last update``, ``Labels``
	        (list of label names), ``Reactions``, ``Comments``, ``URL`` and
	        ``Repository``. The columns are present even when no issue is found,
	        so the rest of the dashboard can index them safely.
	    """
	    columns = [
	        'Issue',
	        'State',
	        'Created at',
	        'Closed at',
	        'Last update',
	        'Labels',
	        'Reactions',
	        'Comments',
	        'URL',
	        'Repository',
	    ]
	    issues_data = []

	    for repo in repos:
	        for issue in repo.get_issues(state="all"):
	            # GitHub's issues endpoint also lists pull requests. Their web URL
	            # has the form ".../pull/<number>", which lets us skip them without
	            # triggering an extra request per item. Keeping them would inflate
	            # the counts of a dashboard whose links filter on "is:issue".
	            if issue.html_url.rsplit("/", 2)[-2] == "pull":
	                continue
	            issues_data.append(
	                {
	                    'Issue': f"{issue.number} - {issue.title}",
	                    'State': issue.state,
	                    'Created at': issue.created_at,
	                    'Closed at': issue.closed_at,
	                    'Last update': issue.updated_at,
	                    'Labels': [label.name for label in issue.labels],
	                    'Reactions': issue.reactions['total_count'],
	                    'Comments': issue.comments,
	                    'URL': issue.html_url,
	                    'Repository': repo.name,
	                }
	            )

	    # Passing the column list keeps the schema stable for an empty result.
	    return pd.DataFrame(issues_data, columns=columns)

	# def save_data(df):
	# df.to_json("issues.json", orient="records", indent=4, index=False)

	# @st.cache_data
	# def load_data():
	# try:
	# df = pd.read_json("issues.json", convert_dates=["Created at", "Closed at", "Last update"], date_unit="ms")
	# except:
	# df = fetch_data()
	# save_data(df)
	# return df


	# Page title, then load the issue data while showing a status widget.
	st.title("GitHub Issues Dashboard")
	with st.status(label="Loading data...", state="running") as status:
	    df = fetch_data()
	    status.update(state="complete", label="Data loaded!")
	# Reference date used as the default for the date pickers further down.
	today = datetime.date.today()

	# Section 1: Issue activity metrics
	st.header("Issue activity metrics")

	col1, col2, col3 = st.columns(3)

	state_counts = df['State'].value_counts()
	open_issues = df.loc[df['State'] == 'open']
	# Work on a copy: adding a column to a filtered slice of df triggers
	# pandas' SettingWithCopyWarning.
	closed_issues = df.loc[df['State'] == 'closed'].copy()
	closed_issues['Time to Close'] = closed_issues['Closed at'] - closed_issues['Created at']

	with col1:
	    # .get() avoids a KeyError when no issue is in this state.
	    st.metric(label="Open issues", value=int(state_counts.get('open', 0)))

	with col2:
	    st.metric(label="Closed issues", value=int(state_counts.get('closed', 0)))

	with col3:
	    # Without closed issues there is no meaningful average; pass None so the
	    # metric shows a placeholder instead of a NaN value.
	    mean_time_to_close = pd.NaT if closed_issues.empty else closed_issues['Time to Close'].mean()
	    average_time_to_close = None if pd.isna(mean_time_to_close) else mean_time_to_close.days
	    st.metric(label="Avg. days to close", value=average_time_to_close)


	# TODO Plot: number of open vs closed issues by date


	# st.subheader("Latest bugs 🐞")
	# bug_issues = open_issues[open_issues["Labels"].apply(lambda labels: "type: bug" in labels)]
	# bug_issues = bug_issues[["Issue","Labels","Created at","URL"]]
	# st.dataframe(
	# bug_issues.sort_values(by="Created at", ascending=False),
	# hide_index=True,
	# column_config={
	# "Issue": st.column_config.TextColumn("Issue", width=400),
	# "Labels": st.column_config.TextColumn("Labels"),
	# "Created at": st.column_config.DatetimeColumn("Created at"),
	# "URL": st.column_config.LinkColumn("🔗", display_text="🔗")
	# }
	# )

	# Latest updates: open issues whose last update is after a user-chosen date.
	st.subheader("Latest updates 📝")
	col1, col2 = st.columns(2)
	with col1:
	    default_updated_after = today - datetime.timedelta(days=7)
	    last_update_date = st.date_input("Last updated after:", value=default_updated_after, format="DD-MM-YYYY")
	    # Turn the picked date into a datetime at midnight so it can be compared with timestamps.
	    last_update_date = datetime.datetime.combine(last_update_date, datetime.time.min)
	with col2:
	    # Drop the timezone from the column so it compares against the naive cut-off.
	    last_update_naive = pd.to_datetime(open_issues["Last update"]).dt.tz_localize(None)
	    updated_issues = open_issues[last_update_naive > pd.to_datetime(last_update_date)]
	    st.metric("Results:", len(updated_issues))

	# Most recently updated issues first.
	latest_view = updated_issues[["URL", "Issue", "Labels", "Repository", "Last update"]]
	st.dataframe(
	    latest_view.sort_values(by="Last update", ascending=False),
	    hide_index=True,
	    # use_container_width=True,
	    column_config={
	        "Issue": st.column_config.TextColumn("Issue", width="large"),
	        "Labels": st.column_config.ListColumn("Labels", width="large"),
	        "Last update": st.column_config.DatetimeColumn("Last update", width="medium"),
	        "URL": st.column_config.LinkColumn("🔗", display_text="🔗", width="small"),
	    },
	)

	# Stale issues: open issues whose last update is before a user-chosen date.
	st.subheader("Stale issues? 🕸️")
	col1, col2 = st.columns(2)
	with col1:
	    default_not_updated_since = today - datetime.timedelta(days=90)
	    not_updated_since = st.date_input("Not updated since:", value=default_not_updated_since, format="DD-MM-YYYY")
	    # Turn the picked date into a datetime at midnight so it can be compared with timestamps.
	    not_updated_since = datetime.datetime.combine(not_updated_since, datetime.time.min)
	with col2:
	    # Drop the timezone from the column so it compares against the naive cut-off.
	    last_update_naive = pd.to_datetime(open_issues["Last update"]).dt.tz_localize(None)
	    stale_issues = open_issues[last_update_naive < pd.to_datetime(not_updated_since)]
	    st.metric("Results:", len(stale_issues))

	# Least recently updated issues first.
	stale_view = stale_issues[["URL", "Issue", "Labels", "Repository", "Last update"]]
	st.dataframe(
	    stale_view.sort_values(by="Last update", ascending=True),
	    hide_index=True,
	    # use_container_width=True,
	    column_config={
	        "Issue": st.column_config.TextColumn("Issue", width="large"),
	        "Labels": st.column_config.ListColumn("Labels", width="large"),
	        "Last update": st.column_config.DatetimeColumn("Last update", width="medium"),
	        "URL": st.column_config.LinkColumn("🔗", display_text="🔗", width="small"),
	    },
	)

	# Section 2: Issue classification
	st.header("Issue classification")
	col1, col2 = st.columns(2)

	## Dataframe: Number of open issues by label.
	with col1:
	    st.subheader("Top ten labels 🔖")
	    # One row per (repository, label) pair with the number of open issues
	    # carrying that label. explode() turns each label list into one row per
	    # label; issues without labels become NaN and are dropped by groupby().
	    # The rows are sorted by count across all repositories so that taking
	    # the first ten really yields the top ten labels, not just the labels of
	    # whichever repository sorts first.
	    label_counts = (
	        open_issues.explode("Labels")
	        .groupby(["Repository", "Labels"])
	        .size()
	        .reset_index(name="count")
	        .sort_values("count", ascending=False, kind="stable")
	        .reset_index(drop=True)
	    )
	def generate_labels_link(labels, repos, owner="argilla-io"):
	    """Build GitHub search URLs listing the open issues that carry a label.

	    Args:
	        labels: iterable of label names.
	        repos: iterable of repository names, paired element-wise with ``labels``.
	        owner: GitHub account that owns the repositories. Defaults to the
	            previously hard-coded ``"argilla-io"``.

	    Returns:
	        list[str]: one URL per (label, repo) pair, in input order. Iteration
	        stops at the shorter of the two inputs.
	    """
	    from urllib.parse import quote_plus

	    links = []
	    for label, repo in zip(labels, repos):
	        # Percent-encode the label so characters such as "+", "&" or "#" do
	        # not corrupt the query string. Spaces still become "+", and ":" and
	        # "/" are kept readable, so common labels give the same URL as before.
	        encoded_label = quote_plus(label, safe=":/")
	        links.append(
	            f"https://github.com/{owner}/{repo}/issues?q=is:open+is:issue+label:%22{encoded_label}%22"
	        )
	    return links

	# Render inside the left column of the "Issue classification" section.
	with col1:
	    # Attach a GitHub search link to every (label, repository) row.
	    label_counts['Link'] = generate_labels_link(label_counts['Labels'], label_counts['Repository'])

	    # Show only the first ten rows, with the link column first.
	    top_labels = label_counts[["Link", "Labels", "Repository", "count"]].head(10)
	    st.dataframe(
	        top_labels,
	        hide_index=True,
	        column_config={
	            "Labels": st.column_config.TextColumn("Labels"),
	            "count": st.column_config.NumberColumn("Count"),
	            "Link": st.column_config.LinkColumn("🔗", display_text="🔗"),
	        },
	    )

	## Cloud of words: Issue titles
	with col2:
	    st.subheader("Cloud of words ☁️")
	    titles = " ".join(open_issues["Issue"])
	    # Remove bracketed fragments (e.g. a "[BUG]" prefix) from the titles.
	    titles = re.sub(r'\[.*?\]', '', titles)
	    try:
	        wordcloud = WordCloud(width=800, height=400, background_color="black").generate(titles)
	    except ValueError:
	        # WordCloud raises ValueError when the text contains no usable word,
	        # for instance when there are no open issues.
	        st.info("Not enough issue titles to build a word cloud.")
	    else:
	        # Draw on an explicit figure instead of the global pyplot state, and
	        # close it after rendering so figures do not pile up across reruns.
	        fig, ax = plt.subplots(figsize=(10, 5))
	        ax.imshow(wordcloud, interpolation="bilinear")
	        ax.axis("off")
	        st.pyplot(fig, use_container_width=True)
	        plt.close(fig)

	# # Community engagement
	st.header("Community engagement")
	# ## Dataframe: Latest issues opened by the community
	# ## Dataframe: issues sorted by number of comments
	st.subheader("Top engaging issues 💬")
	# Rank open issues by reactions, breaking ties by comments, and keep the first ten.
	engagement_columns = ["URL", "Issue", "Repository", "Created at", "Reactions", "Comments"]
	ranked_issues = open_issues[engagement_columns].sort_values(by=["Reactions", "Comments"], ascending=False)
	engagement_df = ranked_issues.head(10)
	st.dataframe(
	    engagement_df,
	    hide_index=True,
	    # use_container_width=True,
	    column_config={
	        "Issue": st.column_config.TextColumn("Issue", width="large"),
	        "Reactions": st.column_config.NumberColumn("Reactions", format="%d 👍", width="small"),
	        "Comments": st.column_config.NumberColumn("Comments", format="%d 💬", width="small"),
	        "URL": st.column_config.LinkColumn("🔗", display_text="🔗", width="small"),
	    },
	)

	# ## Cloud of words: Comments??
	# ## Dataframe: Contributor leaderboard.

	# # Issue dependencies
	# st.header("Issue dependencies")
	# ## Map: dependencies between issues. Network of issue mentions.

	# status.update(label="Checking for updated data...", state="running")
	# updated_data = fetch_data()
	# if df.equals(updated_data):
	# status.update(label="Data is up to date!", state="complete")
	# else:
	# save_data(updated_data)
	# status.update(label="Refresh for updated data!", state="complete")