# GitHub Issues Dashboard — Streamlit app (Hugging Face Spaces page residue removed)
import streamlit as st
import pandas as pd
from github import Github
from wordcloud import WordCloud
import matplotlib.pyplot as plt
import re
import datetime

# Authenticate against the GitHub API using the token stored in Streamlit secrets.
g = Github(st.secrets["ACCESS_TOKEN"])
# REPO_NAME is a comma-separated list of repository names; strip whitespace so
# values like "org/repo-a, org/repo-b" also resolve correctly.
repos = [g.get_repo(name.strip()) for name in st.secrets["REPO_NAME"].split(",")]
def fetch_data():
    """Pull every issue (open and closed) from each configured repository.

    Returns a pandas DataFrame with one row per issue: state, timestamps,
    labels, engagement counts, and a link back to GitHub.
    """
    records = []
    for repo in repos:
        for issue in repo.get_issues(state="all"):
            records.append({
                'Issue': f"{issue.number} - {issue.title}",
                'State': issue.state,
                'Created at': issue.created_at,
                'Closed at': issue.closed_at,
                'Last update': issue.updated_at,
                'Labels': [label.name for label in issue.labels],
                'Reactions': issue.reactions['total_count'],
                'Comments': issue.comments,
                'URL': issue.html_url,
                'Repository': repo.name,
            })
    return pd.DataFrame(records)
| # def save_data(df): | |
| # df.to_json("issues.json", orient="records", indent=4, index=False) | |
| # @st.cache_data | |
| # def load_data(): | |
| # try: | |
| # df = pd.read_json("issues.json", convert_dates=["Created at", "Closed at", "Last update"], date_unit="ms") | |
| # except: | |
| # df = fetch_data() | |
| # save_data(df) | |
| # return df | |
# Page header plus the initial data load, shown behind a status indicator.
# (Plain string literal: the original used an f-string with no placeholders.)
st.title("GitHub Issues Dashboard")
with st.status(label="Loading data...", state="running") as status:
    df = fetch_data()
    status.update(label="Data loaded!", state="complete")
# Reference date for the relative date filters below.
today = datetime.date.today()
# Section 1: Issue activity metrics
st.header("Issue activity metrics")
col1, col2, col3 = st.columns(3)
state_counts = df['State'].value_counts()
open_issues = df.loc[df['State'] == 'open']
# .copy() avoids pandas' SettingWithCopyWarning when the derived
# 'Time to Close' column is assigned below.
closed_issues = df.loc[df['State'] == 'closed'].copy()
closed_issues['Time to Close'] = closed_issues['Closed at'] - closed_issues['Created at']
with col1:
    # Series.get(..., 0) keeps the dashboard alive when a state has no issues
    # (plain indexing would raise KeyError).
    st.metric(label="Open issues", value=state_counts.get('open', 0))
with col2:
    st.metric(label="Closed issues", value=state_counts.get('closed', 0))
with col3:
    if closed_issues.empty:
        # .mean() of an empty timedelta series is NaT; show a placeholder instead.
        st.metric(label="Avg. days to close", value="N/A")
    else:
        st.metric(label="Avg. days to close", value=closed_issues['Time to Close'].mean().days)
| # TODO Plot: number of open vs closed issues by date | |
| # st.subheader("Latest bugs π") | |
| # bug_issues = open_issues[open_issues["Labels"].apply(lambda labels: "type: bug" in labels)] | |
| # bug_issues = bug_issues[["Issue","Labels","Created at","URL"]] | |
| # st.dataframe( | |
| # bug_issues.sort_values(by="Created at", ascending=False), | |
| # hide_index=True, | |
| # column_config={ | |
| # "Issue": st.column_config.TextColumn("Issue", width=400), | |
| # "Labels": st.column_config.TextColumn("Labels"), | |
| # "Created at": st.column_config.DatetimeColumn("Created at"), | |
| # "URL": st.column_config.LinkColumn("π", display_text="π") | |
| # } | |
| # ) | |
# Open issues whose last update falls after a user-selected cutoff date.
st.subheader("Latest updates π")
col1, col2 = st.columns(2)
with col1:
    # Default window: the last 7 days.
    cutoff = st.date_input("Last updated after:", value=today - datetime.timedelta(days=7), format="DD-MM-YYYY")
    cutoff = datetime.datetime.combine(cutoff, datetime.datetime.min.time())
with col2:
    # Drop timezone info so the comparison against the naive cutoff works.
    last_updates = pd.to_datetime(open_issues["Last update"]).dt.tz_localize(None)
    updated_issues = open_issues[last_updates > pd.to_datetime(cutoff)]
    st.metric("Results:", len(updated_issues))
# Most recently updated first.
st.dataframe(
    updated_issues[["URL", "Issue", "Labels", "Repository", "Last update"]].sort_values(by="Last update", ascending=False),
    hide_index=True,
    column_config={
        "Issue": st.column_config.TextColumn("Issue", width="large"),
        "Labels": st.column_config.ListColumn("Labels", width="large"),
        "Last update": st.column_config.DatetimeColumn("Last update", width="medium"),
        "URL": st.column_config.LinkColumn("π", display_text="π", width="small")
    }
)
# Open issues that have seen no activity since a user-selected date.
st.subheader("Stale issues? πΈοΈ")
col1, col2 = st.columns(2)
with col1:
    # Default staleness threshold: 90 days of inactivity.
    stale_cutoff = st.date_input("Not updated since:", value=today - datetime.timedelta(days=90), format="DD-MM-YYYY")
    stale_cutoff = datetime.datetime.combine(stale_cutoff, datetime.datetime.min.time())
with col2:
    # Drop timezone info so the comparison against the naive cutoff works.
    last_updates = pd.to_datetime(open_issues["Last update"]).dt.tz_localize(None)
    stale_issues = open_issues[last_updates < pd.to_datetime(stale_cutoff)]
    st.metric("Results:", len(stale_issues))
# Oldest update first, so the most neglected issues surface at the top.
st.dataframe(
    stale_issues[["URL", "Issue", "Labels", "Repository", "Last update"]].sort_values(by="Last update"),
    hide_index=True,
    column_config={
        "Issue": st.column_config.TextColumn("Issue", width="large"),
        "Labels": st.column_config.ListColumn("Labels", width="large"),
        "Last update": st.column_config.DatetimeColumn("Last update", width="medium"),
        "URL": st.column_config.LinkColumn("π", display_text="π", width="small")
    }
)
# Section 2: Issue classification
st.header("Issue classification")
col1, col2 = st.columns(2)
## Dataframe: Number of open issues by label.
with col1:
    st.subheader("Top ten labels π")
    # Per-repository label frequencies across open issues.
    label_counts = (
        open_issues.groupby("Repository")
        .apply(lambda grp: grp.explode("Labels").value_counts("Labels").to_frame().reset_index())
        .reset_index()
    )

    def generate_labels_link(labels, repos):
        """Return a GitHub issue-search URL for each (label, repository) pair."""
        # NOTE(review): the org is hardcoded — assumes every repo lives under
        # argilla-io; confirm against the configured repositories.
        return [
            f"https://github.com/argilla-io/{repo}/issues?q=is:open+is:issue+label:%22{label.replace(' ', '+')}%22"
            for label, repo in zip(labels, repos)
        ]

    label_counts['Link'] = generate_labels_link(label_counts['Labels'], label_counts['Repository'])
    st.dataframe(
        label_counts[["Link", "Labels", "Repository", "count"]].head(10),
        hide_index=True,
        column_config={
            "Labels": st.column_config.TextColumn("Labels"),
            "count": st.column_config.NumberColumn("Count"),
            "Link": st.column_config.LinkColumn("π", display_text="π")
        }
    )
## Cloud of words: Issue titles
with col2:
    st.subheader("Cloud of words βοΈ")
    # Concatenate all open-issue titles and strip bracketed prefixes
    # (e.g. "[BUG]") so tags don't dominate the cloud.
    corpus = re.sub(r'\[.*?\]', '', " ".join(open_issues["Issue"]))
    cloud = WordCloud(width=800, height=400, background_color="black").generate(corpus)
    plt.figure(figsize=(10, 5))
    plt.imshow(cloud, interpolation="bilinear")
    plt.axis("off")
    st.pyplot(plt, use_container_width=True)
# # Community engagement
st.header("Community engagement")
# ## Dataframe: Latest issues open by the community
# ## Dataframe: issues sorted by number of comments
st.subheader("Top engaging issues π¬")
# Rank open issues by reactions, breaking ties with the comment count.
top_engaging = (
    open_issues[["URL", "Issue", "Repository", "Created at", "Reactions", "Comments"]]
    .sort_values(by=["Reactions", "Comments"], ascending=False)
    .head(10)
)
st.dataframe(
    top_engaging,
    hide_index=True,
    column_config={
        "Issue": st.column_config.TextColumn("Issue", width="large"),
        "Reactions": st.column_config.NumberColumn("Reactions", format="%d π", width="small"),
        "Comments": st.column_config.NumberColumn("Comments", format="%d π¬", width="small"),
        "URL": st.column_config.LinkColumn("π", display_text="π", width="small")
    }
)
| # ## Cloud of words: Comments?? | |
| # ## Dataframe: Contributor leaderboard. | |
| # # Issue dependencies | |
| # st.header("Issue dependencies") | |
| # ## Map: dependencies between issues. Network of issue mentions.x | |
| # status.update(label="Checking for updated data...", state="running") | |
| # updated_data = fetch_data() | |
| # if df.equals(updated_data): | |
| # status.update(label="Data is up to date!", state="complete") | |
| # else: | |
| # save_data(updated_data) | |
| # status.update(label="Refresh for updated data!", state="complete") | |