import requests
from bs4 import BeautifulSoup
import streamlit as st


def get_github_data(url: str):
    """Fetches user profile and all public repo data from a GitHub URL."""
    if not url or "github.com" not in url:
        return None
    try:
        # Get profile info from main page
        response = requests.get(url)
        response.raise_for_status()
        soup = BeautifulSoup(response.content, "html.parser")

        profile_name_el = soup.find("span", class_="p-name")
        profile_name = profile_name_el.get_text(strip=True) if profile_name_el else ""

        profile_bio_el = soup.find("div", class_="p-note")
        profile_bio = profile_bio_el.get_text(strip=True) if profile_bio_el else ""

        # Get all repositories from the repositories tab, following pagination
        repos_url = f"{url}?tab=repositories"
        all_repos = []
        while repos_url:
            print(f"Scraping: {repos_url}")
            repos_response = requests.get(repos_url)
            repos_response.raise_for_status()
            repos_soup = BeautifulSoup(repos_response.content, "html.parser")

            repo_list = repos_soup.find("ul", {"id": "user-repositories-list"})
            if repo_list:
                for repo_item in repo_list.find_all("li"):
                    repo_name_el = repo_item.find("a", itemprop="name codeRepository")
                    repo_name = repo_name_el.get_text(strip=True) if repo_name_el else "Unnamed Repo"
                    repo_desc_el = repo_item.find("p", itemprop="description")
                    repo_desc = repo_desc_el.get_text(strip=True) if repo_desc_el else "No description."
                    all_repos.append(f"Repo: {repo_name}\nDescription: {repo_desc}")

            # Check for next page; stop when there is no "next" link
            next_button = repos_soup.find("a", class_="next_page")
            if next_button and next_button.get("href"):
                repos_url = f"https://github.com{next_button.get('href')}"
            else:
                repos_url = None

        return {
            "name": profile_name,
            "bio": profile_bio,
            "repositories": "\n\n".join(all_repos),
        }
    except Exception as e:
        st.error(f"Failed to fetch GitHub data: {e}")
        return None
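

# --- Usage sketch (assumption): the original does not show how get_github_data is
# wired into the Streamlit page, so the widget labels and layout below are
# illustrative only, using standard Streamlit APIs (st.text_input, st.button,
# st.spinner, st.write).
st.title("GitHub Profile Scraper")
profile_url = st.text_input("GitHub profile URL", placeholder="https://github.com/octocat")
if st.button("Fetch profile") and profile_url:
    with st.spinner("Scraping GitHub..."):
        data = get_github_data(profile_url)
    if data:
        st.subheader(data["name"] or "(no display name)")
        st.write(data["bio"] or "No bio provided.")
        st.text(data["repositories"] or "No public repositories found.")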