import json import requests import os import base64 import loguru from dotenv import load_dotenv # 加载环境变量 load_dotenv() # 从环境变量中获取TOKEN TOKEN = os.getenv('TOKEN') # 定义获取组织仓库的函数 def get_repos(org_name, token, export_dir): headers = { 'Authorization': f'token {token}', } url = f'https://api.github.com/orgs/{org_name}/repos' response = requests.get(url, headers=headers, params={'per_page': 200, 'page': 0}) if response.status_code == 200: repos = response.json() loguru.logger.info(f'Fetched {len(repos)} repositories for {org_name}.') # 使用 export_dir 确定保存仓库名的文件路径 repositories_path = os.path.join(export_dir, 'repositories.txt') with open(repositories_path, 'w', encoding='utf-8') as file: for repo in repos: file.write(repo['name'] + '\n') return repos else: loguru.logger.error(f"Error fetching repositories: {response.status_code}") loguru.logger.error(response.text) return [] # 定义拉取仓库README文件的函数 def fetch_repo_readme(org_name, repo_name, token, export_dir): headers = { 'Authorization': f'token {token}', } url = f'https://api.github.com/repos/{org_name}/{repo_name}/readme' response = requests.get(url, headers=headers) if response.status_code == 200: readme_content = response.json()['content'] # 解码base64内容 readme_content = base64.b64decode(readme_content).decode('utf-8') # 使用 export_dir 确定保存 README 的文件路径 repo_dir = os.path.join(export_dir, repo_name) if not os.path.exists(repo_dir): os.makedirs(repo_dir) readme_path = os.path.join(repo_dir, 'README.md') with open(readme_path, 'w', encoding='utf-8') as file: file.write(readme_content) else: loguru.logger.error(f"Error fetching README for {repo_name}: {response.status_code}") loguru.logger.error(response.text) # 主函数 if __name__ == '__main__': # 配置组织名称 org_name = 'datawhalechina' # 配置 export_dir export_dir = "database/readme_db" # 请替换为实际的目录路径 # 获取仓库列表 repos = get_repos(org_name, TOKEN, export_dir) # 打印仓库名称 if repos: for repo in repos: repo_name = repo['name'] # 拉取每个仓库的README fetch_repo_readme(org_name, repo_name, TOKEN, export_dir) # 清理临时文件夹 # if os.path.exists('temp'): # shutil.rmtree('temp')