Spaces:
Running
Running
import json | |
import requests | |
import os | |
import base64 | |
import loguru | |
from dotenv import load_dotenv | |
# Populate the process environment via python-dotenv before reading any setting.
load_dotenv()

# GitHub access token, read from the TOKEN environment variable (None when unset).
TOKEN = os.environ.get('TOKEN')
# Fetch the repositories of an organization and record their names on disk.
def get_repos(org_name, token, export_dir):
    """Fetch every repository of a GitHub organization.

    Pages through ``GET /orgs/{org_name}/repos`` until a short page is
    returned, then writes one repository name per line to
    ``<export_dir>/repositories.txt``.

    Args:
        org_name: Login name of the GitHub organization.
        token: Access token sent in the ``Authorization`` header.
        export_dir: Directory that receives ``repositories.txt``; it is
            created when it does not exist yet.

    Returns:
        The list of repository objects (decoded JSON) on success, or an
        empty list when any request fails or returns a non-200 status.
    """
    headers = {
        'Authorization': f'token {token}',
    }
    url = f'https://api.github.com/orgs/{org_name}/repos'

    # GitHub caps per_page at 100 and numbers pages starting from 1, so the
    # full listing has to be collected page by page.
    per_page = 100
    page = 1
    repos = []
    while True:
        try:
            response = requests.get(
                url,
                headers=headers,
                params={'per_page': per_page, 'page': page},
                timeout=30,  # never hang forever on a stalled connection
            )
        except requests.RequestException as exc:
            loguru.logger.error(f"Error fetching repositories: {exc}")
            return []

        if response.status_code != 200:
            loguru.logger.error(f"Error fetching repositories: {response.status_code}")
            loguru.logger.error(response.text)
            return []

        batch = response.json()
        repos.extend(batch)
        # A page shorter than per_page is the last one.
        if len(batch) < per_page:
            break
        page += 1

    loguru.logger.info(f'Fetched {len(repos)} repositories for {org_name}.')

    # Make sure the target directory exists before writing the name list.
    os.makedirs(export_dir, exist_ok=True)
    repositories_path = os.path.join(export_dir, 'repositories.txt')
    with open(repositories_path, 'w', encoding='utf-8') as file:
        file.writelines(repo['name'] + '\n' for repo in repos)
    return repos
# Download the README of a single repository and store it on disk.
def fetch_repo_readme(org_name, repo_name, token, export_dir):
    """Fetch a repository's README and save it as ``README.md``.

    Calls ``GET /repos/{org_name}/{repo_name}/readme``, base64-decodes the
    ``content`` field of the JSON response and writes the text to
    ``<export_dir>/<repo_name>/README.md``.

    Failures (network errors, non-200 responses) are logged and the function
    returns without writing anything, so one bad repository does not abort
    a caller that loops over many repositories.

    Args:
        org_name: Login name of the GitHub organization owning the repo.
        repo_name: Name of the repository.
        token: Access token sent in the ``Authorization`` header.
        export_dir: Root directory under which the per-repo folder is created.

    Returns:
        None.
    """
    headers = {
        'Authorization': f'token {token}',
    }
    url = f'https://api.github.com/repos/{org_name}/{repo_name}/readme'
    try:
        response = requests.get(url, headers=headers, timeout=30)
    except requests.RequestException as exc:
        loguru.logger.error(f"Error fetching README for {repo_name}: {exc}")
        return

    if response.status_code != 200:
        loguru.logger.error(f"Error fetching README for {repo_name}: {response.status_code}")
        loguru.logger.error(response.text)
        return

    # The API delivers the file body base64-encoded.
    raw_bytes = base64.b64decode(response.json()['content'])
    # Not every README is valid UTF-8; substitute undecodable bytes instead of
    # raising UnicodeDecodeError and killing the whole export run.
    readme_content = raw_bytes.decode('utf-8', errors='replace')

    # exist_ok avoids the check-then-create race of exists() + makedirs().
    repo_dir = os.path.join(export_dir, repo_name)
    os.makedirs(repo_dir, exist_ok=True)

    readme_path = os.path.join(repo_dir, 'README.md')
    with open(readme_path, 'w', encoding='utf-8') as file:
        file.write(readme_content)
# Script entry point: export the README of every repository in the organization.
if __name__ == '__main__':
    # Organization whose repositories are exported.
    org_name = 'datawhalechina'
    # Output root directory; replace with the actual target path.
    export_dir = "database/readme_db"

    # Fetch the repository list, then pull the README of each repository.
    # An empty list (e.g. after a failed listing) simply skips the loop.
    for repo in get_repos(org_name, TOKEN, export_dir):
        fetch_repo_readme(org_name, repo['name'], TOKEN, export_dir)

    # Temporary-folder cleanup (currently disabled):
    # if os.path.exists('temp'):
    #     shutil.rmtree('temp')