transformers-github-bot / update_stored_issues.py
Amy Roberts
Draft
9b744c5
raw
history blame
4.1 kB
"""
Module which updates the locally stored issues so that they reflect the latest state of each issue on GitHub
"""
import json
import datetime
from defaults import TOKEN, OWNER, REPO
GITHUB_API_VERSION = "2022-11-28"
# Get the issues that have been updated since the last update
import json
import argparse
import requests
import os
import numpy as np
import json
import datetime
import logging
# Emit INFO-level records so the per-page progress messages are visible.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Local-time timestamp taken once at import, formatted as YYYY_MM_DD_HH_MM_SS.
today = f"{datetime.datetime.now():%Y_%m_%d_%H_%M_%S}"

# Defaults used by get_issues and the command line interface.
# NOTE: these assignments rebind the OWNER, REPO and TOKEN names that were
# imported from `defaults` earlier in the file.
OWNER, REPO = "huggingface", "transformers"
GITHUB_API_VERSION = "2022-11-28"
TOKEN = os.getenv("GITHUB_TOKEN")  # None when the variable is not set
JSON_FILE = "issues.json"
def get_issues(
    input_filename=JSON_FILE,
    output_filename=JSON_FILE,
    github_api_version=GITHUB_API_VERSION,
    owner=OWNER,
    repo=REPO,
    token=TOKEN,
    n_pages=-1,
    since=None,
):
    """
    Function to refresh the stored issues with their latest state from the GitHub API

    Every issue of `owner/repo` returned by the API for the `since` filter is fetched.
    `input_filename` (one JSON issue per line) is then copied to `output_filename`,
    replacing each line whose issue "number" was fetched with the fresh version.
    Fetched issues whose number is not present in `input_filename` are NOT added to
    `output_filename`; all fetched issues, keyed by number, are written to
    "updated_issues.json".

    Args:
        input_filename: JSON-lines file holding the currently stored issues.
        output_filename: File that receives the refreshed issues. It must already
            exist; it is replaced once the rewrite has finished.
        github_api_version: Value sent in the "X-GitHub-Api-Version" header.
        owner: Owner of the repository to query.
        repo: Name of the repository to query.
        token: GitHub token. A bare token is sent with the "Bearer" scheme; a value
            that already starts with "Bearer " or "token " is sent unchanged. When
            empty or None, no Authorization header is sent.
        n_pages: Maximum number of result pages to fetch. Values <= 0 mean no limit.
        since: Timestamp passed as the `since` query parameter. When None, the most
            recent "updated_at" value found in "issues_dict.json" is used; if that
            file holds no issues, no `since` filter is sent.

    Returns:
        `output_filename`.

    Raises:
        ValueError: If `output_filename` does not exist, or if a request returns a
            status code other than 200.
    """
    if since is None:
        with open("issues_dict.json", "r") as f:
            issues = json.load(f)

        # Only issues updated after the newest stored one need to be fetched again
        since = max((issue["updated_at"] for issue in issues.values()), default=None)

    # This function only refreshes an existing store, so refuse to run without one
    if not os.path.exists(output_filename):
        raise ValueError(f"File {output_filename} does not exist")

    # Define the URL and headers
    url = f"https://api.github.com/repos/{owner}/{repo}/issues"
    headers = {
        "Accept": "application/vnd.github+json",
        "X-GitHub-Api-Version": f"{github_api_version}",
        "User-Agent": "amyeroberts",
    }
    if token:
        # GitHub expects "<scheme> <token>"; keep a scheme the caller already supplied
        has_scheme = token.lower().startswith(("bearer ", "token "))
        headers["Authorization"] = token if has_scheme else f"Bearer {token}"

    per_page = 100
    page = 1
    query_params = {
        "state": "all",
        "sort": "created",
        "direction": "asc",
        # Must be sent explicitly: the short-page check below compares against it
        "per_page": per_page,
        "page": page,
    }
    if since is not None:
        query_params["since"] = since

    new_lines = []
    page_limit = (n_pages + page) if n_pages > 0 else float("inf")

    while page < page_limit:
        query_params["page"] = page

        # Send the GET request; the timeout stops a stalled connection hanging forever
        response = requests.get(url, headers=headers, params=query_params, timeout=30)

        if response.status_code != 200:
            raise ValueError(
                f"Request failed with status code {response.status_code} and message {response.text}"
            )

        json_response = response.json()
        logger.info("Page: %s, number of issues: %s", page, len(json_response))
        new_lines.extend(json_response)

        # A short (or empty) page means we've reached the end of the issues
        if len(json_response) < per_page:
            break

        page += 1

    issue_lines_map = {issue["number"]: issue for issue in new_lines}

    # Write to a temporary file next to the output so the final replace is a single step
    tmp_filename = os.path.join(
        os.path.dirname(output_filename), "tmp_" + os.path.basename(output_filename)
    )

    # "w" rather than "a": a leftover temporary file from a failed run must not be extended
    with open(input_filename, "r") as f, open(tmp_filename, "w") as g:
        for line in f:
            if not line.strip():
                # Blank lines are not issues; skip them instead of failing to parse them
                continue
            number = json.loads(line)["number"]
            if number in issue_lines_map:
                g.write(json.dumps(issue_lines_map[number]))
                g.write("\n")
            else:
                g.write(line)

    # os.replace overwrites an existing destination on every platform
    os.replace(tmp_filename, output_filename)

    with open("updated_issues.json", "w") as f:
        json.dump(issue_lines_map, f, indent=4, sort_keys=True)

    return output_filename
if __name__ == "__main__":
    # Each command line flag mirrors a keyword argument of get_issues:
    # (flag, type, default)
    cli_options = (
        ("--input_filename", str, JSON_FILE),
        ("--output_filename", str, JSON_FILE),
        ("--github_api_version", str, GITHUB_API_VERSION),
        ("--owner", str, OWNER),
        ("--repo", str, REPO),
        ("--token", str, TOKEN),
        ("--n_pages", int, -1),
    )

    cli = argparse.ArgumentParser()
    for flag, value_type, fallback in cli_options:
        cli.add_argument(flag, type=value_type, default=fallback)

    # Forward the parsed flags as keyword arguments
    parsed = cli.parse_args()
    get_issues(**vars(parsed))