# Maintenance script: re-validate model parameter counts for eval request
# files changed in a recent date window, and rewrite stale entries in place.
import asyncio
import json
import logging
import os
from datetime import datetime
from pathlib import Path

import huggingface_hub
import pytz
from dotenv import load_dotenv
from git import Repo
from huggingface_hub.errors import RepositoryNotFoundError, RevisionNotFoundError
from tqdm.auto import tqdm
from tqdm.contrib.logging import logging_redirect_tqdm

from app.config.hf_config import HF_TOKEN, QUEUE_REPO, API, EVAL_REQUESTS_PATH
from app.utils.model_validation import ModelValidator
# Silence the Hub client's logging and progress bars so our own tqdm
# progress bars stay readable.
huggingface_hub.logging.set_verbosity_error()
huggingface_hub.utils.disable_progress_bars()

logging.basicConfig(
    level=logging.ERROR,
    format="%(message)s",
)
logger = logging.getLogger(__name__)

# Load HF_TOKEN and friends from a local .env file before any Hub API calls.
load_dotenv()

validator = ModelValidator()
def get_changed_files(repo_path, start_date, end_date):
    """Return the set of file paths touched by commits in a date window.

    Args:
        repo_path: Path to a local git checkout.
        start_date: Inclusive lower bound, ``YYYY-MM-DD``.
        end_date: Inclusive upper bound, ``YYYY-MM-DD``.

    Returns:
        set[str]: Repo-relative paths changed by commits whose commit date
        falls within [start_date, end_date].
    """
    repo = Repo(repo_path)
    start = datetime.strptime(start_date, "%Y-%m-%d")
    end = datetime.strptime(end_date, "%Y-%m-%d")

    changed_files = set()
    # iter_commits walks history newest -> oldest, so we can stop at the
    # first commit older than the window.
    pbar = tqdm(repo.iter_commits(), desc=f"Reading commits from {end_date} to {start_date}")
    for commit in pbar:
        # NOTE(review): committed_date is an epoch timestamp; fromtimestamp
        # converts to *local* time, so window edges are local-time — confirm
        # this is intended (pytz is imported but unused).
        commit_date = datetime.fromtimestamp(commit.committed_date)
        pbar.set_postfix_str(f"Commit date: {commit_date}")
        if start <= commit_date <= end:
            if commit.parents:
                changed_files.update(item.a_path for item in commit.diff(commit.parents[0]))
            else:
                # Root commit has no parent to diff against: every file it
                # introduced counts as changed.
                changed_files.update(commit.stats.files)
        if commit_date < start:
            break
    return changed_files
def read_json(repo_path, file):
    """Load and return the JSON content of ``repo_path``/``file``.

    Raises:
        FileNotFoundError: If the file does not exist.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    # Use a distinct handle name (the original shadowed the `file` param)
    # and an explicit encoding for portability.
    with open(Path(repo_path) / file, encoding="utf-8") as fp:
        return json.load(fp)
def write_json(repo_path, file, content):
    """Serialize ``content`` as 2-space-indented JSON to ``repo_path``/``file``.

    Overwrites the file if it already exists.
    """
    # Distinct handle name (the original shadowed the `file` param) and an
    # explicit encoding, matching read_json.
    with open(Path(repo_path) / file, "w", encoding="utf-8") as fp:
        json.dump(content, fp, indent=2)
def main():
    """Re-validate model sizes for eval request files changed in a date window.

    For every request JSON touched between ``start_date`` and ``end_date``,
    re-fetch the model info from the Hub, recompute the parameter count via
    the validator, and rewrite the request file in place when the stored
    ``params`` value is stale.
    """
    # Allow overriding the checkout location via env; the default keeps the
    # original hard-coded path for backward compatibility.
    requests_path = os.environ.get("REQUESTS_PATH", "/Users/lozowski/Developer/requests")
    start_date = "2024-12-09"
    end_date = "2025-01-07"

    changed_files = get_changed_files(requests_path, start_date, end_date)

    for file in tqdm(changed_files):
        try:
            request_data = read_json(requests_path, file)
        except FileNotFoundError:
            # The path was deleted/renamed after the commit that touched it.
            tqdm.write(f"File {file} not found")
            continue
        except json.JSONDecodeError:
            # The changed path is not a valid JSON request file; skip it
            # instead of aborting the whole run.
            tqdm.write(f"File {file} is not valid JSON")
            continue

        try:
            model_info = API.model_info(
                repo_id=request_data["model"],
                revision=request_data["revision"],
                token=HF_TOKEN,
            )
        # Inner quotes must differ from the f-string's own quotes to stay
        # valid on Python < 3.12 (nested same-quote f-strings need PEP 701).
        except (RepositoryNotFoundError, RevisionNotFoundError):
            tqdm.write(f"Model info for {request_data['model']} not found")
            continue

        # Redirect logging through tqdm so validator output doesn't mangle
        # the progress bar.
        with logging_redirect_tqdm():
            new_model_size, error = asyncio.run(validator.get_model_size(
                model_info=model_info,
                precision=request_data["precision"],
                base_model=request_data["base_model"],
                revision=request_data["revision"],
            ))

        if error:
            tqdm.write(f"Error getting model size info for {request_data['model']}, {error}")
            continue

        old_model_size = request_data["params"]
        if old_model_size != new_model_size:
            if new_model_size > 100:
                tqdm.write(f"Model: {request_data['model']}, size is more than 100B: {new_model_size}")
            tqdm.write(f"Model: {request_data['model']}, old size: {request_data['params']} new size: {new_model_size}")
            tqdm.write(f"Updating request file {file}")
            request_data["params"] = new_model_size
            write_json(requests_path, file, content=request_data)


if __name__ == "__main__":
    main()