Spaces:
Sleeping
Sleeping
import csv
import os
import re
import sys

import numpy as np
import pandas as pd

# Make the project root importable before pulling in the local helpers.
sys.path.append("./")
from evaluations.utils import *
# Token read from the "githubToken" environment variable (None when unset).
token = os.getenv("githubToken")
# Venue names; each one is also the name of a sheet in the published workbook.
custom_order = ["MICCAI", "MIDL", "Nature", "arXiv"]
# Published Google Sheets workbook that holds one sheet per venue.
SHEET_URL = "https://docs.google.com/spreadsheets/d/e/2PACX-1vQjpsSYcEcYUVB-88bCQ01UfQf0z9m16ax7p1ft03G68Nr-DdXHpPt-xOFSrXFj1N49AjK5nYhmKBfo/pub?output=xlsx"
# to_csv does not create missing directories, so make sure the target exists.
os.makedirs("data", exist_ok=True)
# Download and parse the workbook once; passing a list of sheet names returns
# a dict mapping each name to its DataFrame, instead of refetching per venue.
venue_sheets = pd.read_excel(SHEET_URL, sheet_name=custom_order)
for venue in custom_order:
    df = venue_sheets[venue]
    # Tabs inside cells would corrupt the tab-separated output; turn them into spaces.
    df = df.replace('\t', ' ', regex=True)
    # The DataFrame index is written as the first column (no index=False).
    df.to_csv(f'data/{venue}.csv', sep="\t")
# One record per paper whose repository archive was downloaded successfully.
paper_records = []
# Running counter used to give each downloaded archive a unique file name.
zip_idx = 0
for venue in custom_order:
    # newline="" is what the csv module requires so that quoted fields with
    # embedded line breaks are parsed correctly; pandas wrote the file as UTF-8.
    with open(f'data/{venue}.csv', newline="", encoding="utf-8") as file:
        tsv_file = csv.reader(file, delimiter="\t")
        for row in tsv_file:
            # Skip rows with an empty first column.
            # NOTE(review): if the file was written by pandas with its index as
            # column 0, this is what drops the header row - confirm.
            if row[0] == "":
                continue
            # Skip rows without a title.
            if row[1] == "":
                continue
            repo_url = row[4]
            username, repo_name = decompose_url(repo_url)
            repo_save_name = f"repo_{zip_idx}.zip"
            repository_zip_name = f"data/test/{repo_save_name}"
            log(0, "LOG", f"Fetching github repository: https://github.com/{username}/{repo_name}")
            fetch_repo(0, repo_url, repository_zip_name, token)
            # Only papers whose archive exists on disk after the fetch are recorded.
            if os.path.exists(repository_zip_name):
                paper_records.append({
                    "venue": venue,
                    "title": row[1],
                    "year": row[2],
                    "pdf": row[3],
                    "url": row[4],
                    "public": row[5],
                    "dependencies": row[6],
                    "training": row[7],
                    "evaluation": row[8],
                    "weights": row[9],
                    "readme": row[10],
                    "license": row[11],
                    # This column stores the archive path, not the numeric counter.
                    "zip_idx": repository_zip_name,
                })
                # NOTE(review): the original indentation was lost; this assumes the
                # counter advances only after a successful download - confirm.
                zip_idx += 1
# Build the frame once instead of concatenating a one-row frame per paper.
paper_dump = pd.DataFrame(paper_records)
paper_dump.to_csv(f'data/zipfiles.csv', sep="\t")