import csv
import json

import requests


# --- Export settings -------------------------------------------------------
BASE_URL = 'https://api.openreview.net'
INVITATION = 'ICLR.cc/2023/Conference/-/Blind_Submission'
PAGE_SIZE = 1000        # notes requested per HTTP call
MAX_COUNT = 4944        # upper bound on the number of notes downloaded
REQUEST_TIMEOUT = 30    # seconds; keeps a stalled connection from hanging the script
OUTPUT_PATH = 'iclr_submissions.csv'
CSV_HEADER = ['title', 'url', 'pdf', 'tldr', 'abstract', 'keywords']


def fetch_submissions(max_count=MAX_COUNT, page_size=PAGE_SIZE):
    """Download submission notes from the ``/notes`` endpoint, page by page.

    Args:
        max_count: Stop once this many notes have been requested.
        page_size: Maximum number of notes asked for in a single request.

    Returns:
        A list with the entries of the ``notes`` array of every page, in the
        order they were received.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        KeyError: If a successful response has no ``notes`` field.
    """
    papers = []
    offset = 0
    while offset < max_count:
        # Never ask for more than what is left under the cap.
        limit = min(page_size, max_count - offset)
        print(offset, limit)

        response = requests.get(
            BASE_URL + '/notes',
            # Let requests do the percent-encoding instead of hand-escaping
            # ',' and '/' inside the URL string.
            params={
                'details': 'invitation,original',
                'offset': offset,
                'limit': limit,
                'invitation': INVITATION,
            },
            timeout=REQUEST_TIMEOUT,
        )
        # Surface HTTP failures here rather than as a confusing KeyError below.
        response.raise_for_status()

        page = json.loads(response.text)['notes']
        if not page:
            # Nothing left on the server; stop instead of requesting empty
            # pages until the cap is reached.
            break

        papers.extend(page)
        # Advance by what was actually received so a short page never causes
        # notes to be skipped.
        offset += len(page)

    return papers


def paper_to_row(paper):
    """Build one CSV row (ordered like ``CSV_HEADER``) from a note dict.

    ``paper`` must have a ``forum`` id and a ``content`` dict.  Fields missing
    from ``content`` become empty strings so that one incomplete note does not
    abort the whole export.
    """
    content = paper['content']
    forum_id = paper['forum']

    keywords = content.get('keywords') or []
    if not isinstance(keywords, str):
        # Joining a plain string would split it into single characters, so
        # only sequences of keywords are joined.
        keywords = ', '.join(keywords)

    return [
        content.get('title', ''),
        f'https://openreview.net/forum?id={forum_id}',
        f'https://openreview.net/pdf?id={forum_id}',
        content.get('TL;DR', ''),
        content.get('abstract', ''),
        keywords,
    ]


def write_csv(papers, path=OUTPUT_PATH):
    """Write the header plus one row per note in ``papers`` to ``path``."""
    # newline='' is what the csv module expects so it controls line endings.
    with open(path, 'w', encoding='UTF8', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(CSV_HEADER)
        writer.writerows(paper_to_row(paper) for paper in papers)


def main():
    """Fetch the submissions and export them to ``OUTPUT_PATH``."""
    write_csv(fetch_submissions())


if __name__ == '__main__':
    main()