Spaces:
Sleeping
Sleeping
openreviewer
commited on
Commit
•
0bf9463
1
Parent(s):
725cdd2
Upload folder using huggingface_hub
Browse files- .gitattributes +16 -35
- .github/workflows/deploy.yml +54 -0
- .gitignore +3 -0
- README.md +3 -9
- app.py +168 -0
- file_utils.py +3 -0
- iclr2024/.DS_Store +0 -0
- iclr2024/question1.txt +1 -0
- iclr2024/question10.txt +1 -0
- iclr2024/question11.txt +7 -0
- iclr2024/question2.txt +1 -0
- iclr2024/question3.txt +1 -0
- iclr2024/question4.txt +1 -0
- iclr2024/question5.txt +1 -0
- iclr2024/question6.txt +1 -0
- iclr2024/question7.txt +1 -0
- iclr2024/question8.txt +1 -0
- iclr2024/question9.txt +1 -0
- iclr2024/systemrole.txt +11 -0
- logging_config.py +9 -0
- models.py +158 -0
- requirements.txt +108 -0
- utils.py +49 -0
.gitattributes
CHANGED
@@ -1,35 +1,16 @@
|
|
1 |
-
|
2 |
-
*.
|
3 |
-
*.
|
4 |
-
*.
|
5 |
-
*.
|
6 |
-
|
7 |
-
*.
|
8 |
-
*.
|
9 |
-
*.
|
10 |
-
*.
|
11 |
-
*.
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
*.ot filter=lfs diff=lfs merge=lfs -text
|
18 |
-
*.parquet filter=lfs diff=lfs merge=lfs -text
|
19 |
-
*.pb filter=lfs diff=lfs merge=lfs -text
|
20 |
-
*.pickle filter=lfs diff=lfs merge=lfs -text
|
21 |
-
*.pkl filter=lfs diff=lfs merge=lfs -text
|
22 |
-
*.pt filter=lfs diff=lfs merge=lfs -text
|
23 |
-
*.pth filter=lfs diff=lfs merge=lfs -text
|
24 |
-
*.rar filter=lfs diff=lfs merge=lfs -text
|
25 |
-
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
26 |
-
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
27 |
-
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
28 |
-
*.tar filter=lfs diff=lfs merge=lfs -text
|
29 |
-
*.tflite filter=lfs diff=lfs merge=lfs -text
|
30 |
-
*.tgz filter=lfs diff=lfs merge=lfs -text
|
31 |
-
*.wasm filter=lfs diff=lfs merge=lfs -text
|
32 |
-
*.xz filter=lfs diff=lfs merge=lfs -text
|
33 |
-
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
-
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
-
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
1 |
+
# HIDE ALL OF THE FILES IN THE DIRECTORY
|
2 |
+
*.py
|
3 |
+
*.log
|
4 |
+
*.md
|
5 |
+
*.txt
|
6 |
+
iclr2024/**
|
7 |
+
*.github/**
|
8 |
+
*.gitignore
|
9 |
+
*.gitattributes
|
10 |
+
*.git/**
|
11 |
+
*.__pycache__/**
|
12 |
+
|
13 |
+
|
14 |
+
|
15 |
+
|
16 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
.github/workflows/deploy.yml
ADDED
@@ -0,0 +1,54 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
name: Deploy Gradio App
|
2 |
+
|
3 |
+
on:
|
4 |
+
push:
|
5 |
+
branches:
|
6 |
+
- main
|
7 |
+
|
8 |
+
jobs:
|
9 |
+
deploy:
|
10 |
+
runs-on: ubuntu-latest
|
11 |
+
|
12 |
+
steps:
|
13 |
+
- name: Checkout code
|
14 |
+
uses: actions/checkout@v3
|
15 |
+
|
16 |
+
- name: Set up Python
|
17 |
+
uses: actions/setup-python@v4
|
18 |
+
with:
|
19 |
+
python-version: '3.12.3' # Specify the Python version you are using
|
20 |
+
|
21 |
+
- name: Install dependencies
|
22 |
+
run: |
|
23 |
+
python -m pip install --upgrade pip
|
24 |
+
pip install -r requirements.txt # Ensure you have a requirements.txt file
|
25 |
+
|
26 |
+
- name: Login to Hugging Face
|
27 |
+
env:
|
28 |
+
HUGGINGFACE_TOKEN: ${{ secrets.HUGGING_FACE_TOKEN }}
|
29 |
+
run: |
|
30 |
+
huggingface-cli login --token $HUGGINGFACE_TOKEN
|
31 |
+
|
32 |
+
- name: Deploy Gradio App
|
33 |
+
env:
|
34 |
+
HUGGINGFACE_TOKEN: ${{ secrets.HUGGING_FACE_TOKEN }}
|
35 |
+
run: gradio deploy
|
36 |
+
# - name: Upload to Hugging Face Spaces
|
37 |
+
# env:
|
38 |
+
# HF_TOKEN: ${{ secrets.HUGGING_FACE_TOKEN }}
|
39 |
+
# run: |
|
40 |
+
# git lfs install
|
41 |
+
# huggingface-cli lfs-enable-largefiles .
|
42 |
+
# huggingface-cli repo create reviewerarena/reviewer-arena --type=space
|
43 |
+
# huggingface-cli repo upload reviewerarena/reviewer-arena . --all-yes
|
44 |
+
# - name: Login to Hugging Face
|
45 |
+
# env:
|
46 |
+
# HUGGINGFACE_TOKEN: ${{ secrets.HUGGING_FACE_TOKEN }}
|
47 |
+
# run: |
|
48 |
+
# echo "$HUGGINGFACE_TOKEN" | huggingface-cli login --token
|
49 |
+
|
50 |
+
# - name: Deploy Gradio App
|
51 |
+
# env:
|
52 |
+
# HUGGINGFACE_TOKEN: ${{ secrets.HUGGING_FACE_TOKEN }}
|
53 |
+
# run: |
|
54 |
+
# gradio deploy --token $HUGGINGFACE_TOKEN
|
.gitignore
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
my-venv/
|
2 |
+
old/
|
3 |
+
arena.log
|
README.md
CHANGED
@@ -1,12 +1,6 @@
|
|
1 |
---
|
2 |
-
title:
|
3 |
-
emoji: 👁
|
4 |
-
colorFrom: yellow
|
5 |
-
colorTo: purple
|
6 |
-
sdk: gradio
|
7 |
-
sdk_version: 4.31.3
|
8 |
app_file: app.py
|
9 |
-
|
|
|
10 |
---
|
11 |
-
|
12 |
-
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
1 |
---
|
2 |
+
title: reviewer-arena
|
|
|
|
|
|
|
|
|
|
|
3 |
app_file: app.py
|
4 |
+
sdk: gradio
|
5 |
+
sdk_version: 4.31.0
|
6 |
---
|
|
|
|
app.py
ADDED
@@ -0,0 +1,168 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
from utils import process_paper
|
3 |
+
import os
|
4 |
+
import logging
|
5 |
+
import html
|
6 |
+
from logging_config import setup_logging
|
7 |
+
|
8 |
+
|
9 |
+
setup_logging() # Ensure logging is initialized
|
10 |
+
# Working directories: where uploaded papers are staged, and where the
# reviewer prompt files (question*.txt, systemrole.txt) live.
paper_dir, prompt_dir = 'path_to_temp_storage', 'iclr2024'

# Provider credentials, looked up in the environment once at import time.
# Each entry maps the key name used inside the app to the environment
# variable it is read from; a missing variable yields None.
api_keys = {
    key_name: os.environ.get(env_name)
    for key_name, env_name in (
        ('openai_api_key', 'openai_api_key'),
        ('claude_api_key', 'anthropic_api_key'),
        ('gemini_api_key', 'google_api_key'),
        ('commandr_api_key', 'cohere_api_key'),
    )
}

# True sends papers to the real model APIs; False serves canned dummy reviews.
use_real_api = False
|
22 |
+
|
23 |
+
def _placeholder_reviews():
    """Return the two canned reviews served when real API calls are disabled.

    Both reviews share every section except the summary, which names the
    model number (1 or 2).
    """
    shared_sections = {
        "Soundness": "The assumptions underlying the proposed enhancements are occasionally not fully justified, particularly concerning the scalability of the algorithms under varied and unpredictable traffic conditions. A more rigorous examination of these assumptions is necessary to solidify the paper's foundation.",
        "Presentation": "While the paper is structured adequately, some sections delve into technical details that are not sufficiently elucidated for a broader audience. This could potentially limit the paper's impact and accessibility, making it challenging for non-specialists to fully grasp the implications of the research.",
        "Contribution": "The paper makes a moderate contribution to the existing body of knowledge, offering incremental improvements over current methodologies rather than a completely novel approach. However, these improvements are significant and could lead to better practical implementations in the field of autonomous driving.",
        "Strengths": "The initial results presented in the paper are promising, showing potential for the proposed methods. The inclusion of real-world data in the preliminary experiments adds a layer of credibility and relevance to the results, showcasing the practical applicability of the research.",
        "Weaknesses": "The paper lacks detailed exposition on the methodology, particularly in how the algorithms adapt to unexpected or novel scenarios. This is a critical area that requires further development and testing to ensure the robustness and reliability of the proposed solutions.",
        "Questions/Suggestions": "The statistical analysis section could be enhanced by incorporating more robust statistical techniques and a wider array of metrics. Additionally, conducting tests in a variety of driving environments could help in substantiating the claims made and strengthen the overall findings of the research.",
        "Ethics Review": "The research complies with all ethical standards, addressing potential ethical issues related to autonomous driving comprehensively. Issues such as privacy concerns, decision-making in critical situations, and the overall impact on societal norms are discussed and handled with the utmost care.",
        "Overall Score": "3/5",
        "Confidence": "Confidence in the findings is moderate. While the initial results are encouraging, the limited scope of testing and some unresolved questions regarding scalability and robustness temper the confidence in these results.",
        "Code of Conduct": "There are no violations of the code of conduct noted. The research upholds ethical standards and maintains transparency in methodologies and data usage, contributing to its integrity and the trustworthiness of the findings.",
    }
    placeholder_reviews = []
    for model_number in (1, 2):
        # Summary goes first so the rendered section order is unchanged.
        review = {
            "Summary": f"This is a placeholder review for Model {model_number}. The paper explores advanced methodologies in reinforcement learning applied to autonomous driving systems, proposing significant enhancements to decision-making algorithms that could improve safety and operational efficiency. The authors provide a detailed analysis of the current limitations of existing systems and suggest innovative solutions that could transform the field.",
        }
        review.update(shared_sections)
        placeholder_reviews.append(review)
    return placeholder_reviews


def _parse_review_sections(sections):
    """Turn an iterable of "Name: text" strings into a {name: text} dict.

    Entries without a colon are skipped; only the first colon splits, so the
    text itself may contain colons.
    """
    parsed = {}
    for section in sections:
        if ':' not in section:
            continue
        key, value = section.split(':', 1)
        parsed[key.strip()] = value
    return parsed


def _render_review(review):
    """Render one {section: text} review as an HTML fragment.

    Text is HTML-escaped first and only then are newlines turned into <br>,
    so the <br> tags survive while markup inside the review text is shown
    literally instead of being injected into the page.
    """
    parts = ["<div class='review-container'>"]
    for section, content in review.items():
        # quote=False: the values land in element text, not in attributes,
        # so only &, < and > need escaping.
        safe_section = html.escape(section.strip(), quote=False)
        safe_content = html.escape(content.strip(), quote=False).replace('\n', '<br>')
        parts.append(
            f"<div class='review-section'><strong>{safe_section}:</strong> <span>{safe_content}</span></div>"
        )
    parts.append("</div>")
    return "".join(parts)


def review_papers(pdf_file):
    """Produce one HTML-formatted review string per model for an uploaded paper.

    When ``use_real_api`` is true the paper is sent through ``process_paper``
    and each returned review (an iterable of "Section: text" strings) is
    parsed into sections; otherwise two canned placeholder reviews are used.

    Args:
        pdf_file: The uploaded file object handed over by the Gradio File
            input; it is passed to ``process_paper`` untouched.

    Returns:
        A list of HTML strings, one per review, in the order produced.
    """
    logging.info(f"Received file type: {type(pdf_file)}")
    if use_real_api:
        raw_reviews = process_paper(pdf_file, paper_dir, prompt_dir, api_keys)
        reviews = [_parse_review_sections(review) for review in raw_reviews]
    else:
        # Dummy reviews for testing with structured sections
        reviews = _placeholder_reviews()

    review_texts = [_render_review(review) for review in reviews]
    logging.debug(f"Final formatted reviews: {review_texts}")
    return review_texts
|
87 |
+
|
88 |
+
def setup_interface():
    """Build the Gradio Blocks UI for the reviewer arena and return it.

    The page has a file upload with a submit button, two side-by-side
    Markdown panels (Model A / Model B) that receive the output of
    ``review_papers``, and a radio group with a button for voting.

    Returns:
        The constructed ``gr.Blocks`` object; the caller is responsible for
        launching it.
    """
    logging.debug("Setting up Gradio interface.")
    css = """
    .review-container {
        padding: 10px;
        margin-bottom: 20px;
        border: 1px solid #ccc;
        background-color: #f9f9f9;
    }
    .review-section {
        margin-bottom: 12px;
        padding: 8px;
        background-color: #ffffff;
        border-left: 4px solid #007BFF;
        padding-left: 10px;
    }
    .review-section strong {
        color: #333;
        font-weight: bold;
        display: block;
        margin-bottom: 5px;
    }
    .review-section span, .gr-markdown {
        color: #000;
        font-size: 14px;
        line-height: 1.5;
        display: block;
        white-space: normal;
        opacity: 1;
    }
    .model-label {
        font-size: 18px;
        font-weight: bold;
        color: #007BFF;
        margin-bottom: 10px;
    }
    .gr-file, .gr-button, .gr-radio {
        width: 300px;
        margin: auto;
    }
    """
    with gr.Blocks(css=css) as demo:
        gr.Markdown("## Reviewer Arena")
        gr.Markdown("Upload an academic paper to get reviews from two randomly selected LLMs.")
        with gr.Row():
            file_input = gr.File(label="Upload Academic Paper")
            submit_button = gr.Button("Submit!!")
        with gr.Row():
            with gr.Column():
                gr.HTML("<div class='model-label'>Model A</div>")
                review1 = gr.Markdown()
            with gr.Column():
                gr.HTML("<div class='model-label'>Model B</div>")
                review2 = gr.Markdown()

        # Voting options
        vote_options = ["👍 A is better", "👍 B is better", "👔 Tie", "👎 Both are bad"]
        # The default must be one of the choices verbatim. The bare string
        # "Tie" matches none of them, so reference the tie entry directly.
        vote = gr.Radio(label="Vote on the best model", choices=vote_options, value=vote_options[2])
        vote_button = gr.Button("Submit Vote")

        def handle_vote(vote):
            """Echo the selected vote to stdout and return a confirmation message."""
            print(f"Vote received: {vote}")
            return f"Vote for '{vote}' received!"

        # The confirmation text goes to a hidden textbox, so it is not shown.
        vote_button.click(fn=handle_vote, inputs=vote, outputs=gr.Textbox(visible=False))

        # review_papers returns a list of HTML strings, one per output panel.
        submit_button.click(
            fn=review_papers,
            inputs=[file_input],
            outputs=[review1, review2]
        )
    logging.debug("Gradio interface setup complete.")
    return demo
|
161 |
+
|
162 |
+
if __name__ == "__main__":
    # Script entry point: configure logging at INFO, build the UI, serve it.
    logging.basicConfig(level=logging.INFO)
    demo = setup_interface()

    # BLOCK PATHS OF ALL THE FILES AND LAUNCH THE APP
    # An authenticated, shared launch was previously sketched here using the
    # 'login_username' / 'login_password' environment variables; the app is
    # currently launched without authentication.
    demo.launch()
|
file_utils.py
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
def read_file(file_path):
    """Return the full contents of ``file_path`` decoded as UTF-8 text.

    Args:
        file_path: Path of the file to read.

    Returns:
        The file's text as a single string.
    """
    with open(file_path, mode='r', encoding='utf-8') as source:
        text = source.read()
    return text
|
iclr2024/.DS_Store
ADDED
Binary file (6.15 kB). View file
|
|
iclr2024/question1.txt
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
Briefly summarize the paper and its contributions. This is not the place to critique the paper; the authors should generally agree with a well-written summary.
|
iclr2024/question10.txt
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
Please provide a 'confidence score' for your assessment of this submission to indicate how confident you are in your evaluation. 5: You are absolutely certain about your assessment. You are very familiar with the related work and checked the math/other details carefully. 4: You are confident in your assessment, but not absolutely certain. It is unlikely, but not impossible, that you did not understand some parts of the submission or that you are unfamiliar with some pieces of related work. 3: You are fairly confident in your assessment. It is possible that you did not understand some parts of the submission or that you are unfamiliar with some pieces of related work. Math/other details were not carefully checked. 2: You are willing to defend your assessment, but it is quite likely that you did not understand the central parts of the submission or that you are unfamiliar with some pieces of related work. Math/other details were not carefully checked. 1: Your assessment is an educated guess. The submission is not in your area or the submission was difficult to understand. Math/other details were not carefully checked.
|
iclr2024/question11.txt
ADDED
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
If there are no violations of the Code of Conduct with this paper, please respond with NO. Otherwise, if this paper violates the Code of Conduct, please indicate the relevant section(s) from the following options:
|
2 |
+
|
3 |
+
Yes, Harassment, bullying, or discrimination based on personal characteristics
|
4 |
+
Yes, Inappropriate physical contact, sexual harassment, or unwelcome sexual attention
|
5 |
+
Yes, Offensive comments related to gender, race, religion, or other protected characteristics
|
6 |
+
Yes, Disruption of talks or other events, or behavior interfering with participation
|
7 |
+
Yes, Inappropriate use of imagery, language, or personal attacks in virtual interactions
|
iclr2024/question2.txt
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
Please assign the paper a numerical rating on the following scale to indicate the soundness of the technical claims, experimental and research methodology and on whether the central claims of the paper are adequately supported with evidence: 4 excellent, 3 good, 2 fair, 1 poor.
|
iclr2024/question3.txt
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
Please assign the paper a numerical rating on the following scale to indicate the quality of the presentation. This should take into account the writing style and clarity, as well as contextualization relative to prior work: 4 excellent, 3 good, 2 fair, 1 poor.
|
iclr2024/question4.txt
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
Please assign the paper a numerical rating on the following scale to indicate the quality of the overall contribution this paper makes to the research area being studied. Are the questions being asked important? Does the paper bring a significant originality of ideas and/or execution? Are the results valuable to share with the broader NeurIPS community? 4 excellent, 3 good, 2 fair, 1 poor.
|
iclr2024/question5.txt
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
Please give a substantive assessment of the strengths of the paper, touching on each of the following dimensions: originality, quality, clarity, and significance. We encourage reviewers to be broad in their definitions of originality and significance. For example, originality may arise from a new definition or problem formulation, creative combinations of existing ideas, application to a new domain, or removing limitations from prior results. You can incorporate Markdown and Latex into your review.
|
iclr2024/question6.txt
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
Please give a substantive assessment of the weaknesses of the paper. Focus on constructive and actionable insights on how the work could improve towards its stated goals. Be specific, avoid generic remarks. For example, if you believe the contribution lacks novelty, provide references and an explanation as evidence; if you believe experiments are insufficient, explain why and exactly what is missing, etc.
|
iclr2024/question7.txt
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
Please list and carefully describe any questions and suggestions for the authors. Think of the things where a response from the author can change your opinion, clarify a confusion or address a limitation. This is important for a productive rebuttal and discussion phase with the authors.
|
iclr2024/question8.txt
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
If there are ethical issues with this paper, please flag the paper for an ethics review and select area of expertise that would be most useful for the ethics reviewer to have: No ethics review needed, Ethics review needed: Discrimination / Bias / Fairness Concerns, Ethics review needed: Inadequate Data and Algorithm Evaluation, Ethics review needed: Inappropriate Potential Applications & Impact (e.g., human rights concerns), Ethics review needed: Privacy and Security (e.g., consent, surveillance, data storage concern), Ethics review needed: Compliance (e.g., GDPR, copyright, license, terms of use), Ethics review needed: Research Integrity Issues (e.g., plagiarism), Ethics review needed: Responsible Research Practice (e.g., IRB, documentation, research ethics), Ethics review needed: Failure to comply with NeurIPS Code of Ethics (lack of required documentation, safeguards, disclosure, licenses, legal compliance)
|
iclr2024/question9.txt
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
Please provide an 'overall score' for this submission: 10: Award quality: Technically flawless paper with groundbreaking impact, with exceptionally strong evaluation, reproducibility, and resources, and no unaddressed ethical considerations. 9: Very Strong Accept: Technically flawless paper with groundbreaking impact on at least one area of AI/ML and excellent impact on multiple areas of AI/ML, with flawless evaluation, resources, and reproducibility, and no unaddressed ethical considerations. 8: Strong Accept: Technically strong paper, with novel ideas, excellent impact on at least one area, or high-to-excellent impact on multiple areas, with excellent evaluation, resources, and reproducibility, and no unaddressed ethical considerations. 7: Accept: Technically solid paper, with high impact on at least one sub-area, or moderate-to-high impact on more than one areas, with good-to-excellent evaluation, resources, reproducibility, and no unaddressed ethical considerations. 6: Weak Accept: Technically solid, moderate-to-high impact paper, with no major concerns with respect to evaluation, resources, reproducibility, ethical considerations. 5: Borderline accept: Technically solid paper where reasons to accept outweigh reasons to reject, e.g., limited evaluation. Please use sparingly. 4: Borderline reject: Technically solid paper where reasons to reject, e.g., limited evaluation, outweigh reasons to accept, e.g., good evaluation. Please use sparingly. 3: Reject: For instance, a paper with technical flaws, weak evaluation, inadequate reproducibility and incompletely addressed ethical considerations. 2: Strong Reject: For instance, a paper with major technical flaws, and/or poor evaluation, limited impact, poor reproducibility and mostly unaddressed ethical considerations. 1: Very Strong Reject: For instance, a paper with trivial results or unaddressed ethical considerations.
|
iclr2024/systemrole.txt
ADDED
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
You are a very critical but fair peer reviewer. You will be provided with papers submitted to a conference/journal to review. The papers will be delimited with #### characters.
|
2 |
+
|
3 |
+
We are aiming for a 20-25% acceptance rate. Average score thresholds of 5.5-5.7 roughly correspond to acceptance rates of 25%-20%. It is certainly possible to both accept papers below this threshold and reject papers above it. But any such decision should be properly explained.
|
4 |
+
|
5 |
+
The statistics for the previous year were: A total of 3422 submissions were received. The average score of all submissions was 5.47 with standard deviation 1.30, with scores ranging from 1.00 to 9.00. Aim for a similar distribution of scores and use the full range of scores between 1-10.
|
6 |
+
|
7 |
+
Out of all submissions, 32% (1095 submissions) were accepted, with scores ranging from 4.50 to 9.00 and an average score of 6.61 with a standard deviation of 0.75. Only 2.1% (55 submissions) were accepted for oral presentation, with scores ranging from 5.00 to 9.00 and an average score of 7.80 with a standard deviation of 0.63.
|
8 |
+
|
9 |
+
6.64% (174 submissions) were selected for the spotlight, with scores ranging from 5.60 to 8.60 and an average score of 7.33 with a standard deviation of 0.58. 33.04% (866 submissions) were accepted for poster presentation, with scores ranging from 4.50 to 8.00 and an average score of 6.39 with a standard deviation of 0.61.
|
10 |
+
|
11 |
+
60.36% (1582 submissions) were rejected, with scores ranging from 1.00 to 7.50 and an average score of 4.69 with a standard deviation of 0.97. Additionally, 775 submissions were withdrawn and 26 were desk rejected.
|
logging_config.py
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import logging
|
2 |
+
|
3 |
+
def setup_logging():
    """Configure logging via ``logging.basicConfig`` and log a confirmation.

    Requests DEBUG level, a "time - level - message" record format, and the
    file ``arena.log`` (relative to the current working directory) as the
    destination, then emits one INFO record.
    """
    settings = {
        "filename": "arena.log",
        "level": logging.DEBUG,  # DEBUG so the app's debug traces are kept
        "format": '%(asctime)s - %(levelname)s - %(message)s',
    }
    logging.basicConfig(**settings)
    logging.info("Logging setup complete.")
|
models.py
ADDED
@@ -0,0 +1,158 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import logging
|
3 |
+
import openai
|
4 |
+
import tiktoken
|
5 |
+
import re
|
6 |
+
import anthropic
|
7 |
+
import cohere
|
8 |
+
import google.generativeai as genai
|
9 |
+
import time
|
10 |
+
from file_utils import read_file
|
11 |
+
from openai import OpenAI
|
12 |
+
|
13 |
+
class Paper:
    """A paper to be reviewed, identified by an arXiv id.

    Both constructor arguments are stored unchanged as attributes of the
    same name. ``PaperProcessor.prepare_base_prompt`` returns ``tex_file``
    as the prompt text.
    """

    def __init__(self, arxiv_id, tex_file):
        self.arxiv_id, self.tex_file = arxiv_id, tex_file
|
17 |
+
|
18 |
+
class PaperProcessor:
|
19 |
+
MAX_TOKENS = 127192
|
20 |
+
encoding = tiktoken.encoding_for_model("gpt-4-0125-preview")
|
21 |
+
|
22 |
+
def __init__(self, prompt_dir, model, openai_api_key, claude_api_key, gemini_api_key, commandr_api_key):
    """Store the prompt directory, the model selector and the provider API keys.

    Args:
        prompt_dir: Directory holding systemrole.txt and the question files.
        model: Model selector later passed to ``call_model`` as ``model_type``.
        openai_api_key: Key used for the OpenAI client.
        claude_api_key: Key used for the Anthropic client.
        gemini_api_key: Key used to configure google.generativeai.
        commandr_api_key: Key used for the Cohere client.
    """
    self.prompt_dir, self.model = prompt_dir, model
    (
        self.openai_api_key,
        self.claude_api_key,
        self.gemini_api_key,
        self.commandr_api_key,
    ) = (openai_api_key, claude_api_key, gemini_api_key, commandr_api_key)
|
29 |
+
|
30 |
+
def count_tokens(self, text):
    """Return the number of tokens ``self.encoding`` produces for ``text``."""
    token_ids = self.encoding.encode(text)
    return len(token_ids)
|
32 |
+
|
33 |
+
def truncate_content(self, content):
    """Cut ``content`` down to at most ``MAX_TOKENS`` tokens.

    The text is tokenized once with ``self.encoding``; if it fits it is
    returned unchanged, otherwise the first ``MAX_TOKENS`` tokens are
    decoded back to text and returned.

    Args:
        content: The prompt text to bound.

    Returns:
        ``content`` itself when it fits, else the decoded truncated prefix.
    """
    # Encode once and reuse the tokens for both the count and the slice,
    # instead of tokenizing the (potentially very long) text twice.
    tokens = self.encoding.encode(content)
    token_count = len(tokens)
    logging.debug(f"Token count before truncation: {token_count}")
    if token_count <= self.MAX_TOKENS:
        return content

    truncated_content = self.encoding.decode(tokens[:self.MAX_TOKENS])
    # Re-count from the decoded text so the log reports what is really sent.
    logging.debug(f"Content truncated. Token count after truncation: {self.count_tokens(truncated_content)}")
    return truncated_content
|
43 |
+
|
44 |
+
def prepare_base_prompt(self, paper):
    """Return the paper's ``tex_file`` attribute, unchanged, as the base prompt."""
    base_prompt = paper.tex_file
    return base_prompt
|
46 |
+
|
47 |
+
def call_model(self, prompt, model_type):
    """Send ``prompt`` to the selected provider and return the reply text.

    The system role is read from ``systemrole.txt`` in ``self.prompt_dir``
    and supplied to every provider.

    Args:
        prompt: The user prompt to send.
        model_type: One of 'gpt', 'claude', 'commandr' or 'gemini'.

    Returns:
        The model's reply as a string, or None when the system role file is
        missing, ``model_type`` is not recognised, or the provider call
        raises.
    """
    system_role_file_path = os.path.join(self.prompt_dir, "systemrole.txt")
    if not os.path.exists(system_role_file_path):
        logging.error(f"System role file not found: {system_role_file_path}")
        return None

    system_role = read_file(system_role_file_path)
    logging.debug(f"Token count of full prompt: {self.count_tokens(prompt)}")
    logging.info(f"Sending the following prompt to {model_type}: {prompt}")

    try:
        if model_type == 'gpt':
            client = OpenAI(api_key=self.openai_api_key)
            messages = [{"role": "system", "content": system_role}, {"role": "user", "content": prompt}]
            completion = client.chat.completions.create(
                model="gpt-4-turbo-2024-04-09",
                messages=messages,
                temperature=1
            )
            return completion.choices[0].message.content.strip()

        if model_type == 'claude':
            client = anthropic.Anthropic(api_key=self.claude_api_key)
            response = client.messages.create(
                model='claude-3-opus-20240229',
                max_tokens=4096,
                system=system_role,
                temperature=0.5,
                messages=[{"role": "user", "content": prompt}]
            )
            return response.content[0].text

        if model_type == 'commandr':
            co = cohere.Client(self.commandr_api_key)
            response = co.chat(
                model="command-r-plus",
                message=prompt,
                preamble=system_role
            )
            return response.text

        if model_type == 'gemini':
            genai.configure(api_key=self.gemini_api_key)
            model = genai.GenerativeModel('gemini-pro')
            # Prepend the system role so Gemini reviews under the same
            # instructions as the other providers; previously it received
            # the bare prompt only.
            response = model.generate_content(f"{system_role}\n\n{prompt}")
            return response.candidates[0].content.parts[0].text

        # Make an unknown selector visible instead of returning None silently.
        logging.error(f"Unsupported model type: {model_type}")
        return None

    except Exception as e:
        # Best-effort: report the failure (with traceback) and return None.
        logging.exception(f"Exception occurred: {e}")
        return None
|
97 |
+
|
98 |
+
def is_content_appropriate(self, content):
    """Return ``True`` unless the OpenAI moderation endpoint flags ``content``.

    Args:
        content: Text to submit to the moderation endpoint.

    Returns:
        ``False`` when the first moderation result is flagged, ``True``
        otherwise. If the moderation call itself fails, the error is logged
        and ``True`` is returned (fail-open).
    """
    try:
        response = openai.moderations.create(input=content)
        # The moderation response is an object, not a dict: read it with
        # attribute access, the same way process_paper does. Subscripting it
        # raised inside this try block, so the check always fell through to
        # the fail-open branch below.
        return not response.results[0].flagged
    except Exception as e:
        logging.error(f"Exception occurred while checking content appropriateness: {e}")
        return True  # In case of an error, default to content being appropriate
|
105 |
+
|
106 |
+
def get_prompt_files(self, prompt_dir):
    """List the question prompt files that sit directly in ``prompt_dir``.

    Args:
        prompt_dir: Directory to scan (not recursive).

    Returns:
        File names (not full paths) that start with ``question`` and end
        with ``.txt``, in the order ``os.listdir`` yields them.
    """
    question_files = []
    for entry in os.listdir(prompt_dir):
        if entry.startswith('question') and entry.endswith('.txt'):
            question_files.append(entry)
    return question_files
|
108 |
+
|
109 |
+
def process_paper(self, paper):
    """Produce a section-by-section review of ``paper`` using ``self.model``.

    The paper's base prompt is first run through OpenAI moderation. Then the
    eleven prompt files ``question1.txt`` .. ``question11.txt`` in
    ``self.prompt_dir`` are asked in order, one model call per question.

    Args:
        paper: The paper object handed to ``self.prepare_base_prompt``.

    Returns:
        * On success: a list of eleven strings, each ``"<Header> <answer>"``.
          Answers for which the model call returned ``None`` are ``"N/A"``.
        * If moderation flags the paper: ``["Desk Rejected", <reason>]``.
        * If the base prompt cannot be prepared: an error message string.
    """
    openai.api_key = self.openai_api_key
    start_time = time.time()

    base_prompt = self.prepare_base_prompt(paper)
    if base_prompt is None:
        # NOTE(review): this path returns a str while every other path
        # returns a list - kept as-is because callers may depend on it.
        return "Error: Base prompt could not be prepared."

    moderation_response = openai.moderations.create(input=base_prompt)
    if moderation_response.results[0].flagged:
        return ["Desk Rejected", "The paper contains inappropriate or harmful content."]

    header = ['Summary:', 'Soundness:', 'Presentation:', 'Contribution:', 'Strengths:', 'Weaknesses:', 'Questions:', 'Flag For Ethics Review:', 'Rating:', 'Confidence:', 'Code Of Conduct:']

    # Question number -> top of its numeric scale. Questions 2, 3, 4 and 10
    # are scored out of 5, question 9 (Rating) out of 10; the rest are text.
    max_scores = {2: 5, 3: 5, 4: 5, 10: 5, 9: 10}
    first_integer = re.compile(r'\b\d+\b')

    review_output = []
    previous_responses = []
    for i, section_header in enumerate(header, start=1):
        question_file = os.path.join(self.prompt_dir, f"question{i}.txt")
        question_text = read_file(question_file)

        if i == 1:
            prompt = f"{question_text}\n\n####\n{base_prompt}\n####"
        else:
            # NOTE(review): follow-up prompts are built from the review so
            # far plus the question; base_prompt is not included here.
            prompt = f"\nHere is your review so far:\n{' '.join(previous_responses)}\n\nHere are your reviewer instructions. Please answer the following question:\n{question_text}"

        truncated_prompt = self.truncate_content(prompt)
        logging.info(f"Processing prompt for question {i}")

        response = self.call_model(truncated_prompt, self.model)
        if response is None:
            response = "N/A"

        max_score = max_scores.get(i)
        if max_score is not None:
            # Reduce a numeric answer to "<first integer>/<max>", clamping
            # values above the scale (previously only the /5 scores were
            # clamped, so the rating could come out as e.g. "12/10").
            number_match = first_integer.search(response)
            if number_match:
                if int(number_match.group(0)) > max_score:
                    response = f"{max_score}/{max_score}"
                else:
                    response = f"{number_match.group(0)}/{max_score}"

        review_output.append(f"{section_header} {response}")
        previous_responses.append(response)

    end_time = time.time()
    elapsed_time = end_time - start_time
    print(f"Time taken to process paper: {elapsed_time:.2f} seconds")
    return review_output
|
158 |
+
|
requirements.txt
ADDED
@@ -0,0 +1,108 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
aiofiles==23.2.1
|
2 |
+
altair==5.3.0
|
3 |
+
annotated-types==0.6.0
|
4 |
+
anthropic==0.25.8
|
5 |
+
anyio==4.3.0
|
6 |
+
attrs==23.2.0
|
7 |
+
beautifulsoup4==4.12.3
|
8 |
+
boto3==1.34.103
|
9 |
+
botocore==1.34.103
|
10 |
+
cachetools==5.3.3
|
11 |
+
certifi==2024.2.2
|
12 |
+
charset-normalizer==3.3.2
|
13 |
+
click==8.1.7
|
14 |
+
cohere==5.4.0
|
15 |
+
colorama==0.4.6
|
16 |
+
contourpy==1.2.1
|
17 |
+
cycler==0.12.1
|
18 |
+
distro==1.9.0
|
19 |
+
dnspython==2.6.1
|
20 |
+
email_validator==2.1.1
|
21 |
+
fastapi==0.111.0
|
22 |
+
fastapi-cli==0.0.3
|
23 |
+
fastavro==1.9.4
|
24 |
+
ffmpy==0.3.2
|
25 |
+
filelock==3.14.0
|
26 |
+
fonttools==4.51.0
|
27 |
+
fsspec==2024.3.1
|
28 |
+
google==3.0.0
|
29 |
+
google-ai-generativelanguage==0.6.2
|
30 |
+
google-api-core==2.19.0
|
31 |
+
google-api-python-client==2.129.0
|
32 |
+
google-auth==2.29.0
|
33 |
+
google-auth-httplib2==0.2.0
|
34 |
+
google-generativeai==0.5.2
|
35 |
+
googleapis-common-protos==1.63.0
|
36 |
+
gradio==4.31.0
|
37 |
+
gradio_client==0.16.2
|
38 |
+
grpcio==1.63.0
|
39 |
+
grpcio-status==1.62.2
|
40 |
+
h11==0.14.0
|
41 |
+
httpcore==1.0.5
|
42 |
+
httplib2==0.22.0
|
43 |
+
httptools==0.6.1
|
44 |
+
httpx==0.27.0
|
45 |
+
httpx-sse==0.4.0
|
46 |
+
huggingface-hub==0.23.0
|
47 |
+
idna==3.7
|
48 |
+
importlib_resources==6.4.0
|
49 |
+
Jinja2==3.1.4
|
50 |
+
jmespath==1.0.1
|
51 |
+
jsonschema==4.22.0
|
52 |
+
jsonschema-specifications==2023.12.1
|
53 |
+
kiwisolver==1.4.5
|
54 |
+
markdown-it-py==3.0.0
|
55 |
+
MarkupSafe==2.1.5
|
56 |
+
matplotlib==3.8.4
|
57 |
+
mdurl==0.1.2
|
58 |
+
numpy==1.26.4
|
59 |
+
openai==1.28.1
|
60 |
+
orjson==3.10.3
|
61 |
+
packaging==24.0
|
62 |
+
pandas==2.2.2
|
63 |
+
pillow==10.3.0
|
64 |
+
proto-plus==1.23.0
|
65 |
+
protobuf==4.25.3
|
66 |
+
pyasn1==0.6.0
|
67 |
+
pyasn1_modules==0.4.0
|
68 |
+
pydantic==2.7.1
|
69 |
+
pydantic_core==2.18.2
|
70 |
+
pydub==0.25.1
|
71 |
+
Pygments==2.18.0
|
72 |
+
PyMuPDF==1.24.3
|
73 |
+
PyMuPDFb==1.24.3
|
74 |
+
pyparsing==3.1.2
|
75 |
+
python-dateutil==2.9.0.post0
|
76 |
+
python-dotenv==1.0.1
|
77 |
+
python-multipart==0.0.9
|
78 |
+
pytz==2024.1
|
79 |
+
PyYAML==6.0.1
|
80 |
+
referencing==0.35.1
|
81 |
+
regex==2024.5.10
|
82 |
+
requests==2.31.0
|
83 |
+
rich==13.7.1
|
84 |
+
rpds-py==0.18.1
|
85 |
+
rsa==4.9
|
86 |
+
ruff==0.4.4
|
87 |
+
s3transfer==0.10.1
|
88 |
+
semantic-version==2.10.0
|
89 |
+
shellingham==1.5.4
|
90 |
+
six==1.16.0
|
91 |
+
sniffio==1.3.1
|
92 |
+
soupsieve==2.5
|
93 |
+
starlette==0.37.2
|
94 |
+
tiktoken==0.6.0
|
95 |
+
tokenizers==0.19.1
|
96 |
+
tomlkit==0.12.0
|
97 |
+
toolz==0.12.1
|
98 |
+
tqdm==4.66.4
|
99 |
+
typer==0.12.3
|
100 |
+
types-requests==2.31.0.20240406
|
101 |
+
typing_extensions==4.11.0
|
102 |
+
tzdata==2024.1
|
103 |
+
ujson==5.9.0
|
104 |
+
uritemplate==4.1.1
|
105 |
+
urllib3==2.2.1
|
106 |
+
uvicorn==0.29.0
|
107 |
+
watchfiles==0.21.0
|
108 |
+
websockets==11.0.3
|
utils.py
ADDED
@@ -0,0 +1,49 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import fitz
|
2 |
+
import os
|
3 |
+
import logging
|
4 |
+
import random
|
5 |
+
from models import Paper, PaperProcessor
|
6 |
+
|
7 |
+
def extract_text_from_pdf(filename):
    """Return the text of every page of the PDF at ``filename``, concatenated.

    The result is round-tripped through Latin-1 with ``'replace'`` error
    handling, so any character that Latin-1 cannot represent comes back
    as ``?``.
    """
    with fitz.open(filename) as pdf_document:
        page_texts = [page.get_text() for page in pdf_document]
    combined = "".join(page_texts)
    return combined.encode('latin-1', 'replace').decode('latin-1')
|
13 |
+
|
14 |
+
def process_paper(pdf_file, paper_dir, prompt_dir, api_keys):
    """Review a PDF with two randomly chosen models.

    Args:
        pdf_file: Either a path string to an existing PDF, or a file-like
            object exposing ``name`` and ``read()``; the latter is copied
            into ``paper_dir`` before text extraction.
        paper_dir: Directory for uploaded copies; created if missing.
        prompt_dir: Prompt directory passed to each ``PaperProcessor``.
        api_keys: Mapping expanded as keyword arguments into
            ``PaperProcessor``.

    Returns:
        A list with one entry per selected model, each being whatever
        ``PaperProcessor.process_paper`` returned. An empty list is returned
        when ``pdf_file`` is neither a path nor a file-like object.
    """
    logging.info(f"Processing file type in process_paper: {type(pdf_file)}")  # Log the type of the file here as well
    logging.debug(f"Starting to process paper: {pdf_file}")
    # Ensure the directory exists
    os.makedirs(paper_dir, exist_ok=True)

    # Handle file based on its type
    if isinstance(pdf_file, str):
        # Assume pdf_file is a path to the PDF file
        pdf_path = pdf_file
    elif hasattr(pdf_file, 'name') and hasattr(pdf_file, 'read'):
        # It's a file-like object. Save it under its base name only:
        # os.path.join() discards paper_dir when the second argument is an
        # absolute path, so a .name like "/tmp/x/paper.pdf" would make us
        # open (and truncate) the upload itself, and a .name containing
        # directories could escape paper_dir.
        pdf_path = os.path.join(paper_dir, os.path.basename(pdf_file.name))
        with open(pdf_path, "wb") as f:
            f.write(pdf_file.read())
    else:
        logging.error("Received object is neither a path nor a file-like object.")
        return []

    # Extract text from the PDF
    extracted_text = extract_text_from_pdf(pdf_path)
    paper = Paper(pdf_file.name if hasattr(pdf_file, 'name') else os.path.basename(pdf_path), extracted_text)

    # Randomly select two models
    models = ['gpt', 'claude', 'gemini', 'commandr']
    selected_models = random.sample(models, 2)

    # Process the paper with each selected model
    reviews = []
    for model in selected_models:
        processor = PaperProcessor(prompt_dir, model, **api_keys)
        review_text = processor.process_paper(paper)
        reviews.append(review_text)
    logging.debug(f"Reviews generated: {reviews}")
    return reviews
|