kpal002 committed
Commit 410a087
1 parent: a62f778

Update app.py

Files changed (1):
  1. app.py +75 -77
app.py CHANGED
@@ -94,108 +94,106 @@ def generate_score_bar(score, num_criteria):
     <p style="color: {color};">{text}</p> <!-- Display the text -->
     """
     return score_bar_html
-def format_example(example):
-    """
-    Formats a few-shot example into a string.
-    Args:
-        example (dict): A dictionary containing 'query', 'score', and 'reasoning' for the few-shot example.
-    Returns:
-        str: Formatted few-shot example text.
-    """
-    return "Example:\nQuery: {}\n Direct Answer: {}\n".format(
-        example['query'], example['Answer'])
-
-def process_pdf(uploaded_file, llm_model, n_criteria = num_criteria):
-    # Process the PDF file
-    pdf_processor = PDFProcessor_Unstructured(pdf_processing_config)
-    merged_chunks, tables, title = pdf_processor.process_pdf_file(uploaded_file)
-    documents = [Document(text=t) for t in merged_chunks]
-
-    # Prompts and Queries
-    utils = base_utils()
-
-    info_prompt = utils.read_from_file(info_prompt_path)
-
-    # LLM Model choice
-    try:
-        if llm_model == "Model 1":
-            llm = OpenAI(model="gpt-4-1106-preview", temperature=model_temperature, max_tokens=output_token_size)
-            general_prompt = utils.read_from_file(gpt_prompt_path)
-
-        elif llm_model == "Model 2":
-            if any(param is None for param in [model_context_window, output_token_size, model_temperature, hf_token]):
-                raise ValueError("All parameters are required for Mistral LLM.")
-
-            llm = MixtralLLM(context_window=model_context_window, num_output=output_token_size,
-                             temperature=model_temperature, model_name="mistralai/Mixtral-8x7B-Instruct-v0.1", api_key=hf_token)
-            general_prompt = utils.read_from_file(mistral_prompt_path)
-        else:
-            raise ValueError(f"Unsupported language model: {llm_model}")
-
-    except Exception as e:
-        logger.error(f"Error initializing language model '{llm_model}': {e}", exc_info=True)
-        raise  # Or handle the exception as needed
-
-    # Embedding model choice for RAG
-    try:
-        if embed == "openai":
-            embed_model = OpenAIEmbedding(model="text-embedding-3-large")
-
-        elif embed == "huggingface":
-            # Use the specified model name
-            embed_model = HuggingFaceEmbedding(embed_model_name)
-
-        else:
-            raise ValueError(f"Unsupported embedding model: {embed_model}")
-
-    except Exception as e:
-        logger.error(f"Error initializing embedding model: {e}", exc_info=True)
-        raise
-
-    peer_review_journals = utils.read_from_file(peer_review_journals_path)
-    eq_network_journals = utils.read_from_file(eq_network_journals_path)
-
-    peer_review_journals_list = peer_review_journals.split('\n')
-    eq_network_journals_list = eq_network_journals.split('\n')
-
-    modified_journal_query = "Is the given research paper published in any of the following journals: " + ", ".join(peer_review_journals_list) + "?"
-
-    info_llm = OpenAI(model="gpt-4-1106-preview", temperature=model_temperature, max_tokens=100)
-    pdf_info_query = PDFQueryEngine(documents, info_llm, embed_model, (info_prompt))
-    info_query_engine = pdf_info_query.setup_query_engine()
-    journal_result = info_query_engine.query(modified_journal_query).response
-    author_result = info_query_engine.query(author_query).response
-
-    pdf_criteria_query = PDFQueryEngine(documents, llm, embed_model, (general_prompt))
-
-    # Check for prior registration
-    nlp_methods = KeywordSearch(merged_chunks)
-    eq_journal_result = nlp_methods.find_journal_name(journal_result, eq_network_journals_list)
-    peer_journal_result = nlp_methods.find_journal_name(journal_result, peer_review_journals_list)
-
-    registration_result = nlp_methods.check_registration()
-
-    # Evaluate with OpenAI model
-    total_score, criteria_met, score_percentage, reasoning = pdf_criteria_query.evaluate_with_llm(registration_result, peer_journal_result, eq_journal_result, queries)
-
-    reasoning_html = "<ul>"
-    for query, reason in zip(criteria, reasoning):
-        reasoning_html += f"<li style='font-size: 18px;'><strong style='color: forestgreen;'>{query}</strong> <br> Reasoning: {reason}</li>"
-    reasoning_html += "</ul>"
-
-    # Generate the score bar HTML
-    score_bar_html = generate_score_bar(total_score, n_criteria)
-
-    author_info_html = f"<div style='font-size: 18px;'>{author_result}</div>"
-    title_info_html = f"<div style='font-size: 20px;'>{title}</div>"
-
-    # Return the score as a string and the reasoning as HTML
-    return str(round((total_score / n_criteria) * 100)) + "/100", score_bar_html, reasoning_html, author_info_html, title_info_html
+
+def process_pdf(uploaded_files, llm_model, n_criteria = num_criteria):
+    # Initialize aggregation variables
+    final_score = 0
+    final_reasoning = []
+    final_score_bar_html = ""
+    final_author_info_html = ""
+    final_title_info_html = ""
+
+    for uploaded_file in uploaded_files:
+        # Process the PDF file
+        pdf_processor = PDFProcessor_Unstructured(pdf_processing_config)
+        merged_chunks, tables, title = pdf_processor.process_pdf_file(uploaded_file)
+        documents = [Document(text=t) for t in merged_chunks]
+
+        # Prompts and Queries
+        utils = base_utils()
+
+        info_prompt = utils.read_from_file(info_prompt_path)
+
+        # LLM Model choice
+        try:
+            if llm_model == "Model 1":
+                llm = OpenAI(model="gpt-4-1106-preview", temperature=model_temperature, max_tokens=output_token_size)
+                general_prompt = utils.read_from_file(gpt_prompt_path)
+
+            elif llm_model == "Model 2":
+                if any(param is None for param in [model_context_window, output_token_size, model_temperature, hf_token]):
+                    raise ValueError("All parameters are required for Mistral LLM.")
+
+                llm = MixtralLLM(context_window=model_context_window, num_output=output_token_size,
+                                 temperature=model_temperature, model_name="mistralai/Mixtral-8x7B-Instruct-v0.1", api_key=hf_token)
+                general_prompt = utils.read_from_file(mistral_prompt_path)
+            else:
+                raise ValueError(f"Unsupported language model: {llm_model}")
+
+        except Exception as e:
+            logger.error(f"Error initializing language model '{llm_model}': {e}", exc_info=True)
+            raise  # Or handle the exception as needed
+
+        # Embedding model choice for RAG
+        try:
+            if embed == "openai":
+                embed_model = OpenAIEmbedding(model="text-embedding-3-large")
+
+            elif embed == "huggingface":
+                # Use the specified model name
+                embed_model = HuggingFaceEmbedding(embed_model_name)
+
+            else:
+                raise ValueError(f"Unsupported embedding model: {embed_model}")
+
+        except Exception as e:
+            logger.error(f"Error initializing embedding model: {e}", exc_info=True)
+            raise
+
+        peer_review_journals = utils.read_from_file(peer_review_journals_path)
+        eq_network_journals = utils.read_from_file(eq_network_journals_path)
+
+        peer_review_journals_list = peer_review_journals.split('\n')
+        eq_network_journals_list = eq_network_journals.split('\n')
+
+        modified_journal_query = "Is the given research paper published in any of the following journals: " + ", ".join(peer_review_journals_list) + "?"
+
+        info_llm = OpenAI(model="gpt-4-1106-preview", temperature=model_temperature, max_tokens=100)
+        pdf_info_query = PDFQueryEngine(documents, info_llm, embed_model, (info_prompt))
+        info_query_engine = pdf_info_query.setup_query_engine()
+        journal_result = info_query_engine.query(modified_journal_query).response
+        author_result = info_query_engine.query(author_query).response
+
+        pdf_criteria_query = PDFQueryEngine(documents, llm, embed_model, (general_prompt))
+
+        # Check for prior registration
+        nlp_methods = KeywordSearch(merged_chunks)
+        eq_journal_result = nlp_methods.find_journal_name(journal_result, eq_network_journals_list)
+        peer_journal_result = nlp_methods.find_journal_name(journal_result, peer_review_journals_list)
+
+        registration_result = nlp_methods.check_registration()
+
+        # Evaluate with OpenAI model
+        total_score, criteria_met, score_percentage, reasoning = pdf_criteria_query.evaluate_with_llm(registration_result, peer_journal_result, eq_journal_result, queries)
+
+        reasoning_html = "<ul>"
+        for query, reason in zip(criteria, reasoning):
+            reasoning_html += f"<li style='font-size: 18px;'><strong style='color: forestgreen;'>{query}</strong> <br> Reasoning: {reason}</li>"
+        reasoning_html += "</ul>"
+
+        # Generate the score bar HTML
+        score_bar_html = generate_score_bar(total_score, n_criteria)
+
+        author_info_html = f"<div style='font-size: 18px;'>{author_result}</div>"
+        title_info_html = f"<div style='font-size: 20px;'>{title}</div>"
+
+        # Return the score as a string and the reasoning as HTML
+        return str(round((total_score / n_criteria) * 100)) + "/100", score_bar_html, reasoning_html, author_info_html, title_info_html
 
 
  with gr.Blocks(theme=gr.themes.Glass(
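
Note that the return statement in the new code still sits inside the for loop, so only the first uploaded file is scored and the final_score, final_reasoning, and final_*_html accumulators initialized at the top of process_pdf are never updated. Below is a minimal sketch of how the per-file results could be aggregated after the loop instead; the helper name aggregate_results, the score-averaging rule, and the HTML joining are assumptions for illustration, not code from this commit.

# Hypothetical aggregation sketch (not part of commit 410a087).
# per_file_results holds one (total_score, score_bar_html, reasoning_html,
# author_info_html, title_info_html) tuple per processed PDF.
def aggregate_results(per_file_results, n_criteria):
    final_score = 0
    final_reasoning = []
    final_score_bar_html = ""
    final_author_info_html = ""
    final_title_info_html = ""
    for total_score, score_bar, reasoning, author_html, title_html in per_file_results:
        final_score += total_score
        final_reasoning.append(reasoning)
        final_score_bar_html += score_bar
        final_author_info_html += author_html
        final_title_info_html += title_html
    # Average across files so the headline score stays on the 0-100 scale.
    avg_score = final_score / max(len(per_file_results), 1)
    score_str = str(round((avg_score / n_criteria) * 100)) + "/100"
    return score_str, final_score_bar_html, "".join(final_reasoning), final_author_info_html, final_title_info_html

With this helper, process_pdf would append each file's tuple to per_file_results inside the loop and call aggregate_results once after the loop, rather than returning on the first iteration.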
 
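The Gradio wiring that calls process_pdf is outside this hunk, but the new signature expects a list of files rather than a single upload. Below is a hypothetical hookup assuming a multi-file gr.File input; every component name and label is an assumption for illustration, not code from this commit.

import gradio as gr

# Hypothetical UI sketch for the new multi-file signature
# (assumes process_pdf from app.py is defined in the same module).
with gr.Blocks() as demo:
    pdf_files = gr.File(file_count="multiple", file_types=[".pdf"], label="Upload PDFs")
    model_choice = gr.Radio(choices=["Model 1", "Model 2"], value="Model 1", label="Language model")
    run_button = gr.Button("Evaluate")
    score_box = gr.Textbox(label="Score")
    score_bar = gr.HTML()
    reasoning = gr.HTML()
    author_info = gr.HTML()
    title_info = gr.HTML()
    run_button.click(
        process_pdf,
        inputs=[pdf_files, model_choice],
        outputs=[score_box, score_bar, reasoning, author_info, title_info],
    )

demo.launch()

With file_count="multiple", Gradio passes a list of uploaded files, which matches the for uploaded_file in uploaded_files loop introduced in this commit; n_criteria falls back to its num_criteria default.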