kpal002 committed
Commit 410a087
1 parent: a62f778

Update app.py

Files changed (1):
  1. app.py +75 -77
app.py CHANGED
@@ -94,108 +94,106 @@ def generate_score_bar(score, num_criteria):
     <p style="color: {color};">{text}</p> <!-- Display the text -->
     """
     return score_bar_html
-def format_example(example):
-    """
-    Formats a few-shot example into a string.
-    Args:
-        example (dict): A dictionary containing 'query', 'score', and 'reasoning' for the few-shot example.
-    Returns:
-        str: Formatted few-shot example text.
-    """
-    return "Example:\nQuery: {}\n Direct Answer: {}\n".format(
-        example['query'], example['Answer'])
-
-def process_pdf(uploaded_file, llm_model, n_criteria = num_criteria):
-    # Process the PDF file
-    pdf_processor = PDFProcessor_Unstructured(pdf_processing_config)
-    merged_chunks, tables, title = pdf_processor.process_pdf_file(uploaded_file)
-    documents = [Document(text=t) for t in merged_chunks]
-
-    # Prompts and Queries
-    utils = base_utils()
-
-    info_prompt = utils.read_from_file(info_prompt_path)
-
-    # LLM Model choice
-    try:
-        if llm_model == "Model 1":
-            llm = OpenAI(model="gpt-4-1106-preview", temperature=model_temperature, max_tokens=output_token_size)
-            general_prompt = utils.read_from_file(gpt_prompt_path)
-
-        elif llm_model == "Model 2":
-            if any(param is None for param in [model_context_window, output_token_size, model_temperature, hf_token]):
-                raise ValueError("All parameters are required for Mistral LLM.")
-
-            llm = MixtralLLM(context_window=model_context_window, num_output=output_token_size,
-                             temperature=model_temperature, model_name="mistralai/Mixtral-8x7B-Instruct-v0.1", api_key=hf_token)
-            general_prompt = utils.read_from_file(mistral_prompt_path)
-        else:
-            raise ValueError(f"Unsupported language model: {llm_model}")
-
-    except Exception as e:
-        logger.error(f"Error initializing language model '{llm_model}': {e}", exc_info=True)
-        raise  # Or handle the exception as needed
-
-    # Embedding model choice for RAG
-    try:
-        if embed == "openai":
-            embed_model = OpenAIEmbedding(model="text-embedding-3-large")
-
-        elif embed == "huggingface":
-            # Use the specified model name
-            embed_model = HuggingFaceEmbedding(embed_model_name)
-
-        else:
-            raise ValueError(f"Unsupported embedding model: {embed_model}")
-
-    except Exception as e:
-        logger.error(f"Error initializing embedding model: {e}", exc_info=True)
-        raise
-
-    peer_review_journals = utils.read_from_file(peer_review_journals_path)
-    eq_network_journals = utils.read_from_file(eq_network_journals_path)
-
-    peer_review_journals_list = peer_review_journals.split('\n')
-    eq_network_journals_list = eq_network_journals.split('\n')
-
-    modified_journal_query = "Is the given research paper published in any of the following journals: " + ", ".join(peer_review_journals_list) + "?"
-
-    info_llm = OpenAI(model="gpt-4-1106-preview", temperature=model_temperature, max_tokens=100)
-    pdf_info_query = PDFQueryEngine(documents, info_llm, embed_model, (info_prompt))
-    info_query_engine = pdf_info_query.setup_query_engine()
-    journal_result = info_query_engine.query(modified_journal_query).response
-    author_result = info_query_engine.query(author_query).response
-
-    pdf_criteria_query = PDFQueryEngine(documents, llm, embed_model, (general_prompt))
-
-    # Check for prior registration
-    nlp_methods = KeywordSearch(merged_chunks)
-    eq_journal_result = nlp_methods.find_journal_name(journal_result, eq_network_journals_list)
-    peer_journal_result = nlp_methods.find_journal_name(journal_result, peer_review_journals_list)
-
-    registration_result = nlp_methods.check_registration()
-
-    # Evaluate with OpenAI model
-    total_score, criteria_met, score_percentage, reasoning = pdf_criteria_query.evaluate_with_llm(registration_result, peer_journal_result, eq_journal_result, queries)
-
-    reasoning_html = "<ul>"
-    for query, reason in zip(criteria, reasoning):
-        reasoning_html += f"<li style='font-size: 18px;'><strong style='color: forestgreen;'>{query}</strong> <br> Reasoning: {reason}</li>"
-    reasoning_html += "</ul>"
-
-    # Generate the score bar HTML
-    score_bar_html = generate_score_bar(total_score, n_criteria)
-
-    author_info_html = f"<div style='font-size: 18px;'>{author_result}</div>"
-    title_info_html = f"<div style='font-size: 20px;'>{title}</div>"
-
-    # Return the score as a string and the reasoning as HTML
-    return str(round((total_score / n_criteria) * 100)) + "/100", score_bar_html, reasoning_html, author_info_html, title_info_html
+
+def process_pdf(uploaded_files, llm_model, n_criteria = num_criteria):
+    # Initialize aggregation variables
+    final_score = 0
+    final_reasoning = []
+    final_score_bar_html = ""
+    final_author_info_html = ""
+    final_title_info_html = ""
+
+    for uploaded_file in uploaded_files:
+        # Process the PDF file
+        pdf_processor = PDFProcessor_Unstructured(pdf_processing_config)
+        merged_chunks, tables, title = pdf_processor.process_pdf_file(uploaded_file)
+        documents = [Document(text=t) for t in merged_chunks]
+
+        # Prompts and Queries
+        utils = base_utils()
+
+        info_prompt = utils.read_from_file(info_prompt_path)
+
+        # LLM Model choice
+        try:
+            if llm_model == "Model 1":
+                llm = OpenAI(model="gpt-4-1106-preview", temperature=model_temperature, max_tokens=output_token_size)
+                general_prompt = utils.read_from_file(gpt_prompt_path)
+
+            elif llm_model == "Model 2":
+                if any(param is None for param in [model_context_window, output_token_size, model_temperature, hf_token]):
+                    raise ValueError("All parameters are required for Mistral LLM.")
+
+                llm = MixtralLLM(context_window=model_context_window, num_output=output_token_size,
+                                 temperature=model_temperature, model_name="mistralai/Mixtral-8x7B-Instruct-v0.1", api_key=hf_token)
+                general_prompt = utils.read_from_file(mistral_prompt_path)
+            else:
+                raise ValueError(f"Unsupported language model: {llm_model}")
+
+        except Exception as e:
+            logger.error(f"Error initializing language model '{llm_model}': {e}", exc_info=True)
+            raise  # Or handle the exception as needed
+
+        # Embedding model choice for RAG
+        try:
+            if embed == "openai":
+                embed_model = OpenAIEmbedding(model="text-embedding-3-large")
+
+            elif embed == "huggingface":
+                # Use the specified model name
+                embed_model = HuggingFaceEmbedding(embed_model_name)
+
+            else:
+                raise ValueError(f"Unsupported embedding model: {embed_model}")
+
+        except Exception as e:
+            logger.error(f"Error initializing embedding model: {e}", exc_info=True)
+            raise
+
+        peer_review_journals = utils.read_from_file(peer_review_journals_path)
+        eq_network_journals = utils.read_from_file(eq_network_journals_path)
+
+        peer_review_journals_list = peer_review_journals.split('\n')
+        eq_network_journals_list = eq_network_journals.split('\n')
+
+        modified_journal_query = "Is the given research paper published in any of the following journals: " + ", ".join(peer_review_journals_list) + "?"
+
+        info_llm = OpenAI(model="gpt-4-1106-preview", temperature=model_temperature, max_tokens=100)
+        pdf_info_query = PDFQueryEngine(documents, info_llm, embed_model, (info_prompt))
+        info_query_engine = pdf_info_query.setup_query_engine()
+        journal_result = info_query_engine.query(modified_journal_query).response
+        author_result = info_query_engine.query(author_query).response
+
+        pdf_criteria_query = PDFQueryEngine(documents, llm, embed_model, (general_prompt))
+
+        # Check for prior registration
+        nlp_methods = KeywordSearch(merged_chunks)
+        eq_journal_result = nlp_methods.find_journal_name(journal_result, eq_network_journals_list)
+        peer_journal_result = nlp_methods.find_journal_name(journal_result, peer_review_journals_list)
+
+        registration_result = nlp_methods.check_registration()
+
+        # Evaluate with OpenAI model
+        total_score, criteria_met, score_percentage, reasoning = pdf_criteria_query.evaluate_with_llm(registration_result, peer_journal_result, eq_journal_result, queries)
+
+        reasoning_html = "<ul>"
+        for query, reason in zip(criteria, reasoning):
+            reasoning_html += f"<li style='font-size: 18px;'><strong style='color: forestgreen;'>{query}</strong> <br> Reasoning: {reason}</li>"
+        reasoning_html += "</ul>"
+
+        # Generate the score bar HTML
+        score_bar_html = generate_score_bar(total_score, n_criteria)
+
+        author_info_html = f"<div style='font-size: 18px;'>{author_result}</div>"
+        title_info_html = f"<div style='font-size: 20px;'>{title}</div>"
+
+        # Return the score as a string and the reasoning as HTML
+        return str(round((total_score / n_criteria) * 100)) + "/100", score_bar_html, reasoning_html, author_info_html, title_info_html
 
 
  with gr.Blocks(theme=gr.themes.Glass(
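
Note that the return statement in the new code still sits inside the for loop, so only the first uploaded file is scored and the final_score, final_reasoning, and final_*_html accumulators initialized at the top of process_pdf are never updated. Below is a minimal sketch of how the per-file results could be aggregated after the loop instead; the helper name aggregate_results, the score-averaging rule, and the HTML joining are assumptions for illustration, not code from this commit.

# Hypothetical aggregation sketch (not part of commit 410a087).
# per_file_results holds one (total_score, score_bar_html, reasoning_html,
# author_info_html, title_info_html) tuple per processed PDF.
def aggregate_results(per_file_results, n_criteria):
    final_score = 0
    final_reasoning = []
    final_score_bar_html = ""
    final_author_info_html = ""
    final_title_info_html = ""
    for total_score, score_bar, reasoning, author_html, title_html in per_file_results:
        final_score += total_score
        final_reasoning.append(reasoning)
        final_score_bar_html += score_bar
        final_author_info_html += author_html
        final_title_info_html += title_html
    # Average across files so the headline score stays on the 0-100 scale.
    avg_score = final_score / max(len(per_file_results), 1)
    score_str = str(round((avg_score / n_criteria) * 100)) + "/100"
    return score_str, final_score_bar_html, "".join(final_reasoning), final_author_info_html, final_title_info_html

With this helper, process_pdf would append each file's tuple to per_file_results inside the loop and call aggregate_results once after the loop, rather than returning on the first iteration.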
 
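The Gradio wiring that calls process_pdf is outside this hunk, but the new signature expects a list of files rather than a single upload. Below is a hypothetical hookup assuming a multi-file gr.File input; every component name and label is an assumption for illustration, not code from this commit.

import gradio as gr

# Hypothetical UI sketch for the new multi-file signature
# (assumes process_pdf from app.py is defined in the same module).
with gr.Blocks() as demo:
    pdf_files = gr.File(file_count="multiple", file_types=[".pdf"], label="Upload PDFs")
    model_choice = gr.Radio(choices=["Model 1", "Model 2"], value="Model 1", label="Language model")
    run_button = gr.Button("Evaluate")
    score_box = gr.Textbox(label="Score")
    score_bar = gr.HTML()
    reasoning = gr.HTML()
    author_info = gr.HTML()
    title_info = gr.HTML()
    run_button.click(
        process_pdf,
        inputs=[pdf_files, model_choice],
        outputs=[score_box, score_bar, reasoning, author_info, title_info],
    )

demo.launch()

With file_count="multiple", Gradio passes a list of uploaded files, which matches the for uploaded_file in uploaded_files loop introduced in this commit; n_criteria falls back to its num_criteria default.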