# NOTE: Hugging Face Spaces page banner ("Spaces / Sleeping") removed — it was
# scraped web-page residue, not part of the program source.
import os

import gradio as gr
from openai import OpenAI
from pypdf import PdfReader
# Configuration for the AIML API (OpenAI-compatible endpoint).
# SECURITY: the key was hard-coded in source. Prefer the AIML_API_KEY
# environment variable; the literal remains only as a backward-compatible
# fallback and should be rotated/removed.
api_key = os.environ.get("AIML_API_KEY", "251a30544f394891bd37c6b44960b68f")
base_url = "https://api.aimlapi.com/v1"
# Shared client used by every completion call in this module.
api = OpenAI(api_key=api_key, base_url=base_url)
# Function to extract text from a PDF file
def extract_text_from_pdf(pdf_path):
    """Return the concatenated text of every page in the PDF at *pdf_path*.

    Pages with no extractable text layer make ``extract_text()`` return
    ``None``; coalesce those to ``""`` so concatenation never raises
    ``TypeError`` (bug in the original ``text += page.extract_text()``).
    """
    reader = PdfReader(pdf_path)
    # str.join avoids the quadratic cost of repeated += on large papers.
    return "".join(page.extract_text() or "" for page in reader.pages)
# Function to detect sections in the extracted text
def detect_sections(text):
    """Partition *text* into the four canonical paper sections.

    A line whose lowercased content contains a section keyword switches the
    "active" section; every subsequent line (including the heading line
    itself) is appended to that section until another keyword appears.
    Lines seen before any keyword are dropped. Returns a dict with keys
    Introduction / Methodology / Results / Conclusion (possibly empty).
    """
    # Ordered keyword rules; first match wins, mirroring an if/elif cascade.
    rules = (
        (("introduction",), "Introduction"),
        (("methodology", "methods"), "Methodology"),
        (("results",), "Results"),
        (("conclusion", "discussion"), "Conclusion"),
    )
    sections = {"Introduction": "", "Methodology": "", "Results": "", "Conclusion": ""}
    active = None
    for raw_line in text.split('\n'):
        probe = raw_line.lower().strip()
        for keywords, title in rules:
            if any(k in probe for k in keywords):
                active = title
                break
        if active is not None:
            sections[active] += raw_line + "\n"
    return sections
# Function to summarize sections
def summarize_section(section_title, text):
    """Ask the model for a short summary of one paper section.

    *section_title* is interpolated (lowercased) into the system prompt;
    the user prompt is capped at 2000 characters to bound context size.
    """
    response = api.chat.completions.create(
        model="mistralai/Mistral-7B-Instruct-v0.2",
        messages=[
            {
                "role": "system",
                "content": (
                    f"You are an AI assistant. Summarize the "
                    f"{section_title.lower()} of the following research paper section:"
                ),
            },
            # Limit input to the first 2000 characters.
            {"role": "user", "content": text[:2000]},
        ],
        temperature=0.3,
        max_tokens=150,
    )
    return response.choices[0].message.content.strip()
# Function to propose experiments
def propose_experiments(text):
    """Ask the model for 3-5 follow-up experiments grounded in *text*.

    Input is capped at 3000 characters; a higher temperature (0.7) is used
    to encourage more creative proposals.
    """
    response = api.chat.completions.create(
        model="mistralai/Mistral-7B-Instruct-v0.2",
        messages=[
            {
                "role": "system",
                "content": "You are an AI assistant. Based on the following research paper, propose 3-5 potential follow-up experiments:",
            },
            # Limit input to the first 3000 characters.
            {"role": "user", "content": text[:3000]},
        ],
        temperature=0.7,
        max_tokens=200,
    )
    return response.choices[0].message.content.strip()
# Function to perform a comparative study
def comparative_study(texts):
    """Ask the model to compare the papers in *texts*.

    Each paper contributes at most its first 1000 characters, labelled
    "Paper N", joined by blank lines into a single user prompt.
    """
    paper_blurbs = []
    for index, paper_text in enumerate(texts):
        paper_blurbs.append(f"Paper {index+1}:\n{paper_text[:1000]}")
    response = api.chat.completions.create(
        model="mistralai/Mistral-7B-Instruct-v0.2",
        messages=[
            {
                "role": "system",
                "content": "You are an AI assistant. Compare and contrast the following research papers, highlighting key similarities and differences:",
            },
            {"role": "user", "content": "\n\n".join(paper_blurbs)},
        ],
        temperature=0.5,
        max_tokens=300,
    )
    return response.choices[0].message.content.strip()
# Process PDF and summarize sections
def process_and_summarize_pdf(pdf_paths):
    """Summarize each PDF in *pdf_paths* and compare them all.

    Returns a dict keyed by pdf path, each value holding per-section
    "summaries" and "proposed_experiments", plus one extra key
    "comparative_study" with the cross-paper comparison text.
    """
    results = {}
    extracted_texts = []
    for path in pdf_paths:
        paper_text = extract_text_from_pdf(path)
        extracted_texts.append(paper_text)
        # Only summarize sections that actually captured some content.
        section_summaries = {
            title: summarize_section(title, body)
            for title, body in detect_sections(paper_text).items()
            if body.strip()
        }
        results[path] = {
            "summaries": section_summaries,
            "proposed_experiments": propose_experiments(paper_text),
        }
    results["comparative_study"] = comparative_study(extracted_texts)
    return results
def chat_with_paper(pdf_path, user_query, chat_history):
    """Answer *user_query* about the paper at *pdf_path*, threading history.

    *chat_history* is a list of (user, assistant) pairs; it is mutated in
    place (the new turn is appended) and returned twice to match the two
    Gradio outputs (chatbot, state).

    BUG FIX: the extracted text was previously assigned and never used, so
    the model only ever saw the file *name*. A bounded excerpt of the paper
    content is now embedded in the system prompt.
    """
    # Extract the text from the selected PDF
    text = extract_text_from_pdf(pdf_path)
    # Prepare the chat history for the API
    messages = [
        {
            "role": "system",
            "content": (
                f"You are an AI assistant. Answer questions based on the "
                f"following research paper: {pdf_path}.\n\n"
                # Cap the excerpt to keep the prompt within context limits.
                f"Paper content (excerpt):\n{text[:3000]}"
            ),
        },
    ]
    for human, ai in chat_history:
        messages.append({"role": "user", "content": human})
        messages.append({"role": "assistant", "content": ai})
    messages.append({"role": "user", "content": user_query})
    completion = api.chat.completions.create(
        model="mistralai/Mistral-7B-Instruct-v0.2",
        messages=messages,
        temperature=0.7,
        max_tokens=256,
    )
    response = completion.choices[0].message.content.strip()
    chat_history.append((user_query, response))
    return chat_history, chat_history
def create_interface():
    """Build the Gradio Blocks UI: an upload sidebar plus Summarize/Chat tabs.

    Structure was reconstructed with valid indentation (the source had been
    flattened by extraction); the wiring is unchanged: an upload button feeds
    a shared ``gr.State`` file list that backs both the summarize checkbox
    group and the chat dropdown.
    """
    with gr.Blocks(css=".center-title {text-align: center;}") as interface:
        # Centered title
        gr.Markdown("<h2 class='center-title'><b>ResearchHive</b> - Research Paper Summarizer and Chat Tool</h2>")
        # Sidebar layout for file upload
        with gr.Row():
            with gr.Column(scale=1):  # Sidebar
                gr.Markdown("### Upload Research Papers")
                file_output = gr.File(label="Upload PDFs", file_count="multiple")
                upload_button = gr.Button("Upload Files")
            with gr.Column(scale=3):  # Main Area
                uploaded_files = gr.State([])   # accumulated upload objects
                chat_history = gr.State([])     # (user, assistant) pairs
                with gr.Tabs():
                    with gr.TabItem("Summarize"):
                        papers_to_summarize = gr.CheckboxGroup(label="Select Papers to Summarize", choices=[])
                        summarize_button = gr.Button("Summarize Papers")
                        summarized_sections = gr.Textbox(label="Summarized Sections", lines=10)
                        proposed_experiments = gr.Textbox(label="Proposed Experiments", lines=5)
                        comparative_study_results = gr.Textbox(label="Comparative Study Results", lines=5)
                    with gr.TabItem("Chat"):
                        paper_dropdown_chat = gr.Dropdown(label="Select a Paper to Chat With", choices=[])
                        chatbot = gr.Chatbot()
                        user_query = gr.Textbox(label="Ask a Question about the Research Paper")
                        chat_button = gr.Button("Send")

        # Function to update file list: fold new uploads into the persistent
        # state and refresh both selectors with the full set of file names.
        def update_file_list(files, current_files):
            if files is not None:
                current_files.extend(files)
            file_names = [file.name for file in current_files]
            return gr.CheckboxGroup(choices=file_names), gr.Dropdown(choices=file_names), current_files

        upload_button.click(
            fn=update_file_list,
            inputs=[file_output, uploaded_files],
            outputs=[papers_to_summarize, paper_dropdown_chat, uploaded_files]
        )

        # Function to summarize papers: map selected names back to file
        # objects, run the pipeline, and flatten results into the textboxes.
        def summarize_papers(selected_files, files):
            selected_pdfs = [file for file in files if file.name in selected_files]
            if not selected_pdfs:
                return "Please select at least one valid file.", "", ""
            results = process_and_summarize_pdf([pdf.name for pdf in selected_pdfs])
            summarized_text = ""
            for pdf, result in results.items():
                if pdf != "comparative_study":  # skip the cross-paper entry
                    summarized_text += f"Summaries for {pdf}:\n"
                    for section, summary in result["summaries"].items():
                        summarized_text += f"{section}:\n{summary}\n\n"
            proposed_experiments_text = "\n\n".join(
                [f"{pdf}:\n{result['proposed_experiments']}" for pdf, result in results.items() if pdf != "comparative_study"]
            )
            comparative_study_results = results["comparative_study"]
            return summarized_text, proposed_experiments_text, comparative_study_results

        summarize_button.click(
            fn=summarize_papers,
            inputs=[papers_to_summarize, uploaded_files],
            outputs=[summarized_sections, proposed_experiments, comparative_study_results]
        )

        # Chat function: resolve the dropdown selection to a file object and
        # delegate to chat_with_paper; on a bad selection, show an error turn.
        def chat_with_selected_paper(selected_file, query, files, history):
            selected_pdf = next((file for file in files if file.name == selected_file), None)
            if selected_pdf is None:
                return [("Error", "Please select a valid file.")], history
            updated_history, _ = chat_with_paper(selected_pdf.name, query, history)
            return updated_history, updated_history

        chat_button.click(
            fn=chat_with_selected_paper,
            inputs=[paper_dropdown_chat, user_query, uploaded_files, chat_history],
            outputs=[chatbot, chat_history]
        )
    return interface
# Run the Gradio interface
if __name__ == "__main__":
    # Build the UI and start the local server when executed as a script.
    demo = create_interface()
    demo.launch()