# research_hive / app.py
# Source: Hugging Face Space by mfahadkhan — commit 48af45f ("Update app.py", verified)
import os

# Third-party dependencies: Gradio for the UI, pypdf for PDF text
# extraction, and the OpenAI-compatible client pointed at the AIML API.
import gradio as gr
from pypdf import PdfReader
from openai import OpenAI

# SECURITY FIX: the API key was previously hardcoded in source control
# (a leaked credential). Read it from the environment instead; set
# AIML_API_KEY in the deployment environment / Space secrets.
api_key = os.environ.get("AIML_API_KEY", "")
base_url = "https://api.aimlapi.com/v1"
# Shared OpenAI-compatible client used by every completion helper below.
api = OpenAI(api_key=api_key, base_url=base_url)
# Function to extract text from a PDF file
def extract_text_from_pdf(pdf_path):
    """Return the concatenated text of every page of the PDF at *pdf_path*.

    Bug fix: pypdf's ``PageObject.extract_text()`` may return ``None`` for
    pages with no extractable text (e.g. scanned images); treat those as
    empty strings instead of raising ``TypeError`` on concatenation.
    Also builds the result with ``join`` rather than repeated ``+=``,
    which is quadratic for large documents.
    """
    reader = PdfReader(pdf_path)
    return "".join((page.extract_text() or "") for page in reader.pages)
# Function to detect sections in the extracted text
def detect_sections(text):
    """Split *text* into the four canonical paper sections by heading keywords.

    A line containing a heading keyword switches the active section; that
    line and every following line are appended to the active section until
    the next heading. Lines seen before the first recognized heading are
    dropped. Matching is a lowercase substring test, so it is a heuristic.
    """
    # Keyword groups are checked in this order for each line (first hit wins),
    # mirroring the original if/elif chain.
    keyword_map = [
        (("introduction",), "Introduction"),
        (("methodology", "methods"), "Methodology"),
        (("results",), "Results"),
        (("conclusion", "discussion"), "Conclusion"),
    ]
    sections = {name: "" for _, name in keyword_map}
    active = None
    for raw_line in text.split("\n"):
        probe = raw_line.lower().strip()
        for keywords, name in keyword_map:
            if any(kw in probe for kw in keywords):
                active = name
                break
        if active is not None:
            sections[active] += raw_line + "\n"
    return sections
# Function to summarize sections
def summarize_section(section_title, text):
    """Ask the LLM for a short, low-temperature summary of one paper section."""
    # Cap the prompt at 2000 characters to stay within context limits.
    truncated = text[:2000]
    prompt_messages = [
        {
            "role": "system",
            "content": f"You are an AI assistant. Summarize the {section_title.lower()} of the following research paper section:",
        },
        {"role": "user", "content": truncated},
    ]
    response = api.chat.completions.create(
        model="mistralai/Mistral-7B-Instruct-v0.2",
        messages=prompt_messages,
        temperature=0.3,
        max_tokens=150,
    )
    return response.choices[0].message.content.strip()
# Function to propose experiments
def propose_experiments(text):
    """Ask the LLM to suggest 3-5 follow-up experiments for the paper."""
    # Cap the prompt at 3000 characters to stay within context limits.
    excerpt = text[:3000]
    prompt_messages = [
        {
            "role": "system",
            "content": "You are an AI assistant. Based on the following research paper, propose 3-5 potential follow-up experiments:",
        },
        {"role": "user", "content": excerpt},
    ]
    # Higher temperature than summarization: we want creative proposals.
    response = api.chat.completions.create(
        model="mistralai/Mistral-7B-Instruct-v0.2",
        messages=prompt_messages,
        temperature=0.7,
        max_tokens=200,
    )
    return response.choices[0].message.content.strip()
# Function to perform a comparative study
def comparative_study(texts):
    """Ask the LLM to compare/contrast the given papers' text excerpts."""
    # Label each paper and truncate to 1000 characters apiece so several
    # papers fit in one request.
    snippets = []
    for index, paper_text in enumerate(texts, start=1):
        snippets.append(f"Paper {index}:\n{paper_text[:1000]}")
    combined = "\n\n".join(snippets)
    response = api.chat.completions.create(
        model="mistralai/Mistral-7B-Instruct-v0.2",
        messages=[
            {
                "role": "system",
                "content": "You are an AI assistant. Compare and contrast the following research papers, highlighting key similarities and differences:",
            },
            {"role": "user", "content": combined},
        ],
        temperature=0.5,
        max_tokens=300,
    )
    return response.choices[0].message.content.strip()
# Process PDF and summarize sections
def process_and_summarize_pdf(pdf_paths):
    """Summarize each PDF's sections, propose experiments, and compare papers.

    Returns a dict keyed by PDF path where each value holds per-section
    ``summaries`` and ``proposed_experiments``, plus one extra top-level
    ``"comparative_study"`` entry comparing all of the papers.
    """
    results = {}
    extracted_texts = []
    for path in pdf_paths:
        paper_text = extract_text_from_pdf(path)
        extracted_texts.append(paper_text)
        # Only summarize sections that were actually detected (non-blank).
        section_summaries = {
            title: summarize_section(title, body)
            for title, body in detect_sections(paper_text).items()
            if body.strip()
        }
        results[path] = {
            "summaries": section_summaries,
            "proposed_experiments": propose_experiments(paper_text),
        }
    results["comparative_study"] = comparative_study(extracted_texts)
    return results
def chat_with_paper(pdf_path, user_query, chat_history):
    """Answer *user_query* about the paper at *pdf_path* and append the turn.

    Mutates and returns *chat_history* (a list of (user, assistant) tuples),
    returned twice because the caller wires it to both the Chatbot component
    and the gr.State holder.
    """
    # Extract the text from the selected PDF
    text = extract_text_from_pdf(pdf_path)
    # BUG FIX: the original prompt embedded only the file *path* in the
    # system message and never used the extracted text, so the model had no
    # access to the paper's content. Embed a truncated excerpt instead
    # (capped to keep the request within the model's context window).
    messages = [
        {
            "role": "system",
            "content": (
                "You are an AI assistant. Answer questions based on the "
                f"following research paper:\n{text[:3000]}"
            ),
        },
    ]
    # Replay prior turns so the model keeps conversational context.
    for human, ai in chat_history:
        messages.append({"role": "user", "content": human})
        messages.append({"role": "assistant", "content": ai})
    messages.append({"role": "user", "content": user_query})
    completion = api.chat.completions.create(
        model="mistralai/Mistral-7B-Instruct-v0.2",
        messages=messages,
        temperature=0.7,
        max_tokens=256,
    )
    response = completion.choices[0].message.content.strip()
    chat_history.append((user_query, response))
    return chat_history, chat_history
def create_interface():
    """Build and return the Gradio Blocks UI.

    Layout: a sidebar column for uploading PDFs and a main column with two
    tabs — "Summarize" (section summaries, proposed experiments, comparative
    study) and "Chat" (Q&A against one selected paper). Uploaded file
    handles and the chat transcript live in gr.State so they persist across
    interactions within a session.
    """
    with gr.Blocks(css=".center-title {text-align: center;}") as interface:
        # Centered title
        gr.Markdown("<h2 class='center-title'><b>ResearchHive</b> - Research Paper Summarizer and Chat Tool</h2>")
        # Sidebar layout for file upload
        with gr.Row():
            with gr.Column(scale=1):  # Sidebar
                gr.Markdown("### Upload Research Papers")
                file_output = gr.File(label="Upload PDFs", file_count="multiple")
                upload_button = gr.Button("Upload Files")
            with gr.Column(scale=3):  # Main Area
                # Session state: accumulated file handles and chat transcript.
                uploaded_files = gr.State([])
                chat_history = gr.State([])
                with gr.Tabs():
                    with gr.TabItem("Summarize"):
                        # Choices start empty; repopulated after each upload.
                        papers_to_summarize = gr.CheckboxGroup(label="Select Papers to Summarize", choices=[])
                        summarize_button = gr.Button("Summarize Papers")
                        summarized_sections = gr.Textbox(label="Summarized Sections", lines=10)
                        proposed_experiments = gr.Textbox(label="Proposed Experiments", lines=5)
                        comparative_study_results = gr.Textbox(label="Comparative Study Results", lines=5)
                    with gr.TabItem("Chat"):
                        paper_dropdown_chat = gr.Dropdown(label="Select a Paper to Chat With", choices=[])
                        chatbot = gr.Chatbot()
                        user_query = gr.Textbox(label="Ask a Question about the Research Paper")
                        chat_button = gr.Button("Send")

        # Function to update file list...
        def update_file_list(files, current_files):
            # Extend (not replace) so repeated uploads accumulate files;
            # both pickers are refreshed with the full name list.
            if files is not None:
                current_files.extend(files)
            file_names = [file.name for file in current_files]
            return gr.CheckboxGroup(choices=file_names), gr.Dropdown(choices=file_names), current_files

        upload_button.click(
            fn=update_file_list,
            inputs=[file_output, uploaded_files],
            outputs=[papers_to_summarize, paper_dropdown_chat, uploaded_files]
        )

        # Function to summarize papers...
        def summarize_papers(selected_files, files):
            # Checkbox values are file *names*; map them back to handles.
            selected_pdfs = [file for file in files if file.name in selected_files]
            if not selected_pdfs:
                return "Please select at least one valid file.", "", ""
            results = process_and_summarize_pdf([pdf.name for pdf in selected_pdfs])
            # Flatten per-paper section summaries into one display string,
            # skipping the special "comparative_study" key.
            summarized_text = ""
            for pdf, result in results.items():
                if pdf != "comparative_study":
                    summarized_text += f"Summaries for {pdf}:\n"
                    for section, summary in result["summaries"].items():
                        summarized_text += f"{section}:\n{summary}\n\n"
            proposed_experiments_text = "\n\n".join(
                [f"{pdf}:\n{result['proposed_experiments']}" for pdf, result in results.items() if pdf != "comparative_study"]
            )
            # NOTE: local name shadows the Textbox of the same name; harmless
            # here since the component is only referenced outside this closure.
            comparative_study_results = results["comparative_study"]
            return summarized_text, proposed_experiments_text, comparative_study_results

        summarize_button.click(
            fn=summarize_papers,
            inputs=[papers_to_summarize, uploaded_files],
            outputs=[summarized_sections, proposed_experiments, comparative_study_results]
        )

        # Chat function...
        def chat_with_selected_paper(selected_file, query, files, history):
            # Dropdown value is a file name; resolve it to the file handle.
            selected_pdf = next((file for file in files if file.name == selected_file), None)
            if selected_pdf is None:
                return [("Error", "Please select a valid file.")], history
            updated_history, _ = chat_with_paper(selected_pdf.name, query, history)
            # Returned twice: once for the Chatbot, once for chat_history state.
            return updated_history, updated_history

        chat_button.click(
            fn=chat_with_selected_paper,
            inputs=[paper_dropdown_chat, user_query, uploaded_files, chat_history],
            outputs=[chatbot, chat_history]
        )
    return interface
# Run the Gradio interface when executed as a script (not on import).
if __name__ == "__main__":
    demo = create_interface()
    demo.launch()