# Spaces: Sleeping
# File size: 1,656 Bytes
# 05172f4
import streamlit as st
from PyPDF2 import PdfReader
from sentence_transformers import SentenceTransformer
from transformers import pipeline
import faiss
import numpy as np
# Load models
embedder = SentenceTransformer('all-MiniLM-L6-v2')
qa_pipeline = pipeline('question-answering', model='distilbert-base-uncased-distilled-squad')
st.set_page_config(page_title="QuickLit - AI Research Assistant")
st.title("π QuickLit: Literature Q&A Assistant")
# File upload
uploaded_file = st.file_uploader("Upload a research paper (PDF)", type=["pdf"])
if uploaded_file:
reader = PdfReader(uploaded_file)
full_text = ""
for page in reader.pages:
full_text += page.extract_text()
# Split text into chunks
sentences = full_text.split('. ')
chunks = ['. '.join(sentences[i:i+3]) for i in range(0, len(sentences), 3)]
# Generate embeddings
st.info("π Generating embeddings...")
embeddings = embedder.encode(chunks)
# Create FAISS index
index = faiss.IndexFlatL2(embeddings[0].shape[0])
index.add(np.array(embeddings))
# Input question
question = st.text_input("Ask a question about the paper:")
if question:
# Embed the question
q_embedding = embedder.encode([question])
# Retrieve top 3 similar chunks
D, I = index.search(np.array(q_embedding), k=3)
retrieved_contexts = [chunks[i] for i in I[0]]
context = " ".join(retrieved_contexts)
# Answer using transformer
st.info("π‘ Answering with AI...")
answer = qa_pipeline(question=question, context=context)
st.success(f"**Answer:** {answer['answer']}")
|