Spaces:

Kazeemkz
/

intuiPy_chat

Sleeping

App Files Files Community

Kazeemkz committed on Jul 6, 2024

Commit

fe4fdc2

1 Parent(s): cb6b522

Add Streamlit application and dependencies

Browse files

Files changed (2) hide show

app.py +66 -0
requirements.txt +7 -0

app.py ADDED Viewed

	@@ -0,0 +1,66 @@

+import streamlit as st
+from PyPDF2 import PdfReader
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from transformers import pipeline, AutoTokenizer, AutoModelForQuestionAnswering
+from sentence_transformers import SentenceTransformer
+import faiss
+import numpy as np
+# Load the lightweight Hugging Face transformer model
+model_name = "distilbert-base-uncased-distilled-squad"
+tokenizer = AutoTokenizer.from_pretrained(model_name)
+qa_model = AutoModelForQuestionAnswering.from_pretrained(model_name)
+qa_pipeline = pipeline("question-answering", model=qa_model, tokenizer=tokenizer)
+# Load the SentenceTransformer model for embeddings
+embedding_model = SentenceTransformer('all-MiniLM-L6-v2')
+# Upload PDF files
+st.header("Question and Answer Chatbot")
+with st.sidebar:
+    st.title("Turn your PDFs into a Q&A session. Upload a file and start asking questions")
+    file = st.file_uploader("PDF file upload", type="pdf")
+# Extract the text
+if file is not None:
+    pdf_reader = PdfReader(file)
+    text = ""
+    for page in pdf_reader.pages:
+        text += page.extract_text()
+    # Break it into chunks
+    text_splitter = RecursiveCharacterTextSplitter(
+        separators="\n",
+        #chunk_size=1000,
+        #chunk_overlap=500,
+        chunk_size=800,
+        chunk_overlap=150,
+        length_function=len
+    )
+    chunks = text_splitter.split_text(text)
+    # Generate embeddings for each chunk
+    embeddings = embedding_model.encode(chunks)
+    # Create FAISS index and add embeddings
+    dimension = embeddings.shape[1]
+    index = faiss.IndexFlatL2(dimension)
+    index.add(np.array(embeddings))
+    # Get user question
+    user_question = st.text_input("Type your question here")
+    # Perform similarity search
+    if user_question:
+        question_embedding = embedding_model.encode([user_question])
+        D, I = index.search(np.array(question_embedding), k=5)
+        matched_texts = [chunks[i] for i in I[0]]
+        # Use the lightweight transformer model for question answering
+        response = ""
+        for context in matched_texts:
+            result = qa_pipeline(question=user_question, context=context)
+            response += result['answer'] + " "
+        st.write(response)

requirements.txt ADDED Viewed

	@@ -0,0 +1,7 @@

+streamlit
+PyPDF2
+langchain  # provides langchain.text_splitter.RecursiveCharacterTextSplitter
+transformers
+sentence-transformers
+faiss-cpu  # PyPI distribution that provides the `faiss` module imported by app.py
+numpy