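"""Study Assistant: a Gradio chat app that answers questions about local
PDF/TXT study materials via a FAISS + Flan-T5 RetrievalQA pipeline."""
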
import os
import gradio as gr
from langchain_community.document_loaders import PyMuPDFLoader, TextLoader
from langchain_text_splitters import CharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.chains import RetrievalQA
from langchain_community.llms import HuggingFacePipeline
from transformers import pipeline, AutoTokenizer
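# Assumed runtime dependencies for these imports (not pinned in this file):
# gradio, langchain, langchain-community, transformers, torch,
# sentence-transformers, faiss-cpu, and pymupdf.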
def load_documents(file_path="study_materials"):
    """Collect LangChain documents from every PDF and TXT file in the folder."""
    documents = []
    for filename in os.listdir(file_path):
        path = os.path.join(file_path, filename)
        if filename.endswith(".pdf"):
            loader = PyMuPDFLoader(path)
            documents.extend(loader.load())
        elif filename.endswith(".txt"):
            loader = TextLoader(path)
            documents.extend(loader.load())
    return documents
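# A missing "study_materials" folder surfaces here as FileNotFoundError from
# os.listdir, which create_qa_system() below converts into a gr.Error.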
def create_qa_system():
    try:
        # Load documents
        documents = load_documents()
        if not documents:
            raise ValueError("📚 No study materials found")

        # Text splitting
        text_splitter = CharacterTextSplitter(
            chunk_size=1100,
            chunk_overlap=200,
            separator="\n\n"
        )
        texts = text_splitter.split_documents(documents)

        # Embeddings
        embeddings = HuggingFaceEmbeddings(
            model_name="sentence-transformers/all-MiniLM-L6-v2"
        )

        # Vector store
        db = FAISS.from_documents(texts, embeddings)

        # LLM setup with proper LangChain wrapper
        tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-base")
        pipe = pipeline(
            "text2text-generation",
            model="google/flan-t5-base",
            tokenizer=tokenizer,
            max_length=600,
            temperature=0.7,
            do_sample=True,
            top_k=50,
            device=-1  # run on CPU
        )

        # Wrap pipeline in LangChain component
        llm = HuggingFacePipeline(pipeline=pipe)

        # Create QA chain over the top-3 retrieved chunks
        return RetrievalQA.from_llm(
            llm=llm,
            retriever=db.as_retriever(search_kwargs={"k": 3}),
            return_source_documents=True
        )
    except Exception as e:
        raise gr.Error(f"Error: {str(e)}")
# Initialize system
try:
    qa = create_qa_system()
except Exception as e:
    print(f"Startup failed: {str(e)}")
    raise
def ask_question(question, history):
    """Chat handler: run the QA chain and append deduplicated source paths."""
    try:
        result = qa.invoke({"query": question})
        answer = result["result"]
        sources = list({doc.metadata["source"] for doc in result["source_documents"]})
        return f"{answer}\n\n📚 Sources: {', '.join(sources)}"
    except Exception as e:
        return f"Error: {str(e)[:150]}"
gr.ChatInterface(
    ask_question,
    title="Study Assistant",
    description="Place PDF/TXT files in the 'study_materials' folder and ask questions!",
    theme="soft"
).launch()
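# On Hugging Face Spaces the bare launch() above is sufficient; for local runs,
# launch(share=True) would additionally create a temporary public URL.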