File size: 3,212 Bytes
f597613 f084f9a 3c2875a e2511d3 f597613 667e863 f597613 5cb9d7e f597613 667e863 12d44b9 667e863 12d44b9 667e863 12d44b9 667e863 12d44b9 f597613 12d44b9 f597613 12d44b9 f597613 12d44b9 f597613 667e863 f597613 667e863 f597613 667e863 12d44b9 667e863 f597613 667e863 12d44b9 f597613 667e863 f597613 667e863 5cb9d7e 667e863 5cb9d7e 667e863 5cb9d7e f597613 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 |
import os
import gradio as gr
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.document_loaders import PyPDFLoader
from langchain_groq import ChatGroq
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains import RetrievalQA
from langchain.docstore.document import Document
import nbformat
# Load Groq API Key securely from the environment.
# Guard against an unset variable: os.environ[...] = None raises TypeError,
# which would crash the app at import time. Re-assignment is a no-op when
# the variable is already set, so only the missing case needs handling.
_groq_key = os.getenv("GROQ_API_KEY")
if _groq_key:
    os.environ["GROQ_API_KEY"] = _groq_key
else:
    print("Warning: GROQ_API_KEY is not set; ChatGroq calls will fail.")
# Helper: Read .ipynb file and extract text
def load_ipynb(file):
    """Extract all markdown and code cell text from an uploaded notebook.

    Reads the notebook at ``file.name`` and concatenates the source of every
    markdown/code cell (each followed by a blank line) into a single
    Document. Returns a one-element list, or an empty list if the notebook
    cannot be read (best-effort: the error is printed, not raised).
    """
    try:
        with open(file.name, "r", encoding="utf-8") as fh:
            notebook = nbformat.read(fh, as_version=nbformat.NO_CONVERT)
        text = "".join(
            cell.source + "\n\n"
            for cell in notebook.cells
            if cell.cell_type in ("markdown", "code")
        )
        return [Document(page_content=text)]
    except Exception as exc:
        print("Error loading .ipynb:", exc)
        return []
# Helper: Read PDF or IPYNB and build retriever chain
def process_files(files):
    """Build a RetrievalQA chain over the uploaded .pdf / .ipynb files.

    Loads each supported file into Documents, splits them into 500-char
    chunks (50 overlap), embeds with all-MiniLM-L6-v2 into a FAISS index,
    and wires the retriever to a Groq llama3-70b model.

    Returns the chain, or None when nothing could be processed.
    """
    try:
        all_docs = []
        for file in files:
            if file.name.endswith(".pdf"):
                loader = PyPDFLoader(file.name)
                all_docs.extend(loader.load())
            elif file.name.endswith(".ipynb"):
                all_docs.extend(load_ipynb(file))
        # Fix: with no supported files (or all loads failing), the original
        # fell through to FAISS.from_documents([]) which raises an obscure
        # error swallowed by the blanket except. Bail out explicitly.
        if not all_docs:
            print("Error in processing files: no documents could be extracted.")
            return None
        splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
        chunks = splitter.split_documents(all_docs)
        embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
        vectorstore = FAISS.from_documents(chunks, embeddings)
        retriever = vectorstore.as_retriever()
        llm = ChatGroq(model_name="llama3-70b-8192", temperature=0)
        qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=retriever)
        return qa_chain
    except Exception as e:
        # Best-effort boundary: callers treat None as "processing failed".
        print("Error in processing files:", e)
        return None
# Global chain shared between the upload handler and the question handler.
qa_chain = None


def upload_docs(files):
    """Process the uploaded files and cache the resulting QA chain globally.

    Returns a status string for the UI: an error message when processing
    failed, a success message otherwise.
    """
    global qa_chain
    qa_chain = process_files(files)
    # NOTE(review): the "β" glyphs look like mojibake of original status
    # emoji — preserved as-is to keep runtime strings unchanged.
    if qa_chain is None:
        return "β Error processing files. Please make sure the file format is correct."
    return "β PDFs or Notebooks uploaded and processed. Now ask your questions."
def ask_question(query):
    """Run *query* through the retrieval chain built from the uploaded files.

    Returns the chain's answer, or a user-facing error string when no files
    have been uploaded yet or the chain raises.
    """
    chain = qa_chain
    if chain is None:
        return "β Please upload PDFs or Kaggle Notebooks first."
    try:
        answer = chain.run(query)
    except Exception as exc:
        return f"β Error answering question: {exc}"
    return answer
# Gradio UI: file upload + processing on top, Q&A below.
# NOTE(review): the "π..." sequences in labels look like mojibake of the
# original emoji — preserved byte-for-byte.
with gr.Blocks() as app:
    gr.Markdown(
        "## π€ Kaggle Study Assistant\nUpload Kaggle `.pdf` or `.ipynb` files and ask intelligent questions."
    )
    with gr.Row():
        file_input = gr.File(
            file_types=[".pdf", ".ipynb"],
            file_count="multiple",
            label="Upload Kaggle Files",
        )
        process_btn = gr.Button("π₯ Process Files")
    status_box = gr.Textbox(label="Upload Status")
    process_btn.click(fn=upload_docs, inputs=file_input, outputs=status_box)

    question_box = gr.Textbox(label="Ask a question about uploaded notebooks")
    answer_box = gr.Textbox(label="Assistant Answer", interactive=False)
    question_box.submit(fn=ask_question, inputs=question_box, outputs=answer_box)

app.launch()
|