import os

import gradio as gr
import nbformat
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.document_loaders import PyPDFLoader
from langchain_groq import ChatGroq
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains import RetrievalQA
from langchain.docstore.document import Document

# Load the Groq API key from the environment (must be set before launching the app)
os.environ["GROQ_API_KEY"] = os.getenv("GROQ_API_KEY", "")


# Helper: read an .ipynb file and extract markdown/code cells as plain text
def load_ipynb(file):
    try:
        with open(file.name, "r", encoding="utf-8") as f:
            nb = nbformat.read(f, as_version=nbformat.NO_CONVERT)
        text = ""
        for cell in nb.cells:
            if cell.cell_type in ["markdown", "code"]:
                text += cell.source + "\n\n"
        return [Document(page_content=text)]
    except Exception as e:
        print("Error loading .ipynb:", e)
        return []


# Helper: read PDF or IPYNB files and build the retrieval QA chain
def process_files(files):
    try:
        all_docs = []
        for file in files:
            if file.name.endswith(".pdf"):
                loader = PyPDFLoader(file.name)
                all_docs.extend(loader.load())
            elif file.name.endswith(".ipynb"):
                all_docs.extend(load_ipynb(file))

        # Split the documents into overlapping chunks for embedding
        splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
        chunks = splitter.split_documents(all_docs)

        # Embed the chunks and index them in a FAISS vector store
        embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
        vectorstore = FAISS.from_documents(chunks, embeddings)
        retriever = vectorstore.as_retriever()

        # Groq-hosted Llama 3 as the answering model
        llm = ChatGroq(model_name="llama3-70b-8192", temperature=0)
        qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=retriever)
        return qa_chain
    except Exception as e:
        print("Error in processing files:", e)
        return None


# Global chain shared between the upload and question handlers
qa_chain = None


def upload_docs(files):
    global qa_chain
    qa_chain = process_files(files)
    if qa_chain is None:
        return "❌ Error processing files. Please make sure the file format is correct."
    return "✅ PDFs or Notebooks uploaded and processed. Now ask your questions."


def ask_question(query):
    if qa_chain is None:
        return "❌ Please upload PDFs or Kaggle Notebooks first."
    try:
        return qa_chain.run(query)
    except Exception as e:
        return f"⚠ Error answering question: {e}"


# Gradio UI
with gr.Blocks() as app:
    gr.Markdown(
        "## 🤖 Kaggle Study Assistant\n"
        "Upload Kaggle `.pdf` or `.ipynb` files and ask intelligent questions."
    )
    with gr.Row():
        upload = gr.File(file_types=[".pdf", ".ipynb"], file_count="multiple", label="Upload Kaggle Files")
        btn_upload = gr.Button("📥 Process Files")
    upload_output = gr.Textbox(label="Upload Status")
    btn_upload.click(fn=upload_docs, inputs=upload, outputs=upload_output)

    question = gr.Textbox(label="Ask a question about uploaded notebooks")
    answer = gr.Textbox(label="Assistant Answer", interactive=False)
    question.submit(fn=ask_question, inputs=question, outputs=answer)

app.launch()
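
# --- Optional usage sketch (not part of the app) ------------------------------
# A minimal, hedged example of exercising the retrieval chain without the
# Gradio UI. Assumptions (not from the original code): this file is saved as
# app.py, GROQ_API_KEY is set in the environment, "notebook.ipynb" is a local
# notebook, and SimpleNamespace stands in for the object with a .name attribute
# that Gradio's File component normally passes to process_files.
#
#   from types import SimpleNamespace
#   chain = process_files([SimpleNamespace(name="notebook.ipynb")])
#   if chain is not None:
#       print(chain.run("Summarise what this notebook does"))
#
# To run the full app instead:
#   export GROQ_API_KEY="your-key"
#   python app.py   # then open the local URL Gradio prints (default port 7860)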