File size: 3,212 Bytes
f597613
 
f084f9a
3c2875a
 
e2511d3
f597613
 
667e863
 
f597613
5cb9d7e
f597613
 
667e863
 
12d44b9
 
667e863
 
 
 
 
12d44b9
 
 
 
667e863
 
 
12d44b9
 
 
 
 
667e863
12d44b9
 
f597613
12d44b9
 
f597613
12d44b9
 
 
f597613
12d44b9
 
 
 
 
 
f597613
667e863
f597613
 
667e863
f597613
667e863
12d44b9
 
667e863
f597613
 
 
667e863
12d44b9
 
 
 
f597613
667e863
f597613
667e863
5cb9d7e
 
667e863
 
5cb9d7e
 
667e863
5cb9d7e
 
 
f597613
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
import os
import gradio as gr
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.document_loaders import PyPDFLoader
from langchain_groq import ChatGroq
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains import RetrievalQA
from langchain.docstore.document import Document
import nbformat

# Load Groq API Key securely; skip when unset so importing this module
# doesn't crash (os.environ values must be str — assigning None raises TypeError).
_groq_api_key = os.getenv("GROQ_API_KEY")
if _groq_api_key is not None:
    os.environ["GROQ_API_KEY"] = _groq_api_key

# Helper: Read .ipynb file and extract text
def load_ipynb(file):
    """Extract markdown and code cell sources from a notebook.

    Returns a single-element list with one Document holding all cell text,
    or an empty list if the notebook cannot be read.
    """
    try:
        with open(file.name, "r", encoding="utf-8") as handle:
            notebook = nbformat.read(handle, as_version=nbformat.NO_CONVERT)
            # Concatenate markdown and code cells, preserving the original
            # "source + blank line" separator between cells.
            pieces = [
                cell.source + "\n\n"
                for cell in notebook.cells
                if cell.cell_type in ("markdown", "code")
            ]
            return [Document(page_content="".join(pieces))]
    except Exception as e:
        print("Error loading .ipynb:", e)
        return []

# Helper: Read PDF or IPYNB and build retriever chain
def process_files(files):
    """Load uploaded .pdf/.ipynb files, chunk them, and build a RetrievalQA chain.

    Returns the chain, or None when no supported documents could be loaded
    or any step of the pipeline fails (the error is printed, not raised).
    """
    try:
        all_docs = []
        for file in files:
            if file.name.endswith(".pdf"):
                loader = PyPDFLoader(file.name)
                all_docs.extend(loader.load())
            elif file.name.endswith(".ipynb"):
                all_docs.extend(load_ipynb(file))

        # Nothing loadable (unsupported extensions or failed notebook reads):
        # bail out early instead of letting FAISS raise on an empty corpus.
        if not all_docs:
            print("No supported documents were loaded.")
            return None

        splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
        chunks = splitter.split_documents(all_docs)

        # Embed chunks locally, index them in FAISS, and expose a retriever.
        embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
        vectorstore = FAISS.from_documents(chunks, embeddings)
        retriever = vectorstore.as_retriever()

        # Deterministic answers (temperature=0) from the Groq-hosted LLM.
        llm = ChatGroq(model_name="llama3-70b-8192", temperature=0)
        qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=retriever)
        return qa_chain
    except Exception as e:
        print("Error in processing files:", e)
        return None

# Global chain, populated once files have been processed.
qa_chain = None

def upload_docs(files):
    """(Re)build the global QA chain from the uploads and report status."""
    global qa_chain
    qa_chain = process_files(files)
    if qa_chain is not None:
        return "βœ… PDFs or Notebooks uploaded and processed. Now ask your questions."
    return "❌ Error processing files. Please make sure the file format is correct."

def ask_question(query):
    """Answer *query* via the global QA chain, or prompt for an upload first."""
    chain = qa_chain
    if chain is None:
        return "❌ Please upload PDFs or Kaggle Notebooks first."
    try:
        answer = chain.run(query)
    except Exception as e:
        return f"⚠ Error answering question: {e}"
    return answer

# Gradio UI
# Two-step workflow: (1) upload & process files into the global QA chain,
# (2) submit questions against the resulting retriever.
with gr.Blocks() as app:
    gr.Markdown("## πŸ€– Kaggle Study Assistant\nUpload Kaggle `.pdf` or `.ipynb` files and ask intelligent questions.")

    with gr.Row():
        # File picker restricted to the two supported formats; multiple files allowed.
        upload = gr.File(file_types=[".pdf", ".ipynb"], file_count="multiple", label="Upload Kaggle Files")
        btn_upload = gr.Button("πŸ“₯ Process Files")

    upload_output = gr.Textbox(label="Upload Status")
    # Button click (re)builds the global QA chain and shows a status message.
    btn_upload.click(fn=upload_docs, inputs=upload, outputs=upload_output)

    question = gr.Textbox(label="Ask a question about uploaded notebooks")
    answer = gr.Textbox(label="Assistant Answer", interactive=False)
    # Pressing Enter in the question box routes the query to ask_question.
    question.submit(fn=ask_question, inputs=question, outputs=answer)

# Start the Gradio server (blocking call).
app.launch()