File size: 3,565 Bytes
3f4f0ff
 
 
 
 
 
 
 
 
0be4c5a
 
 
 
 
 
 
 
 
 
 
3f4f0ff
 
 
0be4c5a
3f4f0ff
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
import gradio as gr
import os
import pdfplumber
import tempfile
from huggingface_hub import InferenceClient
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings

import os
from huggingface_hub import InferenceClient

# Hugging Face access token, read from the "hf_token" environment variable
# (None when the variable is not set).
hf_tokens = os.getenv("hf_token")

# Shared inference client used for chat-completion requests, routed through
# the "novita" provider.
client = InferenceClient(provider="novita", api_key=hf_tokens)

# Vector index built from the most recently processed PDF; it is None until
# load_pdf() has stored one here.
vectorstore = None

# Load and process the uploaded PDF
def load_pdf(file):
    """Index an uploaded PDF so that questions can be asked against it.

    The PDF bytes are written to a temporary file, the text of every page is
    extracted with pdfplumber, split into overlapping chunks, embedded, and
    stored in the module-level ``vectorstore`` as a FAISS index.

    Args:
        file: Raw PDF content as bytes, as delivered by the
            ``gr.File(type="binary")`` upload component. ``None`` or empty
            bytes means nothing was uploaded.

    Returns:
        A human-readable status string for the UI. Failures are reported in
        the returned text instead of being raised.
    """
    global vectorstore

    # Nothing uploaded: report it clearly instead of failing inside write().
    if not file:
        return "❌ Please upload a PDF first."

    temp_pdf_path = None
    try:
        # delete=False so the file can be reopened by path after the handle
        # is closed; the finally clause below removes it.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp:
            temp_pdf_path = tmp.name
            tmp.write(file)

        # Extract text page by page; extract_text() may yield nothing for
        # pages without a text layer, and those pages are skipped.
        with pdfplumber.open(temp_pdf_path) as pdf:
            page_texts = [page.extract_text() for page in pdf.pages]
        raw_text = "".join(f"{text}\n" for text in page_texts if text)

        if not raw_text.strip():
            return "❌ No extractable text found in the PDF."

        # Chunk the text
        splitter = RecursiveCharacterTextSplitter(chunk_size=800, chunk_overlap=100)
        texts = splitter.split_text(raw_text)

        # Create FAISS vectorstore
        embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
        vectorstore = FAISS.from_texts(texts, embeddings)

        return "✅ PDF successfully processed. You can now ask questions!"

    except Exception as e:
        # UI boundary: surface any failure as a status message.
        return f"❌ Error: {e}"

    finally:
        # Always remove the temporary copy so uploads do not pile up on disk.
        if temp_pdf_path is not None:
            try:
                os.remove(temp_pdf_path)
            except OSError:
                pass






def ask_question(query):
    """Answer a question using the currently indexed PDF as context.

    The three chunks most similar to the question are retrieved from the
    module-level ``vectorstore`` and passed, together with the question, to
    the chat model through the module-level ``client``.

    Args:
        query: The question text typed by the user.

    Returns:
        The model's answer, or a human-readable message when the question is
        blank, no PDF has been processed yet, or the request fails.
    """
    # Reject blank input before spending a retrieval and an LLM call on it.
    if not query or not query.strip():
        return "❌ Please enter a question."
    query = query.strip()

    if vectorstore is None:
        return "❌ Please upload a PDF first."

    try:
        docs = vectorstore.similarity_search(query, k=3)
        context = "\n\n".join(doc.page_content for doc in docs)

        # Prepare chat message format
        messages = [
            {
                "role": "system",
                "content": "You are a helpful assistant that answers questions based on a document.",
            },
            {
                "role": "user",
                "content": f"Answer this question using the context below:\n\nContext:\n{context}\n\nQuestion:\n{query}",
            },
        ]

        completion = client.chat.completions.create(
            model="meta-llama/Llama-4-Scout-17B-16E-Instruct",
            messages=messages,
            max_tokens=500,
        )

        # The message content may be missing; treat that as an empty answer
        # instead of letting .strip() raise on None.
        answer = (completion.choices[0].message.content or "").strip()
        if not answer:
            return "❌ The model returned an empty response."
        return answer

    except Exception as e:
        # UI boundary: surface any failure as a message in the answer box.
        return f"❌ Failed to generate answer: {e}"


# Gradio UI: one row to upload and process a PDF, a status line, and one row
# to ask a question, with the answer shown below.
with gr.Blocks() as demo:
    gr.Markdown("## 📄 RAG PDF Chatbot using Hugging Face Inference API")

    with gr.Row():
        # type="binary" hands the upload to load_pdf as raw bytes.
        file_input = gr.File(label="Upload PDF", type="binary")
        upload_btn = gr.Button("Process")

    status_box = gr.Textbox(label="Status", interactive=False)

    with gr.Row():
        question = gr.Textbox(label="Ask a Question")
        ask_btn = gr.Button("Ask")

    answer = gr.Textbox(label="Answer", lines=6)

    # Wire the buttons to their handlers.
    upload_btn.click(load_pdf, inputs=file_input, outputs=status_box)
    ask_btn.click(ask_question, inputs=question, outputs=answer)

demo.launch()