Spaces:

Jurk06
/

LLAMA-4-SCOUT-RAG

Running

File size: 3,565 Bytes

3f4f0ff
 
 
 
 
 
 
 
 
0be4c5a
 
 
 
 
 
 
 
 
 
 
3f4f0ff
 
 
0be4c5a
3f4f0ff

import gradio as gr
import os
import pdfplumber
import tempfile
from huggingface_hub import InferenceClient
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings

import os
from huggingface_hub import InferenceClient

# Hugging Face access token, read from the "hf_token" environment variable.
# os.environ.get returns None when the variable is not set.
hf_tokens = os.environ.get("hf_token")

# Inference client shared by every request; chat completions are routed
# through the "novita" provider.
client = InferenceClient(
    provider="novita",
    api_key=hf_tokens  # token comes from the environment; never hard-code it here
)

# FAISS index built from the most recently processed PDF. It stays None until
# load_pdf() succeeds, and ask_question() refuses to answer while it is None.
vectorstore = None

# Load and process the uploaded PDF
def load_pdf(file):
    """Index an uploaded PDF so that questions can be asked about it.

    The PDF bytes are written to a temporary file, the text of every page is
    extracted with pdfplumber, split into overlapping chunks and embedded
    into a FAISS index that is stored in the module-level ``vectorstore``.

    Args:
        file: Raw bytes of the uploaded PDF (the upload widget is configured
            with ``type="binary"``). ``None`` or empty bytes means nothing
            was uploaded.

    Returns:
        A human-readable status string for the UI. Failures are reported in
        the returned string rather than raised.
    """
    global vectorstore

    # Clicking "Process" without an upload hands us None; report it clearly
    # instead of surfacing a TypeError from write().
    if not file:
        return "❌ Please upload a PDF first."

    temp_pdf_path = None
    try:
        # delete=False so the file can be reopened by path after the handle
        # is closed; it is removed explicitly in the finally clause below.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp:
            temp_pdf_path = tmp.name
            tmp.write(file)

        # Extract text page by page; extract_text() may yield nothing for a
        # page, and such pages are skipped.
        with pdfplumber.open(temp_pdf_path) as pdf:
            page_texts = [page.extract_text() for page in pdf.pages]
        raw_text = "".join(f"{text}\n" for text in page_texts if text)

        if not raw_text.strip():
            return "❌ No extractable text found in the PDF."

        # Chunk the text
        splitter = RecursiveCharacterTextSplitter(chunk_size=800, chunk_overlap=100)
        texts = splitter.split_text(raw_text)

        # Create FAISS vectorstore
        embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
        vectorstore = FAISS.from_texts(texts, embeddings)

        return "✅ PDF successfully processed. You can now ask questions!"

    except Exception as e:
        # UI boundary: turn any failure into a status message.
        return f"❌ Error: {str(e)}"

    finally:
        # Never leave the uploaded document behind on disk.
        if temp_pdf_path is not None:
            try:
                os.remove(temp_pdf_path)
            except OSError:
                pass






def ask_question(query):
    """Answer a question about the most recently processed PDF.

    The three chunks most similar to the question are retrieved from the
    module-level ``vectorstore`` and passed as context to the chat model.

    Args:
        query: The user's question as typed into the UI.

    Returns:
        The model's answer with surrounding whitespace stripped, or a
        human-readable error string. Failures are reported in the returned
        string rather than raised.
    """
    if vectorstore is None:
        return "❌ Please upload a PDF first."

    # Do not spend an embedding lookup and an inference call on a blank
    # question.
    if not query or not query.strip():
        return "❌ Please enter a question."

    try:
        docs = vectorstore.similarity_search(query, k=3)
        context = "\n\n".join(doc.page_content for doc in docs)

        # Prepare chat message format
        messages = [
            {
                "role": "system",
                "content": "You are a helpful assistant that answers questions based on a document."
            },
            {
                "role": "user",
                "content": f"Answer this question using the context below:\n\nContext:\n{context}\n\nQuestion:\n{query}"
            }
        ]

        completion = client.chat.completions.create(
            model="meta-llama/Llama-4-Scout-17B-16E-Instruct",
            messages=messages,
            max_tokens=500
        )

        answer = completion.choices[0].message.content
        # Guard against a missing message body so the user sees a readable
        # message rather than an AttributeError description.
        if answer is None:
            return "❌ Failed to generate answer: the model returned an empty response."
        return answer.strip()

    except Exception as e:
        # UI boundary: turn any failure into a message for the answer box.
        return f"❌ Failed to generate answer: {str(e)}"


# Gradio UI: one row to upload and process a PDF, a status line, one row to
# ask a question, and a box for the answer. Components are laid out in the
# order they are created inside the Blocks/Row contexts.
with gr.Blocks() as demo:
    gr.Markdown("## 📄 RAG PDF Chatbot using Hugging Face Inference API")

    with gr.Row():
        # type="binary" hands the upload to load_pdf as raw bytes, which is
        # what load_pdf writes to its temporary file.
        file_input = gr.File(label="Upload PDF", type="binary")
        upload_btn = gr.Button("Process")

    # Read-only box showing the status string returned by load_pdf.
    status_box = gr.Textbox(label="Status", interactive=False)

    with gr.Row():
        question = gr.Textbox(label="Ask a Question")
        ask_btn = gr.Button("Ask")

    # Shows the string returned by ask_question (answer or error message).
    answer = gr.Textbox(label="Answer", lines=6)

    # Wire the buttons: "Process" indexes the uploaded PDF, "Ask" queries it.
    upload_btn.click(load_pdf, inputs=file_input, outputs=status_box)
    ask_btn.click(ask_question, inputs=question, outputs=answer)

# Start the web server as soon as this module is executed.
demo.launch()