import os
import subprocess

import gradio as gr
import requests
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np


# Module-level state: the embedding model, the FAISS index and chunk list for the
# most recently indexed URL, and a cache of (chunks, index) pairs keyed by URL.
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")

vector_index = None
indexed_chunks = []
url_cache = {}

# The Firecrawl API key is read from the environment instead of being hard-coded.
FIRECRAWL_API_KEY = os.getenv("FIRECRAWL_API_KEY", "")
def ollama_generate(system_prompt: str, user_prompt: str, model: str = "llama2") -> str:
    """
    Calls the local Ollama LLM via the CLI and returns the generated text.
    """
    try:
        # `ollama run` accepts a single prompt argument, so the system prompt is
        # prepended to the user prompt before the call.
        full_prompt = f"{system_prompt}\n\n{user_prompt}"
        command = ["ollama", "run", model, full_prompt]
        result = subprocess.run(command, capture_output=True, text=True, check=True)
        return result.stdout.strip()
    except Exception as e:
        return f"Error calling Ollama: {str(e)}"
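
# Alternative sketch (not used above): the same generation call can go through
# Ollama's local HTTP API, which accepts the system prompt as a separate field.
# This assumes the default Ollama server is listening on localhost:11434.
#
#   resp = requests.post(
#       "http://localhost:11434/api/generate",
#       json={"model": model, "system": system_prompt, "prompt": user_prompt, "stream": False},
#       timeout=120,
#   )
#   text = resp.json().get("response", "")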

def scrape_and_embed(url: str):
    global vector_index, indexed_chunks

    # Serve previously scraped URLs from the in-memory cache.
    if url in url_cache:
        indexed_chunks, vector_index = url_cache[url]
        return f"✅ Loaded cached content for {url}"

    response = requests.post(
        "https://api.firecrawl.dev/v1/scrape",
        headers={"Authorization": f"Bearer {FIRECRAWL_API_KEY}"},
        json={"url": url, "formats": ["markdown"]},
        timeout=60,
    )
    if response.status_code != 200:
        return f"❌ Failed to scrape URL: {response.status_code}"

    # Firecrawl's v1 scrape endpoint returns the page content under data.markdown.
    content = response.json().get("data", {}).get("markdown", "")
    chunks = [line.strip() for line in content.split("\n") if len(line.strip()) > 50]
    indexed_chunks = chunks[:100]
    if not indexed_chunks:
        return f"❌ No substantial text content found at {url}"

    # Embed the chunks and build an exact (brute-force) L2 index over them.
    embeddings = embedding_model.encode(indexed_chunks)
    vector_index = faiss.IndexFlatL2(embeddings.shape[1])
    vector_index.add(np.array(embeddings))

    url_cache[url] = (indexed_chunks, vector_index)

    return f"✅ Scraped and indexed {len(indexed_chunks)} chunks from {url}"
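
# Quick interactive sanity check of the retrieval path (hypothetical; the URL and
# question below are placeholders, and FIRECRAWL_API_KEY must be set):
#
#   print(scrape_and_embed("https://example.com"))
#   q_emb = embedding_model.encode(["What is this page about?"])
#   D, I = vector_index.search(np.array(q_emb), k=3)
#   print([indexed_chunks[i] for i in I[0] if i != -1])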

def web_rag_ollama(combined_input: str) -> str:
    """
    Expects input: "<URL> || <question>"
    Scrapes the URL (cached), embeds it, retrieves context, then asks Ollama to answer.
    """
    global vector_index, indexed_chunks

    if "||" not in combined_input:
        return "❌ Input format must be: <URL> || <your question>"

    url, question = [part.strip() for part in combined_input.split("||", 1)]

    scrape_status = scrape_and_embed(url)
    if scrape_status.startswith("❌"):
        return scrape_status

    if not indexed_chunks or vector_index is None:
        return "⚠️ No indexed content available."

    # Retrieve the three chunks closest to the question in embedding space.
    query_emb = embedding_model.encode([question])
    D, I = vector_index.search(np.array(query_emb), k=3)
    context = "\n\n".join([indexed_chunks[i] for i in I[0] if i != -1])

    system_prompt = (
        "You are a helpful assistant. Use the provided context to answer the question. "
        "If the answer is not contained in the context, say you don't know."
    )
    user_prompt = f"Context:\n{context}\n\nQuestion:\n{question}\n\nAnswer:"

    answer = ollama_generate(system_prompt, user_prompt)

    return f"**Scrape status:** {scrape_status}\n\n**Answer:**\n{answer}"

demo = gr.Interface(
    fn=web_rag_ollama,
    inputs=gr.Textbox(
        label="Input",
        placeholder="Enter input in format:\nhttps://example.com || What is this page about?"
    ),
    outputs=gr.Textbox(label="Answer"),
    title="🌐 Web RAG Q&A with Ollama (MCP-ready)",
    description="Scrape a URL, embed its content, and answer questions using a local Ollama LLM."
)


if __name__ == "__main__":
    demo.launch(mcp_server=True)
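
# Example interaction (assumes Ollama is installed with the "llama2" model pulled
# and FIRECRAWL_API_KEY is exported in the environment):
#
#   Input:  https://example.com || What is this page about?
#   Output: the scrape status followed by the model's answer, grounded in the
#           retrieved page chunks.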