import os
import subprocess

import gradio as gr
import requests
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np


# Module-level state: the embedding model, the FAISS index and chunk list for the
# most recently indexed URL, and a cache of (chunks, index) pairs keyed by URL.
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")

vector_index = None
indexed_chunks = []
url_cache = {}

# The Firecrawl API key is read from the environment instead of being hard-coded.
FIRECRAWL_API_KEY = os.getenv("FIRECRAWL_API_KEY", "")
def ollama_generate(system_prompt: str, user_prompt: str, model: str = "llama2") -> str:
    """
    Calls the local Ollama LLM via the CLI and returns the generated text.
    """
    try:
        # `ollama run` accepts a single prompt argument, so the system prompt is
        # prepended to the user prompt before the call.
        full_prompt = f"{system_prompt}\n\n{user_prompt}"
        command = ["ollama", "run", model, full_prompt]
        result = subprocess.run(command, capture_output=True, text=True, check=True)
        return result.stdout.strip()
    except Exception as e:
        return f"Error calling Ollama: {str(e)}"
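
# Alternative sketch (not used above): the same generation call can go through
# Ollama's local HTTP API, which accepts the system prompt as a separate field.
# This assumes the default Ollama server is listening on localhost:11434.
#
#   resp = requests.post(
#       "http://localhost:11434/api/generate",
#       json={"model": model, "system": system_prompt, "prompt": user_prompt, "stream": False},
#       timeout=120,
#   )
#   text = resp.json().get("response", "")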

def scrape_and_embed(url: str):
    global vector_index, indexed_chunks

    # Serve previously scraped URLs from the in-memory cache.
    if url in url_cache:
        indexed_chunks, vector_index = url_cache[url]
        return f"✅ Loaded cached content for {url}"

    response = requests.post(
        "https://api.firecrawl.dev/v1/scrape",
        headers={"Authorization": f"Bearer {FIRECRAWL_API_KEY}"},
        json={"url": url, "formats": ["markdown"]},
        timeout=60,
    )
    if response.status_code != 200:
        return f"❌ Failed to scrape URL: {response.status_code}"

    # Firecrawl's v1 scrape endpoint returns the page content under data.markdown.
    content = response.json().get("data", {}).get("markdown", "")
    chunks = [line.strip() for line in content.split("\n") if len(line.strip()) > 50]
    indexed_chunks = chunks[:100]
    if not indexed_chunks:
        return f"❌ No substantial text content found at {url}"

    # Embed the chunks and build an exact (brute-force) L2 index over them.
    embeddings = embedding_model.encode(indexed_chunks)
    vector_index = faiss.IndexFlatL2(embeddings.shape[1])
    vector_index.add(np.array(embeddings))

    url_cache[url] = (indexed_chunks, vector_index)

    return f"✅ Scraped and indexed {len(indexed_chunks)} chunks from {url}"
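
# Quick interactive sanity check of the retrieval path (hypothetical; the URL and
# question below are placeholders, and FIRECRAWL_API_KEY must be set):
#
#   print(scrape_and_embed("https://example.com"))
#   q_emb = embedding_model.encode(["What is this page about?"])
#   D, I = vector_index.search(np.array(q_emb), k=3)
#   print([indexed_chunks[i] for i in I[0] if i != -1])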

def web_rag_ollama(combined_input: str) -> str:
    """
    Expects input: "<URL> || <question>"
    Scrapes the URL (cached), embeds it, retrieves context, then asks Ollama to answer.
    """
    global vector_index, indexed_chunks

    if "||" not in combined_input:
        return "❌ Input format must be: <URL> || <your question>"

    url, question = [part.strip() for part in combined_input.split("||", 1)]

    scrape_status = scrape_and_embed(url)
    if scrape_status.startswith("❌"):
        return scrape_status

    if not indexed_chunks or vector_index is None:
        return "⚠️ No indexed content available."

    # Retrieve the three chunks closest to the question in embedding space.
    query_emb = embedding_model.encode([question])
    D, I = vector_index.search(np.array(query_emb), k=3)
    context = "\n\n".join([indexed_chunks[i] for i in I[0] if i != -1])

    system_prompt = (
        "You are a helpful assistant. Use the provided context to answer the question. "
        "If the answer is not contained in the context, say you don't know."
    )
    user_prompt = f"Context:\n{context}\n\nQuestion:\n{question}\n\nAnswer:"

    answer = ollama_generate(system_prompt, user_prompt)

    return f"**Scrape status:** {scrape_status}\n\n**Answer:**\n{answer}"

demo = gr.Interface(
    fn=web_rag_ollama,
    inputs=gr.Textbox(
        label="Input",
        placeholder="Enter input in format:\nhttps://example.com || What is this page about?"
    ),
    outputs=gr.Textbox(label="Answer"),
    title="🌐 Web RAG Q&A with Ollama (MCP-ready)",
    description="Scrape a URL, embed its content, and answer questions using a local Ollama LLM."
)


if __name__ == "__main__":
    demo.launch(mcp_server=True)
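
# Example interaction (assumes Ollama is installed with the "llama2" model pulled
# and FIRECRAWL_API_KEY is exported in the environment):
#
#   Input:  https://example.com || What is this page about?
#   Output: the scrape status followed by the model's answer, grounded in the
#           retrieved page chunks.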