# mes-chatbot-rag-backend/api/main_hface_hosted.py
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
import os

from langchain.chains import RetrievalQA
from langchain.llms import HuggingFaceHub

from ..utils.vector_store import get_vector_store

app = FastAPI()
class QueryRequest(BaseModel):
    query: str
# Option 1: Hugging Face Inference API (free, with rate limits)
def setup_hf_api_model():
    """Uses Hugging Face's free Inference API."""
    # Get a free token from https://huggingface.co/settings/tokens
    # and set it in your deployment environment.
    hf_token = os.getenv("HUGGINGFACE_API_TOKEN")
    if not hf_token:
        raise ValueError("HUGGINGFACE_API_TOKEN environment variable required")

    llm = HuggingFaceHub(
        repo_id="microsoft/DialoGPT-medium",  # free model
        model_kwargs={
            "temperature": 0.1,
            "max_length": 512,
        },
        huggingfacehub_api_token=hf_token,
    )
    return llm
# Option 2: Cohere API (free tier: 100 API calls/month)
def setup_cohere_model():
    """Uses Cohere's free tier."""
    from langchain.llms import Cohere

    cohere_api_key = os.getenv("COHERE_API_KEY")
    if not cohere_api_key:
        raise ValueError("COHERE_API_KEY required")

    llm = Cohere(
        cohere_api_key=cohere_api_key,
        model="command-light",  # free tier model
        temperature=0.1,
    )
    return llm
# Option 3: Together AI (free credits)
def setup_together_model():
    """Uses Together AI's free credits."""
    from langchain.llms import Together

    together_api_key = os.getenv("TOGETHER_API_KEY")
    if not together_api_key:
        raise ValueError("TOGETHER_API_KEY required")

    llm = Together(
        together_api_key=together_api_key,
        model="meta-llama/Llama-2-7b-chat-hf",
        temperature=0.1,
    )
    return llm
# Initialize a model (try the options in order of preference)
llm = None
model_used = "none"

try:
    llm = setup_hf_api_model()
    model_used = "huggingface"
    print("✅ Using Hugging Face Inference API")
except Exception:
    try:
        llm = setup_cohere_model()
        model_used = "cohere"
        print("✅ Using Cohere API")
    except Exception:
        try:
            llm = setup_together_model()
            model_used = "together"
            print("✅ Using Together AI")
        except Exception as e:
            print(f"❌ Failed to initialize any model: {e}")
# Set up the QA chain
qa_chain = None
if llm:
    try:
        retriever = get_vector_store().as_retriever()
        qa_chain = RetrievalQA.from_chain_type(
            llm=llm,
            retriever=retriever,
            return_source_documents=True,
        )
        print("✅ QA chain ready")
    except Exception as e:
        print(f"❌ QA chain failed: {e}")
@app.get("/")
def root():
    return {
        "status": "running",
        "model": model_used,
        "qa_ready": qa_chain is not None,
    }
@app.post("/ask")
def ask_question(request: QueryRequest):
if qa_chain is None:
raise HTTPException(status_code=500, detail="Service not ready")
try:
result = qa_chain({"query": request.query})
return {
"answer": result["result"],
"model_used": model_used,
"sources": [
{
"content": doc.page_content[:200] + "...",
"metadata": doc.metadata
}
for doc in result["source_documents"][:3]
]
}
except Exception as e:
raise HTTPException(status_code=500, detail=f"Error: {str(e)}")
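
# A minimal usage sketch for /ask (assumptions: the service is running locally
# on port 8000, as configured in the __main__ block at the bottom of this file,
# and the example query text is purely illustrative):
#
#   import requests
#   resp = requests.post(
#       "http://localhost:8000/ask",
#       json={"query": "What does the MES module do?"},
#       timeout=60,
#   )
#   data = resp.json()  # {"answer": ..., "model_used": ..., "sources": [...]}
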
@app.post("/ask/{store_type}")
def ask_specific_store(store_type: str, request: QueryRequest):
if llm is None:
raise HTTPException(status_code=500, detail="LLM not available")
store_paths = {
"mes": "./vector_stores/mes_db",
"general": "./vector_stores/general_db",
"tech": "./vector_stores/tech_db"
}
if store_type not in store_paths:
raise HTTPException(status_code=400, detail="Invalid store type")
try:
vector_store = get_vector_store(
persist_directory=store_paths[store_type])
retriever = vector_store.as_retriever()
store_qa_chain = RetrievalQA.from_chain_type(
llm=llm,
retriever=retriever,
return_source_documents=True
)
result = store_qa_chain({"query": request.query})
return {
"answer": result["result"],
"store_used": store_type,
"model_used": model_used,
"sources": [doc.page_content[:200] + "..." for doc in result["source_documents"][:3]]
}
except Exception as e:
raise HTTPException(status_code=500, detail=str(e))
if __name__ == "__main__":
    import uvicorn

    port = int(os.environ.get("PORT", 8000))
    uvicorn.run(app, host="0.0.0.0", port=port)
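
# A minimal usage sketch for the store-specific endpoint (assumptions: the
# server is reachable at http://localhost:8000, a "mes" store has already been
# built under ./vector_stores/mes_db, and the query text is illustrative):
#
#   import requests
#   resp = requests.post(
#       "http://localhost:8000/ask/mes",
#       json={"query": "How are work orders tracked?"},
#       timeout=60,
#   )
#   print(resp.json()["answer"], resp.json()["store_used"])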