from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
import os

from langchain.chains import RetrievalQA
from langchain.llms import HuggingFaceHub

from ..utils.vector_store import get_vector_store

app = FastAPI()


class QueryRequest(BaseModel):
    query: str
# Option 1: Hugging Face Inference API (free, with rate limits)
def setup_hf_api_model():
    """Uses Hugging Face's free Inference API."""
    # Get a free token from https://huggingface.co/settings/tokens
    # and set it in your deployment environment.
    hf_token = os.getenv("HUGGINGFACE_API_TOKEN")
    if not hf_token:
        raise ValueError("HUGGINGFACE_API_TOKEN environment variable required")

    llm = HuggingFaceHub(
        repo_id="microsoft/DialoGPT-medium",  # free model
        model_kwargs={
            "temperature": 0.1,
            "max_length": 512,
        },
        huggingfacehub_api_token=hf_token,
    )
    return llm
# Option 2: Cohere API (free tier: 100 API calls/month)
def setup_cohere_model():
    """Uses Cohere's free tier."""
    from langchain.llms import Cohere

    cohere_api_key = os.getenv("COHERE_API_KEY")
    if not cohere_api_key:
        raise ValueError("COHERE_API_KEY required")

    llm = Cohere(
        cohere_api_key=cohere_api_key,
        model="command-light",  # free-tier model
        temperature=0.1,
    )
    return llm
# Option 3: Together AI (free credits)
def setup_together_model():
    """Uses Together AI's free credits."""
    from langchain.llms import Together

    together_api_key = os.getenv("TOGETHER_API_KEY")
    if not together_api_key:
        raise ValueError("TOGETHER_API_KEY required")

    llm = Together(
        together_api_key=together_api_key,
        model="meta-llama/Llama-2-7b-chat-hf",
        temperature=0.1,
    )
    return llm
# Initialize a model, trying the providers in order of preference.
llm = None
model_used = "none"

try:
    llm = setup_hf_api_model()
    model_used = "huggingface"
    print("✓ Using Hugging Face Inference API")
except Exception:
    try:
        llm = setup_cohere_model()
        model_used = "cohere"
        print("✓ Using Cohere API")
    except Exception:
        try:
            llm = setup_together_model()
            model_used = "together"
            print("✓ Using Together AI")
        except Exception as e:
            print(f"✗ Failed to initialize any model: {e}")
# Set up the QA chain over the default vector store.
qa_chain = None
if llm:
    try:
        retriever = get_vector_store().as_retriever()
        qa_chain = RetrievalQA.from_chain_type(
            llm=llm,
            retriever=retriever,
            return_source_documents=True,
        )
        print("✓ QA chain ready")
    except Exception as e:
        print(f"✗ QA chain failed: {e}")
# Route decorators below were missing from the source and are restored here;
# the exact paths are assumptions.
@app.get("/")
def root():
    return {
        "status": "running",
        "model": model_used,
        "qa_ready": qa_chain is not None,
    }
@app.post("/ask")  # path assumed
def ask_question(request: QueryRequest):
    if qa_chain is None:
        raise HTTPException(status_code=500, detail="Service not ready")
    try:
        result = qa_chain({"query": request.query})
        return {
            "answer": result["result"],
            "model_used": model_used,
            "sources": [
                {
                    "content": doc.page_content[:200] + "...",
                    "metadata": doc.metadata,
                }
                for doc in result["source_documents"][:3]
            ],
        }
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Error: {str(e)}")
@app.post("/ask/{store_type}")  # path assumed
def ask_specific_store(store_type: str, request: QueryRequest):
    if llm is None:
        raise HTTPException(status_code=500, detail="LLM not available")

    store_paths = {
        "mes": "./vector_stores/mes_db",
        "general": "./vector_stores/general_db",
        "tech": "./vector_stores/tech_db",
    }
    if store_type not in store_paths:
        raise HTTPException(status_code=400, detail="Invalid store type")

    try:
        vector_store = get_vector_store(persist_directory=store_paths[store_type])
        retriever = vector_store.as_retriever()
        store_qa_chain = RetrievalQA.from_chain_type(
            llm=llm,
            retriever=retriever,
            return_source_documents=True,
        )
        result = store_qa_chain({"query": request.query})
        return {
            "answer": result["result"],
            "store_used": store_type,
            "model_used": model_used,
            "sources": [doc.page_content[:200] + "..." for doc in result["source_documents"][:3]],
        }
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
if __name__ == "__main__":
    # Note: the relative import at the top means this module must run inside
    # its package (e.g. `uvicorn <package>.main:app`); direct execution fails.
    import uvicorn

    port = int(os.environ.get("PORT", 8000))
    uvicorn.run(app, host="0.0.0.0", port=port)
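
# Example client usage — a minimal sketch, assuming the restored routes above
# match the original deployment and the service listens on the default port
# 8000; the query strings are purely illustrative:
#
#   import requests
#
#   # Health check: reports which model initialized and whether QA is ready.
#   print(requests.get("http://localhost:8000/").json())
#
#   # Ask against the default vector store.
#   resp = requests.post(
#       "http://localhost:8000/ask",
#       json={"query": "How are work orders tracked?"},
#   )
#   print(resp.json()["answer"])
#
#   # Ask against a specific store ("mes", "general", or "tech").
#   resp = requests.post(
#       "http://localhost:8000/ask/mes",
#       json={"query": "How are work orders tracked?"},
#   )
#   print(resp.json()["sources"])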