rohit committed on
Commit e5ec944 · 1 Parent(s): 6de22e1

remove duplicate

Files changed (3)
  1. app.py +0 -115
  2. developer-portfolio-rag +0 -1
  3. start.sh +2 -2
app.py DELETED
@@ -1,115 +0,0 @@
- from fastapi import FastAPI, HTTPException
- from pydantic import BaseModel
- import os
- import logging
- import sys
- import json
-
- # Configure logging
- logging.basicConfig(
-     level=logging.INFO,
-     format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
-     handlers=[
-         logging.StreamHandler(sys.stdout)
-     ]
- )
- logger = logging.getLogger(__name__)
-
- app = FastAPI(title="RAG Pipeline API", description="Multi-dataset RAG API", version="1.0.0")
-
- # Initialize pipelines for all datasets
- pipelines = {}
- google_api_key = os.getenv("GOOGLE_API_KEY")
-
- logger.info(f"Starting RAG Pipeline API")
- logger.info(f"Google API Key present: {'Yes' if google_api_key else 'No'}")
-
- # Don't load datasets during startup - do it asynchronously after server starts
- logger.info("RAG Pipeline API is ready to serve requests - datasets will load in background")
-
- class Question(BaseModel):
-     text: str
-     dataset: str = "developer-portfolio"
-
- @app.post("/answer")
- async def get_answer(question: Question):
-     try:
-         # Check if any pipelines are loaded
-         if not pipelines:
-             return {
-                 "answer": "RAG Pipeline is running but datasets are still loading in the background. Please try again in a moment, or check /health for loading status.",
-                 "dataset": question.dataset,
-                 "status": "datasets_loading"
-             }
-
-         # Select the appropriate pipeline based on dataset
-         if question.dataset not in pipelines:
-             raise HTTPException(status_code=400, detail=f"Dataset '{question.dataset}' not available. Available datasets: {list(pipelines.keys())}")
-
-         selected_pipeline = pipelines[question.dataset]
-         answer = selected_pipeline.answer_question(question.text)
-         return {"answer": answer, "dataset": question.dataset}
-     except Exception as e:
-         raise HTTPException(status_code=500, detail=str(e))
-
- @app.get("/datasets")
- async def list_datasets():
-     """List all available datasets"""
-     return {"datasets": list(pipelines.keys())}
-
- async def load_datasets_background():
-     """Load datasets in background after server starts"""
-     global pipelines
-     if google_api_key:
-         try:
-             # Import modules only when needed
-             import sys
-             sys.path.append('/app')
-             from app.pipeline import RAGPipeline
-             from app.config import DATASET_CONFIGS
-
-             # Only load developer-portfolio to save memory
-             dataset_name = "developer-portfolio"
-             logger.info(f"Loading dataset: {dataset_name}")
-             pipeline = RAGPipeline.from_preset(
-                 google_api_key=google_api_key,
-                 preset_name=dataset_name
-             )
-             pipelines[dataset_name] = pipeline
-             logger.info(f"Successfully loaded {dataset_name}")
-         except Exception as e:
-             logger.error(f"Failed to load dataset: {e}")
-         logger.info(f"Background loading complete - {len(pipelines)} datasets loaded")
-     else:
-         logger.warning("No Google API key provided - running in demo mode without datasets")
-
- @app.on_event("startup")
- async def startup_event():
-     logger.info("FastAPI application startup complete")
-     logger.info(f"Server should be running on port: 7860")
-
-     # Start loading datasets in background (non-blocking)
-     import asyncio
-     asyncio.create_task(load_datasets_background())
-
- @app.on_event("shutdown")
- async def shutdown_event():
-     logger.info("FastAPI application shutting down")
-
- @app.get("/")
- async def root():
-     """Root endpoint"""
-     return {"status": "ok", "message": "RAG Pipeline API", "version": "1.0.0", "datasets": list(pipelines.keys())}
-
- @app.get("/health")
- async def health_check():
-     """Health check endpoint"""
-     logger.info("Health check called")
-     loading_status = "complete" if "developer-portfolio" in pipelines else "loading"
-     return {
-         "status": "healthy",
-         "datasets_loaded": len(pipelines),
-         "total_datasets": 1, # Only loading developer-portfolio
-         "loading_status": loading_status,
-         "port": "7860"
-     }
developer-portfolio-rag DELETED
@@ -1 +0,0 @@
- Subproject commit 761b11c411795efadbf92045b906c28125597810
 
 
start.sh CHANGED
@@ -1,7 +1,7 @@
  #!/bin/bash
  set -e # Exit on any error

- echo "=== RAG Pipeline Startup ==="
+ echo "=== RAG Pipeline Startup (TESTING)==="
  echo "PORT environment variable: ${PORT:-'not set'}"
  echo "Using port: 7860"
  if [ -n "$GOOGLE_API_KEY" ]; then
@@ -22,4 +22,4 @@ ls -la app/
  echo "Testing Python import:"
  python -c "import app.main; print('Import successful')" || echo "Import failed"
  echo "Starting uvicorn..."
- uvicorn rag_pipeline.app.main:app --host 0.0.0.0 --port 7860 --log-level info
+ uvicorn app.main:app --host 0.0.0.0 --port 7860 --log-level info
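
The second hunk points uvicorn at app.main:app instead of rag_pipeline.app.main:app, matching the removal of the duplicate top-level app.py and the developer-portfolio-rag submodule. A rough sketch of the layout this assumes when start.sh runs from the repository root; the exact tree is not shown in this commit:

```bash
# Assumed layout (hypothetical; only app/main.py, app/pipeline.py and
# app/config.py are actually referenced by the code in this commit):
#   .
#   ├── start.sh
#   └── app/
#       ├── __init__.py
#       ├── main.py        # defines the module-level `app` ASGI object
#       ├── pipeline.py
#       └── config.py
# With the working directory at the repo root, "app.main" is importable
# directly, so uvicorn no longer needs the old rag_pipeline.* prefix.
uvicorn app.main:app --host 0.0.0.0 --port 7860 --log-level info
```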