Qar-Raz commited on
Commit
02e0da8
·
1 Parent(s): 21adad0

added time metrics

Browse files
Files changed (4) hide show
  1. .gitignore +1 -1
  2. api.py +17 -0
  3. main.py +1 -0
  4. query_only.py +1 -2
.gitignore CHANGED
@@ -1,4 +1,4 @@
1
- # Python bytecode
2
  __pycache__/
3
  *.py[cod]
4
  *$py.class
 
1
+ # Python-specific ignores
2
  __pycache__/
3
  *.py[cod]
4
  *$py.class
api.py CHANGED
@@ -1,3 +1,4 @@
 
1
  import os
2
  import time
3
  from typing import Any
@@ -38,6 +39,11 @@ class PredictResponse(BaseModel):
38
  metrics: dict[str, float]
39
 
40
 
 
 
 
 
 
41
  app = FastAPI(title="RAG-AS3 API", version="0.1.0")
42
 
43
  app.add_middleware(
@@ -77,6 +83,13 @@ def _resolve_model(name: str, models: dict[str, Any]) -> tuple[str, Any]:
77
  return model_key, models[model_key]
78
 
79
 
 
 
 
 
 
 
 
80
  @app.on_event("startup")
81
  def startup_event() -> None:
82
  load_dotenv()
@@ -131,6 +144,10 @@ def health() -> dict[str, str]:
131
  return {"status": "ok" if ready else "starting"}
132
 
133
 
 
 
 
 
134
  @app.post("/predict", response_model=PredictResponse)
135
  def predict(payload: PredictRequest) -> PredictResponse:
136
  if not state:
 
1
+ # FastAPI endpoints defined here
2
  import os
3
  import time
4
  from typing import Any
 
39
  metrics: dict[str, float]
40
 
41
 
42
+
43
+ # FastAPI setup
44
+ # FastAPI allows us to define Python-based endpoints
45
+ # that are called from the React-based frontend
46
+
47
  app = FastAPI(title="RAG-AS3 API", version="0.1.0")
48
 
49
  app.add_middleware(
 
83
  return model_key, models[model_key]
84
 
85
 
86
+ # On startup, most of the time is spent loading chunks from Pinecone
87
+ # This is done because BM25 needs the entire corpus in memory
88
+ # We want to avoid loading it on every query, so loading it once at startup is better
89
+
90
+ # Could improve this, as it is not ideal to load the entire corpus in memory
91
+ # Currently it won't scale well
92
+
93
  @app.on_event("startup")
94
  def startup_event() -> None:
95
  load_dotenv()
 
144
  return {"status": "ok" if ready else "starting"}
145
 
146
 
147
+
148
+ # Predict endpoint that takes a query and returns an answer along with contexts and metrics
149
+ # Is called from the frontend when the user clicks submit
150
+ # Also resolves the model based on the user's selection
151
  @app.post("/predict", response_model=PredictResponse)
152
  def predict(payload: PredictRequest) -> PredictResponse:
153
  if not state:
main.py CHANGED
@@ -19,6 +19,7 @@ def main():
19
 
20
  # ------------------------------------------------------------------
21
  # 0. Configuration
 
22
  # ------------------------------------------------------------------
23
  hf_token = os.getenv("HF_TOKEN")
24
  pinecone_api_key = os.getenv("PINECONE_API_KEY")
 
19
 
20
  # ------------------------------------------------------------------
21
  # 0. Configuration
22
+ # Query defined here
23
  # ------------------------------------------------------------------
24
  hf_token = os.getenv("HF_TOKEN")
25
  pinecone_api_key = os.getenv("PINECONE_API_KEY")
query_only.py CHANGED
@@ -1,7 +1,6 @@
1
  # This file is for inference without actually embedding documents
2
  # Main does embedding everytime, is redundant for querying.
3
-
4
-
5
 
6
  import os
7
  import time
 
1
  # This file is for inference without actually embedding documents
2
  # Main does embedding everytime, is redundant for querying.
3
+ # Made this just to test the querying part --@Qamar
 
4
 
5
  import os
6
  import time