junaid17 committed on
Commit
e4b6894
·
verified ·
1 Parent(s): 30db3b7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +162 -162
app.py CHANGED
@@ -1,163 +1,163 @@
1
- # main.py
2
- from fastapi import FastAPI, HTTPException, status, File, UploadFile, Form, Query
3
- from fastapi.middleware.cors import CORSMiddleware
4
- from typing import Optional
5
- import pandas as pd
6
- import io
7
- import os
8
- from text_engine import Text_Search_Engine
9
-
10
- app = FastAPI(title="CortexSearch", version="1.0", description="A flexible text search API with multiple FAISS index types and BM25 support.")
11
-
12
- # Choose default index_type here: "flat", "ivf", or "hnsw"
13
- store = Text_Search_Engine(index_type=os.getenv("INDEX_TYPE", "flat"))
14
- try:
15
- store.load()
16
- except Exception:
17
- pass
18
-
19
- app.add_middleware(
20
- CORSMiddleware,
21
- allow_origins=["*"],
22
- allow_credentials=True,
23
- allow_methods=["*"],
24
- allow_headers=["*"],
25
- )
26
-
27
-
28
- @app.get("/")
29
- async def root():
30
- return {"message": "Welcome to the Flexible Text Intelligence API"}
31
-
32
-
33
- # -------------------------
34
- # Column preview endpoint
35
- # -------------------------
36
- @app.post("/list_columns")
37
- async def list_columns(file: UploadFile = File(...)):
38
- """
39
- Upload a CSV and get available columns back.
40
- Useful to preview before choosing columns to index.
41
- """
42
- try:
43
- contents = await file.read()
44
- df = pd.read_csv(io.BytesIO(contents))
45
- return {"available_columns": list(df.columns)}
46
- except Exception as e:
47
- raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=str(e))
48
-
49
-
50
- # -------------------------
51
- # Health check endpoint
52
- # -------------------------
53
- @app.get("/health")
54
- async def health():
55
- return {"status": "ok", "rows_indexed": len(store.rows), "index_type": store.index_type}
56
-
57
-
58
- # -------------------------
59
- # Upload CSV (build fresh index)
60
- # -------------------------
61
- @app.post("/upload_csv")
62
- async def upload_csv(file: UploadFile = File(...), columns: str = Form(...), index_type: Optional[str] = Form(None)):
63
- #Upload CSV and specify columns (comma-separated) to combine into searchable text.
64
- #Optional form field 'index_type' can be 'flat', 'ivf', or 'hnsw' to override engine default.
65
- try:
66
- contents = await file.read()
67
- df = pd.read_csv(io.BytesIO(contents))
68
-
69
- column_list = [c.strip() for c in columns.split(",") if c.strip()]
70
- # Validate
71
- for col in column_list:
72
- if col not in df.columns:
73
- return {
74
- "status": "error",
75
- "detail": f"Column '{col}' not found.",
76
- "available_columns": list(df.columns),
77
- }
78
-
79
- rows = df.dropna(subset=column_list).to_dict(orient="records")
80
- for r in rows:
81
- r["_search_text"] = " ".join(str(r[col]) for col in column_list if r.get(col) is not None)
82
-
83
- texts = [r["_search_text"] for r in rows]
84
-
85
- if index_type:
86
- store.index_type = index_type
87
-
88
- store.encode_store(rows, texts)
89
- return {"status": "success", "count": len(rows), "used_columns": column_list, "index_type": store.index_type}
90
- except Exception as e:
91
- raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=str(e))
92
-
93
-
94
- # -------------------------
95
- # Add CSV (append new rows)
96
- # -------------------------
97
- @app.post("/add_csv")
98
- async def add_csv(file: UploadFile = File(...), columns: str = Form(...)):
99
- try:
100
- contents = await file.read()
101
- df = pd.read_csv(io.BytesIO(contents))
102
-
103
- column_list = [c.strip() for c in columns.split(",") if c.strip()]
104
- for col in column_list:
105
- if col not in df.columns:
106
- return {
107
- "status": "error",
108
- "detail": f"Column '{col}' not found.",
109
- "available_columns": list(df.columns),
110
- }
111
-
112
- new_rows = df.dropna(subset=column_list).to_dict(orient="records")
113
- for r in new_rows:
114
- r["_search_text"] = " ".join(str(r[col]) for col in column_list if r.get(col) is not None)
115
-
116
- new_texts = [r["_search_text"] for r in new_rows]
117
-
118
- store.add_rows(new_rows, new_texts)
119
-
120
- return {"status": "success", "added_count": len(new_rows), "used_columns": column_list, "total_rows": len(store.rows)}
121
- except Exception as e:
122
- raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=str(e))
123
-
124
-
125
- # -------------------------
126
- # Search endpoint
127
- # -------------------------
128
- @app.get("/search")
129
- async def search(
130
- query: str,
131
- top_k: int = 3,
132
- mode: str = Query("semantic", enum=["semantic", "lexical", "hybrid"]),
133
- alpha: float = 0.5,):
134
- #mode: semantic | lexical | hybrid
135
- #alpha: weight for semantic in hybrid (0..1)
136
- try:
137
- if mode == "semantic":
138
- results = store.search(query, top_k=top_k)
139
- elif mode == "lexical":
140
- if store.bm25 is None:
141
- return {"results": []}
142
- tokenized_query = query.lower().split()
143
- scores = store.bm25.get_scores(tokenized_query)
144
- ranked = sorted(enumerate(scores), key=lambda x: x[1], reverse=True)[:top_k]
145
- results = [{**store.rows[i], "score": float(score)} for i, score in ranked]
146
- else:
147
- results = store.hybrid_search(query, top_k=top_k, alpha=alpha)
148
-
149
- return {"results": results}
150
- except Exception as e:
151
- raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=str(e))
152
-
153
-
154
- # -------------------------
155
- # Delete all data
156
- # -------------------------
157
- @app.delete("/delete_data")
158
- async def delete_data():
159
- try:
160
- store.clear_vdb()
161
- return {"status": "success", "message": "Vector DB cleared"}
162
- except Exception as e:
163
  raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=str(e))
 
1
# main.py
import io
import logging
import os
from typing import Optional

import pandas as pd
from fastapi import FastAPI, HTTPException, status, File, UploadFile, Form, Query
from fastapi.middleware.cors import CORSMiddleware

from text_engine import Text_Search_Engine
9
+
10
# Application object; metadata below is surfaced in the OpenAPI docs.
app = FastAPI(
    title="CortexSearch",
    version="1.0",
    description="A flexible text search API with multiple FAISS index types and BM25 support.",
)

logger = logging.getLogger(__name__)

# Choose default index_type here: "flat", "ivf", or "hnsw"
store = Text_Search_Engine(index_type=os.getenv("INDEX_TYPE", "flat"))
try:
    # Best-effort restore of a previously persisted index. Starting with an
    # empty store is a valid state, so a failed load is logged (with the
    # traceback) instead of silently swallowed or treated as fatal.
    store.load()
except Exception:
    logger.warning("Could not load a persisted index; starting empty.", exc_info=True)

# Wide-open CORS — fine for a demo; tighten allow_origins for production.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
26
+
27
+
28
@app.get("/")
async def root():
    """Liveness banner for the API root."""
    payload = {"Status": "The CortexSearch API is live!!!"}
    return payload
31
+
32
+
33
# -------------------------
# Column preview endpoint
# -------------------------
@app.post("/list_columns")
async def list_columns(file: UploadFile = File(...)):
    """
    Upload a CSV and get available columns back.
    Useful to preview before choosing columns to index.
    """
    try:
        raw = await file.read()
        frame = pd.read_csv(io.BytesIO(raw))
    except Exception as exc:
        # Malformed upload (not parseable as CSV) is the caller's fault: 400.
        raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=str(exc))
    else:
        return {"available_columns": list(frame.columns)}
48
+
49
+
50
# -------------------------
# Health check endpoint
# -------------------------
@app.get("/health")
async def health():
    """Report liveness plus the current row count and FAISS index type."""
    return {
        "status": "ok",
        "rows_indexed": len(store.rows),
        "index_type": store.index_type,
    }
56
+
57
+
58
# -------------------------
# Upload CSV (build fresh index)
# -------------------------
@app.post("/upload_csv")
async def upload_csv(file: UploadFile = File(...), columns: str = Form(...), index_type: Optional[str] = Form(None)):
    """
    Build a fresh index from an uploaded CSV.

    'columns' is a comma-separated list of column names whose values are
    concatenated into the searchable text. The optional 'index_type' form
    field ('flat', 'ivf', or 'hnsw') overrides the engine default.
    """
    try:
        frame = pd.read_csv(io.BytesIO(await file.read()))

        wanted = [name.strip() for name in columns.split(",") if name.strip()]
        # Reject the request on the first requested column that is absent.
        missing = next((name for name in wanted if name not in frame.columns), None)
        if missing is not None:
            return {
                "status": "error",
                "detail": f"Column '{missing}' not found.",
                "available_columns": list(frame.columns),
            }

        # Rows missing any selected column are dropped before indexing.
        records = frame.dropna(subset=wanted).to_dict(orient="records")
        for record in records:
            record["_search_text"] = " ".join(
                str(record[name]) for name in wanted if record.get(name) is not None
            )
        search_texts = [record["_search_text"] for record in records]

        if index_type:
            store.index_type = index_type

        store.encode_store(records, search_texts)
        return {
            "status": "success",
            "count": len(records),
            "used_columns": wanted,
            "index_type": store.index_type,
        }
    except Exception as exc:
        raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=str(exc))
92
+
93
+
94
# -------------------------
# Add CSV (append new rows)
# -------------------------
@app.post("/add_csv")
async def add_csv(file: UploadFile = File(...), columns: str = Form(...)):
    """
    Append rows from an uploaded CSV to the existing index.

    'columns' is a comma-separated list of column names combined into the
    searchable text, matching the /upload_csv contract.
    """
    try:
        frame = pd.read_csv(io.BytesIO(await file.read()))

        wanted = [name.strip() for name in columns.split(",") if name.strip()]
        for name in wanted:
            if name not in frame.columns:
                return {
                    "status": "error",
                    "detail": f"Column '{name}' not found.",
                    "available_columns": list(frame.columns),
                }

        # Rows missing any selected column are dropped before appending.
        appended = frame.dropna(subset=wanted).to_dict(orient="records")
        for record in appended:
            record["_search_text"] = " ".join(
                str(record[name]) for name in wanted if record.get(name) is not None
            )

        store.add_rows(appended, [record["_search_text"] for record in appended])

        return {
            "status": "success",
            "added_count": len(appended),
            "used_columns": wanted,
            "total_rows": len(store.rows),
        }
    except Exception as exc:
        raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=str(exc))
123
+
124
+
125
# -------------------------
# Search endpoint
# -------------------------
@app.get("/search")
async def search(
    query: str,
    top_k: int = 3,
    mode: str = Query("semantic", enum=["semantic", "lexical", "hybrid"]),
    alpha: float = 0.5,
):
    """
    Search the index.

    mode: semantic | lexical | hybrid
    alpha: weight of the semantic score in hybrid mode (0..1)
    """
    try:
        if mode == "lexical":
            # Pure BM25 ranking; empty result set when no lexical index exists.
            if store.bm25 is None:
                return {"results": []}
            scores = store.bm25.get_scores(query.lower().split())
            top = sorted(enumerate(scores), key=lambda pair: pair[1], reverse=True)[:top_k]
            hits = [{**store.rows[idx], "score": float(value)} for idx, value in top]
        elif mode == "semantic":
            hits = store.search(query, top_k=top_k)
        else:
            hits = store.hybrid_search(query, top_k=top_k, alpha=alpha)

        return {"results": hits}
    except Exception as exc:
        raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=str(exc))
152
+
153
+
154
# -------------------------
# Delete all data
# -------------------------
@app.delete("/delete_data")
async def delete_data():
    """Clear the vector store entirely."""
    try:
        store.clear_vdb()
    except Exception as exc:
        raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=str(exc))
    return {"status": "success", "message": "Vector DB cleared"}