Ayush0708 committed on
Commit
9ce511e
·
verified ·
1 Parent(s): e5a94c4

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +38 -0
app.py ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI, Request
2
+ from fastapi.responses import JSONResponse
3
+ from bertopic import BERTopic
4
+ from sklearn.feature_extraction.text import CountVectorizer
5
+ from sklearn.decomposition import TruncatedSVD
6
+ from sklearn.cluster import KMeans
7
+ import pandas as pd
8
+
9
# --- Topic-model components -------------------------------------------------
# BERTopic accepts pluggable sub-models; here the dimensionality-reduction and
# clustering steps are supplied as scikit-learn estimators via the
# ``umap_model`` / ``hdbscan_model`` slots.

# Clustering step: a fixed number of clusters with a fixed seed.
clustering_model = KMeans(random_state=42, n_clusters=5)

# Dimensionality-reduction step.
dimensionality_model = TruncatedSVD(n_components=5)

# Tokenizer / bag-of-words model with scikit-learn defaults.
vectorizer_model = CountVectorizer()

# Single module-level model instance shared by every request handler.
topic_model = BERTopic(
    hdbscan_model=clustering_model,
    umap_model=dimensionality_model,
    vectorizer_model=vectorizer_model,
)

# ASGI application object that the route decorators below attach to.
app = FastAPI()
20
+
21
@app.post("/predict")
async def predict(request: Request):
    """Fit the topic model on newline-separated documents from the request.

    Accepted JSON bodies (checked in this order):

    * ``{"text": "<doc>\\n<doc>..."}``
    * ``{"data": ["<doc>\\n<doc>...", ...]}`` -- only the first list element
      is used.

    The text is split on newlines; blank lines are dropped and every
    remaining line is treated as one document.

    Returns:
        On success, a dict with ``"topics"`` (the rows of
        ``topic_model.get_topic_info()`` as records) and
        ``"topic_assignments"`` (the per-document topics returned by
        ``fit_transform``).
        On bad input, a ``JSONResponse`` with status 400 and an ``"error"``
        message.
    """
    # Malformed JSON (or an undecodable body) surfaces as ValueError; report it
    # as a client error instead of letting it become a 500.
    try:
        data = await request.json()
    except ValueError:
        return JSONResponse(
            {"error": "Request body must be valid JSON."}, status_code=400
        )

    # Valid JSON can still be a list or scalar; only objects carry our keys.
    if not isinstance(data, dict):
        return JSONResponse({"error": "No input text provided."}, status_code=400)

    if "text" in data:
        text = data["text"]
    elif isinstance(data.get("data"), list) and data["data"]:
        # Non-empty list guaranteed, so indexing [0] cannot raise IndexError.
        text = data["data"][0]
    else:
        return JSONResponse({"error": "No input text provided."}, status_code=400)

    # The splitting below needs a string; reject numbers, nulls, nested objects.
    if not isinstance(text, str):
        return JSONResponse(
            {"error": "Input text must be a string."}, status_code=400
        )

    documents = [line.strip() for line in text.split("\n") if line.strip()]
    if not documents:
        return JSONResponse({"error": "No valid input."}, status_code=400)

    # KMeans refuses to fit when there are fewer samples than clusters, which
    # would otherwise escape as an unhandled ValueError (HTTP 500).
    min_documents = getattr(clustering_model, "n_clusters", 1)
    if len(documents) < min_documents:
        return JSONResponse(
            {
                "error": (
                    f"At least {min_documents} non-empty lines are required, "
                    f"got {len(documents)}."
                )
            },
            status_code=400,
        )

    # NOTE(review): this refits the shared module-level model on every request
    # and runs synchronously inside an async handler, so it blocks the event
    # loop while fitting -- confirm this is acceptable for the deployment.
    topics, _ = topic_model.fit_transform(documents)
    topic_info = topic_model.get_topic_info()
    return {
        "topics": topic_info.to_dict(orient="records"),
        "topic_assignments": topics,
    }