Spaces:
Sleeping
Sleeping
Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from fastapi import FastAPI, Request
|
2 |
+
from fastapi.responses import JSONResponse
|
3 |
+
from bertopic import BERTopic
|
4 |
+
from sklearn.feature_extraction.text import CountVectorizer
|
5 |
+
from sklearn.decomposition import TruncatedSVD
|
6 |
+
from sklearn.cluster import KMeans
|
7 |
+
import pandas as pd
|
8 |
+
|
9 |
+
# --- Topic-model pipeline -------------------------------------------------
# BERTopic's `umap_model` / `hdbscan_model` slots are filled with scikit-learn
# estimators here: TruncatedSVD does the dimensionality reduction and KMeans
# does the clustering, so every document is assigned to one of 5 clusters.
clustering_model = KMeans(random_state=42, n_clusters=5)
dimensionality_model = TruncatedSVD(n_components=5)
vectorizer_model = CountVectorizer()

# Single module-level model instance; the /predict handler refits it per call.
topic_model = BERTopic(
    hdbscan_model=clustering_model,
    umap_model=dimensionality_model,
    vectorizer_model=vectorizer_model,
)

# ASGI application object that the route decorators below attach to.
app = FastAPI()
|
20 |
+
|
21 |
+
@app.post("/predict")
async def predict(request: Request):
    """Fit the topic model on newline-separated documents from the request.

    The JSON body must be an object carrying the input either as
    ``{"text": "<doc>\\n<doc>..."}`` or as ``{"data": ["<doc>\\n<doc>...", ...]}``
    (only the first element of ``data`` is used). ``"text"`` takes precedence
    when both keys are present. Each non-blank line is treated as one document.

    Returns:
        On success, a dict with ``"topics"`` (the rows of
        ``topic_model.get_topic_info()``) and ``"topic_assignments"`` (one
        topic id per document, in input order). On invalid input, a
        ``JSONResponse`` with status 400 and an ``"error"`` message.
    """
    try:
        data = await request.json()
    except ValueError:
        # json.JSONDecodeError and UnicodeDecodeError are both ValueError
        # subclasses; a malformed body is a client error, not a server crash.
        return JSONResponse({"error": "Invalid JSON body."}, status_code=400)

    # Pull the raw text out of the payload without assuming its shape.
    text = None
    if isinstance(data, dict):
        if "text" in data:
            text = data["text"]
        else:
            items = data.get("data")
            # Guard against an empty list, which would otherwise IndexError.
            if isinstance(items, list) and items:
                text = items[0]

    if not isinstance(text, str):
        return JSONResponse({"error": "No input text provided."}, status_code=400)

    # One document per non-blank line; strip each line only once.
    documents = [line for line in map(str.strip, text.split("\n")) if line]
    if not documents:
        return JSONResponse({"error": "No valid input."}, status_code=400)

    # KMeans cannot form more clusters than there are samples, so reject
    # inputs that are too small instead of letting fit_transform raise.
    min_documents = clustering_model.n_clusters
    if len(documents) < min_documents:
        return JSONResponse(
            {"error": f"At least {min_documents} non-empty lines are required."},
            status_code=400,
        )

    # NOTE(review): the shared module-level model is refit on every request and
    # this call runs synchronously inside the coroutine -- confirm that is
    # acceptable for the expected load.
    topics, _ = topic_model.fit_transform(documents)
    topic_info = topic_model.get_topic_info()

    return {
        "topics": topic_info.to_dict(orient="records"),
        # Coerce to built-in ints so JSON encoding cannot trip on NumPy
        # integer scalars (presumably already plain ints -- harmless if so).
        "topic_assignments": [int(topic) for topic in topics],
    }
|