Chittrarasu committed
Commit 9288345 · 1 Parent(s): c11261f
data/sms_process_data_main.xlsx ADDED
Binary file (42.2 kB)
 
main.py CHANGED
@@ -10,4 +10,4 @@ app.include_router(sms_router)
 @app.get("/")
 def home():
 
-    return {"message": "Welcome to embedding sms API, use /docs to post SMS text and get demenstions"}
+    return {"message": "Welcome to embedding sms API, use /docs to test endpoints"}
routes/sms_router.py CHANGED
@@ -1,43 +1,47 @@
 from fastapi import APIRouter, HTTPException
-from schemas.schema import SMSRequest, EmbeddingResponse, SimilarityRequest, SimilarityResponse
+from schemas.schema import (
+    SMSRequest,
+    EmbeddingResponse,
+    SimilarityRequest,
+    SimilarityResponse,
+    PredictionRequest,
+    PredictionResponse
+)
 from service.embedded_service import generate_embeddings
-import numpy as np  # Import NumPy for cosine similarity calculation
+from service.prediction_service import predict_label
+import numpy as np
 
 # Initialize Router
 router = APIRouter()
 
 @router.post("/get_embeddings/", response_model=EmbeddingResponse)
 async def get_embeddings(sms_request: SMSRequest):
-    # Check if the input list is not empty
     if not sms_request.messages:
         raise HTTPException(status_code=400, detail="No messages provided.")
 
-    # Generate embeddings
     embeddings = generate_embeddings(sms_request.messages)
 
-    # Check if embeddings are generated and are in the correct format
     if not embeddings or not all(isinstance(emb, list) for emb in embeddings):
         raise HTTPException(status_code=500, detail="Failed to generate embeddings.")
 
-    # Get the dimensions from the first embedding (assuming all are the same)
     dimensions = len(embeddings[0]) if embeddings else 0
 
-    # Return the response as per the EmbeddingResponse model
     return EmbeddingResponse(dimensions=dimensions, embeddings=embeddings)
 
 @router.post("/calculate_similarity/", response_model=SimilarityResponse)
 async def calculate_similarity(similarity_request: SimilarityRequest):
-    # Get embeddings for both messages
     embeddings = generate_embeddings([similarity_request.message1, similarity_request.message2])
 
-    # Check if embeddings are generated for both messages
     if len(embeddings) != 2:
         raise HTTPException(status_code=500, detail="Failed to generate embeddings for both messages.")
 
-    # Calculate cosine similarity
     vec1 = np.array(embeddings[0])
     vec2 = np.array(embeddings[1])
     cosine_similarity = np.dot(vec1, vec2) / (np.linalg.norm(vec1) * np.linalg.norm(vec2))
 
-    # Return response using the SimilarityResponse model
     return SimilarityResponse(similarity_score=float(cosine_similarity))
+
+@router.post("/predict_label/", response_model=PredictionResponse)
+async def predict_sms_label(prediction_request: PredictionRequest):
+    label, probability = predict_label(prediction_request.message)
+    return PredictionResponse(label=label, probability=probability)
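
For reference, a minimal client sketch for the new prediction route. The path and the request/response fields come from the diff above; the host and port are assumptions (uvicorn's defaults), not part of the commit.

import requests

# Call the /predict_label/ endpoint added in this commit.
# Base URL is an assumption: uvicorn's default http://127.0.0.1:8000.
resp = requests.post(
    "http://127.0.0.1:8000/predict_label/",
    json={"message": "Congratulations! You have won a free prize."},
)
resp.raise_for_status()
data = resp.json()  # shaped like PredictionResponse: {"label": ..., "probability": ...}
print(data["label"], data["probability"])
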
schemas/schema.py CHANGED
@@ -25,4 +25,11 @@ class SimilarityRequest(BaseModel):
     message2: str
 
 class SimilarityResponse(BaseModel):
-    similarity_score: float
+    similarity_score: float
+
+class PredictionRequest(BaseModel):
+    message: str
+
+class PredictionResponse(BaseModel):
+    label: str
+    probability: float
service/embedded_service.py CHANGED
@@ -5,4 +5,4 @@ def generate_embeddings(messages: list):
     # Generate embeddings
     embeddings = model.encode(messages)
     embeddings = np.array(embeddings).tolist()  # Convert to list for JSON serialization
-    return embeddings
+    return embeddings
service/prediction_service.py ADDED
@@ -0,0 +1,16 @@
+import pickle
+from sentence_transformers import SentenceTransformer
+import numpy as np
+
+# Load Model and Transformer
+with open('models/logistic_regression_model.pkl', 'rb') as f:
+    logistic_model = pickle.load(f)
+
+model = SentenceTransformer('models/sentence_transformer')
+
+def predict_label(message: str):
+    embedding = model.encode([message])
+    prediction = logistic_model.predict(embedding)[0]
+    probability = logistic_model.predict_proba(embedding)[0].max()
+
+    return prediction, float(probability)
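
One caveat with this module: logistic_model.predict() returns labels with the dtype of the training data, so if the label column in the spreadsheet is numeric the value may be a NumPy scalar rather than the Python str that PredictionResponse.label expects. A defensive variant of the function (an assumption about the data, not something changed in this commit) would coerce explicitly:

def predict_label(message: str):
    # Encode the single message and classify it; cast the label to str so it
    # always matches PredictionResponse.label (defensive assumption; the
    # committed version returns the raw prediction).
    embedding = model.encode([message])
    prediction = logistic_model.predict(embedding)[0]
    probability = logistic_model.predict_proba(embedding)[0].max()
    return str(prediction), float(probability)
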
service/train_model.py ADDED
@@ -0,0 +1,25 @@
+import pandas as pd
+from sklearn.model_selection import train_test_split
+from sklearn.linear_model import LogisticRegression
+from sklearn.metrics import accuracy_score, classification_report
+import pickle
+from sentence_transformers import SentenceTransformer
+
+file_name = "data/sms_process_data_main.xlsx"
+sheet = "Sheet1"
+df = pd.read_excel(file_name, sheet_name=sheet)
+
+X_train, X_test, y_train, y_test = train_test_split(df['MessageText'], df['label'], test_size=0.2, random_state=42)
+
+model = SentenceTransformer('Alibaba-NLP/gte-base-en-v1.5', trust_remote_code=True)
+
+X_train_embeddings = model.encode(X_train.tolist())
+X_test_embeddings = model.encode(X_test.tolist())
+
+logistic_model = LogisticRegression(max_iter=100)
+logistic_model.fit(X_train_embeddings, y_train)
+
+with open('models/logistic_regression_model.pkl', 'wb') as f:
+    pickle.dump(logistic_model, f)
+
+model.save('models/sentence_transformer')
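
Note that accuracy_score and classification_report are imported in this script but never called. A sketch of the held-out evaluation those imports suggest, meant to be appended to train_model.py after the classifier is fitted (it reuses X_test_embeddings and y_test defined above):

# Evaluate the fitted classifier on the held-out embeddings.
y_pred = logistic_model.predict(X_test_embeddings)
print("Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))
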