Spaces:

MJobe
/

document-vqa-v2

Running

App Files Files Community

MJobe committed 10 days ago

Commit

ab46adf

•

1 Parent(s): 31d9e37

Update main.py

Browse files

Files changed (1) hide show

main.py +66 -69

main.py CHANGED Viewed

@@ -19,6 +19,8 @@ import logging
 import asyncio
 from concurrent.futures import ThreadPoolExecutor
 import re
 app = FastAPI()
@@ -368,91 +370,86 @@ async def fast_classify_text(statement: str = Form(...)):
         # Handle general errors
         return JSONResponse(content=f"Error in classification pipeline: {str(e)}", status_code=500)
-# Labels for main classification
-labels = [
     "Change to quote",
     "Copy quote requested",
     "Expired Quote",
     "Notes not clear"
 ]
-# Keywords for sub-classifications
-keyword_map = {
-    "MRSP": ["MSRP", "MRSP copy quote", "msrp only"],
-    "Direct": ["Direct quote", "send directly"],
-    "All": ["All Pricing", "all pricing"],
-    "MRSP & All": ["MSRP & All Pricing", "msrp only with all pricing"]
-}
-# Function to detect if input is blank or vague
-def is_blank_or_vague(text):
-    # Checks for empty or only contains general filler words (adjust as needed)
-    return not text.strip() or re.match(r'^\s*(please|send|quote|request|thank you|thanks)\s*$', text, re.IGNORECASE)
-# Function to identify sub-classifications based on keywords
-def get_sub_classification(text):
-    sub_labels = []
-    for sub_class, keywords in keyword_map.items():
-        if any(keyword.lower() in text.lower() for keyword in keywords):
-            sub_labels.append(sub_class)
-    return sub_labels if sub_labels else ["Uncategorized"]
-@app.post("/classify_text/")
-async def classify_text(statement: str = Form(...)):
-    try:
-        # Handle blank or vague text as "Notes not clear"
-        if is_blank_or_vague(statement):
-            return {
-                "main_classification": {
-                    "label": "Notes not clear",
-                    "confidence": 1.0,
-                    "scores": {"Notes not clear": 1.0}
-                },
-                "sub_classification": {
-                    "labels": ["Uncategorized"],
-                    "scores": {"Uncategorized": 1.0}
-                }
-            }
-        # Run main classification in executor for async handling
-        loop = asyncio.get_running_loop()
-        main_classification_task = loop.run_in_executor(
-            None,
-            lambda: nlp_main_classification(statement, labels)
-        )
-        # Await result
-        main_class_result = await main_classification_task
-        # Extract main classification label and scores
-        main_class_scores = {label: score for label, score in zip(main_class_result["labels"], main_class_result["scores"])}
-        best_main_classification = main_class_result["labels"][0]
-        best_main_score = main_class_result["scores"][0]
-        # Detect sub-classifications using keywords
-        sub_classification = get_sub_classification(statement)
-        # Assign default high confidence for keyword-based sub-classification
-        sub_class_scores = {sub: 1.0 for sub in sub_classification}
-        # Return results
-        return {
-            "main_classification": {
-                "label": best_main_classification,
-                "confidence": best_main_score,
-                "scores": main_class_scores
-            },
-            "sub_classification": {
-                "labels": sub_classification,
-                "scores": sub_class_scores
-            }
-        }
     except asyncio.TimeoutError:
-        return JSONResponse(content="Classification timed out.", status_code=504)
     except HTTPException as http_exc:
         return JSONResponse(content=f"HTTP error: {http_exc.detail}", status_code=http_exc.status_code)
     except Exception as e:
         return JSONResponse(content=f"Error in classification pipeline: {str(e)}", status_code=500)
 # Set up CORS middleware

 import asyncio
 from concurrent.futures import ThreadPoolExecutor
 import re
+from pydantic import BaseModel
+from typing import List, Dict, Any
 app = FastAPI()
         # Handle general errors
         return JSONResponse(content=f"Error in classification pipeline: {str(e)}", status_code=500)
+# Labels for main and sub classifications
+main_labels = [
     "Change to quote",
     "Copy quote requested",
     "Expired Quote",
     "Notes not clear"
 ]
+sub_labels = [
+    "MRSP",
+    "Direct",
+    "All",
+    "MRSP & All"
+]
+# Define a model for the response
+class ClassificationResponse(BaseModel):
+    classification: str
+    sub_classification: str
+    confidence: float
+    scores: Dict[str, float]
+# Keyword dictionaries for overriding classifications
+change_to_quote_keywords = ["ATP", "Add", "Revised", "Per", "Remove", "Advise"]
+copy_quote_requested_keywords = ["MSRP", "Quote", "Send", "Copy", "All pricing", "Retail"]
+# Helper function to check for keywords in a case-insensitive way
+def check_keywords(statement: str, keywords: List[str]) -> bool:
+    """Return True if any keyword occurs in *statement* as a whole word.
+
+    Matching is case-insensitive and anchored with ``\\b`` on both sides.
+    Each keyword is passed through ``re.escape`` so it is treated as literal
+    text rather than as regex syntax.
+    """
+    return any(
+        re.search(rf"\b{re.escape(keyword)}\b", statement, re.IGNORECASE)
+        for keyword in keywords
+    )
+@app.post("/classify_with_subcategory/", response_model=ClassificationResponse, description="Classify text into main categories with subcategories.")
+async def classify_with_subcategory(statement: str = Form(...)) -> ClassificationResponse:
+    """Classify *statement* into a main label and a sub label.
+
+    The main label is taken from a keyword match when one is found
+    ("Change to quote" is checked before "Copy quote requested"); otherwise
+    the model is run over ``main_labels``. The sub label always comes from a
+    multi-label model run over ``sub_labels``. Failures are returned as a
+    JSONResponse with a 504 (timeout), the HTTPException's status code, or
+    500 (anything else).
+    """
+    try:
+        # Acquire the loop before branching: the sub-classification below
+        # needs it even when a keyword match skips the model fallback.
+        loop = asyncio.get_running_loop()
+        # Check for keyword-based classification
+        if check_keywords(statement, change_to_quote_keywords):
+            main_best_label = "Change to quote"
+            main_best_score = 1.0  # High confidence since it's a direct match
+        elif check_keywords(statement, copy_quote_requested_keywords):
+            main_best_label = "Copy quote requested"
+            main_best_score = 1.0
+        else:
+            # If no keywords matched, perform the main classification using the model
+            main_classification_result = await loop.run_in_executor(
+                None,
+                lambda: nlp_sequence_classification(statement, main_labels, multi_label=False)
+            )
+            # Extract the best main classification label and confidence score
+            main_best_label = main_classification_result["labels"][0]
+            main_best_score = main_classification_result["scores"][0]
+        # Perform sub-classification regardless of how the main label was chosen
+        sub_classification_result = await loop.run_in_executor(
+            None,
+            lambda: nlp_sequence_classification(statement, sub_labels, multi_label=True)
+        )
+        # Extract all sub classification scores
+        sub_scores = dict(zip(sub_classification_result["labels"], sub_classification_result["scores"]))
+        # Determine the best sub classification label
+        best_sub_label = sub_classification_result["labels"][0] if sub_classification_result["labels"] else "None"
+        return ClassificationResponse(
+            classification=main_best_label,
+            sub_classification=best_sub_label,
+            confidence=main_best_score,
+            scores={"main": main_best_score, **sub_scores}
+        )
     except asyncio.TimeoutError:
+        # Handle timeout errors
+        return JSONResponse(content="Classification timed out. Try a shorter input or increase timeout.", status_code=504)
     except HTTPException as http_exc:
+        # Handle HTTP errors
         return JSONResponse(content=f"HTTP error: {http_exc.detail}", status_code=http_exc.status_code)
     except Exception as e:
+        # Handle any other errors
         return JSONResponse(content=f"Error in classification pipeline: {str(e)}", status_code=500)
 # Set up CORS middleware