Spaces:

MJobe
/

document-vqa-v2

Running

App Files Files Community

MJobe committed 10 days ago

Commit

1a6d882

•

1 Parent(s): 3d61dca

Update main.py

Browse files

Files changed (1) hide show

main.py +24 -21

main.py CHANGED Viewed

@@ -370,7 +370,7 @@ async def fast_classify_text(statement: str = Form(...)):
         # Handle general errors
         return JSONResponse(content=f"Error in classification pipeline: {str(e)}", status_code=500)
-# Labels for main and sub classifications
 main_labels = [
     "Change to quote",
     "Copy quote requested",
@@ -378,13 +378,6 @@ main_labels = [
     "Notes not clear"
 ]
-sub_labels = [
-    "MRSP",
-    "Direct",
-    "All",
-    "MRSP & All"
-]
 # Define a model for the response
 class ClassificationResponse(BaseModel):
     classification: str
@@ -395,11 +388,24 @@ class ClassificationResponse(BaseModel):
 # Keyword lists for overriding classifications (one flat list of words/phrases
 # per main label; despite the original wording these are lists, not dicts).
 # NOTE(review): presumably passed to check_keywords(); the place where the
 # override is applied is not visible in this hunk — confirm in
 # classify_with_subcategory.
 change_to_quote_keywords = ["ATP", "Add", "Revised", "Per", "Remove", "Advise"]
 copy_quote_requested_keywords = ["MSRP", "Quote", "Send", "Copy", "All pricing", "Retail"]
 # Helper function to check for keywords in a case-insensitive way
 def check_keywords(statement: str, keywords: List[str]) -> bool:
     """Return True if any keyword occurs in *statement* as a whole word/phrase.

     Matching is case-insensitive and anchored on word boundaries, so "Copy"
     matches "copy" but not "photocopy". An empty keyword list yields False.
     """
     # re.escape keeps keywords literal: without it a keyword containing a
     # regex metacharacter (e.g. "a+b") would be interpreted as a pattern.
     return any(
         re.search(rf"\b{re.escape(keyword)}\b", statement, re.IGNORECASE)
         for keyword in keywords
     )
 @app.post("/classify_with_subcategory/", response_model=ClassificationResponse, description="Classify text into main categories with subcategories.")
 async def classify_with_subcategory(statement: str = Form(...)) -> ClassificationResponse:
     try:
@@ -422,25 +428,22 @@ async def classify_with_subcategory(statement: str = Form(...)) -> Classificatio
             main_best_label = main_classification_result["labels"][0]
             main_best_score = main_classification_result["scores"][0]
-        # Perform sub-classification regardless of how main classification was determined
-        loop = asyncio.get_running_loop()
-        sub_classification_result = await loop.run_in_executor(
-            None,
-            lambda: nlp_sequence_classification(statement, sub_labels, multi_label=True)
-        )
-        # Extract all sub classification scores
-        sub_scores = dict(zip(sub_classification_result["labels"], sub_classification_result["scores"]))
-        # Determine the best sub classification label
-        best_sub_label = sub_classification_result["labels"][0] if sub_classification_result["labels"] else "None"
-        best_sub_score = sub_classification_result["scores"][0] if sub_classification_result["scores"] else 0.0
         return ClassificationResponse(
             classification=main_best_label,
             sub_classification=best_sub_label,
             confidence=main_best_score,
-            scores={"main": main_best_score, **sub_scores}
         )
     except asyncio.TimeoutError:

         # Handle general errors
         return JSONResponse(content=f"Error in classification pipeline: {str(e)}", status_code=500)
+# Labels for main classifications
 main_labels = [
     "Change to quote",
     "Copy quote requested",
     "Notes not clear"
 ]
 # Define a model for the response
 class ClassificationResponse(BaseModel):
     classification: str
 # Keyword lists for overriding classifications (one flat list of words/phrases
 # per main label; despite the original wording these are lists, not dicts).
 # NOTE(review): presumably passed to check_keywords(); the place where the
 # override is applied is not visible in this hunk — confirm in
 # classify_with_subcategory.
 change_to_quote_keywords = ["ATP", "Add", "Revised", "Per", "Remove", "Advise"]
 copy_quote_requested_keywords = ["MSRP", "Quote", "Send", "Copy", "All pricing", "Retail"]
# Sub-classification label -> keywords that must ALL appear in the statement.
# NOTE(review): the "MRSP" labels look like a misspelling of "MSRP", but the
# label text is returned to callers in the response, so it is left unchanged.
sub_classification_keywords = {
    "MRSP": ["MSRP"],
    "Direct": ["Direct"],
    "All": ["All pricing"],
    "MRSP & All": ["MSRP", "All pricing"],
}


# Helper function to check for keywords in a case-insensitive way
def check_keywords(statement: str, keywords: List[str]) -> bool:
    """Return True if any keyword occurs in *statement* as a whole word/phrase.

    Matching is case-insensitive and anchored on word boundaries. An empty
    keyword list yields False.
    """
    # re.escape keeps keywords literal even if they contain regex metacharacters.
    return any(
        re.search(rf"\b{re.escape(keyword)}\b", statement, re.IGNORECASE)
        for keyword in keywords
    )


# Function to determine sub-classification based on keywords
def get_sub_classification(statement: str) -> str:
    """Return the most specific sub-classification label for *statement*.

    A label qualifies only when every one of its keywords is present. Among
    qualifying labels the one requiring the most keywords wins, so a statement
    mentioning both "MSRP" and "All pricing" yields "MRSP & All" rather than
    stopping at the single-keyword "MRSP" entry that precedes it. Ties keep
    declaration order. Returns the string "None" when nothing matches.
    """
    best_label = "None"  # Default to "None" if no keywords match
    best_size = 0
    for sub_label, keywords in sub_classification_keywords.items():
        # Strict ">" preserves first-declared precedence among equally
        # specific labels and skips the matching work for weaker candidates.
        if len(keywords) > best_size and all(
            check_keywords(statement, [keyword]) for keyword in keywords
        ):
            best_label = sub_label
            best_size = len(keywords)
    return best_label
 @app.post("/classify_with_subcategory/", response_model=ClassificationResponse, description="Classify text into main categories with subcategories.")
 async def classify_with_subcategory(statement: str = Form(...)) -> ClassificationResponse:
     try:
             main_best_label = main_classification_result["labels"][0]
             main_best_score = main_classification_result["scores"][0]
+        # Perform sub-classification only if the main classification is "Copy quote requested"
+        if main_best_label == "Copy quote requested":
+            best_sub_label = get_sub_classification(statement)
+        else:
+            best_sub_label = "None"
+        # Gather the scores for response
+        scores = {"main": main_best_score}
+        if best_sub_label != "None":
+            scores[best_sub_label] = 1.0  # Assign full confidence to sub-classification matches
         return ClassificationResponse(
             classification=main_best_label,
             sub_classification=best_sub_label,
             confidence=main_best_score,
+            scores=scores
         )
     except asyncio.TimeoutError: