Spaces:

MJobe
/

document-vqa-v2

Running

App Files Files Community

MJobe commited on 10 days ago

Commit

f0e6e2e

•

1 Parent(s): fd35e4e

Update main.py

Browse files

Files changed (1) hide show

main.py +36 -37

main.py CHANGED Viewed

@@ -406,54 +406,53 @@ def get_sub_classification(statement: str) -> str:
             return sub_label
     return "None"  # Default to "None" if no keywords match
-@app.post("/classify_with_subcategory/", response_model=ClassificationResponse, description="Classify text into main categories with subcategories.")
-async def classify_with_subcategory(statement: str = Form(...)) -> ClassificationResponse:
     try:
-        # Keyword-based classification override
-        if check_keywords(statement, change_to_quote_keywords):
-            main_best_label = "Change to quote"
-            main_best_score = 1.0  # High confidence since it's a direct match
-        elif check_keywords(statement, copy_quote_requested_keywords):
-            main_best_label = "Copy quote requested"
-            main_best_score = 1.0
         else:
-            # If no keywords matched, perform the main classification using the model
             loop = asyncio.get_running_loop()
-            main_classification_result = await loop.run_in_executor(
-                None,
-                lambda: nlp_sequence_classification(statement, main_labels, multi_label=False)
             )
-            # Extract the best main classification label and confidence score
-            main_best_label = main_classification_result["labels"][0]
-            main_best_score = main_classification_result["scores"][0]
-        # Perform sub-classification only if the main classification is "Copy quote requested"
-        if main_best_label == "Copy quote requested":
-            best_sub_label = get_sub_classification(statement)
-        else:
-            best_sub_label = "None"
-        # Gather the scores for response
-        scores = {"main": main_best_score}
-        if best_sub_label != "None":
-            scores[best_sub_label] = 1.0  # Assign full confidence to sub-classification matches
-        return ClassificationResponse(
-            classification=main_best_label,
-            sub_classification=best_sub_label,
-            confidence=main_best_score,
-            scores=scores
-        )
     except asyncio.TimeoutError:
-        # Handle timeout errors
         return JSONResponse(content="Classification timed out. Try a shorter input or increase timeout.", status_code=504)
     except HTTPException as http_exc:
-        # Handle HTTP errors
         return JSONResponse(content=f"HTTP error: {http_exc.detail}", status_code=http_exc.status_code)
     except Exception as e:
-        # Handle any other errors
         return JSONResponse(content=f"Error in classification pipeline: {str(e)}", status_code=500)
 # Set up CORS middleware

             return sub_label
     return "None"  # Default to "None" if no keywords match
+@app.post("/classify_with_subcategory/", description="Quickly classify text into predefined categories.")
+async def fast_classify_text(statement: str = Form(...)):
     try:
+        # Check for empty or "N/A" statements
+        if not statement or statement.strip().lower() == "n/a":
+            return {"classification": "Note not clear", "confidence": 1.0, "sub_classification": "None", "scores": {}}
+        # Determine main classification based on keywords
+        if any(keyword.lower() in statement.lower() for keyword in change_to_quote_keywords):
+            main_classification = "Change to Quote"
+            sub_classification = "None"
+        elif any(keyword.lower() in statement.lower() for keyword in copy_quote_requested_keywords):
+            main_classification = "Copy Quote Requested"
+            # Perform sub-classification for Copy Quote Requested
+            if "msrp" in statement.lower():
+                sub_classification = "MRSP"
+            elif "all pricing" in statement.lower():
+                sub_classification = "All"
+            elif "direct" in statement.lower():
+                sub_classification = "Direct"
+            else:
+                sub_classification = "None"  # No sub-classification when keywords don’t match
         else:
+            # Call the Hugging Face model for cases where keywords don’t match
             loop = asyncio.get_running_loop()
+            result = await loop.run_in_executor(
+                executor,
+                lambda: nlp_sequence_classification(statement, labels, multi_label=False)
             )
+            main_classification = result["labels"][0]
+            main_confidence = result["scores"][0]
+            scores = dict(zip(result["labels"], result["scores"]))
+            sub_classification = "None"  # Set sub-classification to None for non-matching keywords
+            return {
+                "classification": main_classification,
+                "confidence": main_confidence,
+                "sub_classification": sub_classification,
+                "scores": scores
+            }
     except asyncio.TimeoutError:
         return JSONResponse(content="Classification timed out. Try a shorter input or increase timeout.", status_code=504)
     except HTTPException as http_exc:
         return JSONResponse(content=f"HTTP error: {http_exc.detail}", status_code=http_exc.status_code)
     except Exception as e:
         return JSONResponse(content=f"Error in classification pipeline: {str(e)}", status_code=500)
 # Set up CORS middleware