Spaces:

deenasun
/

ai-sl-api

Running

App Files Community

deenasun committed on Jun 23

Commit

f37f939

1 Parent(s): 8da3927

Fix for catching Gradio FileData objects when they are passed from API calls as strings

Browse files

Files changed (5) hide show

__pycache__/asl_gloss.cpython-311.pyc +0 -0
__pycache__/document_parsing.cpython-311.pyc +0 -0
app.py +17 -9
asl_gloss.py +0 -1
document_parsing.py +55 -1

__pycache__/asl_gloss.cpython-311.pyc CHANGED Viewed

Binary files a/__pycache__/asl_gloss.cpython-311.pyc and b/__pycache__/asl_gloss.cpython-311.pyc differ

__pycache__/document_parsing.cpython-311.pyc CHANGED Viewed

Binary files a/__pycache__/document_parsing.cpython-311.pyc and b/__pycache__/document_parsing.cpython-311.pyc differ

app.py CHANGED Viewed

@@ -199,7 +199,8 @@ def cleanup_temp_video(file_path):
 def determine_input_type(input_data):
     """
     Determine the type of input data and return a standardized format.
-    Returns: (input_type, processed_data) where input_type is 'text', 'file_path', or 'file_object'
     """
     if isinstance(input_data, str):
         # Check if it's a file path (contains file extension)
@@ -209,11 +210,20 @@ def determine_input_type(input_data):
         elif input_data.startswith('{') and 'gradio.FileData' in input_data:
             try:
                 import ast
-                # Safely evaluate the string as a dictionary
-                file_data = ast.literal_eval(input_data)
                 if isinstance(file_data, dict) and 'path' in file_data:
                     return 'file_path', file_data['path']
-            except (ValueError, SyntaxError):
                 pass
         else:
             return 'text', input_data.strip()
@@ -255,9 +265,7 @@ def process_input(input_data):
 async def parse_vectorize_and_search_unified(input_data):
     """
     Unified function that handles both text and file inputs
-    """
-    print(f"Input type: {type(input_data)}")
     # Process the input to get gloss
     gloss = process_input(input_data)
     if not gloss:
@@ -356,7 +364,6 @@ def predict_unified(input_data):
                 "message": "Please provide text or upload a document"
             }, None
-        print("Input", input_data, type(input_data))
         # Use the unified processing function
         result = parse_vectorize_and_search_unified_sync(input_data)
@@ -444,7 +451,8 @@ def predict(text, file):
             "message": "Please provide either text or upload a file"
         }, None
-    print("Input", input_data)
     # Process using the unified function
     return predict_unified(input_data)

 def determine_input_type(input_data):
     """
     Determine the type of input data and return a standardized format.
+    Returns: (input_type, processed_data) where input_type is 'text',
+    'file_path', or 'file_object'
     """
     if isinstance(input_data, str):
         # Check if it's a file path (contains file extension)
         elif input_data.startswith('{') and 'gradio.FileData' in input_data:
             try:
                 import ast
+                import json
+                # Try to parse as JSON first
+                try:
+                    file_data = json.loads(input_data)
+                except json.JSONDecodeError:
+                    # Fall back to ast.literal_eval for safer parsing
+                    file_data = ast.literal_eval(input_data)
                 if isinstance(file_data, dict) and 'path' in file_data:
+                    print(f"Parsed FileData: {file_data}")
                     return 'file_path', file_data['path']
+            except (ValueError, SyntaxError, json.JSONDecodeError) as e:
+                print(f"Error parsing FileData string: {e}")
+                print(f"Input data: {input_data}")
                 pass
         else:
             return 'text', input_data.strip()
 async def parse_vectorize_and_search_unified(input_data):
     """
     Unified function that handles both text and file inputs
+    """
     # Process the input to get gloss
     gloss = process_input(input_data)
     if not gloss:
                 "message": "Please provide text or upload a document"
             }, None
         # Use the unified processing function
         result = parse_vectorize_and_search_unified_sync(input_data)
             "message": "Please provide either text or upload a file"
         }, None
+    print("Input to the prediction function", input_data)
+    print("Input type:", type(input))
     # Process using the unified function
     return predict_unified(input_data)

asl_gloss.py CHANGED Viewed

@@ -10,7 +10,6 @@ that preserves the spatial and grammatical structure of ASL.
 import os
 import sys
 import argparse
-import json
 from typing import Optional, Dict, Any
 from pathlib import Path

 import os
 import sys
 import argparse
 from typing import Optional, Dict, Any
 from pathlib import Path

document_parsing.py CHANGED Viewed

@@ -66,7 +66,61 @@ class DocumentParser:
             '.doc': 'application/msword',
             '.epub': 'application/epub+zip'
         }
-        return extension_map.get(extension, 'unknown')
     def extract_text(self, file_path: Union[str, Path]) -> Optional[str]:
         """

             '.doc': 'application/msword',
             '.epub': 'application/epub+zip'
         }
+        mime_type = extension_map.get(extension, 'unknown')
+        # If no extension or unknown extension, try to detect by content
+        if mime_type == 'unknown':
+            mime_type = self._detect_mime_by_content(file_path)
+        return mime_type
+    def _detect_mime_by_content(self, file_path: Union[str, Path]) -> str:
+        """
+        Detect MIME type by reading file content.
+        Args:
+            file_path: Path to the file
+        Returns:
+            MIME type string
+        """
+        try:
+            with open(file_path, 'rb') as f:
+                # Read first 1024 bytes to detect file type
+                header = f.read(1024)
+                # PDF detection
+                if header.startswith(b'%PDF'):
+                    return 'application/pdf'
+                # ZIP-based formats (DOCX, EPUB)
+                if header.startswith(b'PK\x03\x04'):
+                    # Check if it's EPUB by looking for mimetype file
+                    try:
+                        import zipfile
+                        with zipfile.ZipFile(file_path, 'r') as zf:
+                            if 'mimetype' in zf.namelist():
+                                with zf.open('mimetype') as mf:
+                                    mimetype = mf.read().decode('utf-8').strip()
+                                    if mimetype == 'application/epub+zip':
+                                        return 'application/epub+zip'
+                        # If not EPUB, assume DOCX
+                        return 'application/vnd.openxmlformats-officedocument.wordprocessingml.document'
+                    except:
+                        pass
+                # Plain text detection (try to decode as UTF-8)
+                try:
+                    header.decode('utf-8')
+                    return 'text/plain'
+                except UnicodeDecodeError:
+                    pass
+        except Exception as e:
+            logger.warning(f"Error detecting MIME type by content: {e}")
+        return 'unknown'
     def extract_text(self, file_path: Union[str, Path]) -> Optional[str]:
         """