Update app.py

app.py CHANGED
@@ -22,6 +22,11 @@ import google.generativeai as genai
 from spellchecker import SpellChecker
 import nltk
 from nltk.tokenize import sent_tokenize
+from dotenv import load_dotenv
+import shutil
+
+# Load environment variables
+load_dotenv()
 
 # Configure logging
 logging.basicConfig(
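A note on the new imports: `load_dotenv()` only affects `os.getenv()` calls made after it runs, so it must execute before the configuration reads below. A minimal sketch of the intended flow, assuming python-dotenv is installed and a `.env` file with a `GEMINI_API_KEY` entry sits in the working directory:

```python
# Sketch only: load .env into os.environ, then read from it.
import os
from dotenv import load_dotenv

load_dotenv()                          # parses .env and populates os.environ
api_key = os.getenv("GEMINI_API_KEY")  # None if the variable is absent
print("key loaded" if api_key else "key missing")
```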
@@ -31,7 +36,7 @@ logging.basicConfig(
 logger = logging.getLogger("cosmic_ai")
 
 # Set a custom NLTK data directory
-nltk_data_dir = os.getenv('NLTK_DATA_DIR', '/
+nltk_data_dir = os.getenv('NLTK_DATA_DIR', '/cache/nltk_data')
 os.makedirs(nltk_data_dir, exist_ok=True)
 nltk.data.path.append(nltk_data_dir)
 
@@ -60,7 +65,9 @@ app.mount("/static", StaticFiles(directory="static"), name="static")
 app.mount("/images", StaticFiles(directory="images"), name="images")
 
 # Gemini API Configuration
-API_KEY =
+API_KEY = os.getenv('GEMINI_API_KEY')
+if not API_KEY:
+    raise ValueError("GEMINI_API_KEY environment variable is not set")
 genai.configure(api_key=API_KEY)
 
 # Model configurations
@@ -101,6 +108,13 @@ spell = SpellChecker()
 def load_model(task: str, model_name: str = None):
     """Cached model loader with proper task names and error handling"""
     try:
+        cache_dir = os.getenv('HF_HOME', '/cache/huggingface')
+        if not os.path.exists(cache_dir):
+            os.makedirs(cache_dir, exist_ok=True)
+        elif not os.access(cache_dir, os.W_OK):
+            logger.warning(f"Cache directory {cache_dir} is not writable. Attempting to clear cache.")
+            shutil.rmtree(cache_dir, ignore_errors=True)
+            os.makedirs(cache_dir, exist_ok=True)
         logger.info(f"Loading model for task: {task}, model: {model_name or MODELS.get(task)}")
         start_time = time.time()
 
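One caveat on the writability check added here: `os.access(cache_dir, os.W_OK)` can report stale results on some container mounts and network filesystems. A hedged alternative is to probe with a throwaway temp file; `ensure_writable_dir` below is a hypothetical helper sketch, not part of the app:

```python
# Sketch: verify writability by actually writing, not by permission bits.
import os
import tempfile

def ensure_writable_dir(path: str) -> bool:
    """Create `path` if needed and return True only if it accepts writes."""
    os.makedirs(path, exist_ok=True)
    try:
        with tempfile.NamedTemporaryFile(dir=path):  # deleted on close
            return True
    except OSError:
        return False
```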
@@ -110,8 +124,8 @@ def load_model(task: str, model_name: str = None):
             return genai.GenerativeModel(model_to_load)
 
         if task == "visual-qa":
-            processor = ViltProcessor.from_pretrained(model_to_load)
-            model = ViltForQuestionAnswering.from_pretrained(model_to_load)
+            processor = ViltProcessor.from_pretrained(model_to_load, cache_dir=cache_dir)
+            model = ViltForQuestionAnswering.from_pretrained(model_to_load, cache_dir=cache_dir)
             device = "cuda" if torch.cuda.is_available() else "cpu"
             model.to(device)
 
@@ -130,8 +144,11 @@ def load_model(task: str, model_name: str = None):
 
             return vqa_function
 
-
-
+        return pipeline(
+            task if task != "file-qa" else "question-answering",
+            model=model_to_load,
+            cache_dir=cache_dir
+        )
 
     except Exception as e:
         logger.error(f"Model load failed: {str(e)}")
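Depending on the installed transformers version, `pipeline()` may not forward a bare `cache_dir=` argument to the underlying `from_pretrained()` call; the documented route is `model_kwargs`. A sketch of that variant, with an illustrative checkpoint rather than the app's configured one:

```python
from transformers import pipeline

# Sketch: route the cache location through model_kwargs, which pipeline()
# forwards to the model's from_pretrained() call.
qa = pipeline(
    "question-answering",
    model="distilbert-base-cased-distilled-squad",  # illustrative checkpoint
    model_kwargs={"cache_dir": "/cache/huggingface"},
)
```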
@@ -171,6 +188,7 @@ def translate_text(text: str, target_language: str):
     lang_code = SUPPORTED_LANGUAGES[target_lang]
 
     if translation_model is None or translation_tokenizer is None:
+        # Debugger note: "cannot access local variable 'lang_code' before it was used"
         raise Exception("Translation model not initialized")
 
     match = re.search(r'how to say\s+(.+?)\s+in\s+(\w+)', text.lower())
@@ -191,7 +209,11 @@ def translate_text(text: str, target_language: str):
         num_beams=1,
         early_stopping=True
     )
-    translated_text = translation_tokenizer.batch_decode(
+    translated_text = translation_tokenizer.batch_decode(
+        generated_tokens,
+        skip_special_tokens=True,
+        clean_up_tokenization_spaces=False
+    )[0]
     logger.info(f"Translation took {time.time() - start_time:.2f} seconds")
 
     return translated_text
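For context on the `batch_decode` call completed above, this is the standard M2M100 round trip: set the source language on the tokenizer, force the target language's BOS token during generation, then decode. A self-contained sketch using the public facebook/m2m100_418M checkpoint (the app's MODELS["translation"] entry may differ):

```python
from transformers import M2M100ForConditionalGeneration, M2M100Tokenizer

model = M2M100ForConditionalGeneration.from_pretrained("facebook/m2m100_418M")
tokenizer = M2M100Tokenizer.from_pretrained("facebook/m2m100_418M")

tokenizer.src_lang = "en"  # language of the input text
encoded = tokenizer("Hello, world!", return_tensors="pt")
generated_tokens = model.generate(
    **encoded,
    forced_bos_token_id=tokenizer.get_lang_id("fr"),  # decode into French
)
# skip_special_tokens drops language and padding markers from the output
print(tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)[0])
```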
@@ -208,7 +230,6 @@ def detect_intent(text: str = None, file: UploadFile = None) -> tuple[str, str]:
     text_lower = text.lower()
     filename = file.filename.lower() if file.filename else ""
 
-    # Check for file translation intent
     translate_patterns = [
         r'translate.*to\s+\[?([a-zA-Z]+)\]?:?\s*(.*)',
         r'convert.*to\s+\[?([a-zA-Z]+)\]?:?\s*(.*)',
@@ -222,7 +243,6 @@ def detect_intent(text: str = None, file: UploadFile = None) -> tuple[str, str]:
             target_language = potential_lang.capitalize()
             return "file-translate", target_language
 
-        # Image-related intents
         content_type = file.content_type.lower() if file.content_type else ""
         if content_type.startswith('image/') and text:
             if "what’s this" in text_lower or "does this fly" in text_lower or ("fly" in text_lower and any(q in text_lower for q in ['does', 'can', 'will'])):
@@ -232,7 +252,6 @@ def detect_intent(text: str = None, file: UploadFile = None) -> tuple[str, str]:
         if "generate a caption" in text_lower or "caption" in text_lower:
             return "image-to-text", target_language
 
-        # File-related intents
         if filename.endswith(('.xlsx', '.xls', '.csv')):
             return "visualize", target_language
         elif filename.endswith(('.pdf', '.docx', '.doc', '.txt', '.rtf')):
@@ -248,7 +267,6 @@ def detect_intent(text: str = None, file: UploadFile = None) -> tuple[str, str]:
     if any(keyword in text_lower for keyword in ['chat', 'talk', 'converse', 'ask gemini']):
         return "chatbot", target_language
 
-    # Text translation intent
     translate_patterns = [
         r'translate.*to\s+\[?([a-zA-Z]+)\]?:?\s*(.*)',
         r'convert.*to\s+\[?([a-zA-Z]+)\]?:?\s*(.*)',
@@ -364,7 +382,11 @@ async def process_input(
                 max_length=512,
                 num_beams=1
             )
-            translations[lang] = translation_tokenizer.batch_decode(
+            translations[lang] = translation_tokenizer.batch_decode(
+                generated_tokens,
+                skip_special_tokens=True,
+                clean_up_tokenization_spaces=False
+            )[0]
         response = "\n".join(f"{lang.capitalize()}: {translations[lang]}" for lang in translations)
         logger.info(f"Translated to all supported languages: {', '.join(translations.keys())}")
         return {"response": response, "type": "translation"}
@@ -382,7 +404,6 @@ async def process_input(
         if not content.strip():
             raise HTTPException(status_code=400, detail="No text could be extracted from the file")
 
-        # Split content into chunks to handle large files
         max_chunk_size = 512
         chunks = [content[i:i+max_chunk_size] for i in range(0, len(content), max_chunk_size)]
         translated_chunks = []
@@ -511,15 +532,12 @@ async def process_input(
         if not answer.endswith(('.', '!', '?')):
             answer += '.'
 
-        # Check if the question asks for a specific, factual detail like color
         factual_questions = ['color', 'size', 'number', 'how many', 'what is the']
         is_factual = any(keyword in question.lower() for keyword in factual_questions)
 
         if is_factual:
-            # Return the raw VQA answer for factual questions
             final_answer = answer
         else:
-            # Apply cosmic tone for non-factual, open-ended questions
             chatbot = load_model("chatbot")
             if "fly" in question.lower():
                 final_answer = chatbot.generate_content(f"Make this fun and spacey: {answer}").text.strip()
@@ -570,7 +588,22 @@ async def process_input(
         if not content.strip():
             raise HTTPException(status_code=400, detail="No text could be extracted from the file")
 
-
+        try:
+            qa_pipeline = load_model("file-qa")
+        except Exception as e:
+            logger.warning(f"File-QA model failed: {str(e)}. Falling back to Gemini.")
+            question = text.strip()
+            if not question.endswith('?'):
+                question += '?'
+            response = get_gemini_response(f"Answer this question based on the following text: {content}\nQuestion: {question}")
+            return {
+                "response": response,
+                "type": "file_qa",
+                "additional_data": {
+                    "question": text,
+                    "file_name": file.filename
+                }
+            }
 
         question = text.strip()
         if not question.endswith('?'):
@@ -800,7 +833,7 @@ async def startup_event():
 
     async def load_model_with_timeout(task):
         try:
-            await asyncio.wait_for(asyncio.to_thread(load_model, task), timeout=
+            await asyncio.wait_for(asyncio.to_thread(load_model, task), timeout=120.0)
             logger.info(f"Successfully loaded {task} model")
         except asyncio.TimeoutError:
             logger.warning(f"Timeout loading {task} model - will load on demand")
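The pattern completed above keeps a blocking model load off the event loop with `asyncio.to_thread` and caps it with `asyncio.wait_for`. A minimal standalone sketch with a stand-in loader (requires Python 3.9+ for `to_thread`):

```python
import asyncio
import time

def slow_load() -> str:
    time.sleep(2)  # stand-in for a blocking model download
    return "model"

async def main() -> None:
    try:
        # Run the blocking call in a worker thread; give up after 120 s.
        model = await asyncio.wait_for(asyncio.to_thread(slow_load), timeout=120.0)
        print(f"loaded: {model}")
    except asyncio.TimeoutError:
        print("timed out - will load on demand")

asyncio.run(main())
```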
@@ -809,8 +842,8 @@ async def startup_event():
 
     try:
         model_name = MODELS["translation"]
-        translation_model = M2M100ForConditionalGeneration.from_pretrained(model_name)
-        translation_tokenizer = M2M100Tokenizer.from_pretrained(model_name)
+        translation_model = M2M100ForConditionalGeneration.from_pretrained(model_name, cache_dir=os.getenv('HF_HOME'))
+        translation_tokenizer = M2M100Tokenizer.from_pretrained(model_name, cache_dir=os.getenv('HF_HOME'))
         device = "cuda" if torch.cuda.is_available() else "cpu"
         translation_model.to(device)
         logger.info("Translation model pre-loaded successfully")