Spaces:

Hadiil
/

Web-ai-app

Sleeping

App Files Files Community

Hadiil committed on Apr 29

Commit

e047fcf

verified ·

1 Parent(s): 4f55411

Update app.py

Browse files

Files changed (1) hide show

app.py +101 -19

app.py CHANGED Viewed

@@ -18,6 +18,7 @@ import torch
 import numpy as np
 from pydantic import BaseModel
 import asyncio
 from spellchecker import SpellChecker
 import nltk
 from nltk.tokenize import sent_tokenize
@@ -42,13 +43,13 @@ except Exception as e:
     logger.error(f"Error verifying NLTK punkt_tab: {str(e)}")
     raise Exception(f"Failed to verify NLTK punkt_tab: {str(e)}")
-# Create upload directory if it doesn't exist
 upload_dir = os.getenv('UPLOAD_DIR', '/tmp/uploads')
 os.makedirs(upload_dir, exist_ok=True)
 app = FastAPI(
     title="Cosmic AI Assistant",
-    description="An advanced AI assistant with space-themed interface, translation, summarization, image analysis, and file question-answering features",
     version="2.0.0"
 )
@@ -58,11 +59,16 @@ app.mount("/static", StaticFiles(directory="static"), name="static")
 # Mount images directory
 app.mount("/images", StaticFiles(directory="images"), name="images")
 # Model configurations
 MODELS = {
     "summarization": "sshleifer/distilbart-cnn-12-6",
     "image-to-text": "Salesforce/blip-image-captioning-large",
     "visual-qa": "dandelin/vilt-b32-finetuned-vqa",
     "translation": "facebook/m2m100_418M",
     "file-qa": "distilbert-base-cased-distilled-squad"
 }
@@ -90,7 +96,7 @@ translation_tokenizer = None
 # Initialize spell checker
 spell = SpellChecker()
-# Cache for model loading
 @lru_cache(maxsize=8)
 def load_model(task: str, model_name: str = None):
     """Cached model loader with proper task names and error handling"""
@@ -100,6 +106,9 @@ def load_model(task: str, model_name: str = None):
         model_to_load = model_name or MODELS.get(task)
         if task == "visual-qa":
             processor = ViltProcessor.from_pretrained(model_to_load)
             model = ViltForQuestionAnswering.from_pretrained(model_to_load)
@@ -128,6 +137,21 @@ def load_model(task: str, model_name: str = None):
         logger.error(f"Model load failed: {str(e)}")
         raise HTTPException(status_code=500, detail=f"Model loading failed: {task} - {str(e)}")
 def translate_text(text: str, target_language: str):
     """Translate text to any target language using pre-loaded M2M100 model"""
     if not text:
@@ -217,10 +241,13 @@ def detect_intent(text: str = None, file: UploadFile = None) -> tuple[str, str]:
             return "summarize", target_language
     if not text:
-        return "summarize", target_language
     text_lower = text.lower()
     # Text translation intent
     translate_patterns = [
         r'translate.*to\s+\[?([a-zA-Z]+)\]?:?\s*(.*)',
@@ -237,7 +264,7 @@ def detect_intent(text: str = None, file: UploadFile = None) -> tuple[str, str]:
                 return "translate", target_language
             else:
                 logger.warning(f"Invalid language detected: {potential_lang}")
-                return "summarize", target_language
     vqa_patterns = [
         r'how (many|much)',
@@ -273,7 +300,7 @@ def detect_intent(text: str = None, file: UploadFile = None) -> tuple[str, str]:
     if len(text) > 100:
         return "summarize", target_language
-    return "summarize", target_language
 def preprocess_text(text: str) -> str:
     """Correct spelling errors and improve text readability."""
@@ -288,13 +315,29 @@ class ProcessResponse(BaseModel):
     type: str
     additional_data: Optional[Dict[str, Any]] = None
 @app.post("/process", response_model=ProcessResponse)
 async def process_input(
     request: Request,
     text: str = Form(None),
     file: UploadFile = File(None)
 ):
-    """Enhanced unified endpoint for summarization, translation, image analysis, and file QA"""
     start_time = time.time()
     client_ip = request.client.host
     logger.info(f"Request from {client_ip}: text={text[:50] + '...' if text and len(text) > 50 else text}, file={file.filename if file else None}")
@@ -303,7 +346,11 @@ async def process_input(
     logger.info(f"Detected intent: {intent}, target_language: {target_language}")
     try:
-        if intent == "translate":
             content = await extract_text_from_file(file) if file else text
             if "all languages" in text.lower():
                 translations = {}
@@ -401,6 +448,12 @@ async def process_input(
                     final_summary = summary[0]['summary_text']
                 final_summary = re.sub(r'\s+', ' ', final_summary).strip()
                 if not final_summary.endswith(('.', '!', '?')):
                     final_summary += '.'
@@ -409,7 +462,10 @@ async def process_input(
             except Exception as e:
                 logger.error(f"Summarization error: {str(e)}")
-                raise HTTPException(status_code=500, detail=f"Summarization error: {str(e)}")
         elif intent == "image-to-text":
             if not file or not file.content_type.startswith('image/'):
@@ -441,7 +497,10 @@ async def process_input(
             if not question.endswith('?'):
                 question += '?'
-            answer = vqa_pipeline(image=image, question=question)
             answer = answer.strip()
             if not answer or answer.lower() == question.lower():
@@ -452,10 +511,25 @@ async def process_input(
                 if not answer.endswith(('.', '!', '?')):
                     answer += '.'
-            logger.info(f"Final VQA answer: {answer}")
             return {
-                "response": answer,
                 "type": "visual_qa",
                 "additional_data": {
                     "question": text,
@@ -481,11 +555,10 @@ async def process_input(
             return {"response": response, "type": "visualization_code"}
         elif intent == "text-generation":
-            # Simulate text generation without Gemini
-            response = f"Generated text based on '{text}': This is a simulated creative text."
             lines = response.split(". ")
-            formatted_text = "\n".join(line.strip() + ("." if not line.endswith(".") else "") for line in lines if line)
-            return {"response": formatted_text, "type": "generated_text"}
         elif intent == "file-qa":
             if not file or not file.filename.lower().endswith(('.pdf', '.docx', '.doc', '.txt', '.rtf')):
@@ -522,10 +595,17 @@ async def process_input(
             if not best_answer.endswith(('.', '!', '?')):
                 best_answer += '.'
-            logger.info(f"File QA answer: {best_answer}")
             return {
-                "response": best_answer,
                 "type": "file_qa",
                 "additional_data": {
                     "question": text,
@@ -534,7 +614,8 @@ async def process_input(
             }
         else:
-            raise HTTPException(status_code=400, detail="Invalid intent detected")
     except Exception as e:
         logger.error(f"Processing error: {str(e)}", exc_info=True)
@@ -740,6 +821,7 @@ async def startup_event():
         load_model_with_timeout("summarization"),
         load_model_with_timeout("image-to-text"),
         load_model_with_timeout("visual-qa"),
         load_model_with_timeout("file-qa")
     )

 import numpy as np
 from pydantic import BaseModel
 import asyncio
+import google.generativeai as genai
 from spellchecker import SpellChecker
 import nltk
 from nltk.tokenize import sent_tokenize
     logger.error(f"Error verifying NLTK punkt_tab: {str(e)}")
     raise Exception(f"Failed to verify NLTK punkt_tab: {str(e)}")
+# Create upload directory if it doesn't exist
 upload_dir = os.getenv('UPLOAD_DIR', '/tmp/uploads')
 os.makedirs(upload_dir, exist_ok=True)
 app = FastAPI(
     title="Cosmic AI Assistant",
+    description="An advanced AI assistant with space-themed interface, translation, and file question-answering features",
     version="2.0.0"
 )
 # Mount images directory
 app.mount("/images", StaticFiles(directory="images"), name="images")
+# Gemini API Configuration
+# The key is read from the GEMINI_API_KEY environment variable (e.g. a Space
+# secret) so that no credential is stored in the repository. Any key that was
+# ever committed here must be treated as compromised and revoked.
+API_KEY = os.getenv("GEMINI_API_KEY")
+if not API_KEY:
+    logger.warning("GEMINI_API_KEY is not set; Gemini-backed features will fail")
+genai.configure(api_key=API_KEY)
 # Model configurations
 MODELS = {
     "summarization": "sshleifer/distilbart-cnn-12-6",
     "image-to-text": "Salesforce/blip-image-captioning-large",
     "visual-qa": "dandelin/vilt-b32-finetuned-vqa",
+    "chatbot": "gemini-1.5-pro",
     "translation": "facebook/m2m100_418M",
     "file-qa": "distilbert-base-cased-distilled-squad"
 }
 # Initialize spell checker
 spell = SpellChecker()
+# Cache for model loading (excluding translation)
 @lru_cache(maxsize=8)
 def load_model(task: str, model_name: str = None):
     """Cached model loader with proper task names and error handling"""
         model_to_load = model_name or MODELS.get(task)
+        if task == "chatbot":
+            return genai.GenerativeModel(model_to_load)
         if task == "visual-qa":
             processor = ViltProcessor.from_pretrained(model_to_load)
             model = ViltForQuestionAnswering.from_pretrained(model_to_load)
         logger.error(f"Model load failed: {str(e)}")
         raise HTTPException(status_code=500, detail=f"Model loading failed: {task} - {str(e)}")
+def get_gemini_response(user_input: str, is_generation: bool = False) -> str:
+    """Generate a response with Gemini for both chat and text generation.
+
+    Args:
+        user_input: Text to send to the model.
+        is_generation: When True, the input is wrapped in a creative-writing
+            prompt; otherwise it is sent to the model unchanged.
+
+    Returns:
+        The model's reply with surrounding whitespace stripped. If
+        ``user_input`` is empty, a fixed message asking for input. If loading
+        the model or generating content fails, the string
+        ``"Error: <details>"`` is returned instead of raising.
+    """
+    if not user_input:
+        return "Please provide some input."
+    try:
+        chatbot = load_model("chatbot")
+        if is_generation:
+            prompt = f"Generate creative text based on this prompt: {user_input}"
+        else:
+            prompt = user_input
+        response = chatbot.generate_content(prompt)
+        return response.text.strip()
+    except Exception as e:
+        # Callers use the return value directly as response text, so the
+        # failure is still reported as a string -- but it is logged first so
+        # that it is not lost.
+        logger.error(f"Gemini request failed: {str(e)}", exc_info=True)
+        return f"Error: {str(e)}"
 def translate_text(text: str, target_language: str):
     """Translate text to any target language using pre-loaded M2M100 model"""
     if not text:
             return "summarize", target_language
     if not text:
+        return "chatbot", target_language
     text_lower = text.lower()
+    if any(keyword in text_lower for keyword in ['chat', 'talk', 'converse', 'ask gemini']):
+        return "chatbot", target_language
     # Text translation intent
     translate_patterns = [
         r'translate.*to\s+\[?([a-zA-Z]+)\]?:?\s*(.*)',
                 return "translate", target_language
             else:
                 logger.warning(f"Invalid language detected: {potential_lang}")
+                return "chatbot", target_language
     vqa_patterns = [
         r'how (many|much)',
     if len(text) > 100:
         return "summarize", target_language
+    return "chatbot", target_language
 def preprocess_text(text: str) -> str:
     """Correct spelling errors and improve text readability."""
     type: str
     additional_data: Optional[Dict[str, Any]] = None
+@app.get("/chatbot")
+async def chatbot_interface():
+    """Send the client to the static ``index.html`` page that hosts the chatbot UI."""
+    target_url = "/static/index.html"
+    return RedirectResponse(url=target_url)
+@app.post("/chat")
+async def chat_endpoint(data: dict):
+    """Return a chat reply for the message contained in the request body.
+
+    Args:
+        data: Request body; the user's text is read from its ``"message"`` key.
+
+    Returns:
+        A dict of the form ``{"response": <reply text>}``.
+
+    Raises:
+        HTTPException: 400 when ``"message"`` is missing, empty, blank, or not
+            a string; 500 when producing the reply raises.
+    """
+    message = data.get("message", "")
+    # Reject non-string and whitespace-only payloads here instead of
+    # forwarding them to the model.
+    if not isinstance(message, str) or not message.strip():
+        raise HTTPException(status_code=400, detail="No message provided")
+    try:
+        response = get_gemini_response(message)
+        return {"response": response}
+    except Exception as e:
+        # Chain the original exception so the root cause stays in the traceback.
+        raise HTTPException(status_code=500, detail=f"Chat error: {str(e)}") from e
 @app.post("/process", response_model=ProcessResponse)
 async def process_input(
     request: Request,
     text: str = Form(None),
     file: UploadFile = File(None)
 ):
+    """Enhanced unified endpoint with dynamic translation and file translation"""
     start_time = time.time()
     client_ip = request.client.host
     logger.info(f"Request from {client_ip}: text={text[:50] + '...' if text and len(text) > 50 else text}, file={file.filename if file else None}")
     logger.info(f"Detected intent: {intent}, target_language: {target_language}")
     try:
+        if intent == "chatbot":
+            response = get_gemini_response(text)
+            return {"response": response, "type": "chat"}
+        elif intent == "translate":
             content = await extract_text_from_file(file) if file else text
             if "all languages" in text.lower():
                 translations = {}
                     final_summary = summary[0]['summary_text']
                 final_summary = re.sub(r'\s+', ' ', final_summary).strip()
+                if not final_summary or final_summary.lower().startswith(content.lower()[:30]):
+                    logger.warning("Summarizer produced inadequate output, falling back to Gemini")
+                    final_summary = get_gemini_response(
+                        f"Summarize this text in a concise and meaningful way: {content}"
+                    )
                 if not final_summary.endswith(('.', '!', '?')):
                     final_summary += '.'
             except Exception as e:
                 logger.error(f"Summarization error: {str(e)}")
+                final_summary = get_gemini_response(
+                    f"Summarize this text in a concise and meaningful way: {content}"
+                )
+                return {"response": final_summary, "type": "summary", "message": "Text was preprocessed to correct spelling errors"}
         elif intent == "image-to-text":
             if not file or not file.content_type.startswith('image/'):
             if not question.endswith('?'):
                 question += '?'
+            answer = vqa_pipeline(
+                image=image,
+                question=question
+            )
             answer = answer.strip()
             if not answer or answer.lower() == question.lower():
                 if not answer.endswith(('.', '!', '?')):
                     answer += '.'
+            # Check if the question asks for a specific, factual detail like color
+            factual_questions = ['color', 'size', 'number', 'how many', 'what is the']
+            is_factual = any(keyword in question.lower() for keyword in factual_questions)
+            if is_factual:
+                # Return the raw VQA answer for factual questions
+                final_answer = answer
+            else:
+                # Apply cosmic tone for non-factual, open-ended questions
+                chatbot = load_model("chatbot")
+                if "fly" in question.lower():
+                    final_answer = chatbot.generate_content(f"Make this fun and spacey: {answer}").text.strip()
+                else:
+                    final_answer = chatbot.generate_content(f"Make this cosmic and poetic: {answer}").text.strip()
+            logger.info(f"Final VQA answer: {final_answer}")
             return {
+                "response": final_answer,
                 "type": "visual_qa",
                 "additional_data": {
                     "question": text,
             return {"response": response, "type": "visualization_code"}
         elif intent == "text-generation":
+            response = get_gemini_response(text, is_generation=True)
             lines = response.split(". ")
+            formatted_poem = "\n".join(line.strip() + ("." if not line.endswith(".") else "") for line in lines if line)
+            return {"response": formatted_poem, "type": "generated_text"}
         elif intent == "file-qa":
             if not file or not file.filename.lower().endswith(('.pdf', '.docx', '.doc', '.txt', '.rtf')):
             if not best_answer.endswith(('.', '!', '?')):
                 best_answer += '.'
+            try:
+                chatbot = load_model("chatbot")
+                final_answer = chatbot.generate_content(f"Make this cosmic and poetic: {best_answer}").text.strip()
+            except Exception as e:
+                logger.warning(f"Failed to add cosmic tone: {str(e)}. Using raw answer.")
+                final_answer = best_answer
+            logger.info(f"File QA answer: {final_answer}")
             return {
+                "response": final_answer,
                 "type": "file_qa",
                 "additional_data": {
                     "question": text,
             }
         else:
+            response = get_gemini_response(text or "Hello! How can I assist you?")
+            return {"response": response, "type": "chat"}
     except Exception as e:
         logger.error(f"Processing error: {str(e)}", exc_info=True)
         load_model_with_timeout("summarization"),
         load_model_with_timeout("image-to-text"),
         load_model_with_timeout("visual-qa"),
+        load_model_with_timeout("chatbot"),
         load_model_with_timeout("file-qa")
     )