Hadiil committed
Commit 252f82c · verified · 1 Parent(s): ebb75cd

Update app.py

Files changed (1):
  1. app.py +616 -574

app.py CHANGED
@@ -2,7 +2,7 @@ from fastapi import FastAPI, UploadFile, File, Form, HTTPException, Request
2
  from fastapi.staticfiles import StaticFiles
3
  from fastapi.responses import RedirectResponse, JSONResponse, HTMLResponse
4
  from transformers import pipeline, ViltProcessor, ViltForQuestionAnswering, M2M100ForConditionalGeneration, M2M100Tokenizer
5
- from typing import Optional, Dict, Any, List, Union
6
  import logging
7
  import time
8
  import os
@@ -19,12 +19,6 @@ import numpy as np
19
  from pydantic import BaseModel
20
  import asyncio
21
  import google.generativeai as genai
22
- import magic # For MIME type detection
23
- import datetime
24
- import matplotlib
25
- matplotlib.use('Agg') # Set non-interactive backend
26
- import matplotlib.pyplot as plt
27
- import seaborn as sns
28
 
29
  # Configure logging
30
  logging.basicConfig(
@@ -33,42 +27,28 @@ logging.basicConfig(
33
  )
34
  logger = logging.getLogger("cosmic_ai")
35
 
36
- # Initialize FastAPI app
37
- app = FastAPI(title="Cosmic AI Assistant", version="2.1.0")
38
- app.mount("/static", StaticFiles(directory="static"), name="static")
39
- app.mount("/images", StaticFiles(directory="images"), name="images")
40
 
41
- # Ensure directories exist
42
- UPLOAD_DIR = os.getenv("UPLOAD_DIR", "/app/uploads")
43
- CACHE_DIR = os.getenv("TRANSFORMERS_CACHE", "/app/cache")
44
- IMAGES_DIR = "/app/images"
 
45
 
46
- os.makedirs(CACHE_DIR, exist_ok=True)
47
- os.makedirs(UPLOAD_DIR, exist_ok=True)
48
- os.makedirs(IMAGES_DIR, exist_ok=True)
49
 
50
- # Configure Gemini
51
- API_KEY = os.getenv("GOOGLE_API_KEY", "AIzaSyCwmgD8KxzWiuivtySNtcZF_rfTvx9s9sY")
52
- genai.configure(api_key=API_KEY)
53
 
54
- # Language mapping for translation
55
- LANGUAGE_MAPPING = {
56
- "english": "en",
57
- "french": "fr",
58
- "spanish": "es",
59
- "german": "de",
60
- "italian": "it",
61
- "portuguese": "pt",
62
- "russian": "ru",
63
- "chinese": "zh",
64
- "japanese": "ja",
65
- "korean": "ko",
66
- "arabic": "ar",
67
- "hindi": "hi"
68
- }
69
 
70
- # Inverse language mapping for reference
71
- LANGUAGE_CODE_TO_NAME = {v: k.title() for k, v in LANGUAGE_MAPPING.items()}
 
72
 
73
  # Model configurations
74
  MODELS = {
@@ -77,148 +57,42 @@ MODELS = {
77
  "visual-qa": "dandelin/vilt-b32-finetuned-vqa",
78
  "chatbot": "gemini-1.5-pro",
79
  "translation": "facebook/m2m100_418M",
80
- "question-answering": "distilbert-base-cased-distilled-squad",
81
- "generate": "gemini-1.5-pro"
82
  }
83
 
84
- # Response model
85
- class ProcessResponse(BaseModel):
86
- response: str
87
- type: str
88
- additional_data: Optional[Dict[str, Any]] = None
 
89
 
90
- # Intent detection with improved pattern matching and language detection
91
- def detect_intent(text: Optional[str], file: Optional[UploadFile]) -> tuple:
92
- """
93
- Detect user intent and target language from input
94
- Returns a tuple of (intent, target_language)
95
- """
96
- if not text and not file:
97
- return "unknown", "en"
98
-
99
- text_lower = text.lower() if text else ""
100
-
101
- # File-based intent detection
102
- if file:
103
- mime_type = file.content_type.lower() if hasattr(file, 'content_type') else ""
104
- filename_lower = file.filename.lower() if hasattr(file, 'filename') else ""
105
-
106
- # Image processing
107
- if mime_type.startswith('image/'):
108
- # Check if there's a specific question about the image
109
- if text and any(phrase in text_lower for phrase in [
110
- "what is", "how many", "does this", "is there", "can you see",
111
- "what color", "identify", "explain"
112
- ]):
113
- return "visual-qa", "en"
114
- else:
115
- # Just caption the image if no specific question
116
- return "image-to-text", "en"
117
-
118
- # Data visualization for spreadsheets
119
- elif any(mime_type.startswith(mt) for mt in ['text/csv', 'application/vnd.ms-excel', 'application/vnd.openxmlformats-officedocument.spreadsheetml']) or \
120
- any(filename_lower.endswith(ext) for ext in ['.csv', '.xls', '.xlsx']):
121
- return "visualize", "en"
122
-
123
- # Document processing
124
- elif any(mime_type.startswith(mt) for mt in [
125
- 'application/pdf',
126
- 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
127
- 'application/msword',
128
- 'text/plain',
129
- 'application/rtf',
130
- 'text/rtf'
131
- ]) or any(filename_lower.endswith(ext) for ext in ['.pdf', '.docx', '.doc', '.txt', '.rtf']):
132
- # If there's a specific question about the document
133
- if text and ("?" in text or any(word in text_lower for word in ["what", "who", "how", "when", "where", "why", "which", "find", "search"])):
134
- return "file-qa", "en"
135
- # If translation is requested
136
- elif text and any(keyword in text_lower for keyword in ["translate", "translation", "convert to"]):
137
- # Extract target language
138
- target_lang = "en" # Default to English
139
-
140
- # Check for language specification patterns
141
- lang_pattern = r"to\s+(\w+)"
142
- lang_match = re.search(lang_pattern, text_lower)
143
-
144
- if lang_match:
145
- lang_name = lang_match.group(1).lower()
146
- if lang_name in LANGUAGE_MAPPING:
147
- target_lang = LANGUAGE_MAPPING[lang_name]
148
- # Check if it's a direct language code
149
- elif lang_name in LANGUAGE_MAPPING.values():
150
- target_lang = lang_name
151
-
152
- return "translate", target_lang
153
- # Default to summarization for documents without specific instructions
154
- else:
155
- return "summarize", "en"
156
-
157
- # Text-based intent detection (no file)
158
-
159
- # Translation intent
160
- if any(keyword in text_lower for keyword in ["translate", "translation", "say in", "how to say"]):
161
- # Try to extract target language
162
- target_lang = "en" # Default
163
-
164
- # Check for language specification patterns
165
- lang_patterns = [
166
- r"to\s+(\w+)",
167
- r"in\s+(\w+)",
168
- r"into\s+(\w+)"
169
- ]
170
-
171
- for pattern in lang_patterns:
172
- lang_match = re.search(pattern, text_lower)
173
- if lang_match:
174
- lang_name = lang_match.group(1).lower()
175
- if lang_name in LANGUAGE_MAPPING:
176
- target_lang = LANGUAGE_MAPPING[lang_name]
177
- break
178
- # Check if it's a direct language code
179
- elif lang_name in LANGUAGE_MAPPING.values():
180
- target_lang = lang_name
181
- break
182
-
183
- # Check for "all languages" request
184
- if "all languages" in text_lower or "all supported languages" in text_lower:
185
- target_lang = "all"
186
-
187
- return "translate", target_lang
188
-
189
- # Summarization intent
190
- elif any(keyword in text_lower for keyword in [
191
- "summarize", "summary", "overview", "brief", "condense", "shorten", "tldr"
192
- ]) or (len(text) > 500 and not any(keyword in text_lower for keyword in ["write", "generate", "create"])):
193
- return "summarize", "en"
194
-
195
- # Text generation intent (creative writing)
196
- elif any(keyword in text_lower for keyword in [
197
- "write", "generate", "create", "compose", "draft", "story", "poem", "essay",
198
- "script", "letter", "email", "article", "blog"
199
- ]):
200
- return "generate", "en"
201
-
202
- # Default to chat
203
- return "chat", "en"
204
 
205
- # Model loading with caching
206
  @lru_cache(maxsize=8)
207
  def load_model(task: str, model_name: str = None):
208
  """Cached model loader with proper task names and error handling"""
209
  try:
210
- logger.info(f"Loading model for task: {task}, model: {model_name or MODELS.get(task, 'unknown')}")
211
  start_time = time.time()
212
- model_to_load = model_name or MODELS.get(task)
213
 
214
- if not model_to_load:
215
- raise ValueError(f"No model configured for task: {task}")
216
 
217
- # Gemini models
218
- if task == "chatbot" or task == "generate":
219
  return genai.GenerativeModel(model_to_load)
220
-
221
- # Visual Question Answering
222
  if task == "visual-qa":
223
  processor = ViltProcessor.from_pretrained(model_to_load)
224
  model = ViltForQuestionAnswering.from_pretrained(model_to_load)
@@ -228,453 +102,621 @@ def load_model(task: str, model_name: str = None):
228
  def vqa_function(image, question, **generate_kwargs):
229
  if image.mode != "RGB":
230
  image = image.convert("RGB")
231
-
232
  inputs = processor(image, question, return_tensors="pt").to(device)
233
  logger.info(f"VQA inputs - question: {question}, image size: {image.size}")
234
-
235
  with torch.no_grad():
236
  outputs = model(**inputs)
237
- logits = outputs.logits
238
- idx = logits.argmax(-1).item()
239
- answer = model.config.id2label[idx]
240
-
241
  logger.info(f"VQA raw output: {answer}")
242
  return answer
243
 
244
  return vqa_function
245
 
246
- # For most transformer models, use the standard pipeline
247
- try:
248
- if task == "translation":
249
- # For translation, return both tokenizer and model
250
- tokenizer = M2M100Tokenizer.from_pretrained(model_to_load)
251
- model = M2M100ForConditionalGeneration.from_pretrained(model_to_load)
252
- return tokenizer, model
253
- else:
254
- # Map task names to transformers pipeline tasks
255
- task_mapping = {
256
- "summarization": "summarization",
257
- "question-answering": "question-answering",
258
- "image-to-text": "image-to-text"
259
- }
260
-
261
- pipeline_task = task_mapping.get(task, task)
262
- return pipeline(pipeline_task, model=model_to_load)
263
- except Exception as e:
264
- logger.error(f"Pipeline creation failed for {task}: {str(e)}")
265
- raise
266
-
267
- logger.info(f"Model loaded in {time.time() - start_time:.2f}s")
268
 
269
  except Exception as e:
270
  logger.error(f"Model load failed: {str(e)}")
271
  raise HTTPException(status_code=500, detail=f"Model loading failed: {task} - {str(e)}")
272
 
273
- # File text extraction with improved error handling and multiple format support
274
- async def extract_text_from_file(file: UploadFile) -> str:
275
- """Extract text from uploaded file (PDF, DOCX, TXT, RTF)"""
276
- filename = file.filename.lower() if hasattr(file, 'filename') else "unknown"
277
- content = await file.read()
278
-
279
- # Use Python-magic to detect MIME type
280
  try:
281
- mime = magic.Magic(mime=True)
282
- mime_type = mime.from_buffer(content)
 
283
  except Exception as e:
284
- logger.warning(f"MIME detection failed: {str(e)}, using content_type")
285
- mime_type = file.content_type if hasattr(file, 'content_type') else "application/octet-stream"
286
-
287
- logger.info(f"Processing file: {filename}, size: {len(content)} bytes, MIME type: {mime_type}")
 
 
288
 
289
  try:
290
- # PDF processing with fallback mechanisms
291
- if mime_type == 'application/pdf' or filename.endswith('.pdf'):
292
- try:
293
- doc = fitz.open(stream=content, filetype="pdf")
294
- text = ""
295
- for page in doc:
296
- text += page.get_text()
297
- doc.close()
298
-
299
- if not text.strip():
300
- logger.warning(f"No text extracted from PDF: {filename}, attempting OCR fallback")
301
- raise ValueError("No text could be extracted from the PDF")
302
-
303
- return text
304
- except Exception as e:
305
- logger.error(f"PyMuPDF failed for {filename}: {str(e)}")
306
- # Could implement PDF OCR fallback here if needed
307
- raise HTTPException(status_code=400, detail=f"Could not extract text from PDF: {str(e)}")
308
 
309
- # Word document processing
310
- elif any(mime_type.startswith(mt) for mt in [
311
- 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
312
- 'application/msword'
313
- ]) or filename.endswith(('.docx', '.doc')):
314
- doc = Document(io.BytesIO(content))
315
- text = "\n".join([para.text for para in doc.paragraphs])
316
-
317
- if not text.strip():
318
- logger.warning(f"No text extracted from DOCX: {filename}")
319
- raise ValueError("No text could be extracted from the document")
320
-
321
- return text
322
 
323
- # Plain text and RTF processing
324
- elif mime_type in ['text/plain', 'text/rtf', 'application/rtf'] or filename.endswith(('.txt', '.rtf')):
325
- try:
326
- # Try UTF-8 first
327
- text = content.decode('utf-8', errors='ignore')
328
-
329
- # For RTF, do basic cleanup of markup
330
- if mime_type in ['text/rtf', 'application/rtf'] or filename.endswith('.rtf'):
331
- # Very basic RTF cleaning (would need a proper RTF parser for better results)
332
- text = re.sub(r'\\[a-zA-Z]+', ' ', text) # Remove RTF commands
333
- text = re.sub(r'[{}]', '', text) # Remove braces
334
- text = re.sub(r'\\[0-9]+', '', text) # Remove numeric commands
335
-
336
- if not text.strip():
337
- logger.warning(f"No text extracted from text file: {filename}")
338
- raise ValueError("No text could be extracted from the text file")
339
-
340
- return text
341
- except UnicodeDecodeError:
342
- # Fallback to latin-1 if UTF-8 fails
343
- text = content.decode('latin-1', errors='ignore')
344
- return text
345
 
346
  else:
347
- logger.error(f"Unsupported file type: {mime_type} for {filename}")
348
- raise HTTPException(
349
- status_code=400,
350
- detail=f"Unsupported file type: {mime_type}. Please upload a PDF, DOCX, TXT, or RTF file"
351
- )
352
-
353
- except HTTPException:
354
- # Re-raise HTTP exceptions
355
- raise
356
- except Exception as e:
357
- logger.error(f"Text extraction failed for {filename}: {str(e)}")
358
- raise HTTPException(status_code=400, detail=f"Text extraction failed: {str(e)}")
359
-
360
- # Data visualization with enhanced options and error handling
361
- def generate_visualization_code(df: pd.DataFrame, visualization_type: str = None) -> tuple:
362
- """
363
- Generate visualization based on data analysis and save to static file
364
- Returns tuple of (image_path, description)
365
- """
366
- try:
367
- # Basic data analysis
368
- num_rows, num_cols = df.shape
369
- numeric_cols = df.select_dtypes(include=[np.number]).columns.tolist()
370
- categorical_cols = df.select_dtypes(include=['object']).columns.tolist()
371
 
372
- # Check for datetime columns
373
- date_cols = []
374
- for col in df.columns:
375
- if pd.api.types.is_datetime64_any_dtype(df[col]):
376
- date_cols.append(col)
377
- elif df[col].dtype == 'object':
378
- # Try to convert to datetime
379
- try:
380
- pd.to_datetime(df[col], errors='raise')
381
- date_cols.append(col)
382
- except (ValueError, TypeError):
383
- pass
384
 
385
- # Generate stats summary
386
- stats_summary = df.describe().to_string()
 
387
 
388
- # File path for saving
389
- timestamp = int(time.time())
390
- img_filename = f"viz_{timestamp}.png"
391
- img_path = os.path.join(IMAGES_DIR, img_filename)
392
 
393
- # Apply visualization based on type
394
- if visualization_type and visualization_type.lower() in ['scatter', 'correlation']:
395
- if len(numeric_cols) < 2:
396
- raise ValueError("Need at least 2 numeric columns for a scatter plot")
397
-
398
- plt.figure(figsize=(10, 6))
399
- x_col, y_col = numeric_cols[0], numeric_cols[1]
400
-
401
- # Create enhanced scatter plot
402
- sns.scatterplot(data=df, x=x_col, y=y_col, hue=categorical_cols[0] if categorical_cols else None)
403
- plt.title(f'Correlation between {x_col} and {y_col}')
404
- plt.xlabel(x_col)
405
- plt.ylabel(y_col)
406
- plt.grid(True, alpha=0.3)
407
-
408
- # Add regression line
409
- sns.regplot(x=x_col, y=y_col, data=df, scatter=False, line_kws={"color": "red"})
410
-
411
- # Add correlation coefficient as text
412
- corr = df[x_col].corr(df[y_col])
413
- plt.annotate(f"Correlation: {corr:.2f}",
414
- xy=(0.05, 0.95),
415
- xycoords='axes fraction',
416
- fontsize=12,
417
- bbox=dict(boxstyle="round,pad=0.3", fc="white", ec="gray", alpha=0.8))
418
-
419
- plt.tight_layout()
420
- plt.savefig(img_path)
421
- plt.close()
422
-
423
- description = f"Scatter plot showing correlation between {x_col} and {y_col}. Correlation coefficient: {corr:.4f}"
424
-
425
- elif visualization_type and visualization_type.lower() in ['bar', 'barplot', 'barchart']:
426
- if len(categorical_cols) < 1 or len(numeric_cols) < 1:
427
- raise ValueError("Need at least 1 categorical and 1 numeric column for a bar chart")
428
-
429
- plt.figure(figsize=(12, 7))
430
- cat_col = categorical_cols[0]
431
- num_col = numeric_cols[0]
432
-
433
- # Get top categories if too many
434
- if df[cat_col].nunique() > 10:
435
- top_cats = df.groupby(cat_col)[num_col].sum().nlargest(10).index
436
- df_plot = df[df[cat_col].isin(top_cats)]
437
- title_suffix = " (top 10 categories)"
438
- else:
439
- df_plot = df
440
- title_suffix = ""
441
-
442
- # Create bar chart
443
- ax = sns.barplot(x=cat_col, y=num_col, data=df_plot, palette='viridis')
444
-
445
- # Add value labels on top of bars
446
- for p in ax.patches:
447
- ax.annotate(f'{p.get_height():.1f}',
448
- (p.get_x() + p.get_width() / 2., p.get_height()),
449
- ha='center', va='bottom',
450
- fontsize=9, color='black',
451
- xytext=(0, 5), textcoords='offset points')
452
-
453
- plt.title(f'Comparison of {num_col} by {cat_col}{title_suffix}', fontsize=14)
454
- plt.xlabel(cat_col, fontsize=12)
455
- plt.ylabel(num_col, fontsize=12)
456
- plt.xticks(rotation=45, ha='right')
457
- plt.grid(axis='y', alpha=0.3)
458
- plt.tight_layout()
459
- plt.savefig(img_path)
460
- plt.close()
461
-
462
- description = f"Bar chart comparing {num_col} across different {cat_col} categories"
463
-
464
- elif visualization_type and visualization_type.lower() in ['histogram', 'distribution']:
465
- if len(numeric_cols) < 1:
466
- raise ValueError("Need at least 1 numeric column for a histogram")
467
-
468
- plt.figure(figsize=(10, 6))
469
- num_col = numeric_cols[0]
470
-
471
- # Create histogram with KDE
472
- sns.histplot(df[num_col], kde=True, bins=20, color='purple')
473
-
474
- # Add mean and median lines
475
- mean_val = df[num_col].mean()
476
- median_val = df[num_col].median()
477
-
478
- plt.axvline(mean_val, color='red', linestyle='--', linewidth=1.5, label=f'Mean: {mean_val:.2f}')
479
- plt.axvline(median_val, color='green', linestyle='-.', linewidth=1.5, label=f'Median: {median_val:.2f}')
480
-
481
- plt.title(f'Distribution of {num_col}', fontsize=14)
482
- plt.xlabel(num_col, fontsize=12)
483
- plt.ylabel('Frequency', fontsize=12)
484
- plt.legend()
485
- plt.grid(True, alpha=0.3)
486
- plt.tight_layout()
487
- plt.savefig(img_path)
488
- plt.close()
489
-
490
- # Get descriptive stats for the column
491
- desc_stats = df[num_col].describe()
492
-
493
- description = (f"Histogram showing distribution of {num_col}\n"
494
- f"Mean: {desc_stats['mean']:.2f}, Median: {median_val:.2f}\n"
495
- f"Min: {desc_stats['min']:.2f}, Max: {desc_stats['max']:.2f}\n"
496
- f"Std Dev: {desc_stats['std']:.2f}")
497
-
498
- else: # Default dashboard with multiple plots
499
- # Create dashboard with multiple plots
500
- fig, axes = plt.subplots(2, 2, figsize=(16, 12))
501
- fig.suptitle('Data Dashboard', fontsize=16)
502
-
503
- # Plot 1: Correlation matrix (top-left)
504
- if len(numeric_cols) > 1:
505
- corr_matrix = df[numeric_cols].corr()
506
- sns.heatmap(corr_matrix, annot=True, cmap='coolwarm', fmt='.2f', ax=axes[0, 0])
507
- axes[0, 0].set_title('Correlation Matrix')
508
  else:
509
- axes[0, 0].text(0.5, 0.5, 'Not enough numeric columns for correlation matrix',
510
- ha='center', va='center', fontsize=12)
511
- axes[0, 0].axis('off')
512
-
513
- # Plot 2: Distribution (top-right)
514
- if numeric_cols:
515
- num_col = numeric_cols[0]
516
- sns.histplot(df[num_col], kde=True, ax=axes[0, 1], color='purple')
517
- axes[0, 1].set_title(f'Distribution of {num_col}')
518
- axes[0, 1].set_xlabel(num_col)
519
- axes[0, 1].set_ylabel('Frequency')
 
520
  else:
521
- axes[0, 1].text(0.5, 0.5, 'No numeric columns for histogram',
522
- ha='center', va='center', fontsize=12)
523
- axes[0, 1].axis('off')
524
-
525
- # Plot 3: Bar chart (bottom-left)
526
- if categorical_cols and numeric_cols:
527
- cat_col = categorical_cols[0]
528
- num_col = numeric_cols[0]
 
529
 
530
- # Limit to top categories if too many
531
- if df[cat_col].nunique() > 8:
532
- top_cats = df.groupby(cat_col)[num_col].sum().nlargest(8).index
533
- df_plot = df[df[cat_col].isin(top_cats)]
534
- title_suffix = " (top 8)"
535
- else:
536
- df_plot = df
537
- title_suffix = ""
 
538
 
539
- sns.barplot(x=cat_col, y=num_col, data=df_plot, ax=axes[1, 0], palette='viridis')
540
- axes[1, 0].set_title(f'{num_col} by {cat_col}{title_suffix}')
541
- axes[1, 0].set_xticklabels(axes[1, 0].get_xticklabels(), rotation=45, ha='right')
542
  else:
543
- axes[1, 0].text(0.5, 0.5, 'Need both categorical and numeric columns for bar chart',
544
- ha='center', va='center', fontsize=12)
545
- axes[1, 0].axis('off')
546
-
547
- # Plot 4: Box plot (bottom-right)
548
- if categorical_cols and numeric_cols:
549
- cat_col = categorical_cols[0]
550
- num_col = numeric_cols[0]
551
-
552
- # Limit to top categories if too many
553
- if df[cat_col].nunique() > 8:
554
- top_cats = df.groupby(cat_col)[num_col].sum().nlargest(8).index
555
- df_plot = df[df[cat_col].isin(top_cats)]
556
- title_suffix = " (top 8)"
 
557
  else:
558
- df_plot = df
559
- title_suffix = ""
560
-
561
- sns.boxplot(x=cat_col, y=num_col, data=df_plot, ax=axes[1, 1], palette='Set3')
562
- axes[1, 1].set_title(f'Distribution of {num_col} by {cat_col}{title_suffix}')
563
- axes[1, 1].set_xticklabels(axes[1, 1].get_xticklabels(), rotation=45, ha='right')
564
  else:
565
- axes[1, 1].text(0.5, 0.5, 'Need both categorical and numeric columns for box plot',
566
- ha='center', va='center', fontsize=12)
567
- axes[1, 1].axis('off')
568
 
569
- plt.tight_layout(rect=[0, 0, 1, 0.97]) # Adjust layout to make room for suptitle
570
- plt.savefig(img_path)
571
- plt.close()
572
 
573
- # Generate description with data summary
574
- description = (f"Data Dashboard Summary:\n"
575
- f"Dataset dimensions: {num_rows} rows × {num_cols} columns\n"
576
- f"Numeric columns: {', '.join(numeric_cols[:5])}{'...' if len(numeric_cols) > 5 else ''}\n"
577
- f"Categorical columns: {', '.join(categorical_cols[:5])}{'...' if len(categorical_cols) > 5 else ''}")
578
 
579
- return f"/images/{img_filename}", description
580
-
581
- except Exception as e:
582
- logger.error(f"Visualization generation failed: {str(e)}")
583
- raise ValueError(f"Could not generate visualization: {str(e)}")
584
-
585
- # Enhanced translation with multiple language support
586
- async def translate_text(text: str, target_lang: str = "en") -> Union[str, Dict[str, str]]:
587
- """
588
- Translate text to target language or multiple languages
589
- If target_lang is "all", returns dict of language:translation
590
- """
591
- try:
592
- tokenizer, model = load_model("translation")
593
-
594
- # If requesting translation to all supported languages
595
- if target_lang == "all":
596
- results = {}
597
- for lang_code in LANGUAGE_MAPPING.values():
598
- try:
599
- tokenizer.src_lang = "en" # Assuming source is English
600
- tokenizer.tgt_lang = lang_code
601
- encoded = tokenizer(text, return_tensors="pt")
602
- generated_tokens = model.generate(
603
- **encoded,
604
- forced_bos_token_id=tokenizer.get_lang_id(lang_code),
605
- max_length=512
606
- )
607
- translation = tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)[0]
608
- results[LANGUAGE_CODE_TO_NAME.get(lang_code, lang_code)] = translation
609
- except Exception as lang_error:
610
- logger.error(f"Translation to {lang_code} failed: {str(lang_error)}")
611
- results[LANGUAGE_CODE_TO_NAME.get(lang_code, lang_code)] = f"Translation failed: {str(lang_error)}"
612
 
613
- return results
614
  else:
615
- # Single language translation
616
- tokenizer.src_lang = "en" # Assuming source is English
617
-
618
- # Check if target_lang is valid
619
- if target_lang not in LANGUAGE_MAPPING.values():
620
- # Try to find it in the values
621
- for lang_name, lang_code in LANGUAGE_MAPPING.items():
622
- if target_lang.lower() == lang_name:
623
- target_lang = lang_code
624
- break
625
- else:
626
- logger.warning(f"Unsupported target language: {target_lang}, defaulting to English")
627
- return text # Return original text if language not supported
628
-
629
- tokenizer.tgt_lang = target_lang
630
- encoded = tokenizer(text, return_tensors="pt")
631
- generated_tokens = model.generate(
632
- **encoded,
633
- forced_bos_token_id=tokenizer.get_lang_id(target_lang),
634
- max_length=512
635
- )
636
- translation = tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)[0]
637
- return translation
638
 
639
  except Exception as e:
640
- logger.error(f"Translation failed: {str(e)}")
641
- raise HTTPException(status_code=500, detail=f"Translation failed: {str(e)}")
 
642
 
643
- # Creative text generation with enhanced Gemini capabilities
644
- async def generate_creative_text(prompt: str) -> str:
645
- """Generate creative text content using Gemini model"""
646
  try:
647
- chatbot = load_model("generate")
648
-
649
- # Identify content type from prompt
650
- content_type = "story" # Default
651
- if "poem" in prompt.lower() or "poetry" in prompt.lower():
652
- content_type = "poem"
653
- elif "essay" in prompt.lower():
654
- content_type = "essay"
655
- elif "article" in prompt.lower() or "blog" in prompt.lower():
656
- content_type = "article"
657
- elif "letter" in prompt.lower() or "email" in prompt.lower():
658
- content_type = "letter"
659
-
660
- # Create an enhanced prompt with formatting instructions
661
- enhanced_prompt = f"Generate a creative {content_type} based on this prompt: '{prompt}'. Please follow these guidelines: Create engaging, original content with proper structure. Use vivid language and appropriate tone. Format the output with proper paragraphs and line breaks. If generating a poem, use appropriate stanza structure. Include a cosmic or space theme if appropriate."
662
-
663
- # Generate the content
664
- generation_config = {
665
- "temperature": 0.8,
666
- "top_p": 0.95,
667
- "top_k": 40,
668
- "max_output_tokens": 1024
669
- }
670
-
671
- # Call the Gemini model
672
- response = chatbot.generate_content(
673
- enhanced_prompt,
674
- generation_config=generation_config
 
675
  )
676
- return response.text
677
-
 
678
  except Exception as e:
679
- logger.error(f"Text generation failed: {str(e)}")
680
- raise HTTPException(status_code=500, detail=f"Text generation failed: {str(e)}")
 
2
  from fastapi.staticfiles import StaticFiles
3
  from fastapi.responses import RedirectResponse, JSONResponse, HTMLResponse
4
  from transformers import pipeline, ViltProcessor, ViltForQuestionAnswering, M2M100ForConditionalGeneration, M2M100Tokenizer
5
+ from typing import Optional, Dict, Any, List
6
  import logging
7
  import time
8
  import os
 
19
  from pydantic import BaseModel
20
  import asyncio
21
  import google.generativeai as genai
 
22
 
23
  # Configure logging
24
  logging.basicConfig(
 
27
  )
28
  logger = logging.getLogger("cosmic_ai")
29
 
30
+ # Create the upload directory if it doesn't exist
31
+ upload_dir = os.getenv('UPLOAD_DIR', '/tmp/uploads')
32
+ os.makedirs(upload_dir, exist_ok=True)
 
33
 
34
+ app = FastAPI(
35
+ title="Cosmic AI Assistant",
36
+ description="An advanced AI assistant with space-themed interface, translation, and file question-answering features",
37
+ version="2.0.0"
38
+ )
39
 
40
+ # Mount static files
41
+ app.mount("/static", StaticFiles(directory="static"), name="static")
 
42
 
43
+ # Mount videos directory
44
+ app.mount("/videos", StaticFiles(directory="videos"), name="videos")
 
45
 
46
+ # Mount images directory
47
+ app.mount("/images", StaticFiles(directory="images"), name="images")
 
48
 
49
+ # Gemini API Configuration
50
+ API_KEY = "AIzaSyCwmgD8KxzWiuivtySNtcZF_rfTvx9s9sY" # Replace with your actual API key
51
+ genai.configure(api_key=API_KEY)
52
 
53
  # Model configurations
54
  MODELS = {
 
57
  "visual-qa": "dandelin/vilt-b32-finetuned-vqa",
58
  "chatbot": "gemini-1.5-pro",
59
  "translation": "facebook/m2m100_418M",
60
+ "file-qa": "distilbert-base-cased-distilled-squad" # New model for file QA
 
61
  }
62
 
63
+ # Supported languages for translation
64
+ SUPPORTED_LANGUAGES = {
65
+ "english": "en",
66
+ "french": "fr",
67
+ "german": "de",
68
+ "spanish": "es",
69
+ "italian": "it",
70
+ "russian": "ru",
71
+ "chinese": "zh",
72
+ "japanese": "ja",
73
+ "arabic": "ar",
74
+ "hindi": "hi",
75
+ "portuguese": "pt",
76
+ "korean": "ko"
77
+ }
78
 
79
+ # Global variables for pre-loaded translation model
80
+ translation_model = None
81
+ translation_tokenizer = None
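For reference, a minimal standalone sketch of how a pre-loaded M2M100 pair like the one above is driven for a single translation (model name taken from the MODELS table; the example phrase and target language are illustrative):

    # Minimal M2M100 usage sketch; assumes transformers and torch are installed.
    from transformers import M2M100ForConditionalGeneration, M2M100Tokenizer

    tokenizer = M2M100Tokenizer.from_pretrained("facebook/m2m100_418M")
    model = M2M100ForConditionalGeneration.from_pretrained("facebook/m2m100_418M")

    tokenizer.src_lang = "en"  # source language of the input text
    encoded = tokenizer("I want to explore the stars", return_tensors="pt")
    tokens = model.generate(
        **encoded,
        forced_bos_token_id=tokenizer.get_lang_id("fr"),  # force French output
        max_length=512,
    )
    print(tokenizer.batch_decode(tokens, skip_special_tokens=True)[0])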
 
82
 
83
+ # Cache for model loading (excluding translation)
84
  @lru_cache(maxsize=8)
85
  def load_model(task: str, model_name: str = None):
86
  """Cached model loader with proper task names and error handling"""
87
  try:
88
+ logger.info(f"Loading model for task: {task}, model: {model_name or MODELS.get(task)}")
89
  start_time = time.time()
 
90
 
91
+ model_to_load = model_name or MODELS.get(task)
 
92
 
93
+ if task == "chatbot":
 
94
  return genai.GenerativeModel(model_to_load)
95
+
 
96
  if task == "visual-qa":
97
  processor = ViltProcessor.from_pretrained(model_to_load)
98
  model = ViltForQuestionAnswering.from_pretrained(model_to_load)
 
102
  def vqa_function(image, question, **generate_kwargs):
103
  if image.mode != "RGB":
104
  image = image.convert("RGB")
 
105
  inputs = processor(image, question, return_tensors="pt").to(device)
106
  logger.info(f"VQA inputs - question: {question}, image size: {image.size}")
 
107
  with torch.no_grad():
108
  outputs = model(**inputs)
109
+ logits = outputs.logits
110
+ idx = logits.argmax(-1).item()
111
+ answer = model.config.id2label[idx]
 
112
  logger.info(f"VQA raw output: {answer}")
113
  return answer
114
 
115
  return vqa_function
116
 
117
+ return pipeline(task, model=model_to_load)
 
118
 
119
  except Exception as e:
120
  logger.error(f"Model load failed: {str(e)}")
121
  raise HTTPException(status_code=500, detail=f"Model loading failed: {task} - {str(e)}")
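Worth noting: functools.lru_cache keys on the argument tuple, so a second call with the same (task, model_name) returns the already-built object instead of reloading it. A tiny sketch of that behavior (hypothetical loader, illustrative only):

    from functools import lru_cache

    @lru_cache(maxsize=8)
    def expensive_load(task: str, model_name: str = None):
        print(f"loading {task}...")  # printed only on a cache miss
        return {"task": task}        # stand-in for a real model object

    a = expensive_load("summarization")
    b = expensive_load("summarization")  # cache hit: no second "loading" line
    assert a is b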
122
 
123
+ def get_gemini_response(user_input: str, is_generation: bool = False):
124
+ """Function to generate response with Gemini for both chat and text generation"""
125
+ if not user_input:
126
+ return "Please provide some input."
 
 
 
127
  try:
128
+ chatbot = load_model("chatbot")
129
+ if is_generation:
130
+ prompt = f"Generate creative text based on this prompt: {user_input}"
131
+ else:
132
+ prompt = user_input
133
+ response = chatbot.generate_content(prompt)
134
+ return response.text.strip()
135
  except Exception as e:
136
+ return f"Error: {str(e)}"
137
+
138
+ def translate_text(text: str, target_language: str):
139
+ """Translate text to any target language using pre-loaded M2M100 model"""
140
+ if not text:
141
+ return "Please provide text to translate."
142
 
143
  try:
144
+ global translation_model, translation_tokenizer
 
145
 
146
+ target_lang = target_language.lower()
147
+ if target_lang not in SUPPORTED_LANGUAGES:
148
+ similar = [lang for lang in SUPPORTED_LANGUAGES if target_lang in lang or lang in target_lang]
149
+ if similar:
150
+ target_lang = similar[0]
151
+ else:
152
+ return f"Language '{target_language}' not supported. Available languages: {', '.join(SUPPORTED_LANGUAGES.keys())}"
 
153
 
154
+ lang_code = SUPPORTED_LANGUAGES[target_lang]
155
+
156
+ if translation_model is None or translation_tokenizer is None:
157
+ raise Exception("Translation model not initialized")
 
158
 
159
+ match = re.search(r'how to say\s+(.+?)\s+in\s+(\w+)', text.lower())
160
+ if match:
161
+ text_to_translate = match.group(1)
162
  else:
163
+ content_match = re.search(r'(?:translate|convert).*to\s+[a-zA-Z]+\s*[:\s]*(.+)', text, re.IGNORECASE)
164
+ text_to_translate = content_match.group(1) if content_match else text
 
165
 
166
+ translation_tokenizer.src_lang = "en"
167
+ encoded = translation_tokenizer(text_to_translate, return_tensors="pt", padding=True, truncation=True).to(translation_model.device)
 
168
 
169
+ start_time = time.time()
170
+ generated_tokens = translation_model.generate(
171
+ **encoded,
172
+ forced_bos_token_id=translation_tokenizer.get_lang_id(lang_code),
173
+ max_length=512,
174
+ num_beams=1,
175
+ early_stopping=True
176
+ )
177
+ translated_text = translation_tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)[0]
178
+ logger.info(f"Translation took {time.time() - start_time:.2f} seconds")
179
 
180
+ return translated_text
 
 
 
181
 
182
+ except Exception as e:
183
+ logger.error(f"Translation error: {str(e)}", exc_info=True)
184
+ return f"Translation error: {str(e)}"
185
+
186
+ def detect_intent(text: str = None, file: UploadFile = None) -> tuple[str, str]:
187
+ """Enhanced intent detection with dynamic translation and file QA support"""
188
+ target_language = "English" # Default
189
+
190
+ if file:
191
+ content_type = file.content_type.lower() if file.content_type else ""
192
+ filename = file.filename.lower() if file.filename else ""
193
+
194
+ if content_type.startswith('image/') and text:
195
+ text_lower = text.lower()
196
+ if "what’s this" in text_lower:
197
+ return "visual-qa", target_language
198
+ if "does this fly" in text_lower:
199
+ return "visual-qa", target_language
200
+ if "fly" in text_lower and any(q in text_lower for q in ['does', 'can', 'will']):
201
+ return "visual-qa", target_language
202
+
203
+ if content_type.startswith('image/'):
204
+ if text and any(q in text.lower() for q in ['what is', 'what\'s', 'describe', 'tell me about', 'explain','how many', 'what color', 'is there', 'are they', 'does the']):
205
+ return "visual-qa", target_language
206
+ return "image-to-text", target_language
207
+ elif filename.endswith(('.xlsx', '.xls', '.csv')):
208
+ return "visualize", target_language
209
+ elif filename.endswith(('.pdf', '.docx', '.doc', '.txt', '.rtf')):
210
+ if text and any(q in text.lower() for q in ['what is', 'who is', 'where', 'when', 'why', 'how', 'what are', 'who are']):
211
+ return "file-qa", target_language # New intent for file QA
212
+ return "summarize", target_language
213
+
214
+ if not text:
215
+ return "chatbot", target_language
216
+
217
+ text_lower = text.lower()
218
+
219
+ if any(keyword in text_lower for keyword in ['chat', 'talk', 'converse', 'ask gemini']):
220
+ return "chatbot", target_language
221
+
222
+ translate_patterns = [
223
+ r'translate.*to\s+\[?([a-zA-Z]+)\]?:?\s*(.*)',
224
+ r'convert.*to\s+\[?([a-zA-Z]+)\]?:?\s*(.*)',
225
+ r'how to say.*in\s+\[?([a-zA-Z]+)\]?:?\s*(.*)'
226
+ ]
227
+
228
+ for pattern in translate_patterns:
229
+ translate_match = re.search(pattern, text_lower)
230
+ if translate_match:
231
+ potential_lang = translate_match.group(1).lower()
232
+ if potential_lang in SUPPORTED_LANGUAGES:
233
+ target_language = potential_lang.capitalize()
234
+ return "translate", target_language
 
235
  else:
236
+ logger.warning(f"Invalid language detected: {potential_lang}")
237
+ return "chatbot", target_language
238
+
239
+ vqa_patterns = [
240
+ r'how (many|much)',
241
+ r'what (color|size|position|shape)',
242
+ r'is (there|that|this) (a|an)',
243
+ r'are (they|there) (any|some)',
244
+ r'does (the|this) (image|picture) (show|contain)'
245
+ ]
246
+
247
+ if any(re.search(pattern, text_lower) for pattern in vqa_patterns):
248
+ return "visual-qa", target_language
249
+
250
+ summarization_patterns = [
251
+ r'\b(summar(y|ize|ise)|brief( overview)?)\b',
252
+ r'\b(long article|text|document)\b',
253
+ r'\bcan you (summar|brief|condense)\b',
254
+ r'\b(short summary|brief explanation)\b',
255
+ r'\b(overview|main points|key ideas)\b',
256
+ r'\b(tl;?dr|too long didn\'?t read)\b'
257
+ ]
258
+
259
+ if any(re.search(pattern, text_lower) for pattern in summarization_patterns):
260
+ return "summarize", target_language
261
+
262
+ generation_patterns = [
263
+ r'\b(write|generate|create|compose)\b',
264
+ r'\b(story|poem|essay|text|content)\b'
265
+ ]
266
+
267
+ if any(re.search(pattern, text_lower) for pattern in generation_patterns):
268
+ return "text-generation", target_language
269
+
270
+ if len(text) > 100:
271
+ return "summarize", target_language
272
+
273
+ if file and file.content_type and file.content_type.startswith('image/'):
274
+ if text and "what’s this" in text_lower:
275
+ return "visual-qa", target_language
276
+ if text and any(q in text_lower for q in ['does this', 'is this', 'can this']):
277
+ return "visual-qa", target_language
278
+
279
+ return "chatbot", target_language
280
+
281
+ class ProcessResponse(BaseModel):
282
+ response: str
283
+ type: str
284
+ additional_data: Optional[Dict[str, Any]] = None
285
+
286
+ @app.get("/chatbot")
287
+ async def chatbot_interface():
288
+ """Redirect to the static index.html file for the chatbot interface"""
289
+ return RedirectResponse(url="/static/index.html")
290
+
291
+ @app.post("/chat")
292
+ async def chat_endpoint(data: dict):
293
+ message = data.get("message", "")
294
+ if not message:
295
+ raise HTTPException(status_code=400, detail="No message provided")
296
+ try:
297
+ response = get_gemini_response(message)
298
+ return {"response": response}
299
+ except Exception as e:
300
+ raise HTTPException(status_code=500, detail=f"Chat error: {str(e)}")
301
+
302
+ @app.post("/process", response_model=ProcessResponse)
303
+ async def process_input(
304
+ request: Request,
305
+ text: str = Form(None),
306
+ file: UploadFile = File(None)
307
+ ):
308
+ """Enhanced unified endpoint with dynamic translation and file QA"""
309
+ start_time = time.time()
310
+ client_ip = request.client.host
311
+ logger.info(f"Request from {client_ip}: text={text[:50] + '...' if text and len(text) > 50 else text}, file={file.filename if file else None}")
312
+
313
+ intent, target_language = detect_intent(text, file)
314
+ logger.info(f"Detected intent: {intent}, target_language: {target_language}")
315
+
316
+ try:
317
+ if intent == "chatbot":
318
+ response = get_gemini_response(text)
319
+ return {"response": response, "type": "chat"}
320
+
321
+ elif intent == "translate":
322
+ content = await extract_text_from_file(file) if file else text
323
+ if "all languages" in text.lower():
324
+ translations = {}
325
+ phrase_to_translate = "I want to explore the stars" if "I want to explore the stars" in text else content
326
+ for lang, code in SUPPORTED_LANGUAGES.items():
327
+ translation_tokenizer.src_lang = "en"
328
+ encoded = translation_tokenizer(phrase_to_translate, return_tensors="pt").to(translation_model.device)
329
+ generated_tokens = translation_model.generate(
330
+ **encoded,
331
+ forced_bos_token_id=translation_tokenizer.get_lang_id(code),
332
+ max_length=512,
333
+ num_beams=1
334
+ )
335
+ translations[lang] = translation_tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)[0]
336
+ response = "\n".join(f"{lang.capitalize()}: {translations[lang]}" for lang in translations)
337
+ logger.info(f"Translated to all supported languages: {', '.join(translations.keys())}")
338
+ return {"response": response, "type": "translation"}
339
  else:
340
+ translated_text = translate_text(content, target_language)
341
+ return {"response": translated_text, "type": "translation"}
342
+
343
+ elif intent == "summarize":
344
+ content = await extract_text_from_file(file) if file else text
345
+ summarizer = load_model("summarization")
346
+
347
+ content_length = len(content.split())
348
+ max_len = max(30, min(150, content_length//2))
349
+ min_len = max(15, min(30, max_len//2))
350
+
351
+ if len(content) > 1024:
352
+ chunks = [content[i:i+1024] for i in range(0, len(content), 1024)]
353
+ summaries = []
354
 
355
+ for chunk in chunks[:3]:
356
+ summary = summarizer(
357
+ chunk,
358
+ max_length=max_len,
359
+ min_length=min_len,
360
+ do_sample=False,
361
+ truncation=True
362
+ )
363
+ summaries.append(summary[0]['summary_text'])
364
 
365
+ final_summary = " ".join(summaries)
 
 
366
  else:
367
+ summary = summarizer(
368
+ content,
369
+ max_length=max_len,
370
+ min_length=min_len,
371
+ do_sample=False,
372
+ truncation=True
373
+ )
374
+ final_summary = summary[0]['summary_text']
375
+
376
+ final_summary = re.sub(r'\s+', ' ', final_summary).strip()
377
+ return {"response": final_summary, "type": "summary"}
378
+
379
+ elif intent == "image-to-text":
380
+ if not file or not file.content_type.startswith('image/'):
381
+ raise HTTPException(status_code=400, detail="An image file is required")
382
+
383
+ image = Image.open(io.BytesIO(await file.read()))
384
+ captioner = load_model("image-to-text")
385
+
386
+ caption = captioner(image, max_new_tokens=50)
387
+
388
+ return {"response": caption[0]['generated_text'], "type": "caption"}
389
+
390
+ elif intent == "visual-qa":
391
+ if not file or not file.content_type.startswith('image/'):
392
+ raise HTTPException(status_code=400, detail="An image file is required")
393
+ if not text:
394
+ raise HTTPException(status_code=400, detail="A question is required for VQA")
395
+
396
+ image = Image.open(io.BytesIO(await file.read())).convert("RGB")
397
+ vqa_pipeline = load_model("visual-qa")
398
+
399
+ question = text.strip()
400
+ if not question.endswith('?'):
401
+ question += '?'
402
+
403
+ answer = vqa_pipeline(
404
+ image=image,
405
+ question=question
406
+ )
407
+
408
+ answer = answer.strip()
409
+ if not answer or answer.lower() == question.lower():
410
+ logger.warning(f"VQA failed to generate a meaningful answer: {answer}")
411
+ answer = "I couldn't determine the answer from the image."
412
+ else:
413
+ answer = answer.capitalize()
414
+ if not answer.endswith(('.', '!', '?')):
415
+ answer += '.'
416
+ chatbot = load_model("chatbot")
417
+ if "fly" in question.lower():
418
+ answer = chatbot.generate_content(f"Make this fun and spacey: {answer}").text.strip()
419
+ else:
420
+ answer = chatbot.generate_content(f"Make this cosmic and poetic: {answer}").text.strip()
421
+
422
+ logger.info(f"Final VQA answer: {answer}")
423
+
424
+ return {
425
+ "response": answer,
426
+ "type": "visual_qa",
427
+ "additional_data": {
428
+ "question": text,
429
+ "image_size": f"{image.width}x{image.height}"
430
+ }
431
+ }
432
+
433
+ elif intent == "visualize":
434
+ if not file:
435
+ raise HTTPException(status_code=400, detail="An Excel file is required")
436
+
437
+ file_content = await file.read()
438
+
439
+ if file.filename.endswith('.csv'):
440
+ df = pd.read_csv(io.BytesIO(file_content))
441
+ else:
442
+ df = pd.read_excel(io.BytesIO(file_content))
443
+
444
+ code = generate_visualization_code(df, text)
445
+ stats = df.describe().to_string()
446
+ response = f"Stats:\n{stats}\n\nChart Code:\n{code}"
447
+
448
+ return {"response": response, "type": "visualization_code"}
449
+
450
+ elif intent == "text-generation":
451
+ response = get_gemini_response(text, is_generation=True)
452
+ lines = response.split(". ")
453
+ formatted_poem = "\n".join(line.strip() + ("." if not line.endswith(".") else "") for line in lines if line)
454
+ return {"response": formatted_poem, "type": "generated_text"}
455
+
456
+ elif intent == "file-qa":
457
+ if not file or not file.filename.lower().endswith(('.pdf', '.docx', '.doc', '.txt', '.rtf')):
458
+ raise HTTPException(status_code=400, detail="A text-based file (PDF, DOCX, TXT, RTF) is required")
459
+ if not text:
460
+ raise HTTPException(status_code=400, detail="A question about the file is required")
461
+
462
+ content = await extract_text_from_file(file)
463
+ if not content.strip():
464
+ raise HTTPException(status_code=400, detail="No text could be extracted from the file")
465
+
466
+ qa_pipeline = load_model("file-qa")
467
+
468
+ question = text.strip()
469
+ if not question.endswith('?'):
470
+ question += '?'
471
+
472
+ # Chunk content if too long (model context limit ~512 tokens)
473
+ if len(content) > 1024:
474
+ chunks = [content[i:i+1024] for i in range(0, len(content), 1024)]
475
+ answers = []
476
+ for chunk in chunks[:3]: # Limit to 3 chunks to avoid excessive processing
477
+ result = qa_pipeline(question=question, context=chunk)
478
+ if result['score'] > 0.1: # Only include high-confidence answers
479
+ answers.append((result['answer'], result['score']))
480
+ if answers:
481
+ # Select the answer with the highest confidence score
482
+ best_answer = max(answers, key=lambda x: x[1])[0]
483
  else:
484
+ best_answer = "I couldn't find a clear answer in the document."
 
485
  else:
486
+ result = qa_pipeline(question=question, context=content)
487
+ best_answer = result['answer'] if result['score'] > 0.1 else "I couldn't find a clear answer in the document."
 
488
 
489
+ best_answer = best_answer.strip().capitalize()
490
+ if not best_answer.endswith(('.', '!', '?')):
491
+ best_answer += '.'
492
 
493
+ # Add cosmic tone
494
+ chatbot = load_model("chatbot")
495
+ final_answer = chatbot.generate_content(f"Make this cosmic and poetic: {best_answer}").text.strip()
 
 
496
 
497
+ logger.info(f"File QA answer: {final_answer}")
498
+
499
+ return {
500
+ "response": final_answer,
501
+ "type": "file_qa",
502
+ "additional_data": {
503
+ "question": text,
504
+ "file_name": file.filename
505
+ }
506
+ }
 
507
 
 
508
  else:
509
+ response = get_gemini_response(text or "Hello! How can I assist you?")
510
+ return {"response": response, "type": "chat"}
 
511
 
512
  except Exception as e:
513
+ logger.error(f"Processing error: {str(e)}", exc_info=True)
514
+ raise HTTPException(status_code=500, detail=str(e))
515
+ finally:
516
+ process_time = time.time() - start_time
517
+ logger.info(f"Request processed in {process_time:.2f} seconds")
518
+
519
+ async def extract_text_from_file(file: UploadFile) -> str:
520
+ """Enhanced text extraction with multiple fallbacks"""
521
+ if not file:
522
+ return ""
523
+
524
+ content = await file.read()
525
+ filename = file.filename.lower()
526
 
527
  try:
528
+ if filename.endswith('.pdf'):
529
+ try:
530
+ doc = fitz.open(stream=content, filetype="pdf")
531
+ if doc.is_encrypted:
532
+ return "PDF is encrypted and cannot be read"
533
+ text = ""
534
+ for page in doc:
535
+ text += page.get_text()
536
+ return text
537
+ except Exception as pdf_error:
538
+ logger.warning(f"PyMuPDF failed: {str(pdf_error)}. Trying pdfminer.six...")
539
+ from pdfminer.high_level import extract_text
540
+ from io import BytesIO
541
+ return extract_text(BytesIO(content))
542
+
543
+ elif filename.endswith(('.docx', '.doc')):
544
+ doc = Document(io.BytesIO(content))
545
+ return "\n".join(para.text for para in doc.paragraphs)
546
+
547
+ elif filename.endswith('.txt'):
548
+ return content.decode('utf-8', errors='replace')
549
+
550
+ elif filename.endswith('.rtf'):
551
+ text = content.decode('utf-8', errors='replace')
552
+ text = re.sub(r'\\[a-z]+', ' ', text)
553
+ text = re.sub(r'\{|\}|\\', '', text)
554
+ return text
555
+
556
+ else:
557
+ raise HTTPException(status_code=400, detail=f"Unsupported file format: {filename}")
558
+
559
+ except Exception as e:
560
+ logger.error(f"File extraction error: {str(e)}", exc_info=True)
561
+ raise HTTPException(
562
+ status_code=500,
563
+ detail=f"Error extracting text: {str(e)}. Supported formats: PDF, DOCX, TXT, RTF"
564
  )
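The RTF branch above uses two regex passes for a rough cleanup; on a toy fragment they behave like this (illustrative, and deliberately basic):

    import re

    raw = r"{\rtf1\ansi Hello {\b cosmic} world}"
    text = re.sub(r'\\[a-z]+', ' ', raw)   # drop control words such as \ansi, \b
    text = re.sub(r'\{|\}|\\', '', text)   # drop braces and stray backslashes
    print(text)  # roughly "1 Hello cosmic world" with stray spaces; the digit from \rtf1 survives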
565
+
566
+ def generate_visualization_code(df: pd.DataFrame, request: str = None) -> str:
567
+ """Generate visualization code based on data analysis"""
568
+ num_rows, num_cols = df.shape
569
+ numeric_cols = df.select_dtypes(include=[np.number]).columns.tolist()
570
+ categorical_cols = df.select_dtypes(include=['object']).columns.tolist()
571
+ date_cols = [col for col in df.columns if df[col].dtype == 'datetime64[ns]' or
572
+ (isinstance(df[col].dtype, object) and pd.to_datetime(df[col], errors='coerce').notna().all())]
573
+
574
+ if request:
575
+ request_lower = request.lower()
576
+ else:
577
+ request_lower = ""
578
+
579
+ if len(numeric_cols) >= 2 and ("scatter" in request_lower or "correlation" in request_lower):
580
+ x_col = numeric_cols[0]
581
+ y_col = numeric_cols[1]
582
+ return f"""import pandas as pd
583
+ import matplotlib.pyplot as plt
584
+ import seaborn as sns
585
+ df = pd.read_excel('data.xlsx')
586
+ plt.figure(figsize=(10, 6))
587
+ sns.regplot(x='{x_col}', y='{y_col}', data=df, scatter_kws={{'alpha': 0.6}})
588
+ plt.title('Correlation between {x_col} and {y_col}')
589
+ plt.grid(True, alpha=0.3)
590
+ plt.tight_layout()
591
+ plt.savefig('correlation_plot.png')
592
+ plt.show()
593
+ correlation = df['{x_col}'].corr(df['{y_col}'])
594
+ print(f"Correlation coefficient: {{correlation:.4f}}")"""
595
+
596
+ elif len(numeric_cols) >= 1 and len(categorical_cols) >= 1 and ("bar" in request_lower or "comparison" in request_lower):
597
+ cat_col = categorical_cols[0]
598
+ num_col = numeric_cols[0]
599
+ return f"""import pandas as pd
600
+ import matplotlib.pyplot as plt
601
+ import seaborn as sns
602
+ df = pd.read_excel('data.xlsx')
603
+ plt.figure(figsize=(12, 7))
604
+ ax = sns.barplot(x='{cat_col}', y='{num_col}', data=df, palette='viridis')
605
+ for p in ax.patches:
606
+ ax.annotate(f'{{p.get_height():.1f}}',
607
+ (p.get_x() + p.get_width() / 2., p.get_height()),
608
+ ha='center', va='bottom', fontsize=10, color='black', xytext=(0, 5),
609
+ textcoords='offset points')
610
+ plt.title('Comparison of {num_col} by {cat_col}', fontsize=15)
611
+ plt.xlabel('{cat_col}', fontsize=12)
612
+ plt.ylabel('{num_col}', fontsize=12)
613
+ plt.xticks(rotation=45, ha='right')
614
+ plt.grid(axis='y', alpha=0.3)
615
+ plt.tight_layout()
616
+ plt.savefig('comparison_chart.png')
617
+ plt.show()"""
618
+
619
+ elif len(numeric_cols) >= 1 and ("distribution" in request_lower or "histogram" in request_lower):
620
+ num_col = numeric_cols[0]
621
+ return f"""import pandas as pd
622
+ import matplotlib.pyplot as plt
623
+ import seaborn as sns
624
+ df = pd.read_excel('data.xlsx')
625
+ plt.figure(figsize=(10, 6))
626
+ sns.histplot(df['{num_col}'], kde=True, bins=20, color='purple')
627
+ plt.title('Distribution of {num_col}', fontsize=15)
628
+ plt.xlabel('{num_col}', fontsize=12)
629
+ plt.ylabel('Frequency', fontsize=12)
630
+ plt.grid(True, alpha=0.3)
631
+ plt.tight_layout()
632
+ plt.savefig('distribution_plot.png')
633
+ plt.show()
634
+ print(df['{num_col}'].describe())"""
635
+
636
+ else:
637
+ return f"""import pandas as pd
638
+ import matplotlib.pyplot as plt
639
+ import seaborn as sns
640
+ import numpy as np
641
+ df = pd.read_excel('data.xlsx')
642
+ print("Descriptive statistics:")
643
+ print(df.describe())
644
+ fig, axes = plt.subplots(2, 2, figsize=(15, 12))
645
+ numeric_df = df.select_dtypes(include=[np.number])
646
+ if not numeric_df.empty and numeric_df.shape[1] > 1:
647
+ sns.heatmap(numeric_df.corr(), annot=True, cmap='coolwarm', fmt='.2f', ax=axes[0, 0])
648
+ axes[0, 0].set_title('Correlation Matrix')
649
+ if not numeric_df.empty:
650
+ for i, col in enumerate(numeric_df.columns[:1]):
651
+ sns.histplot(df[col], kde=True, ax=axes[0, 1], color='purple')
652
+ axes[0, 1].set_title(f'Distribution of {{col}}')
653
+ axes[0, 1].set_xlabel(col)
654
+ axes[0, 1].set_ylabel('Frequency')
655
+ categorical_cols = df.select_dtypes(include=['object']).columns
656
+ if len(categorical_cols) > 0 and not numeric_df.empty:
657
+ cat_col = categorical_cols[0]
658
+ num_col = numeric_df.columns[0]
659
+ sns.barplot(x=cat_col, y=num_col, data=df, ax=axes[1, 0], palette='viridis')
660
+ axes[1, 0].set_title(f'{{num_col}} by {{cat_col}}')
661
+ axes[1, 0].set_xticklabels(axes[1, 0].get_xticklabels(), rotation=45, ha='right')
662
+ if not numeric_df.empty and len(categorical_cols) > 0:
663
+ cat_col = categorical_cols[0]
664
+ num_col = numeric_df.columns[0]
665
+ sns.boxplot(x=cat_col, y=num_col, data=df, ax=axes[1, 1], palette='Set3')
666
+ axes[1, 1].set_title(f'Distribution of {{num_col}} by {{cat_col}}')
667
+ axes[1, 1].set_xticklabels(axes[1, 1].get_xticklabels(), rotation=45, ha='right')
668
+ plt.tight_layout()
669
+ plt.savefig('dashboard.png')
670
+ plt.show()"""
671
+
672
+ @app.get("/", include_in_schema=False)
673
+ async def home():
674
+ """Redirect to the static index.html file"""
675
+ return RedirectResponse(url="/static/index.html")
676
+
677
+ @app.get("/health", include_in_schema=True)
678
+ async def health_check():
679
+ """Health check endpoint"""
680
+ return {"status": "healthy", "version": "2.0.0"}
681
+
682
+ @app.get("/models", include_in_schema=True)
683
+ async def list_models():
684
+ """List available models"""
685
+ return {"models": MODELS}
686
+
687
+ @app.on_event("startup")
688
+ async def startup_event():
689
+ """Pre-load models at startup with timeout"""
690
+ global translation_model, translation_tokenizer
691
+ logger.info("Starting model pre-loading...")
692
+
693
+ async def load_model_with_timeout(task):
694
+ try:
695
+ await asyncio.wait_for(asyncio.to_thread(load_model, task), timeout=60.0)
696
+ logger.info(f"Successfully loaded {task} model")
697
+ except asyncio.TimeoutError:
698
+ logger.warning(f"Timeout loading {task} model - will load on demand")
699
+ except Exception as e:
700
+ logger.error(f"Error pre-loading {task}: {str(e)}")
701
+
702
+ try:
703
+ model_name = MODELS["translation"]
704
+ translation_model = M2M100ForConditionalGeneration.from_pretrained(model_name)
705
+ translation_tokenizer = M2M100Tokenizer.from_pretrained(model_name)
706
+ device = "cuda" if torch.cuda.is_available() else "cpu"
707
+ translation_model.to(device)
708
+ logger.info("Translation model pre-loaded successfully")
709
  except Exception as e:
710
+ logger.error(f"Error pre-loading translation model: {str(e)}")
711
+
712
+ await asyncio.gather(
713
+ load_model_with_timeout("summarization"),
714
+ load_model_with_timeout("image-to-text"),
715
+ load_model_with_timeout("visual-qa"),
716
+ load_model_with_timeout("chatbot"),
717
+ load_model_with_timeout("file-qa") # Pre-load file QA model
718
+ )
719
+
720
+ if __name__ == "__main__":
721
+ import uvicorn
722
+ uvicorn.run("app:app", host="0.0.0.0", port=7860, reload=True)