Spaces:

Hadiil
/

Web-ai-app

Sleeping

App Files Files Community

Hadiil committed on Apr 2

Commit

3bd7faf

verified ·

1 Parent(s): 97e0748

Update app.py

Browse files

Files changed (1) hide show

app.py +123 -101

app.py CHANGED Viewed

@@ -1,7 +1,8 @@
 from fastapi import FastAPI, UploadFile, File, Form, HTTPException
 from fastapi.staticfiles import StaticFiles
-from fastapi.responses import RedirectResponse
 from transformers import pipeline, MarianMTModel, MarianTokenizer
 from typing import Optional
 import logging
 from PIL import Image
@@ -10,18 +11,18 @@ from docx import Document
 import fitz  # PyMuPDF
 import pandas as pd
 from functools import lru_cache
-import os
 # Configure logging
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
-app = FastAPI(title="Vion AI Chatbot")
 # Mount static files
 app.mount("/static", StaticFiles(directory="static"), name="static")
-# ---- Model Initialization ----
 MODELS = {
     "summarization": "t5-small",
     "translation": {
@@ -30,12 +31,11 @@ MODELS = {
         "de": "Helsinki-NLP/opus-mt-en-de"
     },
     "image_captioning": "Salesforce/blip-image-captioning-base",
-    "qa": "deepset/roberta-base-squad2"  # Better for QA than t5-small
 }
-@lru_cache(maxsize=1)
-def get_pipeline(task: str, model_name: str = None):
-    """Cached model loader with error handling"""
     try:
         if task == "translation" and model_name:
             tokenizer = MarianTokenizer.from_pretrained(model_name)
@@ -43,108 +43,130 @@ def get_pipeline(task: str, model_name: str = None):
             return pipeline("translation", model=model, tokenizer=tokenizer)
         return pipeline(task, model=model_name or MODELS.get(task))
     except Exception as e:
-        logger.error(f"Failed to load {task} model: {str(e)}")
-        raise HTTPException(status_code=500, detail=f"Model loading failed: {task}")
-# ---- Core Endpoints ----
-@app.post("/summarize")
-async def summarize_text(file: UploadFile = File(None), text: str = Form(None)):
-    """Improved summarization endpoint"""
     try:
-        if file:
-            text = await extract_text_from_file(file)
-        elif not text:
-            raise HTTPException(status_code=400, detail="No content provided")
-        summarizer = get_pipeline("summarization")
-        summary = summarizer(
-            f"summarize: {text[:2000]}",  # Truncate long texts
-            max_length=150,
-            min_length=30,
-            do_sample=False
-        )
-        return {"summary": summary[0]['summary_text']}
-    except Exception as e:
-        logger.error(f"Summarization error: {str(e)}")
-        raise HTTPException(status_code=500, detail="Summarization failed")
-@app.post("/answer")
-async def answer_question(
-    question: str = Form(...),
-    context: str = Form(None),
-    file: UploadFile = File(None)
-):
-    """Fixed QA endpoint with proper answer extraction"""
-    try:
-        if file:
-            context = await extract_text_from_file(file)
-        elif not context:
-            raise HTTPException(status_code=400, detail="Missing context")
-        qa_pipeline = get_pipeline("qa")
-        result = qa_pipeline(question=question, context=context[:2000])  # Truncate long contexts
-        return {"answer": result["answer"]}
-    except Exception as e:
-        logger.error(f"QA error: {str(e)}")
-        raise HTTPException(status_code=500, detail="Question answering failed")
-@app.post("/caption")
-async def caption_image(file: UploadFile = File(...)):
-    """Image captioning endpoint"""
-    try:
-        if file.size > 5 * 1024 * 1024:  # 5MB limit
-            raise HTTPException(status_code=413, detail="File too large (max 5MB)")
-        image = Image.open(io.BytesIO(await file.read()))
-        if image.format not in ["JPEG", "PNG"]:
-            raise HTTPException(status_code=400, detail="Only JPEG/PNG supported")
-        captioner = get_pipeline("image_captioning")
-        result = captioner(image)
-        return {"caption": result[0]['generated_text']}
-    except Exception as e:
-        logger.error(f"Captioning error: {str(e)}")
-        raise HTTPException(status_code=500, detail="Image processing failed")
-@app.post("/translate")
-async def translate_text(
-    text: str = Form(...),
-    target_lang: str = Form(...),
     file: UploadFile = File(None)
 ):
-    """Translation endpoint"""
     try:
-        if file:
-            text = await extract_text_from_file(file)
-        if target_lang not in MODELS["translation"]:
-            raise HTTPException(status_code=400, detail="Unsupported language")
-        translator = get_pipeline("translation", MODELS["translation"][target_lang])
-        translated = translator(text[:1000])  # Limit translation length
-        return {"translation": translated[0]['translation_text']}
     except Exception as e:
-        logger.error(f"Translation error: {str(e)}")
-        raise HTTPException(status_code=500, detail="Translation failed")
-# ---- Helper Functions ----
 async def extract_text_from_file(file: UploadFile) -> str:
-    """Extracts text from PDF/DOCX/TXT files"""
-    try:
-        content = await file.read()
-        if file.filename.endswith(".pdf"):
-            doc = fitz.open(stream=content, filetype="pdf")
-            return " ".join([page.get_text() for page in doc])
-        elif file.filename.endswith(".docx"):
-            doc = Document(io.BytesIO(content))
-            return "\n".join([para.text for para in doc.paragraphs])
-        elif file.filename.endswith(".txt"):
-            return content.decode("utf-8")
-        else:
-            raise HTTPException(status_code=400, detail="Unsupported file type")
-    except Exception as e:
-        logger.error(f"File extraction error: {str(e)}")
-        raise HTTPException(status_code=500, detail="File processing failed")
 @app.get("/", include_in_schema=False)
 async def home():

 from fastapi import FastAPI, UploadFile, File, Form, HTTPException
 from fastapi.staticfiles import StaticFiles
+from fastapi.responses import RedirectResponse, JSONResponse
 from transformers import pipeline, MarianMTModel, MarianTokenizer
+from langdetect import detect, LangDetectException
 from typing import Optional
 import logging
 from PIL import Image
 import fitz  # PyMuPDF
 import pandas as pd
 from functools import lru_cache
+import re
 # Configure logging
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
+app = FastAPI(title="Auto-Detect AI Chatbot")
 # Mount static files
 app.mount("/static", StaticFiles(directory="static"), name="static")
+# Model configurations
 MODELS = {
     "summarization": "t5-small",
     "translation": {
         "de": "Helsinki-NLP/opus-mt-en-de"
     },
     "image_captioning": "Salesforce/blip-image-captioning-base",
+    "qa": "deepset/roberta-base-squad2"
 }
+@lru_cache(maxsize=4)
+def load_model(task: str, model_name: str = None):
     try:
         if task == "translation" and model_name:
             tokenizer = MarianTokenizer.from_pretrained(model_name)
             return pipeline("translation", model=model, tokenizer=tokenizer)
         return pipeline(task, model=model_name or MODELS.get(task))
     except Exception as e:
+        logger.error(f"Model load failed: {str(e)}")
+        raise HTTPException(status_code=500, detail="Model loading error")
+def detect_intent(text: str = None, file: UploadFile = None) -> str:
+    """Guess which processing branch an input should be routed to.
+
+    An uploaded file is classified first: an ``image/*`` content type gives
+    ``"image_caption"``, an Excel extension gives ``"visualize"`` and a
+    PDF/DOCX/TXT extension gives ``"summarize"``.  Any other upload falls
+    through to the text rules, applied in this order: translation keywords
+    or language codes, question words, non-English text longer than three
+    words, and finally text longer than 100 characters.
+
+    Args:
+        text: Free-form user message, or None.
+        file: Uploaded file, or None.
+
+    Returns:
+        One of "image_caption", "visualize", "summarize", "translate",
+        "qa" or "unknown".
+    """
+    # File-based detection
+    if file:
+        # Either attribute may be None on an upload, and extensions are
+        # frequently upper-case, so normalise both before testing them.
+        content_type = (file.content_type or "").lower()
+        filename = (file.filename or "").lower()
+        if content_type.startswith('image/'):
+            return "image_caption"
+        if filename.endswith(('.xlsx', '.xls')):
+            return "visualize"
+        if filename.endswith(('.pdf', '.docx', '.txt')):
+            return "summarize"
+    # Text analysis
+    if not text:
+        return "unknown"
+    text_lower = text.lower()
+    # Translation detection
+    if re.search(r'\b(?:fr|es|de|translate|traduire)\b', text_lower):
+        return "translate"
+    # Question detection: match whole words so that e.g. "show" is not
+    # mistaken for "how"; a literal question mark counts anywhere.
+    if '?' in text_lower or re.search(r'\b(?:what|when|why|how|explain)\b', text_lower):
+        return "qa"
+    # Language detection for non-English text
     try:
+        if detect(text) != 'en' and len(text.split()) > 3:
+            return "translate"
+    except LangDetectException:
+        # Language could not be determined: fall through to the length rule.
+        pass
+    # Default to summarization for long text
+    if len(text) > 100:
+        return "summarize"
+    return "unknown"
+@app.post("/process")
+async def process_input(
+    text: str = Form(None),
     file: UploadFile = File(None)
 ):
+    """Unified endpoint: detect the intent of the input and dispatch on it.
+
+    Args:
+        text: Optional form field holding the user's message.
+        file: Optional uploaded file.
+
+    Returns:
+        A dict with "response" (the produced text) and "type", one of
+        "summary", "translation", "answer", "caption",
+        "visualization_code" or "clarification".
+
+    Raises:
+        HTTPException: 400 when the requested translation language has no
+            configured model or a question arrives without any document
+            text; any HTTPException raised by a helper is propagated
+            unchanged; every other failure is reported as a 500.
+    """
+    intent = detect_intent(text, file)
+    logger.info(f"Detected intent: {intent}")
     try:
+        if intent == "summarize":
+            content = await extract_text_from_file(file) if file else text
+            summarizer = load_model("summarization")
+            summary = summarizer(
+                f"summarize: {content[:2000]}",
+                max_length=150,
+                min_length=30
+            )
+            return {"response": summary[0]['summary_text'], "type": "summary"}
+        elif intent == "translate":
+            content = await extract_text_from_file(file) if file else text
+            # Extract target language
+            target_lang = "fr"  # Default
+            if text:
+                match = re.search(r'\b(fr|es|de)\b', text.lower())
+                if match:
+                    target_lang = match.group(1)
+            # Look the model up defensively: a language code without a
+            # configured model is a client error, not a server crash.
+            model_name = MODELS["translation"].get(target_lang)
+            if model_name is None:
+                raise HTTPException(
+                    status_code=400,
+                    detail=f"Unsupported target language: {target_lang}"
+                )
+            translator = load_model("translation", model_name)
+            translated = translator(content[:1000])
+            return {"response": translated[0]['translation_text'], "type": "translation"}
+        elif intent == "qa":
+            context = await extract_text_from_file(file) if file else None
+            # The question-answering pipeline rejects an empty context, so
+            # report the missing document up front with a clear message.
+            if not context or not context.strip():
+                raise HTTPException(
+                    status_code=400,
+                    detail="A document is required to answer questions"
+                )
+            qa_pipeline = load_model("qa")
+            result = qa_pipeline(question=text, context=context[:2000])
+            return {"response": result["answer"], "type": "answer"}
+        elif intent == "image_caption":
+            image = Image.open(io.BytesIO(await file.read()))
+            captioner = load_model("image_captioning")
+            caption = captioner(image)
+            return {"response": caption[0]['generated_text'], "type": "caption"}
+        elif intent == "visualize":
+            df = pd.read_excel(io.BytesIO(await file.read()))
+            # text is optional for uploads; pass an empty request instead of None.
+            code = generate_visualization_code(df, text or "")
+            return {"response": code, "type": "visualization_code"}
+        else:
+            return {"response": "Please clarify your request", "type": "clarification"}
+    except HTTPException:
+        # Keep the status code chosen by this handler or by a helper
+        # instead of rewrapping it as a generic 500 below.
+        raise
     except Exception as e:
+        logger.error(f"Processing error: {str(e)}")
+        raise HTTPException(status_code=500, detail=str(e)) from e
 async def extract_text_from_file(file: UploadFile) -> str:
+    """Extract plain text from an uploaded PDF, DOCX or TXT file.
+
+    The format is chosen from the file name's extension, compared
+    case-insensitively.
+
+    Args:
+        file: The upload to read.
+
+    Returns:
+        PDF page texts joined by spaces, DOCX paragraph texts joined by
+        newlines, or the UTF-8 decoded content of a TXT file.
+
+    Raises:
+        HTTPException: 400 when the extension is not supported (including
+            a missing file name) or a TXT file is not valid UTF-8.
+    """
+    # filename may be None on an upload; treat that as "no extension".
+    filename = (file.filename or "").lower()
+    if not filename.endswith(('.pdf', '.docx', '.txt')):
+        raise HTTPException(status_code=400, detail="Unsupported file type")
+    content = await file.read()
+    if filename.endswith('.pdf'):
+        # Use the document as a context manager so it is closed once the
+        # page text has been collected.
+        with fitz.open(stream=content, filetype="pdf") as doc:
+            return " ".join(page.get_text() for page in doc)
+    if filename.endswith('.docx'):
+        doc = Document(io.BytesIO(content))
+        return "\n".join(para.text for para in doc.paragraphs)
+    try:
+        return content.decode('utf-8')
+    except UnicodeDecodeError:
+        raise HTTPException(status_code=400, detail="Text file is not valid UTF-8") from None
+def generate_visualization_code(df: pd.DataFrame, request: str) -> str:
+    """Return a short Python snippet that plots ``df``.
+
+    A bar chart of the first two columns is produced when ``request``
+    mentions "bar" and ``df`` has at least two columns; otherwise the
+    snippet draws a seaborn pair plot.  The snippet refers to the frame by
+    the name ``df`` and is returned as text, not executed.
+
+    Args:
+        df: Data frame whose column labels are written into the snippet.
+        request: The user's free-text request; None is treated as empty.
+
+    Returns:
+        The generated source code as a newline-joined string.
+    """
+    def column_expr(position: int) -> str:
+        # repr() escapes quotes inside string labels; non-string labels are
+        # addressed through df.columns so the snippet stays valid for them.
+        label = df.columns[position]
+        if isinstance(label, str):
+            return f"df[{label!r}]"
+        return f"df[df.columns[{position}]]"
+
+    wants_bar = "bar" in (request or "").lower()
+    # Both snippets call plt.*, so matplotlib is imported in both.
+    lines = ["import matplotlib.pyplot as plt"]
+    if wants_bar and len(df.columns) >= 2:
+        lines.append(f"plt.bar({column_expr(0)}, {column_expr(1)})")
+        lines.append("plt.title('Bar Chart')")
+    else:
+        lines.insert(0, "import seaborn as sns")
+        lines.append("sns.pairplot(df)")
+        lines.append("plt.title('Data Visualization')")
+    lines.append("plt.show()")
+    return "\n".join(lines)
 @app.get("/", include_in_schema=False)
 async def home():