Imsg

Paused

App Files Files Community

Makhinur committed on May 1

Commit

e4ba08d

verified ·

1 Parent(s): 0db54a4

Update main.py

Browse files

Files changed (1) hide show

main.py +134 -99

main.py CHANGED Viewed

@@ -5,15 +5,18 @@ from fastapi import FastAPI, File, UploadFile, Form, HTTPException
 # from fastapi.templating import Jinja2Templates
 # from fastapi.responses import FileResponse
-import requests
-import base64
 import os
 import random
 # Import necessary classes from transformers
 import torch
-from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig # Added BitsAndBytesConfig in case you ever need quantization
 from deep_translator import GoogleTranslator
 from deep_translator.exceptions import InvalidSourceOrTargetLanguage
@@ -22,125 +25,164 @@ from deep_translator.exceptions import InvalidSourceOrTargetLanguage
 app = FastAPI()
 # --- Hugging Face Model Setup (Local) ---
-# Model name for Gemma 2B Instruction-Tuned
-# This version is trained to follow instructions, ideal for your task.
-model_name = "google/gemma-2b-it"
 tokenizer = None
 model = None
-# Function to load the model and tokenizer
 def load_model():
     global tokenizer, model
-    print(f"Loading model: {model_name}...")
-    # Load tokenizer
-    # trust_remote_code=True might be needed for some newer models/features,
-    # but standard Gemma usually works without it. Let's omit it for security unless necessary.
     tokenizer = AutoTokenizer.from_pretrained(model_name)
-    # Load model - Gemma can be loaded in float16 to save RAM
-    # On CPU, float16 performance can vary, but it reduces memory bandwidth
-    # which can sometimes help. 16GB RAM is plenty for Gemma 2B float16 (~2GB).
-    # We don't need quantization (load_in_8bit/4bit) for Gemma 2B with 16GB RAM,
-    # but it's an option for larger models or less RAM.
     model = AutoModelForCausalLM.from_pretrained(
         model_name,
-        torch_dtype=torch.float16, # Use float16 precision
-        # device_map="auto" # Not strictly needed for single CPU inference
     )
-    # model.to("cpu") # Explicitly ensure it's on CPU, although from_pretrained on CPU does this.
-    print(f"Model {model_name} loaded successfully.")
-# Load the model when the app starts
 @app.on_event("startup")
 async def startup_event():
     load_model()
-# --- Image Captioning (External API - Keep) ---
-# Keep this as it is, it uses an external service
-def generate_image_caption(image_data):
-    payload = {"data": ["data:image/jpeg;base64," + base64.b64encode(image_data).decode('utf-8')]}
-    # Use the correct URL for the captioning API. This is the one from your original code.
-    # Ensure it's stable or replace if needed.
-    response = requests.post("https://makhinur-image-to-text-salesforce-blip-image-cap-c0a9076.hf.space/run/predict", json=payload)
-    if response.status_code == 200:
-        try:
-            result = response.json()
-            caption = result.get("data", ["Error: Unexpected API response format"])[0]
-            return caption
-        except Exception as e:
-            return f"Error: Failed to parse caption API response: {e}"
-    else:
-        return f"Error: Caption API returned status code {response.status_code}: {response.text}"
-# --- Gemma Story Generation Function ---
-# Replace the old generation function with one specific to Gemma-IT
-def generate_story_gemma(prompt_text: str, max_new_tokens: int = 300, temperature: float = 0.7, top_p: float = 0.9, top_k: int = 50) -> str:
     """
-    Generates text using the loaded Gemma model.
-    Applies the Gemma-IT chat template to the prompt.
     """
     if tokenizer is None or model is None:
-        raise RuntimeError("Model and tokenizer not loaded. App startup failed?")
-    # Gemma-IT uses a specific chat template. We wrap the user's prompt in it.
     messages = [
         {"role": "user", "content": prompt_text}
-        # You could add a system prompt here if desired, but Gemma-IT
-        # often works well with a detailed user prompt.
     ]
-    # Apply the chat template. This adds the necessary special tokens
-    # and formatting for the model to understand the instruction.
-    # `add_generation_prompt=True` adds the token that signals the model
-    # should start generating its response.
-    input_text = tokenizer.apply_chat_template(
-        messages,
-        tokenize=False, # Keep as string for encoding later
-        add_generation_prompt=True # Add the assistant turn prompt
-    )
-    # Encode the templated prompt
-    # Max length should consider the prompt length + generated length
-    # Max input context for Gemma is 8192 tokens, but keeping prompt shorter is better for CPU
-    inputs = tokenizer(input_text, return_tensors="pt", padding=True, truncation=True, max_length=1024) # Using a reasonable max_length for input
-    # Ensure inputs are on the correct device (CPU by default)
-    # inputs = {k: v.to(model.device) for k, v in inputs.items()} # Redundant on CPU
-    # Generate text
-    # The generate method returns the input_ids plus the generated tokens
     generate_ids = model.generate(
         inputs.input_ids,
         max_new_tokens=max_new_tokens,
-        do_sample=True, # Set to True for creative text generation
         temperature=temperature,
         top_p=top_p,
         top_k=top_k,
-        pad_token_id=tokenizer.pad_token_id, # Use the pad token during generation
-        # Gemma's EOS token is handled by default generate logic often
-        # eos_token_id=tokenizer.eos_token_id
     )
-    # Decode the generated text.
-    # We slice generate_ids to exclude the input prompt tokens, only decoding the new ones.
-    # The slicing [0, inputs.input_ids.shape[-1]:] selects the generated part for the first (and only) item in the batch
-    # The `skip_special_tokens=True` removes special tokens like <start_of_turn>, <end_of_turn>, <eos>
     generated_text = tokenizer.decode(generate_ids[0, inputs.input_ids.shape[-1]:], skip_special_tokens=True)
-    # Gemma responses might sometimes include extra whitespace or turn markers if decoding is not perfect.
-    # Further cleanup might be needed depending on the exact output format, but skip_special_tokens helps.
-    # We can also remove leading/trailing whitespace.
     return generated_text.strip()
 # --- FastAPI Endpoint ---
 @app.post("/generate-story/")
 async def generate_story_endpoint(image_file: UploadFile = File(...), language: str = Form(...)):
-    image_data = await image_file.read()
-    # Choose a random theme for the story prompt
     story_theme = random.choice([
         'an adventurous journey',
         'a mysterious encounter',
@@ -154,35 +196,33 @@ async def generate_story_endpoint(image_file: UploadFile = File(...), language:
         'a journey into the unknown'
     ])
-    # Get image caption
-    caption = generate_image_caption(image_data)
     if caption.startswith("Error"):
         print(f"Caption generation failed: {caption}")
         raise HTTPException(status_code=500, detail=caption)
-    # Construct the detailed prompt for Gemma-IT.
-    # Instruct it clearly to write a story based on the theme and incorporating the caption.
     prompt_text = f"Write an attractive story of around 300 words about {story_theme}. Incorporate the following details from an image description into the story: {caption}\n\nStory:"
-    # Generate the story using the local Gemma model
     try:
-        story = generate_story_gemma(
             prompt_text,
-            max_new_tokens=300, # Generate up to 300 new tokens
-            temperature=0.7,    # Controls randomness (higher = more random)
-            top_p=0.9,          # Controls diversity (nucleus sampling)
-            top_k=50            # Controls diversity (top-k sampling)
         )
-        # Basic cleanup: Sometimes models might start with whitespace or unwanted characters
         story = story.strip()
     except Exception as e:
-        print(f"Story generation failed: {e}") # Log generation errors
-        # Provide more detail in the HTTP exception for debugging
         raise HTTPException(status_code=500, detail=f"Story generation failed: {e}. Please check Space logs for details.")
-    # Translate the story if the target language is not English
     if language.lower() != "english":
         try:
             translator = GoogleTranslator(source='english', target=language.lower())
@@ -190,7 +230,6 @@ async def generate_story_endpoint(image_file: UploadFile = File(...), language:
             if translated_story is None:
                  print(f"Translation returned None for language: {language}")
-                 # Return English story with a warning
                  return {"story": story + "\n\n(Note: Automatic translation to your requested language failed.)"}
             story = translated_story
@@ -199,21 +238,17 @@ async def generate_story_endpoint(image_file: UploadFile = File(...), language:
              print(f"Invalid target language requested: {language}")
              raise HTTPException(status_code=400, detail=f"Invalid target language: {language}")
         except Exception as e:
-             print(f"Translation failed for language {language}: {e}") # Log translation errors
              raise HTTPException(status_code=500, detail=f"Translation failed: {e}")
-    # Return the generated (and potentially translated) story
     return {"story": story}
-# --- Optional: Serve a simple HTML form for testing (Needs templates dir and index.html) ---
 # from fastapi import Request
 # from fastapi.templating import Jinja2Templates
 # from fastapi.staticfiles import StaticFiles
 # templates = Jinja2Templates(directory="templates")
 # app.mount("/static", StaticFiles(directory="static"), name="static")
 # @app.get("/", response_class=HTMLResponse)
 # async def read_root(request: Request):
 #     return templates.TemplateResponse("index.html", {"request": request})

 # from fastapi.templating import Jinja2Templates
 # from fastapi.responses import FileResponse
+# Removed 'requests' as we'll primarily use gradio_client for captioning
+# import requests
+import base64 # Still useful if you need base64 for anything else
 import os
 import random
 # Import necessary classes from transformers
 import torch
+from transformers import AutoModelForCausalLM, AutoTokenizer
+# Import the Gradio Client
+from gradio_client import Client
 from deep_translator import GoogleTranslator
 from deep_translator.exceptions import InvalidSourceOrTargetLanguage
 app = FastAPI()
 # --- Hugging Face Model Setup (Local) ---
# --- Language-model configuration and module-level singletons ---
# Hub id of the chat/instruction-tuned checkpoint to load.
# "google/gemma-2b-it" is an alternative if you have access and prefer its quality.
model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"

# Populated by load_model(); both remain None until it has run.
tokenizer = None
model = None

# Shared gradio_client handle for the captioning Space (None until initialised).
caption_client = None
# Space identifier passed to gradio_client.Client for captioning.
CAPTION_SPACE_URL = "Makhinur/Image-to-Text-Salesforce-blip-image-captioning-base"


def load_model():
    """Load the tokenizer and causal LM for ``model_name`` into module globals.

    Rebinds the module-level ``tokenizer`` and ``model``. If the tokenizer
    ships without a pad token, the EOS token is reused as the pad token so
    that padded tokenisation and ``pad_token_id`` lookups work.
    """
    global tokenizer, model

    print(f"Loading language model: {model_name}...")

    tokenizer = AutoTokenizer.from_pretrained(model_name)
    pad_missing = tokenizer.pad_token is None
    if pad_missing:
        tokenizer.pad_token = tokenizer.eos_token

    # Half precision is requested to reduce memory use.
    # NOTE(review): surrounding comments suggest CPU-only inference; confirm
    # float16 performs acceptably on the deployed torch build.
    load_kwargs = {"torch_dtype": torch.float16}
    model = AutoModelForCausalLM.from_pretrained(model_name, **load_kwargs)

    print(f"Language model {model_name} loaded successfully.")
# Build the shared gradio_client handle used for image captioning.
def initialize_caption_client():
    """Create the module-level ``caption_client`` for ``CAPTION_SPACE_URL``.

    This is best-effort: any exception raised while constructing the client
    is printed and swallowed, and ``caption_client`` is set to ``None`` so
    callers can detect the failure later instead of startup aborting.
    """
    global caption_client

    print(f"Initializing Gradio client for {CAPTION_SPACE_URL}...")
    try:
        client = Client(CAPTION_SPACE_URL)
    except Exception as e:
        print(f"Error initializing Gradio client: {e}")
        # Leave an explicit None so later code can check for a missing client.
        caption_client = None
    else:
        caption_client = client
        print("Gradio client initialized successfully.")
# Run one-time initialisation (language model, caption client) at app startup.
# NOTE(review): FastAPI documents on_event as deprecated in favour of lifespan
# handlers; migrating would also require changing how the app is constructed.
@app.on_event("startup")
async def startup_event():
    """Load the language model, then initialise the captioning client."""
    for initialise in (load_model, initialize_caption_client):
        initialise()
# --- Image Captioning (Using gradio_client) ---
def generate_image_caption(image_file: UploadFile):
    """Caption an uploaded image via the external Gradio captioning Space.

    The upload is copied to a temporary file and that *path* is sent to
    ``caption_client.predict``. gradio_client takes file inputs as local
    paths or URLs (wrapped with ``handle_file`` on releases that provide it),
    not as open file objects, so the spooled upload cannot be passed directly.

    Args:
        image_file: The FastAPI upload whose contents should be captioned.

    Returns:
        The value returned by the Space's ``/predict`` endpoint on success.
        On any failure, a string starting with ``"Error"``; the calling
        endpoint checks for that prefix rather than catching an exception.
    """
    import shutil
    import tempfile

    if caption_client is None:
        # Client construction failed at startup; report instead of crashing.
        error_msg = "Gradio caption client not initialized. Cannot generate caption."
        print(error_msg)
        return f"Error: {error_msg}"

    try:
        # Newer gradio_client releases require file inputs to be wrapped.
        from gradio_client import handle_file
    except ImportError:
        handle_file = None  # older releases accept a plain path string

    # Keep the original extension so the remote side can infer the image type.
    suffix = os.path.splitext(image_file.filename or "")[1]
    temp_path = None
    try:
        print(f"Calling caption API /predict for file {image_file.filename}...")
        # Rewind in case something already consumed part of the upload stream.
        image_file.file.seek(0)
        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
            shutil.copyfileobj(image_file.file, tmp)
            temp_path = tmp.name

        img_arg = handle_file(temp_path) if handle_file is not None else temp_path
        caption = caption_client.predict(img=img_arg, api_name="/predict")
        print(f"Caption generated: {caption}")
        return caption
    except Exception as e:
        # Network failures, API errors and local I/O problems all surface here.
        print(f"Error during caption generation API call: {e}")
        return f"Error: Unable to generate caption from API. Details: {e}"
    finally:
        # The temp file was created with delete=False, so remove it ourselves.
        if temp_path is not None:
            try:
                os.remove(temp_path)
            except OSError as cleanup_error:
                print(f"Warning: could not remove temporary file {temp_path}: {cleanup_error}")
# --- Language Model Story Generation Function ---
# The endpoint calls this function by name; if you switch model family
# (TinyLlama or Gemma), switch the function the endpoint calls as well.
def generate_story_tinyllama(prompt_text: str, max_new_tokens: int = 300, temperature: float = 0.7, top_p: float = 0.9, top_k: int = 50) -> str:
    """Generate a completion for ``prompt_text`` with the loaded chat model.

    The prompt is wrapped as a single user turn using the tokenizer's chat
    template, sampled from with the given decoding parameters, and only the
    newly generated tokens (not the echoed prompt) are decoded and returned.

    Args:
        prompt_text: Instruction text sent as the user message.
        max_new_tokens: Upper bound on the number of generated tokens.
        temperature: Sampling temperature.
        top_p: Nucleus-sampling probability mass.
        top_k: Top-k sampling cutoff.

    Returns:
        The generated text with special tokens removed and surrounding
        whitespace stripped.

    Raises:
        RuntimeError: If the module-level tokenizer or model is still ``None``.
    """
    if tokenizer is None or model is None:
        raise RuntimeError("Language model and tokenizer not loaded. App startup failed?")

    messages = [{"role": "user", "content": prompt_text}]
    try:
        input_text = tokenizer.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=True,
        )
    except AttributeError:  # Fallback for tokenizers without apply_chat_template
        print("Warning: apply_chat_template not found. Using basic prompt formatting.")
        input_text = f"<s>[INST] {prompt_text} [/INST]"

    inputs = tokenizer(input_text, return_tensors="pt", padding=True, truncation=True, max_length=1024)
    prompt_length = inputs.input_ids.shape[-1]

    generate_ids = model.generate(
        inputs.input_ids,
        # Forward the tokenizer's mask explicitly: the pad token may be the
        # EOS token, in which case generate() cannot infer the mask from the
        # ids. A missing mask (None) behaves like the original call.
        attention_mask=inputs.get("attention_mask"),
        max_new_tokens=max_new_tokens,
        do_sample=True,
        temperature=temperature,
        top_p=top_p,
        top_k=top_k,
        pad_token_id=tokenizer.pad_token_id,
    )

    # generate() returns prompt + continuation; keep only the continuation.
    generated_text = tokenizer.decode(generate_ids[0, prompt_length:], skip_special_tokens=True)
    return generated_text.strip()
+# If using Gemma 2B instead of TinyLlama, use this function:
+# def generate_story_gemma(prompt_text: str, max_new_tokens: int = 300, temperature: float = 0.7, top_p: float = 0.9, top_k: int = 50) -> str:
+#     """
+#     Generates text using the loaded Gemma model.
+#     Applies the Gemma-IT chat template.
+#     """
+#     if tokenizer is None or model is None:
+#         raise RuntimeError("Language model and tokenizer not loaded. App startup failed?")
+#     messages = [
+#         {"role": "user", "content": prompt_text}
+#     ]
+#     input_text = tokenizer.apply_chat_template(
+#         messages,
+#         tokenize=False,
+#         add_generation_prompt=True
+#     )
+#     inputs = tokenizer(input_text, return_tensors="pt", padding=True, truncation=True, max_length=1024)
+#     generate_ids = model.generate(
+#         inputs.input_ids,
+#         max_new_tokens=max_new_tokens,
+#         do_sample=True,
+#         temperature=temperature,
+#         top_p=top_p,
+#         top_k=top_k,
+#         pad_token_id=tokenizer.pad_token_id,
+#     )
+#     generated_text = tokenizer.decode(generate_ids[0, inputs.input_ids.shape[-1]:], skip_special_tokens=True)
+#     return generated_text.strip()
 # --- FastAPI Endpoint ---
 @app.post("/generate-story/")
 async def generate_story_endpoint(image_file: UploadFile = File(...), language: str = Form(...)):
+    # No longer need to read the image data fully here
+    # image_data = await image_file.read()
     story_theme = random.choice([
         'an adventurous journey',
         'a mysterious encounter',
         'a journey into the unknown'
     ])
+    # Get image caption using the gradio_client function
+    # Pass the UploadFile object directly
+    caption = generate_image_caption(image_file)
     if caption.startswith("Error"):
         print(f"Caption generation failed: {caption}")
         raise HTTPException(status_code=500, detail=caption)
+    # Construct the prompt for the language model
     prompt_text = f"Write an attractive story of around 300 words about {story_theme}. Incorporate the following details from an image description into the story: {caption}\n\nStory:"
+    # Generate the story using the appropriate function (adjust if using Gemma)
     try:
+        story = generate_story_tinyllama( # <--- Make sure this matches your chosen model function
             prompt_text,
+            max_new_tokens=300,
+            temperature=0.7,
+            top_p=0.9,
+            top_k=50
         )
         story = story.strip()
     except Exception as e:
+        print(f"Story generation failed: {e}")
         raise HTTPException(status_code=500, detail=f"Story generation failed: {e}. Please check Space logs for details.")
+    # Translate the story
     if language.lower() != "english":
         try:
             translator = GoogleTranslator(source='english', target=language.lower())
             if translated_story is None:
                  print(f"Translation returned None for language: {language}")
                  return {"story": story + "\n\n(Note: Automatic translation to your requested language failed.)"}
             story = translated_story
              print(f"Invalid target language requested: {language}")
              raise HTTPException(status_code=400, detail=f"Invalid target language: {language}")
         except Exception as e:
+             print(f"Translation failed for language {language}: {e}")
              raise HTTPException(status_code=500, detail=f"Translation failed: {e}")
     return {"story": story}
+# --- Optional: HTML form for testing (Needs templates dir and index.html) ---
 # from fastapi import Request
 # from fastapi.templating import Jinja2Templates
 # from fastapi.staticfiles import StaticFiles
 # templates = Jinja2Templates(directory="templates")
 # app.mount("/static", StaticFiles(directory="static"), name="static")
 # @app.get("/", response_class=HTMLResponse)
 # async def read_root(request: Request):
 #     return templates.TemplateResponse("index.html", {"request": request})