ndc8 committed
Commit · 994c0b4 · 1 Parent(s): 4b4e9ed
- backend_service.py +12 -2
- requirements.txt +3 -0
- verify_config.py +40 -0
backend_service.py
CHANGED
@@ -90,7 +90,7 @@ class ChatMessage(BaseModel):
         return v
 
 class ChatCompletionRequest(BaseModel):
-    model: str = Field(default_factory=lambda:
+    model: str = Field(default_factory=lambda: "google/gemma-3n-E4B-it", description="The model to use for completion")
     messages: List[ChatMessage] = Field(..., description="List of messages in the conversation")
     max_tokens: Optional[int] = Field(default=512, ge=1, le=2048, description="Maximum tokens to generate")
     temperature: Optional[float] = Field(default=0.7, ge=0.0, le=2.0, description="Sampling temperature")
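The hunk above only changes the default for the `model` field. For context, a minimal standalone sketch of how that `default_factory` behaves when a client omits `model` from the request body; it assumes Pydantic-style `BaseModel`/`Field` as used in the file, and the `ChatMessage` stub here is simplified (the real class also has a validator).

# Minimal sketch, not the service code: default_factory fills in the Gemma 3n
# id whenever a request omits "model". ChatMessage is reduced to two fields.
from typing import List, Optional
from pydantic import BaseModel, Field

class ChatMessage(BaseModel):
    role: str
    content: str

class ChatCompletionRequest(BaseModel):
    model: str = Field(default_factory=lambda: "google/gemma-3n-E4B-it")
    messages: List[ChatMessage] = Field(..., description="List of messages in the conversation")
    max_tokens: Optional[int] = Field(default=512, ge=1, le=2048)
    temperature: Optional[float] = Field(default=0.7, ge=0.0, le=2.0)

req = ChatCompletionRequest(messages=[ChatMessage(role="user", content="Hello")])
print(req.model)  # -> google/gemma-3n-E4B-it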
@@ -139,7 +139,14 @@ class CompletionRequest(BaseModel):
 
 
 # Model can be configured via environment variable - defaults to Gemma 3n (transformers format)
-
+# Force the correct model for Hugging Face Spaces deployment
+ai_model_env = os.environ.get("AI_MODEL", "google/gemma-3n-E4B-it")
+# Override GGUF models to use transformers-compatible version
+if "GGUF" in ai_model_env:
+    current_model = "google/gemma-3n-E4B-it"
+    print(f"Overriding GGUF model {ai_model_env} with transformers-compatible model: {current_model}")
+else:
+    current_model = ai_model_env
 vision_model = os.environ.get("VISION_MODEL", "Salesforce/blip-image-captioning-base")
 
 # Transformers model support
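The override rule added above is easy to exercise in isolation. This is a standalone sketch only: the function name `resolve_model` and the GGUF repo id below are illustrative and not part of the commit (the service itself runs this logic at module import time).

# Standalone sketch of the override rule; resolve_model and the GGUF id are
# illustrative placeholders, not values from the service.
import os

def resolve_model(ai_model_env: str) -> str:
    if "GGUF" in ai_model_env:
        # GGUF checkpoints cannot be loaded by the transformers classes used here
        return "google/gemma-3n-E4B-it"
    return ai_model_env

print(resolve_model("some-org/gemma-3n-E4B-it-GGUF"))                        # hypothetical GGUF id -> overridden
print(resolve_model(os.environ.get("AI_MODEL", "google/gemma-3n-E4B-it")))   # same default as in the diff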
@@ -194,11 +201,13 @@ async def lifespan(app: FastAPI):
     """Application lifespan manager for startup and shutdown events"""
     global processor, model, image_text_pipeline, current_model
    logger.info("Starting AI Backend Service (Hugging Face Spaces mode)...")
+    logger.info(f"Using model: {current_model}")
     try:
         logger.info(f"Loading model with transformers: {current_model}")
 
         # For Gemma 3n models, use the specific classes
         if "gemma-3n" in current_model.lower():
+            logger.info("Detected Gemma 3n model - using specialized classes")
             processor = AutoProcessor.from_pretrained(current_model)
             model = Gemma3nForConditionalGeneration.from_pretrained(
                 current_model,
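The `lifespan` handler touched above runs once at application startup. For orientation, this is the usual FastAPI wiring for such a handler; it is a generic sketch, not lines from this commit, and the body is a placeholder for the model loading shown in the hunk.

# Generic sketch of attaching a lifespan handler to a FastAPI app.
from contextlib import asynccontextmanager
from fastapi import FastAPI

@asynccontextmanager
async def lifespan(app: FastAPI):
    # startup: load processor/model, log the configured model id
    yield
    # shutdown: release model resources if needed

app = FastAPI(lifespan=lifespan)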
@@ -208,6 +217,7 @@ async def lifespan(app: FastAPI):
             ).eval()
         else:
             # Fallback for other models
+            logger.info("Using standard transformers classes")
             processor = AutoTokenizer.from_pretrained(current_model)
             model = AutoModelForCausalLM.from_pretrained(
                 current_model,
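Condensed, the branch covered by the last two hunks picks loader classes by model id: `AutoProcessor` plus `Gemma3nForConditionalGeneration` for Gemma 3n checkpoints, `AutoTokenizer` plus `AutoModelForCausalLM` otherwise. A sketch of that branch, assuming a transformers release that ships `Gemma3nForConditionalGeneration`; the `torch_dtype` argument is an assumed example, since the diff cuts off the real `from_pretrained` keyword arguments.

# Sketch of the loading branch; torch_dtype is an assumed example argument.
import torch
from transformers import (AutoModelForCausalLM, AutoProcessor, AutoTokenizer,
                          Gemma3nForConditionalGeneration)

def load_model(current_model: str):
    if "gemma-3n" in current_model.lower():
        processor = AutoProcessor.from_pretrained(current_model)
        model = Gemma3nForConditionalGeneration.from_pretrained(
            current_model, torch_dtype=torch.bfloat16
        ).eval()
    else:
        processor = AutoTokenizer.from_pretrained(current_model)
        model = AutoModelForCausalLM.from_pretrained(current_model).eval()
    return processor, model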
requirements.txt
CHANGED
@@ -17,5 +17,8 @@ sentencepiece>=0.2.0
 tokenizers
 regex
 
+# Required for Gemma 3n vision components
+timm
+
 # Optional: gradio for demo UI
 # gradio
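The requirements change adds `timm`, which the new comment ties to Gemma 3n's vision components. A small optional pre-flight check along these lines can fail fast if the dependency is missing; this snippet is a sketch and not part of the commit.

# Optional pre-flight check: fail early if timm is not installed, since the
# requirements note says Gemma 3n's vision components need it.
import importlib.util

if importlib.util.find_spec("timm") is None:
    raise SystemExit("timm is missing - install it (see requirements.txt) before starting the service")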
verify_config.py
ADDED
@@ -0,0 +1,40 @@
+#!/usr/bin/env python3
+"""
+Verification script to show current model configuration
+"""
+import os
+
+def show_model_config():
+    """Show what model will be used"""
+    print("Model Configuration Analysis")
+    print("=" * 50)
+
+    # Check environment variable
+    ai_model_env = os.environ.get("AI_MODEL", "google/gemma-3n-E4B-it")
+    print(f"Environment variable AI_MODEL: {ai_model_env}")
+
+    # Apply override logic
+    if "GGUF" in ai_model_env:
+        current_model = "google/gemma-3n-E4B-it"
+        print(f"OVERRIDE: GGUF model detected, using: {current_model}")
+        print(f"   Original: {ai_model_env}")
+        print(f"   Fixed to: {current_model}")
+    else:
+        current_model = ai_model_env
+        print(f"Using: {current_model}")
+
+    print(f"\nFinal model that will be loaded: {current_model}")
+
+    # Check if it's Gemma 3n
+    is_gemma_3n = "gemma-3n" in current_model.lower()
+    print(f"Is Gemma 3n model: {is_gemma_3n}")
+
+    if is_gemma_3n:
+        print("Will use: AutoProcessor + Gemma3nForConditionalGeneration")
+    else:
+        print("Will use: AutoTokenizer + AutoModelForCausalLM")
+
+    return current_model
+
+if __name__ == "__main__":
+    show_model_config()