Update app.py
app.py CHANGED
@@ -1,185 +1,77 @@
-from fastapi import FastAPI
 from pydantic import BaseModel
-from transformers import AutoModelForCausalLM, AutoTokenizer
 import torch
-import json
-import random
-import os
-from typing import List, Optional

-

-# Global variables for model and tokenizer
-model = None
-tokenizer = None
 model_name = "Qwen/Qwen2.5-3B-Instruct"

-def load_model():
-    """Load model and tokenizer with proper error handling"""
-    global model, tokenizer
-
-    try:
-        print("Loading model...")
-        print(f"Cache directory: {os.environ.get('HF_HOME', 'Not set')}")
-
-        # Load tokenizer first (smaller download)
-        tokenizer = AutoTokenizer.from_pretrained(
-            model_name,
-            trust_remote_code=True
-        )
-        print("Tokenizer loaded successfully!")
-
-        # Load model with specific configurations for better compatibility
-        model = AutoModelForCausalLM.from_pretrained(
-            model_name,
-            torch_dtype=torch.float16,  # Use float16 to save memory
-            device_map="auto",
-            trust_remote_code=True,
-            low_cpu_mem_usage=True
-        )
-        print("Model loaded successfully!")
-
-    except Exception as e:
-        print(f"Error loading model: {str(e)}")
-        raise e
-
-# Load model on startup
-load_model()
-
 class GenerationRequest(BaseModel):
-    llm_commands: List[str]
     batch_size: int = 50
-    seed: Optional[int] = None

 class GenerationResponse(BaseModel):
-    success: bool
-    data: List[List[str]]
-    error: Optional[str] = None

-
-
-
-
-
-
-    {

-
-
-
-
-    - Values should be diverse and not repetitive

-

-
-
-

-
-

     try:
-        #
-
-
-        random.seed(request.seed)
-
-        # Build prompt
-        prompt = generate_data_prompt(request.llm_commands, request.batch_size)
-
-        # Prepare messages for chat template
-        messages = [
-            {"role": "system", "content": "You are Qwen, created by Alibaba Cloud. You are a helpful assistant that generates structured data."},
-            {"role": "user", "content": prompt}
-        ]
-
-        # Apply chat template
-        text = tokenizer.apply_chat_template(
-            messages,
-            tokenize=False,
-            add_generation_prompt=True
-        )
-
-        # Tokenize and generate
-        model_inputs = tokenizer([text], return_tensors="pt")
-
-        # Move inputs to same device as model
-        if torch.cuda.is_available():
-            model_inputs = model_inputs.to('cuda')
-
-        with torch.no_grad():
-            generated_ids = model.generate(
-                **model_inputs,
-                max_new_tokens=2048,
-                temperature=0.8,
-                do_sample=True,
-                pad_token_id=tokenizer.eos_token_id,
-                eos_token_id=tokenizer.eos_token_id
-            )
-
-        # Decode response
-        generated_ids = [
-            output_ids[len(input_ids):]
-            for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
-        ]
-
-        response_text = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
-
-        # Parse JSON from response
-        try:
-            # Find JSON array in the response
-            start_idx = response_text.find('[')
-            end_idx = response_text.rfind(']') + 1
-
-            if start_idx == -1 or end_idx == 0:
-                raise ValueError("No JSON array found in response")
-
-            json_str = response_text[start_idx:end_idx]
-            parsed_data = json.loads(json_str)
-
-            # Validate data structure
-            if not isinstance(parsed_data, list):
-                raise ValueError("Response is not a list")
-
-            # Filter and validate rows
-            valid_rows = []
-            expected_columns = len(request.llm_commands)
-
-            for row in parsed_data:
-                if isinstance(row, list) and len(row) == expected_columns:
-                    # Convert all values to strings
-                    valid_rows.append([str(cell) for cell in row])
-
-            return GenerationResponse(
-                success=True,
-                data=valid_rows
-            )
-
-        except json.JSONDecodeError as e:
-            return GenerationResponse(
-                success=False,
-                data=[],
-                error=f"Failed to parse JSON: {str(e)}"
-            )
-        except Exception as e:
-            return GenerationResponse(
-                success=False,
-                data=[],
-                error=f"Data processing error: {str(e)}"
-            )
-
     except Exception as e:
-
-
-
-
-        )
-
-@app.get("/health")
-async def health_check():
-    return {"status": "healthy", "model": model_name}

-
-
-
+from fastapi import FastAPI
 from pydantic import BaseModel
 import torch
+from transformers import AutoModelForCausalLM, AutoTokenizer

+# --- App and Model Loading ---
+app = FastAPI()

 model_name = "Qwen/Qwen2.5-3B-Instruct"
+print("Loading model...")
+# To leverage a GPU on Hugging Face Spaces, device_map="auto" is key
+model = AutoModelForCausalLM.from_pretrained(
+    model_name,
+    torch_dtype="auto",
+    device_map="auto"
+)
+tokenizer = AutoTokenizer.from_pretrained(model_name)
+print("Model loaded successfully.")

+# --- API Request and Response Models ---
 class GenerationRequest(BaseModel):
+    llm_commands: list[str]
     batch_size: int = 50

 class GenerationResponse(BaseModel):
+    data: list

+# --- API Endpoint ---
+@app.post("/generate", response_model=GenerationResponse)
+async def generate_data(request: GenerationRequest):
+    prompt = f"""
+    You are a data generator. Your task is to generate {request.batch_size} random, non-similar rows of data based on the following commands.
+    Each command corresponds to a column.
+    Commands: {request.llm_commands}
+    Return the data as a valid JSON array of arrays, where each inner array represents a row.
+    For example, for the commands ["an age between 20 and 30", "a random city in California"], the output should look like:
+    [[25, "Los Angeles"], [22, "San Francisco"]]
+    Do not include any extra text, explanations, or markdown formatting in your response. Only output the raw JSON array.
+    """

+    messages = [
+        {"role": "system", "content": "You are a helpful assistant that generates structured data."},
+        {"role": "user", "content": prompt}
+    ]

+    text = tokenizer.apply_chat_template(
+        messages,
+        tokenize=False,
+        add_generation_prompt=True
+    )

+    model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
+
+    generated_ids = model.generate(
+        **model_inputs,
+        max_new_tokens=2048  # Increased to handle larger batches
+    )

+    generated_ids = [
+        output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
+    ]

+    response_text = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
+
     try:
+        # The model might still add extra text, so we clean it
+        json_response = torch.tensor(eval(response_text.strip()))
+        return {"data": json_response.tolist()}
     except Exception as e:
+        print(f"Error parsing model output: {e}")
+        print(f"Raw output was: {response_text}")
+        # Return empty on failure to prevent crashing the Inngest job
+        return {"data": []}

+@app.get("/")
+def read_root():
+    return {"status": "ok"}
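For reference, a minimal client-side sketch of how the new /generate endpoint could be called once the Space is running. This is not part of the commit; the base URL is a placeholder and the requests library is assumed to be available.

import requests

# Example payload using the sample commands from the prompt in app.py
payload = {
    "llm_commands": ["an age between 20 and 30", "a random city in California"],
    "batch_size": 5,
}
resp = requests.post("https://<your-space>.hf.space/generate", json=payload, timeout=300)
resp.raise_for_status()
rows = resp.json()["data"]  # list of rows, one value per command/column
print(rows)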