Spaces:

abhisheksan
/

poetica

Sleeping

App Files Files Community

abhisheksan committed 16 days ago

Commit

67dd542

•

1 Parent(s): 11be554

Enhance Poetry Generator API; implement health check endpoint, improve model loading with logging, and update request/response models

Browse files

Files changed (4) hide show

app/config.py +16 -0
download_model.py +30 -0
main.py +130 -46
requirements.txt +1 -1

app/config.py ADDED Viewed

	@@ -0,0 +1,16 @@

+import os
+from pathlib import Path
+# Base project directory: two levels above this file, i.e. the directory that contains app/
+BASE_DIR = Path(__file__).resolve().parent.parent
+# Model settings: the model file is expected at <BASE_DIR>/models/<MODEL_NAME>
+MODEL_DIR = BASE_DIR / "models"
+# NOTE: the local file name uses a lower-case "q4" while the file name inside
+# MODEL_URL uses "Q4"; MODEL_PATH is built from the local name only.
+MODEL_NAME = "llama-2-7b-chat.q4_K_M.gguf"
+MODEL_PATH = MODEL_DIR / MODEL_NAME
+# Ensure model directory exists (side effect: runs every time this module is imported)
+MODEL_DIR.mkdir(parents=True, exist_ok=True)
+# Model download URL
+MODEL_URL = "https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q4_K_M.gguf"

download_model.py ADDED Viewed

	@@ -0,0 +1,30 @@

+import requests
+from tqdm import tqdm
+from app.config import MODEL_PATH, MODEL_URL, MODEL_DIR
+import sys
+def download_model():
+    """Download the model if it doesn't exist.
+
+    The response is streamed into a temporary ``.part`` file next to
+    MODEL_PATH and renamed to MODEL_PATH only after the transfer finished.
+    An interrupted or failed download therefore never leaves a truncated
+    file that a later run would report as "already exists".
+
+    Raises:
+        requests.HTTPError: if the server answers with an error status.
+        requests.RequestException: on connection errors or timeouts.
+        OSError: if the file cannot be written or renamed.
+    """
+    if MODEL_PATH.exists():
+        print(f"Model already exists at {MODEL_PATH}")
+        return
+    print(f"Downloading model to {MODEL_PATH}")
+    MODEL_DIR.mkdir(parents=True, exist_ok=True)
+    partial_path = MODEL_PATH.with_name(MODEL_PATH.name + ".part")
+    try:
+        # (connect, read) timeouts so a stalled connection cannot hang forever.
+        with requests.get(MODEL_URL, stream=True, timeout=(10, 60)) as response:
+            # Fail fast instead of saving an error page as the model file.
+            response.raise_for_status()
+            total_size = int(response.headers.get('content-length', 0))
+            with open(partial_path, 'wb') as file, tqdm(
+                desc="Downloading",
+                total=total_size,
+                unit='iB',
+                unit_scale=True,
+                unit_divisor=1024,
+            ) as pbar:
+                # 1 MiB chunks keep the per-chunk Python overhead low.
+                for data in response.iter_content(chunk_size=1024 * 1024):
+                    size = file.write(data)
+                    pbar.update(size)
+        # Publish the file under its final name only once it is complete.
+        partial_path.replace(MODEL_PATH)
+    finally:
+        # After a successful rename the .part file is gone; anything still
+        # present here is the residue of an aborted download.
+        if partial_path.exists():
+            partial_path.unlink()
+if __name__ == "__main__":
+    download_model()

main.py CHANGED Viewed

@@ -1,81 +1,165 @@
-from fastapi import FastAPI, HTTPException
-from fastapi.middleware.cors import CORSMiddleware
-from pydantic import BaseModel
-from transformers import AutoModelForCausalLM
 import time
-# Initialize FastAPI app
-app = FastAPI(title="Poetry Generator")
-# Add CORS middleware
-app.add_middleware(
-    CORSMiddleware,
-    allow_origins=["*"],
-    allow_credentials=True,
-    allow_methods=["*"],
-    allow_headers=["*"],
 )
-# Initialize the model (lazy loading)
 model = None
-def load_model():
-    global model
-    if model is None:
-        # Load a quantized GGUF model
-        # You can download models from huggingface.co
-        # Example: GPT2 or Llama-2-7b-chat.Q4_K_M.gguf
-        model = AutoModelForCausalLM.from_pretrained(
-            "TheBloke/Llama-2-7B-Chat-GGUF",
-            model_file="llama-2-7b-chat.q4_K_M.gguf",
-            model_type="llama",
-            max_new_tokens=256,
-            context_length=512,
-            gpu_layers=0  # CPU only
-        )
 class PoetryRequest(BaseModel):
-    prompt: str
-    style: str = "free verse"
-    max_length: int = 200
 class PoetryResponse(BaseModel):
     poem: str
     generation_time: float
 @app.on_event("startup")
 async def startup_event():
-    load_model()
-@app.post("/generate_poem", response_model=PoetryResponse)
 async def generate_poem(request: PoetryRequest):
     try:
         start_time = time.time()
-        # Construct the prompt
-        full_prompt = f"""Write a {request.style} poem about {request.prompt}.
-        Make it creative and meaningful. The poem should be:
-        """
-        # Generate the poem
         output = model(
             full_prompt,
             max_new_tokens=request.max_length,
-            temperature=0.7,
             top_p=0.95,
             repeat_penalty=1.2
         )
-        # Clean up the output
-        poem = output.strip()
         generation_time = time.time() - start_time
         return PoetryResponse(
-            poem=poem,
-            generation_time=generation_time
         )
     except Exception as e:
-        raise HTTPException(status_code=500, detail=str(e))

+from fastapi import FastAPI, HTTPException, status
+from pydantic import BaseModel, Field
+from typing import Optional, List
+from ctransformers import AutoModelForCausalLM
 import time
+import logging
+# Absolute import, matching download_model.py: main.py is a top-level module
+# at the repository root, so it has no parent package and a relative
+# ".app.config" import cannot be resolved.
+from app.config import MODEL_PATH
+# Configure logging
+logging.basicConfig(
+    level=logging.INFO,
+    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
+)
+logger = logging.getLogger(__name__)
+# Initialize FastAPI app
+app = FastAPI(
+    title="Poetry Generator API",
+    description="An API for generating poetry using a local LLM",
+    version="1.0.0"
 )
+# Global model variable; stays None until startup_event() has loaded the model
 model = None
 class PoetryRequest(BaseModel):
+    # Request body accepted by the /generate endpoint.
+    # Subject of the poem: required, at least one character long.
+    prompt: str = Field(
+        ...,
+        description="The topic or theme for the poem",
+        min_length=1
+    )
+    # Poem form; "free verse" when the client omits it.
+    style: str = Field(default="free verse", description="Style of the poem to generate")
+    # Passed to the model as max_new_tokens; validated to the range 50..500.
+    max_length: int = Field(default=200, description="Maximum length of the generated poem", ge=50, le=500)
+    # Sampling temperature; validated to the range 0.1..2.0.
+    temperature: float = Field(default=0.7, description="Temperature for text generation", ge=0.1, le=2.0)
 class PoetryResponse(BaseModel):
+    # Response body of the /generate endpoint.
+    # Model output with leading/trailing whitespace stripped.
     poem: str
+    # Seconds elapsed while building the prompt and running the model.
     generation_time: float
+    # The request's prompt, echoed back unchanged.
+    prompt: str
+    # The request's style, echoed back exactly as submitted.
+    style: str
+class ModelInfo(BaseModel):
+    # Response body of the /health endpoint.
+    # health_check() only ever returns "ready" here; an unloaded model yields a 503 instead.
+    status: str
+    # Human-readable model name.
+    model_name: str
+    # String form of MODEL_PATH.
+    model_path: str
+    # Style names for which generate_poem has a dedicated prompt template.
+    supported_styles: List[str]
+    # Context window size reported to clients.
+    max_context_length: int
 @app.on_event("startup")
 async def startup_event():
+    """Initialize the model during startup.
+
+    Loads the model file at MODEL_PATH into the module-level ``model``.
+
+    Raises:
+        RuntimeError: if the model file is missing or loading fails. The
+            underlying exception is attached as ``__cause__``.
+    """
+    global model
+    try:
+        if not MODEL_PATH.exists():
+            raise FileNotFoundError(
+                f"Model file not found at {MODEL_PATH}. "
+                "Please run download_model.py first."
+            )
+        logger.info("Loading model from %s", MODEL_PATH)
+        # NOTE(review): max_new_tokens equals context_length, which leaves no
+        # room for the prompt inside the context window -- confirm intended.
+        model = AutoModelForCausalLM.from_pretrained(
+            str(MODEL_PATH.parent),
+            model_file=MODEL_PATH.name,
+            model_type="llama",
+            max_new_tokens=512,
+            context_length=512,
+            gpu_layers=0  # CPU only
+        )
+        logger.info("Model loaded successfully")
+    except Exception as e:
+        # logger.exception records the traceback, and "from e" keeps the root
+        # cause attached to the RuntimeError that aborts startup.
+        logger.exception("Failed to load model: %s", e)
+        raise RuntimeError("Failed to initialize model") from e
+@app.get("/health", response_model=ModelInfo, status_code=status.HTTP_200_OK, tags=["Health Check"])
+async def health_check():
+    """Check if the model is loaded and get basic information"""
+    # Happy path first: a loaded model yields the static metadata below.
+    if model is not None:
+        styles = ["free verse", "haiku", "sonnet", "limerick", "tanka"]
+        return ModelInfo(
+            status="ready",
+            model_name="Llama-2-7B-Chat",
+            model_path=str(MODEL_PATH),
+            supported_styles=styles,
+            max_context_length=512,
+        )
+    # The model has not been loaded, so the service cannot handle requests.
+    raise HTTPException(
+        status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
+        detail="Model not loaded",
+    )
+# Prompt template per style, keyed by lower-case style name. Built once at
+# import time instead of on every request.
+_PROMPT_TEMPLATES = {
+    "haiku": "Write a haiku about {prompt}. Follow the 5-7-5 syllable pattern:\n\n",
+    "sonnet": "Write a Shakespearean sonnet about {prompt}. Follow the traditional 14-line format with rhyme scheme ABAB CDCD EFEF GG:\n\n",
+    "limerick": "Write a limerick about {prompt}. Follow the AABBA rhyme scheme:\n\n",
+    "free verse": "Write a free verse poem about {prompt}. Make it creative and meaningful:\n\n",
+    "tanka": "Write a tanka about {prompt}. Follow the 5-7-5-7-7 syllable pattern:\n\n"
+}
+@app.post(
+    "/generate",
+    response_model=PoetryResponse,
+    status_code=status.HTTP_200_OK,
+    tags=["Generation"]
+)
 async def generate_poem(request: PoetryRequest):
+    """Generate a poem based on the provided prompt and parameters"""
+    # Responds 503 while the model is not loaded and 500 if generation fails.
+    # The response echoes the request's prompt and style as submitted.
+    if model is None:
+        raise HTTPException(
+            status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
+            detail="Model not loaded"
+        )
     try:
         start_time = time.time()
+        # Ignore case and surrounding whitespace when matching the style;
+        # unknown styles fall back to the free verse template.
+        style_key = request.style.strip().lower()
+        template = _PROMPT_TEMPLATES.get(style_key, _PROMPT_TEMPLATES["free verse"])
+        full_prompt = template.format(prompt=request.prompt)
+        # NOTE(review): this is a synchronous call inside an async handler, so
+        # the event loop is blocked for the whole generation -- confirm that
+        # serialising requests this way is intended.
         output = model(
             full_prompt,
             max_new_tokens=request.max_length,
+            temperature=request.temperature,
             top_p=0.95,
             repeat_penalty=1.2
         )
         generation_time = time.time() - start_time
         return PoetryResponse(
+            poem=output.strip(),
+            generation_time=generation_time,
+            prompt=request.prompt,
+            style=request.style
         )
     except Exception as e:
+        # Keep the traceback in the log and the root cause on the HTTP error.
+        logger.exception("Generation error: %s", e)
+        raise HTTPException(
+            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
+            detail=f"Failed to generate poem: {str(e)}"
+        ) from e
+if __name__ == "__main__":
+    import uvicorn
+    # This file is main.py at the repository root, so its import string is
+    # "main:app"; there is no app/main.py module for "app.main:app" to find.
+    uvicorn.run("main:app", host="0.0.0.0", port=8000, reload=True)

requirements.txt CHANGED Viewed

@@ -18,5 +18,5 @@ accelerate==0.27.2
 python-jose==3.3.0  # for JWT handling if you add auth later
 gunicorn==21.2.0    # for production deployment
 python-dotenv==1.0.0  # for environment variables
 pyllamacpp==2.4.0

 python-jose==3.3.0  # for JWT handling if you add auth later
 gunicorn==21.2.0    # for production deployment
 python-dotenv==1.0.0  # for environment variables
+ctransformers
 pyllamacpp==2.4.0