abhisheksan commited on
Commit
11be554
·
1 Parent(s): 2107ef1

Refactor poem generation logic; implement lazy loading for model and update request/response models

Browse files
Files changed (2) hide show
  1. main.py +46 -39
  2. models/gpt4all-lora-quantized-ggml.bin +0 -3
main.py CHANGED
@@ -1,9 +1,8 @@
1
  from fastapi import FastAPI, HTTPException
2
  from fastapi.middleware.cors import CORSMiddleware
3
  from pydantic import BaseModel
4
- from pyllamacpp.model import Model
5
- import os
6
- from typing import Optional
7
 
8
  # Initialize FastAPI app
9
  app = FastAPI(title="Poetry Generator")
@@ -17,58 +16,66 @@ app.add_middleware(
17
  allow_headers=["*"],
18
  )
19
 
20
- # Model path - adjust this to your model location
21
- MODEL_PATH = "models/gpt4all-lora-quantized-ggml.bin"
22
-
23
- # Initialize the model at startup
24
  model = None
25
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
  class PoetryRequest(BaseModel):
27
- theme: str
28
- style: Optional[str] = "free verse"
29
- length: Optional[int] = 100
30
 
31
  class PoetryResponse(BaseModel):
32
  poem: str
 
33
 
34
  @app.on_event("startup")
35
  async def startup_event():
36
- global model
37
- if not os.path.exists(MODEL_PATH):
38
- raise Exception(f"Model file not found at {MODEL_PATH}")
39
- try:
40
- model = Model(
41
- model_path=MODEL_PATH,
42
- )
43
- except Exception as e:
44
- raise Exception(f"Failed to load model: {str(e)}")
45
 
46
  @app.post("/generate_poem", response_model=PoetryResponse)
47
  async def generate_poem(request: PoetryRequest):
48
- if model is None:
49
- raise HTTPException(status_code=500, detail="Model not initialized")
50
-
51
  try:
52
- # Craft the prompt
53
- prompt = f"""Write a {request.style} poem about {request.theme}.
54
- Keep it approximately {request.length} characters long.
55
- Make it creative and meaningful.\n\nPoem:"""
56
 
 
 
 
 
 
 
57
  # Generate the poem
58
- generated_text = model.generate(
59
- prompt,
60
- n_predict=request.length,
61
- temp=0.7,
62
- top_k=40,
63
- top_p=0.9,
64
- repeat_penalty=1.1,
65
- n_batch=8 # Reduced batch size for lower memory usage
66
  )
 
 
 
67
 
68
- # Clean up the generated text
69
- poem = generated_text.replace(prompt, "").strip()
70
 
71
- return PoetryResponse(poem=poem)
72
-
 
 
 
73
  except Exception as e:
74
- raise HTTPException(status_code=500, detail=f"Error generating poem: {str(e)}")
 
1
  from fastapi import FastAPI, HTTPException
2
  from fastapi.middleware.cors import CORSMiddleware
3
  from pydantic import BaseModel
4
+ from transformers import AutoModelForCausalLM
5
+ import time
 
6
 
7
  # Initialize FastAPI app
8
  app = FastAPI(title="Poetry Generator")
 
16
  allow_headers=["*"],
17
  )
18
 
19
# Module-level handle for the language model. It starts as None and is
# populated by load_model() (lazy loading), so importing this module does
# not pay the model-load cost.
model = None
22
def load_model():
    """Load the generation model into the module-level ``model`` global.

    Idempotent: the model is loaded on the first call only; later calls
    are no-ops. Invoked from the FastAPI startup hook.
    """
    global model
    if model is None:
        # Load a quantized GGUF model.
        # You can download models from huggingface.co
        # Example: GPT2 or Llama-2-7b-chat.Q4_K_M.gguf
        # NOTE(review): these keyword arguments (model_file, model_type,
        # context_length, gpu_layers) match ctransformers'
        # AutoModelForCausalLM.from_pretrained, not the transformers class
        # imported at the top of this file — confirm the intended library.
        model = AutoModelForCausalLM.from_pretrained(
            "TheBloke/Llama-2-7B-Chat-GGUF",
            model_file="llama-2-7b-chat.q4_K_M.gguf",
            model_type="llama",
            max_new_tokens=256,
            context_length=512,
            gpu_layers=0  # CPU only
        )
36
+
37
class PoetryRequest(BaseModel):
    """Request body for POST /generate_poem."""

    # Theme/subject of the poem (interpolated into the model prompt).
    prompt: str
    # Poetic style, also interpolated into the prompt.
    style: str = "free verse"
    # Upper bound on generated tokens (passed as max_new_tokens).
    max_length: int = 200
41
 
42
class PoetryResponse(BaseModel):
    """Response body for POST /generate_poem."""

    # The generated poem text, stripped of surrounding whitespace.
    poem: str
    # Seconds spent generating, measured inside the endpoint.
    generation_time: float
45
 
46
@app.on_event("startup")
async def startup_event():
    """FastAPI startup hook: trigger the lazy model load eagerly so the
    first request does not pay the (multi-second) load cost."""
    load_model()
 
 
 
 
 
 
 
 
49
 
50
@app.post("/generate_poem", response_model=PoetryResponse)
async def generate_poem(request: PoetryRequest):
    """Generate a poem for the requested theme and style.

    Returns the generated text plus the time (seconds) spent generating.
    Raises HTTP 500 if the model is not loaded or generation fails.
    """
    # Guard restored from the previous revision: without it, a failed or
    # skipped startup load surfaces as an opaque
    # "'NoneType' object is not callable" inside the generic handler below.
    if model is None:
        raise HTTPException(status_code=500, detail="Model not initialized")

    try:
        # perf_counter is monotonic, so the reported duration cannot be
        # skewed (or go negative) by system clock adjustments mid-request.
        start_time = time.perf_counter()

        # Construct the prompt
        full_prompt = f"""Write a {request.style} poem about {request.prompt}.
Make it creative and meaningful. The poem should be:

"""

        # Generate the poem. NOTE(review): calling the model object directly
        # with these kwargs matches the ctransformers API — confirm the
        # AutoModelForCausalLM import at the top of the file is from the
        # intended library.
        output = model(
            full_prompt,
            max_new_tokens=request.max_length,
            temperature=0.7,
            top_p=0.95,
            repeat_penalty=1.2
        )

        # Clean up the output
        poem = output.strip()

        generation_time = time.perf_counter() - start_time

        return PoetryResponse(
            poem=poem,
            generation_time=generation_time
        )
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
models/gpt4all-lora-quantized-ggml.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:d9af98b0350fc8af7211097e816ffbb8bae9a18f8aea8c50ff94a99bd6cb2c7c
3
- size 4212860154