novamysticX committed on
Commit
fca0532
1 Parent(s): 45ca055

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -11
app.py CHANGED
@@ -10,17 +10,17 @@ logger = logging.getLogger(__name__)
10
 
11
  app = FastAPI(title="SQL Coder API")
12
 
13
- # Ensure cache directory exists
14
- cache_dir = os.getenv('TRANSFORMERS_CACHE', '/home/user/.cache/huggingface')
15
- os.makedirs(cache_dir, exist_ok=True)
16
 
17
  # Initialize pipeline
18
  try:
19
- pipe = pipeline("text-generation",
20
- model="defog/llama-3-sqlcoder-8b",
21
- device_map="auto",
22
- torch_dtype="auto",
23
- cache_dir=cache_dir)
 
24
  logger.info("Pipeline initialized successfully")
25
  except Exception as e:
26
  logger.error(f"Error initializing pipeline: {str(e)}")
@@ -32,7 +32,7 @@ class ChatMessage(BaseModel):
32
 
33
  class QueryRequest(BaseModel):
34
  messages: list[ChatMessage]
35
- max_length: int = 1024
36
  temperature: float = 0.7
37
 
38
  class QueryResponse(BaseModel):
@@ -47,10 +47,11 @@ async def generate(request: QueryRequest):
47
  # Generate response using pipeline
48
  response = pipe(
49
  formatted_prompt,
50
- max_length=request.max_length,
51
  temperature=request.temperature,
52
  do_sample=True,
53
- num_return_sequences=1
 
54
  )
55
 
56
  # Extract generated text
 
10
 
11
  app = FastAPI(title="SQL Coder API")
12
 
13
+ # Set environment variable for cache directory
14
+ os.environ['TRANSFORMERS_CACHE'] = '/home/user/.cache/huggingface'
 
15
 
16
  # Initialize pipeline
17
  try:
18
+ pipe = pipeline(
19
+ "text-generation",
20
+ model="defog/llama-3-sqlcoder-8b",
21
+ device_map="auto",
22
+ model_kwargs={"torch_dtype": "auto"}
23
+ )
24
  logger.info("Pipeline initialized successfully")
25
  except Exception as e:
26
  logger.error(f"Error initializing pipeline: {str(e)}")
 
32
 
33
  class QueryRequest(BaseModel):
34
  messages: list[ChatMessage]
35
+ max_new_tokens: int = 1024
36
  temperature: float = 0.7
37
 
38
  class QueryResponse(BaseModel):
 
47
  # Generate response using pipeline
48
  response = pipe(
49
  formatted_prompt,
50
+ max_new_tokens=request.max_new_tokens,
51
  temperature=request.temperature,
52
  do_sample=True,
53
+ num_return_sequences=1,
54
+ pad_token_id=pipe.tokenizer.eos_token_id
55
  )
56
 
57
  # Extract generated text