SpiceyToad committed on
Commit
81fe8c1
1 Parent(s): 184700f

Upload 2 files


Updated Dockerfile

Files changed (2)
  1. Dockerfile +24 -17
  2. app.py +26 -26
Dockerfile CHANGED
@@ -1,17 +1,24 @@
- # Use a lightweight PyTorch image with GPU support
- FROM pytorch/pytorch:2.0.0-cuda11.7-cudnn8-runtime
-
- # Set the working directory
- WORKDIR /app
-
- # Copy the application files into the container
- COPY . /app
-
- # Install required Python dependencies
- RUN pip install --no-cache-dir -r requirements.txt
-
- # Expose the FastAPI port
- EXPOSE 7860
-
- # Command to run the FastAPI application
- CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
+ # Use a lightweight PyTorch image with GPU support
+ FROM pytorch/pytorch:2.0.0-cuda11.7-cudnn8-runtime
+
+ # Set the working directory
+ WORKDIR /app
+
+ # Set writable directories for Hugging Face cache
+ ENV TRANSFORMERS_CACHE=/app/cache
+ ENV HF_HOME=/app/cache
+
+ # Create the cache directory
+ RUN mkdir -p /app/cache
+
+ # Copy the application files into the container
+ COPY . /app
+
+ # Install required Python dependencies
+ RUN pip install --no-cache-dir -r requirements.txt
+
+ # Expose the FastAPI port
+ EXPOSE 7860
+
+ # Command to run the FastAPI application
+ CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
app.py CHANGED
@@ -1,26 +1,26 @@
- from fastapi import FastAPI, Request
- from transformers import AutoTokenizer, AutoModelForCausalLM
- import torch
- import os
-
- HF_API_TOKEN = os.getenv("HF_API_TOKEN") # Hugging Face API token
-
- app = FastAPI()
-
- # Load Falcon 7B
- MODEL_NAME = "SpiceyToad/demo-falc" # Replace with your model
- tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, token=HF_API_TOKEN)
- model = AutoModelForCausalLM.from_pretrained(
-     MODEL_NAME, device_map="auto", torch_dtype=torch.bfloat16, token=HF_API_TOKEN
- )
-
- @app.post("/generate")
- async def generate_text(request: Request):
-     data = await request.json()
-     prompt = data.get("prompt", "")
-     max_length = data.get("max_length", 50)
-
-     inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
-     outputs = model.generate(inputs["input_ids"], max_length=max_length)
-     response = tokenizer.decode(outputs[0], skip_special_tokens=True)
-     return {"generated_text": response}
+ from fastapi import FastAPI, Request
+ from transformers import AutoTokenizer, AutoModelForCausalLM
+ import torch
+ import os
+
+ HF_API_TOKEN = os.getenv("HF_API_TOKEN") # Hugging Face API token
+
+ app = FastAPI()
+
+ # Load Falcon 7B
+ MODEL_NAME = "SpiceyToad/demo-falc" # Replace with your model
+ tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, token=HF_API_TOKEN)
+ model = AutoModelForCausalLM.from_pretrained(
+     MODEL_NAME, device_map="auto", torch_dtype=torch.bfloat16, token=HF_API_TOKEN
+ )
+
+ @app.post("/generate")
+ async def generate_text(request: Request):
+     data = await request.json()
+     prompt = data.get("prompt", "")
+     max_length = data.get("max_length", 50)
+
+     inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
+     outputs = model.generate(inputs["input_ids"], max_length=max_length)
+     response = tokenizer.decode(outputs[0], skip_special_tokens=True)
+     return {"generated_text": response}