SpiceyToad committed on
Commit
81fe8c1
1 Parent(s): 184700f

Upload 2 files


Updated Dockerfile

Files changed (2)
  1. Dockerfile +24 -17
  2. app.py +26 -26
Dockerfile CHANGED
@@ -1,17 +1,24 @@
- # Use a lightweight PyTorch image with GPU support
- FROM pytorch/pytorch:2.0.0-cuda11.7-cudnn8-runtime
-
- # Set the working directory
- WORKDIR /app
-
- # Copy the application files into the container
- COPY . /app
-
- # Install required Python dependencies
- RUN pip install --no-cache-dir -r requirements.txt
-
- # Expose the FastAPI port
- EXPOSE 7860
-
- # Command to run the FastAPI application
- CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
+ # Use a lightweight PyTorch image with GPU support
+ FROM pytorch/pytorch:2.0.0-cuda11.7-cudnn8-runtime
+
+ # Set the working directory
+ WORKDIR /app
+
+ # Set writable directories for Hugging Face cache
+ ENV TRANSFORMERS_CACHE=/app/cache
+ ENV HF_HOME=/app/cache
+
+ # Create the cache directory
+ RUN mkdir -p /app/cache
+
+ # Copy the application files into the container
+ COPY . /app
+
+ # Install required Python dependencies
+ RUN pip install --no-cache-dir -r requirements.txt
+
+ # Expose the FastAPI port
+ EXPOSE 7860
+
+ # Command to run the FastAPI application
+ CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
app.py CHANGED
@@ -1,26 +1,26 @@
- from fastapi import FastAPI, Request
- from transformers import AutoTokenizer, AutoModelForCausalLM
- import torch
- import os
-
- HF_API_TOKEN = os.getenv("HF_API_TOKEN") # Hugging Face API token
-
- app = FastAPI()
-
- # Load Falcon 7B
- MODEL_NAME = "SpiceyToad/demo-falc" # Replace with your model
- tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, token=HF_API_TOKEN)
- model = AutoModelForCausalLM.from_pretrained(
-     MODEL_NAME, device_map="auto", torch_dtype=torch.bfloat16, token=HF_API_TOKEN
- )
-
- @app.post("/generate")
- async def generate_text(request: Request):
-     data = await request.json()
-     prompt = data.get("prompt", "")
-     max_length = data.get("max_length", 50)
-
-     inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
-     outputs = model.generate(inputs["input_ids"], max_length=max_length)
-     response = tokenizer.decode(outputs[0], skip_special_tokens=True)
-     return {"generated_text": response}
+ from fastapi import FastAPI, Request
+ from transformers import AutoTokenizer, AutoModelForCausalLM
+ import torch
+ import os
+
+ HF_API_TOKEN = os.getenv("HF_API_TOKEN") # Hugging Face API token
+
+ app = FastAPI()
+
+ # Load Falcon 7B
+ MODEL_NAME = "SpiceyToad/demo-falc" # Replace with your model
+ tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, token=HF_API_TOKEN)
+ model = AutoModelForCausalLM.from_pretrained(
+     MODEL_NAME, device_map="auto", torch_dtype=torch.bfloat16, token=HF_API_TOKEN
+ )
+
+ @app.post("/generate")
+ async def generate_text(request: Request):
+     data = await request.json()
+     prompt = data.get("prompt", "")
+     max_length = data.get("max_length", 50)
+
+     inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
+     outputs = model.generate(inputs["input_ids"], max_length=max_length)
+     response = tokenizer.decode(outputs[0], skip_special_tokens=True)
+     return {"generated_text": response}