Dockerfile → Dockerfile.hf RENAMED
@@ -1,12 +1,9 @@
-# Use an official CUDA runtime as a parent image
-FROM nvidia/cuda:12.2.0-runtime-ubuntu22.04
+# Use the official Python 3.9 image
+FROM python:3.9

 # Set the working directory to /code
 WORKDIR /code

-# Install Python
-RUN apt-get update && apt-get install -y python3.10 python3-pip
-
 # Copy the current directory contents into the container at /code
 COPY ./requirements.txt /code/requirements.txt

@@ -18,7 +15,7 @@ RUN useradd -m -u 1000 user
 # Switch to the "user" user
 USER user
 # Set home to the user's home directory
 ENV HOME=/home/user \
     PATH=/home/user/.local/bin:$PATH

 # Set the working directory to the user's home directory
@@ -28,4 +25,4 @@ WORKDIR $HOME/app
 COPY --chown=user . $HOME/app

 # Start the FastAPI app on port 7860, the default port expected by Spaces
-CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
+CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
README.md CHANGED
@@ -9,10 +9,6 @@ colorTo: gray

 sdk: docker

-python_version: 3.9
-
-suggested_hardware: a10g-small
-
 pinned: false

 license: openrail
__init__.py DELETED
File without changes
cache/__init__.py DELETED
File without changes
cache/local_cache.py DELETED
@@ -1,42 +0,0 @@
-from datetime import datetime, timedelta
-from functools import wraps
-from io import BytesIO
-
-from fastapi.responses import StreamingResponse
-
-CACHE_SIZE = 50
-
-_cache = {}
-_cache_time = {}
-
-
-def ttl_cache(key_name, media_type=None, ttl_secs=20):
-    def decorator(func):
-        @wraps(func)
-        async def wrapper(*args, **kwargs):
-            # Assuming the prompt is the key for caching, change as necessary
-            key = kwargs.get(key_name)
-            ttl = timedelta(seconds=ttl_secs)
-            # Check cache
-            if key in _cache:
-                if datetime.now() - _cache_time[key] > ttl:
-                    # Cache has expired
-                    del _cache[key]
-                    del _cache_time[key]
-                else:
-                    # if media_type == 'image/png':
-                    #     return StreamingResponse(BytesIO(_cache[key]), media_type=media_type)
-                    # else:
-                    return StreamingResponse(BytesIO(_cache[key]), media_type="image/png")
-
-            # Call the actual function if not in cache or expired
-            response, image_data = await func(*args, **kwargs)
-            # Cache the content of the response's body.
-            _cache[key] = image_data
-            _cache_time[key] = datetime.now()
-
-            return response
-
-        return wrapper
-
-    return decorator
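For reference, the deleted decorator cached raw response bytes keyed by a query parameter and re-streamed them on a hit (note that `CACHE_SIZE = 50` was defined but never enforced, so the dict grew without bound). A usage sketch of its contract; the route and `render_png` helper below are hypothetical, not part of this repo. The decorated coroutine must return a `(response, bytes)` pair so the wrapper can store the bytes and rebuild a `StreamingResponse` within the TTL window:

```python
# Hypothetical route illustrating the deleted ttl_cache contract.
from io import BytesIO

from fastapi import APIRouter
from fastapi.responses import StreamingResponse

from cache.local_cache import ttl_cache

router = APIRouter()


@router.get("/thumbnail")
@ttl_cache(key_name="name", media_type="image/png", ttl_secs=20)
async def thumbnail(name: str):
    png_bytes = render_png(name)  # hypothetical helper returning PNG bytes
    # Return both the response and the raw bytes: the wrapper caches the
    # bytes under kwargs["name"] and serves them directly on cache hits.
    return StreamingResponse(BytesIO(png_bytes), media_type="image/png"), png_bytes
```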
config.py DELETED
@@ -1,15 +0,0 @@
-import os
-
-from pydantic import BaseModel
-
-
-class Settings(BaseModel):
-    hf_token: str = os.environ.get("hf_token")
-    base_sd_model: str = os.environ.get("base_sd_model") or "stabilityai/stable-diffusion-xl-base-1.0"
-    refiner_sd_model: str = os.environ.get("refiner_sd_model") or "stabilityai/stable-diffusion-xl-refiner-1.0"
-    version: str = "0.1.0"
-    url_version: str = "v1"
-    prefix: str = "/v1/unik-ml"
-
-
-settings = Settings()
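The deleted `Settings` resolved each field once at import time: environment variable first, hardcoded default otherwise. A small sketch of that behavior, assuming the env var names from the deleted file (the vars must be set before `config` is first imported, and pydantic v2 does not validate defaults, so an unset `hf_token` silently becomes `None`):

```python
import os

# Must happen before importing config, since the defaults are
# evaluated when the Settings class body runs at import time.
os.environ["base_sd_model"] = "stabilityai/stable-diffusion-xl-base-1.0"

from config import settings

print(settings.base_sd_model)  # value from the environment (or the default)
print(settings.prefix)         # "/v1/unik-ml"
print(settings.hf_token)       # None if the "hf_token" env var was never set
```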
main.py CHANGED
@@ -1,38 +1,65 @@
-import uvicorn
-from fastapi import FastAPI
-from fastapi.middleware.cors import CORSMiddleware
-from huggingface_hub import login
-
-from config import settings
-from routers.intference import stable_diffusion
-
-login(settings.hf_token)
-
-app = FastAPI(
-    title="UNIK ML",
-    version=settings.version,
-    openapi_url=f"{settings.prefix}/openapi.json",
-    docs_url=f"{settings.prefix}/docs",
-    redoc_url=f"{settings.prefix}/redoc",
-    swagger_ui_oauth2_redirect_url=f"{settings.prefix}/docs/oauth2-redirect")
-
-app.add_middleware(
-    CORSMiddleware,
-    allow_origins=["*"],
-    allow_methods=["*"],
-    allow_headers=["*"],
-    allow_credentials=True,
-)
-
-
-@app.get("/")
-async def root():
-    return {"message": "UNIK ML API"}
-
-
-app.include_router(stable_diffusion.router, prefix=settings.prefix, tags=["inference"])
-
-
-# Start your FastAPI application
-# if __name__ == "__main__":
-#     uvicorn.run(app, host="0.0.0.0", port=8000)
-#
+from io import BytesIO
+
+import torch
+from diffusers import DiffusionPipeline
+from fastapi import FastAPI
+from fastapi.responses import StreamingResponse
+
+# Load both the base & refiner pipelines
+base = DiffusionPipeline.from_pretrained(
+    "stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16, variant="fp16", use_safetensors=True
+)
+base.to("cuda")
+# base.enable_model_cpu_offload()
+base.enable_attention_slicing()
+refiner = DiffusionPipeline.from_pretrained(
+    "stabilityai/stable-diffusion-xl-refiner-1.0",
+    text_encoder_2=base.text_encoder_2,
+    vae=base.vae,
+    torch_dtype=torch.float16,
+    use_safetensors=True,
+    variant="fp16",
+)
+refiner.to("cuda")
+# refiner.enable_model_cpu_offload()
+refiner.enable_attention_slicing()
+
+# Create a new FastAPI app instance
+app = FastAPI()
+
+
+# Handle GET requests at `/generate`: run the SDXL base model, hand its
+# latents to the refiner, and stream the final image back as a PNG.
+# Note: the `text` query parameter is currently unused; the prompt is hardcoded below.
+@app.get("/generate")
+def generate(text: str):
+    """
+    generate image
+    """
+    # Define how many inference steps to run and what fraction runs on each expert (80/20)
+    n_steps = 40
+    high_noise_frac = 0.8
+    negative = "drawing, painting, crayon, sketch, graphite, impressionist, noisy, blurry, soft, deformed, ugly. bad anatomy, disfigured, poorly drawn face, mutation, mutated, extra limb, ugly, disgusting, poorly drawn hands, missing limb, floating limbs, disconnected limbs, malformed hands, blurry, mutated hands and fingers, watermark, watermarked, oversaturated, censored, distorted hands, amputation, missing hands, obese, doubled face, double hands, two women, anime style, cartoon, toon."
+    prompt = "Designs should play with different textures and layering but stick to a monochrome palette. Think leather jackets over mesh tops, or satin draped over matte cotton. in a studio. zoomed-in. single model."
+
+    # Run both experts: the base denoises in latent space, the refiner finishes
+    image = base(
+        prompt=prompt,
+        negative_prompt=negative,
+        num_inference_steps=n_steps,
+        denoising_end=high_noise_frac,
+        output_type="latent",
+    ).images
+    final_image = refiner(
+        prompt=prompt,
+        negative_prompt=negative,
+        num_inference_steps=n_steps,
+        denoising_start=high_noise_frac,
+        image=image,
+    ).images[0]
+
+    # Encode the PIL image as PNG bytes before streaming it back
+    buffer = BytesIO()
+    final_image.save(buffer, format="PNG")
+    buffer.seek(0)
+    return StreamingResponse(buffer, media_type="image/png")
requirements.txt CHANGED
@@ -1,12 +1,7 @@
-# --extra-index-url https://download.pytorch.org/whl/cu118
 fastapi==0.100.1
 pydantic==2.1.1
 pylint==2.17.5
 uvicorn>=0.23.2
 torch==2.0.1
 transformers==4.31.0
 accelerate==0.21.0
-diffusers==0.19.3
-torchvision==0.15.2
-safetensors==0.3.1
-huggingface-hub==0.16.4
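Note that `diffusers` is dropped here even though the new `main.py` imports `DiffusionPipeline` from it; as written, the app would fail at import time unless the pin is reinstated (e.g. `diffusers==0.19.3`, the previously pinned version).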
routers/__init__.py DELETED
File without changes
routers/intference/__init__.py DELETED
File without changes
routers/intference/stable_diffusion.py DELETED
@@ -1,70 +0,0 @@
-# load both base & refiner
-from io import BytesIO
-
-import torch
-from diffusers import DiffusionPipeline
-from fastapi import APIRouter
-from fastapi.responses import StreamingResponse
-
-from cache.local_cache import ttl_cache
-from config import settings
-
-router = APIRouter()
-
-base = DiffusionPipeline.from_pretrained(
-    settings.base_sd_model, torch_dtype=torch.float16, variant="fp16", use_safetensors=True
-)
-
-base.to("cuda")
-# base.enable_model_cpu_offload()
-base.enable_attention_slicing()
-refiner = DiffusionPipeline.from_pretrained(
-    settings.refiner_sd_model,
-    text_encoder_2=base.text_encoder_2,
-    vae=base.vae,
-    torch_dtype=torch.float16,
-    use_safetensors=True,
-    variant="fp16",
-)
-refiner.to("cuda")
-# refiner.enable_model_cpu_offload()
-refiner.enable_attention_slicing()
-
-
-@router.get("/generate")
-@ttl_cache(key_name='prompt', media_type="image/png", ttl_secs=20)
-async def generate(prompt: str):
-    """
-    generate image
-    """
-    # Define how many steps and what % of steps to be run on each experts (80/20) here
-    n_steps = 40
-    high_noise_frac = 0.8
-    negative = "disfigured, ugly, bad, immature, cartoon, anime, 3d, painting, b&w, sketch, blurry, deformed, bad anatomy, poorly drawn face, mutation, multiple people."
-
-    prompt = f"single image. single model. {prompt}. zoomed in. full-body. real person. realistic. 4k. best quality."
-    print(prompt)
-
-    # run both experts
-    image = base(
-        prompt=prompt,
-        negative_prompt=negative,
-        num_inference_steps=n_steps,
-        denoising_end=high_noise_frac,
-        output_type="latent",
-    ).images[0]
-    final_image = refiner(
-        prompt=prompt,
-        negative_prompt=negative,
-        num_inference_steps=n_steps,
-        denoising_start=high_noise_frac,
-        image=image,
-    ).images[0]
-
-    memory_stream = BytesIO()
-    final_image.save(memory_stream, format="PNG")
-    image_data = memory_stream.getvalue()  # get bytes of the image
-
-    memory_stream.seek(0)
-    return StreamingResponse(memory_stream, media_type="image/png"), image_data
-
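Both versions of `generate()` use the same ensemble-of-experts split: the base model handles the high-noise portion of the schedule and the refiner the low-noise tail. A worked example of what `n_steps = 40` with `high_noise_frac = 0.8` means in practice (the exact step boundary is an approximation; diffusers derives it from the noise schedule):

```python
# Worked arithmetic for the 80/20 expert split used in generate().
n_steps = 40
high_noise_frac = 0.8

# denoising_end=0.8 stops the base model ~80% of the way through the
# schedule; denoising_start=0.8 makes the refiner pick up from there.
base_steps = round(n_steps * high_noise_frac)  # ~32 steps on the base model
refiner_steps = n_steps - base_steps           # ~8 steps on the refiner
print(base_steps, refiner_steps)               # 32 8
```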