yiyii committed
Commit 33158af · 1 Parent(s): 71370d1
Files changed (3)
  1. Dockerfile +43 -0
  2. main.py +138 -0
  3. requirements.txt +32 -0
Dockerfile ADDED
@@ -0,0 +1,43 @@
+ # Use the official Python 3.9 image
+ FROM python:3.9
+
+ # To fix: ImportError: libGL.so.1: cannot open shared object file: No such file or directory
+ RUN apt-get update && apt-get install -y libgl1-mesa-glx
+
+ # Set the working directory to /code
+ WORKDIR /code
+
+ # Copy the requirements file into the container at /code
+ COPY ./requirements.txt /code/requirements.txt
+
+ # /code is only the working directory while the requirements are installed;
+ # later in the Dockerfile it switches to /home/user/app.
+
+ # Install requirements.txt
+ RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
+
+ # Set up a new user named "user" with user ID 1000
+ RUN useradd -m -u 1000 user
+
+ # Switch to the "user" user
+ USER user
+
+ # Set home to the user's home directory
+ ENV HOME=/home/user \
+     PATH=/home/user/.local/bin:$PATH
+
+ # Set the working directory to the user's home directory
+ WORKDIR $HOME/app
+
+ # Create a directory for deepface's model weights to avoid permission issues
+ RUN mkdir -p $HOME/.deepface/weights && chmod -R 777 $HOME/.deepface
+
+ # Copy the current directory contents into the container at $HOME/app, setting the owner to the user
+ COPY --chown=user . $HOME/app
+
+ CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
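The pre-created weights directory matters because DeepFace downloads model weights lazily into $HOME/.deepface/weights on first use, and the non-root "user" would otherwise hit a permission error. The image can then be built and run locally with the usual docker build / docker run -p 7860:7860 pair. A minimal sketch that triggers that first-time download, assuming deepface and numpy are installed:

# Sketch: the first DeepFace.analyze call downloads model weights into
# ~/.deepface/weights, which is why the Dockerfile pre-creates that directory.
import numpy as np
from deepface import DeepFace

blank = np.zeros((224, 224, 3), dtype=np.uint8)  # placeholder image, no real face
# enforce_detection=False keeps the call from raising when no face is found
result = DeepFace.analyze(blank, actions=["age", "gender", "emotion"], enforce_detection=False)
print(result)  # a dict or a list of dicts, depending on the deepface version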
main.py ADDED
@@ -0,0 +1,138 @@
+ from fastapi import FastAPI, File, UploadFile, HTTPException, Form
+ from pydantic import BaseModel
+ from deepface import DeepFace
+ from transformers import pipeline
+ import io
+ import base64
+ import pandas as pd
+ import numpy as np
+ import torch
+ from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
+ from fastapi.middleware.cors import CORSMiddleware
+ from PIL import Image
+ from fastapi.staticfiles import StaticFiles
+ from fastapi.responses import HTMLResponse
+
+ app = FastAPI()
+
+ # Allow all origins during development; restrict for production
+ app.add_middleware(
+     CORSMiddleware,
+     allow_origins=["*"],
+     allow_credentials=True,
+     allow_methods=["*"],
+     allow_headers=["*"],
+ )
+
+ class ImageInfo(BaseModel):
+     #image: str
+     image: UploadFile
+
+ # Define quantization parameters through BitsAndBytesConfig from transformers
+ # (unused below because this Space has no GPU; kept for when one is available)
+ bnb_config = BitsAndBytesConfig(
+     load_in_4bit=True,
+     bnb_4bit_use_double_quant=True,
+     bnb_4bit_quant_type="nf4",
+     bnb_4bit_compute_dtype=torch.bfloat16
+ )
+
+ get_blip = pipeline("image-to-text", model="Salesforce/blip-image-captioning-large")
+
+ # Use deepface to detect age, gender, and emotion
+ def analyze_face(image):
+     # Convert the PIL image to a numpy array
+     image_array = np.array(image)
+     face_result = DeepFace.analyze(image_array, actions=['age', 'gender', 'emotion'], enforce_detection=False)
+     # Convert the result to a DataFrame
+     df = pd.DataFrame(face_result)
+     # [0] accesses the value in the first row of each DataFrame column
+     return df['dominant_gender'][0], df['age'][0], df['dominant_emotion'][0]
+
+ # Use BLIP to generate a caption.
+ # image_to_base64_str converts an image to base64 format
+ def image_to_base64_str(pil_image):
+     byte_arr = io.BytesIO()
+     pil_image.save(byte_arr, format='PNG')
+     byte_arr = byte_arr.getvalue()
+     return str(base64.b64encode(byte_arr).decode('utf-8'))
+
+ # captioner takes an image and returns the generated caption
+ def captioner(image):
+     base64_image = image_to_base64_str(image)
+     caption = get_blip(base64_image)
+     # [0] accesses the first element of the list returned by the pipeline
+     return caption[0]['generated_text']
+
+ def get_image_info(image):
+     # Call the captioner() function
+     image_caption = captioner(image)
+     # Call the analyze_face() function
+     gender, age, emotion = analyze_face(image)
+     return image_caption, gender, age, emotion
+
+ # Load the model; quantization requires a GPU
+ model_id = "mistralai/Mistral-7B-Instruct-v0.1"
+ #model = AutoModelForCausalLM.from_pretrained(model_id, quantization_config=bnb_config, device_map="auto")
+ #model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto")
+ # No GPU here, otherwise: RuntimeError: No GPU found. A GPU is needed for quantization.
+ model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", low_cpu_mem_usage=True)
+ tokenizer = AutoTokenizer.from_pretrained(model_id)
+
+ def generate_story(image, storyType, length):
+     image_caption, gender, age, emotion = get_image_info(image)
+     # Send the inputs to wherever device_map placed the model (CPU here, GPU if available)
+     device = model.device
+     messages = [
+         {
+             "role": "user",
+             "content": f"generate a {storyType} story for the person in the image which describes a scenario:{image_caption}. Please also notice the person's age:{age}, gender:{gender} and emotion:{emotion} in the image\n\n"
+         }
+     ]
+     # TODO: should the prompt also ask the model to match sentence complexity to the person's age,
+     # e.g. use simple sentences for a small child?
+     encodeds = tokenizer.apply_chat_template(messages, return_tensors="pt")
+     model_inputs = encodeds.to(device)
+     generated_ids = model.generate(model_inputs, max_new_tokens=length, do_sample=True)
+     decoded = tokenizer.batch_decode(generated_ids)
+     generated_story = decoded[0].replace("<s>", "").replace("</s>", "").replace("[INST]", "").replace("[/INST]", "").strip()
+     return generated_story
+
+ # Additional route to serve the HTML form
+ @app.get("/")
+ async def read_item():
+     with open("static/index.html", "r") as f:
+         content = f.read()
+     return HTMLResponse(content=content, status_code=200)
+
+ @app.post("/generate_story")
+ async def generate_story_endpoint(
+     image: UploadFile = File(...),
+     storyType: str = Form(...),
+     length: int = Form(...),
+ ):
+     try:
+         contents = await image.read()
+         pil_image = Image.open(io.BytesIO(contents))
+         generated_story = generate_story(pil_image, storyType, length)
+         return {"generated_story": generated_story}
+     except Exception as e:
+         raise HTTPException(status_code=500, detail=f"Error generating story: {str(e)}")
+
+ # Mount the static directory to serve HTML, JS, and CSS files.
+ # Mounted last so the catch-all mount at "/" does not shadow the API routes above.
+ app.mount("/", StaticFiles(directory="static", html=True), name="static")
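Once the container is running on port 7860, the /generate_story endpoint can be exercised from a small client. Below is a minimal sketch using the requests library already pinned in requirements.txt; the host, the file name photo.jpg, and the form values are placeholders, not part of the commit:

import requests

# Hypothetical local test: assumes the app is up on localhost:7860
# and that photo.jpg exists next to this script.
with open("photo.jpg", "rb") as f:
    resp = requests.post(
        "http://localhost:7860/generate_story",
        files={"image": ("photo.jpg", f, "image/jpeg")},
        data={"storyType": "adventure", "length": 200},
    )
resp.raise_for_status()
print(resp.json()["generated_story"])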
requirements.txt ADDED
@@ -0,0 +1,32 @@
+ bitsandbytes
+ # Fixes: importlib.metadata.PackageNotFoundError: bitsandbytes
+
+ accelerate
+ # Fixes: ImportError: Using `low_cpu_mem_usage=True` or a `device_map` requires Accelerate: `pip install accelerate`
+
+ # FastAPI and related dependencies
+ fastapi==0.99.1
+ uvicorn[standard]==0.17.*
+
+ # DeepFace and related dependencies
+ deepface
+ opencv-python-headless
+ pandas
+ numpy
+
+ # Transformers and related dependencies
+ transformers
+ torch
+ torchvision
+ pillow
+
+ # Additional dependencies
+ pydantic==1.10.12
+ requests==2.27.*
+
+ python-multipart
+ # Handles form data with file uploads. Without it: RuntimeError: Form data requires "python-multipart" to be installed.
+
+ # opencv-python-headless: DeepFace relies on OpenCV, and the headless build suits server environments.
+ # torchvision: often needed when working with PyTorch models.
+ # pillow: included for working with images in Python.
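As a quick post-install check, the pinned stack can be imported together to catch version conflicts early. A small sketch (the commented versions assume the pins above resolved as written):

# Sanity check that the pinned packages import together without conflicts
import fastapi, pydantic, torch, transformers

print("fastapi", fastapi.__version__)       # expected 0.99.1
print("pydantic", pydantic.VERSION)         # expected 1.10.12 (pydantic v1 exposes VERSION)
print("torch", torch.__version__)
print("transformers", transformers.__version__)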