yiyii committed
Commit 11f3494
1 Parent(s): 53a257c
Files changed (4)
  1. __pycache__/app.cpython-311.pyc +0 -0
  2. app.py +233 -0
  3. requirements.txt +24 -0
  4. story.txt +0 -0
__pycache__/app.cpython-311.pyc ADDED
Binary file (5.45 kB).
 
app.py ADDED
@@ -0,0 +1,233 @@
+ import gradio as gr
+ from deepface import DeepFace
+ from transformers import pipeline
+ import io
+ import base64
+ import pandas as pd
+ import numpy as np
+ from huggingface_hub import InferenceClient
+
+ from langchain.text_splitter import TokenTextSplitter
+ # from langchain.text_splitter import RecursiveCharacterTextSplitter
+ from langchain.embeddings import HuggingFaceBgeEmbeddings
+ from langchain.vectorstores import Chroma
+ # from langchain.chains import RetrievalQA
+ # from langchain import PromptTemplate
+
+ get_blip = pipeline("image-to-text", model="Salesforce/blip-image-captioning-large")
+
+ # use DeepFace to detect age, gender, and emotion (happy, neutral, surprise, sad, angry, fear, disgust)
+ def analyze_face(image):
+     # convert the PIL image to a numpy array
+     image_array = np.array(image)
+     face_result = DeepFace.analyze(image_array, actions=['age', 'gender', 'emotion'], enforce_detection=False)
+     # convert the resulting dictionary to a dataframe
+     df = pd.DataFrame(face_result)
+     return df['dominant_gender'][0], df['age'][0], df['dominant_emotion'][0]
+     # the [0] at the end accesses the value in the first row of each DataFrame column
+
+ # use BLIP to generate a caption
+ # image_to_base64_str converts an image to base64 format
+ def image_to_base64_str(pil_image):
+     byte_arr = io.BytesIO()
+     pil_image.save(byte_arr, format='PNG')
+     byte_arr = byte_arr.getvalue()
+     return str(base64.b64encode(byte_arr).decode('utf-8'))
+ # captioner takes an image and returns its caption
+ def captioner(image):
+     base64_image = image_to_base64_str(image)
+     caption = get_blip(base64_image)
+     return caption[0]['generated_text']
+     # the [0] at the beginning accesses the first element of the returned list
+
+ def get_image_info(image):
+     # call the captioner() function
+     image_caption = captioner(image)
+
+     # call the analyze_face() function
+     gender, age, emotion = analyze_face(image)
+
+     # return image_caption and the face attributes
+     return image_caption, gender, age, emotion
+
+
+ # load the embedding model
+ model_name = "BAAI/bge-large-en-v1.5"
+ model_kwargs = {'device': 'cpu'}
+ # encode_kwargs = {'normalize_embeddings': False}
+ # the embeddings will be normalized; normalization makes cosine similarity (angular distance) calculations more effective,
+ # because these are comparison tasks based on the directional similarity between vectors.
+ encode_kwargs = {'normalize_embeddings': True}
+ # initialize embeddings
+ embeddings = HuggingFaceBgeEmbeddings(model_name=model_name, model_kwargs=model_kwargs, encode_kwargs=encode_kwargs)
+ print("embeddings model loaded....................")
+ # load the txt file
+ with open("story.txt", "r") as f:
+     # r: read-only mode
+     state_of_the_union = f.read()
+     # read the file into a single string
+ # split the content into chunks
+ text_splitter = TokenTextSplitter(chunk_size=200, chunk_overlap=20)
+ # TokenTextSplitter() keeps tokens intact, which preserves the integrity of words
+ # each chunk overlaps the previous chunk by 20 tokens
+ texts = text_splitter.split_text(state_of_the_union)
+ print("...........................................")
+ # print the first chunk
+ print("text[0]: ", texts[0])
+ # create embeddings for the chunks with the bge model, then save these vectors into a Chroma vector database
+ # use an hnsw (hierarchical navigable small world) index to make searching efficient
+ # use cosine similarity to measure similarity (similarity is crucial when performing similarity search)
+ # hnsw builds a graph-based index for approximate nearest neighbor searches;
+ # it organizes the data into an efficient structure that supports rapid retrieval operations (it speeds up the search).
+ # cosine similarity tells the hnsw algorithm how to measure the distance between vectors:
+ # by setting the space to cosine, the index measures the vectors' similarity with cosine similarity.
+ vector_store = Chroma.from_texts(texts, embeddings, collection_metadata={"hnsw:space": "cosine"}, persist_directory="stores/story_cosine")
+ print("vector store created........................")
+
+ # load_vector_store = Chroma(persist_directory="stores/story_cosine", embedding_function=embeddings)
+ # persist_directory="stores/story_cosine": load the existing vector store from "stores/story_cosine"
+ # embedding_function=embeddings: use the bge embedding model when adding new data to the vector store
+
+ # only retrieve the 3 most similar documents from the store
+ retriever = vector_store.as_retriever(search_kwargs={"k": 3})
+
+ client = InferenceClient(
+     "mistralai/Mistral-7B-Instruct-v0.1"
+ )
+ # client = InferenceClient(
+ #     "mistralai/Mistral-7B-v0.1"
+ # )
+
+ def generate(image, temperature=0.9, max_new_tokens=1500, top_p=0.95, repetition_penalty=1.0):
+     image_caption, gender, age, emotion = get_image_info(image)
+     print("............................................")
+     print("image_caption:", image_caption)
+     print("age:", age)
+     print("gender:", gender)
+     print("emotion:", emotion)
+     print("............................................")
+     query = f"{image_caption}. {age} years old {gender}"
+     # retrieve documents based on the query
+     documents = retriever.get_relevant_documents(query)
+     # embedding the query and comparing the query embedding with the chunk embeddings are handled internally by the get_relevant_documents() method.
+     # embedding the query: when a query is made, the retriever first converts the query text into a vector using the same embedding model
+     # that was used to create the document vectors in the store, so the query vector and the document vectors are compatible for similarity comparisons.
+     # the similarity between the query vector and the chunk vectors is measured with cosine similarity over the hnsw index,
+     # because the vector store was configured with {"hnsw:space": "cosine"}.
+     # in other words, both embedding the query and comparing it with the stored document vectors follow the configuration of the vector store set up above;
+     # get_relevant_documents() uses the embedding function specified when the Chroma database was created.
+     if documents:
+         print("document:", dir(documents[0]))
+         # print the methods and attributes of the first document
+         print(documents[0])
+         print(".....................................")
+         print(documents)
+     else:
+         print("no documents")
+
+     # dir(documents[0]):
+     """
+     document: ['Config', '__abstractmethods__', '__annotations__', '__class__', '__class_vars__', '__config__', '__custom_root_type__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__exclude_fields__',
+     '__fields__', '__fields_set__', '__format__', '__ge__', '__get_validators__', '__getattribute__', '__getstate__', '__gt__', '__hash__', '__include_fields__', '__init__', '__init_subclass__', '__iter__', '__json_encoder__',
+     '__le__', '__lt__', '__module__', '__ne__', '__new__', '__post_root_validators__', '__pre_root_validators__', '__pretty__', '__private_attributes__', '__reduce__', '__reduce_ex__', '__repr__', '__repr_args__', '__repr_name__',
+     '__repr_str__', '__rich_repr__', '__schema_cache__', '__setattr__', '__setstate__', '__signature__', '__sizeof__', '__slots__', '__str__', '__subclasshook__', '__try_update_forward_refs__', '__validators__', '_abc_impl', '_calculate_keys',
+     '_copy_and_set_values', '_decompose_class', '_enforce_dict_if_root', '_get_value', '_init_private_attributes', '_iter', 'construct', 'copy', 'dict', 'from_orm', 'get_lc_namespace', 'is_lc_serializable', 'json', 'lc_attributes', 'lc_id',
+     'lc_secrets', 'metadata', 'page_content', 'parse_file', 'parse_obj', 'parse_raw', 'schema', 'schema_json', 'to_json', 'to_json_not_implemented', 'type', 'update_forward_refs', 'validate']
+     """
+
+     # context = ' '.join([doc.page_content for doc in documents])
+     # context = '\n'.join([f"Document {index + 1}: {doc}" for index, doc in enumerate(documents)])
+     # make the documents' format clearer
+     context = '\n'.join([f"Document {index + 1}: {doc.page_content}" for index, doc in enumerate(documents)])
+     # prompt = f"[INST] Generate a story based on the person’s emotion: {emotion}, age: {age}, gender: {gender} of the image, and the image’s caption: {image_caption}. Please use simple words and a child-friendly tone for children, a mature tone for adults, and a considerate, reflective tone for elders.[/INST]"
+     print("....................................................................")
+     print("context:", context)
+     # prompt = f"[INST] Generate a story based on the person’s emotion: {emotion}, age: {age}, gender: {gender} of the image, and the image’s caption: {image_caption}. The following are some sentence examples: {context}[/INST]"
+     prompt = (
+         f"[INST] Please generate a detailed and engaging story based on the person's "
+         f"age: {age}, and gender: {gender} shown in the image. Begin with the scene described in the image's caption: '{image_caption}'. "
+         f"Just use the following example story plots and formats as an inspiration: "
+         f"{context} "
+         f"The generated story should include a beginning, middle, and end.[/INST]"
+     )
+
+     print("prompt:", prompt)
+
+     temperature = float(temperature)
+     if temperature < 1e-2:
+         temperature = 1e-2
+     top_p = float(top_p)
+
+     generate_kwargs = dict(
+         temperature=temperature,
+         max_new_tokens=max_new_tokens,
+         top_p=top_p,
+         repetition_penalty=repetition_penalty,
+         do_sample=True,
+         seed=42,
+     )
+     stream = client.text_generation(prompt, **generate_kwargs, stream=True, details=True, return_full_text=False)
+     # return_full_text=False: the stream contains only the generated story
+     # return_full_text=True: the stream would include the original prompt as well as the generated story
+     output = ""
+     for response in stream:
+         output += response.token.text
+         # yield "".join(output)
+         yield output
+     print("..........................................................")
+     print("generated story:", output)
+     return output
+
+ demo = gr.Interface(
+     fn=generate,
+     inputs=[
+         # gr.Video(sources=["webcam"], label="video")
+         gr.Image(sources=["upload", "webcam"], label="Upload Image", type="pil"),
+
+         gr.Slider(
+             label="Temperature",
+             value=0.9,
+             minimum=0.0,
+             maximum=1.0,
+             step=0.05,
+             interactive=True,
+             info="Higher values produce more diverse outputs",
+         ),
+
+         gr.Slider(
+             label="Max new tokens",
+             value=1500,
+             minimum=0,
+             maximum=3000,
+             step=1.0,
+             interactive=True,
+             info="The maximum number of new tokens",
+         ),
+
+         gr.Slider(
+             label="Top-p (nucleus sampling)",
+             value=0.90,
+             minimum=0.0,
+             maximum=1.0,
+             step=0.05,
+             interactive=True,
+             info="Higher values sample more low-probability tokens",
+         ),
+         gr.Slider(
+             label="Repetition penalty",
+             value=1.2,
+             minimum=1.0,
+             maximum=2.0,
+             step=0.05,
+             interactive=True,
+             info="Penalize repeated tokens",
+         ),
+     ],
+     outputs=[gr.Textbox(label="Generated Story")],
+     title="Story Generation",
+     description="Generate a story for you",
+     allow_flagging="never",
+ )
+ demo.launch(debug=True)
+
+
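Note on reusing the vector store: the commented-out load_vector_store lines in app.py hint at how the persisted Chroma store could be reloaded on a later run instead of re-embedding story.txt. The following is only a minimal sketch of that flow, assuming the stores/story_cosine directory already exists from a previous run and the same langchain APIs as above; the query string is an invented example of the "<caption>. <age> years old <gender>" format that generate() builds.

from langchain.embeddings import HuggingFaceBgeEmbeddings
from langchain.vectorstores import Chroma

# rebuild the embedding wrapper with the same settings used when the store was created
embeddings = HuggingFaceBgeEmbeddings(
    model_name="BAAI/bge-large-en-v1.5",
    model_kwargs={'device': 'cpu'},
    encode_kwargs={'normalize_embeddings': True},
)

# reload the persisted vector store rather than calling Chroma.from_texts() again
load_vector_store = Chroma(persist_directory="stores/story_cosine", embedding_function=embeddings)
retriever = load_vector_store.as_retriever(search_kwargs={"k": 3})

# invented example query in the same format app.py uses
docs = retriever.get_relevant_documents("a boy flying a kite on a beach. 10 years old Man")
for i, doc in enumerate(docs, start=1):
    print(f"Document {i}: {doc.page_content[:80]}")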
requirements.txt ADDED
@@ -0,0 +1,24 @@
+ tensorflow==2.15.0
+ # bitsandbytes==0.42.0
+ # accelerate
+ deepface
+ transformers
+ tf-keras
+ # torch
+ huggingface_hub
+
+ langchain
+ tiktoken
+ sentence_transformers
+ chromadb
+
+ # "Your currently installed version of Keras is Keras 3, but this is not yet supported in Transformers.
+ # Please install the backwards-compatible tf-keras package with `pip install tf-keras`."
+
+ # after installing tf-keras there was still a new error: ValueError: The layer sequential has never been called and thus has no defined input.
+ # something is wrong in the analyze_face function. possible solution: use another model rather than the default model
+ # chosen solution: downgrade the tensorflow version, because Serengil added the new dependencies last week
+
+
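The comments above record the Keras 3 troubleshooting that led to pinning tensorflow==2.15.0 and adding tf-keras. A rough sanity check of the installed environment might look like the sketch below; this is an assumed check, not part of the repo, and the printed version strings will vary.

import numpy as np
import tensorflow as tf
import transformers
from deepface import DeepFace

print("tensorflow:", tf.__version__)          # pinned to 2.15.0, which still ships Keras 2
print("transformers:", transformers.__version__)

# one analyze() call on a blank frame (enforce_detection=False) forces the DeepFace models to load,
# which is where the "layer sequential has never been called" error used to appear
_ = DeepFace.analyze(np.zeros((224, 224, 3), dtype=np.uint8), actions=["emotion"], enforce_detection=False)
print("DeepFace emotion model loaded OK")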
story.txt ADDED
The diff for this file is too large to render. See raw diff