warhawkmonk committed on
Commit 973f016 · verified · 1 Parent(s): 45ed2fb

Upload 6 files

animation.ipynb ADDED
@@ -0,0 +1,66 @@
+ {
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "\n",
+ "import json \n",
+ "import requests \n",
+ " \n",
+ "import streamlit as st \n",
+ "from streamlit_lottie import st_lottie \n",
+ "import os\n",
+ "folder_path = \"lotte_animation_saver/\"\n",
+ "\n",
+ "# Get the list of all files in the folder\n",
+ "file_names = os.listdir(folder_path)\n",
+ "max_file_name=max([int(i.split(\"_\")[-1][:-5]) for i in file_names]) \n",
+ "url = requests.get( \n",
+ "    \"https://lottie.host/34b4b005-c8c9-4d57-a0fd-0e9ed3b49835/nCkgflSg9J.json\") \n",
+ "# Creating a blank dictionary to store JSON file, \n",
+ "# as their structure is similar to Python Dictionary \n",
+ "url_json = dict() \n",
+ " \n",
+ "if url.status_code == 200: \n",
+ "\n",
+ "    url_json = url.json() \n",
+ "    with open(\"lotte_animation_saver/animation_\"+str(max_file_name+1)+\".json\",\"w\") as read:\n",
+ "        json.dump(url_json,read,indent=2)\n",
+ "\n",
+ "else: \n",
+ "    print(\"Error in the URL\") "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "env",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.12.4"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+ }
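
For reference, a minimal sketch of how one of the JSON files written by this notebook can be rendered back inside a Streamlit page. This mirrors the st_lottie usage in app.py below; the specific file name animation_1.json is only an example:

import json
import streamlit as st
from streamlit_lottie import st_lottie

# Load a previously saved Lottie animation and render it in the app.
with open("lotte_animation_saver/animation_1.json") as f:
    animation = json.load(f)
st_lottie(animation, height=400)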
app.py ADDED
@@ -0,0 +1,440 @@
+ import pandas as pd
+ from PIL import Image
+ import streamlit as st
+ import cv2
+ from streamlit_drawable_canvas import st_canvas
+ import torch
+ from diffusers import AutoPipelineForInpainting
+ import numpy as np
+ from streamlit_image_select import image_select
+ import os
+ import requests
+ from streamlit_navigation_bar import st_navbar
+ from langchain_community.llms import Ollama
+ import base64
+ from io import BytesIO
+ from PIL import Image, ImageDraw
+ from streamlit_lottie import st_lottie
+ from streamlit_option_menu import option_menu
+ import json
+ from transformers import pipeline
+ import streamlit as st
+ from streamlit_modal import Modal
+ import streamlit.components.v1 as components
+ from datetime import datetime
+
+ def image_to_base64(image_path):
+     with open(image_path, "rb") as img_file:
+         return base64.b64encode(img_file.read()).decode()
+
+
+ @st.cache_resource
+ def load_model():
+     pipeline_ = AutoPipelineForInpainting.from_pretrained("kandinsky-community/kandinsky-2-2-decoder-inpaint", torch_dtype=torch.float16).to("cuda")
+     return pipeline_
+
+ # @st.cache_resource
+ def prompt_improvment(pre_prompt):
+
+     llm = Ollama(model="llama3:latest",num_ctx=1000)
+     enhancement="Please use details from the prompt mentioned above, focusing only what user is thinking with the prompt and also add 8k resolution. Its a request only provide image description and brief prompt no other text."
+     prompt = pre_prompt+"\n"+enhancement
+     # result = llm.invoke(prompt)
+     return llm.stream(prompt)
+ def numpy_to_list(array):
+
+     current=[]
+     for value in array:
+         if isinstance(value,type(np.array([]))):
+             result=numpy_to_list(value)
+             current.append(result)
+         else:
+
+             current.append(int(value))
+     return current
+
+
+
+ @st.cache_resource
+ def llm_text_response():
+     llm = Ollama(model="llama3:latest",num_ctx=1000)
+     return llm.stream
+
+ def model_single_out(prompt):
+     pipe=load_model()
+     image = pipe(prompt).images[0]
+     return image
+
+ def model_out_put(init_image,mask_image,prompt,negative_prompt):
+     pipeline_ = load_model()
+     image = pipeline_(prompt=prompt, negative_prompt=negative_prompt, image=init_image, mask_image=mask_image).images[0]
+     return image
+
+ @st.cache_resource
+ def multimodel():
+     pipeline_ = pipeline("text-classification", model = "model_collection/model_4")
+     return pipeline_
+
+ def multimodel_output(prompt):
+     pipeline_ = multimodel()
+     image = pipeline_(prompt)
+     return image[0]['label']
+
+ def d4_to_3d(image):
+     formatted_array=[]
+     for j in image:
+         neste_list=[]
+         for k in j:
+             if any([True if i>0 else False for i in k]):
+                 neste_list.append(True)
+             else:
+                 neste_list.append(False)
+         formatted_array.append(neste_list)
+     print(np.shape(formatted_array))
+     return np.array(formatted_array)
+
+ st.set_page_config(layout="wide")
+
+
+ img_selection=None
+ # Specify canvas parameters in application
+ drawing_mode = st.sidebar.selectbox(
+     "Drawing tool:", ("freedraw","point", "line", "rect", "circle", "transform")
+ )
+
+
+ dictionary=st.session_state
+ if "every_prompt_with_val" not in dictionary:
+     dictionary['every_prompt_with_val']=[]
+ if "current_image" not in dictionary:
+     dictionary['current_image']=[]
+ if "prompt_collection" not in dictionary:
+     dictionary['prompt_collection']=[]
+ if "user" not in dictionary:
+     dictionary['user']=None
+ if "current_session" not in dictionary:
+     dictionary['current_session']=None
+
+ stroke_width = st.sidebar.slider("Stroke width: ", 1, 25, 20)
+ if drawing_mode == 'point':
+     point_display_radius = st.sidebar.slider("Point display radius: ", 1, 25, 3)
+ stroke_color = '#000000'
+ bg_color = "#eee"
+
+
+ column1,column2=st.columns([0.7,0.35])
+
+ with open("DataBase/datetimeRecords.json","r") as read:
+     dateTimeRecord=json.load(read)
+ with column2:
+     st.header("HISTORY")
+     tab1,tab2,tab3,tab4=st.tabs(["CHAT HISTORY","IMAGES","PROMPT IMPROVEMENT","LOGIN"])
+     with tab1:
+
+
+
+         if not len(dictionary['every_prompt_with_val']):
+             st.header("I will store all the chat for the current session")
+             with open("lotte_animation_saver/animation_4.json") as read:
+                 url_json=json.load(read)
+             st_lottie(url_json,height = 400)
+         else:
+
+             with st.container(height=600):
+
+
+                 for index,prompts_ in enumerate(dictionary['every_prompt_with_val'][::-1]):
+                     if prompts_[-1]=="@working":
+                         if index==0:
+                             st.write(prompts_[0].upper())
+                             data_need=st.write_stream(llm_text_response()(prompts_[0]))
+                             dictionary['every_prompt_with_val'][-1]=(prompts_[0],str(data_need))
+
+                     elif isinstance(prompts_[-1],str):
+                         if index==0:
+                             st.text_area(label=prompts_[0].upper(),value=prompts_[-1],height=500)
+                         else:
+                             st.text_area(label=prompts_[0].upper(),value=prompts_[-1])
+
+                     else:
+                         st.write(prompts_[0].upper())
+                         with st.container(height=400):
+                             format1,format2=st.columns([0.2,0.8])
+                             with format1:
+                                 new_img=Image.open("ALL_image_formation/image_gen.png")
+                                 st.write("<br>",unsafe_allow_html=True)
+                                 size = min(new_img.size)
+                                 mask = Image.new('L', (size, size), 0)
+                                 draw = ImageDraw.Draw(mask)
+                                 draw.ellipse((0, 0, size, size), fill=255)
+
+                                 image = new_img.crop((0, 0, size, size))
+                                 image.putalpha(mask)
+                                 st.image(image)
+                             with format2:
+
+                                 st.write("<br>",unsafe_allow_html=True)
+                                 size = min(prompts_[-1].size)
+                                 mask = Image.new('L', (size, size), 0)
+                                 draw = ImageDraw.Draw(mask)
+                                 draw.ellipse((0, 0, size, size), fill=255)
+
+                                 # Crop the image to a square and apply the mask
+                                 image = prompts_[-1].crop((0, 0, size, size))
+                                 image.putalpha(mask)
+                                 st.image(image)
+
+     with tab2:
+
+         if "current_image" in dictionary and len(dictionary['current_image']):
+             with st.container(height=600):
+                 dictinory_length=len(dictionary['current_image'])
+
+                 img_selection = image_select(
+                     label="",
+                     images=dictionary['current_image'] if len(dictionary['current_image'])!=0 else None,
+                 )
+                 if img_selection in dictionary['current_image']:
+                     dictionary['current_image'].remove(img_selection)
+                     dictionary['current_image'].insert(0,img_selection)
+                     # st.rerun()
+
+                 img_selection.save("image.png")
+                 with open("image.png", "rb") as file:
+                     downl=st.download_button(label="DOWNLOAD",data=file,file_name="image.png",mime="image/png")
+                 os.remove("image.png")
+         else:
+
+             st.header("This section will store the updated images")
+             with open("lotte_animation_saver/animation_1.json") as read:
+                 url_json=json.load(read)
+             st_lottie(url_json,height = 400)
+     with tab3:
+         if len(dictionary['prompt_collection'])!=0:
+             with st.container(height=600):
+                 prompt_selection=st.selectbox(label="Select the prompt for improvement",options=["Mention below are prompt history"]+dictionary["prompt_collection"],index=0)
+
+                 if prompt_selection!="Mention below are prompt history":
+
+                     generated_prompt=prompt_improvment(prompt_selection)
+                     dictionary['generated_image_prompt'].append(generated_prompt)
+                     st.write_stream(generated_prompt)
+
+         else:
+
+             st.header("This section will provide prompt improvement section")
+             with open("lotte_animation_saver/animation_3.json") as read:
+                 url_json=json.load(read)
+             st_lottie(url_json,height = 400)
+     with tab4:
+
+         # with st.container(height=600):
+
+         if not dictionary['user'] :
+             with st.form("my_form"):
+                 # st.header("Please login for save your data")
+                 with open("lotte_animation_saver/animation_5.json") as read:
+                     url_json=json.load(read)
+                 st_lottie(url_json,height = 200)
+                 user_id = st.text_input("user login")
+                 password = st.text_input("password",type="password")
+                 submitted_login = st.form_submit_button("Submit")
+                 # Every form must have a submit button.
+
+                 if submitted_login:
+                     with open("DataBase/login.json","r") as read:
+                         login_base=json.load(read)
+                     if user_id in login_base and login_base[user_id]==password:
+                         dictionary['user']=user_id
+                         st.rerun()
+                     else:
+                         st.error("userid or password incorrect")
+
+             st.write("working")
+             modal = Modal(
+                 "Sign up",
+                 key="demo-modal",
+
+                 padding=10, # default value
+                 max_width=600 # default value
+             )
+             open_modal = st.button("sign up")
+             if open_modal:
+                 modal.open()
+
+             if modal.is_open():
+                 with modal.container():
+
+                     with st.form("my_form1"):
+                         sign_up_column_left,sign_up_column_right=st.columns(2)
+                         with sign_up_column_left:
+                             with open("lotte_animation_saver/animation_6.json") as read:
+                                 url_json=json.load(read)
+                             st_lottie(url_json,height = 200)
+
+                         with sign_up_column_right:
+                             user_id = st.text_input("user login")
+                             password = st.text_input("password",type="password")
+                             submitted_signup = st.form_submit_button("Submit")
+
+                         if submitted_signup:
+                             with open("DataBase/login.json","r") as read:
+                                 login_base=json.load(read)
+                             if not login_base:
+                                 login_base={}
+                             if user_id not in login_base:
+                                 login_base[user_id]=password
+                                 with open("DataBase/login.json","w") as write:
+                                     json.dump(login_base,write,indent=2)
+                                 st.success("you are a part now")
+                                 dictionary['user']=user_id
+                                 modal.close()
+                             else:
+                                 st.error("user id already exists")
+         else:
+             st.header("REPORTED ISSUES")
+             with st.container(height=370):
+
+                 with open("DataBase/datetimeRecords.json") as feedback:
+                     temp_issue=json.load(feedback)
+
+                 arranged_feedback=reversed(temp_issue['database'])
+
+                 for report in arranged_feedback:
+                     user_columns,user_feedback=st.columns([0.3,0.8])
+
+                     with user_columns:
+                         st.write(report[-1])
+                     with user_feedback:
+                         st.write(report[1])
+
+             feedback=st.text_area("Feedback Report and Improvement",placeholder="")
+             summit=st.button("submit")
+             if summit:
+                 with open("DataBase/datetimeRecords.json","r") as feedback_sumit:
+                     temp_issue_submit=json.load(feedback_sumit)
+                 if "database" not in temp_issue_submit:
+                     temp_issue_submit["database"]=[]
+                 temp_issue_submit["database"].append((str(datetime.now()),feedback,dictionary['user']))
+                 with open("DataBase/datetimeRecords.json","w") as feedback_sumit:
+                     json.dump(temp_issue_submit,feedback_sumit)
+
+
+
+                 # st.rerun()
+
+
+
+
+
+
+ bg_image = st.sidebar.file_uploader("PLEASE UPLOAD IMAGE FOR EDITING:", type=["png", "jpg"])
+ bg_doc = st.sidebar.file_uploader("PLEASE UPLOAD DOC FOR PPT/PDF/STORY:", type=["pdf","xlsx"])
+
+
+ if "bg_image" not in dictionary:
+     dictionary["bg_image"]=None
+
+ if img_selection and dictionary['bg_image']==bg_image:
+     gen_image=dictionary['current_image'][0]
+ else:
+     if bg_image:
+         gen_image=Image.open(bg_image)
+     else:
+         gen_image=None
+
+
+
+
+
+
+ with column1:
+     # Create a canvas component
+     changes,implementation,current=st.columns([0.3,0.6,0.3])
+
+     with implementation:
+         st.write("<br>"*5,unsafe_allow_html=True)
+         canvas_result = st_canvas(
+             fill_color="rgba(255, 165, 0, 0.3)", # Fixed fill color with some opacity
+             stroke_width=stroke_width,
+             stroke_color=stroke_color,
+             background_color=bg_color,
+             background_image=gen_image if gen_image else Image.open("ALL_image_formation/image_gen.png"),
+             update_streamlit=True,
+             height=500,
+             width=500,
+             drawing_mode=drawing_mode,
+             point_display_radius=point_display_radius if drawing_mode == 'point' else 0,
+             key="canvas",
+         )
+
+
+
+
+
+ with column1:
+     # prompt=st.text_area("Please provide the prompt")
+     prompt=st.chat_input("Please provide the prompt")
+
+     negative_prompt="the black masked area"
+
+     # run=st.button("run_experiment")
+
+
+
+     if canvas_result.image_data is not None:
+         if prompt:
+
+             text_or_image=multimodel_output(prompt)
+
+             if text_or_image=="LABEL_0":
+
+                 if "generated_image_prompt" not in dictionary:
+                     dictionary['generated_image_prompt']=[]
+                 if prompt not in dictionary['prompt_collection'] and prompt not in dictionary['generated_image_prompt']:
+                     dictionary['prompt_collection']=[prompt]+dictionary['prompt_collection']
+                 new_size=np.array(canvas_result.image_data).shape[:2]
+                 new_size=(new_size[-1],new_size[0])
+                 if bg_image!=dictionary["bg_image"] :
+                     dictionary["bg_image"]=bg_image
+                     if bg_image!=None:
+                         imf=Image.open(bg_image).resize(new_size)
+                     else:
+                         with open("lotte_animation_saver/animation_4.json") as read:
+                             url_json=json.load(read)
+                         st_lottie(url_json)
+                         imf=Image.open("ALL_image_formation/home_screen.jpg").resize(new_size)
+                 else:
+                     if len(dictionary['current_image'])!=0:
+                         imf=dictionary['current_image'][0]
+                     else:
+                         with open("lotte_animation_saver/animation_4.json") as read:
+                             url_json=json.load(read)
+                         st_lottie(url_json)
+                         imf=Image.open("ALL_image_formation/home_screen.jpg")
+
+                 negative_image =d4_to_3d(np.array(canvas_result.image_data))
+                 if np.sum(negative_image)==0:
+                     negative_image=Image.fromarray(np.where(negative_image == False, True, negative_image))
+                 else:
+                     negative_image=Image.fromarray(negative_image)
+
+                 modifiedValue=model_out_put(imf,negative_image,prompt,negative_prompt)
+                 modifiedValue.save("ALL_image_formation/current_session_image.png")
+                 dictionary['current_image']=[modifiedValue]+dictionary['current_image']
+                 dictionary['every_prompt_with_val'].append((prompt,modifiedValue))
+                 st.rerun()
+             else:
+                 st.write("nothing important")
+                 modifiedValue="@working"
+                 dictionary['every_prompt_with_val'].append((prompt,modifiedValue))
+                 st.rerun()
+             # st.image(modifiedValue,width=300)
+
+
+
+     if canvas_result.json_data is not None:
+         objects = pd.json_normalize(canvas_result.json_data["objects"]) # need to convert obj to str because PyArrow
+         for col in objects.select_dtypes(include=['object']).columns:
+             objects[col] = objects[col].astype("str")
+
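
The core editing flow in app.py turns the RGBA array returned by st_canvas into a boolean mask and passes it, together with the background image, to the Kandinsky inpainting pipeline. Below is a condensed, hypothetical sketch of that flow; the vectorized mask construction stands in for the nested-loop d4_to_3d helper above, and the function name inpaint is illustrative:

import numpy as np
import torch
from PIL import Image
from diffusers import AutoPipelineForInpainting

# Same checkpoint and dtype as load_model() in app.py.
pipe = AutoPipelineForInpainting.from_pretrained(
    "kandinsky-community/kandinsky-2-2-decoder-inpaint", torch_dtype=torch.float16
).to("cuda")

def inpaint(background: Image.Image, canvas_rgba: np.ndarray, prompt: str) -> Image.Image:
    # Any pixel the user painted on the canvas becomes part of the mask.
    mask = Image.fromarray((canvas_rgba > 0).any(axis=-1))
    return pipe(prompt=prompt, image=background, mask_image=mask).images[0]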
config.json ADDED
File without changes
dataset_formation.ipynb ADDED
@@ -0,0 +1,524 @@
+ {
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import pandas as pd\n",
+ "import json\n",
+ "import random"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "<>:1: SyntaxWarning: invalid escape sequence '\\c'\n",
+ "<>:3: SyntaxWarning: invalid escape sequence '\\d'\n",
+ "<>:1: SyntaxWarning: invalid escape sequence '\\c'\n",
+ "<>:3: SyntaxWarning: invalid escape sequence '\\d'\n",
+ "C:\\Users\\rajst\\AppData\\Local\\Temp\\ipykernel_11856\\1444736939.py:1: SyntaxWarning: invalid escape sequence '\\c'\n",
+ " image_data=pd.read_csv(\"data_set_formation\\custom_prompts_df.csv\")\n",
+ "C:\\Users\\rajst\\AppData\\Local\\Temp\\ipykernel_11856\\1444736939.py:3: SyntaxWarning: invalid escape sequence '\\d'\n",
+ " with open(\"data_set_formation\\data.json\") as read:\n"
+ ]
+ }
+ ],
+ "source": [
+ "image_data=pd.read_csv(\"data_set_formation\\custom_prompts_df.csv\")\n",
+ "\n",
+ "with open(\"data_set_formation\\data.json\") as read:\n",
+ "    text_data=json.load(read)\n",
+ "# prompt_data="
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "<div>\n",
+ "<style scoped>\n",
+ " .dataframe tbody tr th:only-of-type {\n",
+ " vertical-align: middle;\n",
+ " }\n",
+ "\n",
+ " .dataframe tbody tr th {\n",
+ " vertical-align: top;\n",
+ " }\n",
+ "\n",
+ " .dataframe thead th {\n",
+ " text-align: right;\n",
+ " }\n",
+ "</style>\n",
+ "<table border=\"1\" class=\"dataframe\">\n",
+ " <thead>\n",
+ " <tr style=\"text-align: right;\">\n",
+ " <th></th>\n",
+ " <th>prompt</th>\n",
+ " <th>image_file</th>\n",
+ " </tr>\n",
+ " </thead>\n",
+ " <tbody>\n",
+ " <tr>\n",
+ " <th>0</th>\n",
+ " <td>painting of King Henry VIII carrying an umbrella</td>\n",
+ " <td>images/0/custom_0_0.png</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>1</th>\n",
+ " <td>Fox Mulder and a chinchilla walking down a roa...</td>\n",
+ " <td>images/0/custom_1_0.png</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>2</th>\n",
+ " <td>photo of a gas burner by a soft pretzel</td>\n",
+ " <td>images/0/custom_2_0.png</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>3</th>\n",
+ " <td>photo of Shyster standing street lights on at ...</td>\n",
+ " <td>images/0/custom_3_0.png</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>4</th>\n",
+ " <td>cute young man eating a plant over a fence in ...</td>\n",
+ " <td>images/0/custom_5_0.png</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>...</th>\n",
+ " <td>...</td>\n",
+ " <td>...</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>99995</th>\n",
+ " <td>photo of a natural kite at Westminster Abbey</td>\n",
+ " <td>images/102/custom_102419_0.png</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>99996</th>\n",
+ " <td>smooth rum with a clock in the style of a digi...</td>\n",
+ " <td>images/102/custom_102420_0.png</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>99997</th>\n",
+ " <td>a lovable elephant by the Gamla Stan, Stockholm</td>\n",
+ " <td>images/102/custom_102421_0.png</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>99998</th>\n",
+ " <td>photo of Courtney Love with a hot dog</td>\n",
+ " <td>images/102/custom_102422_0.png</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>99999</th>\n",
+ " <td>Maniac jumping on a skateboard near a fence</td>\n",
+ " <td>images/102/custom_102423_0.png</td>\n",
+ " </tr>\n",
+ " </tbody>\n",
+ "</table>\n",
+ "<p>100000 rows × 2 columns</p>\n",
+ "</div>"
+ ],
+ "text/plain": [
+ " prompt \\\n",
+ "0 painting of King Henry VIII carrying an umbrella \n",
+ "1 Fox Mulder and a chinchilla walking down a roa... \n",
+ "2 photo of a gas burner by a soft pretzel \n",
+ "3 photo of Shyster standing street lights on at ... \n",
+ "4 cute young man eating a plant over a fence in ... \n",
+ "... ... \n",
+ "99995 photo of a natural kite at Westminster Abbey \n",
+ "99996 smooth rum with a clock in the style of a digi... \n",
+ "99997 a lovable elephant by the Gamla Stan, Stockholm \n",
+ "99998 photo of Courtney Love with a hot dog \n",
+ "99999 Maniac jumping on a skateboard near a fence \n",
+ "\n",
+ " image_file \n",
+ "0 images/0/custom_0_0.png \n",
+ "1 images/0/custom_1_0.png \n",
+ "2 images/0/custom_2_0.png \n",
+ "3 images/0/custom_3_0.png \n",
+ "4 images/0/custom_5_0.png \n",
+ "... ... \n",
+ "99995 images/102/custom_102419_0.png \n",
+ "99996 images/102/custom_102420_0.png \n",
+ "99997 images/102/custom_102421_0.png \n",
+ "99998 images/102/custom_102422_0.png \n",
+ "99999 images/102/custom_102423_0.png \n",
+ "\n",
+ "[100000 rows x 2 columns]"
+ ]
+ },
+ "execution_count": 7,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "image_data"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "data_dict={\"prompt\":[],\"label\":[]}"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "queries = [\n",
+ " # General Descriptions\n",
+ " \"Generate a beautiful sunset over the ocean.\",\n",
+ " \"Create a futuristic cityscape at night.\",\n",
+ " \"Show a cozy cabin in the middle of a snowy forest.\",\n",
+ " \"Draw a tropical beach with palm trees and clear blue water.\",\n",
+ " \"Design a medieval castle on a hilltop.\",\n",
+ " \n",
+ " # Character-Focused Queries\n",
+ " \"Generate a young woman with long red hair in a fantasy setting.\",\n",
+ " \"Create a warrior in futuristic armor holding a glowing sword.\",\n",
+ " \"Draw a friendly robot helping people in a park.\",\n",
+ " \"Design a wise old wizard with a long beard and staff.\",\n",
+ " \"Illustrate a child playing with a puppy in a garden.\",\n",
+ " \n",
+ " # Animal and Nature Queries\n",
+ " \"Show a majestic tiger in a dense jungle.\",\n",
+ " \"Create a flock of birds flying over a mountain range.\",\n",
+ " \"Draw a koi fish pond with colorful fish.\",\n",
+ " \"Generate a close-up of a butterfly on a flower.\",\n",
+ " \"Illustrate a desert landscape with cacti and a setting sun.\",\n",
+ " \n",
+ " # Architectural and Object Queries\n",
+ " \"Design a futuristic spaceship hovering above Earth.\",\n",
+ " \"Create a vintage car driving on a country road.\",\n",
+ " \"Draw a small café on a busy European street.\",\n",
+ " \"Generate a treehouse in the middle of a forest.\",\n",
+ " \"Show a steampunk-style clock tower.\",\n",
+ " \n",
+ " # Abstract or Conceptual Queries\n",
+ " \"Create an image representing the concept of time.\",\n",
+ " \"Design a surreal landscape with floating islands.\",\n",
+ " \"Generate an artwork of colors blending like a rainbow.\",\n",
+ " \"Illustrate the feeling of calmness in visual form.\",\n",
+ " \"Show a dreamlike city made of crystal.\",\n",
+ " \n",
+ " # Cultural or Historical Themes\n",
+ " \"Illustrate an ancient Egyptian pyramid under the stars.\",\n",
+ " \"Show a samurai in traditional armor standing in a bamboo forest.\",\n",
+ " \"Draw a Viking ship sailing through a storm.\",\n",
+ " \"Create an Indian temple with intricate carvings.\",\n",
+ " \"Generate a Renaissance-style painting of a feast.\",\n",
+ " \n",
+ " # Event or Scene Queries\n",
+ " \"Show a birthday party with balloons and a cake.\",\n",
+ " \"Create an image of people camping under the stars.\",\n",
+ " \"Draw a bustling market in a small village.\",\n",
+ " \"Illustrate a concert with a crowd and colorful lights.\",\n",
+ " \"Generate an image of a wedding ceremony by the beach.\",\n",
+ " \n",
+ " # Seasonal and Holiday Themes\n",
+ " \"Show a Christmas scene with a decorated tree and snow.\",\n",
+ " \"Generate a spooky Halloween setting with pumpkins and ghosts.\",\n",
+ " \"Create a spring meadow full of flowers and butterflies.\",\n",
+ " \"Draw an autumn forest with falling leaves.\",\n",
+ " \"Illustrate a New Year celebration with fireworks.\"\n",
+ "]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "for i in queries:\n",
+ "    data_dict['prompt'].append(i.lower())\n",
+ "    data_dict['label'].append(\"image\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "counter=0\n",
+ "detail_list=[\"painting\",\"image\",\"photo\",\"frame\",\"picture\",\"potrait\",\"pic\",\"snapshot\"]\n",
+ "for i in image_data['prompt']:\n",
+ "    if any([paint_key in i for paint_key in [\"painting\",\"image\",\"photo\",\"frame\",\"picture\",\"potrait\",\"pic\",\"snapshot\"]]):\n",
+ "        data_dict['prompt'].append(i.lower().replace(random.choice(detail_list),\"image\"))\n",
+ "        data_dict['label'].append(\"image\")\n",
+ "        counter+=1\n",
+ "        if counter==20000:\n",
+ "            break\n",
+ "counter=0\n",
+ "for j in text_data[:20000]:\n",
+ "    data_dict['prompt'].append(j['note'].lower())\n",
+ "    data_dict['label'].append(\"text\")\n",
+ "    counter+=1\n",
+ "    if counter==15000:\n",
+ "        break"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "counter=0\n",
+ "for z in text_data[15000:]:\n",
+ "    if any([paint_key in z['note'] for paint_key in [\"painting\",\"image\",\"photo\",\"frame\",\"picture\",\"potrait\",\"pic\",\"snapshot\"]]):\n",
+ "        data_dict['prompt'].append(z['note'].lower())\n",
+ "        data_dict['label'].append(\"text\")\n",
+ "        counter+=1\n",
+ "        if counter==5000:\n",
+ "            break"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "<div>\n",
+ "<style scoped>\n",
+ " .dataframe tbody tr th:only-of-type {\n",
+ " vertical-align: middle;\n",
+ " }\n",
+ "\n",
+ " .dataframe tbody tr th {\n",
+ " vertical-align: top;\n",
+ " }\n",
+ "\n",
+ " .dataframe thead th {\n",
+ " text-align: right;\n",
+ " }\n",
+ "</style>\n",
+ "<table border=\"1\" class=\"dataframe\">\n",
+ " <thead>\n",
+ " <tr style=\"text-align: right;\">\n",
+ " <th></th>\n",
+ " <th>prompt</th>\n",
+ " <th>label</th>\n",
+ " </tr>\n",
+ " </thead>\n",
+ " <tbody>\n",
+ " <tr>\n",
+ " <th>0</th>\n",
+ " <td>generate a beautiful sunset over the ocean.</td>\n",
+ " <td>image</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>1</th>\n",
+ " <td>create a futuristic cityscape at night.</td>\n",
+ " <td>image</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>2</th>\n",
+ " <td>show a cozy cabin in the middle of a snowy for...</td>\n",
+ " <td>image</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>3</th>\n",
+ " <td>draw a tropical beach with palm trees and clea...</td>\n",
+ " <td>image</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>4</th>\n",
+ " <td>design a medieval castle on a hilltop.</td>\n",
+ " <td>image</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>...</th>\n",
+ " <td>...</td>\n",
+ " <td>...</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>40035</th>\n",
+ " <td>i was watching a documentary and it spoke of s...</td>\n",
+ " <td>text</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>40036</th>\n",
+ " <td>should i buy a dslr or a new phone for photogr...</td>\n",
+ " <td>text</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>40037</th>\n",
+ " <td>okay, i see. so it depends on how serious i am...</td>\n",
+ " <td>text</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>40038</th>\n",
+ " <td>it is just to take photos of my family</td>\n",
+ " <td>text</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <th>40039</th>\n",
+ " <td>is there any topical treatment i can apply to ...</td>\n",
+ " <td>text</td>\n",
+ " </tr>\n",
+ " </tbody>\n",
+ "</table>\n",
+ "<p>40040 rows × 2 columns</p>\n",
+ "</div>"
+ ],
+ "text/plain": [
+ " prompt label\n",
+ "0 generate a beautiful sunset over the ocean. image\n",
+ "1 create a futuristic cityscape at night. image\n",
+ "2 show a cozy cabin in the middle of a snowy for... image\n",
+ "3 draw a tropical beach with palm trees and clea... image\n",
+ "4 design a medieval castle on a hilltop. image\n",
+ "... ... ...\n",
+ "40035 i was watching a documentary and it spoke of s... text\n",
+ "40036 should i buy a dslr or a new phone for photogr... text\n",
+ "40037 okay, i see. so it depends on how serious i am... text\n",
+ "40038 it is just to take photos of my family text\n",
+ "40039 is there any topical treatment i can apply to ... text\n",
+ "\n",
+ "[40040 rows x 2 columns]"
+ ]
+ },
+ "execution_count": 13,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "pd.DataFrame(data_dict)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import os\n",
+ "folder_path = 'formatted_data/'\n",
+ "\n",
+ "# Get the list of all files in the folder\n",
+ "file_names = os.listdir(folder_path)\n",
+ "max_file_name=max([int(i.split(\"_\")[-1][:-4]) for i in file_names])"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Confusing prompts"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# from langchain_community.llms import Ollama\n",
+ "# llm = Ollama(model=\"llava:34b \",num_ctx=10000)\n",
+ "# enhancement=\"I need to train a model to distinguish between text and images. Please create a list of challenging prompts where the model needs to decide whether to generate text or identify an image.\"\n",
+ "# prompt = enhancement\n",
+ "# # result = llm.invoke(prompt)\n",
+ "# value=llm.invoke(prompt)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 16,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# print(str(value))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 17,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "pd.DataFrame(data_dict).to_csv(\"formatted_data/data_\"+str(max_file_name+1)+\".csv\",index=False)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "env",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.12.4"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+ }
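
The CSV written above is the labeled prompt/intent data later consumed to train the text-classification model that app.py loads from model_collection (the actual training lives in intent_model.ipynb, whose diff is not rendered here). A small, hypothetical sketch of loading and splitting that CSV; the file name data_1.csv and the split ratio are illustrative only:

import pandas as pd
from sklearn.model_selection import train_test_split

df = pd.read_csv("formatted_data/data_1.csv")
# Stratify on the label column so "image" and "text" prompts stay balanced in both splits.
train_df, test_df = train_test_split(df, test_size=0.2, stratify=df["label"], random_state=42)
print(train_df["label"].value_counts())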
image_experimentation.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
intent_model.ipynb ADDED
The diff for this file is too large to render. See raw diff