KingNish committed on
Commit
b07c324
·
verified ·
1 Parent(s): 383dd71

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +31 -34
app.py CHANGED
@@ -41,39 +41,6 @@ def sample_frames(video_file) :
41
  frames=[]
42
  return frames
43
 
44
- def llava(user_prompt, history):
45
- image = user_prompt["files"][-1]
46
- txt = user_prompt["text"]
47
- img = user_prompt["files"]
48
-
49
- video_extensions = ("avi", "mp4", "mov", "mkv", "flv", "wmv", "mjpeg", "wav", "gif", "webm", "m4v", "3gp")
50
- image_extensions = Image.registered_extensions()
51
- image_extensions = tuple([ex for ex, f in image_extensions.items()])
52
-
53
- if image.endswith(video_extensions):
54
- image = sample_frames(image)
55
- image_tokens = "<image>" * int(len(image))
56
- prompt = f"<|im_start|>user {image_tokens}\n{user_prompt}<|im_end|><|im_start|>assistant"
57
-
58
- elif image.endswith(image_extensions):
59
- image = Image.open(image).convert("RGB")
60
- prompt = f"<|im_start|>user <image>\n{user_prompt}<|im_end|><|im_start|>assistant"
61
-
62
- print(len(image))
63
-
64
- inputs = processor(prompt, image, return_tensors="pt")
65
- streamer = TextIteratorStreamer(processor, skip_prompt=True, **{"skip_special_tokens": True})
66
- generation_kwargs = dict(inputs, streamer=streamer, max_new_tokens=1024)
67
- generated_text = ""
68
-
69
- thread = Thread(target=model.generate, kwargs=generation_kwargs)
70
- thread.start()
71
-
72
- buffer = ""
73
- for new_text in streamer:
74
- buffer += new_text
75
- yield buffer
76
-
77
  def extract_text_from_webpage(html_content):
78
  soup = BeautifulSoup(html_content, 'html.parser')
79
  for tag in soup(["script", "style", "header", "footer"]):
@@ -122,7 +89,37 @@ def respond(message, history):
122
 
123
  # Handle image processing
124
  if message["files"]:
125
- llava(message, history)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
126
 
127
 
128
  # Define function metadata for user interface
 
41
  frames=[]
42
  return frames
43
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
44
  def extract_text_from_webpage(html_content):
45
  soup = BeautifulSoup(html_content, 'html.parser')
46
  for tag in soup(["script", "style", "header", "footer"]):
 
89
 
90
  # Handle image processing
91
  if message["files"]:
92
+ image = user_prompt["files"][-1]
93
+ txt = user_prompt["text"]
94
+ img = user_prompt["files"]
95
+
96
+ video_extensions = ("avi", "mp4", "mov", "mkv", "flv", "wmv", "mjpeg", "wav", "gif", "webm", "m4v", "3gp")
97
+ image_extensions = Image.registered_extensions()
98
+ image_extensions = tuple([ex for ex, f in image_extensions.items()])
99
+
100
+ if image.endswith(video_extensions):
101
+ image = sample_frames(image)
102
+ image_tokens = "<image>" * int(len(image))
103
+ prompt = f"<|im_start|>user {image_tokens}\n{user_prompt}<|im_end|><|im_start|>assistant"
104
+
105
+ elif image.endswith(image_extensions):
106
+ image = Image.open(image).convert("RGB")
107
+ prompt = f"<|im_start|>user <image>\n{user_prompt}<|im_end|><|im_start|>assistant"
108
+
109
+ print(len(image))
110
+
111
+ inputs = processor(prompt, image, return_tensors="pt")
112
+ streamer = TextIteratorStreamer(processor, skip_prompt=True, **{"skip_special_tokens": True})
113
+ generation_kwargs = dict(inputs, streamer=streamer, max_new_tokens=1024)
114
+ generated_text = ""
115
+
116
+ thread = Thread(target=model.generate, kwargs=generation_kwargs)
117
+ thread.start()
118
+
119
+ buffer = ""
120
+ for new_text in streamer:
121
+ buffer += new_text
122
+ yield buffer
123
 
124
 
125
  # Define function metadata for user interface