qnguyen3 committed
Commit 53eb154
Parent: 639e625

Update app.py

Files changed (1)
app.py: +6 -4
app.py CHANGED
@@ -10,7 +10,7 @@ import spaces
 import subprocess
 subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
 
-torch.set_default_device('cuda')
+# torch.set_default_device('cuda')
 
 tokenizer = AutoTokenizer.from_pretrained(
     'qnguyen3/nanoLLaVA',
@@ -22,6 +22,8 @@ model = AutoModelForCausalLM.from_pretrained(
     device_map='auto',
     trust_remote_code=True)
 
+model.to("cuda:0")
+
 class KeywordsStoppingCriteria(StoppingCriteria):
     def __init__(self, keywords, tokenizer, input_ids):
         self.keywords = keywords
@@ -93,14 +95,14 @@ def bot_streaming(message, history):
         tokenize=False,
         add_generation_prompt=True)
     text_chunks = [tokenizer(chunk).input_ids for chunk in text.split('<image>')]
-    input_ids = torch.tensor(text_chunks[0] + [-200] + text_chunks[1], dtype=torch.long).unsqueeze(0)
+    input_ids = torch.tensor(text_chunks[0] + [-200] + text_chunks[1], dtype=torch.long).unsqueeze(0).to("cuda:0")
     stop_str = '<|im_end|>'
     keywords = [stop_str]
     stopping_criteria = KeywordsStoppingCriteria(keywords, tokenizer, input_ids)
     streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
 
-    image_tensor = model.process_images([image], model.config).to(dtype=model.dtype)
-    generation_kwargs = dict(input_ids=input_ids.to("cuda:0"), images=image_tensor, streamer=streamer, max_new_tokens=100, stopping_criteria=[stopping_criteria])
+    image_tensor = model.process_images([image], model.config).to("cuda:0")
+    generation_kwargs = dict(input_ids=input_ids, images=image_tensor, streamer=streamer, max_new_tokens=100, stopping_criteria=[stopping_criteria])
     generated_text = ""
     thread = Thread(target=model.generate, kwargs=generation_kwargs)
     thread.start()
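
For reference, the pattern this change settles on can be summarized as: keep `device_map='auto'` at load time, pin the model to `cuda:0` explicitly, and move both `input_ids` and the processed image tensor to that same device before `model.generate` runs in a background streaming thread. Below is a minimal sketch of that pattern outside the Gradio UI; the chat message, the image path, and the `float16` dtype are assumptions for illustration, and the `KeywordsStoppingCriteria` from the file is omitted for brevity.

```python
# Minimal sketch of the device-placement pattern after this commit:
# pin the model to cuda:0 and move every generation input to the same device.
from threading import Thread

import torch
from PIL import Image
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

device = "cuda:0"

tokenizer = AutoTokenizer.from_pretrained('qnguyen3/nanoLLaVA', trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    'qnguyen3/nanoLLaVA',
    torch_dtype=torch.float16,   # assumed dtype; not shown in the diff above
    device_map='auto',
    trust_remote_code=True)
model.to(device)                 # replaces the former torch.set_default_device('cuda')

# Build the prompt and splice in the image placeholder token (-200), as in app.py.
messages = [{"role": "user", "content": "<image>\nDescribe this image."}]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
text_chunks = [tokenizer(chunk).input_ids for chunk in text.split('<image>')]
input_ids = torch.tensor(text_chunks[0] + [-200] + text_chunks[1],
                         dtype=torch.long).unsqueeze(0).to(device)

# Preprocess the image with the model's own helper and keep it on the same device.
image = Image.open('example.jpg')    # hypothetical input image
image_tensor = model.process_images([image], model.config).to(device)

# Stream tokens from a background generate() thread, as the Space does.
streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
thread = Thread(target=model.generate, kwargs=dict(
    input_ids=input_ids, images=image_tensor, streamer=streamer, max_new_tokens=100))
thread.start()
for new_text in streamer:
    print(new_text, end='', flush=True)
```

Keeping every tensor on one explicit device in this way is meant to avoid the device-mismatch issues that can arise when `torch.set_default_device('cuda')` is combined with a model loaded via `device_map='auto'`.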