qnguyen3 committed on
Commit
88b9346
1 Parent(s): 53eb154

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -3
app.py CHANGED
@@ -10,7 +10,7 @@ import spaces
10
  import subprocess
11
  subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
12
 
13
- # torch.set_default_device('cuda')
14
 
15
  tokenizer = AutoTokenizer.from_pretrained(
16
  'qnguyen3/nanoLLaVA',
@@ -38,7 +38,8 @@ class KeywordsStoppingCriteria(StoppingCriteria):
38
  self.keyword_ids.append(torch.tensor(cur_keyword_ids))
39
  self.tokenizer = tokenizer
40
  self.start_len = input_ids.shape[1]
41
-
 
42
  def call_for_batch(self, output_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
43
  offset = min(output_ids.shape[1] - self.start_len, self.max_keyword_len)
44
  self.keyword_ids = [keyword_id.to(output_ids.device) for keyword_id in self.keyword_ids]
@@ -51,7 +52,8 @@ class KeywordsStoppingCriteria(StoppingCriteria):
51
  if keyword in outputs:
52
  return True
53
  return False
54
-
 
55
  def __call__(self, output_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
56
  outputs = []
57
  for i in range(output_ids.shape[0]):
 
10
  import subprocess
11
  subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
12
 
13
+ torch.set_default_device('cuda')
14
 
15
  tokenizer = AutoTokenizer.from_pretrained(
16
  'qnguyen3/nanoLLaVA',
 
38
  self.keyword_ids.append(torch.tensor(cur_keyword_ids))
39
  self.tokenizer = tokenizer
40
  self.start_len = input_ids.shape[1]
41
+
42
+ @spaces.GPU
43
  def call_for_batch(self, output_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
44
  offset = min(output_ids.shape[1] - self.start_len, self.max_keyword_len)
45
  self.keyword_ids = [keyword_id.to(output_ids.device) for keyword_id in self.keyword_ids]
 
52
  if keyword in outputs:
53
  return True
54
  return False
55
+
56
+ @spaces.GPU
57
  def __call__(self, output_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
58
  outputs = []
59
  for i in range(output_ids.shape[0]):