vincentclaes committed
Commit: 994c940
Parent: 1fdb555

try to avoid cuda OO error

Files changed (1): app.py (+13 −13)
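
Reading the diff below: the commit moves prompt construction, tokenization, GenerationConfig setup, and output decoding inside the torch.no_grad() block that previously wrapped only model.generate(), so the whole generation path runs with autograd disabled; the torch.cuda.empty_cache() calls before and after generation are kept.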
app.py CHANGED
@@ -90,18 +90,18 @@ def evaluate(
 ):
     content = process_webpage(url=url)
     # avoid GPU memory overflow
-    torch.cuda.empty_cache()
-    prompt = generate_prompt(instruction, content)
-    inputs = tokenizer(prompt, return_tensors="pt")
-    input_ids = inputs["input_ids"].to(device)
-    generation_config = GenerationConfig(
-        temperature=temperature,
-        top_p=top_p,
-        top_k=top_k,
-        num_beams=num_beams,
-        **kwargs,
-    )
     with torch.no_grad():
+        torch.cuda.empty_cache()
+        prompt = generate_prompt(instruction, content)
+        inputs = tokenizer(prompt, return_tensors="pt")
+        input_ids = inputs["input_ids"].to(device)
+        generation_config = GenerationConfig(
+            temperature=temperature,
+            top_p=top_p,
+            top_k=top_k,
+            num_beams=num_beams,
+            **kwargs,
+        )
         generation_output = model.generate(
             input_ids=input_ids,
             generation_config=generation_config,
@@ -109,8 +109,8 @@ def evaluate(
             output_scores=True,
             max_new_tokens=max_new_tokens,
         )
-    s = generation_output.sequences[0]
-    output = tokenizer.decode(s)
+        s = generation_output.sequences[0]
+        output = tokenizer.decode(s)
     # avoid GPU memory overflow
     torch.cuda.empty_cache()
     return output.split("### Response:")[1].strip()
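
The pattern the commit applies is generic: run the inference path under torch.no_grad() so autograd never builds a graph, and call torch.cuda.empty_cache() around generation so the allocator's unused cached blocks are returned between requests. Below is a minimal, self-contained sketch of that pattern, not the Space's actual code: the model name ("gpt2"), the generate_reply helper, and the beam-search settings are stand-in assumptions, and the app's process_webpage/generate_prompt helpers are omitted.

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig

device = "cuda" if torch.cuda.is_available() else "cpu"
# Stand-in model; the Space loads its own fine-tuned checkpoint.
tokenizer = AutoTokenizer.from_pretrained("gpt2")
model = AutoModelForCausalLM.from_pretrained("gpt2").to(device)

def generate_reply(prompt: str, max_new_tokens: int = 128) -> str:
    # no_grad() keeps autograd from retaining activations during generation;
    # this, more than empty_cache(), is what bounds memory use at inference.
    with torch.no_grad():
        if torch.cuda.is_available():
            # Return cached allocator blocks left over from a previous request.
            torch.cuda.empty_cache()
        input_ids = tokenizer(prompt, return_tensors="pt")["input_ids"].to(device)
        generation_config = GenerationConfig(num_beams=4, max_new_tokens=max_new_tokens)
        generation_output = model.generate(
            input_ids=input_ids,
            generation_config=generation_config,
            return_dict_in_generate=True,
            output_scores=True,
        )
        output = tokenizer.decode(generation_output.sequences[0], skip_special_tokens=True)
    if torch.cuda.is_available():
        # Release unused cached blocks again before returning; tensors that are
        # still referenced are unaffected by this call.
        torch.cuda.empty_cache()
    return output

Worth noting: empty_cache() does not free tensors that are still referenced, it only hands the caching allocator's idle blocks back to the driver, so the no_grad() wrapper (or torch.inference_mode() on newer PyTorch) does most of the work in avoiding the OOM the commit message describes.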