freQuensy23 committed on
Commit
dfa8941
·
1 Parent(s): cc03544

Fix add logs

Browse files
Files changed (2) hide show
  1. app.py +2 -2
  2. generators.py +2 -2
app.py CHANGED
@@ -21,8 +21,8 @@ async def handle(system_input: str, user_input: str):
21
  buffers[i] += str(outputs[i])
22
 
23
  yield list(buffers) + ["", ""]
24
- yield list(buffers) + [generate_openllama(system_input, user_input),
25
- generate_bloom(system_input, user_input)]
26
 
27
 
28
  with gr.Blocks() as demo:
 
21
  buffers[i] += str(outputs[i])
22
 
23
  yield list(buffers) + ["", ""]
24
+ yield list(buffers) + [(openllama_generation := generate_openllama(system_input, user_input)), '']
25
+ yield list(buffers) + [openllama_generation, generate_bloom(system_input, user_input)]
26
 
27
 
28
  with gr.Blocks() as demo:
generators.py CHANGED
@@ -42,7 +42,7 @@ async def generate_gpt2(system_input, user_input):
42
  output = await query_llm({
43
  "inputs": (inputs:=f"{system_input}\n{user_input}"),
44
  }, "openai-community/gpt2")
45
- yield output[0]["generated_text"].replace(inputs, '')
46
 
47
 
48
  async def generate_llama2(system_input, user_input):
@@ -67,7 +67,7 @@ def generate_openllama(system_input, user_input):
67
  model = LlamaForCausalLM.from_pretrained(
68
  model_path, torch_dtype=torch.float16, device_map='cuda',
69
  )
70
- # model = model.to("cuda")
71
  input_text = f"{system_input}\n{user_input}"
72
  input_ids = tokenizer(input_text, return_tensors="pt").input_ids.to("cuda")
73
  output = model.generate(input_ids, max_length=128)
 
42
  output = await query_llm({
43
  "inputs": (inputs:=f"{system_input}\n{user_input}"),
44
  }, "openai-community/gpt2")
45
+ yield output[0]["generated_text"]
46
 
47
 
48
  async def generate_llama2(system_input, user_input):
 
67
  model = LlamaForCausalLM.from_pretrained(
68
  model_path, torch_dtype=torch.float16, device_map='cuda',
69
  )
70
+ print('model openllama loaded')
71
  input_text = f"{system_input}\n{user_input}"
72
  input_ids = tokenizer(input_text, return_tensors="pt").input_ids.to("cuda")
73
  output = model.generate(input_ids, max_length=128)