Spaces:

nihalnayak
/

bonito

Running on Zero

Nihal Nayak committed on Jun 24, 2024

Commit

7c5f508

1 Parent(s): 7ebf5ca

wip: respond

Files changed (1) hide show

app.py CHANGED Viewed

@@ -26,6 +26,20 @@ def respond(
                 "\n<|context|>\n" + context.strip() + "\n<|task|>\n"
     )
     # for token in client.text_generation(input_text, max_tokens=max_tokens, temperature=temperature, top_p=top_p, stream=True):

                 "\n<|context|>\n" + context.strip() + "\n<|task|>\n"
     )
+    input_ids = tokenizer.encode(input_text, return_tensors="pt").to("cuda")
+    outputs = model.generate(
+        input_ids,
+        max_new_tokens=max_tokens,
+        temperature=temperature,
+        do_sample=True,
+        top_p=top_p,
+    )
+    pred_start = int(input_ids.shape[-1])
+    pred = tokenizer.decode(outputs[pred_start:], skip_special_tokens=True)
+    # replace the context
+    return pred
     # for token in client.text_generation(input_text, max_tokens=max_tokens, temperature=temperature, top_p=top_p, stream=True):