gururise committed on
Commit e956bee
1 Parent(s): 466213b

add application file

Files changed (2)
  1. app.py +168 -0
  2. requirements.txt +2 -0
app.py ADDED
@@ -0,0 +1,168 @@
+ import gradio as gr
+ import threading
+ import codecs
+ from datetime import datetime
+
+ import os
+ # Configure the HF cache location and CUDA allocator before the heavy imports.
+ os.environ['TRANSFORMERS_CACHE'] = '/data/.modelcache/huggingface/hub/'
+ os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:516"
+
+ from transformers import BloomTokenizerFast
+ from petals.client import DistributedBloomForCausalLM
+ import torch
+
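+ # Inference device, dtype, and the two Petals checkpoints being compared.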
+ DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
+ TORCH_DTYPE = torch.bfloat16
+ MODEL_NAMES = ["bigscience/bloom-petals", "bigscience/bloomz-petals"]
+
+ models = {}   # model_name -> (tokenizer, model), filled lazily in infer()
+ output = {}   # model_name -> generated text, written by gen_thread()
+
+
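+ # Thread worker: generates a completion for one model and stores the decoded
+ # text in the shared `output` dict, keyed by model name.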
+ def gen_thread(model_name, inputs, max_new_tokens, min_length, temperature, top_p, repetition_penalty):
+     global output
+     n_input_tokens = inputs.shape[1]
+     outputs = models[model_name][1].generate(
+         inputs,
+         max_new_tokens=max_new_tokens,
+         min_length=min_length,
+         do_sample=True,
+         temperature=temperature,
+         top_p=top_p,
+         repetition_penalty=repetition_penalty,
+     )
+     # Decode only the newly generated tokens, skipping the prompt.
+     output[model_name] = models[model_name][0].decode(outputs[0, n_input_tokens:])
+
+ def to_md(text):
+     return text.replace("\n", "<br />")
+
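+ # Gradio handler: validates the parameters, runs both models on the same
+ # prompt in parallel threads, and returns the two completions.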
+ def infer(
+     prompt,
+     min_length=2,
+     max_new_tokens=10,
+     temperature=0.1,
+     top_p=1.0,
+     repetition_penalty=1.0,
+     stop="\n",
+     num_completions=1,
+     seed=42,
+ ):
+     # Load the tokenizer and distributed model for each checkpoint lazily,
+     # on the first request.
+     if not models:
+         for model_name in MODEL_NAMES:
+             tokenizer = BloomTokenizerFast.from_pretrained(model_name)
+             model = DistributedBloomForCausalLM.from_pretrained(model_name, torch_dtype=TORCH_DTYPE)
+             model = model.to(DEVICE)
+             models[model_name] = tokenizer, model
+
+     max_new_tokens = int(max_new_tokens)
+     num_completions = int(num_completions)  # currently unused: one completion per model
+     temperature = float(temperature)
+     top_p = float(top_p)
+     repetition_penalty = float(repetition_penalty)
+
+     # Interpret escape sequences (e.g. "\\n") in the stop string, then split
+     # it into a list of comma-separated stop words (done once, not per model).
+     stop = codecs.getdecoder("unicode_escape")(stop.split(";")[0])[0]
+     stop = [x.strip(' ') for x in stop.split(',')]
+
+     assert 1 <= max_new_tokens <= 384
+     assert 0 <= min_length <= max_new_tokens
+     assert 1 <= num_completions <= 5
+     assert 0.0 <= temperature <= 1.0
+     assert 0.0 <= top_p <= 1.0
+     assert 0.9 <= repetition_penalty <= 3.0
+
+     if temperature == 0.0:
+         temperature = 0.01
+     if prompt == "":
+         prompt = " "
+
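+     # Tokenize the prompt and launch one generation thread per model.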
+     threads = []
+     print(f"START -> ({datetime.now()})\n")
+     print(f"PROMPT ({datetime.now()}):\n-------\n{prompt}\n")
+     for model_name in MODEL_NAMES:
+         inputs = models[model_name][0](prompt, return_tensors="pt")["input_ids"].to(DEVICE)
+         x = threading.Thread(target=gen_thread, args=(model_name, inputs, max_new_tokens, min_length, temperature, top_p, repetition_penalty))
+         threads.append(x)
+         x.start()
+
+     # Wait for both generation threads to finish.
+     for model_name, thread in zip(MODEL_NAMES, threads):
+         print(f"waiting on: {model_name}\n")
+         thread.join()
+         print(f"{model_name} thread done\n")
+
+
+     # Truncate each completion at the first stop word that appears.
+     for model_name in MODEL_NAMES:
+         for stop_word in stop:
+             if stop_word != '' and stop_word in output[model_name]:
+                 output[model_name] = output[model_name][:output[model_name].find(stop_word)]
+
+         print(f"--- START: {model_name} ---\n{output[model_name]}\n--- END {model_name} ---\n\n")
+
+     print(f"DONE -> ({datetime.now()})\n")
+     return output[MODEL_NAMES[0]], output[MODEL_NAMES[1]]
+
+
+ examples = [
+     [
+         # Question answering
+         '''Please answer the following question:
+ Question: What is the capital of Germany?
+ Answer:''', 1, 3, 0.2, 1.0, 1.0, "\\n,</s>"],
+     [
+         # Natural language inference
+         '''Given a pair of sentences, choose whether the two sentences agree (entailment) or disagree (contradiction) with each other.
+ Possible labels: 1. entailment 2. contradiction
+ Sentence 1: The skier was on the edge of the ramp. Sentence 2: The skier was dressed in winter clothes.
+ Label: entailment
+ Sentence 1: The boy skated down the staircase railing. Sentence 2: The boy is a newbie skater.
+ Label: contradiction
+ Sentence 1: Two middle-aged people stand by a golf hole. Sentence 2: A couple riding in a golf cart.
+ Label:''', 1, 2, 0.2, 1.0, 1.0, "\\n,</s>"]
+ ]
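+
+ # Build the Gradio interface: a prompt box and sampling controls, with
+ # side-by-side output boxes for BLOOM and BLOOMZ.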
+ def main():
+     iface = gr.Interface(
+         fn=infer,
+         allow_flagging="never",
+         inputs=[
+             gr.Textbox(lines=20),                  # prompt
+             gr.Slider(0, 256, value=1),            # min_length
+             gr.Slider(1, 384, value=20),           # max_new_tokens
+             gr.Slider(0.0, 1.0, value=0.2),        # temperature
+             gr.Slider(0.0, 1.0, value=0.9),        # top_p
+             gr.Slider(0.9, 3.0, value=1.0),        # repetition_penalty
+             gr.Textbox(lines=1, value="\\n,</s>")  # stop words (comma-separated)
+         ],
+         outputs=[gr.Textbox(lines=7, label="BLOOM OUTPUT:"), gr.Textbox(lines=7, label="BLOOMZ OUTPUT:")],
+         examples=examples,
+         cache_examples=True,
+         title="BLOOM vs BLOOMZ",
+         description='''<p>Compare outputs of the BLOOM and BLOOMZ 176-billion-parameter models using the <a href="https://petals.ml/">Petals</a> network. Please consider joining the Petals network to help speed up inference.</p><p>Big thanks to <a href="https://www.rftcapital.com">RFTCapital</a> for providing initial compute resources.</p>'''
+     )
+
+     iface.launch(debug=True, share=False)
+
+
+ if __name__ == '__main__':
+     main()
requirements.txt ADDED
@@ -0,0 +1,2 @@
+ threadpoolctl==3.1.0
+ transformers==4.25.1
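+ petals  # assumed missing dependency: app.py imports petals.client (left unpinned)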