kz209 committed
Commit 1921336 · 1 Parent(s): b97cda3
pages/arena.py CHANGED
@@ -1,6 +1,49 @@
-from utils.multiple_stream import create_interface
+#from utils.multiple_stream import create_interface
+import random
+import gradio as gr
+import json
+import logging
+import gc
+import torch
+
+from utils.data import dataset
+from utils.multiple_stream import stream_data
+from summarization_playground import get_model_batch_generation
 
 def create_arena():
-    demo = create_interface()
+    with gr.Blocks() as demo:
+        with gr.Group():
+            gr.Markdown("## This is a playground to test prompts for clinical dialogue summarizations")
+
+        with open("prompt/prompt.json", "r") as file:
+            json_data = file.read()
+            prompts = json.loads(json_data)
+
+        datapoint = random.choice(dataset)
+        datapoint = datapoint['section_text'] + '\n\nDialogue:\n' + datapoint['dialogue']
+        submit_button = gr.Button("✨ Submit ✨")
+
+        with gr.Row():
+            columns = [gr.Textbox(label=f"Column {i+1}", lines=10) for i in range(3)]
+
+        content_list = [prompt + '\n{' + datapoint + '}\n\nsummary:' for prompt in prompts]
+        model = get_model_batch_generation("Qwen/Qwen2-1.5B-Instruct")
+
+        def start_streaming():
+            for data in stream_data(content_list, model):
+                updates = [gr.update(value=data[i]) for i in range(len(columns))]
+                yield tuple(updates)
+
+        submit_button.click(
+            fn=start_streaming,
+            inputs=[],
+            outputs=columns,
+            show_progress=False
+        )
 
     return demo
+
+if __name__ == "__main__":
+    demo = create_arena()
+    demo.queue()
+    demo.launch()
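For context, the new arena page drives all three output boxes from a single generator callback: each `yield` hands Gradio one tuple of partial strings, one entry per textbox. Below is a minimal, self-contained sketch of that wiring; `fake_stream` and the hard-coded `TEXTS` are stand-ins of mine for the repo's `stream_data` plus a real model, so the flow can be tried without loading any weights.

```python
# Sketch: one generator updates three Gradio textboxes with partial outputs.
# `fake_stream` is a placeholder for stream_data(content_list, model).
import time
import gradio as gr

TEXTS = [
    "first candidate summary of the dialogue",
    "second candidate summary of the dialogue",
    "third candidate summary of the dialogue",
]

def fake_stream(texts):
    """Yield growing (col1, col2, col3) snapshots, one word at a time."""
    outputs = ["" for _ in texts]
    words = [t.split() for t in texts]
    for step in range(max(len(w) for w in words)):
        for i, w in enumerate(words):
            if step < len(w):
                outputs[i] += w[step] + " "
        time.sleep(0.05)
        yield tuple(outputs)

with gr.Blocks() as demo:
    with gr.Row():
        columns = [gr.Textbox(label=f"Column {i+1}", lines=10) for i in range(3)]
    submit_button = gr.Button("Submit")

    def start_streaming():
        yield from fake_stream(TEXTS)

    submit_button.click(fn=start_streaming, inputs=[], outputs=columns)

if __name__ == "__main__":
    demo.queue()  # queuing is required for generator (streaming) callbacks
    demo.launch()
```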
pages/summarization_playground.py CHANGED
@@ -33,8 +33,24 @@ Back in Boston, Kidd is going to rely on Lively even more. He'll play close to 3
     random_label: ""
 }
 
+
+def get_model_batch_generation(model_name):
+    global __model_on_gpu__
+
+    if __model_on_gpu__ != model_name:
+        if __model_on_gpu__:
+            logging.info(f"delete model {__model_on_gpu__}")
+            del model[__model_on_gpu__]
+            gc.collect()
+            torch.cuda.empty_cache()
+
+        model[model_name] = Model(model_name)
+        __model_on_gpu__ = model_name
+
+    return model[model_name]
+
+
 def generate_answer(sources, model_name, prompt):
-    content = prompt + '\n{' + sources + '}\n\nsummary:'
     global __model_on_gpu__
 
     if __model_on_gpu__ != model_name:
@@ -47,6 +63,8 @@ def generate_answer(sources, model_name, prompt):
         model[model_name] = Model(model_name)
         __model_on_gpu__ = model_name
 
+    content = prompt + '\n{' + sources + '}\n\nsummary:'
+
     answer = model[model_name].gen(content)
 
     return answer
@@ -68,7 +86,7 @@ def update_input(example):
 def create_summarization_interface():
     with gr.Blocks() as demo:
         gr.Markdown("## This is a playground to test prompts for clinical dialogue summarizations")
-
+
         with gr.Row():
             example_dropdown = gr.Dropdown(choices=list(examples.keys()), label="Choose an example", value=random_label)
             model_dropdown = gr.Dropdown(choices=Model.__model_list__, label="Choose a model", value=Model.__model_list__[0])
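The new `get_model_batch_generation` helper follows the same eviction pattern `generate_answer` already used: at most one model stays resident, and the current one is deleted and garbage-collected before the next is constructed. Here is a compact, CPU-only sketch of that pattern; `DummyModel` and `load_resident` are illustrative names of mine, not repo code.

```python
# Sketch of the "one model resident at a time" cache used by the playground.
import gc
import logging

class DummyModel:
    """Stand-in for utils.model.Model; pretend __init__ allocates GPU memory."""
    def __init__(self, name: str):
        self.name = name

_resident = {}
_resident_name = None

def load_resident(model_name: str) -> DummyModel:
    """Return the requested model, evicting whatever is currently loaded first."""
    global _resident_name
    if _resident_name != model_name:
        if _resident_name is not None:
            logging.info("deleting model %s", _resident_name)
            del _resident[_resident_name]
            gc.collect()  # a real version would also call torch.cuda.empty_cache()
        _resident[model_name] = DummyModel(model_name)
        _resident_name = model_name
    return _resident[model_name]

if __name__ == "__main__":
    load_resident("model-a")
    current = load_resident("model-b")  # "model-a" is dropped before "model-b" is built
    print(current.name)
```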
prompt/prompt.ipynb ADDED
@@ -0,0 +1,59 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import json\n",
+    "\n",
+    "prompts = [\n",
+    "    \"\"\"Please summarize the following conversation by highlighting the key points and main topics discussed. Include any important conclusions or decisions made during the conversation.\n",
+    "Conversation:\"\"\",\n",
+    "    \"\"\"Generate a concise summary of the conversation below. Focus on the main arguments, the flow of the discussion, and any significant outcomes or agreements reached. Make sure to capture the essence of the dialogue without including extraneous details.\n",
+    "Conversation:\"\"\",\n",
+    "    \"\"\"Provide a brief overview of the conversation provided. Summarize the main ideas exchanged, the context of the discussion, and any resolutions or actions decided. Ensure the summary is clear and easy to understand for someone who wasn't part of the conversation\"\"\"\n",
+    "]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "with open(\"prompt.json\", \"w\") as f:\n",
+    "    json.dump(prompts, f)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.9"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
prompt/prompt.json ADDED
@@ -0,0 +1 @@
+["Please summarize the following conversation by highlighting the key points and main topics discussed. Include any important conclusions or decisions made during the conversation.\nConversation:", "Generate a concise summary of the conversation below. Focus on the main arguments, the flow of the discussion, and any significant outcomes or agreements reached. Make sure to capture the essence of the dialogue without including extraneous details.\nConversation:", "Provide a brief overview of the conversation provided. Summarize the main ideas exchanged, the context of the discussion, and any resolutions or actions decided. Ensure the summary is clear and easy to understand for someone who wasn't part of the conversation"]
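`prompt/prompt.json` is a plain JSON list of three prompt strings; the arena page reads it back and prepends each prompt to the same datapoint before generation. A small round-trip sketch of that usage follows; the `datapoint` text here is made up for illustration.

```python
# Read the prompt list back and build one generation input per prompt.
import json

with open("prompt/prompt.json", "r") as file:
    prompts = json.load(file)  # list of three prompt strings

datapoint = "Doctor: How are you feeling?\nPatient: Much better, thank you."  # made-up example
content_list = [prompt + '\n{' + datapoint + '}\n\nsummary:' for prompt in prompts]

for content in content_list:
    print(content.splitlines()[0])  # show which prompt variant each input starts with
```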
utils/model.py CHANGED
@@ -1,4 +1,4 @@
-from transformers import AutoTokenizer
+from transformers import AutoTokenizer, AutoModelForCausalLM, AutoModelForSeq2SeqLM, TextStreamer
 import transformers
 import torch
 
@@ -12,26 +12,32 @@ login(token = os.getenv('HF_TOKEN'))
 class Model(torch.nn.Module):
     number_of_models = 0
     __model_list__ = [
+        "Qwen/Qwen2-1.5B-Instruct",
         "lmsys/vicuna-7b-v1.5",
         "google-t5/t5-large",
         "mistralai/Mistral-7B-Instruct-v0.1",
         "meta-llama/Meta-Llama-3.1-8B-Instruct"
     ]
 
-    def __init__(self, model_name="lmsys/vicuna-7b-v1.5") -> None:
+    def __init__(self, model_name="Qwen/Qwen2-1.5B-Instruct") -> None:
         super(Model, self).__init__()
 
         self.tokenizer = AutoTokenizer.from_pretrained(model_name)
         self.name = model_name
 
         logging.info(f'start loading model {self.name}')
-        self.pipeline = transformers.pipeline(
-            "summarization" if model_name=="google-t5/t5-large" else "text-generation",
-            model=model_name,
-            tokenizer=self.tokenizer,
-            torch_dtype=torch.bfloat16,
-            device_map="auto",
-        )
+
+        if model_name == "google-t5/t5-large":
+            # For T5 or any other Seq2Seq model
+            self.model = AutoModelForSeq2SeqLM.from_pretrained(
+                model_name, torch_dtype=torch.bfloat16, device_map="auto"
+            )
+        else:
+            # For GPT-like models or other causal language models
+            self.model = AutoModelForCausalLM.from_pretrained(
+                model_name, torch_dtype=torch.bfloat16, device_map="auto"
+            )
+
         logging.info(f'Loaded model {self.name}')
 
         self.update()
@@ -49,25 +55,32 @@ class Model(torch.nn.Module):
     def return_model(self):
         return self.pipeline
 
-    def gen(self, content, temp=0.1, max_length=500):
-        if self.name == "google-t5/t5-large":
-            sequences = self.pipeline(
-                content,
-                max_new_tokens=max_length,
-                do_sample=True,
-                temperature=temp,
-                num_return_sequences=1,
-                eos_token_id=self.tokenizer.eos_token_id,
-            )
-            return sequences[-1]['summary_text']
+    def gen(self, content_list, temp=0.1, max_length=500, streaming=False):
+        # Convert list of texts to input IDs
+        input_ids = self.tokenizer(content_list, return_tensors="pt", padding=True, truncation=True).input_ids.to(self.model.device)
+
+        if streaming:
+            # Prepare streamers for each input
+            streamers = [TextStreamer(self.tokenizer, skip_prompt=True) for _ in content_list]
+
+            # Stream the output token by token for each input text
+            for i, streamer in enumerate(streamers):
+                for output in self.model.generate(
+                    input_ids[i].unsqueeze(0),  # Process each input separately
+                    max_new_tokens=max_length,
+                    do_sample=True,
+                    temperature=temp,
+                    eos_token_id=self.tokenizer.eos_token_id,
+                    return_dict_in_generate=True,
+                    output_scores=True,
+                    streamer=streamer):
+                    pass  # TextStreamer automatically handles the streaming, no need to manually handle the output
         else:
-            sequences = self.pipeline(
-                content,
+            outputs = self.model.generate(
+                input_ids,
                 max_new_tokens=max_length,
                 do_sample=True,
                 temperature=temp,
-                num_return_sequences=1,
-                eos_token_id=self.tokenizer.eos_token_id,
-                return_full_text=False
+                eos_token_id=self.tokenizer.eos_token_id
             )
-            return sequences[-1]['generated_text']
+            return [self.tokenizer.decode(output, skip_special_tokens=True) for output in outputs]
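A note when reading `gen(..., streaming=True)`: `TextStreamer` only prints tokens to stdout, so a caller cannot iterate over the generated text. If the chunks need to be consumed programmatically (for example, fed back into a UI), `transformers.TextIteratorStreamer` is the usual alternative. The sketch below is not part of this commit; it assumes the same Qwen default model and illustrative names (`stream_one`) of mine.

```python
# Sketch: stream generated text as an iterator with TextIteratorStreamer.
from threading import Thread

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

model_name = "Qwen/Qwen2-1.5B-Instruct"  # same default the commit uses
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name, torch_dtype=torch.bfloat16, device_map="auto"
)

def stream_one(prompt, max_new_tokens=200):
    """Yield decoded text chunks for a single prompt as they are generated."""
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    # generate() blocks, so run it in a background thread and read from the streamer.
    thread = Thread(
        target=model.generate,
        kwargs=dict(**inputs, max_new_tokens=max_new_tokens, do_sample=True,
                    temperature=0.1, streamer=streamer),
    )
    thread.start()
    for chunk in streamer:
        yield chunk
    thread.join()

if __name__ == "__main__":
    for piece in stream_one("Summarize: the patient reports mild headaches.\n\nsummary:"):
        print(piece, end="", flush=True)
```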
utils/multiple_stream.py CHANGED
@@ -3,6 +3,9 @@ import random
 from time import sleep
 import gradio as gr
 
+from utils.model import Model
+
+
 TEST = """ Test of Time. A Benchmark for Evaluating LLMs on Temporal Reasoning. Large language models (LLMs) have
 showcased remarkable reasoning capabilities, yet they remain susceptible to errors, particularly in temporal
 reasoning tasks involving complex temporal logic. """
@@ -16,16 +19,16 @@ def generate_data_test():
     for word in temp.split(" "):
         yield word + " "
 
-def stream_data(progress=gr.Progress()):
-    """Stream data to all columns"""
-    outputs = ["", "", ""]
-    generators = [generate_data_test() for _ in range(3)]
-
+def stream_data(content_list, model):
+    """Stream data to three columns"""
+    outputs = ["" for _ in content_list]
+
+    # Use the gen method to handle batch generation
     while True:
         updated = False
-        for i, gen in enumerate(generators):
+        for i, content in enumerate(content_list):
             try:
-                word = next(gen)
+                word = next(model.gen([content], streaming=True))  # Wrap content in a list to match expected input type
                 outputs[i] += word
                 updated = True
             except StopIteration:
@@ -35,24 +38,31 @@ def stream_data(progress=gr.Progress()):
            break
 
        yield tuple(outputs)
-        sleep(0.01)
+
 
 def create_interface():
-    with gr.Group():
-        with gr.Row():
-            col1 = gr.Textbox(label="Column 1", lines=10)
-            col2 = gr.Textbox(label="Column 2", lines=10)
-            col3 = gr.Textbox(label="Column 3", lines=10)
-
-        start_btn = gr.Button("Start Streaming")
-
-        start_btn.click(
-            fn=stream_data,
-            outputs=[col1, col2, col3],
-            show_progress=False
-        )
+    with gr.Blocks() as demo:
+        with gr.Group():
+            with gr.Row():
+                columns = [gr.Textbox(label=f"Column {i+1}", lines=10) for i in range(3)]
+
+            start_btn = gr.Button("Start Streaming")
+
+            def start_streaming():
+                content_list = [col.value for col in columns]  # Get input texts from text boxes
+                for data in stream_data(content_list):
+                    updates = [gr.update(value=data[i]) for i in range(len(columns))]
+                    yield tuple(updates)
+
+            start_btn.click(
+                fn=start_streaming,
+                inputs=[],
+                outputs=columns,
+                show_progress=False
+            )
+
+    return demo
 
-#return demo
 
 if __name__ == "__main__":
     demo = create_interface()
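The `stream_data` rewrite keeps the original round-robin shape: advance one source per column on each pass and yield a snapshot tuple of all columns until every source is exhausted. Below is a dependency-free sketch of that loop, with plain word generators standing in for model output; `words` and `round_robin` are illustrative names of mine, not repo code.

```python
# Sketch: interleave several generators and yield a snapshot tuple per pass.
def words(text):
    for w in text.split():
        yield w + " "

def round_robin(texts):
    gens = [words(t) for t in texts]
    outputs = ["" for _ in texts]
    done = [False] * len(texts)
    while not all(done):
        for i, gen in enumerate(gens):
            if done[i]:
                continue
            try:
                outputs[i] += next(gen)
            except StopIteration:
                done[i] = True
        yield tuple(outputs)

if __name__ == "__main__":
    for snapshot in round_robin(["alpha bravo charlie", "one two", "w x y z"]):
        print(snapshot)
```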