etchen committed on
Commit
43d8095
1 Parent(s): 679d21e

dataset poc

Browse files
pages/summarization_example.py CHANGED
@@ -1,12 +1,15 @@
1
  from dotenv import load_dotenv
2
  import gradio as gr
 
3
 
4
  from utils.model import Model
 
5
 
6
  __default_model_name__ = "lmsys/vicuna-7b-v1.5"
7
  model = Model(__default_model_name__)
8
  load_dotenv()
9
 
 
10
  examples = {
11
  "example 1": """Boston's injury reporting for Kristaps Porziņģis has been fairly coy. He missed Game 3, but his coach told reporters just before Game 4 that was technically available, but with a catch.
12
  Joe Mazzulla said Porziņģis would "only be used in specific instances, if necessary." That sounds like the team doesn't want to risk further injury to his dislocated Posterior Tibialis (or some other body part, due to overcompensation for the ankle), unless it's in a desperate situation.
@@ -20,6 +23,8 @@ Expect the rookie's playing time to continue to climb in Game 5. It seems increa
20
  Lively has been absolutely relentless on the offensive glass all postseason. He makes solid decisions as a passer when his rolls don't immediately lead to dunks. And he's not a liability when caught defending guards or wings outside.
21
  All of that has led to postseason averages of 8.2 points, 7.6 rebounds, 1.4 assists and 1.0 blocks in just 21.9 minutes, as well as a double-double in 22 minutes of Game 4.
22
  Back in Boston, Kidd is going to rely on Lively even more. He'll play close to 30 minutes and reach double-figures in both scoring and rebounding again.""",
 
 
23
  }
24
 
25
  def generate_answer(sources, model_name, prompt):
@@ -39,6 +44,8 @@ def process_input(input_text, model_selection, prompt):
39
  return "Please fill the input to generate outputs."
40
 
41
  def update_input(example):
 
 
42
  return examples[example]
43
 
44
  def create_summarization_interface():
@@ -64,4 +71,4 @@ summarization: """, label='Input Prompting Template', lines=8, placeholder='Inpu
64
 
65
  if __name__ == "__main__":
66
  demo = create_summarization_interface()
67
- demo.launch()
 
1
  from dotenv import load_dotenv
2
  import gradio as gr
3
+ import random
4
 
5
  from utils.model import Model
6
+ from utils.data import dataset
7
 
8
  __default_model_name__ = "lmsys/vicuna-7b-v1.5"
9
  model = Model(__default_model_name__)
10
  load_dotenv()
11
 
12
+ random_label = '🔀 Random dialogue from dataset'
13
  examples = {
14
  "example 1": """Boston's injury reporting for Kristaps Porziņģis has been fairly coy. He missed Game 3, but his coach told reporters just before Game 4 that was technically available, but with a catch.
15
  Joe Mazzulla said Porziņģis would "only be used in specific instances, if necessary." That sounds like the team doesn't want to risk further injury to his dislocated Posterior Tibialis (or some other body part, due to overcompensation for the ankle), unless it's in a desperate situation.
 
23
  Lively has been absolutely relentless on the offensive glass all postseason. He makes solid decisions as a passer when his rolls don't immediately lead to dunks. And he's not a liability when caught defending guards or wings outside.
24
  All of that has led to postseason averages of 8.2 points, 7.6 rebounds, 1.4 assists and 1.0 blocks in just 21.9 minutes, as well as a double-double in 22 minutes of Game 4.
25
  Back in Boston, Kidd is going to rely on Lively even more. He'll play close to 30 minutes and reach double-figures in both scoring and rebounding again.""",
26
+
27
+ random_label: ""
28
  }
29
 
30
  def generate_answer(sources, model_name, prompt):
 
44
  return "Please fill the input to generate outputs."
45
 
46
  def update_input(example):
47
+ if example == random_label:
48
+ return random.choice(dataset)['dialogue']
49
  return examples[example]
50
 
51
  def create_summarization_interface():
 
71
 
72
  if __name__ == "__main__":
73
  demo = create_summarization_interface()
74
+ demo.launch()
requirements.txt CHANGED
@@ -6,4 +6,5 @@ sentencepiece
6
  gradio
7
  torch
8
  torchvision
9
- torchaudio
 
 
6
  gradio
7
  torch
8
  torchvision
9
+ torchaudio
10
+ datasets
utils/data.py ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ from datasets import load_dataset
2
+ dialogsum = load_dataset('knkarthick/dialogsum')
3
+ dataset = list(dialogsum['train']) + list(dialogsum['validation']) + list(dialogsum['test'])
4
+