danseith committed on
Commit
ca69fee
1 Parent(s): e3a2d6f

Added dummy temp slider and output text box with new input.

Browse files
Files changed (1) hide show
  1. app.py +75 -40
app.py CHANGED
@@ -1,15 +1,14 @@
1
  import gradio as gr
2
  import numpy as np
3
  import torch
4
- from transformers import pipeline, Pipeline
5
  from transformers.pipelines import PIPELINE_REGISTRY, FillMaskPipeline
6
- from transformers import AutoConfig, AutoModel, AutoModelForMaskedLM
7
 
8
- unmasker = pipeline("fill-mask", model="anferico/bert-for-patents")
9
  # unmasker = pipeline("temp-scale", model="anferico/bert-for-patents")
10
- example = 'A crustless [MASK] made from two slices of baked bread'
11
- example_dict = {}
12
- example_dict['input_ids'] = example
13
 
14
  def add_mask(text, size=1):
15
  split_text = text.split()
@@ -20,7 +19,49 @@ def add_mask(text, size=1):
20
 
21
 
22
  class TempScalePipe(FillMaskPipeline):
23
- def postprocess(self, model_outputs, top_k=3, target_ids=None):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
  # Cap top_k if there are targets
25
  if target_ids is not None and target_ids.shape[0] < top_k:
26
  top_k = target_ids.shape[0]
@@ -30,14 +71,16 @@ class TempScalePipe(FillMaskPipeline):
30
  masked_index = torch.nonzero(input_ids == self.tokenizer.mask_token_id, as_tuple=False).squeeze(-1)
31
  # Fill mask pipeline supports only one ${mask_token} per sample
32
 
33
- logits = outputs[0, masked_index, :] / 1e1
34
  probs = logits.softmax(dim=-1)
35
- indices = torch.multinomial(probs, num_samples=3)
36
- probs = probs[indices]
 
 
37
  if target_ids is not None:
38
  probs = probs[..., target_ids]
39
-
40
- values, predictions = probs.topk(top_k)
41
 
42
  result = []
43
  single_mask = values.shape[0] == 1
@@ -69,41 +112,33 @@ PIPELINE_REGISTRY.register_pipeline(
69
  pipeline_class=TempScalePipe,
70
  pt_model=AutoModelForMaskedLM,
71
  )
 
72
 
73
-
74
- def unmask(text):
75
  # text = add_mask(text)
76
- res = unmasker(text)
 
 
77
  out = {item["token_str"]: item["score"] for item in res}
78
- return out
79
-
80
-
 
 
 
 
 
 
81
 
82
  textbox = gr.Textbox(label="Type language here", lines=5)
83
- # import gradio as gr
84
- from transformers import pipeline, Pipeline
85
-
86
-
87
- # unmasker = pipeline("fill-mask", model="anferico/bert-for-patents")
88
- #
89
- #
90
-
91
- #
92
- #
93
- # def unmask(text):
94
- # text = add_mask(text)
95
- # res = unmasker(text)
96
- # out = {item["token_str"]: item["score"] for item in res}
97
- # return out
98
- #
99
- #
100
- # textbox = gr.Textbox(label="Type language here", lines=5)
101
- #
102
  demo = gr.Interface(
103
  fn=unmask,
104
- inputs=textbox,
105
- outputs="label",
106
- examples=[example],
107
  )
108
 
109
  demo.launch()
 
1
  import gradio as gr
2
  import numpy as np
3
  import torch
4
+ from transformers import pipeline
5
  from transformers.pipelines import PIPELINE_REGISTRY, FillMaskPipeline
6
+ from transformers import AutoModelForMaskedLM
7
 
 
8
  # unmasker = pipeline("temp-scale", model="anferico/bert-for-patents")
9
# Candidate demo prompts. Each one contains exactly one [MASK] token.
EXAMPLES = [
    'A crustless [MASK] made from two slices of baked bread.',
    'The invention provides a method for altering or modifying [MASK] of one or more gene products.',
    'The graphite [MASK] is composed of a two-dimensional hexagonal lattice of carbon atoms.',
]

# Previously `example` was reassigned three times in a row, which silently
# discarded the first two prompts. Keep them all in EXAMPLES and select the
# one that was effectively in use (the last) so existing references to
# `example` see the same value.
example = EXAMPLES[-1]
12
 
13
  def add_mask(text, size=1):
14
  split_text = text.split()
 
19
 
20
 
21
  class TempScalePipe(FillMaskPipeline):
22
+ def _sanitize_parameters(self, top_k=None, targets=None, temp=None):
23
+ postprocess_params = {}
24
+
25
+ if targets is not None:
26
+ target_ids = self.get_target_ids(targets, top_k)
27
+ postprocess_params["target_ids"] = target_ids
28
+
29
+ if top_k is not None:
30
+ postprocess_params["top_k"] = top_k
31
+
32
+ if temp is not None:
33
+ postprocess_params["temp"] = temp
34
+ return {}, {}, postprocess_params
35
+
36
+
37
+ def __call__(self, inputs, *args, **kwargs):
38
+ """
39
+ Fill the masked token in the text(s) given as inputs.
40
+
41
+ Args:
42
+ args (`str` or `List[str]`):
43
+ One or several texts (or one list of prompts) with masked tokens.
44
+ targets (`str` or `List[str]`, *optional*):
45
+ When passed, the model will limit the scores to the passed targets instead of looking up in the whole
46
+ vocab. If the provided targets are not in the model vocab, they will be tokenized and the first
47
+ resulting token will be used (with a warning, and that might be slower).
48
+ top_k (`int`, *optional*):
49
+ When passed, overrides the number of predictions to return.
50
+
51
+ Return:
52
+ A list or a list of list of `dict`: Each result comes as list of dictionaries with the following keys:
53
+
54
+ - **sequence** (`str`) -- The corresponding input with the mask token prediction.
55
+ - **score** (`float`) -- The corresponding probability.
56
+ - **token** (`int`) -- The predicted token id (to replace the masked one).
57
+ - **token** (`str`) -- The predicted token (to replace the masked one).
58
+ """
59
+ outputs = super().__call__(inputs, **kwargs)
60
+ if isinstance(inputs, list) and len(inputs) == 1:
61
+ return outputs[0]
62
+ return outputs
63
+
64
+ def postprocess(self, model_outputs, top_k=10, target_ids=None, temp=1):
65
  # Cap top_k if there are targets
66
  if target_ids is not None and target_ids.shape[0] < top_k:
67
  top_k = target_ids.shape[0]
 
71
  masked_index = torch.nonzero(input_ids == self.tokenizer.mask_token_id, as_tuple=False).squeeze(-1)
72
  # Fill mask pipeline supports only one ${mask_token} per sample
73
 
74
+ logits = outputs[0, masked_index, :] / 1.2
75
  probs = logits.softmax(dim=-1)
76
+ sampling = False
77
+ if sampling:
78
+ predictions = torch.multinomial(probs, num_samples=3)
79
+ values = probs[0, predictions]
80
  if target_ids is not None:
81
  probs = probs[..., target_ids]
82
+ if not sampling:
83
+ values, predictions = probs.topk(top_k)
84
 
85
  result = []
86
  single_mask = values.shape[0] == 1
 
112
  pipeline_class=TempScalePipe,
113
  pt_model=AutoModelForMaskedLM,
114
  )
115
+ scrambler = pipeline("temp-scale", model="anferico/bert-for-patents")
116
 
117
def unmask(text, temp):
    """Predict fillers for the mask token in *text* and sample one of them.

    Args:
        text: Input string containing the tokenizer's mask token.
        temp: Temperature value. It is forwarded to the pipeline as the
            `temp` keyword; whether it changes the scores depends on the
            pipeline's `postprocess` implementation.

    Returns:
        A 2-tuple `(scores, filled_text)`:
        - `scores` maps each candidate token string to its score.
        - `filled_text` is *text* with the first occurrence of the mask
          token replaced by a candidate sampled in proportion to its score.
          If the pipeline returns no candidates, *text* is returned as-is.
    """
    res = scrambler(text, temp=temp)
    out = {item["token_str"]: item["score"] for item in res}
    if not out:
        return out, text

    tokens = list(out)
    weights = np.asarray([out[token] for token in tokens], dtype=float)
    # Top-k scores do not sum to 1. Passing them unnormalised to
    # np.random.multinomial makes NumPy assign the leftover probability mass
    # to the *last* entry, over-sampling the least likely candidate, so
    # renormalise and sample by index instead. Sampling by index (rather
    # than through a score -> token dict) also keeps candidates with equal
    # scores distinct.
    picked = np.random.choice(len(tokens), p=weights / weights.sum())
    new_token = tokens[picked]

    # Replace the exact mask token rather than any whitespace-delimited word
    # that merely contains "MASK"; this keeps attached punctuation (e.g.
    # "[MASK].") and the original spacing intact.
    mask_token = scrambler.tokenizer.mask_token
    return out, text.replace(mask_token, new_token, 1)
132
 
133
# --- Gradio UI -------------------------------------------------------------
# Input widgets: the prompt containing the mask token, and a temperature
# ("Creativity") slider.
textbox = gr.Textbox(lines=5, label="Type language here")
temp_slider = gr.Slider(minimum=1.0, maximum=1.5, value=1.0, label='Creativity')

# Output widget that receives the text returned as the second value of
# `unmask`.
textbox2 = gr.Textbox(lines=4, placeholder="Type here...")

# `unmask` returns (scores, text): the scores go to a "label" component and
# the text goes to `textbox2`.
demo = gr.Interface(
    fn=unmask,
    inputs=[textbox, temp_slider],
    outputs=["label", textbox2],
    examples=[
        [example, 1.2],
    ],
)

demo.launch()