milyiyo committed on
Commit
437058b
1 Parent(s): ced52fe

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -23
app.py CHANGED
@@ -6,19 +6,7 @@ tokenizer = AutoTokenizer.from_pretrained("milyiyo/paraphraser-german-mt5-small"
6
  model = AutoModelForSeq2SeqLM.from_pretrained("milyiyo/paraphraser-german-mt5-small")
7
 
8
 
9
- def decode_model_outputs(model_outputs, model_input):
10
- input_sent = model_input.split(':',1)[-1].strip()
11
- sentences = []
12
- for output in model_outputs:
13
- sentences.append(tokenizer.decode(output, skip_special_tokens=True))
14
-
15
- sentences = set(sentences)
16
- for sent in sentences:
17
- if sent != input_sent:
18
- print(f' - {sent}')
19
-
20
-
21
- def generate_v1(input):
22
  """Generate text using a Beam Search strategy with repetition penalty."""
23
  model_outputs = model.generate(input["input_ids"],
24
  early_stopping=True,
@@ -27,7 +15,8 @@ def generate_v1(input):
27
  no_repeat_ngram_size=2,
28
  num_beams=10,
29
  num_return_sequences=5,
30
- repetition_penalty=3.5,
 
31
  )
32
  sentences = []
33
  for output in model_outputs:
@@ -35,7 +24,7 @@ def generate_v1(input):
35
  return sentences
36
 
37
 
38
- def generate_v2(input):
39
  """Generate text using a Beam Search strategy."""
40
  model_outputs = model.generate(input["input_ids"],
41
  early_stopping=True,
@@ -45,6 +34,7 @@ def generate_v2(input):
45
  num_beams=5,
46
  num_return_sequences=5,
47
  temperature=1.5,
 
48
  )
49
  sentences = []
50
  for output in model_outputs:
@@ -52,7 +42,7 @@ def generate_v2(input):
52
  return sentences
53
 
54
 
55
- def generate_v3(input):
56
  """Generate text using a Diverse Beam Search strategy."""
57
  model_outputs = model.generate(input["input_ids"],
58
  num_beams=5,
@@ -63,14 +53,15 @@ def generate_v3(input):
63
  diversity_penalty=2.0,
64
  no_repeat_ngram_size=2,
65
  early_stopping=True,
66
- length_penalty=2.0)
 
67
  sentences = []
68
  for output in model_outputs:
69
  sentences.append(tokenizer.decode(output, skip_special_tokens=True))
70
  return sentences
71
 
72
 
73
- def generate_v4(input):
74
  """Generate text using a Diverse Beam Search strategy."""
75
  input_ids, attention_masks = input["input_ids"], input["attention_mask"]
76
  outputs = model.generate(
@@ -80,7 +71,7 @@ def generate_v4(input):
80
  top_k=120,
81
  top_p=0.95,
82
  early_stopping=True,
83
- num_return_sequences=p_count
84
  )
85
  res = []
86
  for output in outputs:
@@ -114,10 +105,10 @@ def paraphrase(sentence: str, count: str):
114
  # res.append(line)
115
  return {
116
  'result': {
117
- 'generate_v1':generate_v1(encoding),
118
- 'generate_v2':generate_v2(encoding),
119
- 'generate_v3':generate_v3(encoding),
120
- 'generate_v4':generate_v4(encoding),
121
  }
122
  }
123
 
 
6
  model = AutoModelForSeq2SeqLM.from_pretrained("milyiyo/paraphraser-german-mt5-small")
7
 
8
 
9
+ def generate_v1(input, count):
 
 
 
 
 
 
 
 
 
 
 
 
10
  """Generate text using a Beam Search strategy with repetition penalty."""
11
  model_outputs = model.generate(input["input_ids"],
12
  early_stopping=True,
 
15
  no_repeat_ngram_size=2,
16
  num_beams=10,
17
  num_return_sequences=5,
18
+ repetition_penalty=3.5,
19
+ num_return_sequences=count
20
  )
21
  sentences = []
22
  for output in model_outputs:
 
24
  return sentences
25
 
26
 
27
+ def generate_v2(input, count):
28
  """Generate text using a Beam Search strategy."""
29
  model_outputs = model.generate(input["input_ids"],
30
  early_stopping=True,
 
34
  num_beams=5,
35
  num_return_sequences=5,
36
  temperature=1.5,
37
+ num_return_sequences=count
38
  )
39
  sentences = []
40
  for output in model_outputs:
 
42
  return sentences
43
 
44
 
45
+ def generate_v3(input, count):
46
  """Generate text using a Diverse Beam Search strategy."""
47
  model_outputs = model.generate(input["input_ids"],
48
  num_beams=5,
 
53
  diversity_penalty=2.0,
54
  no_repeat_ngram_size=2,
55
  early_stopping=True,
56
+ length_penalty=2.0,
57
+ num_return_sequences=count)
58
  sentences = []
59
  for output in model_outputs:
60
  sentences.append(tokenizer.decode(output, skip_special_tokens=True))
61
  return sentences
62
 
63
 
64
+ def generate_v4(input, count):
65
  """Generate text using a Diverse Beam Search strategy."""
66
  input_ids, attention_masks = input["input_ids"], input["attention_mask"]
67
  outputs = model.generate(
 
71
  top_k=120,
72
  top_p=0.95,
73
  early_stopping=True,
74
+ num_return_sequences=count
75
  )
76
  res = []
77
  for output in outputs:
 
105
  # res.append(line)
106
  return {
107
  'result': {
108
+ 'generate_v1':generate_v1(encoding, count),
109
+ 'generate_v2':generate_v2(encoding, count),
110
+ 'generate_v3':generate_v3(encoding, count),
111
+ 'generate_v4':generate_v4(encoding, count),
112
  }
113
  }
114