lxe committed on
Commit
bf9d89b
·
1 Parent(s): b7ef03e

Adjusting some sliders and generation config

Browse files
Files changed (1) hide show
  1. main.py +27 -14
main.py CHANGED
@@ -61,6 +61,8 @@ def generate_text(
61
 
62
  maybe_load_models()
63
 
 
 
64
  if model_name and model_name != "None":
65
  model = PeftModel.from_pretrained(
66
  model, model_name,
@@ -69,7 +71,15 @@ def generate_text(
69
 
70
  inputs = tokenizer(text, return_tensors="pt")
71
  input_ids = inputs["input_ids"].to(model.device)
 
 
 
 
 
72
  generation_config = GenerationConfig(
 
 
 
73
  # Controls the 'temperature' of the softmax distribution during sampling.
74
  # Higher values (e.g., 1.0) make the model generate more diverse and random outputs,
75
  # while lower values (e.g., 0.1) make it more deterministic and
@@ -98,25 +108,28 @@ def generate_text(
98
  # This can be useful to control the length of generated text, especially in tasks
99
  # like text summarization or translation, where the output should not be excessively long.
100
  max_new_tokens=max_new_tokens,
 
 
 
 
 
101
  )
102
 
103
 
 
104
  with torch.no_grad():
105
  generation_output = model.generate(
106
  input_ids=input_ids,
107
  attention_mask=torch.ones_like(input_ids),
108
  generation_config=generation_config,
109
- return_dict_in_generate=True,
110
- output_scores=True,
111
- )
112
-
113
- output = []
114
- for token_id in generation_output[0]:
115
- new = tokenizer.decode(token_id, skip_special_tokens=True)
116
- output.append(new)
117
- print(new, end=" ", flush=True)
118
 
119
- return ''.join(output).strip()
 
120
 
121
  def tokenize_and_train(
122
  training_text,
@@ -364,25 +377,25 @@ with gr.Blocks(css="#refresh-button { max-width: 32px }") as demo:
364
  with gr.Column():
365
  # temperature, top_p, top_k, repeat_penalty, max_new_tokens
366
  temperature = gr.Slider(
367
- minimum=0, maximum=2, value=0.7, step=0.1,
368
  label="Temperature",
369
  info=""
370
  )
371
 
372
  top_p = gr.Slider(
373
- minimum=0, maximum=1, value=0.2, step=0.1,
374
  label="Top P",
375
  info=""
376
  )
377
 
378
  top_k = gr.Slider(
379
- minimum=0, maximum=100, value=50, step=1,
380
  label="Top K",
381
  info=""
382
  )
383
 
384
  repeat_penalty = gr.Slider(
385
- minimum=0, maximum=1, value=0.8, step=0.1,
386
  label="Repeat Penalty",
387
  info=""
388
  )
 
61
 
62
  maybe_load_models()
63
 
64
+ tokenizer.pad_token_id = 0
65
+
66
  if model_name and model_name != "None":
67
  model = PeftModel.from_pretrained(
68
  model, model_name,
 
71
 
72
  inputs = tokenizer(text, return_tensors="pt")
73
  input_ids = inputs["input_ids"].to(model.device)
74
+
75
+ # llama_config = transformers.LlamaConfig()
76
+ # print(llama_config)
77
+
78
+ stopping_criteria_list = transformers.StoppingCriteriaList()
79
  generation_config = GenerationConfig(
80
+ # Whether to sample from the token distribution. If set to False, greedy decoding is used instead.
81
+ do_sample=True,
82
+
83
  # Controls the 'temperature' of the softmax distribution during sampling.
84
  # Higher values (e.g., 1.0) make the model generate more diverse and random outputs,
85
  # while lower values (e.g., 0.1) make it more deterministic and
 
108
  # This can be useful to control the length of generated text, especially in tasks
109
  # like text summarization or translation, where the output should not be excessively long.
110
  max_new_tokens=max_new_tokens,
111
+
112
+ # typical_p=1,
113
+ # stopping_criteria=stopping_criteria_list,
114
+ # eos_token_id=llama_config.eos_token_id,
115
+ # pad_token_id=llama_config.eos_token_id
116
  )
117
 
118
 
119
+
120
  with torch.no_grad():
121
  generation_output = model.generate(
122
  input_ids=input_ids,
123
  attention_mask=torch.ones_like(input_ids),
124
  generation_config=generation_config,
125
+ # return_dict_in_generate=True,
126
+ # output_scores=True,
127
+ # eos_token_id=[tokenizer.eos_token_id],
128
+ use_cache=True,
129
+ )[0].cuda()
 
 
 
 
130
 
131
+ output_text = tokenizer.decode(generation_output)
132
+ return output_text.strip()
133
 
134
  def tokenize_and_train(
135
  training_text,
 
377
  with gr.Column():
378
  # temperature, top_p, top_k, repeat_penalty, max_new_tokens
379
  temperature = gr.Slider(
380
+ minimum=0, maximum=1.99, value=0.7, step=0.01,
381
  label="Temperature",
382
  info=""
383
  )
384
 
385
  top_p = gr.Slider(
386
+ minimum=0, maximum=1, value=0.2, step=0.01,
387
  label="Top P",
388
  info=""
389
  )
390
 
391
  top_k = gr.Slider(
392
+ minimum=0, maximum=200, value=50, step=1,
393
  label="Top K",
394
  info=""
395
  )
396
 
397
  repeat_penalty = gr.Slider(
398
+ minimum=0, maximum=1.5, value=0.8, step=0.01,
399
  label="Repeat Penalty",
400
  info=""
401
  )