Adjusting some sliders and generation config
main.py CHANGED
@@ -61,6 +61,8 @@ def generate_text(
 
     maybe_load_models()
 
+    tokenizer.pad_token_id = 0
+
     if model_name and model_name != "None":
         model = PeftModel.from_pretrained(
             model, model_name,
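The `tokenizer.pad_token_id = 0` line added here works around the LLaMA tokenizer shipping without a pad token. A minimal sketch of the same setup (the checkpoint name is illustrative; the Space loads its own base model elsewhere):

    from transformers import LlamaTokenizer

    # Illustrative checkpoint name, not the Space's actual model.
    tokenizer = LlamaTokenizer.from_pretrained("decapoda-research/llama-7b-hf")

    # LLaMA's sentencepiece vocab defines no pad token; id 0 is <unk>,
    # a common stand-in so that padded batches tokenize cleanly.
    tokenizer.pad_token_id = 0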
@@ -69,7 +71,15 @@ def generate_text(
 
     inputs = tokenizer(text, return_tensors="pt")
     input_ids = inputs["input_ids"].to(model.device)
+
+    # llama_config = transformers.LlamaConfig()
+    # print(llama_config)
+
+    stopping_criteria_list = transformers.StoppingCriteriaList()
     generation_config = GenerationConfig(
+        # Whether to use greedy decoding. If set to False,
+        do_sample=True,
+
         # Controls the 'temperature' of the softmax distribution during sampling.
         # Higher values (e.g., 1.0) make the model generate more diverse and random outputs,
         # while lower values (e.g., 0.1) make it more deterministic and
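With `do_sample=True`, `generate` samples from the softmax distribution instead of taking the argmax at each step. A minimal sketch of a comparable config, using illustrative values that mirror the slider defaults added further down (note that `GenerationConfig` spells the parameter `repetition_penalty`, while the Space's slider variable is `repeat_penalty`):

    from transformers import GenerationConfig

    generation_config = GenerationConfig(
        do_sample=True,          # sample instead of greedy argmax decoding
        temperature=0.7,         # <1.0 sharpens the distribution, >1.0 flattens it
        top_p=0.2,               # nucleus sampling: smallest token set with mass >= 0.2
        top_k=50,                # only the 50 most likely tokens are candidates
        repetition_penalty=0.8,  # values above 1.0 discourage repeated tokens
        max_new_tokens=64,       # illustrative cap on generated length
    )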
@@ -98,25 +108,28 @@ def generate_text(
         # This can be useful to control the length of generated text, especially in tasks
         # like text summarization or translation, where the output should not be excessively long.
         max_new_tokens=max_new_tokens,
+
+        # typical_p=1,
+        # stopping_criteria=stopping_criteria_list,
+        # eos_token_id=llama_config.eos_token_id,
+        # pad_token_id=llama_config.eos_token_id
     )
 
 
+
     with torch.no_grad():
         generation_output = model.generate(
             input_ids=input_ids,
             attention_mask=torch.ones_like(input_ids),
             generation_config=generation_config,
-            return_dict_in_generate=True,
-            output_scores=True,
-        )
-
-
-        for token_id in generation_output[0]:
-            new = tokenizer.decode(token_id, skip_special_tokens=True)
-            output.append(new)
-            print(new, end=" ", flush=True)
+            # return_dict_in_generate=True,
+            # output_scores=True,
+            # eos_token_id=[tokenizer.eos_token_id],
+            use_cache=True,
+        )[0].cuda()
 
-
+    output_text = tokenizer.decode(generation_output)
+    return output_text.strip()
 
 def tokenize_and_train(
     training_text,
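The rewritten tail of `generate_text` replaces token-by-token printing with a single decode of the returned sequence (the `.cuda()` call before decoding is not required; `tokenizer.decode` works on CPU tensors as well). A minimal sketch of the new flow, assuming `model` and `tokenizer` are already loaded:

    import torch

    def generate(model, tokenizer, text, generation_config):
        # Tokenize the prompt and move it to the model's device.
        inputs = tokenizer(text, return_tensors="pt")
        input_ids = inputs["input_ids"].to(model.device)

        with torch.no_grad():
            # generate() returns a batch of sequences; take the first one.
            output_ids = model.generate(
                input_ids=input_ids,
                attention_mask=torch.ones_like(input_ids),
                generation_config=generation_config,
                use_cache=True,
            )[0]

        # Decode the full sequence at once and trim surrounding whitespace.
        return tokenizer.decode(output_ids).strip()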
@@ -364,25 +377,25 @@ with gr.Blocks(css="#refresh-button { max-width: 32px }") as demo:
         with gr.Column():
             # temperature, top_p, top_k, repeat_penalty, max_new_tokens
             temperature = gr.Slider(
-                minimum=0, maximum=
+                minimum=0, maximum=1.99, value=0.7, step=0.01,
                 label="Temperature",
                 info=""
             )
 
             top_p = gr.Slider(
-                minimum=0, maximum=1, value=0.2, step=0.
+                minimum=0, maximum=1, value=0.2, step=0.01,
                 label="Top P",
                 info=""
             )
 
             top_k = gr.Slider(
-                minimum=0, maximum=
+                minimum=0, maximum=200, value=50, step=1,
                 label="Top K",
                 info=""
             )
 
             repeat_penalty = gr.Slider(
-                minimum=0, maximum=1, value=0.8, step=0.
+                minimum=0, maximum=1.5, value=0.8, step=0.01,
                 label="Repeat Penalty",
                 info=""
             )
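The new ranges give every slider an explicit default and step, with temperature capped just below 2.0. A standalone sketch of the same controls, with values copied from the diff:

    import gradio as gr

    with gr.Blocks() as demo:
        with gr.Column():
            temperature = gr.Slider(minimum=0, maximum=1.99, value=0.7,
                                    step=0.01, label="Temperature")
            top_p = gr.Slider(minimum=0, maximum=1, value=0.2,
                              step=0.01, label="Top P")
            top_k = gr.Slider(minimum=0, maximum=200, value=50,
                              step=1, label="Top K")
            repeat_penalty = gr.Slider(minimum=0, maximum=1.5, value=0.8,
                                       step=0.01, label="Repeat Penalty")

    demo.launch()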