Norod78 committed on
Commit
2010d1b
1 Parent(s): 2a6db41
app.py ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from transformers import pipeline
3
+
4
# --- UI copy (Hebrew, right-aligned HTML fragments) -------------------------
title = "מחולל ציטוטים פיקטיביים של שאול אמסטרדמסקי"
description = '<p align="right">✨ נונסנס כיס</p>'
article = (
    '<p align="right">אימן: '
    '<a href="https://linktr.ee/Norod78">דורון אדלר</a>'
    ' באמצעות: '
    '<a href="https://story.kan.org.il/robo_shaul">מאגר המידע של רובושאול</a>'
    '</p>'
)

# --- Model -------------------------------------------------------------------
# The same local directory is used for both the model weights and the tokenizer.
model_id = "./model"
text_generator = pipeline(
    task="text-generation",
    model=model_id,
    tokenizer=model_id,
)

# --- Sampling parameters passed to the pipeline on every generation call ----
max_length = 128
top_k = 40
top_p = 0.92
temperature = 0.98
13
+
14
def text_generation(input_text=None):
    """Generate text with the module-level ``text_generator`` pipeline.

    Args:
        input_text: Optional prompt. When it is ``None`` or empty, the
            generator is seeded with ``". "`` and that seed is removed from
            the returned text. Otherwise the prompt is prefixed with the
            ``<|startoftext|>`` marker and kept intact in the result.

    Returns:
        The first generated sequence after clean-up: special-token markers,
        carriage returns, doubled newlines, tabs and doubled double-quotes
        are normalised.
    """
    default_seed = ". "
    use_default_seed = not input_text
    if use_default_seed:
        prompt = default_seed
    else:
        prompt = "<|startoftext|>" + input_text

    generated_text = text_generator(
        prompt,
        max_length=max_length,
        top_k=top_k,
        top_p=top_p,
        temperature=temperature,
        do_sample=True,
        repetition_penalty=2.0,
        num_return_sequences=1,
    )

    parsed_text = (
        generated_text[0]["generated_text"]
        .replace("<|startoftext|>", "")
        .replace("\r", "")
        .replace("\n\n", "\n")
        .replace("\t", " ")
        .replace("<|pad|>", " * ")
        .replace("\"\"", "\"")
    )

    # Only the artificial default seed is stripped. Slicing unconditionally
    # (as before) chopped the first two characters off a caller-supplied
    # prompt, and would also eat real text if the seed were ever absent.
    if use_default_seed and parsed_text.startswith(default_seed):
        parsed_text = parsed_text[len(default_seed):]
    return parsed_text
29
# Output-only UI: the interface takes no inputs, so pressing the button calls
# text_generation() with its default (empty) prompt.
output_box = gr.Textbox(
    lines=3,
    type="text",
    label="פה מופיע הטקסט שהמחולל יוצר, לאחר הלחיצה על הכפתור",
    elem_id="output_text",
)

demo = gr.Interface(
    text_generation,
    inputs=None,
    outputs=output_box,
    # Render the generated Hebrew text right-to-left.
    css="#output_text{direction: rtl} #input_text{direction: rtl}",
    title=title,
    description=description,
    article=article,
    theme="default",
    allow_flagging="never",
)
demo.launch()
model/added_tokens.json ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ {
2
+ "<|endoftext|>": 50258,
3
+ "<|pad|>": 50259,
4
+ "<|startoftext|>": 50257
5
+ }
model/config.json ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "Norod78/distilgpt2-base-pretrained-he",
3
+ "_num_labels": 1,
4
+ "activation_function": "gelu_new",
5
+ "architectures": [
6
+ "GPT2LMHeadModel"
7
+ ],
8
+ "attn_pdrop": 0.1,
9
+ "bos_token_id": 50256,
10
+ "embd_pdrop": 0.1,
11
+ "eos_token_id": 50256,
12
+ "gradient_checkpointing": false,
13
+ "id2label": {
14
+ "0": "LABEL_0"
15
+ },
16
+ "initializer_range": 0.02,
17
+ "label2id": {
18
+ "LABEL_0": 0
19
+ },
20
+ "layer_norm_epsilon": 1e-05,
21
+ "model_type": "gpt2",
22
+ "n_ctx": 1024,
23
+ "n_embd": 768,
24
+ "n_head": 12,
25
+ "n_inner": null,
26
+ "n_layer": 6,
27
+ "n_positions": 1024,
28
+ "pad_token_id": 50257,
29
+ "reorder_and_upcast_attn": false,
30
+ "resid_pdrop": 0.1,
31
+ "scale_attn_by_inverse_layer_idx": false,
32
+ "scale_attn_weights": true,
33
+ "summary_activation": null,
34
+ "summary_first_dropout": 0.1,
35
+ "summary_proj_to_labels": true,
36
+ "summary_type": "cls_index",
37
+ "summary_use_proj": true,
38
+ "task_specific_params": {
39
+ "text-generation": {
40
+ "do_sample": true,
41
+ "max_length": 50
42
+ }
43
+ },
44
+ "torch_dtype": "float32",
45
+ "transformers_version": "4.26.1",
46
+ "use_cache": true,
47
+ "vocab_size": 50260
48
+ }
model/generation_config.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 50256,
4
+ "eos_token_id": 50256,
5
+ "pad_token_id": 50257,
6
+ "transformers_version": "4.26.1"
7
+ }
model/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
model/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f6994817063266d5f7d467ed84132e39b60714b72e8770487f177eba57b5ce21
3
+ size 333979385
model/special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "<|startoftext|>",
3
+ "eos_token": "<|endoftext|>",
4
+ "mask_token": "<mask>",
5
+ "pad_token": "<|pad|>",
6
+ "unk_token": "<unk>"
7
+ }
model/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
model/tokenizer_config.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "bos_token": "<|startoftext|>",
4
+ "eos_token": "<|endoftext|>",
5
+ "model_max_length": 1000000000000000019884624838656,
6
+ "name_or_path": "Norod78/distilgpt2-base-pretrained-he",
7
+ "pad_token": "<|pad|>",
8
+ "special_tokens_map_file": "./distilgpt2-base-pretrained-he/special_tokens_map.json",
9
+ "tokenizer_class": "GPT2Tokenizer",
10
+ "unk_token": "<|endoftext|>",
11
+ "unknown_token": "<|unknown|>"
12
+ }
model/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ gradio
2
+ torch
3
+ transformers
4
+ tokenizers
5
+