Martian committed on
Commit
d772533
1 Parent(s): bd5226a

Add pipeline example

Browse files
Files changed (1) hide show
  1. README.md +45 -0
README.md CHANGED
@@ -12,3 +12,48 @@ widget:
12
 
13
  Title generator based on Neo-GPT 125M, fine-tuned on a dataset of 39k URL titles. All URLs were selected from the top 10 Google results for a list of keywords about "Electric car" - "Electric car for sale".
14
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
 
13
  Title generator based on Neo-GPT 125M, fine-tuned on a dataset of 39k URL titles. All URLs were selected from the top 10 Google results for a list of keywords about "Electric car" - "Electric car for sale".
14
 
15
+ # Pipeline example
16
+
17
+ ```python
18
+ import pandas as pd
19
+ from transformers import AutoModelForMaskedLM
20
+ from transformers import GPT2Tokenizer, TrainingArguments, AutoModelForCausalLM, AutoConfig
21
+
22
+ model = AutoModelForCausalLM.from_pretrained('Martian/Neo-GPT-Title-Generation-Electric-Car')
23
+
24
+ tokenizer = GPT2Tokenizer.from_pretrained('Martian/Neo-GPT-Title-Generation-Electric-Car', bos_token='<|startoftext|>',
25
+ eos_token='<|endoftext|>', pad_token='<|pad|>')
26
+
27
+ prompt = "<|startoftext|> Electric car"
28
+
29
+ input_ids = tokenizer(prompt, return_tensors="pt").input_ids
30
+
31
+ gen_tokens = model.generate(input_ids, do_sample=True, top_k=100, min_length = 30, max_length=150, top_p=0.90, num_return_sequences=20, skip_special_tokens=True)
32
+
33
+ list_title_gen = []
34
+
35
+ for i, sample_output in enumerate(gen_tokens):
36
+ title = tokenizer.decode(sample_output, skip_special_tokens=True)
37
+ list_title_gen.append(title)
38
+
39
+ for i in list_title_gen:
40
+ try:
41
+ list_title_gen[list_title_gen.index(i)] = i.split(' | ')[0]
42
+ except:
43
+ continue
44
+ try:
45
+ list_title_gen[list_title_gen.index(i)] = i.split(' - ')[0]
46
+ except:
47
+ continue
48
+ try:
49
+ list_title_gen[list_title_gen.index(i)] = i.split(' — ')[0]
50
+ except:
51
+ continue
52
+
53
+ list_title_gen = [sub.replace('�', ' ').replace('\r',' ').replace('\n',' ').replace('\t', ' ').replace('\xa0', '') for sub in list_title_gen]
54
+ list_title_gen = [sub if sub != '<|startoftext|> Electric car' else '' for sub in list_title_gen]
55
+
56
+ for i in list_title_gen:
57
+ print(i)
58
+
59
+ ```