littleworth committed on
Commit
aa08ba0
1 Parent(s): b7edfc6

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +13 -11
README.md CHANGED
@@ -47,16 +47,13 @@ model_name = "littleworth/protgpt2-distilled-tiny"
47
  tokenizer = GPT2Tokenizer.from_pretrained(model_name)
48
  model = GPT2LMHeadModel.from_pretrained(model_name)
49
 
50
- # Ensure tokenizer is padding from the left
51
- tokenizer.padding_side = "left"
52
-
53
  # Initialize the pipeline
54
  text_generator = TextGenerationPipeline(
55
  model=model, tokenizer=tokenizer, device=0
56
  ) # specify device if needed
57
 
58
  # Generate sequences
59
- sequences = text_generator(
60
  "<|endoftext|>",
61
  max_length=100,
62
  do_sample=True,
@@ -68,15 +65,20 @@ sequences = text_generator(
68
  truncation=True,
69
  )
70
 
71
- for i, seq in enumerate(sequences):
72
- seq["generated_text"] = seq["generated_text"].replace("<|endoftext|>", "")
73
-
 
74
  # Remove newline characters and non-alphabetical characters
75
- seq["generated_text"] = "".join(
76
- char for char in seq["generated_text"] if char.isalpha()
77
- )
 
 
 
 
78
  print(f">Seq_{i}")
79
- print(seq["generated_text"])
80
  ```
81
 
82
  ### Use Cases
 
47
  tokenizer = GPT2Tokenizer.from_pretrained(model_name)
48
  model = GPT2LMHeadModel.from_pretrained(model_name)
49
 
 
 
 
50
  # Initialize the pipeline
51
  text_generator = TextGenerationPipeline(
52
  model=model, tokenizer=tokenizer, device=0
53
  ) # specify device if needed
54
 
55
  # Generate sequences
56
+ generated_sequences = text_generator(
57
  "<|endoftext|>",
58
  max_length=100,
59
  do_sample=True,
 
65
  truncation=True,
66
  )
67
 
68
+ def clean_sequence(text):
69
+ # Remove the "<|endoftext|>" token
70
+ text = text.replace("<|endoftext|>", "")
71
+
72
  # Remove newline characters and non-alphabetical characters
73
+ text = "".join(char for char in text if char.isalpha())
74
+
75
+ return text
76
+
77
+ # Print the generated sequences
78
+ for i, seq in enumerate(generated_sequences):
79
+ cleaned_text = clean_sequence(seq["generated_text"])
80
  print(f">Seq_{i}")
81
+ print(cleaned_text)
82
  ```
83
 
84
  ### Use Cases