krum-utsav commited on
Commit
66c5c22
1 Parent(s): f850893

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +20 -6
README.md CHANGED
@@ -40,13 +40,23 @@ paraphraser.paraphrase("Hey, can yuo hepl me cancel my last order?", tone="witty
40
 
41
  OR use directly with transformers
42
 
43
- ```
44
  from transformers import AutoModelForCausalLM, AutoTokenizer, StoppingCriteria, StoppingCriteriaList
45
 
46
-
47
  DEVICE = "cuda"
48
  EOC_FORMAT = "\n\n### END"
 
 
 
 
 
 
 
 
49
 
 
 
50
 
51
  class StoppingCriteriaSub(StoppingCriteria):
52
  """Helps in stopping the generation when a certain sequence of tokens is generated."""
@@ -63,7 +73,6 @@ stopping_criteria = StoppingCriteriaList(
63
  [StoppingCriteriaSub(stops=tokenizer(EOC_FORMAT)["input_ids"])]
64
  )
65
 
66
-
67
  def predict(input_text: str) -> str:
68
  tokenized = tokenizer(
69
  input_text,
@@ -86,9 +95,14 @@ def predict(input_text: str) -> str:
86
  stopping_criteria=stopping_criteria,
87
  )
88
 
89
- out_texts = [self.tokenizer.decode(o, skip_special_tokens=True) for o in out]
90
- for o in out_texts:
91
- print(o)
 
 
 
 
 
92
  ```
93
 
94
  ## Sample training data
 
40
 
41
  OR use directly with transformers
42
 
43
+ ```python
44
  from transformers import AutoModelForCausalLM, AutoTokenizer, StoppingCriteria, StoppingCriteriaList
45
 
46
+ model_id = "llm-toys/RedPajama-INCITE-Base-3B-v1-paraphrase-tone"
47
  DEVICE = "cuda"
48
  EOC_FORMAT = "\n\n### END"
49
+ PARAPHRASE_PREDICT_FORMAT = (
50
+ "### Instruction:\nGenerate a paraphrase for the following Input sentence.\n\n"
51
+ "### Input:\n{input_text}\n\n### Response:\n"
52
+ )
53
+ TONE_CHANGE_PREDICT_FORMAT = (
54
+ "### Instruction:\nChange the tone of the following Input sentence to {tone}.\n\n"
55
+ "### Input:\n{input_text}\n\n### Response:\n"
56
+ )
57
 
58
+ tokenizer = AutoTokenizer.from_pretrained(model_id)
59
+ model = AutoModelForCausalLM.from_pretrained(model_id).to(DEVICE)
60
 
61
  class StoppingCriteriaSub(StoppingCriteria):
62
  """Helps in stopping the generation when a certain sequence of tokens is generated."""
 
73
  [StoppingCriteriaSub(stops=tokenizer(EOC_FORMAT)["input_ids"])]
74
  )
75
 
 
76
  def predict(input_text: str) -> list[str]:
77
  tokenized = tokenizer(
78
  input_text,
 
95
  stopping_criteria=stopping_criteria,
96
  )
97
 
98
+ out_texts = [tokenizer.decode(o, skip_special_tokens=True) for o in out]
99
+ return out_texts
100
+
101
+ print("Paraphrasing:")
102
+ print(predict(PARAPHRASE_PREDICT_FORMAT.format(input_text="If you have any further questions, feel free to ask.")))
103
+
104
+ print("Tone change:")
105
+ print(predict(TONE_CHANGE_PREDICT_FORMAT.format(input_text="If you have any further questions, feel free to ask.", tone="professional")))
106
  ```
107
 
108
  ## Sample training data