Commit 66c5c22
Parent: f850893

Update README.md

README.md CHANGED
@@ -40,13 +40,23 @@ paraphraser.paraphrase("Hey, can yuo hepl me cancel my last order?", tone="witty
 
 OR use directly with transformers
 
-```
+```python
 from transformers import AutoModelForCausalLM, AutoTokenizer, StoppingCriteria, StoppingCriteriaList
 
-
+model_id = "llm-toys/RedPajama-INCITE-Base-3B-v1-paraphrase-tone"
 DEVICE = "cuda"
 EOC_FORMAT = "\n\n### END"
+PARAPHRASE_PREDICT_FORMAT = (
+    "### Instruction:\nGenerate a paraphrase for the following Input sentence.\n\n"
+    "### Input:\n{input_text}\n\n### Response:\n"
+)
+TONE_CHANGE_PREDICT_FORMAT = (
+    "### Instruction:\nChange the tone of the following Input sentence to {tone}.\n\n"
+    "### Input:\n{input_text}\n\n### Response:\n"
+)
 
+tokenizer = AutoTokenizer.from_pretrained(model_id)
+model = AutoModelForCausalLM.from_pretrained(model_id).to(DEVICE)
 
 class StoppingCriteriaSub(StoppingCriteria):
     """Helps in stopping the generation when a certain sequence of tokens is generated."""
@@ -63,7 +73,6 @@ stopping_criteria = StoppingCriteriaList(
     [StoppingCriteriaSub(stops=tokenizer(EOC_FORMAT)["input_ids"])]
 )
 
-
 def predict(input_text: str) -> list[str]:
     tokenized = tokenizer(
         input_text,
@@ -86,9 +95,14 @@ def predict(input_text: str) -> list[str]:
         stopping_criteria=stopping_criteria,
     )
 
-    out_texts = [
-        tokenizer.decode(o, skip_special_tokens=True) for o in out
-    ]
+    out_texts = [tokenizer.decode(o, skip_special_tokens=True) for o in out]
+    return out_texts
+
+print("Paraphrasing:")
+print(predict(PARAPHRASE_PREDICT_FORMAT.format(input_text="If you have any further questions, feel free to ask.")))
+
+print("Tone change:")
+print(predict(TONE_CHANGE_PREDICT_FORMAT.format(input_text="If you have any further questions, feel free to ask.", tone="professional")))
 ```
 
 ## Sample training data
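Note: the hunks above show only the docstring of `StoppingCriteriaSub`; its body lies outside the diff context. A minimal sketch of what such a criterion typically looks like, assuming only the `stops` argument visible in the `StoppingCriteriaList` call (an illustration, not the commit's actual implementation):

```python
import torch
from transformers import StoppingCriteria


class StoppingCriteriaSub(StoppingCriteria):
    """Helps in stopping the generation when a certain sequence of tokens is generated."""

    def __init__(self, stops: list[int]):
        super().__init__()
        # Token ids of the stop sequence, e.g. tokenizer(EOC_FORMAT)["input_ids"].
        self.stops = torch.tensor(stops)

    def __call__(self, input_ids, scores, **kwargs) -> bool:
        # Stop once the most recently generated tokens equal the stop sequence.
        if input_ids.shape[1] < len(self.stops):
            return False
        return torch.equal(input_ids[0, -len(self.stops):].cpu(), self.stops)
```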
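Likewise, the `model.generate` call that produces `out` falls between the second and third hunks and is not shown. A sketch of the full `predict` flow implied by the surrounding lines, relying on the `tokenizer`, `model`, `DEVICE`, and `stopping_criteria` defined earlier in the README; the generation arguments (`max_new_tokens` in particular) are assumptions, not taken from the commit:

```python
import torch


def predict(input_text: str) -> list[str]:
    # Tokenize the instruction-formatted prompt and move it to the model device.
    tokenized = tokenizer(input_text, return_tensors="pt").to(DEVICE)
    with torch.no_grad():
        out = model.generate(
            **tokenized,
            max_new_tokens=128,  # assumed value; the diff does not show it
            stopping_criteria=stopping_criteria,
        )
    # Decode every generated sequence; generation halts at the END marker.
    out_texts = [tokenizer.decode(o, skip_special_tokens=True) for o in out]
    return out_texts
```

Rendered, `PARAPHRASE_PREDICT_FORMAT.format(...)` is plain instruction-style text ending at `### Response:`, so the model completes the response and the stopping criterion cuts generation at the `\n\n### END` marker.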