PardisSzah committed
Commit c96e0a1
Parent(s): 360b18c
commit files to HF hub

Files changed:
- PersianTextFormalizerPipeline.py +19 -0
- config.json +9 -0
PersianTextFormalizerPipeline.py
ADDED
@@ -0,0 +1,19 @@
+from transformers import Pipeline, T5ForConditionalGeneration, AutoTokenizer
+
+class PersianTextFormalizerPipeline(Pipeline):
+
+    def _sanitize_parameters(self, **kwargs):
+        preprocess_kwargs = {}
+        if "second_text" in kwargs:
+            preprocess_kwargs["second_text"] = kwargs["second_text"]
+        return preprocess_kwargs, {}, {}
+
+    def preprocess(self, text, second_text=None):
+        inputs = self.tokenizer.encode("informal: " + text, return_tensors='pt', max_length=128, truncation=True, padding='max_length')
+        return inputs.to(self.device)
+
+    def _forward(self, model_inputs):
+        return self.model.generate(model_inputs, max_length=128, num_beams=4, temperature=0.7)
+
+    def postprocess(self, model_outputs):
+        return self.tokenizer.decode(model_outputs[0], skip_special_tokens=True)
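
The class fills in the four hooks that transformers.Pipeline calls in order: _sanitize_parameters routes keyword arguments to the stages, preprocess tokenizes the input with an "informal: " prefix, _forward runs beam-search generation, and postprocess decodes the first generated sequence. A minimal local sketch of registering and calling the class without going through the Hub (the checkpoint id below is an assumption, not taken from this commit):

# Local sketch: register the custom pipeline class and call it directly.
# Assumes PersianTextFormalizerPipeline.py is importable from the working directory;
# the checkpoint id "PardisSzah/PersianTextFormalizer_M" is an assumption.
from transformers import T5ForConditionalGeneration, pipeline
from transformers.pipelines import PIPELINE_REGISTRY
from PersianTextFormalizerPipeline import PersianTextFormalizerPipeline

PIPELINE_REGISTRY.register_pipeline(
    "text2text-PersianTextFormalizer_M",           # same task name as in config.json
    pipeline_class=PersianTextFormalizerPipeline,
    pt_model=T5ForConditionalGeneration,
)

formalizer = pipeline(
    "text2text-PersianTextFormalizer_M",
    model="PardisSzah/PersianTextFormalizer_M",    # assumed repo/checkpoint id
)
print(formalizer("متن غیررسمی"))  # placeholder informal input; prints the formal rewrite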
config.json
CHANGED
@@ -4,6 +4,15 @@
     "T5ForConditionalGeneration"
   ],
   "classifier_dropout": 0.0,
+  "custom_pipelines": {
+    "text2text-PersianTextFormalizer_M": {
+      "impl": "PersianTextFormalizerPipeline.PersianTextFormalizerPipeline",
+      "pt": [
+        "T5ForConditionalGeneration"
+      ],
+      "tf": []
+    }
+  },
   "d_ff": 2048,
   "d_kv": 64,
   "d_model": 768,
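
The custom_pipelines block is what lets the Hub map the task name to the class shipped in PersianTextFormalizerPipeline.py: loading the repository with trust_remote_code=True fetches the module named in "impl" and instantiates it as the pipeline. A minimal usage sketch (the repository id below is an assumption; substitute the actual model id):

# Hub usage sketch: the custom_pipelines entry in config.json maps the task name
# to PersianTextFormalizerPipeline; trust_remote_code=True lets that module load.
# The repository id below is an assumption; replace it with the actual model id.
from transformers import pipeline

formalizer = pipeline(
    "text2text-PersianTextFormalizer_M",
    model="PardisSzah/PersianTextFormalizer_M",  # assumed repo id
    trust_remote_code=True,
)
print(formalizer("متن غیررسمی"))  # returns the formal Persian rewrite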