File size: 1,479 Bytes
6cefe52 f2e835f 6cefe52 f2e835f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 |
---
license: apache-2.0
language:
- gu
---
<!-- This model card has been generated automatically according to the information Keras had access to. You should
probably proofread and complete it, then remove this comment. -->
# Model description
This model is an mt5-small version of Gujarati Grammarly: given a sentence, it performs spelling correction. Only the checkpoints for this small version are open source.
# Example usage:
"""Example: run spelling correction on one Gujarati sentence with the published checkpoint."""
import tensorflow as tf  # not referenced below; kept from the original example
from transformers import AutoTokenizer, TFAutoModelForSeq2SeqLM, create_optimizer  # create_optimizer is not referenced below

model_checkpoint = "Jayveersinh-Raj/guj-grammar-small"
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)
model = TFAutoModelForSeq2SeqLM.from_pretrained(model_checkpoint)

sentence = "સુંદરકાંડના પ્રારંભમાં હનૂમાન બળવાન તો છે પણ સાથે-સાથે બુદ્ધિમાન પણ છે તેની રોચક ધર્મકથા છૈ"

input_ids = tokenizer.encode(sentence, return_tensors="tf")  # token ids as a TensorFlow tensor
generated = model.generate(
    input_ids,
    max_length=128,
    num_beams=4,
    early_stopping=True,
)
corrected = tokenizer.decode(generated[0], skip_special_tokens=True)  # decode the first returned sequence

print("Generated Correction:", corrected, sep="\n")
# Notes:
- Only the Gujarati language is supported for now.
- Private dataset is used
- Only a TensorFlow model is available for now; PyTorch checkpoints will be available soon.