Spaces:
Sleeping
Sleeping
Upload folder using huggingface_hub
Browse files- Notebook/01_romaneng2nep-translation.ipynb +1 -0
- Notebook/02_RomanEng2Nep_Transliteration_v2.ipynb +0 -0
- Notebook/dataset-conversion.ipynb +1 -0
- README.md +16 -8
- app.py +35 -0
- requirements.txt +5 -0
Notebook/01_romaneng2nep-translation.ipynb
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"metadata":{"kernelspec":{"language":"python","display_name":"Python 3","name":"python3"},"language_info":{"name":"python","version":"3.10.14","mimetype":"text/x-python","codemirror_mode":{"name":"ipython","version":3},"pygments_lexer":"ipython3","nbconvert_exporter":"python","file_extension":".py"},"kaggle":{"accelerator":"gpu","dataSources":[],"dockerImageVersionId":30762,"isInternetEnabled":true,"language":"python","sourceType":"notebook","isGpuEnabled":true}},"nbformat_minor":4,"nbformat":4,"cells":[{"cell_type":"code","source":"!pip install transformers datasets evaluate sacrebleu","metadata":{"_uuid":"8f2839f25d086af736a60e9eeb907d3b93b6e0e5","_cell_guid":"b1076dfc-b9ad-4769-8c92-a6c4dae69d19","execution":{"iopub.status.busy":"2024-09-22T16:50:45.081814Z","iopub.execute_input":"2024-09-22T16:50:45.082178Z","iopub.status.idle":"2024-09-22T16:51:00.847186Z","shell.execute_reply.started":"2024-09-22T16:50:45.082142Z","shell.execute_reply":"2024-09-22T16:51:00.846189Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"from huggingface_hub import notebook_login\n\nnotebook_login()","metadata":{"execution":{"iopub.status.busy":"2024-09-22T16:53:01.731576Z","iopub.execute_input":"2024-09-22T16:53:01.732612Z","iopub.status.idle":"2024-09-22T16:53:02.070092Z","shell.execute_reply.started":"2024-09-22T16:53:01.732561Z","shell.execute_reply":"2024-09-22T16:53:02.068675Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"from datasets import load_dataset\n\ndata = load_dataset(\"syubraj/roman2nepali-transliteration\")","metadata":{"execution":{"iopub.status.busy":"2024-09-22T16:53:19.592468Z","iopub.execute_input":"2024-09-22T16:53:19.592905Z","iopub.status.idle":"2024-09-22T16:53:27.147254Z","shell.execute_reply.started":"2024-09-22T16:53:19.592866Z","shell.execute_reply":"2024-09-22T16:53:27.146252Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"data = 
data['train'].train_test_split(test_size=0.02)","metadata":{"execution":{"iopub.status.busy":"2024-09-22T16:53:27.159658Z","iopub.execute_input":"2024-09-22T16:53:27.159978Z","iopub.status.idle":"2024-09-22T16:53:28.254948Z","shell.execute_reply.started":"2024-09-22T16:53:27.159945Z","shell.execute_reply":"2024-09-22T16:53:28.254138Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"from transformers import AutoTokenizer\n\ncheckpoint = \"google-t5/t5-small\"\ntokenizer_checkpoint = \"FacebookAI/xlm-roberta-base\"\ntokenizer = AutoTokenizer.from_pretrained(tokenizer_checkpoint)","metadata":{"execution":{"iopub.status.busy":"2024-09-22T16:53:28.257447Z","iopub.execute_input":"2024-09-22T16:53:28.257770Z","iopub.status.idle":"2024-09-22T16:53:33.442359Z","shell.execute_reply.started":"2024-09-22T16:53:28.257734Z","shell.execute_reply":"2024-09-22T16:53:33.441518Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"source_lang = 'roman'\ntarget_lang = 'nepali'\nprefix = \"translate Roman to Nepali: \"\n\n\ndef preprocess_function(examples):\n inputs = [prefix + example[source_lang] for example in examples[\"translation\"]]\n targets = [example[target_lang] for example in examples[\"translation\"]]\n model_inputs = tokenizer(inputs, text_target=targets, max_length=30, truncation=True)\n return model_inputs","metadata":{"execution":{"iopub.status.busy":"2024-09-22T16:53:33.443485Z","iopub.execute_input":"2024-09-22T16:53:33.443837Z","iopub.status.idle":"2024-09-22T16:53:33.449493Z","shell.execute_reply.started":"2024-09-22T16:53:33.443801Z","shell.execute_reply":"2024-09-22T16:53:33.448556Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"tokenized_data = data.map(preprocess_function, batched=True)\nprint(\"Data mapping 
done\")","metadata":{"execution":{"iopub.status.busy":"2024-09-22T16:56:35.660144Z","iopub.execute_input":"2024-09-22T16:56:35.660919Z","iopub.status.idle":"2024-09-22T17:00:01.565246Z","shell.execute_reply.started":"2024-09-22T16:56:35.660877Z","shell.execute_reply":"2024-09-22T17:00:01.564308Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"from transformers import DataCollatorForSeq2Seq\n\ndata_collator = DataCollatorForSeq2Seq(tokenizer=tokenizer, model=checkpoint)","metadata":{"execution":{"iopub.status.busy":"2024-09-22T17:00:06.956005Z","iopub.execute_input":"2024-09-22T17:00:06.956402Z","iopub.status.idle":"2024-09-22T17:00:19.227805Z","shell.execute_reply.started":"2024-09-22T17:00:06.956363Z","shell.execute_reply":"2024-09-22T17:00:19.226974Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"import evaluate\n\nmetric = evaluate.load(\"sacrebleu\")","metadata":{"execution":{"iopub.status.busy":"2024-09-22T17:00:26.591170Z","iopub.execute_input":"2024-09-22T17:00:26.591860Z","iopub.status.idle":"2024-09-22T17:00:29.284578Z","shell.execute_reply.started":"2024-09-22T17:00:26.591822Z","shell.execute_reply":"2024-09-22T17:00:29.283684Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"import numpy as np\n\n\ndef postprocess_text(preds, labels):\n preds = [pred.strip() for pred in preds]\n labels = [[label.strip()] for label in labels]\n\n return preds, labels\n\n\ndef compute_metrics(eval_preds):\n preds, labels = eval_preds\n if isinstance(preds, tuple):\n preds = preds[0]\n decoded_preds = tokenizer.batch_decode(preds, skip_special_tokens=True)\n\n labels = np.where(labels != -100, labels, tokenizer.pad_token_id)\n decoded_labels = tokenizer.batch_decode(labels, skip_special_tokens=True)\n\n decoded_preds, decoded_labels = postprocess_text(decoded_preds, decoded_labels)\n\n result = metric.compute(predictions=decoded_preds, references=decoded_labels)\n 
result = {\"bleu\": result[\"score\"]}\n\n prediction_lens = [np.count_nonzero(pred != tokenizer.pad_token_id) for pred in preds]\n result[\"gen_len\"] = np.mean(prediction_lens)\n result = {k: round(v, 4) for k, v in result.items()}\n return result","metadata":{"execution":{"iopub.status.busy":"2024-09-22T17:00:38.702200Z","iopub.execute_input":"2024-09-22T17:00:38.702924Z","iopub.status.idle":"2024-09-22T17:00:38.712287Z","shell.execute_reply.started":"2024-09-22T17:00:38.702882Z","shell.execute_reply":"2024-09-22T17:00:38.711336Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"from transformers import AutoModelForSeq2SeqLM, Seq2SeqTrainingArguments, Seq2SeqTrainer\n\nmodel = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)","metadata":{"execution":{"iopub.status.busy":"2024-09-22T17:00:40.951814Z","iopub.execute_input":"2024-09-22T17:00:40.952201Z","iopub.status.idle":"2024-09-22T17:00:46.435083Z","shell.execute_reply.started":"2024-09-22T17:00:40.952163Z","shell.execute_reply":"2024-09-22T17:00:46.434107Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"import wandb\n\ntry:\n from kaggle_secrets import UserSecretsClient\n user_secrets = UserSecretsClient()\n api_key = user_secrets.get_secret(\"wandb_api\")\n wandb.login(key=api_key)\n anony = None\nexcept:\n anony = \"must\"\n print('If you want to use your W&B account, go to Add-ons -> Secrets and provide your W&B access token. Use the Label name as wandb_api. 
\\nGet your W&B access token from here: https://wandb.ai/authorize')","metadata":{"execution":{"iopub.status.busy":"2024-09-22T17:00:46.437207Z","iopub.execute_input":"2024-09-22T17:00:46.438049Z","iopub.status.idle":"2024-09-22T17:00:49.392150Z","shell.execute_reply.started":"2024-09-22T17:00:46.437981Z","shell.execute_reply":"2024-09-22T17:00:49.391318Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"training_args = Seq2SeqTrainingArguments(\n output_dir=\"/kaggle/working/romaneng2nep/\",\n eval_strategy=\"epoch\",\n learning_rate=2e-5,\n per_device_train_batch_size=16,\n per_device_eval_batch_size=16,\n weight_decay=0.01,\n lr_scheduler_type=\"linear\",\n save_total_limit=3,\n num_train_epochs=1,\n predict_with_generate=True,\n fp16=True,\n report_to = 'wandb',\n push_to_hub = True,\n)\n\ntrainer = Seq2SeqTrainer(\n model=model,\n args=training_args,\n train_dataset=tokenized_data[\"train\"],\n eval_dataset=tokenized_data[\"test\"],\n tokenizer=tokenizer,\n data_collator=data_collator,\n compute_metrics=compute_metrics,\n)\n","metadata":{"execution":{"iopub.status.busy":"2024-09-22T17:01:55.818916Z","iopub.execute_input":"2024-09-22T17:01:55.819976Z","iopub.status.idle":"2024-09-22T17:01:55.981315Z","shell.execute_reply.started":"2024-09-22T17:01:55.819917Z","shell.execute_reply":"2024-09-22T17:01:55.980163Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"trainer.train()","metadata":{"execution":{"iopub.status.busy":"2024-09-22T17:01:57.802474Z","iopub.execute_input":"2024-09-22T17:01:57.803390Z","iopub.status.idle":"2024-09-22T17:03:13.942728Z","shell.execute_reply.started":"2024-09-22T17:01:57.803348Z","shell.execute_reply":"2024-09-22T17:03:13.940096Z"},"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"trainer.push_to_hub('syubraj/romanized_english_2_nepali')","metadata":{},"execution_count":null,"outputs":[]}]}
|
Notebook/02_RomanEng2Nep_Transliteration_v2.ipynb
ADDED
The diff for this file is too large to render.
See raw diff
|
|
Notebook/dataset-conversion.ipynb
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"cells":[{"cell_type":"code","execution_count":1,"metadata":{"_cell_guid":"b1076dfc-b9ad-4769-8c92-a6c4dae69d19","_uuid":"8f2839f25d086af736a60e9eeb907d3b93b6e0e5","execution":{"iopub.execute_input":"2024-09-22T15:43:44.884747Z","iopub.status.busy":"2024-09-22T15:43:44.884016Z","iopub.status.idle":"2024-09-22T15:43:53.003699Z","shell.execute_reply":"2024-09-22T15:43:53.002880Z","shell.execute_reply.started":"2024-09-22T15:43:44.884711Z"},"trusted":true},"outputs":[{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"10615553125b47bbb283c6d15b9d8ac3","version_major":2,"version_minor":0},"text/plain":["Downloading readme: 0%| | 0.00/624 [00:00<?, ?B/s]"]},"metadata":{},"output_type":"display_data"},{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"6ee918c192bf476fb66c29742418b4ca","version_major":2,"version_minor":0},"text/plain":["Downloading data: 0%| | 0.00/86.1M [00:00<?, ?B/s]"]},"metadata":{},"output_type":"display_data"},{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"7d7dcb663d2747d5bd509b4931809910","version_major":2,"version_minor":0},"text/plain":["Downloading data: 0%| | 0.00/94.2k [00:00<?, ?B/s]"]},"metadata":{},"output_type":"display_data"},{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"1a364f3f533f4edfa042201e1825d207","version_major":2,"version_minor":0},"text/plain":["Generating train split: 0%| | 0/2397414 [00:00<?, ? examples/s]"]},"metadata":{},"output_type":"display_data"},{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"76d84909e197482ea32cdd4a4e035ee3","version_major":2,"version_minor":0},"text/plain":["Generating validation split: 0%| | 0/2804 [00:00<?, ? 
examples/s]"]},"metadata":{},"output_type":"display_data"}],"source":["from datasets import load_dataset\n","\n","ds = load_dataset(\"Saugatkafley/Nepali-Roman-Transliteration\")"]},{"cell_type":"code","execution_count":4,"metadata":{"execution":{"iopub.execute_input":"2024-09-22T15:48:28.037465Z","iopub.status.busy":"2024-09-22T15:48:28.036523Z","iopub.status.idle":"2024-09-22T15:48:28.042501Z","shell.execute_reply":"2024-09-22T15:48:28.041586Z","shell.execute_reply.started":"2024-09-22T15:48:28.037419Z"},"trusted":true},"outputs":[{"name":"stdout","output_type":"stream","text":["DatasetDict({\n"," train: Dataset({\n"," features: ['unique_identifier', 'native word', 'english word'],\n"," num_rows: 2397414\n"," })\n"," validation: Dataset({\n"," features: ['unique_identifier', 'native word', 'english word'],\n"," num_rows: 2804\n"," })\n","})\n"]}],"source":["print(ds)"]},{"cell_type":"code","execution_count":7,"metadata":{"execution":{"iopub.execute_input":"2024-09-22T15:51:52.459691Z","iopub.status.busy":"2024-09-22T15:51:52.458718Z","iopub.status.idle":"2024-09-22T15:51:52.495039Z","shell.execute_reply":"2024-09-22T15:51:52.493900Z","shell.execute_reply.started":"2024-09-22T15:51:52.459633Z"},"trusted":true},"outputs":[{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"d15ba4abe5c342d5afe42cc8959365bb","version_major":2,"version_minor":0},"text/plain":["VBox(children=(HTML(value='<center> <img\\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…"]},"metadata":{},"output_type":"display_data"}],"source":["# !pip install huggingface\n","\n","from huggingface_hub import 
notebook_login\n","\n","notebook_login()"]},{"cell_type":"code","execution_count":15,"metadata":{"execution":{"iopub.execute_input":"2024-09-22T16:06:46.498144Z","iopub.status.busy":"2024-09-22T16:06:46.497197Z","iopub.status.idle":"2024-09-22T16:09:05.290568Z","shell.execute_reply":"2024-09-22T16:09:05.288867Z","shell.execute_reply.started":"2024-09-22T16:06:46.498097Z"},"trusted":true},"outputs":[{"name":"stderr","output_type":"stream","text":["100%|██████████| 2397414/2397414 [02:11<00:00, 18234.37it/s]\n","100%|██████████| 2804/2804 [00:00<00:00, 19079.34it/s]\n"]}],"source":["from datasets import DatasetDict, Dataset\n","from tqdm import tqdm\n","\n","\n","def transform_dataset(dataset):\n"," # Create a list to hold our transformed data\n"," transformed_data = []\n"," \n"," for example in tqdm(dataset):\n","# # Generate a random 5-digit ID (you may want to use a more robust method)\n","# random_id = str(random.randint(10000, 99999))\n"," \n"," transformed_example = {\n"," 'id': example['unique_identifier'],\n"," 'translation': {\n"," 'roman': example['english word'],\n"," 'nepali': example['native word'] \n"," }\n"," }\n"," transformed_data.append(transformed_example)\n"," \n"," # Create a new dataset from our transformed data\n"," return Dataset.from_list(transformed_data)\n","\n","transformed_train = transform_dataset(ds['train'])\n","transformed_validation = transform_dataset(ds['validation'])"]},{"cell_type":"code","execution_count":16,"metadata":{"execution":{"iopub.execute_input":"2024-09-22T16:09:19.328280Z","iopub.status.busy":"2024-09-22T16:09:19.327739Z","iopub.status.idle":"2024-09-22T16:09:19.350833Z","shell.execute_reply":"2024-09-22T16:09:19.349569Z","shell.execute_reply.started":"2024-09-22T16:09:19.328241Z"},"trusted":true},"outputs":[],"source":["transformed_dataset = DatasetDict({\n"," 'train': transformed_train,\n"," 'validation': 
transformed_validation\n","})"]},{"cell_type":"code","execution_count":17,"metadata":{"execution":{"iopub.execute_input":"2024-09-22T16:09:23.069359Z","iopub.status.busy":"2024-09-22T16:09:23.068241Z","iopub.status.idle":"2024-09-22T16:09:23.074713Z","shell.execute_reply":"2024-09-22T16:09:23.073749Z","shell.execute_reply.started":"2024-09-22T16:09:23.069316Z"},"trusted":true},"outputs":[{"name":"stdout","output_type":"stream","text":["DatasetDict({\n"," train: Dataset({\n"," features: ['id', 'translation'],\n"," num_rows: 2397414\n"," })\n"," validation: Dataset({\n"," features: ['id', 'translation'],\n"," num_rows: 2804\n"," })\n","})\n"]}],"source":["print(transformed_dataset)"]},{"cell_type":"code","execution_count":18,"metadata":{"execution":{"iopub.execute_input":"2024-09-22T16:09:33.982265Z","iopub.status.busy":"2024-09-22T16:09:33.981439Z","iopub.status.idle":"2024-09-22T16:09:42.214228Z","shell.execute_reply":"2024-09-22T16:09:42.213022Z","shell.execute_reply.started":"2024-09-22T16:09:33.982224Z"},"trusted":true},"outputs":[{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"c0871bd3cfd84bbfa54346500196cc28","version_major":2,"version_minor":0},"text/plain":["Uploading the dataset shards: 0%| | 0/1 [00:00<?, ?it/s]"]},"metadata":{},"output_type":"display_data"},{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"a1d23f24909544b19842206be23976a5","version_major":2,"version_minor":0},"text/plain":["Creating parquet from Arrow format: 0%| | 0/2398 [00:00<?, ?ba/s]"]},"metadata":{},"output_type":"display_data"},{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"641a18429d8c4fcfacdd568aa5e9e9ad","version_major":2,"version_minor":0},"text/plain":["Uploading the dataset shards: 0%| | 0/1 [00:00<?, ?it/s]"]},"metadata":{},"output_type":"display_data"},{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"3b6e0b99110644a29d97aba3ee927aa3","version_major":2,"version_minor":0},"text/plain":["Creating parquet from 
Arrow format: 0%| | 0/3 [00:00<?, ?ba/s]"]},"metadata":{},"output_type":"display_data"},{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"081765380fb64a8eb0e6122590532a0f","version_major":2,"version_minor":0},"text/plain":["README.md: 0%| | 0.00/683 [00:00<?, ?B/s]"]},"metadata":{},"output_type":"display_data"},{"name":"stdout","output_type":"stream","text":["{'id': 'nep1', 'translation': {'nepali': 'मुस्कुराउँदै', 'roman': 'muskuraundai'}}\n","{'id': 'nep1', 'translation': {'nepali': 'सर्वसाधारणसम्मले', 'roman': 'sarwasadharansammale'}}\n"]}],"source":["# Save the transformed dataset\n","transformed_dataset.push_to_hub('syubraj/roman2nepali-transliteration')\n","\n","# To verify the transformation, you can load a few examples:\n","print(transformed_dataset['train'][0])\n","print(transformed_dataset['validation'][0])"]},{"cell_type":"code","execution_count":null,"metadata":{},"outputs":[],"source":[]}],"metadata":{"kaggle":{"accelerator":"none","dataSources":[],"dockerImageVersionId":30761,"isGpuEnabled":false,"isInternetEnabled":true,"language":"python","sourceType":"notebook"},"kernelspec":{"display_name":"Python 3","language":"python","name":"python3"},"language_info":{"codemirror_mode":{"name":"ipython","version":3},"file_extension":".py","mimetype":"text/x-python","name":"python","nbconvert_exporter":"python","pygments_lexer":"ipython3","version":"3.10.0"}},"nbformat":4,"nbformat_minor":4}
|
README.md
CHANGED
@@ -1,12 +1,20 @@
|
|
1 |
---
|
2 |
-
title:
|
3 |
-
emoji: 🌖
|
4 |
-
colorFrom: gray
|
5 |
-
colorTo: yellow
|
6 |
-
sdk: gradio
|
7 |
-
sdk_version: 4.44.1
|
8 |
app_file: app.py
|
9 |
-
|
|
|
10 |
---
|
11 |
|
12 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
---
|
2 |
+
title: syubrajRomanEng2Nep-v2
|
|
|
|
|
|
|
|
|
|
|
3 |
app_file: app.py
|
4 |
+
sdk: gradio
|
5 |
+
sdk_version: 4.44.0
|
6 |
---
|
7 |
|
8 |
+
## Steps
|
9 |
+
### 1. Clone this repo
|
10 |
+
```
|
11 |
+
git clone git@github.com:yubraaj11/RomanEng2Nep.git
|
12 |
+
```
|
13 |
+
### 2. Install requirements
|
14 |
+
```
|
15 |
+
pip install -r requirements.txt
|
16 |
+
```
|
17 |
+
### 3. Run the Gradio App
|
18 |
+
```
|
19 |
+
python app.py
|
20 |
+
```
|
app.py
ADDED
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
from transformers import AutoTokenizer, MT5ForConditionalGeneration
|
3 |
+
|
4 |
+
# Load tokenizer and model
|
5 |
+
checkpoint = "syubraj/RomanEng2Nep-v2"
|
6 |
+
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
|
7 |
+
model = MT5ForConditionalGeneration.from_pretrained(checkpoint)
|
8 |
+
|
9 |
+
# Set max sequence length
|
10 |
+
max_seq_len = 20
|
11 |
+
|
12 |
+
# Define the translation function
|
13 |
+
def translate(text):
    """Convert Romanized input text to Nepali using the loaded model.

    Args:
        text: Romanized input string supplied by the Gradio text box.

    Returns:
        The decoded model output with special tokens removed, or an empty
        string when ``text`` is empty or contains only whitespace.
    """
    # Nothing to convert: return early instead of asking the model to
    # generate output for an empty prompt.
    if not text or not text.strip():
        return ""

    # Tokenize the input, truncating it to at most max_seq_len tokens.
    inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=max_seq_len)

    # Generate the output token ids.
    translated = model.generate(**inputs)

    # Decode the first returned sequence back to text.
    translated_text = tokenizer.decode(translated[0], skip_special_tokens=True)
    return translated_text
|
23 |
+
|
24 |
+
# Gradio interface
|
25 |
+
iface = gr.Interface(
|
26 |
+
fn=translate, # function to use for inference
|
27 |
+
inputs="text", # input type
|
28 |
+
outputs="text", # output type
|
29 |
+
title="Romanized English to Nepali Transliterator",
|
30 |
+
description="Translate Romanized English text into Nepali.",
|
31 |
+
examples=[["ahile"],["prakriti"], ["mahasagar"], ["pradarshan"]]
|
32 |
+
)
|
33 |
+
|
34 |
+
# Launch the Gradio app
|
35 |
+
iface.launch()
|
requirements.txt
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
transformers==4.45.1
|
2 |
+
gradio==4.44.0
|
3 |
+
protobuf==5.28.2
|
4 |
+
sentencepiece==0.2.0
|
5 |
+
torch
|