File size: 1,512 Bytes
4384641
 
 
e0ead87
 
 
 
 
 
 
 
 
 
4dcc920
e0ead87
 
 
 
 
 
4384641
 
6971908
4384641
 
 
 
 
6971908
4384641
e0ead87
 
 
 
 
 
 
 
4bdd21d
4384641
 
6971908
4384641
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
"""Gradio app to showcase the language detector."""

import gradio as gr
from transformers import pipeline


# Checkpoint identifier handed to the text-classification pipeline. The
# pipeline object is created once here, at module level, and reused by
# every call to `predict`.
model_ckpt = "papluca/xlm-roberta-base-language-detection"
pipe = pipeline(task="text-classification", model=model_ckpt)


def predict(text: str) -> dict:
    """Score ``text`` against the labels returned by the pipeline.

    The pipeline is invoked with ``return_all_scores=True``,
    ``truncation=True`` and ``max_length=128``.

    Returns:
        A mapping from each entry's ``"label"`` to its ``"score"`` converted
        to ``float``, built from the first element of the pipeline output;
        ``None`` when the pipeline output is empty.
    """
    outputs = pipe(text, return_all_scores=True, truncation=True, max_length=128)

    # Guard clause: nothing came back, so there is nothing to report.
    if not outputs:
        return None

    scores = {}
    for entry in outputs[0]:
        scores[entry["label"]] = float(entry["score"])
    return scores


# Static texts shown in the web interface.
title = "Language detection with XLM-RoBERTa"
description = "Determine the language in which your text is written."

# Each example is a one-element row (one value per input component).
examples = [
    [sentence]
    for sentence in (
        "Better late than never.",
        "Tutto è bene ciò che finisce bene.",
        "Donde hay humo, hay fuego.",
    )
]

# Footer text; adjacent literals are concatenated into a single string.
explanation = (
    "Supported languages are (20): "
    "arabic (ar), bulgarian (bg), german (de), modern greek (el), "
    "english (en), spanish (es), french (fr), hindi (hi), "
    "italian (it), japanese (ja), dutch (nl), polish (pl), "
    "portuguese (pt), russian (ru), swahili (sw), thai (th), "
    "turkish (tr), urdu (ur), vietnamese (vi), and chinese (zh)."
)

# The `gr.inputs` / `gr.outputs` namespaces are deprecated since Gradio 3 and
# absent from newer releases, where the components live at the top level of
# the package. Prefer the top-level classes and only touch the legacy
# namespaces when the top-level ones do not exist (older Gradio), so the
# script runs on either generation of the library.
_textbox_cls = getattr(gr, "Textbox", None) or gr.inputs.Textbox
_label_cls = getattr(gr, "Label", None) or gr.outputs.Label

# Web UI: a three-line text input wired to `predict`, with the result shown
# in a label component limited to the three top classes.
app = gr.Interface(
    fn=predict,
    inputs=_textbox_cls(
        placeholder="What's the text you want to know the language for?",
        label="Text",
        lines=3,
    ),
    outputs=_label_cls(num_top_classes=3, label="Your text is written in "),
    title=title,
    description=description,
    examples=examples,
    article=explanation,
)

# Start serving the interface.
app.launch()