File size: 3,656 Bytes
3a36085
 
 
 
 
7f0ad73
 
 
3a36085
b633a3e
7f0ad73
 
 
 
 
 
 
 
3a36085
7f0ad73
 
 
 
 
 
b633a3e
 
7f0ad73
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3a36085
b633a3e
3a36085
7f0ad73
 
 
3a36085
7f0ad73
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cb2f200
79b9407
 
7f0ad73
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
import gradio as gr
import typing
from headline_detector import FasttextDetector, CNNDetector, IndoBERTweetDetector

# Map from the user-facing model label to its detector class (project-local
# classes imported from headline_detector).
model_dict = {
    "IndoBERTweet": IndoBERTweetDetector,
    "CNN": CNNDetector,
    "fastText": FasttextDetector,
}
# Maximum number of texts processed in one request; extra inputs are truncated.
num_text_process_limit = 10
# Model pre-selected in the dropdown on page load.
default_model = "fastText"
# Valid preprocessing scenario identifiers (see the accompanying paper).
scenario_number = ["1", "2", "3", "4", "5", "6"]
# Delimiter users place between multiple input texts.
# NOTE(review): name is a misspelling of "separator"; kept for compatibility
# since it is referenced throughout this module.
string_seperator = "|||"
# Markdown banner rendered at the top of the Gradio page.
markdown_top_info = """
# [headline_detector](https://huggingface.co/kaenova/headline_detector) spaces
Easily use the model with huggingface πŸ€— and gradio interface!  
Call by API is also available. Check the bottom of the page on 'Use via API πŸš€'
"""


def process_single_text(
    text_input: "str", model_name: "str", scenario_number_choose: "str"
):
    """Classify each delimited text as headline (1) or non-headline (0).

    Parameters
    ----------
    text_input : str
        One or more texts joined by ``string_seperator``. Only the first
        ``num_text_process_limit`` texts are processed; extras are dropped.
    model_name : str
        Key into ``model_dict`` ('IndoBERTweet', 'CNN' or 'fastText').
    scenario_number_choose : str
        Preprocessing scenario, one of "1"-"6" (int also accepted).

    Returns
    -------
    str
        One line per input text: "1" for headline, "0" for non-headline.
        A label other than 0/1 yields an empty line (unchanged behavior).

    Raises
    ------
    gr.Error
        If the scenario number or model name is invalid.
    """
    # Split the combined input and silently truncate to the processing limit.
    prediction_input = text_input.split(string_seperator)[:num_text_process_limit]

    # Checking and finalizing scenario number
    if not isinstance(scenario_number_choose, int):
        try:
            scenario_number_choose = int(scenario_number_choose)
        except (TypeError, ValueError):
            # Narrowed from a bare `except:` so system exceptions propagate.
            raise gr.Error("Not a valid scenario number")
    if f"{scenario_number_choose}" not in scenario_number:
        raise gr.Error("Scenario number must between 1-6")

    # Checking model name
    if model_name not in model_dict:
        raise gr.Error("Model name must either 'IndoBERTweet' or 'CNN' or 'fastText'")

    # Inferencing to library: look up the detector class, load the weights
    # for the chosen preprocessing scenario, then predict all texts at once.
    model: typing.Union[
        FasttextDetector, IndoBERTweetDetector, CNNDetector
    ] = model_dict[model_name]
    model = model.load_from_scenario(scenario_number_choose)
    results = model.predict_text(prediction_input)

    # Build one line per prediction with a single join (linear time) instead
    # of repeated `+=`; unexpected labels become empty lines, as before.
    return "\n".join(
        str(label) if label in (0, 1) else "" for label in results
    )


# Build the Gradio interface: a single column with a text input, model and
# scenario selectors, a trigger button, and a read-only results box.
with gr.Blocks() as demo:
    gr.Markdown(markdown_top_info)

    # Single Input
    with gr.Column():
        input_text = gr.Textbox(
            label="Input Text",
            # Fixed user-facing typo: "wether" -> "whether".
            info=f"Text to determine whether headline or not. For multiple input you can divide each string by '{string_seperator}'. Maximum processed string is {num_text_process_limit}.",
            lines=5,
            value="""Kami adalah|||\nseorang kapiten|||daitas kamu ada jurang|||\nDITEMUKAN PRIA DIATAS JURANG"""
        )
        model_used = gr.Dropdown(
            list(model_dict.keys()),
            label="Model",
            info="Model used to determine the text",
            value=default_model,
            interactive=True,
            allow_custom_value=False,
        )
        scenario_used = gr.Radio(
            scenario_number,
            label="Scenario Number",
            type="value",
            info="Preprocessing type (for more information, refer to the paper)",
            value="1",
        )

        single_text_button = gr.Button("Detect!")
        results_text = gr.Textbox(
            label="Result",
            interactive=False,
            info="Each line representing every string. Headline (1) or non-Headline (0)",
        )
        # Wire the button to the handler; also exposed via the Gradio API
        # under the name "predict_single_text".
        single_text_button.click(
            process_single_text,
            inputs=[input_text, model_used, scenario_used],
            outputs=[results_text],
            api_name="predict_single_text",
        )

    gr.Markdown("_Notes:_ for first time running, all models may take 20-60 seconds to load")

if __name__ == "__main__":
    # Launch the Gradio server when run as a script (not when imported).
    demo.launch()