cahya committed on
Commit
501a925
0 Parent(s):

Duplicate from cahya/indochat

Browse files
Files changed (4) hide show
  1. .gitattributes +34 -0
  2. README.md +14 -0
  3. app.py +92 -0
  4. requirements.txt +3 -0
.gitattributes ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tflite filter=lfs diff=lfs merge=lfs -text
29
+ *.tgz filter=lfs diff=lfs merge=lfs -text
30
+ *.wasm filter=lfs diff=lfs merge=lfs -text
31
+ *.xz filter=lfs diff=lfs merge=lfs -text
32
+ *.zip filter=lfs diff=lfs merge=lfs -text
33
+ *.zst filter=lfs diff=lfs merge=lfs -text
34
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Indochat
3
+ emoji: 🇮🇩
4
+ colorFrom: purple
5
+ colorTo: red
6
+ sdk: gradio
7
+ sdk_version: 3.18.0
8
+ app_file: app.py
9
+ pinned: false
10
+ license: creativeml-openrail-m
11
+ duplicated_from: cahya/indochat
12
+ ---
13
+
14
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,92 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import os
3
+ from mtranslate import translate
4
+ import requests
5
+
6
# Hugging Face access token taken from the environment; None when unset.
HF_AUTH_TOKEN = os.getenv("HF_AUTH_TOKEN")
# URL of the IndoChat HTTP API that get_answer posts to.
indochat_api = 'https://cahya-indonesian-whisperer.hf.space/api/indochat/v1'
# Token sent as "Bearer <token>" to the API; empty string when the variable is unset.
indochat_api_auth_token = os.environ.get("INDOCHAT_API_AUTH_TOKEN", "")
9
+
10
def _error_outputs(message):
    """Build the two HighlightedText payloads used to show an error message."""
    # One single-span payload per output component wired to get_answer.
    return [(message, None)], [(message, None)]


def get_answer(user_input, decoding_method, num_beams, top_k, top_p, temperature, repetition_penalty, penalty_alpha):
    """Ask the IndoChat API for a reply and translate the exchange to English.

    Args:
        user_input: The question typed by the user.
        decoding_method: Name of the decoding strategy, forwarded to the API.
        num_beams: Beam count, forwarded to the API.
        top_k: Top-k value, forwarded to the API.
        top_p: Top-p value, forwarded to the API.
        temperature: Sampling temperature, forwarded to the API.
        repetition_penalty: Repetition penalty, forwarded to the API.
        penalty_alpha: Contrastive-search penalty, forwarded to the API.

    Returns:
        A pair of lists of ``(text, label)`` tuples, one list per output
        component: the original question/answer, then the same exchange
        passed through ``translate(..., "en", "id")``. On failure both lists
        hold a single span carrying the error message, so the return shape
        is the same on every path.
    """
    print(user_input, decoding_method, top_k, top_p, temperature, repetition_penalty, penalty_alpha)
    headers = {'Authorization': 'Bearer ' + indochat_api_auth_token}
    data = {
        "text": user_input,
        # NOTE(review): this is a character count and exceeds max_length for
        # inputs longer than 250 characters; confirm how the API treats that.
        "min_length": len(user_input) + 50,
        "max_length": 300,
        "decoding_method": decoding_method,
        "num_beams": num_beams,
        "top_k": top_k,
        "top_p": top_p,
        "temperature": temperature,
        "seed": -1,
        "repetition_penalty": repetition_penalty,
        "penalty_alpha": penalty_alpha
    }
    try:
        # A timeout keeps a stalled backend from blocking the request forever.
        r = requests.post(indochat_api, headers=headers, data=data, timeout=120)
    except requests.RequestException as err:
        return _error_outputs("Error: " + str(err))

    if r.status_code != 200:
        return _error_outputs("Error: " + r.text)

    try:
        answer = r.json()["generated_text"]
    except (ValueError, KeyError, TypeError):
        # Body was not JSON, or did not carry the expected field.
        return _error_outputs("Error: " + r.text)

    user_input_en = translate(user_input, "en", "id")
    answer_en = translate(answer, "en", "id")
    return [(f"{user_input}\n", None), (answer, "")], \
           [(f"{user_input_en}\n", None), (answer_en, "")]
36
+
37
+
38
# Stylesheet handed to gr.Blocks: for both output panes (element ids
# "answer_id" and "answer_en") keep line breaks and hide the span labels.
css = """
#answer_id span {white-space: pre-line}
#answer_id span.label {display: none}
#answer_en span {white-space: pre-line}
#answer_en span.label {display: none}
"""

# Page layout: a header, a column of generation controls next to a column with
# the answer and its translation, then a visitor badge.
with gr.Blocks(css=css) as demo:
    with gr.Row():
        gr.Markdown("""## IndoChat

A Prove of Concept of a multilingual Chatbot (in this case a bilingual, English and Indonesian), fine-tuned with
multilingual instructions dataset. The base model is a GPT2-Medium (340M params) which was pretrained with 75GB
of Indonesian and English dataset, where English part is only less than 1% of the whole dataset.
""")
    with gr.Row():
        with gr.Column():
            # Input controls; their order matches get_answer's parameters.
            user_input = gr.Textbox(placeholder="",
                                    label="Ask me something in Indonesian or English",
                                    value="Bagaimana cara mendidik anak supaya tidak berbohong?")
            decoding_method = gr.Dropdown(["Beam Search", "Sampling", "Contrastive Search"],
                                          value="Sampling", label="Decoding Method")
            num_beams = gr.Slider(label="Number of beams for beam search",
                                  value=1, minimum=1, maximum=10, step=1)
            top_k = gr.Slider(label="Top K",
                              value=30, maximum=50, minimum=1, step=1)
            top_p = gr.Slider(label="Top P", value=0.9, step=0.05, minimum=0.1, maximum=1.0)
            temperature = gr.Slider(label="Temperature", value=0.5, step=0.05, minimum=0.1, maximum=1.0)
            repetition_penalty = gr.Slider(label="Repetition Penalty", value=1.1, step=0.05, minimum=1.0, maximum=2.0)
            penalty_alpha = gr.Slider(label="The penalty alpha for contrastive search",
                                      value=0.5, step=0.05, minimum=0.05, maximum=1.0)
            with gr.Row():
                button_generate_story = gr.Button("Submit")
        with gr.Column():
            # Output panes; styling comes from the Blocks-level stylesheet via elem_id.
            generated_answer = gr.HighlightedText(
                elem_id="answer_id",
                label="Generated Text",
                combine_adjacent=True,
            ).style(color_map={"": "blue", "-": "green"})
            generated_answer_en = gr.HighlightedText(
                elem_id="answer_en",
                label="Translation",
                combine_adjacent=True,
            ).style(color_map={"": "blue", "-": "green"})
    with gr.Row():
        gr.Markdown("![visitor badge](https://visitor-badge.glitch.me/badge?page_id=cahya_indochat)")

    # Submit sends every control value to get_answer and fills both panes.
    button_generate_story.click(get_answer,
                                inputs=[user_input, decoding_method, num_beams, top_k, top_p, temperature,
                                        repetition_penalty, penalty_alpha],
                                outputs=[generated_answer, generated_answer_en])

demo.launch(enable_queue=False)
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ torch
2
+ transformers
3
+ mtranslate