Wikidepia committed on
Commit
654d2fa
0 Parent(s):

Initial commit

Browse files
Files changed (5) hide show
  1. .gitattributes +27 -0
  2. README.md +9 -0
  3. app/__init__.py +0 -0
  4. app/app.py +56 -0
  5. requirements.txt +1 -0
.gitattributes ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bin.* filter=lfs diff=lfs merge=lfs -text
5
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.model filter=lfs diff=lfs merge=lfs -text
12
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
13
+ *.onnx filter=lfs diff=lfs merge=lfs -text
14
+ *.ot filter=lfs diff=lfs merge=lfs -text
15
+ *.parquet filter=lfs diff=lfs merge=lfs -text
16
+ *.pb filter=lfs diff=lfs merge=lfs -text
17
+ *.pt filter=lfs diff=lfs merge=lfs -text
18
+ *.pth filter=lfs diff=lfs merge=lfs -text
19
+ *.rar filter=lfs diff=lfs merge=lfs -text
20
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
21
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
22
+ *.tflite filter=lfs diff=lfs merge=lfs -text
23
+ *.tgz filter=lfs diff=lfs merge=lfs -text
24
+ *.xz filter=lfs diff=lfs merge=lfs -text
25
+ *.zip filter=lfs diff=lfs merge=lfs -text
26
+ *.zstandard filter=lfs diff=lfs merge=lfs -text
27
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Indonesian Paraphrase Generation
3
+ emoji: 🧬
4
+ colorFrom: blue
5
+ colorTo: red
6
+ sdk: streamlit
7
+ app_file: app/app.py
8
+ pinned: true
9
+ ---
app/__init__.py ADDED
File without changes
app/app.py ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from typing import List
3
+
4
+ import streamlit as st
5
+ from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
6
+
7
+ os.environ["TOKENIZERS_PARALLELISM"] = "false"
8
+
9
+
10
@st.cache(allow_output_mutation=True)
def load_model(model_name):
    """Load the seq2seq model and its tokenizer for ``model_name``.

    The function is wrapped in ``st.cache`` so that Streamlit can reuse the
    loaded objects instead of calling ``from_pretrained`` again on every
    script rerun.

    Args:
        model_name: Identifier passed unchanged to both ``from_pretrained``
            calls.

    Returns:
        A ``(model, tokenizer)`` tuple -- note the model comes first.
    """
    # The tokenizer is loaded before the model, as in the original order.
    loaded_tokenizer = AutoTokenizer.from_pretrained(model_name)
    loaded_model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
    return loaded_model, loaded_tokenizer
15
+
16
+
17
def paraphrase(
    model, encoding, top_k=120, top_p=0.95, max_len=120, tokenizer=None
) -> List[str]:
    """Sample five paraphrases from ``model`` and decode them to text.

    Args:
        model: Object exposing ``generate(**kwargs)``; called with sampling
            enabled and ``num_return_sequences=5``.
        encoding: Mapping providing ``"input_ids"`` and ``"attention_mask"``,
            both forwarded unchanged to ``model.generate``.
        top_k: Forwarded to ``generate`` as ``top_k``.
        top_p: Forwarded to ``generate`` as ``top_p``.
        max_len: Forwarded to ``generate`` as ``max_length``.
        tokenizer: Object exposing ``decode``; used to turn each generated
            sequence into a string. When omitted, the module-level
            ``tokenizer`` is used, which keeps existing callers that relied
            on that global working.

    Returns:
        One decoded string per sequence returned by ``model.generate``.

    Raises:
        NameError: If ``tokenizer`` is not passed and no module-level
            ``tokenizer`` exists.
    """
    if tokenizer is None:
        # Resolve the fallback before generating so a missing tokenizer fails
        # fast instead of after the (expensive) generation step.
        try:
            tokenizer = globals()["tokenizer"]
        except KeyError:
            raise NameError(
                "name 'tokenizer' is not defined; pass tokenizer= explicitly"
            ) from None

    outputs = model.generate(
        input_ids=encoding["input_ids"],
        attention_mask=encoding["attention_mask"],
        do_sample=True,
        top_k=top_k,
        top_p=top_p,
        max_length=max_len,
        early_stopping=True,
        num_return_sequences=5,
    )
    return [
        tokenizer.decode(
            output, skip_special_tokens=True, clean_up_tokenization_spaces=True
        )
        for output in outputs
    ]
34
+
35
+
36
# Streamlit entry point: render the input box and decoding controls, then
# generate and list paraphrases of the entered sentence.
if __name__ == "__main__":
    st.header("Indonesian Paraphrase Generation")
    user_input = st.text_area("Original Sentence", "", height=30)

    # Sidebar sliders for the decoding parameters passed to paraphrase().
    st.sidebar.header("Decoding Settings")
    # Lower bound is 1: a max length of 0 leaves no room to generate anything.
    max_len = st.sidebar.slider("Max-Length", 1, 512, 256)
    top_k = st.sidebar.slider("Top-K", 0, 512, 200)
    top_p = st.sidebar.slider("Top-P", 0.0, 1.0, 0.95)

    # The button is always rendered; generation only runs when there is
    # actual text, so an empty or whitespace-only box never reaches the model.
    clicked = st.button("Paraphrase")
    source_text = user_input.strip()

    if clicked and not source_text:
        st.warning("Please enter a sentence to paraphrase.")
    elif source_text:
        with st.spinner("T5 is processing your text..."):
            # `tokenizer` is bound at module level on purpose: paraphrase()
            # looks the tokenizer up as a module-level name.
            model, tokenizer = load_model("Wikidepia/IndoT5-base-paraphrase")
            text = "paraphrase: " + source_text + " </s>"
            encode_id = tokenizer(text, return_tensors="pt")
            outputs = paraphrase(
                model, encode_id, top_k=top_k, top_p=top_p, max_len=max_len
            )
        st.markdown("### Hasil Parafrase")
        for output in outputs:
            st.markdown(f"- {output}")
requirements.txt ADDED
@@ -0,0 +1 @@
 
1
+ streamlit==0.80.0