JollypopChannel committed
Commit 9d085a8
Parent: 1e9d5db

Add application file
app.py CHANGED
@@ -1,60 +1,71 @@
+from flask import Flask, render_template, request
 import os
-import requests
-import json
-from io import BytesIO
-
-from flask import Flask, jsonify, render_template, request, send_file
-
-from modules.inference import infer_t5
-from modules.dataset import query_emotion
-
-# https://huggingface.co/settings/tokens
-# https://huggingface.co/spaces/{username}/{space}/settings
-API_TOKEN = os.getenv("BIG_GAN_TOKEN")
+from model import model
 
 app = Flask(__name__)
 
-
-@app.route("/")
+@app.route('/')
 def index():
-    return render_template("index.html")
-
-
-@app.route("/infer_biggan")
-def biggan():
-    input = request.args.get("input")
-
-    output = requests.request(
-        "POST",
-        "https://api-inference.huggingface.co/models/osanseviero/BigGAN-deep-128",
-        headers={"Authorization": f"Bearer {API_TOKEN}"},
-        data=json.dumps(input),
+    T2E_exam = str(request.remote_addr) + ".txt"
+    with open(T2E_exam, "w") as file:
+        file.write("")
+    return render_template('index.html')
+
+@app.route('/process', methods=['POST'])
+def process():
+    T2E_exam = str(request.remote_addr) + ".txt"
+    text = request.form['text']
+    cefr_level = request.form['cefr_level']
+
+    # Call your Python function here to process the data
+    output = model(text, cefr_level)
+
+    # Save the output to a file
+    count = 0
+    max_choice = 4
+
+    with open(T2E_exam, "a") as file:
+        file.write("__________ T2E Vocabulary Exam Generator __________\n")
+        file.write("|    Welcome to T2E Vocabulary Exam Generator!     |\n")
+        file.write("|  We are glad that our service is useful to you.  |\n")
+        file.write("|                                                  |\n")
+        file.write("|       Copyright 2023, Nutnornont Chamadol        |\n")
+        file.write("|            Email: nontc49@gmail.com              |\n")
+        file.write("|     Visit https://nontgcob.com to learn more     |\n")
+        file.write("|                                                  |\n")
+        file.write("|          Your exam is generated below.           |\n")
+        file.write("|  - Happy using T2E Vocabulary Exam Generator! -  |\n")
+        file.write("|__________________________________________________|\n")
+        file.write("\n")
+
+    for key, value in output.items():
+        vvocab, sentence = key.split(" = ")
+        # print(f'What does the word "{vvocab}" mean in this sentence "{sentence}"?')
+        with open(T2E_exam, "a") as file:
+            file.write(f'What does the word "{vvocab}" mean in this sentence "{sentence}"?\n')
+
+        for choice in value:
+            ai_score, choice = choice.split(", ")
+            # print(f"- {choice}")
+            with open(T2E_exam, "a") as file:
+                file.write(f"- {choice}\n")
+            count += 1
+            # if count > (max_choice + 1):
+            #     break
+        with open(T2E_exam, "a") as file:
+            file.write("\n")
+
+    return render_template('result.html', output="Exam successfully generated!", file_path="T2E_exam.txt")
+
+from flask import send_file
+
+@app.route('/send')
+def get_file():
+    T2E_exam = str(request.remote_addr) + ".txt"
+    return send_file(
+        str(request.remote_addr) + ".txt",
+        # download_name = "T2E_exam.txt"
     )
 
-    return send_file(BytesIO(output.content), mimetype="image/png")
-
-
-@app.route("/infer_t5")
-def t5():
-    input = request.args.get("input")
-
-    output = infer_t5(input)
-
-    return jsonify({"output": output})
-
-
-@app.route("/query_emotion")
-def emotion():
-    start = request.args.get("start")
-    end = request.args.get("end")
-
-    print(start)
-    print(end)
-
-    output = query_emotion(int(start), int(end))
-
-    return jsonify({"output": output})
-
-
-if __name__ == "__main__":
-    app.run(host="0.0.0.0", port=7860)
+if __name__ == '__main__':
+    app.run()
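
A quick way to exercise the rewritten routes locally (a minimal sketch, assuming the app is started with python app.py on Flask's default http://127.0.0.1:5000; the routes and form field names are taken from the diff above):

    import requests

    base = "http://127.0.0.1:5000"

    # Mirror the HTML form: POST the passage and the target CEFR level to /process
    requests.post(base + "/process", data={"text": "Computers are useful machines", "cefr_level": "A1"})

    # The exam file is keyed to the client's IP address; /send streams it back
    exam = requests.get(base + "/send")
    print(exam.text)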
cefr-vocab.csv ADDED
The diff for this file is too large to render. See raw diff
 
model.py ADDED
@@ -0,0 +1,173 @@
+def model(passage, level):
+    # pip install spacy
+    # pip install transformers
+    # pip install torch
+    # pip install en_core_web_sm
+    # python -m spacy download en_core_web_sm
+    # pip install spacy-download
+    # pip install nltk
+
+    # Importing libraries
+    from nltk.corpus import wordnet
+    import spacy
+    import nltk
+    import transformers
+    import pandas as pd
+    import json
+    import random
+    import torch
+
+    nltk.download('wordnet')
+    nltk.download('omw-1.4')
+
+    # Passing file directories into variables
+    # text_input = "./text_input.txt"
+    cefr_vocab = "cefr-vocab.csv"
+
+    # Create and open the text file
+    # with open(text_input, "a") as file:
+    #     file.write(".")  # Add a full stop at the end so the model knows where the last sentence stops
+
+    # Ask the user for the CEFR level
+    # while True:
+    #     cefr_level = input("Which CEFR level you want to test?: ").upper()
+    #     if "A1" in cefr_level or "A2" in cefr_level or "B1" in cefr_level or "B2" in cefr_level or "C1" in cefr_level or "C2" in cefr_level:
+    #         break
+    #     else:
+    #         continue
+    cefr_level = level
+
+    # Read from the input file
+    # with open(text_input, "r") as file:
+    #     txt = str(file.readlines()).replace("[", "").replace("'", "").replace("]", "")
+    txt = passage + "."
+
+    if "." in txt:
+        txt = txt.split(".")
+    else:
+        txt = txt
+
+    # Part Of Speech tagging (POS tagging)
+    nlp = spacy.load("en_core_web_sm")
+
+    text_dict = {}
+    for n in txt:
+        n = n.strip()
+        ex1 = nlp(n)
+
+        for word in ex1:
+            sentence_question_tag = n.replace(word.text, f"[{word.text}]")
+            text_dict[f"{word.lemma_} = {sentence_question_tag}"] = word.pos_
+
+    # Collect the tagging results (keep just NOUN, PROPN, VERB, ADJ, or ADV)
+    collector = {}
+    for key, value in text_dict.items():
+        if "NOUN" in value or "PROPN" in value or "VERB" in value or "ADJ" in value or "ADV" in value:
+            collector[key] = value
+
+    # Collect the CEFR level of the words collected before
+    reference = pd.read_csv(cefr_vocab)
+
+    matching = {}
+    for row_idx in range(reference.shape[0]):
+        row = reference.iloc[row_idx]
+        key = f"{row.headword}, {row.pos}"
+        matching[key] = row.CEFR
+
+    # Convert pos of the word into all lowercase to match the other dataset with CEFR levels
+    for key1, value1 in collector.items():
+        if value1 == "NOUN":
+            collector[key1] = "noun"
+        if value1 == "VERB":
+            collector[key1] = "verb"
+        if value1 == "ADJ":
+            collector[key1] = "adjective"
+        if value1 == "ADV":
+            collector[key1] = "adverb"
+
+    # Matching 2 datasets together by the word and the pos
+    ready2filter = {}
+    for key, value in matching.items():
+        first_key, second_key = key.split(", ")
+        for key2, value2 in collector.items():
+            key2 = key2.split(" = ")
+            if first_key == key2[0].lower():
+                if second_key == value2:
+                    ready2filter[f"{key} = {key2[1]}"] = value
+
+    # Filter in just the vocab that has the CEFR level the user selected at the beginning
+    filtered0 = {}
+    for key, value in ready2filter.items():
+        if value == cefr_level:
+            filtered0[key] = value
+
+    # Rearrange the Python dictionary structure
+    filtered = {}
+    for key, value in filtered0.items():
+        key_parts = key.split(', ')
+        new_key = key_parts[0]
+        new_value = key_parts[1]
+        filtered[new_key] = new_value
+
+    # Grab the definition of each vocab from the wordnet English dictionary
+    def_filtered = {}
+    for key3, value3 in filtered.items():
+        syns = wordnet.synsets(key3)
+        partofspeech, context = value3.split(" = ")
+        def_filtered[f"{key3} = {context}"] = []
+
+        # pos conversion
+        if partofspeech == "noun":
+            partofspeech = "n"
+        if partofspeech == "verb":
+            partofspeech = "v"
+        if partofspeech == "adjective":
+            partofspeech = "s"
+        if partofspeech == "adverb":
+            partofspeech = "r"
+
+        # print("def_filtered 0:", def_filtered)
+
+        # Adding the definitions into the Python dictionary, def_filtered
+        # (the syns variable does the job of finding the relevant words, aka synonyms)
+        for s in syns:
+            # print('s:', s)
+            # print("syns:", syns)
+            def_filtered[f"{key3} = {context}"].append(s.definition())
+        # print("def_filtered 1:", def_filtered)
+
+    # Use an Nvidia CUDA GPU if available
+    if torch.cuda.is_available():
+        device = 0
+    else:
+        device = 'cpu'
+
+    # Declare the (trained) model that will be used
+    classifier = transformers.pipeline("zero-shot-classification", model="simple_trained_wsd_pipeline", device=device)
+
+    # Process Python dictionary, def_filtered
+    correct_def = {}
+    for key4, value4 in def_filtered.items():
+        vocab, context = key4.split(" = ")
+        sequence_to_classify = context
+        candidate_labels = value4
+        correct_def[key4] = []
+        hypothesis_template = 'The meaning of [' + vocab + '] is {}.'
+
+        output = classifier(sequence_to_classify, candidate_labels, hypothesis_template=hypothesis_template)
+
+        # Process the score of each definition and add it to the Python dictionary, correct_def
+        for label, score in zip(output['labels'], output['scores']):
+            correct_def[key4].append(f"{score:.5f}, {label}")
+
+    return correct_def
+
+    # with open(T2E_exam, "r") as file:
+    #     exam = file.readlines()
+    # print(exam)
+    # return(exam)
+
+
+# passage = "Computer is good"
+# level = "A1"
+# print(model(passage, level))
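
For readers tracing the data flow back into app.py: model() returns a dictionary mapping "lemma = sentence-with-[word]" keys to lists of "score, definition" strings, which /process splits apart again when writing the exam. A small illustration of that contract (hypothetical values, not real classifier output):

    output = {
        "good = Computer is [good]": [
            "0.91234, having desirable or positive qualities",
            "0.05432, morally admirable",
        ]
    }

    for key, choices in output.items():
        vocab, sentence = key.split(" = ")  # the same parsing app.py performs
        for entry in choices:
            score, definition = entry.split(", ")
            print(vocab, score, definition)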
simple_trained_wsd_pipeline/config.json ADDED
@@ -0,0 +1,52 @@
+{
+  "_name_or_path": "facebook/bart-large-mnli",
+  "_num_labels": 3,
+  "activation_dropout": 0.0,
+  "activation_function": "gelu",
+  "add_final_layer_norm": false,
+  "architectures": [
+    "BartForSequenceClassification"
+  ],
+  "attention_dropout": 0.0,
+  "bos_token_id": 0,
+  "classif_dropout": 0.0,
+  "classifier_dropout": 0.0,
+  "d_model": 1024,
+  "decoder_attention_heads": 16,
+  "decoder_ffn_dim": 4096,
+  "decoder_layerdrop": 0.0,
+  "decoder_layers": 12,
+  "decoder_start_token_id": 2,
+  "dropout": 0.1,
+  "encoder_attention_heads": 16,
+  "encoder_ffn_dim": 4096,
+  "encoder_layerdrop": 0.0,
+  "encoder_layers": 12,
+  "eos_token_id": 2,
+  "forced_eos_token_id": 2,
+  "gradient_checkpointing": false,
+  "id2label": {
+    "0": "contradiction",
+    "1": "neutral",
+    "2": "entailment"
+  },
+  "init_std": 0.02,
+  "is_encoder_decoder": true,
+  "label2id": {
+    "contradiction": 0,
+    "entailment": 2,
+    "neutral": 1
+  },
+  "max_position_embeddings": 1024,
+  "model_type": "bart",
+  "normalize_before": false,
+  "num_hidden_layers": 12,
+  "output_past": false,
+  "pad_token_id": 1,
+  "problem_type": "single_label_classification",
+  "scale_embedding": false,
+  "torch_dtype": "float32",
+  "transformers_version": "4.29.1",
+  "use_cache": true,
+  "vocab_size": 50265
+}
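
The config above shows that simple_trained_wsd_pipeline is a fine-tune of facebook/bart-large-mnli, an NLI model: zero-shot classification scores each candidate definition by how strongly the sentence entails a hypothesis built from it ("The meaning of [word] is {definition}."). A minimal load-and-query sketch (local checkpoint path as used in model.py; the candidate definitions here are hypothetical):

    from transformers import pipeline

    # Load the checkpoint committed in this directory
    clf = pipeline("zero-shot-classification", model="simple_trained_wsd_pipeline")

    result = clf(
        "Computer is [good].",
        candidate_labels=["having desirable or positive qualities", "morally admirable"],
        hypothesis_template="The meaning of [good] is {}.",
    )
    print(result["labels"][0], result["scores"][0])  # best-scoring definition first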
simple_trained_wsd_pipeline/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
simple_trained_wsd_pipeline/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ebe92cb602c9187d2ac775c5f0d98827ab9291293307c1a5090efae8ed94f251
+size 1629551961
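
Note that these three lines are a Git LFS pointer, not the weights themselves: the ~1.6 GB pytorch_model.bin is stored in LFS and is fetched (for example with git lfs pull) when the repository is cloned with LFS installed.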
simple_trained_wsd_pipeline/special_tokens_map.json ADDED
@@ -0,0 +1,15 @@
+{
+  "bos_token": "<s>",
+  "cls_token": "<s>",
+  "eos_token": "</s>",
+  "mask_token": {
+    "content": "<mask>",
+    "lstrip": true,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": "<pad>",
+  "sep_token": "</s>",
+  "unk_token": "<unk>"
+}
simple_trained_wsd_pipeline/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
simple_trained_wsd_pipeline/tokenizer_config.json ADDED
@@ -0,0 +1,15 @@
+{
+  "add_prefix_space": false,
+  "bos_token": "<s>",
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "<s>",
+  "eos_token": "</s>",
+  "errors": "replace",
+  "mask_token": "<mask>",
+  "model_max_length": 1024,
+  "pad_token": "<pad>",
+  "sep_token": "</s>",
+  "tokenizer_class": "BartTokenizer",
+  "trim_offsets": true,
+  "unk_token": "<unk>"
+}
simple_trained_wsd_pipeline/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
templates/index.html CHANGED
The diff for this file is too large to render. See raw diff
 
templates/result.html ADDED
@@ -0,0 +1,113 @@
+<!DOCTYPE html>
+<html>
+<head>
+    <title>Result | Text to Exam (Vocabulary Exam Generator)</title>
+    <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.0/css/all.min.css" integrity="sha512-iecdLmaskl7CVkqkXNQ/ZH/XLlvWZOJyj7Yy7tcenmpD1ypASozpmT/E0iPtmFIB46ZmdtAc9eNBvH0H/ZpiBw==" crossorigin="anonymous" referrerpolicy="no-referrer" />
+    <link rel="preconnect" href="https://fonts.googleapis.com">
+    <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
+    <link href="https://fonts.googleapis.com/css2?family=Rubik:wght@300;400;500;600;700;800;900&display=swap" rel="stylesheet">
+    <style>
+        * {
+            margin: 0;
+            padding: 0;
+            font-family: 'Rubik', sans-serif;
+            background-color: #1B1B1B;
+            color: #FFF7D0;
+            font-size: 62.5%;
+            box-sizing: border-box;
+        }
+
+        .container {
+            padding: 3rem 3.4rem;
+            background-color: #212529;
+            border-radius: 24px;
+            height: 43.5rem;
+            width: 38rem;
+            margin: 0 auto;
+            margin-top: 8%;
+        }
+
+        h1 {
+            font-size: 3.5rem;
+            text-align: center;
+            background-color: transparent;
+            margin-bottom: .6rem;
+        }
+
+        .subtitle {
+            background-color: transparent;
+            font-size: 1.6rem;
+            text-align: center;
+            margin-bottom: 1rem;
+        }
+
+        i {
+            font-size: 15rem;
+            background-color: #212529;
+            margin: 6.2rem 0;
+            margin-left: 27%;
+        }
+
+        .download {
+            padding: .5rem 34%;
+            font-size: 2rem;
+            border: 2px solid #FFF7D0;
+            border-radius: 8px;
+            background-color: #FFF7D0;
+            color: #212529;
+            font-weight: 600;
+            width: 100%;
+            margin-top: 1rem;
+            text-decoration: none;
+        }
+
+        .dev {
+            background-color: transparent;
+            padding: 1rem 3rem;
+            width: 38rem;
+            margin: 0 auto;
+            margin-top: 15px;
+            border-radius: 40px;
+            border: 2px solid #FFF7D0;
+            display: flex;
+            flex-direction: row;
+            font-size: 2.4rem;
+            display: flex;
+            justify-content: center;
+            background-color: #212529;
+        }
+
+        .intro {
+            width: fit-content;
+            margin-right: 3px;
+            background-color: transparent;
+        }
+
+        .nont {
+            width: fit-content;
+            text-decoration: underline;
+            text-decoration-thickness: 1.5px;
+            background-color: transparent;
+        }
+
+        .nont:hover {
+            background-color: #FFF7D0;
+            text-decoration: none;
+            color: #212529;
+        }
+    </style>
+</head>
+<body>
+    <div class="container">
+        <h1>Result</h1>
+        <p class="subtitle">Exam successfully generated!</p>
+        <!-- <p>{{ output }}</p> -->
+        <i class="fa-regular fa-circle-check"></i>
+        <a href="/send" class="download" download>Download</a>
+    </div>
+    <div class="dev">
+        <p class="intro">Developed by</p>
+        <a href="https://nontgcob.com/" class="nont">Nutnornont Chamadol</a>
+    </div>
+</body>
+</html>