JollypopChannel committed
Commit 9d085a8
Parent: 1e9d5db

Add application file
app.py CHANGED
@@ -1,60 +1,71 @@
+from flask import Flask, render_template, request
 import os
-import requests
-import json
-from io import BytesIO
-
-from flask import Flask, jsonify, render_template, request, send_file
-
-from modules.inference import infer_t5
-from modules.dataset import query_emotion
-
-# https://huggingface.co/settings/tokens
-# https://huggingface.co/spaces/{username}/{space}/settings
-API_TOKEN = os.getenv("BIG_GAN_TOKEN")
+from model import model
 
 app = Flask(__name__)
 
-
-@app.route("/")
+@app.route('/')
 def index():
-    return render_template("index.html")
-
-
-@app.route("/infer_biggan")
-def biggan():
-    input = request.args.get("input")
-
-    output = requests.request(
-        "POST",
-        "https://api-inference.huggingface.co/models/osanseviero/BigGAN-deep-128",
-        headers={"Authorization": f"Bearer {API_TOKEN}"},
-        data=json.dumps(input),
+    T2E_exam = str(request.remote_addr) + ".txt"
+    with open(T2E_exam, "w") as file:
+        file.write("")
+    return render_template('index.html')
+
+@app.route('/process', methods=['POST'])
+def process():
+    T2E_exam = str(request.remote_addr) + ".txt"
+    text = request.form['text']
+    cefr_level = request.form['cefr_level']
+
+    # Call your Python function here to process the data
+    output = model(text, cefr_level)
+
+    # Save the output to a file
+    count = 0
+    max_choice = 4
+
+    with open(T2E_exam, "a") as file:
+        file.write("__________ T2E Vocabulary Exam Generator __________\n")
+        file.write("|    Welcome to T2E Vocabulary Exam Generator!     |\n")
+        file.write("|  We are glad that our service is useful to you.  |\n")
+        file.write("|                                                  |\n")
+        file.write("|       Copyright 2023, Nutnornont Chamadol        |\n")
+        file.write("|            Email: nontc49@gmail.com              |\n")
+        file.write("|     Visit https://nontgcob.com to learn more     |\n")
+        file.write("|                                                  |\n")
+        file.write("|          Your exam is generated below.           |\n")
+        file.write("|  - Happy using T2E Vocabulary Exam Generator! -  |\n")
+        file.write("|__________________________________________________|\n")
+        file.write("\n")
+
+    for key, value in output.items():
+        vvocab, sentence = key.split(" = ")
+        # print(f'What does the word "{vvocab}" mean in this sentence "{sentence}"?')
+        with open(T2E_exam, "a") as file:
+            file.write(f'What does the word "{vvocab}" mean in this sentence "{sentence}"?\n')
+
+        for choice in value:
+            ai_score, choice = choice.split(", ")
+            # print(f"- {choice}")
+            with open(T2E_exam, "a") as file:
+                file.write(f"- {choice}\n")
+            count += 1
+            # if count > (max_choice + 1):
+            #     break
+        with open(T2E_exam, "a") as file:
+            file.write("\n")
+
+    return render_template('result.html', output="Exam successfully generated!", file_path="T2E_exam.txt")
+
+from flask import send_file
+
+@app.route('/send')
+def get_file():
+    T2E_exam = str(request.remote_addr) + ".txt"
+    return send_file(
+        str(request.remote_addr) + ".txt",
+        # download_name = "T2E_exam.txt"
     )
 
-    return send_file(BytesIO(output.content), mimetype="image/png")
-
-
-@app.route("/infer_t5")
-def t5():
-    input = request.args.get("input")
-
-    output = infer_t5(input)
-
-    return jsonify({"output": output})
-
-
-@app.route("/query_emotion")
-def emotion():
-    start = request.args.get("start")
-    end = request.args.get("end")
-
-    print(start)
-    print(end)
-
-    output = query_emotion(int(start), int(end))
-
-    return jsonify({"output": output})
-
-
-if __name__ == "__main__":
-    app.run(host="0.0.0.0", port=7860)
+if __name__ == '__main__':
+    app.run()
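
A quick way to exercise the rewritten routes locally (a minimal sketch, assuming the app is started with python app.py on Flask's default http://127.0.0.1:5000; the routes and form field names are taken from the diff above):

    import requests

    base = "http://127.0.0.1:5000"

    # Mirror the HTML form: POST the passage and the target CEFR level to /process
    requests.post(base + "/process", data={"text": "Computers are useful machines", "cefr_level": "A1"})

    # The exam file is keyed to the client's IP address; /send streams it back
    exam = requests.get(base + "/send")
    print(exam.text)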
cefr-vocab.csv ADDED
The diff for this file is too large to render. See raw diff
 
model.py ADDED
@@ -0,0 +1,173 @@
+def model(passage, level):
+    # pip install spacy
+    # pip install transformers
+    # pip install torch
+    # pip install en_core_web_sm
+    # python -m spacy download en_core_web_sm
+    # pip install spacy-download
+    # pip install nltk
+
+    # Importing libraries
+    from nltk.corpus import wordnet
+    import spacy
+    import nltk
+    import transformers
+    import pandas as pd
+    import json
+    import random
+    import torch
+
+    nltk.download('wordnet')
+    nltk.download('omw-1.4')
+
+    # Passing file directories into variables
+    # text_input = "./text_input.txt"
+    cefr_vocab = "cefr-vocab.csv"
+
+    # Create and open the text file
+    # with open(text_input, "a") as file:
+    #     file.write(".")  # Add a full stop at the end so the model knows where the last sentence stops
+
+    # Ask the user for the CEFR level
+    # while True:
+    #     cefr_level = input("Which CEFR level you want to test?: ").upper()
+    #     if "A1" in cefr_level or "A2" in cefr_level or "B1" in cefr_level or "B2" in cefr_level or "C1" in cefr_level or "C2" in cefr_level:
+    #         break
+    #     else:
+    #         continue
+    cefr_level = level
+
+    # Read from the input file
+    # with open(text_input, "r") as file:
+    #     txt = str(file.readlines()).replace("[", "").replace("'", "").replace("]", "")
+    txt = passage + "."
+
+    if "." in txt:
+        txt = txt.split(".")
+    else:
+        txt = txt
+
+    # Part Of Speech tagging (POS tagging)
+    nlp = spacy.load("en_core_web_sm")
+
+    text_dict = {}
+    for n in txt:
+        n = n.strip()
+        ex1 = nlp(n)
+
+        for word in ex1:
+            sentence_question_tag = n.replace(word.text, f"[{word.text}]")
+            text_dict[f"{word.lemma_} = {sentence_question_tag}"] = word.pos_
+
+    # Collect the tagging results (keep just NOUN, PROPN, VERB, ADJ, or ADV)
+    collector = {}
+    for key, value in text_dict.items():
+        if "NOUN" in value or "PROPN" in value or "VERB" in value or "ADJ" in value or "ADV" in value:
+            collector[key] = value
+
+    # Collect the CEFR level of the words collected before
+    reference = pd.read_csv(cefr_vocab)
+
+    matching = {}
+    for row_idx in range(reference.shape[0]):
+        row = reference.iloc[row_idx]
+        key = f"{row.headword}, {row.pos}"
+        matching[key] = row.CEFR
+
+    # Convert pos of the word into all lowercase to match the other dataset with CEFR levels
+    for key1, value1 in collector.items():
+        if value1 == "NOUN":
+            collector[key1] = "noun"
+        if value1 == "VERB":
+            collector[key1] = "verb"
+        if value1 == "ADJ":
+            collector[key1] = "adjective"
+        if value1 == "ADV":
+            collector[key1] = "adverb"
+
+    # Matching 2 datasets together by the word and the pos
+    ready2filter = {}
+    for key, value in matching.items():
+        first_key, second_key = key.split(", ")
+        for key2, value2 in collector.items():
+            key2 = key2.split(" = ")
+            if first_key == key2[0].lower():
+                if second_key == value2:
+                    ready2filter[f"{key} = {key2[1]}"] = value
+
+    # Filter in just the vocab that has the CEFR level the user selected at the beginning
+    filtered0 = {}
+    for key, value in ready2filter.items():
+        if value == cefr_level:
+            filtered0[key] = value
+
+    # Rearrange the Python dictionary structure
+    filtered = {}
+    for key, value in filtered0.items():
+        key_parts = key.split(', ')
+        new_key = key_parts[0]
+        new_value = key_parts[1]
+        filtered[new_key] = new_value
+
+    # Grab the definition of each vocab from the wordnet English dictionary
+    def_filtered = {}
+    for key3, value3 in filtered.items():
+        syns = wordnet.synsets(key3)
+        partofspeech, context = value3.split(" = ")
+        def_filtered[f"{key3} = {context}"] = []
+
+        # pos conversion
+        if partofspeech == "noun":
+            partofspeech = "n"
+        if partofspeech == "verb":
+            partofspeech = "v"
+        if partofspeech == "adjective":
+            partofspeech = "s"
+        if partofspeech == "adverb":
+            partofspeech = "r"
+
+        # print("def_filtered 0:", def_filtered)
+
+        # Adding the definitions into the Python dictionary, def_filtered
+        # (the syns variable does the job of finding the relevant words, aka synonyms)
+        for s in syns:
+            # print('s:', s)
+            # print("syns:", syns)
+            def_filtered[f"{key3} = {context}"].append(s.definition())
+        # print("def_filtered 1:", def_filtered)
+
+    # Use an Nvidia CUDA GPU if available
+    if torch.cuda.is_available():
+        device = 0
+    else:
+        device = 'cpu'
+
+    # Declare the (trained) model that will be used
+    classifier = transformers.pipeline("zero-shot-classification", model="simple_trained_wsd_pipeline", device=device)
+
+    # Process Python dictionary, def_filtered
+    correct_def = {}
+    for key4, value4 in def_filtered.items():
+        vocab, context = key4.split(" = ")
+        sequence_to_classify = context
+        candidate_labels = value4
+        correct_def[key4] = []
+        hypothesis_template = 'The meaning of [' + vocab + '] is {}.'
+
+        output = classifier(sequence_to_classify, candidate_labels, hypothesis_template=hypothesis_template)
+
+        # Process the score of each definition and add it to the Python dictionary, correct_def
+        for label, score in zip(output['labels'], output['scores']):
+            correct_def[key4].append(f"{score:.5f}, {label}")
+
+    return correct_def
+
+    # with open(T2E_exam, "r") as file:
+    #     exam = file.readlines()
+    # print(exam)
+    # return(exam)
+
+
+# passage = "Computer is good"
+# level = "A1"
+# print(model(passage, level))
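
For readers tracing the data flow back into app.py: model() returns a dictionary mapping "lemma = sentence-with-[word]" keys to lists of "score, definition" strings, which /process splits apart again when writing the exam. A small illustration of that contract (hypothetical values, not real classifier output):

    output = {
        "good = Computer is [good]": [
            "0.91234, having desirable or positive qualities",
            "0.05432, morally admirable",
        ]
    }

    for key, choices in output.items():
        vocab, sentence = key.split(" = ")  # the same parsing app.py performs
        for entry in choices:
            score, definition = entry.split(", ")
            print(vocab, score, definition)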
simple_trained_wsd_pipeline/config.json ADDED
@@ -0,0 +1,52 @@
+{
+  "_name_or_path": "facebook/bart-large-mnli",
+  "_num_labels": 3,
+  "activation_dropout": 0.0,
+  "activation_function": "gelu",
+  "add_final_layer_norm": false,
+  "architectures": [
+    "BartForSequenceClassification"
+  ],
+  "attention_dropout": 0.0,
+  "bos_token_id": 0,
+  "classif_dropout": 0.0,
+  "classifier_dropout": 0.0,
+  "d_model": 1024,
+  "decoder_attention_heads": 16,
+  "decoder_ffn_dim": 4096,
+  "decoder_layerdrop": 0.0,
+  "decoder_layers": 12,
+  "decoder_start_token_id": 2,
+  "dropout": 0.1,
+  "encoder_attention_heads": 16,
+  "encoder_ffn_dim": 4096,
+  "encoder_layerdrop": 0.0,
+  "encoder_layers": 12,
+  "eos_token_id": 2,
+  "forced_eos_token_id": 2,
+  "gradient_checkpointing": false,
+  "id2label": {
+    "0": "contradiction",
+    "1": "neutral",
+    "2": "entailment"
+  },
+  "init_std": 0.02,
+  "is_encoder_decoder": true,
+  "label2id": {
+    "contradiction": 0,
+    "entailment": 2,
+    "neutral": 1
+  },
+  "max_position_embeddings": 1024,
+  "model_type": "bart",
+  "normalize_before": false,
+  "num_hidden_layers": 12,
+  "output_past": false,
+  "pad_token_id": 1,
+  "problem_type": "single_label_classification",
+  "scale_embedding": false,
+  "torch_dtype": "float32",
+  "transformers_version": "4.29.1",
+  "use_cache": true,
+  "vocab_size": 50265
+}
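
The config above shows that simple_trained_wsd_pipeline is a fine-tune of facebook/bart-large-mnli, an NLI model: zero-shot classification scores each candidate definition by how strongly the sentence entails a hypothesis built from it ("The meaning of [word] is {definition}."). A minimal load-and-query sketch (local checkpoint path as used in model.py; the candidate definitions here are hypothetical):

    from transformers import pipeline

    # Load the checkpoint committed in this directory
    clf = pipeline("zero-shot-classification", model="simple_trained_wsd_pipeline")

    result = clf(
        "Computer is [good].",
        candidate_labels=["having desirable or positive qualities", "morally admirable"],
        hypothesis_template="The meaning of [good] is {}.",
    )
    print(result["labels"][0], result["scores"][0])  # best-scoring definition first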
simple_trained_wsd_pipeline/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
simple_trained_wsd_pipeline/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ebe92cb602c9187d2ac775c5f0d98827ab9291293307c1a5090efae8ed94f251
+size 1629551961
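
Note that these three lines are a Git LFS pointer, not the weights themselves: the ~1.6 GB pytorch_model.bin is stored in LFS and is fetched (for example with git lfs pull) when the repository is cloned with LFS installed.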
simple_trained_wsd_pipeline/special_tokens_map.json ADDED
@@ -0,0 +1,15 @@
+{
+  "bos_token": "<s>",
+  "cls_token": "<s>",
+  "eos_token": "</s>",
+  "mask_token": {
+    "content": "<mask>",
+    "lstrip": true,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": "<pad>",
+  "sep_token": "</s>",
+  "unk_token": "<unk>"
+}
simple_trained_wsd_pipeline/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
simple_trained_wsd_pipeline/tokenizer_config.json ADDED
@@ -0,0 +1,15 @@
+{
+  "add_prefix_space": false,
+  "bos_token": "<s>",
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "<s>",
+  "eos_token": "</s>",
+  "errors": "replace",
+  "mask_token": "<mask>",
+  "model_max_length": 1024,
+  "pad_token": "<pad>",
+  "sep_token": "</s>",
+  "tokenizer_class": "BartTokenizer",
+  "trim_offsets": true,
+  "unk_token": "<unk>"
+}
simple_trained_wsd_pipeline/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
templates/index.html CHANGED
The diff for this file is too large to render. See raw diff
 
templates/result.html ADDED
@@ -0,0 +1,113 @@
+<!DOCTYPE html>
+<html>
+<head>
+    <title>Result | Text to Exam (Vocabulary Exam Generator)</title>
+    <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.0/css/all.min.css" integrity="sha512-iecdLmaskl7CVkqkXNQ/ZH/XLlvWZOJyj7Yy7tcenmpD1ypASozpmT/E0iPtmFIB46ZmdtAc9eNBvH0H/ZpiBw==" crossorigin="anonymous" referrerpolicy="no-referrer" />
+    <link rel="preconnect" href="https://fonts.googleapis.com">
+    <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
+    <link href="https://fonts.googleapis.com/css2?family=Rubik:wght@300;400;500;600;700;800;900&display=swap" rel="stylesheet">
+    <style>
+        * {
+            margin: 0;
+            padding: 0;
+            font-family: 'Rubik', sans-serif;
+            background-color: #1B1B1B;
+            color: #FFF7D0;
+            font-size: 62.5%;
+            box-sizing: border-box;
+        }
+
+        .container {
+            padding: 3rem 3.4rem;
+            background-color: #212529;
+            border-radius: 24px;
+            height: 43.5rem;
+            width: 38rem;
+            margin: 0 auto;
+            margin-top: 8%;
+        }
+
+        h1 {
+            font-size: 3.5rem;
+            text-align: center;
+            background-color: transparent;
+            margin-bottom: .6rem;
+        }
+
+        .subtitle {
+            background-color: transparent;
+            font-size: 1.6rem;
+            text-align: center;
+            margin-bottom: 1rem;
+        }
+
+        i {
+            font-size: 15rem;
+            background-color: #212529;
+            margin: 6.2rem 0;
+            margin-left: 27%;
+        }
+
+        .download {
+            padding: .5rem 34%;
+            font-size: 2rem;
+            border: 2px solid #FFF7D0;
+            border-radius: 8px;
+            background-color: #FFF7D0;
+            color: #212529;
+            font-weight: 600;
+            width: 100%;
+            margin-top: 1rem;
+            text-decoration: none;
+        }
+
+        .dev {
+            background-color: transparent;
+            padding: 1rem 3rem;
+            width: 38rem;
+            margin: 0 auto;
+            margin-top: 15px;
+            border-radius: 40px;
+            border: 2px solid #FFF7D0;
+            display: flex;
+            flex-direction: row;
+            font-size: 2.4rem;
+            display: flex;
+            justify-content: center;
+            background-color: #212529;
+        }
+
+        .intro {
+            width: fit-content;
+            margin-right: 3px;
+            background-color: transparent;
+        }
+
+        .nont {
+            width: fit-content;
+            text-decoration: underline;
+            text-decoration-thickness: 1.5px;
+            background-color: transparent;
+        }
+
+        .nont:hover {
+            background-color: #FFF7D0;
+            text-decoration: none;
+            color: #212529;
+        }
+    </style>
+</head>
+<body>
+    <div class="container">
+        <h1>Result</h1>
+        <p class="subtitle">Exam successfully generated!</p>
+        <!-- <p>{{ output }}</p> -->
+        <i class="fa-regular fa-circle-check"></i>
+        <a href="/send" class="download" download>Download</a>
+    </div>
+    <div class="dev">
+        <p class="intro">Developed by</p>
+        <a href="https://nontgcob.com/" class="nont">Nutnornont Chamadol</a>
+    </div>
+</body>
+</html>