Upload 6 files

Browse files

Files changed (6) hide show

config.json +38 -0
generation_config.json +6 -0
model.py +74 -0
pytorch_model.bin +3 -0
server.py +36 -0
training_args.bin +3 -0

config.json ADDED Viewed

	@@ -0,0 +1,38 @@

+{
+  "_name_or_path": "./save_model_7",
+  "activation_function": "gelu_new",
+  "architectures": ["GPT2LMHeadModel"],
+  "attn_pdrop": 0.0,
+  "bos_token_id": 50256,
+  "embd_pdrop": 0.0,
+  "eos_token_id": 50256,
+  "gradient_checkpointing": false,
+  "initializer_range": 0.02,
+  "layer_norm_epsilon": 1e-5,
+  "model_type": "gpt2",
+  "n_ctx": 1024,
+  "n_embd": 768,
+  "n_head": 12,
+  "n_inner": null,
+  "n_layer": 12,
+  "n_positions": 1024,
+  "reorder_and_upcast_attn": false,
+  "resid_pdrop": 0.0,
+  "scale_attn_by_inverse_layer_idx": false,
+  "scale_attn_weights": true,
+  "summary_activation": null,
+  "summary_first_dropout": 0.1,
+  "summary_proj_to_labels": true,
+  "summary_type": "cls_index",
+  "summary_use_proj": true,
+  "task_specific_params": {
+    "text-generation": {
+      "do_sample": true,
+      "max_length": 50
+    }
+  },
+  "torch_dtype": "float32",
+  "transformers_version": "4.26.1",
+  "use_cache": true,
+  "vocab_size": 50257
+}

generation_config.json ADDED Viewed

	@@ -0,0 +1,6 @@

+{
+  "_from_model_config": true,
+  "bos_token_id": 50256,
+  "eos_token_id": 50256,
+  "transformers_version": "4.26.1"
+}

model.py ADDED Viewed

	@@ -0,0 +1,74 @@

+from transformers import GPT2LMHeadModel, AutoTokenizer
+import re
+# The tokenizer is loaded from the "flax-community/gpt2-bengali" checkpoint and
+# the model weights from the local "./350" directory, both at import time.
+tokenizer = AutoTokenizer.from_pretrained("flax-community/gpt2-bengali")
+model = GPT2LMHeadModel.from_pretrained("./350")
+# NOTE(review): the device is hard-coded, so importing this module presumably
+# fails on a host without a CUDA device -- confirm this is intended.
+model.to("cuda")
+# Prints the module structure once, when the module is imported.
+print(model)
+# Marker strings that summarize() searches for in the decoded model output.
+# They are spelled with the Bengali danda "।" and lower-case names, whereas the
+# prompt built in summarize() uses "|" and upper-case names.
+# NOTE(review): presumably the tokenizer round-trip normalizes one spelling to
+# the other -- verify against the tokenizer before changing either.
+BEGIN_TOKEN = "<।summary_begin।>"
+END_TOKEN = "<।summary_end।>"
+SUMMARY_TOKEN = "<।summary।>"
+def processTxt(txt):
+    """Normalize punctuation and whitespace in *txt*.
+
+    "।", "!", "?" and ";" each become "। ", a comma becomes ", ", the straight
+    double quote and the straight/curly single quotes are removed, and every
+    run of whitespace is collapsed to a single space.
+    """
+    sentence_end = "। "
+    # (pattern, replacement) pairs, applied strictly in this order; the
+    # whitespace collapse must stay last because earlier rules insert spaces.
+    rules = (
+        (r"।", sentence_end),
+        (r",", ", "),
+        (r"!", sentence_end),
+        (r"\?", sentence_end),
+        (r"\"", ""),
+        (r"'", ""),
+        (r"’", ""),
+        (r"‘", ""),
+        (r";", sentence_end),
+        (r"\s+", " "),
+    )
+    for pattern, replacement in rules:
+        txt = re.sub(pattern, replacement, txt)
+    return txt
+def index_of(val, in_text, after=0):
+    """Return the first index of *val* in *in_text* at or after *after*.
+
+    Returns -1 instead of raising when *val* does not occur.
+    """
+    try:
+        position = in_text.index(val, after)
+    except ValueError:
+        position = -1
+    return position
+def summarize(txt):
+    """Generate a summary of *txt* with the module-level model.
+
+    The text is normalized with processTxt(), wrapped in the begin/summary
+    prompt markers, tokenized (truncated to 800 tokens) and passed to
+    model.generate(). The decoded output is then reduced to the span that
+    follows the first summary marker.
+
+    Returns the summary string, or "No Summary!" when the decoded output
+    contains no summary marker.
+    """
+    txt = processTxt(txt.strip())
+    txt = "<|SUMMARY_BEGIN|>" + txt + "<|SUMMARY|>"
+    # NOTE(review): truncation presumably drops tokens from the end, so for an
+    # input longer than 800 tokens the trailing "<|SUMMARY|>" marker would be
+    # cut off -- confirm, and truncate the article before appending it if so.
+    inputs = tokenizer(txt, max_length=800, truncation=True, return_tensors="pt")
+    inputs = inputs.to("cuda")
+    # generate() counts max_length in tokens (prompt included). The previous
+    # len(txt) + 120 measured the prompt in characters, so the budget was
+    # unrelated to the real prompt length and could exceed the model's context.
+    prompt_tokens = inputs["input_ids"].shape[1]
+    output = model.generate(inputs["input_ids"], max_length=prompt_tokens + 120)
+    txt = tokenizer.batch_decode(output, skip_special_tokens=True)[0]
+    print(txt)
+    marker_at = index_of(SUMMARY_TOKEN, txt)
+    if marker_at == -1:
+        return "No Summary!"
+    start = marker_at + len(SUMMARY_TOKEN)
+    # The summary ends at the first marker found, trying them in this order.
+    end = -1
+    for marker in (END_TOKEN, SUMMARY_TOKEN, BEGIN_TOKEN):
+        end = index_of(marker, txt, start)
+        if end != -1:
+            break
+    if end == -1:
+        return txt[start:].strip()
+    txt = txt[start:end].strip()
+    # The end marker may lie beyond a repeated summary marker; keep only the
+    # text before that repetition.
+    end = index_of(SUMMARY_TOKEN, txt)
+    if end == -1:
+        return txt
+    return txt[:end].strip()

pytorch_model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:bdc50ee66a8dba2a224c595a373a0b94f4ce9cfdc050a4847bf961594ee29cef
+size 510398013

server.py ADDED Viewed

	@@ -0,0 +1,36 @@

+# Run this file on the server to expose the text-summarization HTTP API.
+# The model and the summarize() helper live in model.py.
+import base64
+import io
+import sys
+from flask import (Flask, Response, jsonify, json, make_response, render_template,
+                   request, send_file, send_from_directory)
+from flask_cors import CORS, cross_origin
+from model import summarize
+# Flask application object, with CORS configured for every route ('/*') and
+# any origin ('*').
+app = Flask(__name__)
+cors = CORS(app, resources={r'/*': {"origins": '*'}})
+# NOTE(review): Flask-CORS documents its config keys as CORS_ALLOW_HEADERS and
+# similar; 'CORS_HEADER' presumably has no effect -- confirm, then rename or
+# remove it.
+app.config['CORS_HEADER'] = 'Content-Type'
+@app.route('/')
+def index():
+    """Serve the root URL with a fixed greeting string."""
+    greeting = "Hello World!"
+    return greeting
+@app.route('/summarize/', methods=['POST'])
+@cross_origin(origin='*', headers=['Content-Type'])
+def getSummary():
+    """Summarize the text posted to /summarize/.
+
+    Expects a JSON object body with a string "text" field; force=True makes
+    the body be parsed as JSON regardless of the request Content-Type.
+
+    Returns a JSON response {"summary": <str>}, or a 400 JSON response
+    {"error": <str>} when "text" is missing or is not a string.
+    """
+    data = request.get_json(force=True)
+    # Without this check a missing or non-string "text" raised inside the
+    # handler and reached the client as a 500 instead of a client error.
+    text = data.get('text') if isinstance(data, dict) else None
+    if not isinstance(text, str):
+        error = make_response(
+            {"error": "JSON body must contain a string 'text' field."}, 400)
+        error.headers.set('Content-Type', 'application/json')
+        return error
+    summary = summarize(text)
+    response = make_response({"summary": summary})
+    response.headers.set('Content-Type', 'application/json')
+    return response
+if __name__ == '__main__':
+    # debug=True turns on the Werkzeug interactive debugger, which allows code
+    # execution by anyone who can reach the port; with host='0.0.0.0' that is
+    # every host on the network. Debug mode is therefore opt-in: set
+    # SUMMARY_SERVER_DEBUG=1 to enable it on a trusted machine.
+    import os
+    debug_enabled = os.environ.get('SUMMARY_SERVER_DEBUG', '') == '1'
+    app.run(host='0.0.0.0', debug=debug_enabled, port=5000)

training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d54fa4212c860b24a2f8df6215464f4a6dc98f5bfc673423e77c451d800edc0e
+size 3451