Sanjar commited on
Commit
35c1c66
1 Parent(s): b9de727
.gitattributes CHANGED
@@ -29,3 +29,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
29
  *.zip filter=lfs diff=lfs merge=lfs -text
30
  *.zst filter=lfs diff=lfs merge=lfs -text
31
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
29
  *.zip filter=lfs diff=lfs merge=lfs -text
30
  *.zst filter=lfs diff=lfs merge=lfs -text
31
  *tfevents* filter=lfs diff=lfs merge=lfs -text
32
+ .fileextension filter=lfs diff=lfs merge=lfs -text
app.py ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Streamlit app: Uzbek news-text classification with a quantized ONNX BERT model.

Loads an ONNX-Runtime quantized sequence-classification model and its tokenizer
from the local ``onnx`` directory, then classifies user-supplied text into news
categories (per ``onnx/config.json``: Fan va Texnika, Jahon, Jamiyat, O'zbekiston).
"""
from pathlib import Path

import streamlit as st
from optimum.onnxruntime import ORTModelForSequenceClassification
from transformers import AutoTokenizer, pipeline

# Load the quantized model weights and the matching tokenizer once at startup.
onnx_path = Path("onnx")
model = ORTModelForSequenceClassification.from_pretrained(
    onnx_path, file_name="model_quantized.onnx"
)
tokenizer = AutoTokenizer.from_pretrained(onnx_path)

st.write("Airi.uz jamoasi amaliyotchilari tomonidan tayyorlangan text classification uchun mo'ljallangan model")
st.write("Ishlatish uchun pastdagi maydonga matn kiriting va model sizga kiritilgan matnni qaysi sohaga aloqador ekanligini ko'rsatadi")

# NOTE(review): renamed from ``input`` — the original shadowed the builtin.
user_text = st.text_area(label='input_areaf', placeholder='matnni shu yerga kiriting', height=350, max_chars=5000)

try:
    if st.button(label='bashorat qilish'):
        # Build the pipeline only when a prediction is requested.
        cls_pipeline = pipeline("text-classification", model=model, tokenizer=tokenizer)
        st.info(cls_pipeline(user_text))
except RuntimeError:
    # Oversized input can make ONNX Runtime raise; ask the user for less text.
    st.info("Iltimos kamroq malumot kiriting")
onnx/config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "/home/airi/pip_kun_uz/Kun_uz_classification",
3
+ "architectures": [
4
+ "BertForSequenceClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "classifier_dropout": null,
8
+ "gradient_checkpointing": false,
9
+ "hidden_act": "gelu",
10
+ "hidden_dropout_prob": 0.1,
11
+ "hidden_size": 768,
12
+ "id2label": {
13
+ "0": "Fan va Texnika",
14
+ "1": "Jahon",
15
+ "2": "Jamiyat",
16
+ "3": "O'zbekiston"
17
+ },
18
+ "initializer_range": 0.02,
19
+ "intermediate_size": 3072,
20
+ "label2id": {
21
+ "Fan va Texnika": 0,
22
+ "Jahon": 1,
23
+ "Jamiyat": 2,
24
+ "O'zbekiston": 3
25
+ },
26
+ "layer_norm_eps": 1e-12,
27
+ "max_position_embeddings": 512,
28
+ "num_attention_heads": 12,
29
+ "num_hidden_layers": 12,
30
+ "pad_token_id": 0,
31
+ "position_embedding_type": "absolute",
32
+ "problem_type": "single_label_classification",
33
+ "torch_dtype": "float32",
34
+ "transformers_version": "4.22.2",
35
+ "type_vocab_size": 2,
36
+ "use_cache": false,
37
+ "vocab_size": 30522
38
+ }
onnx/model_quantized.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:83fe76c3bef7e3938a460f6bc5f875149080d374a7e77d69dc6e7e7767f15293
3
+ size 181572857
onnx/ort_config.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_commit_hash": null,
3
+ "opset": null,
4
+ "optimization": {},
5
+ "optimum_version": "1.4.0",
6
+ "quantization": {
7
+ "activations_dtype": "QUInt8",
8
+ "activations_symmetric": false,
9
+ "format": "QOperator",
10
+ "is_static": false,
11
+ "mode": "IntegerOps",
12
+ "nodes_to_exclude": [],
13
+ "nodes_to_quantize": [],
14
+ "operators_to_quantize": [
15
+ "MatMul",
16
+ "Add"
17
+ ],
18
+ "per_channel": false,
19
+ "qdq_add_pair_to_weight": false,
20
+ "qdq_dedicated_pair": false,
21
+ "qdq_op_type_per_channel_support_to_axis": {
22
+ "MatMul": 1
23
+ },
24
+ "reduce_range": false,
25
+ "weights_dtype": "QInt8",
26
+ "weights_symmetric": true
27
+ },
28
+ "transformers_version": "4.22.2",
29
+ "use_external_data_format": false
30
+ }
onnx/special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": "[CLS]",
3
+ "mask_token": "[MASK]",
4
+ "pad_token": "[PAD]",
5
+ "sep_token": "[SEP]",
6
+ "unk_token": "[UNK]"
7
+ }
onnx/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
onnx/tokenizer_config.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": "[CLS]",
3
+ "do_basic_tokenize": true,
4
+ "do_lower_case": true,
5
+ "mask_token": "[MASK]",
6
+ "model_max_length": 512,
7
+ "name_or_path": "/home/airi/pip_kun_uz/Kun_uz_classification",
8
+ "never_split": null,
9
+ "pad_token": "[PAD]",
10
+ "sep_token": "[SEP]",
11
+ "special_tokens_map_file": null,
12
+ "strip_accents": null,
13
+ "tokenize_chinese_chars": true,
14
+ "tokenizer_class": "BertTokenizer",
15
+ "unk_token": "[UNK]"
16
+ }
onnx/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
requirements.txt ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ aiohttp==3.8.3
2
+ aiosignal==1.2.0
3
+ altair==4.2.0
4
+ async-timeout==4.0.2
5
+ attrs==22.1.0
6
+ blinker==1.5
7
+ cachetools==5.2.0
8
+ certifi==2022.9.24
9
+ charset-normalizer==2.1.1
10
+ click==8.1.3
11
+ colorama==0.4.5
12
+ coloredlogs==15.0.1
13
+ commonmark==0.9.1
14
+ datasets==2.5.2
15
+ decorator==5.1.1
16
+ dill==0.3.5.1
17
+ entrypoints==0.4
18
+ filelock==3.8.0
19
+ flatbuffers==22.9.24
20
+ frozenlist==1.3.1
21
+ fsspec==2022.8.2
22
+ gitdb==4.0.9
23
+ GitPython==3.1.28
24
+ huggingface-hub==0.10.0
25
+ humanfriendly==10.0
26
+ idna==3.4
27
+ importlib-metadata==5.0.0
28
+ Jinja2==3.1.2
29
+ jsonschema==4.16.0
30
+ MarkupSafe==2.1.1
31
+ mpmath==1.2.1
32
+ multidict==6.0.2
33
+ multiprocess==0.70.13
34
+ numpy==1.23.3
35
+ onnx==1.12.0
36
+ onnxruntime==1.12.1
37
+ optimum==1.4.0
38
+ packaging==21.3
39
+ pandas==1.5.0
40
+ Pillow==9.2.0
41
+ protobuf==3.20.1
42
+ pyarrow==9.0.0
43
+ pydeck==0.8.0b3
44
+ Pygments==2.13.0
45
+ Pympler==1.0.1
46
+ pyparsing==3.0.9
47
+ pyrsistent==0.18.1
48
+ python-dateutil==2.8.2
49
+ pytz==2022.4
50
+ pytz-deprecation-shim==0.1.0.post0
51
+ PyYAML==6.0
52
+ regex==2022.9.13
53
+ requests==2.28.1
54
+ responses==0.18.0
55
+ rich==12.6.0
56
+ semver==2.13.0
57
+ sentencepiece==0.1.97
58
+ six==1.16.0
59
+ smmap==5.0.0
60
+ streamlit==1.13.0
61
+ sympy==1.11.1
62
+ tokenizers==0.12.1
63
+ toml==0.10.2
64
+ toolz==0.12.0
65
+ torch==1.12.1
66
+ tornado==6.2
67
+ tqdm==4.64.1
68
+ transformers==4.22.2
69
+ typing_extensions==4.4.0
70
+ tzdata==2022.4
71
+ tzlocal==4.2
72
+ urllib3==1.26.12
73
+ validators==0.20.0
74
+ watchdog==2.1.9
75
+ xxhash==3.0.0
76
+ yarl==1.8.1
77
+ zipp==3.9.0
78
+