Sanjar commited on
Commit
35c1c66
1 Parent(s): b9de727
.gitattributes CHANGED
@@ -29,3 +29,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
29
  *.zip filter=lfs diff=lfs merge=lfs -text
30
  *.zst filter=lfs diff=lfs merge=lfs -text
31
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
29
  *.zip filter=lfs diff=lfs merge=lfs -text
30
  *.zst filter=lfs diff=lfs merge=lfs -text
31
  *tfevents* filter=lfs diff=lfs merge=lfs -text
32
+ .fileextension filter=lfs diff=lfs merge=lfs -text
app.py ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Streamlit app: Uzbek news-text classification with a quantized ONNX BERT model.

Loads an ONNX-Runtime quantized sequence-classification model and its tokenizer
from the local ``onnx`` directory, then classifies user-supplied text into news
categories (per ``onnx/config.json``: Fan va Texnika, Jahon, Jamiyat, O'zbekiston).
"""
from pathlib import Path

import streamlit as st
from optimum.onnxruntime import ORTModelForSequenceClassification
from transformers import AutoTokenizer, pipeline

# Load the quantized model weights and the matching tokenizer once at startup.
onnx_path = Path("onnx")
model = ORTModelForSequenceClassification.from_pretrained(
    onnx_path, file_name="model_quantized.onnx"
)
tokenizer = AutoTokenizer.from_pretrained(onnx_path)

st.write("Airi.uz jamoasi amaliyotchilari tomonidan tayyorlangan text classification uchun mo'ljallangan model")
st.write("Ishlatish uchun pastdagi maydonga matn kiriting va model sizga kiritilgan matnni qaysi sohaga aloqador ekanligini ko'rsatadi")

# NOTE(review): renamed from ``input`` — the original shadowed the builtin.
user_text = st.text_area(label='input_areaf', placeholder='matnni shu yerga kiriting', height=350, max_chars=5000)

try:
    if st.button(label='bashorat qilish'):
        # Build the pipeline only when a prediction is requested.
        cls_pipeline = pipeline("text-classification", model=model, tokenizer=tokenizer)
        st.info(cls_pipeline(user_text))
except RuntimeError:
    # Oversized input can make ONNX Runtime raise; ask the user for less text.
    st.info("Iltimos kamroq malumot kiriting")
onnx/config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "/home/airi/pip_kun_uz/Kun_uz_classification",
3
+ "architectures": [
4
+ "BertForSequenceClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "classifier_dropout": null,
8
+ "gradient_checkpointing": false,
9
+ "hidden_act": "gelu",
10
+ "hidden_dropout_prob": 0.1,
11
+ "hidden_size": 768,
12
+ "id2label": {
13
+ "0": "Fan va Texnika",
14
+ "1": "Jahon",
15
+ "2": "Jamiyat",
16
+ "3": "O'zbekiston"
17
+ },
18
+ "initializer_range": 0.02,
19
+ "intermediate_size": 3072,
20
+ "label2id": {
21
+ "Fan va Texnika": 0,
22
+ "Jahon": 1,
23
+ "Jamiyat": 2,
24
+ "O'zbekiston": 3
25
+ },
26
+ "layer_norm_eps": 1e-12,
27
+ "max_position_embeddings": 512,
28
+ "num_attention_heads": 12,
29
+ "num_hidden_layers": 12,
30
+ "pad_token_id": 0,
31
+ "position_embedding_type": "absolute",
32
+ "problem_type": "single_label_classification",
33
+ "torch_dtype": "float32",
34
+ "transformers_version": "4.22.2",
35
+ "type_vocab_size": 2,
36
+ "use_cache": false,
37
+ "vocab_size": 30522
38
+ }
onnx/model_quantized.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:83fe76c3bef7e3938a460f6bc5f875149080d374a7e77d69dc6e7e7767f15293
3
+ size 181572857
onnx/ort_config.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_commit_hash": null,
3
+ "opset": null,
4
+ "optimization": {},
5
+ "optimum_version": "1.4.0",
6
+ "quantization": {
7
+ "activations_dtype": "QUInt8",
8
+ "activations_symmetric": false,
9
+ "format": "QOperator",
10
+ "is_static": false,
11
+ "mode": "IntegerOps",
12
+ "nodes_to_exclude": [],
13
+ "nodes_to_quantize": [],
14
+ "operators_to_quantize": [
15
+ "MatMul",
16
+ "Add"
17
+ ],
18
+ "per_channel": false,
19
+ "qdq_add_pair_to_weight": false,
20
+ "qdq_dedicated_pair": false,
21
+ "qdq_op_type_per_channel_support_to_axis": {
22
+ "MatMul": 1
23
+ },
24
+ "reduce_range": false,
25
+ "weights_dtype": "QInt8",
26
+ "weights_symmetric": true
27
+ },
28
+ "transformers_version": "4.22.2",
29
+ "use_external_data_format": false
30
+ }
onnx/special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": "[CLS]",
3
+ "mask_token": "[MASK]",
4
+ "pad_token": "[PAD]",
5
+ "sep_token": "[SEP]",
6
+ "unk_token": "[UNK]"
7
+ }
onnx/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
onnx/tokenizer_config.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": "[CLS]",
3
+ "do_basic_tokenize": true,
4
+ "do_lower_case": true,
5
+ "mask_token": "[MASK]",
6
+ "model_max_length": 512,
7
+ "name_or_path": "/home/airi/pip_kun_uz/Kun_uz_classification",
8
+ "never_split": null,
9
+ "pad_token": "[PAD]",
10
+ "sep_token": "[SEP]",
11
+ "special_tokens_map_file": null,
12
+ "strip_accents": null,
13
+ "tokenize_chinese_chars": true,
14
+ "tokenizer_class": "BertTokenizer",
15
+ "unk_token": "[UNK]"
16
+ }
onnx/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
requirements.txt ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ aiohttp==3.8.3
2
+ aiosignal==1.2.0
3
+ altair==4.2.0
4
+ async-timeout==4.0.2
5
+ attrs==22.1.0
6
+ blinker==1.5
7
+ cachetools==5.2.0
8
+ certifi==2022.9.24
9
+ charset-normalizer==2.1.1
10
+ click==8.1.3
11
+ colorama==0.4.5
12
+ coloredlogs==15.0.1
13
+ commonmark==0.9.1
14
+ datasets==2.5.2
15
+ decorator==5.1.1
16
+ dill==0.3.5.1
17
+ entrypoints==0.4
18
+ filelock==3.8.0
19
+ flatbuffers==22.9.24
20
+ frozenlist==1.3.1
21
+ fsspec==2022.8.2
22
+ gitdb==4.0.9
23
+ GitPython==3.1.28
24
+ huggingface-hub==0.10.0
25
+ humanfriendly==10.0
26
+ idna==3.4
27
+ importlib-metadata==5.0.0
28
+ Jinja2==3.1.2
29
+ jsonschema==4.16.0
30
+ MarkupSafe==2.1.1
31
+ mpmath==1.2.1
32
+ multidict==6.0.2
33
+ multiprocess==0.70.13
34
+ numpy==1.23.3
35
+ onnx==1.12.0
36
+ onnxruntime==1.12.1
37
+ optimum==1.4.0
38
+ packaging==21.3
39
+ pandas==1.5.0
40
+ Pillow==9.2.0
41
+ protobuf==3.20.1
42
+ pyarrow==9.0.0
43
+ pydeck==0.8.0b3
44
+ Pygments==2.13.0
45
+ Pympler==1.0.1
46
+ pyparsing==3.0.9
47
+ pyrsistent==0.18.1
48
+ python-dateutil==2.8.2
49
+ pytz==2022.4
50
+ pytz-deprecation-shim==0.1.0.post0
51
+ PyYAML==6.0
52
+ regex==2022.9.13
53
+ requests==2.28.1
54
+ responses==0.18.0
55
+ rich==12.6.0
56
+ semver==2.13.0
57
+ sentencepiece==0.1.97
58
+ six==1.16.0
59
+ smmap==5.0.0
60
+ streamlit==1.13.0
61
+ sympy==1.11.1
62
+ tokenizers==0.12.1
63
+ toml==0.10.2
64
+ toolz==0.12.0
65
+ torch==1.12.1
66
+ tornado==6.2
67
+ tqdm==4.64.1
68
+ transformers==4.22.2
69
+ typing_extensions==4.4.0
70
+ tzdata==2022.4
71
+ tzlocal==4.2
72
+ urllib3==1.26.12
73
+ validators==0.20.0
74
+ watchdog==2.1.9
75
+ xxhash==3.0.0
76
+ yarl==1.8.1
77
+ zipp==3.9.0
78
+