added model files

Browse files

Files changed (7) hide show

README.md +34 -0
config.json +60 -0
pytorch_model.bin +3 -0
special_tokens_map.json +7 -0
tokenizer.json +0 -0
tokenizer_config.json +17 -0
vocab.txt +0 -0

README.md ADDED Viewed

	@@ -0,0 +1,34 @@

+---
+language:
+- tr
+tags:
+- text
+- text-classification
+- news-category-classification
+metrics:
+- accuracy  # Example: wer. Use metric id from https://hf.co/metrics
+- f1
+- precision
+- recall
+---
+## INTERPRESS TURKISH NEWS CATEGORY CLASSIFICATION MODEL - TEST - v0.1
+```python
+LABELS = {
+  0: 'spor',
+  1: 'is_ve_finans',
+  2: 'lifestyle',
+  3: 'eglence',
+  4: 'seyahat',
+  5: 'egitim',
+  6: 'bilim',
+  7: 'teknoloji',
+  8: 'kultur_sanat',
+  9: 'otomotiv',
+  10: 'politika',
+  11: 'endustri',
+  12: 'moda',
+  13: 'yemek',
+  14: 'saglik'
+}
+```
+ACC = 0.9128, F1 = 0.7142, PRECISION = 0.7355, RECALL = 0.6996

config.json ADDED Viewed

	@@ -0,0 +1,60 @@

+{
+    "_name_or_path": "dbmdz/bert-base-turkish-cased",
+    "architectures": [
+        "BertForSequenceClassification"
+    ],
+    "attention_probs_dropout_prob": 0.1,
+    "classifier_dropout": null,
+    "hidden_act": "gelu",
+    "hidden_dropout_prob": 0.1,
+    "hidden_size": 768,
+    "id2label": {
+        "0": "spor",
+        "1": "is_ve_finans",
+        "2": "lifestyle",
+        "3": "eglence",
+        "4": "seyahat",
+        "5": "egitim",
+        "6": "bilim",
+        "7": "teknoloji",
+        "8": "kultur_sanat",
+        "9": "otomotiv",
+        "10": "politika",
+        "11": "endustri",
+        "12": "moda",
+        "13": "yemek",
+        "14": "saglik"
+    },
+    "initializer_range": 0.02,
+    "intermediate_size": 3072,
+    "label2id": {
+        "spor": 0,
+        "is_ve_finans": 1,
+        "politika": 10,
+        "endustri": 11,
+        "moda": 12,
+        "yemek": 13,
+        "saglik": 14,
+        "lifestyle": 2,
+        "eglence": 3,
+        "seyahat": 4,
+        "egitim": 5,
+        "bilim": 6,
+        "teknoloji": 7,
+        "kultur_sanat": 8,
+        "otomotiv": 9
+    },
+    "layer_norm_eps": 1e-12,
+    "max_position_embeddings": 512,
+    "model_type": "bert",
+    "num_attention_heads": 12,
+    "num_hidden_layers": 12,
+    "pad_token_id": 0,
+    "position_embedding_type": "absolute",
+    "problem_type": "single_label_classification",
+    "torch_dtype": "float32",
+    "transformers_version": "4.25.1",
+    "type_vocab_size": 2,
+    "use_cache": true,
+    "vocab_size": 32000
+}

pytorch_model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:27f5517135d20afa6497a04503d1aa187a4ca9c3f50b402c3bfabc3c519026aa
+size 442588341

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "cls_token": "[CLS]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
+}

tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,17 @@

+{
+  "cls_token": "[CLS]",
+  "do_basic_tokenize": true,
+  "do_lower_case": false,
+  "mask_token": "[MASK]",
+  "max_len": 512,
+  "model_max_length": 512,
+  "name_or_path": "dbmdz/bert-base-turkish-cased",
+  "never_split": null,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "special_tokens_map_file": null,
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "BertTokenizer",
+  "unk_token": "[UNK]"
+}

vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff