skytree
/

roberta-base-relu-quantized-sst5

Text Classification

Model card Files and versions

skytree committed on Jan 13

Commit

2b7f95c

·

verified ·

1 Parent(s): db27b32

Upload qann_model_arch.txt

Files changed (1) hide show

qann_model_arch.txt +60 -0

qann_model_arch.txt ADDED Viewed

	@@ -0,0 +1,60 @@

+RobertModel(
+  (bert): RobertaForSequenceClassification(
+    (roberta): RobertaModel(
+      (embeddings): RobertaEmbeddings(
+        (word_embeddings): Embedding(50265, 768, padding_idx=1)
+        (position_embeddings): Embedding(514, 768, padding_idx=1)
+        (token_type_embeddings): Embedding(1, 768)
+        (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (encoder): RobertaEncoder(
+        (layer): ModuleList(
+          (0-11): 12 x RobertaLayer(
+            (attention): RobertaAttention(
+              (self): QRobertaSelfAttention(
+                (query): Linear(in_features=768, out_features=768, bias=True)
+                (query_quan): MyQuan(level=32, sym=True, pos_max=15.0, neg_min=-16.0, s=1.0)
+                (key): Linear(in_features=768, out_features=768, bias=True)
+                (key_quan): MyQuan(level=32, sym=True, pos_max=15.0, neg_min=-16.0, s=1.0)
+                (value): Linear(in_features=768, out_features=768, bias=True)
+                (value_quan): MyQuan(level=32, sym=True, pos_max=15.0, neg_min=-16.0, s=1.0)
+                (attn_quan): MyQuan(level=32, sym=False, pos_max=31.0, neg_min=0.0, s=1.0)
+                (after_attn_quan): MyQuan(level=32, sym=False, pos_max=31.0, neg_min=0.0, s=1.0)
+                (dropout): Dropout(p=0.1, inplace=False)
+              )
+              (output): RobertaSelfOutput(
+                (dense): Sequential(
+                  (0): Linear(in_features=768, out_features=768, bias=True)
+                  (1): MyQuan(level=32, sym=True, pos_max=15.0, neg_min=-16.0, s=1.0)
+                )
+                (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
+                (dropout): Dropout(p=0.1, inplace=False)
+              )
+            )
+            (intermediate): RobertaIntermediate(
+              (dense): Linear(in_features=768, out_features=3072, bias=True)
+              (intermediate_act_fn): Sequential(
+                (0): MyQuan(level=32, sym=False, pos_max=31.0, neg_min=0.0, s=1.0)
+                (1): ReLU()
+              )
+            )
+            (output): RobertaOutput(
+              (dense): Sequential(
+                (0): Linear(in_features=3072, out_features=768, bias=True)
+                (1): MyQuan(level=32, sym=True, pos_max=15.0, neg_min=-16.0, s=1.0)
+              )
+              (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
+              (dropout): Dropout(p=0.1, inplace=False)
+            )
+          )
+        )
+      )
+    )
+    (classifier): RobertaClassificationHead(
+      (dense): Linear(in_features=768, out_features=768, bias=True)
+      (dropout): Dropout(p=0.1, inplace=False)
+      (out_proj): Linear(in_features=768, out_features=5, bias=True)
+    )
+  )
+)