yodiaditya committed on
Commit
cdcdbf3
1 Parent(s): c5a0cfc
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,105 @@
+ ---
+ datasets:
+ - Local
+ license: bigscience-bloom-rail-1.0
+ language:
+ - id
+ pipeline_tag: text-generation
+ ---
+
+ # Table of Contents
+
+ 1. [Model Summary](#model-summary)
+ 2. [Use](#use)
+ 3. [Limitations](#limitations)
+ 4. [Training](#training)
+ 5. [Evaluation](#evaluation)
+ 6. [Citation](#citation)
+
+ # Model Summary
+
+ > We present KARINA, fine-tuned from [bigscience/bloomz-3b](https://huggingface.co/bigscience/bloomz-3b), a member of the BLOOMZ family of models capable of following human instructions in dozens of languages zero-shot. The BLOOMZ models were produced by fine-tuning pretrained multilingual BLOOM models on the crosslingual task mixture xP3, which makes them capable of crosslingual generalization to unseen tasks and languages.
+
+ # Use
+
+ ## Intended use
+
+ We recommend using the model to perform tasks expressed in natural language. For example, given the prompt "*Given the question:\n{{ siapa kamu? }}\n---\nAnswer:\n*" ("siapa kamu?" is Indonesian for "who are you?"), the model will most likely answer "*Saya Karina. Ada yang bisa saya bantu?*" ("I am Karina. Is there anything I can help with?").
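+
+ As a minimal sketch, that prompt can be assembled like this (the `question` variable is illustrative, not part of the released code):
+
+ ```python
+ # Wrap a raw user question in the template used during fine-tuning.
+ question = "siapa kamu?"  # Indonesian for "who are you?"
+ prompt = "Given the question:\n{{ " + question + " }}\n---\nAnswer:\n"
+ ```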
+
+ ## How to use
+
+ ### CPU
+
+ <details>
+ <summary> Click to expand </summary>
+
+ ```python
+ # pip install -q transformers
+ from transformers import AutoModelForCausalLM, AutoTokenizer
+
+ checkpoint = "yodi/karina"
+
+ tokenizer = AutoTokenizer.from_pretrained(checkpoint)
+ model = AutoModelForCausalLM.from_pretrained(checkpoint)
+
+ inputs = tokenizer.encode("Given the question:\n{{ siapa kamu? }}\n---\nAnswer:\n", return_tensors="pt")
+ outputs = model.generate(inputs)
+ print(tokenizer.decode(outputs[0]))
+ ```
+
+ </details>
+
+ ### GPU
+
+ <details>
+ <summary> Click to expand </summary>
+
+ ```python
+ # pip install -q transformers accelerate
+ from transformers import AutoModelForCausalLM, AutoTokenizer
+
+ checkpoint = "yodi/karina"
+
+ tokenizer = AutoTokenizer.from_pretrained(checkpoint)
+ model = AutoModelForCausalLM.from_pretrained(checkpoint, torch_dtype="auto", device_map="auto")
+
+ inputs = tokenizer.encode("Given the question:\n{{ siapa kamu? }}\n---\nAnswer:\n", return_tensors="pt").to("cuda")
+ outputs = model.generate(inputs)
+ print(tokenizer.decode(outputs[0]))
+ ```
+
+ </details>
+
+ ### GPU in 8bit
+
+ <details>
+ <summary> Click to expand </summary>
+
+ ```python
+ # pip install -q transformers accelerate bitsandbytes
+ from transformers import AutoModelForCausalLM, AutoTokenizer
+
+ checkpoint = "yodi/karina"
+
+ tokenizer = AutoTokenizer.from_pretrained(checkpoint)
+ model = AutoModelForCausalLM.from_pretrained(checkpoint, device_map="auto", load_in_8bit=True)
+
+ inputs = tokenizer.encode("Given the question:\n{{ siapa kamu? }}\n---\nAnswer:\n", return_tensors="pt").to("cuda")
+ outputs = model.generate(inputs)
+ print(tokenizer.decode(outputs[0]))
+ ```
+
+ </details>
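+
+ The snippets above call `generate` with default settings, which produce only a short continuation. A minimal sketch of passing explicit generation parameters (the values are illustrative, not tuned for this model):
+
+ ```python
+ # Allow a longer answer and use light sampling instead of greedy decoding.
+ outputs = model.generate(
+     inputs,
+     max_new_tokens=64,  # upper bound on newly generated tokens
+     do_sample=True,     # sample instead of greedy argmax
+     top_p=0.9,          # nucleus sampling cutoff
+     temperature=0.7,    # soften the next-token distribution
+ )
+ print(tokenizer.decode(outputs[0], skip_special_tokens=True))
+ ```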
+
+ <!-- Necessary for whitespace -->
+ ###
+
+ # Limitations
+
+ **Prompt Engineering:** Performance may vary depending on the prompt; the model follows the prompting format of the BLOOMZ models it was fine-tuned from.
+
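+ A small sketch for probing prompt sensitivity, reusing `tokenizer` and `model` from the snippets above (the alternative template is illustrative):
+
+ ```python
+ # Compare outputs across prompt templates to gauge sensitivity.
+ for template in [
+     "Given the question:\n{{ %s }}\n---\nAnswer:\n",  # the fine-tuning template
+     "Pertanyaan: %s\nJawaban:",                       # a plain Indonesian Q/A format
+ ]:
+     inputs = tokenizer.encode(template % "siapa kamu?", return_tensors="pt")
+     outputs = model.generate(inputs, max_new_tokens=32)
+     print(tokenizer.decode(outputs[0], skip_special_tokens=True))
+ ```
+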
+ # Training
+
+ ## Model
+
+ - **Architecture:** Same as [bloom](https://huggingface.co/bigscience/bloom); see also the `config.json` file and the inspection sketch below
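+
+ A quick way to inspect those architecture details without downloading the weights (standard `transformers` API; the printed values reflect this commit's `config.json`):
+
+ ```python
+ from transformers import AutoConfig
+
+ # Loads only config.json, not the model weights.
+ config = AutoConfig.from_pretrained("yodi/karina")
+ print(config.architectures)  # ['BloomForCausalLM']
+ print(config.torch_dtype)    # torch.float16
+ ```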
config.json CHANGED
@@ -1,5 +1,5 @@
  {
- "_name_or_path": "outputs/model/",
+ "_name_or_path": "bigscience/bloomz-3b",
  "apply_residual_connection_post_layernorm": false,
  "architectures": [
  "BloomForCausalLM"
@@ -25,7 +25,7 @@
  "skip_bias_add": true,
  "skip_bias_add_qkv": false,
  "slow_but_exact": false,
- "torch_dtype": "bfloat16",
+ "torch_dtype": "float16",
  "transformers_version": "4.31.0",
  "unk_token_id": 0,
  "use_cache": true,
special_tokens_map.json ADDED
@@ -0,0 +1,6 @@
+ {
+ "bos_token": "<s>",
+ "eos_token": "</s>",
+ "pad_token": "<pad>",
+ "unk_token": "<unk>"
+ }
tokenizer.json ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:17a208233d2ee8d8c83b23bc214df737c44806a1919f444e89b31e586cd956ba
+ size 14500471
tokenizer_config.json ADDED
@@ -0,0 +1,10 @@
+ {
+ "add_prefix_space": false,
+ "bos_token": "<s>",
+ "clean_up_tokenization_spaces": false,
+ "eos_token": "</s>",
+ "model_max_length": 1000000000000000019884624838656,
+ "pad_token": "<pad>",
+ "tokenizer_class": "BloomTokenizer",
+ "unk_token": "<unk>"
+ }
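
A quick sanity check that the tokenizer files added in this commit load as expected (standard `transformers` API; with `tokenizer.json` present, `AutoTokenizer` returns the fast Bloom tokenizer):

```python
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("yodi/karina")
print(tok.special_tokens_map)  # {'bos_token': '<s>', 'eos_token': '</s>', ...}
print(type(tok).__name__)      # BloomTokenizerFast
```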