afrideva committed on
Commit
b460d08
1 Parent(s): 3884aef

Upload README.md with huggingface_hub

Files changed (1)
  1. README.md +72 -0
README.md ADDED
@@ -0,0 +1,72 @@
---
base_model: BEE-spoke-data/Mixtral-GQA-400m-v2
inference: false
language:
- en
license: apache-2.0
model_creator: BEE-spoke-data
model_name: Mixtral-GQA-400m-v2
pipeline_tag: text-generation
quantized_by: afrideva
tags:
- gguf
- ggml
- quantized
- q2_k
- q3_k_m
- q4_k_m
- q5_k_m
- q6_k
- q8_0
---
# BEE-spoke-data/Mixtral-GQA-400m-v2-GGUF

Quantized GGUF model files for [Mixtral-GQA-400m-v2](https://huggingface.co/BEE-spoke-data/Mixtral-GQA-400m-v2) from [BEE-spoke-data](https://huggingface.co/BEE-spoke-data).

| Name | Quant method | Size |
| ---- | ---- | ---- |
| [mixtral-gqa-400m-v2.fp16.gguf](https://huggingface.co/afrideva/Mixtral-GQA-400m-v2-GGUF/resolve/main/mixtral-gqa-400m-v2.fp16.gguf) | fp16 | 4.01 GB |
| [mixtral-gqa-400m-v2.q2_k.gguf](https://huggingface.co/afrideva/Mixtral-GQA-400m-v2-GGUF/resolve/main/mixtral-gqa-400m-v2.q2_k.gguf) | q2_k | 703.28 MB |
| [mixtral-gqa-400m-v2.q3_k_m.gguf](https://huggingface.co/afrideva/Mixtral-GQA-400m-v2-GGUF/resolve/main/mixtral-gqa-400m-v2.q3_k_m.gguf) | q3_k_m | 899.86 MB |
| [mixtral-gqa-400m-v2.q4_k_m.gguf](https://huggingface.co/afrideva/Mixtral-GQA-400m-v2-GGUF/resolve/main/mixtral-gqa-400m-v2.q4_k_m.gguf) | q4_k_m | 1.15 GB |
| [mixtral-gqa-400m-v2.q5_k_m.gguf](https://huggingface.co/afrideva/Mixtral-GQA-400m-v2-GGUF/resolve/main/mixtral-gqa-400m-v2.q5_k_m.gguf) | q5_k_m | 1.39 GB |
| [mixtral-gqa-400m-v2.q6_k.gguf](https://huggingface.co/afrideva/Mixtral-GQA-400m-v2-GGUF/resolve/main/mixtral-gqa-400m-v2.q6_k.gguf) | q6_k | 1.65 GB |
| [mixtral-gqa-400m-v2.q8_0.gguf](https://huggingface.co/afrideva/Mixtral-GQA-400m-v2-GGUF/resolve/main/mixtral-gqa-400m-v2.q8_0.gguf) | q8_0 | 2.13 GB |
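
As a minimal sketch (not part of the original card), one way to fetch and run one of these files locally is with `huggingface_hub` and `llama-cpp-python`. The choice of the `q4_k_m` quant, the context length, and the generation settings below are illustrative assumptions; any file from the table above works the same way.

```python
# Sketch (assumption, not from the original card): download one quant from this
# repo and run it with llama-cpp-python.
# pip install huggingface_hub llama-cpp-python
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

# q4_k_m picked here only as an example size/quality trade-off
model_path = hf_hub_download(
    repo_id="afrideva/Mixtral-GQA-400m-v2-GGUF",
    filename="mixtral-gqa-400m-v2.q4_k_m.gguf",
)

llm = Llama(model_path=model_path, n_ctx=2048)  # context length is an assumption

out = llm("My favorite movie is Godfather because", max_tokens=128)
print(out["choices"][0]["text"])
```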

## Original Model Card:

# BEE-spoke-data/Mixtral-GQA-400m-v2

## testing code

```python
# !pip install -U -q transformers datasets accelerate sentencepiece
import pprint as pp
from transformers import pipeline

# load the model with automatic device placement
pipe = pipeline(
    "text-generation",
    model="BEE-spoke-data/Mixtral-GQA-400m-v2",
    device_map="auto",
)
pipe.model.config.pad_token_id = pipe.model.config.eos_token_id

prompt = "My favorite movie is Godfather because"

# contrastive search (top_k + penalty_alpha) with repetition controls
res = pipe(
    prompt,
    max_new_tokens=256,
    top_k=4,
    penalty_alpha=0.6,
    use_cache=True,
    no_repeat_ngram_size=4,
    repetition_penalty=1.1,
    renormalize_logits=True,
)
pp.pprint(res[0])
```