arlineka committed
Commit 1621f99
Parent: 94315ce

Update README.md

Files changed (1):
1. README.md (+31 -18)
README.md CHANGED
@@ -2,24 +2,37 @@
  license: apache-2.0
  ---
 
+ AWQ Quantized
+
+ ```
+ !pip install git+https://github.com/huggingface/transformers.git -q
+ !pip install huggingface_hub
+ !pip install autoawq -q
+ ```
+
  ```
+ from awq import AutoAWQForCausalLM
+ from transformers import AutoTokenizer
  import torch
- from transformers import AutoTokenizer, AutoModelForCausalLM
- import math
-
- model_path = "arlineka/manbasya_2x7b_MOE"
- tokenizer = AutoTokenizer.from_pretrained(model_path, use_default_system_prompt=False)
- model = AutoModelForCausalLM.from_pretrained(
- model_path, torch_dtype=torch.float32, device_map='auto',local_files_only=False, load_in_4bit=True
- )
- print(model)
- prompt = input("please input prompt:")
- while len(prompt) > 0:
- input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to("cuda")
-
- generation_output = model.generate(
- input_ids=input_ids, max_new_tokens=1024,repetition_penalty=1.2
- )
- print(tokenizer.decode(generation_output[0]))
- prompt = input("please input prompt:")
+
+ # Load the AWQ-quantized model and tokenizer
+ model_name_or_path = "arlineka/manbasya_2x7b_MOE"
+ model = AutoAWQForCausalLM.from_quantized(model_name_or_path, fuse_layers=True, trust_remote_code=False, safetensors=True)
+ tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, trust_remote_code=False)
+
+ # Set device to CUDA if available
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+ # Move model to the device
+ model.to(device)
+
+ # Prepare the input text and move input tensors to the same device
+ input_text = "Hello. Input Here"
+ input_ids = tokenizer.encode(input_text, return_tensors="pt").to(device)
+
+ # Generate with model and input tensors on the same device
+ output = model.generate(input_ids, max_new_tokens=2048)  # example usage, adjust as necessary
+ generated_text = tokenizer.decode(output[0], skip_special_tokens=True)
+
+ print(generated_text)
  ```
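
The update swaps the old interactive 4-bit `transformers` loop for a one-shot AWQ example. A minimal sketch that re-attaches the removed prompt loop to the new AWQ load path follows; it assumes the `autoawq` install from the README succeeded and that a CUDA device is available (generation on CPU will be very slow):

```
import torch
from awq import AutoAWQForCausalLM
from transformers import AutoTokenizer

model_name_or_path = "arlineka/manbasya_2x7b_MOE"

# Load the AWQ-quantized checkpoint; fuse_layers fuses modules for faster inference
model = AutoAWQForCausalLM.from_quantized(model_name_or_path, fuse_layers=True, safetensors=True)
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

# Pick a device and move the model there
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Interactive loop carried over from the removed example; an empty prompt exits
prompt = input("please input prompt:")
while len(prompt) > 0:
    input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(device)
    output = model.generate(input_ids=input_ids, max_new_tokens=1024, repetition_penalty=1.2)
    print(tokenizer.decode(output[0], skip_special_tokens=True))
    prompt = input("please input prompt:")
```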