0xtaipoian
committed on
Update README.md
README.md
CHANGED
@@ -42,9 +42,14 @@ The comments on LIHKG also tend to be very short. Thus the model cannot generate
 ## How to use it?
 You can run it on [Colab](https://colab.research.google.com/drive/1FgdwkkPcLzn_x1ohgzJCA1xZ4MTesC_8?usp=sharing) or anywhere you want based on the code:
 ```python
-
+
+from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, LlamaTokenizer, GenerationConfig, pipeline
 from peft import PeftModel, PeftMixedModel
+import torch
+import pprint
 
+# enable torch CUDA tf32
+torch.backends.cudnn.allow_tf32 = True
 
 model_name = "0xtaipoian/open-lilm"
 
@@ -54,16 +59,16 @@ bnb_config = BitsAndBytesConfig(
     bnb_4bit_quant_type="nf4",
     bnb_4bit_compute_dtype=torch.bfloat16
 )
+tokenizer = AutoTokenizer.from_pretrained(model_name)
 model = AutoModelForCausalLM.from_pretrained(
     model_name,
     torch_dtype=torch.bfloat16,
     device_map='auto',
     trust_remote_code=True,
     quantization_config=bnb_config,
+    revision="main",  # qlora-merged (qLoRA finetuned for 3 epochs) or main (full-parameter finetune for 1 epoch)
 )
 
-tokenizer = AutoTokenizer.from_pretrained(model_name)
-
 
 def chat(messages, temperature=0.9, max_new_tokens=200):
     input_ids = tokenizer.apply_chat_template(conversation=messages, tokenize=True, add_generation_prompt=True, return_tensors='pt').to('cuda:0')
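
The hunk above cuts off inside the `chat` helper. As a rough sketch of how such a helper typically finishes and gets called (the `generate` arguments and the example prompt below are illustrative assumptions, not taken from this README):

```python
# Sketch only: assumes chat() ends with a standard generate-and-decode step.
def chat(messages, temperature=0.9, max_new_tokens=200):
    input_ids = tokenizer.apply_chat_template(
        conversation=messages, tokenize=True,
        add_generation_prompt=True, return_tensors='pt'
    ).to('cuda:0')
    output_ids = model.generate(
        input_ids,
        do_sample=True,               # sample so temperature has an effect
        temperature=temperature,
        max_new_tokens=max_new_tokens,
    )
    # Decode only the newly generated tokens, skipping the prompt.
    return tokenizer.decode(output_ids[0][input_ids.shape[1]:], skip_special_tokens=True)

# Illustrative call: one user turn in the chat-template message format.
messages = [{"role": "user", "content": "香港邊度有好嘢食?"}]
print(chat(messages, temperature=0.9, max_new_tokens=200))
```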