weiren119 committed on
Commit
22fcb6d
1 Parent(s): a07ca1a

Create model card

Files changed (1)
  1. README.md +88 -0
README.md ADDED
---
library_name: peft
license: apache-2.0
tags:
- llama2
- qLoRa
- traditional_chinese
- alpaca
---
## Fine-tuning dataset
- NTU NLP Lab's translated alpaca-tw_en dataset (`alpaca-tw_en-align.json`): [ntunlplab](https://github.com/ntunlplab/traditional-chinese-alpaca)'s Traditional Chinese translation of the Stanford Alpaca 52k dataset (see the loading sketch below).

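A minimal sketch of how the translated data could be inspected, assuming `alpaca-tw_en-align.json` keeps the original Stanford Alpaca record schema (`instruction` / `input` / `output`); the local file path is illustrative:

```
import json

# Illustrative local path; point this at your copy of alpaca-tw_en-align.json.
with open("alpaca-tw_en-align.json", encoding="utf-8") as f:
    records = json.load(f)

# Each record is assumed to follow the Alpaca schema: instruction, optional input, output.
sample = records[0]
print(sample["instruction"])
print(sample.get("input", ""))
print(sample["output"])
```
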
## Pretrained base model
- NousResearch: https://huggingface.co/NousResearch/Llama-2-7b-chat-hf

## Training procedure

The following `bitsandbytes` quantization config was used during training (a loading sketch follows the list):
- load_in_8bit: False
- load_in_4bit: True
- llm_int8_threshold: 6.0
- llm_int8_skip_modules: None
- llm_int8_enable_fp32_cpu_offload: False
- llm_int8_has_fp16_weight: False
- bnb_4bit_quant_type: nf4
- bnb_4bit_use_double_quant: True
- bnb_4bit_compute_dtype: bfloat16
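The training script itself is not part of this card; below is a minimal sketch of how an equivalent 4-bit configuration could be expressed with `transformers`' `BitsAndBytesConfig` when loading the base model listed above. Variable names are illustrative, not taken from the original training code.

```
import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

# Mirrors the 4-bit settings listed above: NF4 quantization, double quantization,
# bfloat16 compute dtype.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
)

# Base model from the "Pretrained base model" section; device_map="auto" places
# layers on available GPUs.
base_model = AutoModelForCausalLM.from_pretrained(
    "NousResearch/Llama-2-7b-chat-hf",
    quantization_config=bnb_config,
    device_map="auto",
)
```
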
### Framework versions

- PEFT 0.4.0

## Usage
### Install dependencies
```
pip install transformers torch peft bitsandbytes accelerate
```
(`bitsandbytes` and `accelerate` are required for the 4-bit loading and `device_map='auto'` used below.)
#### Run inference
```
import torch
from transformers import AutoTokenizer, TextStreamer
from peft import AutoPeftModelForCausalLM

# Use the same tokenizer as the source model
model_id = "weiren119/traditional_chinese_qlora_llama2_merged"
tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=False)

# Load the fine-tuned model; you can replace this with your own model
model = AutoPeftModelForCausalLM.from_pretrained(
    model_id,
    load_in_4bit=model_id.endswith("4bit"),
    torch_dtype=torch.float16,
    device_map='auto'
)

# Stream generated tokens to stdout as they are produced
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

system_prompt = """You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature.

If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information."""


def get_prompt(message: str, chat_history: list[tuple[str, str]]) -> str:
    # Build a Llama-2 chat prompt: system prompt, then alternating user/assistant turns
    texts = [f'[INST] <<SYS>>\n{system_prompt}\n<</SYS>>\n\n']
    for user_input, response in chat_history:
        texts.append(f'{user_input.strip()} [/INST] {response.strip()} </s><s> [INST] ')
    texts.append(f'{message.strip()} [/INST]')
    return ''.join(texts)


print("=" * 100)
print("-" * 80)
print("Have a try!")

chat_history = []
while True:
    s = input("User: ")
    if s != '':
        prompt = get_prompt(s, chat_history)
        print('Answer:')
        tokens = tokenizer(prompt, return_tensors='pt').input_ids
        generate_ids = model.generate(input_ids=tokens.cuda(), max_new_tokens=4096, streamer=streamer)
        output = tokenizer.decode(generate_ids[0, len(tokens[0]):-1]).strip()
        chat_history.append([s, output])
        print('-' * 80)
```