---
language:
- vi
---

You can find our model card here: [`llm4fun/vietrag-7b-v1.0`](https://huggingface.co/llm4fun/vietrag-7b-v1.0)
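The snippet below shows how to run inference with `transformers`: build the prompt from your question and retrieved context, load the model, and generate an answer.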
```py
from transformers import GenerationConfig, TextStreamer
from transformers import LlamaForCausalLM, LlamaTokenizer, LlamaConfig
import torch

question = "<your-question>"
context = "<your-context>"
instruction = 'You are an AI assistant. Provide a detailed answer so user don’t need to search outside to understand the answer.'
# The Vietnamese preamble reads: "Based on the contexts given below, answer the question at the end."
input_text = f"Dựa vào một số ngữ cảnh được cho dưới đây, trả lời câu hỏi ở cuối.\n\n{context}\n\nQuestion: {question}"
prompt_template = (
    "### System:\n"
    "Below is an instruction that describes a task, paired with an input that provides further context. "
    "Write a response that appropriately completes the request.\n\n\n\n"
    "### Instruction:\n{instruction}\n\n"
    "### Input:\n{input}\n\n"
    "### Response:\n{output}"
)
prompt = prompt_template.format(instruction=instruction, input=input_text, output='')

torch_dtype = torch.bfloat16
model_id = "llm4fun/vietrag-7b-v1.0"
device = "cuda"

tokenizer = LlamaTokenizer.from_pretrained(model_id)
model = LlamaForCausalLM.from_pretrained(
    model_id,
    config=LlamaConfig.from_pretrained(model_id),
    torch_dtype=torch_dtype,
)
model = model.eval().to(device)

def generate(prompt, max_new_tokens=1024):
    input_ids = tokenizer(prompt, return_tensors="pt")["input_ids"].to(model.device)
    with torch.no_grad():
        generation_config = GenerationConfig(
            repetition_penalty=1.13,
            max_new_tokens=max_new_tokens,
            # Greedy decoding by default; uncomment the lines below to sample instead.
            # temperature=0.2,
            # top_p=0.95,
            # top_k=20,
            # bos_token_id=tokenizer.bos_token_id,
            # eos_token_id=tokenizer.eos_token_id,
            # eos_token_id=0,  # for open-ended generation
            pad_token_id=tokenizer.pad_token_id,
            do_sample=False,
            use_cache=True,
            return_dict_in_generate=True,
            output_attentions=False,
            output_hidden_states=False,
            output_scores=False,
        )
        # Stream tokens to stdout as they are generated.
        streamer = TextStreamer(tokenizer, skip_prompt=True)
        generated = model.generate(
            inputs=input_ids,
            generation_config=generation_config,
            streamer=streamer,
        )

    # Drop the prompt tokens, then cut the decoded text at the first EOS token.
    gen_tokens = generated["sequences"].cpu()[:, len(input_ids[0]):]
    output = tokenizer.batch_decode(gen_tokens)[0]
    output = output.split(tokenizer.eos_token)[0]
    return output.strip()

output = generate(prompt)
```
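Since the Vietnamese preamble asks the model to answer from "một số ngữ cảnh" (several contexts), a natural way to use it for retrieval-augmented generation is to concatenate your retrieved passages into `context`. A minimal sketch, reusing the names defined above; the passages here are made up, and the blank-line separator is an assumption rather than something documented for this model:

```py
# Hypothetical passages; in practice these come from your retriever.
passages = [
    "Việt Nam là một quốc gia nằm ở Đông Nam Á.",
    "Thủ đô của Việt Nam là Hà Nội.",
]
context = "\n\n".join(passages)  # separate passages with blank lines (assumed convention)

question = "Thủ đô của Việt Nam là gì?"
input_text = f"Dựa vào một số ngữ cảnh được cho dưới đây, trả lời câu hỏi ở cuối.\n\n{context}\n\nQuestion: {question}"
prompt = prompt_template.format(instruction=instruction, input=input_text, output='')
answer = generate(prompt)
```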
To tweak the model's answering style, feel free to replace the `instruction` part of the prompt. I recommend picking one of the following instructions, because they were used during training (they are reproduced verbatim below, typos included):
```py
instructions = [
    'You are an AI assistant. Provide a detailed answer so user don’t need to search outside to understand the answer.',
    'You are an AI assistant. You will be given a task. You must generate a detailed and long answer.',
    'You are an AI assistant. User will you give you a task. Your goal is to complete the task as faithfully as you can. While performing the task think step-by-step and justify your steps.',
    'You are an smart assistant. Provide a direct, short and exact answer to the following question from its provided context.'
]
```
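For example, to get a short extractive answer instead of a detailed explanation, rebuild the prompt with the last instruction and generate again (reusing `prompt_template`, `input_text`, and `generate` from the snippet above):

```py
# Switch to the short-answer style; everything else stays unchanged.
prompt = prompt_template.format(instruction=instructions[3], input=input_text, output='')
short_answer = generate(prompt)
```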