sarahlintang committed
Commit eb50516
1 Parent(s): f33e323

Update README.md

Files changed (1)
  1. README.md +65 -1
README.md CHANGED
@@ -5,4 +5,68 @@ language:
  tags:
  - mistral
  - text-generation-inference
- ---
+ ---
+ ### mistral-indo-7b
+
+ [Mistral 7b v0.1](https://huggingface.co/mistralai/Mistral-7B-v0.1) fine-tuned on an [Indonesian instruction dataset](https://huggingface.co/datasets/sarahlintang/Alpaca_indo_instruct).
+
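+ If you want to inspect the fine-tuning data, it can be loaded with the standard `datasets` API. A minimal sketch (the `train` split name is an assumption, not taken from the dataset card):
+
+ ```python
+ from datasets import load_dataset
+
+ # Assumes the dataset exposes a "train" split, as most Alpaca-style datasets do.
+ ds = load_dataset("sarahlintang/Alpaca_indo_instruct", split="train")
+ print(ds[0])  # one instruction/response record
+ ```
+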
+ ### Prompt template:
+ ```
+ ### Human: {instruction} ### Assistant: {response}
+ ```
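+
+ For example, the instruction "Apa ibu kota Indonesia?" ("What is the capital of Indonesia?") would be formatted as the following prompt; the instruction text here is only an illustration:
+
+ ```
+ ### Human: Apa ibu kota Indonesia? ### Assistant: 
+ ```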
+
+ ### Example of Usage
+ ```python
+ import torch
+ from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig
+
+ model_id = "sarahlintang/mistral-indo-7b"
+
+ # Load the fine-tuned model onto the GPU, along with its tokenizer.
+ model = AutoModelForCausalLM.from_pretrained(model_id, trust_remote_code=True).to("cuda")
+ tokenizer = AutoTokenizer.from_pretrained(model_id)
+
+
+ def create_instruction(instruction):
+     # Wrap the user instruction in the prompt template used for fine-tuning.
+     return f"### Human: {instruction} ### Assistant: "
+
+
+ def generate(
+     instruction,
+     max_new_tokens=128,
+     temperature=0.1,
+     top_p=0.75,
+     top_k=40,
+     num_beams=4,
+     **kwargs,
+ ):
+     prompt = create_instruction(instruction)
+     inputs = tokenizer(prompt, return_tensors="pt")
+     input_ids = inputs["input_ids"].to("cuda")
+     attention_mask = inputs["attention_mask"].to("cuda")
+     # Note: temperature/top_p/top_k only take effect if do_sample=True is
+     # passed via kwargs; with num_beams=4 alone, decoding is beam search.
+     generation_config = GenerationConfig(
+         temperature=temperature,
+         top_p=top_p,
+         top_k=top_k,
+         num_beams=num_beams,
+         **kwargs,
+     )
+     with torch.no_grad():
+         generation_output = model.generate(
+             input_ids=input_ids,
+             attention_mask=attention_mask,
+             generation_config=generation_config,
+             return_dict_in_generate=True,
+             output_scores=True,
+             max_new_tokens=max_new_tokens,
+             early_stopping=True,
+         )
+     s = generation_output.sequences[0]
+     output = tokenizer.decode(s, skip_special_tokens=True)
+     # Keep only the assistant's part of the decoded text.
+     return output.split("### Assistant:")[1].strip()
+
+
+ instruction = "Sebutkan lima macam makanan khas Indonesia."  # "Name five typical Indonesian dishes."
+ print(generate(instruction))
+ ```
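+
+ For quick experiments, the same checkpoint should also work with the `transformers` pipeline API. A minimal sketch (the sampling settings below are illustrative defaults, not values from this model card):
+
+ ```python
+ from transformers import pipeline
+
+ generator = pipeline("text-generation", model="sarahlintang/mistral-indo-7b", device=0)
+
+ # Same prompt template as above; the instruction text is only an example.
+ prompt = "### Human: Jelaskan apa itu rendang. ### Assistant: "  # "Explain what rendang is."
+ out = generator(prompt, max_new_tokens=128, do_sample=True, temperature=0.7, top_p=0.9)
+ # generated_text includes the prompt, so split off the assistant's reply.
+ print(out[0]["generated_text"].split("### Assistant:")[1].strip())
+ ```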