sh2orc committed on
Commit 3df13c8
• 1 Parent(s): 57ca967

README.md modify
Files changed (1): README.md (+59 -0)
README.md CHANGED
@@ -1,3 +1,62 @@
---
license: llama3
---

- Foundation Model: [Bllossom 8B](https://huggingface.co/MLP-KTLim/llama-3-Korean-Bllossom-8B)
- Datasets (a minimal loading sketch follows this list):
  - [KoAlpaca v1.1a](https://huggingface.co/datasets/beomi/KoAlpaca-v1.1a)
  - [jojo0217/korean_safe_conversation](https://huggingface.co/datasets/jojo0217/korean_safe_conversation)
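
For reference, a minimal sketch of loading these two corpora with the Hugging Face `datasets` library; the `split="train"` names are assumptions, not taken from the dataset cards:

```python
from datasets import load_dataset

# Assumption: both corpora expose a "train" split on the Hub
koalpaca = load_dataset("beomi/KoAlpaca-v1.1a", split="train")
safe_conv = load_dataset("jojo0217/korean_safe_conversation", split="train")

# Inspect one record from each to check the field layout before fine-tuning
print(koalpaca[0])
print(safe_conv[0])
```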

# Query
```python
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline

BASE_MODEL = "sh2orc/llama-3-korean-8b"

# Load the model and spread it across the available devices
model = AutoModelForCausalLM.from_pretrained(BASE_MODEL, device_map="auto")

# Llama 3 ships without a pad token; reuse EOS so padding works
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = 'right'

# "How many bridges are there over the Han River?"
instruction = "ν•œκ°•μ—λŠ” λŒ€κ΅κ°€ λͺ‡ 개 μžˆμ–΄?"

pipe = pipeline("text-generation",
                model=model,
                tokenizer=tokenizer,
                max_new_tokens=1024)

messages = [
    {"role": "user", "content": instruction},
]

# Render the chat messages into a Llama 3 prompt string
prompt = pipe.tokenizer.apply_chat_template(
    messages,
    tokenize=False,
    add_generation_prompt=True
)

outputs = pipe(
    prompt,
    do_sample=True,
    temperature=0.8,
    top_k=10,
    top_p=0.9,
    add_special_tokens=True,
    # Stop on either EOS or Llama 3's end-of-turn token
    eos_token_id=[
        pipe.tokenizer.eos_token_id,
        pipe.tokenizer.convert_tokens_to_ids("<|eot_id|>")
    ]
)

# Drop the prompt and print only the newly generated answer
print(outputs[0]['generated_text'][len(prompt):])
```

# Result
<pre>
ν•œκ°•μ—λŠ” 총 8개의 닀리(ꡐ)κ°€ μžˆμŠ΅λ‹ˆλ‹€. κ·Έ 쀑 3κ°œλŠ” 뢁μͺ½μœΌλ‘œ ν–₯ν•΄ 있고, λ‚˜λ¨Έμ§€ 5κ°œλŠ” 남μͺ½μœΌλ‘œ ν–₯ν•΄ μžˆμŠ΅λ‹ˆλ‹€.
</pre>

(English: "There are a total of 8 bridges over the Han River. Of these, 3 head north and the remaining 5 head south.")
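
Sampling with `temperature=0.8` varies across runs. For reproducible answers, a minimal variant (an assumption, not part of the original card) decodes greedily, reusing `pipe` and `prompt` from the Query snippet:

```python
# Greedy decoding: deterministic output for the same prompt
# (reuses pipe and prompt defined in the Query snippet above)
outputs = pipe(
    prompt,
    do_sample=False,
    add_special_tokens=True,
    eos_token_id=[
        pipe.tokenizer.eos_token_id,
        pipe.tokenizer.convert_tokens_to_ids("<|eot_id|>")
    ]
)
print(outputs[0]['generated_text'][len(prompt):])
```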