jason9693 committed on
Commit
34c0980
โ€ข
1 Parent(s): 090c0f5

modified model path

Browse files
Files changed (1) hide show
  1. app.py +16 -12
app.py CHANGED
@@ -4,19 +4,26 @@ import os
4
  import torch
5
 
6
  theme = "darkgrass"
7
- title = "GPT-NeoX(Korean) Demo"
8
- model_name = "EleutherAI/gpt-neox-ko-1.3b"
 
 
 
 
 
 
 
9
 
10
- description = "GPT-NeoX ํ•œ๊ตญ์–ด ๋ชจ๋ธ์„ ์‹œ์—ฐํ•˜๋Š” ๋ฐ๋ชจํŽ˜์ด์ง€ ์ž…๋‹ˆ๋‹ค."
11
- # article = "<p style='text-align: center'><a href='https://github.com/kingoflolz/mesh-transformer-jax' target='_blank'>GPT-J-6B: A 6 Billion Parameter Autoregressive Language Model</a></p>"
12
  examples = [
13
- ["์ธ๊ฐ„์ฒ˜๋Ÿผ ์ƒ๊ฐํ•˜๊ณ , ํ–‰๋™ํ•˜๋Š” '์ง€๋Šฅ'์„ ํ†ตํ•ด"],
14
- ["์งˆ๋ฌธ: ์ผ๋ณธ์˜ ์•ž์œผ๋กœ์˜ 100๋…„์€ ์–ด๋–ป๊ฒŒ ๋ ๊นŒ์š”? \n๋‹ต๋ณ€:"],
15
  ["2040๋…„ ๋ฏธ๊ตญ์€, "]
16
  ]
17
- tokenizer = AutoTokenizer.from_pretrained("EleutherAI/gpt-neox-ko-1.3b-release", use_auth_token=os.environ['TOKEN'])
18
  model = AutoModelForCausalLM.from_pretrained(
19
- "EleutherAI/gpt-neox-ko-1.3b-release", use_auth_token=os.environ['TOKEN']
20
  )
21
  model.eval()
22
 
@@ -30,10 +37,7 @@ def predict(text):
30
  tokens, do_sample=True, temperature=0.8, max_new_tokens=64, top_k=50, top_p=0.8,
31
  no_repeat_ngram_size=3, repetition_penalty=1.2,
32
  bad_words_ids=[
33
- tokenizer.encode('...'),
34
- tokenizer.encode('....'),
35
- tokenizer.encode('(์ค‘๋žต)'),
36
- tokenizer.encode('http')
37
  ],
38
  eos_token_id=tokenizer.eos_token_id,
39
  pad_token_id=tokenizer.pad_token_id
 
4
  import torch
5
 
6
  theme = "darkgrass"
7
+ title = "Polyglot(Korean) Demo"
8
+ model_name = "EleutherAI/polyglot-ko-1.3b"
9
+
10
+ bad_words = [
11
+ '...',
12
+ '....',
13
+ '(์ค‘๋žต)',
14
+ 'http'
15
+ ]
16
 
17
+ description = "polyglot (1.3B ํŒŒ๋ผ๋ฏธํ„ฐ ์‚ฌ์ด์ฆˆ) ํ•œ๊ตญ์–ด ๋ชจ๋ธ์„ ์‹œ์—ฐํ•˜๋Š” ๋ฐ๋ชจํŽ˜์ด์ง€ ์ž…๋‹ˆ๋‹ค."
18
+ article = "<p style='text-align: center'><a href='https://github.com/EleutherAI/polyglot' target='_blank'>Polyglot: Large Language Models of Well-balanced Competence in Multi-languages</a></p>"
19
  examples = [
20
+ ["CPU์™€ GPU์˜ ์ฐจ์ด๋Š”,"],
21
+ ["์งˆ๋ฌธ: ์šฐํฌ๋ผ์ด๋‚˜ ์ „์Ÿ์ด ์„ธ๊ณ„3์ฐจ๋Œ€์ „์œผ๋กœ ํ™•์ „์ด ๋ ๊นŒ์š”? \n๋‹ต๋ณ€:"],
22
  ["2040๋…„ ๋ฏธ๊ตญ์€, "]
23
  ]
24
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
25
  model = AutoModelForCausalLM.from_pretrained(
26
+ model_name
27
  )
28
  model.eval()
29
 
 
37
  tokens, do_sample=True, temperature=0.8, max_new_tokens=64, top_k=50, top_p=0.8,
38
  no_repeat_ngram_size=3, repetition_penalty=1.2,
39
  bad_words_ids=[
40
+ tokenizer.encode(bad_word) for bad_word in bad_words
 
 
 
41
  ],
42
  eos_token_id=tokenizer.eos_token_id,
43
  pad_token_id=tokenizer.pad_token_id