ldhldh committed
Commit b64f859
1 Parent(s): 46893aa

Update app.py

Files changed (1)
  1. app.py +2 -5
app.py CHANGED
@@ -1,25 +1,22 @@
-import subprocess
 from threading import Thread
 from llama_cpp import Llama
 import torch
 import gradio as gr
 import re
 
-subprocess.call(f"""CMAKE_ARGS="-DLLAMA_CUBLAS=on" FORCE_CMAKE=1 pip install --upgrade --force-reinstall llama-cpp-python --no-cache-dir
-""", shell=True)
 
 torch_device = "cuda" if torch.cuda.is_available() else "cpu"
 print("Running on device:", torch_device)
 print("CPU threads:", torch.get_num_threads())
 
 llm = Llama(model_path = 'Llama-2-ko-7B-chat-gguf-q4_0.bin',
-            n_ctx=50,
+            n_ctx=40,
             n_threads = 8,
             n_batch = 5
             )
 
 def gen(x, max_new_tokens):
-    output = llm(f"### 명령어:\n{x}\n\n### 응답:", max_tokens=max_new_tokens, stop=["###", "\n", ":"], echo=True)
+    output = llm(f"### 명령어:\n{x}\n\n### 응답:", max_tokens=max_new_tokens, stop=["###"], echo=True)
 
     return output['choices'][0]['text'].replace('▁',' ')
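
Note on the updated gen() call: with stop=["###"] the completion now runs until the model emits the next "###" header or hits max_tokens, instead of cutting off at the first newline or colon, and the prompt plus completion still has to fit inside the 40-token n_ctx window. A minimal sketch of how gen() could be exposed through Gradio follows; the real interface code sits past the lines shown in this diff, so the gr.Interface layout, the slider bounds, and the launch() call below are illustrative assumptions, not part of this commit.

# Illustrative sketch only -- the actual Gradio wiring in app.py is outside
# the lines shown in this diff; component choices and ranges are assumptions.
import gradio as gr

demo = gr.Interface(
    fn=gen,  # gen(x, max_new_tokens) as defined above
    inputs=[
        gr.Textbox(label="명령어 (instruction)"),  # inserted into the "### 명령어: ... ### 응답:" prompt template
        gr.Slider(minimum=1, maximum=32, value=16, step=1,
                  label="max_new_tokens"),  # kept small: prompt + output must fit in n_ctx=40
    ],
    outputs=gr.Textbox(label="응답 (response)"),
)

demo.launch()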