peterpeter8585 committed
Commit a676ccb · verified · 1 Parent(s): 6060c1f

Update app.py

Files changed (1): app.py (+15 -3)
app.py CHANGED
@@ -1,6 +1,5 @@
 import inspect
 from tqdm import tqdm
-from langchain_huggingface import HuggingFacePipeline,ChatHuggingFace
 path_hf=inspect.getfile(HuggingFacePipeline)
 from subprocess import Popen, PIPE as P
 from langchain_experimental.tools.python.tool import PythonREPLTool as PYT
@@ -87,8 +86,21 @@ else:
 #m=M.from_pretrained("peterpeter8585/syai4.3")
 #t=T.from_pretrained("peterpeter8585/syai4.3")
 #pipe=pipeline(model=m,tokenizer=t,task="text-generation")
-from langchain_community.llms import GPT4ALL
-llm=GPT4ALL(model="./llama-3-open-ko-8b-instruct-preview-q5_k_m.gguf")
+import multiprocessing
+
+from langchain_community.chat_models import ChatLlamaCpp
+
+llm = ChatLlamaCpp(
+    temperature=0,
+    model_path="./llama-3-open-ko-8b-instruct-preview-q5_k_m.gguf",
+    n_ctx=10000,
+    n_gpu_layers=8,
+    n_batch=300,  # Should be between 1 and n_ctx; consider the amount of VRAM in your GPU.
+    max_tokens=512,
+    n_threads=multiprocessing.cpu_count() - 1,
+    repeat_penalty=1.5,
+    top_p=0.5,
+)
 from langchain.retrievers import WikipediaRetriever as Wiki
 import gradio as gr
 chatbot = gr.Chatbot(