gorkemgoknar committed
Commit 72160fc
1 Parent(s): 99259d6

Update app.py

Files changed (1)
app.py +17 -8
app.py CHANGED
@@ -1,4 +1,4 @@
-from ctransformers import AutoModelForCausalLM
+#from ctransformers import AutoModelForCausalLM
 import re, requests, json
 import gradio as gr
 import random
@@ -23,14 +23,21 @@ MAX_NEW_TOKENS = 25
 GPU_LAYERS = 0
 STOP_LIST=["###","##"]
 
-stopping_criteria = StoppingCriteriaList([MaxLengthCriteria(max_length=64)])
-
-llm = AutoModelForCausalLM.from_pretrained("gorkemgoknar/llama2-7f-moviechatbot-ggml-q4",
-                                           model_type='llama',
-                                           gpu_layers=GPU_LAYERS,
-                                           max_new_tokens=MAX_NEW_TOKENS,
-                                           stop=STOP_LIST)
+#stopping_criteria = StoppingCriteriaList([MaxLengthCriteria(max_length=64)])
+
+# fetch the quantized GGML weights from the Hub, then load them with llama-cpp-python
+from llama_cpp import Llama
+from huggingface_hub import hf_hub_download
+hf_hub_download(repo_id="gorkemgoknar/llama2-7f-moviechatbot-ggml-q4", local_dir=".", filename="llama2-7f-fp16-ggml-q4.bin")
+model_path = "./llama2-7f-fp16-ggml-q4.bin"
+
+llm = Llama(model_path=model_path, n_gpu_layers=GPU_LAYERS)
+
+# to use with ctransformers instead:
+#llm = AutoModelForCausalLM.from_pretrained("gorkemgoknar/llama2-7f-moviechatbot-ggml-q4",
+#                                           model_type='llama',
+#                                           gpu_layers=GPU_LAYERS,
+#                                           max_new_tokens=MAX_NEW_TOKENS,
+#                                           stop=STOP_LIST)
@@ -79,7 +86,9 @@ def get_audio_url(text,character):
 
 def get_response_cpp(prompt):
 
-    response_text= llm(prompt)
+    # original hardcoded test call, kept commented for reference:
+    #output = llm("### Context: talks friendly### History: ### Morpheus: I challenge you to battle of words!### Gerald:", max_tokens=32, stop=["#","sierpeda"], echo=True)
+    # echo=False returns only the completion, matching the old ctransformers behavior
+    output = llm(prompt, max_tokens=32, stop=["#", "sierpeda"], echo=False)
+    response_text = output["choices"][0]["text"]
 
     return response_text
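
The net effect of the commit is the download-load-generate flow below. This is a minimal, self-contained sketch of that pattern, not the Space's full app.py: the repo id, filename, and generation arguments are taken from the diff above, the example prompt reuses the "### Context / ### History" format from the commit's test call, and it assumes a llama-cpp-python version that can still read GGML files (later releases require GGUF).

```python
# Minimal sketch of the loading + inference pattern this commit adopts.
# Requires the huggingface_hub and llama-cpp-python packages.
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

# hf_hub_download returns the local path of the fetched model file
model_path = hf_hub_download(
    repo_id="gorkemgoknar/llama2-7f-moviechatbot-ggml-q4",
    filename="llama2-7f-fp16-ggml-q4.bin",
    local_dir=".",
)

# n_gpu_layers=0 keeps inference entirely on the CPU, as in the Space
llm = Llama(model_path=model_path, n_gpu_layers=0)

# Prompt format the app uses: "### Context ... ### History ... ### <name>:"
prompt = "### Context: talks friendly### History: ### Morpheus: I challenge you to battle of words!### Gerald:"

# llama-cpp-python returns an OpenAI-style completion dict; with echo=False
# the returned text contains only the generated continuation
output = llm(prompt, max_tokens=32, stop=["#"], echo=False)
print(output["choices"][0]["text"])
```

With echo=True, as in the commit's original test call, output["choices"][0]["text"] would include the prompt itself, so the caller would have to strip it before returning the response.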