Roger Condori committed on
Commit a263964
1 Parent(s): 61c3f19

add limit in HF app.py

Files changed (1)
  1. app.py +11 -6
app.py CHANGED
@@ -77,7 +77,9 @@ def convert():
         data_docs += f"<hr><h3 style='color:red;'>{pg}</h2><p>{txt}</p><p>{sc}</p>"
     return data_docs
 
-
+# Max values in generation
+DOC_DB_LIMIT = 10
+MAX_NEW_TOKENS = 2048
 
 with gr.Blocks(theme=theme, css=css) as demo:
     with gr.Tab("Chat"):
@@ -93,7 +95,7 @@ with gr.Blocks(theme=theme, css=css) as demo:
         sou = gr.HTML("")
 
     with gr.Tab("Chat Options"):
-        max_docs = gr.inputs.Slider(1, 10, default=3, label="Maximum querys to the DB.", step=1)
+        max_docs = gr.inputs.Slider(1, DOC_DB_LIMIT, default=3, label="Maximum querys to the DB.", step=1)
         row_table = gr.HTML("<hr><h4> </h2>")
         clear_button = gr.Button("CLEAR CHAT HISTORY", )
         link_output = gr.HTML("")
@@ -105,7 +107,7 @@ with gr.Blocks(theme=theme, css=css) as demo:
         gr.HTML("<h3>Only models from the GGML library are accepted.</h3>")
         repo_ = gr.Textbox(label="Repository" ,value="TheBloke/Llama-2-7B-Chat-GGML")
         file_ = gr.Textbox(label="File name" ,value="llama-2-7b-chat.ggmlv3.q2_K.bin")
-        max_tokens = gr.inputs.Slider(1, 2048, default=16, label="Max new tokens", step=1)
+        max_tokens = gr.inputs.Slider(1, MAX_NEW_TOKENS, default=16, label="Max new tokens", step=1)
         temperature = gr.inputs.Slider(0.1, 1., default=0.2, label="Temperature", step=0.1)
         top_k = gr.inputs.Slider(0.01, 1., default=0.95, label="Top K", step=0.01)
         top_p = gr.inputs.Slider(0, 100, default=50, label="Top P", step=1)
@@ -118,9 +120,12 @@ with gr.Blocks(theme=theme, css=css) as demo:
     msg.submit(predict,[msg, chatbot, max_docs],[msg, chatbot]).then(convert,[],[sou])
 
     change_model_button.click(dc.change_llm,[repo_, file_, max_tokens, temperature, top_p, top_k, repeat_penalty, max_docs],[model_verify])
-    falcon_button.click(dc.default_falcon_model, [], [model_verify])
 
+    falcon_button.click(dc.default_falcon_model, [], [model_verify])
 
-DEMO = os.getenv("DEMO")
-print(DEMO)
+# limit in HF, no need to set it
+if "SET_LIMIT" == os.getenv("DEMO"):
+    DOC_DB_LIMIT = 4
+    MAX_NEW_TOKENS = 32
+
 demo.launch(enable_queue=True)
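
For reference, a minimal standalone sketch of the env-gated limit pattern this commit introduces. DOC_DB_LIMIT, MAX_NEW_TOKENS, and the DEMO=SET_LIMIT check come from the diff above; gr.Slider stands in for the deprecated gr.inputs.Slider used in app.py, and the sketch applies the override before the Blocks are built, since slider maxima are read once at construction time.

    # Sketch only: env-gated slider ceilings, assuming Gradio 3.x API.
    import os

    import gradio as gr

    # Default ceilings for the generation sliders (values from this commit).
    DOC_DB_LIMIT = 10
    MAX_NEW_TOKENS = 2048

    # On the limited HF demo, DEMO=SET_LIMIT shrinks both ceilings.
    # Applied before building the UI so the sliders pick up the new maxima.
    if os.getenv("DEMO") == "SET_LIMIT":
        DOC_DB_LIMIT = 4
        MAX_NEW_TOKENS = 32

    with gr.Blocks() as demo:
        max_docs = gr.Slider(1, DOC_DB_LIMIT, value=3, step=1,
                             label="Maximum queries to the DB")
        max_tokens = gr.Slider(1, MAX_NEW_TOKENS, value=16, step=1,
                               label="Max new tokens")

    demo.launch()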