eduardo-alvarez committed
Commit 6af4a5e
1 Parent(s): fee1876

Update app.py

Files changed (1)
  1. app.py +42 -32
app.py CHANGED
@@ -52,39 +52,49 @@ with demo:
     #chat_model_selection = chat_model_dropdown.value
     chat_model_selection = 'Intel/neural-chat-7b-v1-1'
 
-    #def slow_echo(message, history):
-    #    for i in range(len(message)):
-    #        time.sleep(0.3)
-    #        yield "You typed: " + message[: i+1]
-    #
-    #gr.ChatInterface(slow_echo).launch()
+    def call_api_and_stream_response(query, chat_model):
+        """
+        Call the API endpoint and yield characters as they are received.
+        This function simulates streaming by yielding characters one by one.
+        """
+        url = inference_endpoint_url
+        params = {"query": query, "selected_model": chat_model}
+        with requests.get(url, json=params, stream=True) as r:
+            for chunk in r.iter_content(chunk_size=1):
+                if chunk:
+                    yield chunk.decode()
+    def get_response(query, history):
+        """
+        Wrapper function to call the streaming API and compile the response.
+        """
+        response = ''
+
+        global chat_model_selection
+
+        for char in call_api_and_stream_response(query, chat_model=chat_model_selection):
+            if char == '<':
+                break
+            response += char
+            yield response
+
+    with gr.Blocks():
+        with gr.Row():
+            message_input = gr.Textbox(label="Your message")
+            submit_button = gr.Button("Submit")
+            clear_button = gr.Button("Clear")
+        chatbox = gr.Chatbot()
+
+        submit_button.click(
+            fn=get_response,
+            inputs=message_input,
+            outputs=chatbox
+        )
 
-    #def call_api_and_stream_response(query, chat_model):
-    #    """
-    #    Call the API endpoint and yield characters as they are received.
-    #    This function simulates streaming by yielding characters one by one.
-    #    """
-    #    url = inference_endpoint_url
-    #    params = {"query": query,"selected_model":chat_model}
-    #    with requests.get(url, json=params, stream=True) as r:
-    #        for chunk in r.iter_content(chunk_size=1):
-    #            if chunk:
-    #                yield chunk.decode()
-    #def get_response(query, history):
-    #    """
-    #    Wrapper function to call the streaming API and compile the response.
-    #    """
-    #    response = ''
-    #
-    #    global chat_model_selection
-    #
-    #    for char in call_api_and_stream_response(query, chat_model=chat_model_selection):
-    #        if char == '<':
-    #            break
-    #        response += char
-    #        yield response
-    #
-    #gr.ChatInterface(get_response, retry_btn = None, undo_btn=None, concurrency_limit=inference_concurrency_limit).launch()
+        clear_button.click(
+            fn=clear_chat,
+            inputs=[],
+            outputs=chatbox
+        )
 
     with gr.Tabs(elem_classes="tab-buttons") as tabs:
         with gr.TabItem("🏆 LLM Leadeboard", elem_id="llm-benchmark-table", id=0):
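Note for reviewers: the added handlers lean on two names this diff does not itself define, inference_endpoint_url and clear_chat, and get_response yields plain strings while gr.Chatbot expects a list of (user, bot) message pairs. A minimal, self-contained sketch of the same pattern is below; the endpoint URL, the clear_chat helper, and the history plumbing are illustrative assumptions, not code from this commit.

# Sketch only: INFERENCE_ENDPOINT_URL and clear_chat are assumptions, not part of the commit.
import requests
import gradio as gr

INFERENCE_ENDPOINT_URL = "http://localhost:8000/generate"  # stand-in for inference_endpoint_url
CHAT_MODEL = "Intel/neural-chat-7b-v1-1"

def call_api_and_stream_response(query, chat_model):
    """Yield decoded characters from the endpoint as they arrive."""
    params = {"query": query, "selected_model": chat_model}
    with requests.get(INFERENCE_ENDPOINT_URL, json=params, stream=True) as r:
        r.raise_for_status()
        for chunk in r.iter_content(chunk_size=1):
            if chunk:
                # errors="ignore" guards against a 1-byte chunk splitting
                # a multi-byte UTF-8 character.
                yield chunk.decode(errors="ignore")

def get_response(message, history):
    """Stream the reply into the Chatbot as (user, bot) pairs."""
    history = history or []
    response = ""
    for char in call_api_and_stream_response(message, chat_model=CHAT_MODEL):
        if char == "<":  # the app treats '<' as an end-of-answer marker
            break
        response += char
        yield history + [(message, response)]

def clear_chat():
    """Hypothetical helper (undefined in the commit): reset the Chatbot."""
    return []

with gr.Blocks() as demo:
    chatbox = gr.Chatbot()
    with gr.Row():
        message_input = gr.Textbox(label="Your message")
        submit_button = gr.Button("Submit")
        clear_button = gr.Button("Clear")

    # Pass the Chatbot in as an input so the handler sees prior history.
    submit_button.click(fn=get_response, inputs=[message_input, chatbox], outputs=chatbox)
    clear_button.click(fn=clear_chat, inputs=[], outputs=chatbox)

demo.launch()

Because get_response is a generator, Gradio re-renders the Chatbot on each yield, which preserves the commit's simulated character-by-character streaming (chunk_size=1) in the UI.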