eduardo-alvarez committed on
Commit
a75abaf
β€’
1 Parent(s): e51fe0f

Update app.py

Files changed (1)
  1. app.py +29 -30
app.py CHANGED
@@ -48,36 +48,35 @@ with demo:
         interactive=True,
     )
 
-    # #chat_model_selection = chat_model_dropdown.value
-    # chat_model_selection = 'Intel/neural-chat-7b-v1-1'
-    #
-    # def call_api_and_stream_response(query, chat_model):
-    #     """
-    #     Call the API endpoint and yield characters as they are received.
-    #     This function simulates streaming by yielding characters one by one.
-    #     """
-    #     url = inference_endpoint_url
-    #     params = {"query": query,"selected_model":chat_model}
-    #     with requests.get(url, json=params, stream=True) as r:
-    #         for chunk in r.iter_content(chunk_size=1):
-    #             if chunk:
-    #                 yield chunk.decode()
-    #
-    # def get_response(query, history):
-    #     """
-    #     Wrapper function to call the streaming API and compile the response.
-    #     """
-    #     response = ''
-    #
-    #     global chat_model_selection
-    #
-    #     for char in call_api_and_stream_response(query, chat_model=chat_model_selection):
-    #         if char == '<':
-    #             break
-    #         response += char
-    #         yield response
-    #
-    # gr.ChatInterface(get_response, retry_btn = None, undo_btn=None, concurrency_limit=inference_concurrency_limit).launch()
+    #chat_model_selection = chat_model_dropdown.value
+    chat_model_selection = 'Intel/neural-chat-7b-v1-1'
+
+    def call_api_and_stream_response(query, chat_model):
+        """
+        Call the API endpoint and yield characters as they are received.
+        This function simulates streaming by yielding characters one by one.
+        """
+        url = inference_endpoint_url
+        params = {"query": query,"selected_model":chat_model}
+        with requests.get(url, json=params, stream=True) as r:
+            for chunk in r.iter_content(chunk_size=1):
+                if chunk:
+                    yield chunk.decode()
+    def get_response(query, history):
+        """
+        Wrapper function to call the streaming API and compile the response.
+        """
+        response = ''
+
+        global chat_model_selection
+
+        for char in call_api_and_stream_response(query, chat_model=chat_model_selection):
+            if char == '<':
+                break
+            response += char
+            yield response
+
+    gr.ChatInterface(get_response, retry_btn = None, undo_btn=None, concurrency_limit=inference_concurrency_limit).launch()
 
     with gr.Tabs(elem_classes="tab-buttons") as tabs:
         with gr.TabItem("🏆 LLM Leaderboard", elem_id="llm-benchmark-table", id=0):
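
For reference, the block this commit re-enables follows a simple character-streaming pattern: keep the HTTP connection open with stream=True and yield the body one byte at a time via iter_content(chunk_size=1). The sketch below is a standalone approximation of that pattern, not the app itself; INFERENCE_ENDPOINT_URL is a hypothetical stand-in for the inference_endpoint_url defined elsewhere in app.py, and the '<' check mirrors the sentinel the app uses to cut off the response.

import requests

# Hypothetical stand-in; the real inference_endpoint_url is defined
# elsewhere in app.py and is not shown in this diff.
INFERENCE_ENDPOINT_URL = "http://localhost:8000/generate"

def stream_chars(query, chat_model):
    """Yield response characters as they arrive, mirroring the
    call_api_and_stream_response function in the diff above."""
    params = {"query": query, "selected_model": chat_model}
    # stream=True defers downloading the body; iter_content(chunk_size=1)
    # then hands the payload back one byte at a time as it arrives.
    with requests.get(INFERENCE_ENDPOINT_URL, json=params, stream=True) as r:
        r.raise_for_status()
        for chunk in r.iter_content(chunk_size=1):
            if chunk:
                yield chunk.decode()

def collect_response(query, chat_model="Intel/neural-chat-7b-v1-1"):
    """Accumulate the stream into progressively longer strings,
    stopping at the '<' sentinel, as get_response does."""
    response = ""
    for char in stream_chars(query, chat_model):
        if char == "<":
            break
        response += char
        yield response  # each partial string can be rendered incrementally

One caveat worth noting: decoding byte-by-byte assumes the endpoint emits ASCII. A multi-byte UTF-8 character split across one-byte chunks would make chunk.decode() raise UnicodeDecodeError, so a more robust client would buffer raw bytes or use an incremental decoder before yielding text.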