eduardo-alvarez committed
Commit e51fe0f • 1 Parent(s): 867b5a3

Update app.py

Files changed (1)
  1. app.py +30 -30
app.py CHANGED
@@ -48,36 +48,36 @@ with demo:
   interactive=True,
   )
 
-#chat_model_selection = chat_model_dropdown.value
-chat_model_selection = 'Intel/neural-chat-7b-v1-1'
-
-def call_api_and_stream_response(query, chat_model):
-    """
-    Call the API endpoint and yield characters as they are received.
-    This function simulates streaming by yielding characters one by one.
-    """
-    url = inference_endpoint_url
-    params = {"query": query,"selected_model":chat_model}
-    with requests.get(url, json=params, stream=True) as r:
-        for chunk in r.iter_content(chunk_size=1):
-            if chunk:
-                yield chunk.decode()
-
-def get_response(query, history):
-    """
-    Wrapper function to call the streaming API and compile the response.
-    """
-    response = ''
-
-    global chat_model_selection
-
-    for char in call_api_and_stream_response(query, chat_model=chat_model_selection):
-        if char == '<':
-            break
-        response += char
-        yield response
-
-gr.ChatInterface(get_response, retry_btn = None, undo_btn=None, concurrency_limit=inference_concurrency_limit).launch()
+# #chat_model_selection = chat_model_dropdown.value
+# chat_model_selection = 'Intel/neural-chat-7b-v1-1'
+#
+# def call_api_and_stream_response(query, chat_model):
+#     """
+#     Call the API endpoint and yield characters as they are received.
+#     This function simulates streaming by yielding characters one by one.
+#     """
+#     url = inference_endpoint_url
+#     params = {"query": query,"selected_model":chat_model}
+#     with requests.get(url, json=params, stream=True) as r:
+#         for chunk in r.iter_content(chunk_size=1):
+#             if chunk:
+#                 yield chunk.decode()
+#
+# def get_response(query, history):
+#     """
+#     Wrapper function to call the streaming API and compile the response.
+#     """
+#     response = ''
+#
+#     global chat_model_selection
+#
+#     for char in call_api_and_stream_response(query, chat_model=chat_model_selection):
+#         if char == '<':
+#             break
+#         response += char
+#         yield response
+#
+# gr.ChatInterface(get_response, retry_btn = None, undo_btn=None, concurrency_limit=inference_concurrency_limit).launch()
 
   with gr.Tabs(elem_classes="tab-buttons") as tabs:
       with gr.TabItem("🏆 LLM Leadeboard", elem_id="llm-benchmark-table", id=0):
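
For reference, the block this commit comments out streams a reply character by character over HTTP with requests. Below is a minimal, self-contained sketch of the same pattern; ENDPOINT_URL, MODEL, and the sample query are placeholders (app.py uses inference_endpoint_url and the dropdown selection instead), not values taken from this repository.

    import requests

    # Placeholders for illustration only; app.py supplies these at runtime.
    ENDPOINT_URL = "http://localhost:8000/inference"
    MODEL = "Intel/neural-chat-7b-v1-1"

    def stream_chars(query, chat_model=MODEL):
        """Yield the model's reply one character at a time as it arrives."""
        params = {"query": query, "selected_model": chat_model}
        with requests.get(ENDPOINT_URL, json=params, stream=True) as r:
            r.raise_for_status()
            # chunk_size=1 mirrors the commented-out code: one byte per chunk.
            for chunk in r.iter_content(chunk_size=1):
                if chunk:
                    yield chunk.decode()

    if __name__ == "__main__":
        reply = ""
        for char in stream_chars("Hello"):
            if char == "<":  # app.py treats '<' as an end-of-answer marker
                break
            reply += char
        print(reply)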