Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
Add more logging, a timeout in case the model is not loaded, and remove the unneeded URL
#4
by
Wauplin
HF staff
- opened
app.py
CHANGED
@@ -1,12 +1,15 @@
|
|
1 |
import gradio as gr
|
2 |
from huggingface_hub import InferenceClient
|
|
|
3 |
import os
|
4 |
|
|
|
|
|
5 |
token = os.getenv("TOKEN")
|
6 |
endpoint = os.getenv("ENDPOINT")
|
7 |
|
8 |
# initialize InferenceClient
|
9 |
-
client = InferenceClient(model="…")  # [string truncated in page extraction; per the PR title, this was the now-unneeded endpoint URL]
|
10 |
|
11 |
# query client using streaming mode
|
12 |
def inference(message, history):
|
|
|
1 |
import gradio as gr
|
2 |
from huggingface_hub import InferenceClient
|
3 |
+
from huggingface_hub import logging
|
4 |
import os
|
5 |
|
6 |
+
logging.set_verbosity_info()
|
7 |
+
|
8 |
token = os.getenv("TOKEN")
|
9 |
endpoint = os.getenv("ENDPOINT")
|
10 |
|
11 |
# initialize InferenceClient
|
12 |
+
client = InferenceClient(model="meta-llama/Llama-2-7b-chat-hf", timeout=60, token=token)
|
13 |
|
14 |
# query client using streaming mode
|
15 |
def inference(message, history):
|