Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -74,14 +74,16 @@ splittet = False
|
|
74 |
print ("Inf.Client")
|
75 |
#client = InferenceClient("https://api-inference.huggingface.co/models/meta-llama/Llama-2-70b-chat-hf")
|
76 |
#client = InferenceClient("https://ybdhvwle4ksrawzo.eu-west-1.aws.endpoints.huggingface.cloud")
|
77 |
-
|
|
|
|
|
78 |
|
79 |
##############################################
|
80 |
# tokenizer for generating prompt
|
81 |
##############################################
|
82 |
print ("Tokenizer")
|
83 |
#tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-70b-chat-hf")
|
84 |
-
tokenizer = AutoTokenizer.from_pretrained("HuggingFaceH4/zephyr-7b-beta")
|
85 |
#tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.1")
|
86 |
#tokenizer = AutoTokenizer.from_pretrained("mistralai/Mixtral-8x7B-Instruct-v0.1")
|
87 |
|
@@ -279,22 +281,21 @@ def generate(text, history, rag_option, model_option, temperature=0.5, max_new_
|
|
279 |
|
280 |
#Anfrage an Modell (mit RAG: mit chunks aus Vektorstore, ohne: nur promt und history)
|
281 |
#payload = tokenizer.apply_chat_template([{"role":"user","content":prompt}],tokenize=False)
|
282 |
-
|
283 |
-
|
284 |
-
|
285 |
-
|
286 |
-
|
287 |
-
|
288 |
-
top_p=0.9,
|
289 |
-
temperature=0.6,
|
290 |
-
)
|
291 |
except Exception as e:
|
292 |
raise gr.Error(e)
|
293 |
|
|
|
|
|
294 |
#Antwort als Stream ausgeben...
|
295 |
-
for i in range(len(result)):
|
296 |
-
time.sleep(0.05)
|
297 |
-
yield result[: i+1]
|
298 |
|
299 |
|
300 |
|
|
|
74 |
print ("Inf.Client")
|
75 |
#client = InferenceClient("https://api-inference.huggingface.co/models/meta-llama/Llama-2-70b-chat-hf")
|
76 |
#client = InferenceClient("https://ybdhvwle4ksrawzo.eu-west-1.aws.endpoints.huggingface.cloud")
|
77 |
+
#Inference mit Authorisation:
|
78 |
+
API_URL = "https://api-inference.huggingface.co/models/HuggingFaceH4/zephyr-7b-beta"
|
79 |
+
HEADERS = {"Authorization": f"Bearer {HUGGINGFACEHUB_API_TOKEN}"}
|
80 |
|
81 |
##############################################
|
82 |
# tokenizer for generating prompt
|
83 |
##############################################
|
84 |
print ("Tokenizer")
|
85 |
#tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-70b-chat-hf")
|
86 |
+
#tokenizer = AutoTokenizer.from_pretrained("HuggingFaceH4/zephyr-7b-beta")
|
87 |
#tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.1")
|
88 |
#tokenizer = AutoTokenizer.from_pretrained("mistralai/Mixtral-8x7B-Instruct-v0.1")
|
89 |
|
|
|
281 |
|
282 |
#Anfrage an Modell (mit RAG: mit chunks aus Vektorstore, ohne: nur promt und history)
|
283 |
#payload = tokenizer.apply_chat_template([{"role":"user","content":prompt}],tokenize=False)
|
284 |
+
#Für LLAMA:
|
285 |
+
#payload = tokenizer.apply_chat_template(prompt,tokenize=False)
|
286 |
+
#result = client.text_generation(payload, do_sample=True,return_full_text=False, max_new_tokens=2048,top_p=0.9,temperature=0.6,)
|
287 |
+
#inference allg:
|
288 |
+
result= requests.post(API_URL, headers=HEADERS, json=prompt)
|
289 |
+
|
|
|
|
|
|
|
290 |
except Exception as e:
|
291 |
raise gr.Error(e)
|
292 |
|
293 |
+
return result.json()
|
294 |
+
|
295 |
#Antwort als Stream ausgeben...
|
296 |
+
#for i in range(len(result)):
|
297 |
+
#time.sleep(0.05)
|
298 |
+
#yield result[: i+1]
|
299 |
|
300 |
|
301 |
|