alexkueck committed on
Commit
f179d98
1 Parent(s): 5ffd14f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +26 -25
app.py CHANGED
@@ -85,31 +85,7 @@ print ("Inf.Client")
85
  #API_URL = "https://api-inference.huggingface.co/models/argilla/notux-8x7b-v1"
86
  HEADERS = {"Authorization": f"Bearer {HUGGINGFACEHUB_API_TOKEN}"}
87
 
88
- ##############################################
89
- #Inference Endpoint
90
- ##############################################
91
- endpoint = create_inference_endpoint(
92
- "smaug-72b-v0-1-bmw",
93
- repository="abacusai/Smaug-72B-v0.1",
94
- framework="pytorch",
95
- task="text-generation",
96
- accelerator="gpu",
97
- vendor="aws",
98
- region="us-east-1",
99
- type="protected",
100
- instance_size="medium",
101
- instance_type="g5.2xlarge",
102
- custom_image={
103
- "health_route": "/health",
104
- "env": {
105
- "MAX_BATCH_PREFILL_TOKENS": "2048",
106
- "MAX_INPUT_LENGTH": "1024",
107
- "MAX_TOTAL_TOKENS": "1512",
108
- "MODEL_ID": "/repository"
109
- },
110
- "url": "https://ih7lj8onsvp1wbh0.us-east-1.aws.endpoints.huggingface.cloud",
111
- },
112
- )
113
 
114
  ##############################################
115
  # tokenizer for generating prompt
@@ -376,6 +352,31 @@ def generate(text, history, rag_option, model_option, k=3, top_p=0.6, temperatu
376
  "inputs": prompt,
377
  "options": {"max_new_tokens": max_new_tokens},
378
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
379
  response= endpoint.client.text_generation(prompt) #requests.post(API_URL, headers=HEADERS, json=data)
380
  if response != None:
381
  result = response.json()
 
85
  #API_URL = "https://api-inference.huggingface.co/models/argilla/notux-8x7b-v1"
86
  HEADERS = {"Authorization": f"Bearer {HUGGINGFACEHUB_API_TOKEN}"}
87
 
88
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
89
 
90
  ##############################################
91
  # tokenizer for generating prompt
 
352
  "inputs": prompt,
353
  "options": {"max_new_tokens": max_new_tokens},
354
  }
355
+ ##############################################
356
+ #Inference Endpoint
357
+ ##############################################
358
+ endpoint = create_inference_endpoint(
359
+ "smaug-72b-v0-1-bmw",
360
+ repository="abacusai/Smaug-72B-v0.1",
361
+ framework="pytorch",
362
+ task="text-generation",
363
+ accelerator="gpu",
364
+ vendor="aws",
365
+ region="us-east-1",
366
+ type="protected",
367
+ instance_size="medium",
368
+ instance_type="g5.2xlarge",
369
+ custom_image={
370
+ "health_route": "/health",
371
+ "env": {
372
+ "MAX_BATCH_PREFILL_TOKENS": "2048",
373
+ "MAX_INPUT_LENGTH": "1024",
374
+ "MAX_TOTAL_TOKENS": "1512",
375
+ "MODEL_ID": "/repository"
376
+ },
377
+ "url": "https://ih7lj8onsvp1wbh0.us-east-1.aws.endpoints.huggingface.cloud",
378
+ },
379
+ )
380
  response= endpoint.client.text_generation(prompt) #requests.post(API_URL, headers=HEADERS, json=data)
381
  if response != None:
382
  result = response.json()