Spaces:

alexkueck
/

TestInferenceAPI

Sleeping

App Files Community

alexkueck committed on Apr 15, 2024

Commit

a53f544

verified ·

1 Parent(s): f179d98

Update app.py

Browse files

Files changed (1) hide show

app.py +2 -26

app.py CHANGED Viewed

@@ -352,32 +352,8 @@ def generate(text, history, rag_option, model_option,  k=3, top_p=0.6, temperatu
                 "inputs": prompt,
                 "options": {"max_new_tokens": max_new_tokens},
             }
-            ##############################################
-            #Inference Endpoint
-            ##############################################
-            endpoint = create_inference_endpoint(
-                "smaug-72b-v0-1-bmw",
-                repository="abacusai/Smaug-72B-v0.1",
-                framework="pytorch",
-                task="text-generation",
-                accelerator="gpu",
-                vendor="aws",
-                region="us-east-1",
-                type="protected",
-                instance_size="medium",
-                instance_type="g5.2xlarge",
-                custom_image={
-                    "health_route": "/health",
-                    "env": {
-                        "MAX_BATCH_PREFILL_TOKENS": "2048",
-                        "MAX_INPUT_LENGTH": "1024",
-                        "MAX_TOTAL_TOKENS": "1512",
-                        "MODEL_ID": "/repository"
-                    },
-                    "url": "https://ih7lj8onsvp1wbh0.us-east-1.aws.endpoints.huggingface.cloud",
-                },
-)
-            response= endpoint.client.text_generation(prompt)   #requests.post(API_URL, headers=HEADERS, json=data)
             if response != None:
                 result = response.json()
                 print("result:------------------")

                 "inputs": prompt,
                 "options": {"max_new_tokens": max_new_tokens},
             }
+            response= requests.post(API_URL, headers=HEADERS, json=data)
             if response != None:
                 result = response.json()
                 print("result:------------------")