Update app.py
Browse files
app.py
CHANGED
@@ -2,7 +2,8 @@ import gradio as gr
|
|
2 |
import requests
|
3 |
import json
|
4 |
import logging
|
5 |
-
import os
|
|
|
6 |
|
7 |
# Set up logging
|
8 |
logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s')
|
@@ -10,21 +11,37 @@ logger = logging.getLogger(__name__)
|
|
10 |
|
11 |
# Hugging Face API settings
|
12 |
API_URL = "https://api-inference.huggingface.co/models/mattshumer/Reflection-Llama-3.1-70B"
|
13 |
-
API_TOKEN = os.environ.get("HUGGINGFACE_API_TOKEN") # Make sure to set this in your Space's secrets
|
14 |
|
15 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
16 |
|
17 |
-
|
18 |
-
logger.info(f"Sending payload to API: {payload}")
|
19 |
-
response = requests.post(API_URL, headers=headers, json=payload)
|
20 |
-
logger.info(f"Received response with status code: {response.status_code}")
|
21 |
-
return response.json()
|
22 |
|
23 |
def generate_text(prompt):
|
24 |
logger.info(f"Received prompt: {prompt}")
|
25 |
|
26 |
try:
|
27 |
-
# Generate text
|
28 |
payload = {
|
29 |
"inputs": prompt,
|
30 |
"parameters": {
|
@@ -39,6 +56,9 @@ def generate_text(prompt):
|
|
39 |
|
40 |
logger.info(f"Raw response from API: {json.dumps(response, indent=2)}")
|
41 |
|
|
|
|
|
|
|
42 |
if isinstance(response, list) and len(response) > 0:
|
43 |
generated_text = response[0].get('generated_text', '')
|
44 |
logger.info(f"Processed response: {generated_text[:100]}...") # Log first 100 chars
|
|
|
2 |
import requests
|
3 |
import json
|
4 |
import logging
|
5 |
+
import time
|
6 |
+
from requests.exceptions import RequestException
|
7 |
|
8 |
# Set up logging
|
9 |
logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s')
|
|
|
11 |
|
12 |
# Hugging Face API settings
|
13 |
API_URL = "https://api-inference.huggingface.co/models/mattshumer/Reflection-Llama-3.1-70B"
|
|
|
14 |
|
15 |
+
def query(payload, max_retries=3, delay=5):
    """POST *payload* to the inference endpoint, retrying transient failures.

    Each attempt sends ``payload`` as JSON to ``API_URL`` with a 60-second
    timeout. A 503 response or a ``RequestException`` is retried; any other
    non-200 status is returned as an error immediately.

    Args:
        payload: JSON-serializable request body.
        max_retries: Maximum number of attempts to make.
        delay: Seconds to wait between attempts.

    Returns:
        The decoded JSON body on a 200 response; otherwise a dict with a
        single ``"error"`` key describing the failure.
    """
    for attempt in range(max_retries):
        # Only wait when another attempt will actually follow; sleeping
        # after the final attempt just delays the error the caller gets.
        has_attempts_left = attempt < max_retries - 1
        try:
            logger.info(f"Sending payload to API (attempt {attempt + 1}/{max_retries}): {payload}")
            response = requests.post(API_URL, json=payload, timeout=60)
            logger.info(f"Received response with status code: {response.status_code}")

            if response.status_code == 200:
                return response.json()

            if response.status_code != 503:
                # Any status other than 200/503 is not retried.
                logger.error(f"API request failed with status code {response.status_code}: {response.text}")
                return {"error": f"API request failed with status code {response.status_code}"}

            # 503: retry while attempts remain.
            if has_attempts_left:
                logger.warning("Model is loading. Retrying...")
                time.sleep(delay)
            else:
                logger.warning("Model is loading. No retries left.")

        except RequestException as e:
            logger.error(f"Request failed: {str(e)}")
            if has_attempts_left:
                logger.info(f"Retrying in {delay} seconds...")
                time.sleep(delay)
            else:
                return {"error": f"Failed to connect after {max_retries} attempts: {str(e)}"}

    return {"error": "Maximum retries reached"}
|
|
|
|
|
|
|
|
|
40 |
|
41 |
def generate_text(prompt):
|
42 |
logger.info(f"Received prompt: {prompt}")
|
43 |
|
44 |
try:
|
|
|
45 |
payload = {
|
46 |
"inputs": prompt,
|
47 |
"parameters": {
|
|
|
56 |
|
57 |
logger.info(f"Raw response from API: {json.dumps(response, indent=2)}")
|
58 |
|
59 |
+
if "error" in response:
|
60 |
+
return f"Error: {response['error']}"
|
61 |
+
|
62 |
if isinstance(response, list) and len(response) > 0:
|
63 |
generated_text = response[0].get('generated_text', '')
|
64 |
logger.info(f"Processed response: {generated_text[:100]}...") # Log first 100 chars
|