Titan / utils /doi.py
NEXAS's picture
Update utils/doi.py
ccfcb67 verified
import base64
import requests
import os
import logging
from dotenv import load_dotenv
# Pull variables from a .env file into the process environment before
# anything below reads them.
load_dotenv()

# Root logger configuration: INFO and above, timestamped, written both to the
# console stream and to api_request_logs.log (a relative path, so it is
# resolved against the working directory).
_LOG_FORMAT = "%(asctime)s - %(levelname)s - %(message)s"
_LOG_HANDLERS = [
    logging.StreamHandler(),
    logging.FileHandler("api_request_logs.log"),
]
logging.basicConfig(level=logging.INFO, format=_LOG_FORMAT, handlers=_LOG_HANDLERS)
# Read the Groq API key from the environment. The variable consulted is
# GROQ_API_KEY2, so the error message names that exact variable; otherwise an
# operator who has only GROQ_API_KEY set is told to set something that is
# already there. An empty value is treated the same as a missing one.
GROQ_API_KEY = os.getenv("GROQ_API_KEY2")
if not GROQ_API_KEY:
    raise ValueError("GROQ_API_KEY2 is not set in the .env file")
def process_image_and_get_description(image_path, model="llama-3.2-90b-vision-preview", retries=3):
    """
    Ask a Groq vision model to describe an image and extract its text.

    The image is sent inline as a ``data:image/jpeg;base64,...`` URL in a
    single user message. The request is attempted up to ``retries`` times;
    transport errors, non-200 responses and malformed response bodies each
    count as one failed attempt.

    Args:
        image_path (str): Either the path of an existing image file, which is
            read and base64-encoded here, or an already base64-encoded image
            string, which is forwarded unchanged.
        model (str): Model to use for processing.
        retries (int): Maximum number of attempts before giving up.

    Returns:
        str: The model's description of the image, or an error message if the
        file could not be read or every attempt failed.
    """
    # Accept both a real file path and pre-encoded base64 data. A base64
    # string does not name an existing file, so callers that already pass
    # encoded data are unaffected. The isinstance guard keeps other value
    # types (None, ints) on the original pass-through path.
    if isinstance(image_path, (str, bytes, os.PathLike)) and os.path.isfile(image_path):
        try:
            with open(image_path, "rb") as image_file:
                encoded_image = base64.b64encode(image_file.read()).decode("utf-8")
        except OSError as e:
            logging.error("Error encoding the image: %s", e)
            return "Error encoding the image."
        logging.info("Successfully encoded the image to base64.")
    else:
        encoded_image = image_path

    prompt = (
        "Analyze the image to identify what is happening, describe the overall "
        "context, and perform OCR to extract any visible text. Additionally, "
        "specify whether the subject is a human, animal, or object, and provide "
        "a detailed description of any object the human is holding or their "
        "specific actions."
    )
    messages = [
        {
            "role": "user",
            "content": [
                {"type": "text", "text": prompt},
                {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{encoded_image}"}},
            ],
        }
    ]
    # The request body is identical on every attempt, so build it once.
    payload = {
        "model": model,
        "messages": messages,
        "max_tokens": 4096,
        "stop": None,
        "stream": False,
    }

    for attempt in range(1, retries + 1):
        logging.info("Attempt %s to process the image with Groq API.", attempt)
        try:
            response = requests.post(
                "https://api.groq.com/openai/v1/chat/completions",
                json=payload,
                headers={
                    "Authorization": f"Bearer {GROQ_API_KEY}",
                    "Content-Type": "application/json",
                },
                timeout=30,
            )
        except requests.RequestException as e:
            logging.error("RequestException on attempt %s: %s", attempt, e)
            continue

        if response.status_code != 200:
            logging.warning("Received error response: %s - %s", response.status_code, response.text)
            continue

        # A 200 with an unexpected body (invalid JSON, missing keys, empty
        # "choices") is treated as a failed attempt instead of letting the
        # exception escape to the caller.
        try:
            answer = response.json()["choices"][0]["message"]["content"]
        except (KeyError, IndexError, TypeError, ValueError) as e:
            logging.error("Malformed response on attempt %s: %s", attempt, e)
            continue

        logging.info("Successfully processed the image and received a response.")
        return answer

    logging.error("All attempts to process the image failed.")
    return "Error: Unable to process the image after multiple attempts."
# # Example usage
# image_path = r"/content/temp.jpeg"
# description = process_image_and_get_description(image_path)
# print(description)