import base64
import requests
import os
import logging
from dotenv import load_dotenv

# Load environment variables
load_dotenv()

# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(message)s",
    handlers=[
        logging.StreamHandler(),  # Log to console
        logging.FileHandler("api_request_logs.log")  # Log to a file
    ]
)

# Get the API key from environment variable
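# The key name below (GROQ_API_KEY2) matches the os.getenv call that follows.
# Example .env entry (the value is a placeholder, not a real credential):
#   GROQ_API_KEY2=<your-groq-api-key>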
GROQ_API_KEY = os.getenv("GROQ_API_KEY2")
if not GROQ_API_KEY:
    raise ValueError("GROQ_API_KEY2 is not set in the .env file")

def process_image_and_get_description(image_path, model="llama-3.2-90b-vision-preview", retries=3):
    """
    Process the image using the Groq API and get a description.
    Retries in case of failure.

    Args:
        image_path (str): Path to the image file, or an already
            base64-encoded image string.
        model (str): Model to use for processing.
        retries (int): Number of retries before giving up.

    Returns:
        str: Description of the image or an error message.
    """
    # Accept either a filesystem path or an already base64-encoded string.
    if os.path.isfile(image_path):
        # Encode the image file to base64
        try:
            with open(image_path, "rb") as image_file:
                encoded_image = base64.b64encode(image_file.read()).decode("utf-8")
            logging.info("Successfully encoded the image to base64.")
        except Exception as e:
            logging.error(f"Error encoding the image: {e}")
            return "Error encoding the image."
    else:
        encoded_image = image_path

    # Prepare the message payload
    messages = [
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "Analyze the image to identify what is happening, describe the overall context, and perform OCR to extract any visible text. Additionally, specify whether the subject is a human, animal, or object, and provide a detailed description of any object the human is holding or their specific actions."},
                {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{encoded_image}"}}
            ]
        }
    ]

    for attempt in range(1, retries + 1):
        try:
            logging.info(f"Attempt {attempt} to process the image with Groq API.")

            # Make the API request
            response = requests.post(
                "https://api.groq.com/openai/v1/chat/completions",
                json={
                    "model": model,
                    "messages": messages,
                    "max_tokens": 4096,
                    "stop": None,
                    "stream": False
                },
                headers={
                    "Authorization": f"Bearer {GROQ_API_KEY}",
                    "Content-Type": "application/json"
                },
                timeout=30
            )

            # Process the response
            if response.status_code == 200:
                result = response.json()
                answer = result["choices"][0]["message"]["content"]
                logging.info("Successfully processed the image and received a response.")
                return answer
            else:
                logging.warning(f"Received error response: {response.status_code} - {response.text}")
        except requests.RequestException as e:
            logging.error(f"RequestException on attempt {attempt}: {e}")

    logging.error("All attempts to process the image failed.")
    return "Error: Unable to process the image after multiple attempts."

# Example usage:
# image_path = r"/content/temp.jpeg"
# description = process_image_and_get_description(image_path)
# print(description)
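
# Alternative usage sketch: pass an image that is already base64-encoded
# ("frame.jpg" is a hypothetical file name used only for illustration):
# with open("frame.jpg", "rb") as f:
#     encoded = base64.b64encode(f.read()).decode("utf-8")
# print(process_image_and_get_description(encoded))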