from youtube_transcript_api import YouTubeTranscriptApi
import openai
from urllib.parse import urlparse, parse_qs
import requests
from requests.auth import HTTPBasicAuth
import os
import logging

logging.basicConfig(filename='app.log', filemode='a',
                    format='%(name)s - %(levelname)s - %(message)s', level=logging.DEBUG)


def get_video_id_from_url(url):
    """
    Extracts the YouTube video ID from a given URL.

    Supports both 'youtube.com' and 'youtu.be' URL formats. For 'youtube.com', it looks for the 'v' query parameter.
    For 'youtu.be', it extracts the ID directly from the path.

    Parameters:
        url (str): The full URL of the YouTube video.

    Returns:
        str: The extracted video ID if found, otherwise None.

    Note:
        This function silently handles exceptions and returns None if the video ID cannot be extracted.
    """
    try:
        url_data = urlparse(url)
        if url_data.hostname == 'www.youtube.com' or url_data.hostname == 'youtube.com':
            query = parse_qs(url_data.query)
            video_id = query.get("v")
            if video_id:
                #logging.info(f"Video ID {video_id[0]} extracted from URL.")
                return video_id[0]
        elif url_data.hostname == 'youtu.be':
            # Extract the video ID from the path for youtu.be URLs
            video_id = url_data.path[1:]  # Remove the leading '/'
            if video_id:
                #logging.info(f"Video ID {video_id} extracted from URL.")
                return video_id
        
        #logging.warning(f"No video ID found in URL: {url}")
        return None
    except Exception:
        #logging.error(f"Error extracting video ID from URL {url}: {e}")
        return None
    
def get_first_youtube_video_url(urls):
    """
    Finds and returns the first YouTube video URL from a list of URLs.

    Iterates over a provided list of URLs, checking each for a substring that matches
    'youtube' or 'youtu.be'. Returns the first URL that matches these criteria.

    Parameters:
        urls (list of str): A list containing URLs to be checked.

    Returns:
        str: The first YouTube video URL found in the list, or None if no YouTube URL is found.
    """
    for url in urls:
        if 'youtube' in url or 'youtu.be' in url:
            return url
    return None

def get_youtube_url(opportunity_id):
    """
    Retrieves the YouTube video URL associated with a given opportunity ID from the Lever API.

    This function makes a GET request to the Lever API to fetch the opportunity details using the provided
    opportunity ID. It then extracts and returns the first YouTube video URL found in the 'links' section
    of the opportunity data.

    Parameters:
        opportunity_id (str): The unique identifier for the opportunity in the Lever system.

    Returns:
        str: The YouTube video URL associated with the opportunity, or None if no YouTube URL is found.

    Note:
        Requires the 'LeverKey' environment variable to be set for authentication with the Lever API.
    """
    url = 'https://api.lever.co/v1/opportunities/{}'.format(opportunity_id)
    response = requests.get(url, auth=HTTPBasicAuth(os.getenv('LeverKey'),''))

    links = response.json()['data']['links']
    youtube_link = get_first_youtube_video_url(links)
    
    return youtube_link

def parse_decision_to_binary(decision_text):
    """
    Converts a decision text to a binary outcome based on the presence of the word 'yes'.

    This function checks if the word 'yes' is present in the provided decision text, performing
    a case-insensitive comparison. It is designed to interpret a textual decision as a binary
    outcome, where the presence of 'yes' indicates a positive (True) decision, and its absence
    indicates a negative (False) decision.

    Parameters:
        decision_text (str): The decision text to be analyzed.

    Returns:
        bool: True if 'yes' is present in the decision text, False otherwise.
    """
    decision_text_lower = decision_text.lower()
    return "yes" in decision_text_lower

def get_transcript_data_and_pause_count(video_id):
    """
    Fetches a video's transcript, calculates its total duration in minutes, and counts pauses between segments.

    Utilizes the YouTubeTranscriptApi to retrieve the English transcript of a video given its ID, then analyzes
    the transcript to determine the total duration and estimate the number of pauses based on gaps between
    transcript segments.

    Parameters:
        video_id (str): The unique identifier of the YouTube video.

    Returns:
        tuple: A tuple containing the full transcript text (str), total duration in minutes (int),
               and the estimated number of pauses (int), or (None, None, None) if an error occurs.
    """
    try:
        transcript = YouTubeTranscriptApi.get_transcript(video_id, languages=['en'])
        if transcript:
            last_segment = transcript[-1]
            total_duration = last_segment['start'] + last_segment['duration']

            # Estimate the number of pauses
            pauses = 0
            for i in range(1, len(transcript)):
                current_start = transcript[i]['start']
                previous_end = transcript[i-1]['start'] + transcript[i-1]['duration']
                if current_start > previous_end:
                    pauses += 1

            full_transcript = " ".join(segment['text'] for segment in transcript)
            logging.info(f"Transcript retrieved successfully for video ID {video_id}.")
            return full_transcript, total_duration // 60, pauses
    except Exception as e:
        logging.error(f"Failed to retrieve transcript for video ID {video_id}. Error: {e}")
        return None, None, None

def analyze_transcript(url):
    """
    Analyzes a YouTube video's transcript for content quality, using a predefined prompt for GPT evaluation.

    This function reads a prompt from 'prompt.txt', extracts the video ID from the provided URL, retrieves the
    video's transcript and its analysis metrics (total duration and pauses), and evaluates these metrics against
    a GPT model to determine if the candidate qualifies for an interview.

    Parameters:
        url (str): The URL of the YouTube video to be analyzed.

    Returns:
        str: A message indicating whether the candidate qualifies for an interview, an error message if the
             video URL is invalid or the transcript could not be retrieved, or a detailed error message if
             any other error occurs during processing.
    """
    try:
        with open('prompt.txt', 'r') as file:
            prompt = file.read()
    except Exception as e:
        logging.error(f"Error opening or reading from 'prompt.txt': {e}")
        return "Error processing the prompt file."

    try:
        video_id = get_video_id_from_url(url)
        if not video_id:
            logging.error("Invalid URL provided.")
            return "Unable to process the video URL. Currently only YouTube URLs are accepted."

        full_transcript, total_duration, pauses = get_transcript_data_and_pause_count(
            video_id)

        if full_transcript is None:  # If there was an error retrieving the transcript
            logging.error("Error retrieving the transcript.")
            return pauses

        # Define the prompt for GPT evaluation based on the rubric
        prompt = prompt.format(full_transcript, pauses, total_duration)

        # Using the new OpenAI client structure
        client = openai.OpenAI(api_key=os.getenv('OpenAIKey'))
        response = client.chat.completions.create(
            model="gpt-4",
            messages=[
                {"role": "system", "content": "You are a helpful assistant."},
                {"role": "user", "content": prompt}
            ],
        )

        decision = parse_decision_to_binary(response.choices[0].message.content.strip())

        if decision:
            return "The candidate qualifies for an interview."
        return "The candidate does not qualify for an interview."
    except Exception as e:
        logging.error(f"An error occurred during the analysis: {e}")
        return f"An error occurred during the processing. {e}"