# === Imports ===

# Standard Library
import os
import re
import json
import random
import subprocess
from io import BytesIO
from collections import Counter

# Third-Party Libraries
import fitz  # PyMuPDF
import requests
import spacy
import streamlit as st
from fuzzywuzzy import fuzz
from sentence_transformers import SentenceTransformer, util
from sklearn.feature_extraction.text import TfidfVectorizer
from huggingface_hub import InferenceClient

# Local Configuration
from config import (
    SUPABASE_URL, SUPABASE_KEY, HF_API_TOKEN, HF_HEADERS,
    supabase, HF_MODELS, query, embedding_model
)

# === Initialization ===

# Module-wide Hugging Face chat client (Gemma 1.1 7B instruct), authenticated
# with the token imported from the local config module.
client = InferenceClient(model="google/gemma-1.1-7b-it", token=HF_API_TOKEN)

# Load the small English spaCy pipeline, downloading it on first use.
try:
    nlp = spacy.load("en_core_web_sm")
except OSError:
    import sys

    # Run the download with the interpreter that is executing this module
    # (a bare "python" on PATH may belong to a different environment), and
    # fail loudly if the download itself fails instead of hitting a second,
    # less informative OSError on the retry below.
    subprocess.run(
        [sys.executable, "-m", "spacy", "download", "en_core_web_sm"],
        check=True,
    )
    nlp = spacy.load("en_core_web_sm")


# === Core Resume Evaluation ===

def evaluate_resumes(uploaded_files, job_description, min_keyword_match=2,
                     min_score=0.20, top_n=5):
    """
    Evaluate uploaded resumes and return shortlisted candidates with scores and summaries.

    Args:
        uploaded_files: Iterable of uploaded PDF file objects; each must expose
            ``.read()`` and ``.name``. ``None`` is treated as no uploads.
        job_description: Job description text the resumes are compared against.
        min_keyword_match: Minimum number of job keywords a resume must match
            to survive keyword filtering.
        min_score: Similarity score below which a resume is rejected outright.
        top_n: Maximum number of candidates to shortlist.

    Returns:
        A ``(shortlisted, removed_candidates)`` tuple. ``shortlisted`` is a list
        of candidate dicts (``name``, ``resume``, ``score``, ``email``,
        ``summary``) sorted by descending score. ``removed_candidates`` is a
        list of ``{"name", "reason"}`` dicts for every rejected resume.
    """
    candidates, removed_candidates = [], []

    for pdf_file in uploaded_files or []:
        resume_text = parse_resume(pdf_file)
        score = score_candidate(resume_text, job_description)

        # Reject early so the (remote) summarisation call is only spent on
        # resumes that can still make the shortlist.
        if score < min_score:
            removed_candidates.append({
                "name": pdf_file.name,
                "reason": f"Low confidence score (< {min_score:.2f})",
            })
            continue

        candidates.append({
            "name": pdf_file.name,
            "resume": resume_text,
            "score": score,
            "email": extract_email(resume_text),
            "summary": summarize_resume(resume_text),
        })

    filtered_candidates, keyword_removed = filter_resumes_by_keywords(
        candidates, job_description, min_keyword_match
    )

    removed_candidates.extend(
        {"name": name, "reason": "Insufficient keyword matches"}
        for name in keyword_removed
    )

    # sorted() always returns a list, so no further type guard is needed.
    shortlisted = sorted(
        filtered_candidates, key=lambda c: c["score"], reverse=True
    )[:top_n]

    return shortlisted, removed_candidates


# === Keyword & Scoring Functions ===

def extract_keywords(text, top_n=10):
    """
    Extracts top keywords from the job description using spaCy and TF-IDF.

    The text is lowercased and run through the spaCy pipeline; only non-stop-word
    nouns, proper nouns, verbs and adjectives are kept. Those tokens are joined
    into a single document and ranked with a unigram+bigram ``TfidfVectorizer``.
    Because the vectorizer is fitted on that one document only, the ranking
    is effectively by (normalised) term frequency.

    Args:
        text: Source text. ``None``, empty or whitespace-only text yields ``[]``.
        top_n: Maximum number of keywords to return. ``None`` returns all
            ranked terms; zero or a negative value yields ``[]``.

    Returns:
        Up to ``top_n`` keyword strings (unigrams and bigrams), highest
        weight first.
    """
    # A missing job description is an expected input, not an error.
    if not text or not text.strip():
        return []
    # Guard against negative values, which would otherwise slice from the end.
    if top_n is not None and top_n <= 0:
        return []

    doc = nlp(text.lower())
    content_pos = {"NOUN", "PROPN", "VERB", "ADJ"}
    keywords = [t.text for t in doc if t.pos_ in content_pos and not t.is_stop]

    if not keywords:
        return []

    try:
        tfidf = TfidfVectorizer(stop_words="english", ngram_range=(1, 2))
        matrix = tfidf.fit_transform([" ".join(keywords)])
        scores = matrix.toarray()[0]
        features = tfidf.get_feature_names_out()
        ranked = sorted(zip(features, scores), key=lambda pair: pair[1], reverse=True)

        return [kw for kw, _ in ranked[:top_n]]

    except ValueError:
        # Treat a vectorizer failure (e.g. nothing left after its own
        # stop-word filtering) as "no keywords".
        return []


def filter_resumes_by_keywords(resumes, job_description, min_keyword_match=2):
    """
    Filters resumes by keyword match using fuzzy logic.

    Keywords are extracted from the job description with ``extract_keywords``.
    A keyword counts as matched when at least one whitespace-separated,
    lowercased word of the resume scores above 80 with
    ``fuzz.partial_ratio``.

    Args:
        resumes: List of candidate dicts; each must have ``"resume"`` (text)
            and ``"name"`` keys.
        job_description: Job description text to derive keywords from.
        min_keyword_match: Minimum number of distinct keywords a resume must
            match to be kept.

    Returns:
        A ``(filtered, removed)`` tuple: the candidate dicts that passed, and
        the names of those that did not. If fewer than ``min_keyword_match``
        keywords can be extracted, a Streamlit warning is shown and all
        resumes are returned unfiltered with an empty ``removed`` list.
    """
    job_keywords = extract_keywords(job_description)
    if len(job_keywords) < min_keyword_match:
        st.warning("⚠️ Job description too short or missing for keyword filtering.")
        return resumes, []

    filtered, removed = [], []

    for resume in resumes:
        # Tokenise once per resume (not once per keyword) and drop duplicate
        # words; the any() below only cares whether some word matches.
        words = set(resume["resume"].lower().split())

        # NOTE(review): partial_ratio compares the shorter string against the
        # best-matching substring of the longer one, so very short resume
        # tokens can score highly against unrelated keywords. Consider a
        # minimum token length if the filter proves too permissive.
        matched = {
            keyword for keyword in job_keywords
            if any(fuzz.partial_ratio(keyword, word) > 80 for word in words)
        }

        if len(matched) >= min_keyword_match:
            filtered.append(resume)
        else:
            removed.append(resume["name"])

    return filtered, removed


def score_candidate(resume_text, job_description):
    """
    Computes cosine similarity between resume and job description using embeddings.

    Args:
        resume_text: Extracted resume text.
        job_description: Job description text.

    Returns:
        The cosine similarity rounded to 4 decimal places, or ``0.0`` when
        either text is missing/blank or the embedding step fails.
    """
    # A resume with no extractable text (or a missing job description) has
    # nothing to compare; skip the model instead of scoring an empty string.
    if not (resume_text or "").strip() or not (job_description or "").strip():
        return 0.0

    try:
        resume_vec = embedding_model.encode(resume_text, convert_to_tensor=True)
        job_vec = embedding_model.encode(job_description, convert_to_tensor=True)
        score = util.pytorch_cos_sim(resume_vec, job_vec).item()
        return round(score, 4)
    except Exception as e:
        # Best effort: one unscorable resume should not abort the whole batch.
        print(f"Error computing similarity: {e}")
        return 0.0


# === Text Extraction & Summarization ===

def parse_resume(pdf_file):
    """
    Extracts raw text from a PDF file.

    Args:
        pdf_file: File-like object whose ``read()`` returns the PDF bytes.

    Returns:
        The plain text of every page, joined with newlines.
    """
    # Open as a context manager so the PyMuPDF document is closed (and its
    # buffers released) even if text extraction raises.
    with fitz.open(stream=pdf_file.read(), filetype="pdf") as doc:
        return "\n".join(page.get_text("text") for page in doc)


def extract_email(resume_text):
    """
    Extracts the first valid email found in text.

    An address is accepted when it has a local part (word characters, ``.``,
    ``+`` or ``-``), an ``@``, and a dotted domain ending in an alphabetic
    top-level label of at least two letters. Trailing sentence punctuation
    is therefore not captured as part of the address.

    Args:
        resume_text: Text to search. ``None`` or empty text yields ``None``.

    Returns:
        The first matching address, or ``None`` if there is none.
    """
    if not resume_text:
        return None

    match = re.search(
        r"[\w.+-]+@[\w-]+(?:\.[\w-]+)*\.[A-Za-z]{2,}",
        resume_text,
    )
    return match.group(0) if match else None


def summarize_resume(resume_text):
    """
    Generates a summary of the resume using Hugging Face BART.

    Sends the resume text through the configured ``query`` helper with
    ``model="bart"``. The reply may be a dict or a list whose first item is
    a dict; the summary is read from ``generated_text`` or, failing that,
    ``summary_text``.

    Returns:
        The stripped summary string, or a fixed "unavailable" message when
        the API returns nothing or the reply cannot be parsed.
    """
    unavailable = "Summary unavailable."

    api_reply = query({"inputs": f"Summarize this resume: {resume_text}"}, model="bart")
    if not api_reply:
        return "Summary unavailable due to API issues."

    try:
        # List replies carry the result in their first element.
        first = api_reply[0] if isinstance(api_reply, list) else api_reply
        text = first.get("generated_text") or first.get("summary_text")
        if text:
            return text.strip()
        return unavailable
    except Exception as e:
        print(f"Error parsing summary: {e}")
        return unavailable


# === Data Storage & Reporting ===

def store_in_supabase(resume_text, score, candidate_name, email, summary):
    """
    Saves candidate data to the Supabase table.

    Inserts one row into the ``candidates`` table with the candidate's name,
    resume text, score, email and summary. A falsy score (``None``, ``0``)
    is stored as ``0``.

    Returns:
        Whatever the Supabase client's ``execute()`` call returns.
    """
    row = dict(
        name=candidate_name,
        resume=resume_text,
        score=score if score else 0,
        email=email,
        summary=summary,
    )
    candidates_table = supabase.table("candidates")
    return candidates_table.insert(row).execute()


def generate_pdf_report(shortlisted_candidates, questions=None):
    """
    Creates a PDF report summarizing top candidates and interview questions.

    One page is written per candidate (name, email, score, summary). If
    ``questions`` is non-empty, a final page lists them one per line.

    Args:
        shortlisted_candidates: Iterable of candidate dicts with ``name``,
            ``email`` and ``score`` keys and an optional ``summary`` key.
        questions: Optional list of question strings.

    Returns:
        A ``BytesIO`` holding the PDF, positioned at the start.
    """
    pdf = BytesIO()
    text_area = fitz.Rect(50, 50, 550, 750)

    # Context manager guarantees the in-memory document is closed once it
    # has been serialised, even if a page fails to render.
    with fitz.open() as doc:
        for candidate in shortlisted_candidates:
            page = doc.new_page()
            info = (
                f"Candidate: {candidate['name']}\n"
                f"Email: {candidate['email']}\n"
                f"Score: {candidate['score']}\n\n"
                f"Summary:\n{candidate.get('summary', 'No summary available')}"
            )
            # NOTE(review): the return value of insert_textbox is ignored; per
            # the PyMuPDF docs a negative value means the text did not fit the
            # rectangle and was not written. Confirm long summaries render.
            page.insert_textbox(text_area, info, fontsize=11, fontname="helv", align=0)

        if questions:
            q_page = doc.new_page()
            q_text = "Suggested Interview Questions:\n\n" + "\n".join(questions)
            q_page.insert_textbox(text_area, q_text, fontsize=11, fontname="helv", align=0)

        doc.save(pdf)

    pdf.seek(0)
    return pdf


def generate_interview_questions_from_summaries(candidates):
    """
    Generates 5 interview questions based on combined summaries using Gemma model.

    The candidates' summaries are concatenated into one prompt for the
    module-level chat client. The reply is split into lines, leading
    numbering/bold markers are stripped, and the first five non-empty lines
    are renumbered ``Q1.`` .. ``Q5.``.

    Args:
        candidates: List of candidate dicts; each may carry a ``summary``.

    Returns:
        A list of up to five question strings, or a single-item list with a
        warning message when there is nothing to ask about, the model returns
        nothing usable, or the request fails.

    Raises:
        TypeError: If ``candidates`` is not a list.
    """
    if not isinstance(candidates, list):
        raise TypeError("Expected a list of candidate dictionaries.")

    # Tolerate missing or None summaries; they contribute nothing to the prompt.
    summary_parts = ((c.get("summary") or "").strip() for c in candidates)
    summaries = " ".join(part for part in summary_parts if part)

    # With no summary text the prompt would be empty of content; skip the
    # remote call rather than ask the model to invent a candidate.
    if not summaries:
        return ["⚠️ No questions generated."]

    prompt = (
        "Based on the following summary of this top candidate for a job role, "
        "generate 5 thoughtful, general interview questions that would help a recruiter assess their fit:\n"
        f"{summaries}"
    )

    # Leading "1.", "Q2)", "**3.**" style numbering emitted by the model.
    numbering = re.compile(r"^(?:\*\*)?(Q?\d+[\.\)\-]?\s*)+(?:\*\*)?")

    try:
        response = client.chat_completion(
            messages=[{"role": "user", "content": prompt}],
            temperature=0.7,
            max_tokens=500
        )
        result = response.choices[0].message.content

        questions = []
        for line in result.split("\n"):
            cleaned = numbering.sub("", line.strip()).strip()
            # Drop lines that were only numbering/markup so they do not
            # take up one of the five question slots as a blank entry.
            if cleaned:
                questions.append(cleaned)

        return [f"Q{i+1}. {q}" for i, q in enumerate(questions[:5])] or ["⚠️ No questions generated."]
    except Exception as e:
        print(f"❌ Error generating interview questions: {e}")
        return ["⚠️ Error generating questions."]