import spacy
import streamlit as st
import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
import requests
import re
import pandas as pd
import fitz  # PyMuPDF
import PyPDF2
from PIL import Image
import pytesseract
from pdf2image import convert_from_bytes
import docx  # For DOCX processing
import io
import os

# Download necessary NLTK data
nltk.download('punkt')
nltk.download('stopwords')

# Download and load the SpaCy model if not already available
try:
    nlp = spacy.load("en_core_web_sm")
except OSError:
    from spacy.cli import download
    download("en_core_web_sm")  # Download the model
    nlp = spacy.load("en_core_web_sm")  # Load the model after downloading


# Function for PyMuPDF text extraction
def extract_text_with_pymupdf(pdf_file):
    """Extract text using PyMuPDF (fitz)."""
    text = ""
    pdf_document = fitz.open(stream=pdf_file.read(), filetype="pdf")
    for page in pdf_document:
        text += page.get_text()
    pdf_document.close()
    return text


# Function for PyPDF2 text extraction
def extract_text_with_pypdf2(pdf_file):
    """Extract text using PyPDF2."""
    text = ""
    pdf_reader = PyPDF2.PdfReader(pdf_file)
    for page in pdf_reader.pages:
        text += page.extract_text() or ""
    return text


# Function for OCR extraction (for scanned PDFs)
def extract_text_with_ocr(pdf_file):
    """Extract text using OCR (for scanned PDFs)."""
    text = ""
    # Uploaded files have no filesystem path, so convert the PDF's bytes
    # to images rather than converting from a path
    images = convert_from_bytes(pdf_file.read())
    for image in images:
        text += pytesseract.image_to_string(image)
    return text


# Function for DOCX text extraction
def extract_text_from_docx(docx_file):
    """Extract text from a DOCX file."""
    doc = docx.Document(docx_file)
    return '\n'.join(para.text for para in doc.paragraphs)


# Unified PDF extraction function
def extract_text_from_pdf(pdf_file):
    """Extract text by trying multiple methods, falling back in order."""
    text = ""

    # Attempt PyMuPDF extraction
    try:
        text = extract_text_with_pymupdf(pdf_file)
        if text.strip():  # PyMuPDF returned meaningful text
            return text
    except Exception as e:
        print(f"Error with PyMuPDF: {e}")

    # Reset file pointer
    pdf_file.seek(0)

    # Attempt PyPDF2 extraction
    try:
        text = extract_text_with_pypdf2(pdf_file)
        if text.strip():  # PyPDF2 returned meaningful text
            return text
    except Exception as e:
        print(f"Error with PyPDF2: {e}")

    # Reset file pointer
    pdf_file.seek(0)

    # Attempt OCR as a last resort
    try:
        text = extract_text_with_ocr(pdf_file)
        if text.strip():  # OCR returned meaningful text
            return text
    except Exception as e:
        print(f"Error with OCR: {e}")

    return text  # Return empty text if all methods fail


# Function to clean and normalize text
def clean_and_normalize_text(text):
    """Clean and normalize the resume/job description text."""
    # Tokenization
    tokens = word_tokenize(text)

    # Lowercase and remove non-alphabetical tokens
    tokens = [word.lower() for word in tokens if word.isalpha()]

    # Remove stopwords using NLTK
    stop_words = set(stopwords.words("english"))
    filtered_tokens = [word for word in tokens if word not in stop_words]

    # Lemmatization using SpaCy
    doc = nlp(' '.join(filtered_tokens))
    lemmatized_tokens = [token.lemma_ for token in doc]

    # Reconstruct the cleaned text and collapse extra whitespace
    cleaned_text = ' '.join(lemmatized_tokens)
    cleaned_text = re.sub(r'\s+', ' ', cleaned_text).strip()
    return cleaned_text


# Function for Named Entity Recognition (NER)
def extract_named_entities(text):
    """Extract named entities from text using SpaCy."""
    doc = nlp(text)
    # Extract (text, label) pairs for each recognized entity
    entities = [(ent.text, ent.label_) for ent in doc.ents]
    return entities
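
# A minimal, hypothetical smoke test for the extraction pipeline above. It is
# not part of the original app and is never invoked by Streamlit; call it
# manually when debugging outside the UI. The path argument is illustrative.
def _debug_extract(pdf_path: str) -> None:
    """Run the PDF fallback chain, clean the text, and print a few entities."""
    with open(pdf_path, "rb") as f:
        raw_text = extract_text_from_pdf(f)
    cleaned = clean_and_normalize_text(raw_text)
    print(cleaned[:300])                         # Preview of the normalized text
    print(extract_named_entities(cleaned)[:10])  # First few (text, label) pairs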

# Function to analyze the resume and job description using the Gemini 1.5 Flash model
def analyze_documents(resume_text, job_description):
    """Analyze resume text against the job description using Gemini 1.5 Flash."""
    custom_prompt = f"""
    Please analyze the following resume in the context of the job description provided.
    For the match percentage, please consider:
    - The relevance of the hard skills mentioned.
    - The match of experiences and achievements listed in the resume.
    - Only return a 100% match if all critical skills, experiences, and keywords align well and meaningfully with the job description.

    Job Description:
    {job_description}

    Resume:
    {resume_text}
    """

    API_KEY = os.getenv("GEMINI_API_KEY")  # Set this environment variable securely
    if not API_KEY:
        return {
            "Match Percentage": "API Key Missing",
            "Recommendations": "Please set the GEMINI_API_KEY environment variable."
        }

    url = f"https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash-latest:generateContent?key={API_KEY}"
    headers = {'Content-Type': 'application/json'}
    data = {
        "contents": [
            {"role": "user", "parts": [{"text": custom_prompt}]}
        ]
    }

    response = requests.post(url, headers=headers, json=data)
    if response.status_code == 200:
        try:
            result = response.json()
            # Gemini nests generated text under candidates -> content -> parts
            analysis = result["candidates"][0]["content"]["parts"][0]["text"].strip()
            return {
                "Match Percentage": analysis,  # Raw model output; parse further as needed
                "Recommendations": "Placeholder for actual recommendations."
            }
        except (ValueError, KeyError, IndexError):
            return {"Match Percentage": "Error", "Recommendations": "Failed to parse response."}
    else:
        return {
            "Match Percentage": "Error",
            "Recommendations": f"API request failed with status code {response.status_code}."
        }


# Streamlit app configuration
st.set_page_config(page_title="ATS Resume Evaluation System", layout="wide")

# Header Section
st.markdown(
    """ """,
    unsafe_allow_html=True
)
st.markdown('