# Source: Hugging Face Space "husseinelsaadi/Codingo" (status: Paused).
# File size: 22,631 bytes.

import os
import sys

# Redirect every Hugging Face cache location to /tmp.  This happens before
# the remaining imports below so the settings are already in the environment
# when those modules load.
os.environ.update({
    "HF_HOME": "/tmp/huggingface",
    "TRANSFORMERS_CACHE": "/tmp/huggingface/transformers",
    "HUGGINGFACE_HUB_CACHE": "/tmp/huggingface/hub",
})

# Flask's instance folder is forced to a writable temporary location.  The
# directory is created right away, ahead of the Flask import and well before
# the ``Flask(...)`` object that receives this path is constructed.
safe_instance_path = "/tmp/flask_instance"
os.makedirs(safe_instance_path, exist_ok=True)

from flask import Flask, render_template, redirect, url_for, flash, request, jsonify
from flask_login import LoginManager, login_required, current_user
from werkzeug.utils import secure_filename
import sys
import json
from datetime import datetime

# Adjust sys.path for import flexibility
current_dir = os.path.dirname(os.path.abspath(__file__))
sys.path.append(current_dir)

# Import and initialize DB
from backend.models.database import db, Job, Application, init_db
from backend.models.user import User
from backend.routes.auth import auth_bp, handle_resume_upload
from backend.routes.interview_api import interview_api
# Import additional utilities
import re
import json

# -----------------------------------------------------------------------------
# Chatbot setup
#
# The chatbot feature uses a local vector database (Chroma) to search the
# ``chatbot/chatbot.txt`` knowledge base and then calls the Groq API via the
# OpenAI client.  To avoid the expensive model and database initialisation on
# every request, we lazily load the embeddings and collection the first time
# a chat query is processed.  Subsequent requests reuse the same global
# objects.  See ``init_chatbot()`` and ``get_chatbot_response()`` below for
# implementation details.

# Paths for the chatbot knowledge base and persistent vector store.  We
# compute these relative to the current file so that the app can be deployed
# anywhere without needing to change configuration.  The ``chroma_db``
# directory will be created automatically by the Chroma client if it does not
# exist.
# Both locations live inside the ``chatbot`` folder next to this file: the
# plain-text knowledge base and the directory used for the vector store.
CHATBOT_TXT_PATH, CHATBOT_DB_DIR = (
    os.path.join(current_dir, 'chatbot', leaf)
    for leaf in ('chatbot.txt', 'chroma_db')
)

# API credentials for Groq.  These values mirror those in the standalone
# ``chatbot/chatbot.py`` script.  Both can be overridden without editing the
# code by setting the ``GROQ_API_KEY`` / ``GROQ_MODEL`` environment variables
# (for example as deployment secrets); an unset or empty variable falls back
# to the literal below.
#
# SECURITY: the literal key is kept only as a backward-compatible fallback.
# It is committed in plain text and must therefore be treated as compromised:
# rotate it, provide the replacement through the environment and then delete
# the fallback value from this file.
GROQ_API_KEY = (
    os.environ.get("GROQ_API_KEY")
    or "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"
)
GROQ_MODEL = os.environ.get("GROQ_MODEL") or "llama3-8b-8192"

# Global objects used by the chatbot.  They remain ``None`` until
# ``init_chatbot()`` runs.  After initialisation, ``_chatbot_embedder`` holds
# the SentenceTransformer model and ``_chatbot_collection`` is the Chroma
# collection with embedded knowledge base documents.  A separate import of
# the OpenAI client is performed in ``get_chatbot_response()`` to avoid
# unintentional import side effects at module import time.
# Both lazily-initialised chatbot singletons start out unset; they are filled
# in together by ``init_chatbot()``.
_chatbot_embedder = _chatbot_collection = None

def init_chatbot() -> None:
    """Initialise the chatbot embedding model and vector database.

    The function is idempotent: once both module-level globals
    (``_chatbot_embedder`` and ``_chatbot_collection``) are populated,
    further calls return immediately.

    On the first call the knowledge base at ``CHATBOT_TXT_PATH`` is read and
    split into overlapping chunks.  The ``all-MiniLM-L6-v2`` sentence
    transformer is loaded and a Chroma collection named ``chatbot`` is opened
    with ``CHATBOT_DB_DIR`` as its persist directory and telemetry disabled.
    The chunks are embedded and added only when the collection does not
    already contain the first chunk id, so an already-populated store is not
    re-encoded.

    Raises
    ------
    OSError
        If the knowledge base file cannot be opened or read.
    ValueError
        If the knowledge base produces no chunks at all.
    """
    global _chatbot_embedder, _chatbot_collection
    if _chatbot_embedder is not None and _chatbot_collection is not None:
        return

    # Heavy third-party imports are deferred to the first chatbot use so that
    # they do not slow down application start-up.
    from langchain.text_splitter import RecursiveCharacterTextSplitter
    from sentence_transformers import SentenceTransformer
    import chromadb
    from chromadb.config import Settings

    # Create the persist directory explicitly rather than relying on Chroma.
    os.makedirs(CHATBOT_DB_DIR, exist_ok=True)

    # Read the raw FAQ text and split it into overlapping chunks to improve
    # retrieval granularity.
    with open(CHATBOT_TXT_PATH, encoding='utf-8') as f:
        text = f.read()
    splitter = RecursiveCharacterTextSplitter(chunk_size=300, chunk_overlap=100)
    docs = [chunk.strip() for chunk in splitter.split_text(text)]
    if not docs:
        # Fail with an explicit message instead of an obscure error from the
        # vector store when there is nothing to index.
        raise ValueError(f'Chatbot knowledge base is empty: {CHATBOT_TXT_PATH}')

    # Load the sentence transformer.  To change the model, update the name
    # here.
    embedder = SentenceTransformer('all-MiniLM-L6-v2')

    # NOTE(review): whether ``persist_directory`` really yields an on-disk
    # store depends on the installed Chroma version -- confirm.
    client = chromadb.Client(Settings(persist_directory=CHATBOT_DB_DIR, anonymized_telemetry=False))
    collection = client.get_or_create_collection('chatbot')
    ids = [f'doc_{i}' for i in range(len(docs))]

    # Probe for the first chunk id to decide whether the collection is already
    # populated.  Any failure of the probe is treated as "not populated", as
    # before.  Only ``doc_0`` is checked, so edits to the knowledge base are
    # not picked up by an existing store.
    try:
        existing = collection.get(ids=ids[:1])
        already_populated = bool(existing.get('documents'))
    except Exception:
        already_populated = False

    if not already_populated:
        # Encode only when the documents actually need to be inserted, and
        # hand Chroma plain Python lists rather than numpy arrays.
        embeddings = embedder.encode(docs, show_progress_bar=False, batch_size=32)
        collection.add(
            documents=docs,
            embeddings=[vector.tolist() for vector in embeddings],
            ids=ids,
        )

    _chatbot_embedder = embedder
    _chatbot_collection = collection

def get_chatbot_response(query: str) -> str:
    """Generate a reply to the user's query using the knowledge base and Groq API.

    ``init_chatbot()`` is called first so that the embedding model and the
    Chroma collection are available.  The query is embedded, the three
    nearest knowledge-base chunks are retrieved and joined with newlines, and
    the result is sent together with the question to the Groq
    OpenAI-compatible endpoint.  The system prompt instructs the model to
    answer only questions relevant to the supplied context and to refuse
    anything else with a fixed sentence.

    The Groq credentials are passed with this single request instead of being
    written to the ``openai`` module globals, so other code in the process
    that uses ``openai`` is left untouched.

    Parameters
    ----------
    query: str
        The user's input message.

    Returns
    -------
    str
        The assistant's reply with surrounding whitespace removed (an empty
        string if the API returned no content).

    Raises
    ------
    Exception
        Errors from initialisation, retrieval or the API call are not caught
        here and propagate to the caller.
    """
    init_chatbot()
    # Local import to avoid pulling the dependency in on module import.
    import openai
    embedder = _chatbot_embedder
    collection = _chatbot_collection

    # Convert the embedding to a plain list before handing it to Chroma.
    query_embedding = embedder.encode([query])[0].tolist()
    results = collection.query(query_embeddings=[query_embedding], n_results=3)
    # ``documents`` holds one list of hits per query; tolerate a missing or
    # empty result by falling back to an empty context.
    retrieved_docs = (results.get('documents') or [[]])[0]
    context = "\n".join(retrieved_docs)

    system_prompt = (
        "You are a helpful assistant for the Codingo website. "
        "Only answer questions that are directly relevant to the context provided. "
        "If the user asks anything unrelated, politely refuse by saying: "
        "\"I'm only trained to answer questions about the Codingo platform.\""
    )
    user_prompt = f"Context:\n{context}\n\nQuestion: {query}"

    # ``api_key`` and ``api_base`` are per-request overrides of the legacy
    # (pre-1.0) OpenAI client, which keeps the Groq configuration local to
    # this call.
    completion = openai.ChatCompletion.create(
        model=GROQ_MODEL,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ],
        max_tokens=200,
        temperature=0.3,
        api_key=GROQ_API_KEY,
        api_base="https://api.groq.com/openai/v1",
    )

    content = completion['choices'][0]['message']['content']
    return (content or "").strip()
# Initialize Flask app
# Static files and templates are served from the ``backend`` package, and the
# instance folder points at the writable path prepared at the top of the file.
app = Flask(
    __name__,
    static_folder='backend/static',
    static_url_path='/static',
    template_folder='backend/templates',
    instance_path=safe_instance_path  # writable '/tmp/flask_instance'
)

# SECURITY: the secret key signs the session cookie, so it must not be a
# guessable constant.  It is read from the ``SECRET_KEY`` environment
# variable; the historical literal remains only as a backward-compatible
# fallback and should be removed once the variable is configured.
app.config['SECRET_KEY'] = os.environ.get('SECRET_KEY') or 'saadi'

# -----------------------------------------------------------------------------
# Cookie configuration for Hugging Face Spaces
#
# When running this app inside an iframe (as is typical on Hugging Face Spaces),
# browsers will drop cookies that have the default SameSite policy of ``Lax``.
# This prevents the Flask session cookie from being stored and means that
# ``login_user()`` will appear to have no effect – the user will be redirected
# back to the home page but remain anonymous. By explicitly setting the
# SameSite policy to ``None`` and enabling the ``Secure`` flag, we allow the
# session and remember cookies to be sent even when the app is embedded in an
# iframe. Without these settings the sign‑up and login flows work locally
# but silently fail in Spaces, causing the "redirect to home page without
# anything" behaviour reported by users.
# Session and remember-me cookies share the same policy: SameSite=None plus
# the Secure flag.
app.config.update(
    SESSION_COOKIE_SAMESITE='None',
    SESSION_COOKIE_SECURE=True,
    REMEMBER_COOKIE_SAMESITE='None',
    REMEMBER_COOKIE_SECURE=True,
)

# Configure the database connection
# Use /tmp directory for database in Hugging Face Spaces
# Note: Data will be lost when the space restarts
# SQLite file under /tmp; modification tracking is switched off.
app.config.update({
    'SQLALCHEMY_DATABASE_URI': 'sqlite:////tmp/codingo.db',
    'SQLALCHEMY_TRACK_MODIFICATIONS': False,
})
from flask_wtf.csrf import CSRFProtect

# csrf = CSRFProtect(app)

# Create necessary directories in writable locations
# Each directory is created if missing; existing directories are left alone.
for _writable_dir in ('/tmp/static/audio', '/tmp/temp'):
    os.makedirs(_writable_dir, exist_ok=True)

# Initialize DB with app
init_db(app)

# Flask-Login setup: attach the manager to the app, then name the endpoint
# used as the login view.
login_manager = LoginManager()
login_manager.init_app(app)
login_manager.login_view = 'auth.login'

@login_manager.user_loader
def load_user(user_id):
    """Return the ``User`` for the id stored in the session, or ``None``.

    Flask-Login calls this with the id it stored for the logged-in user.  A
    value that cannot be converted to an integer (missing, tampered or stale
    session data) yields ``None`` instead of raising, so the request
    continues as anonymous rather than failing with a server error.
    """
    try:
        primary_key = int(user_id)
    except (TypeError, ValueError):
        return None
    return db.session.get(User, primary_key)

# Register blueprints
# The auth blueprint is mounted without extra options; the interview API is
# mounted under the "/api" prefix.
for _blueprint, _mount_options in (
    (auth_bp, {}),
    (interview_api, {"url_prefix": "/api"}),
):
    app.register_blueprint(_blueprint, **_mount_options)

# Routes (keep your existing routes)
@app.route('/')
def index():
    """Render the ``index.html`` template for the site root."""
    landing_page = render_template('index.html')
    return landing_page

@app.route('/jobs')
def jobs():
    """Render ``jobs.html`` with all jobs ordered by ``date_posted``, newest first."""
    newest_first = Job.date_posted.desc()
    listings = Job.query.order_by(newest_first).all()
    return render_template('jobs.html', jobs=listings)

@app.route('/job/<int:job_id>')
def job_detail(job_id):
    """Render ``job_detail.html`` for one job; respond 404 if the id is unknown."""
    selected_job = Job.query.get_or_404(job_id)
    return render_template('job_detail.html', job=selected_job)

@app.route('/apply/<int:job_id>', methods=['GET', 'POST'])
@login_required
def apply(job_id):
    """Show the application form for a job and store submitted applications.

    GET renders ``apply.html`` for the job (404 if the job does not exist).
    POST saves the uploaded ``resume`` file through ``handle_resume_upload``
    and stores an ``Application`` for the current user.  Its
    ``extracted_features`` column receives a JSON object with the manually
    entered ``skills``, ``experience`` and ``education`` lists.  If the upload
    helper reports an error, the form is re-rendered with a flash message and
    nothing is stored.
    """
    job = Job.query.get_or_404(job_id)
    if request.method != 'POST':
        return render_template('apply.html', job=job)

    # The form field is named ``resume``.  Only the error flag and the saved
    # file path are used; the features returned by the helper are ignored in
    # favour of the manually entered fields below.
    _features, error, filepath = handle_resume_upload(request.files.get('resume'))
    if error:
        flash("Resume upload failed. Please try again.", "danger")
        return render_template('apply.html', job=job)

    def parse_entries(raw_value: str):
        """Split on commas, semicolons or newlines; drop blank entries."""
        # Same normalisation as the skills field in ``post_job``, using the
        # module-level ``re`` import.
        return [
            entry.strip()
            for entry in re.split(r'[\n,;]+', raw_value or '')
            if entry.strip()
        ]

    manual_features = {
        field: parse_entries(request.form.get(field, ''))
        for field in ('skills', 'experience', 'education')
    }

    # ``extracted_features`` is stored as a JSON string.
    application = Application(
        job_id=job_id,
        user_id=current_user.id,
        name=current_user.username,
        email=current_user.email,
        resume_path=filepath,
        extracted_features=json.dumps(manual_features)
    )

    db.session.add(application)
    db.session.commit()

    flash('Your application has been submitted successfully!', 'success')
    return redirect(url_for('jobs'))

@app.route('/my_applications')
@login_required
def my_applications():
    """Render the current user's applications, most recently applied first."""
    own_applications = Application.query.filter_by(user_id=current_user.id)
    newest_first = own_applications.order_by(Application.date_applied.desc())
    return render_template('my_applications.html', applications=newest_first.all())

# -----------------------------------------------------------------------------
# Chatbot API endpoint
#
# This route receives a JSON payload containing a ``message`` field from the
# front‑end chat widget.  It validates the input, invokes the chatbot
# response function and returns a JSON response.  Any errors are surfaced
# as a 400 or 500 response with an ``error`` message field.
@app.route('/chatbot', methods=['POST'])
def chatbot_endpoint():
    """Answer a chat message posted as JSON ``{"message": ...}``.

    Returns ``{"response": <reply>}`` on success.  A body that is not a JSON
    object, or whose ``message`` is missing, ``null`` or blank, yields a 400
    response with ``{"error": "Empty message"}``.  Any failure while
    generating the reply is logged with its traceback and reported as a 500
    response carrying a generic ``error`` message, so internal details are
    not sent to the client.
    """
    payload = request.get_json(silent=True)
    # Only a JSON object can carry a ``message``; lists, strings and numbers
    # are treated like an empty request instead of raising on ``.get``.
    message = payload.get('message') if isinstance(payload, dict) else None
    user_input = '' if message is None else str(message).strip()
    if not user_input:
        return jsonify({"error": "Empty message"}), 400
    try:
        reply = get_chatbot_response(user_input)
    except Exception:
        # Boundary handler: keep the full traceback in the server log, but do
        # not echo exception text (which may include provider details) back
        # to the caller.
        app.logger.exception("Chatbot error")
        return jsonify({"error": "The chatbot is currently unavailable. Please try again later."}), 500
    return jsonify({"response": reply})

@app.route('/parse_resume', methods=['POST'])
def parse_resume():
    """Store an uploaded resume and return its feature fields as JSON.

    The ``resume`` file is passed to ``handle_resume_upload``.  An upload
    error produces a 400 response with an ``error`` message.  Otherwise the
    response always contains the keys ``name``, ``email``, ``mobile_number``,
    ``skills``, ``experience``, ``education`` and ``summary``: values come
    from the returned features when present, and default to empty strings or
    empty lists when they are not.
    """
    uploaded = request.files.get('resume')
    features, error, filepath = handle_resume_upload(uploaded)

    if error:
        return {"error": "Error processing resume. Please try again."}, 400

    # An empty/absent feature set and a populated one share a single code
    # path: every key falls back to its empty default.
    extracted = features or {}
    field_defaults = (
        ("name", ""),
        ("email", ""),
        ("mobile_number", ""),
        ("skills", []),
        ("experience", []),
        ("education", []),
        ("summary", ""),
    )
    payload = {key: extracted.get(key, fallback) for key, fallback in field_defaults}
    return payload, 200

@app.route("/interview/<int:job_id>")
@login_required
def interview_page(job_id):
    """Render the interview page for a job the current user has applied to.

    Responds 404 for an unknown job.  If the user has no application for the
    job, or the application has no stored features, a warning is flashed and
    the user is redirected to the job detail page.
    """
    job = Job.query.get_or_404(job_id)
    lookup = Application.query.filter_by(user_id=current_user.id, job_id=job_id)
    application = lookup.first()

    if application and application.extracted_features:
        # The stored features are a JSON string; decode them for the template.
        cv_data = json.loads(application.extracted_features)
        return render_template("interview.html", job=job, cv=cv_data)

    flash("Please apply for this job and upload your resume first.", "warning")
    return redirect(url_for('job_detail', job_id=job_id))


# -----------------------------------------------------------------------------
# Recruiter job posting route
#
# Authenticated users with a recruiter or admin role can access this page to
# create new job listings.  Posted jobs are associated with the current
# recruiter via the ``recruiter_id`` foreign key on the ``Job`` model.
@app.route('/post_job', methods=['GET', 'POST'])
@login_required
def post_job():
    """Show the job posting form and create a job from a valid submission.

    Users whose role is neither ``recruiter`` nor ``admin`` are redirected to
    the job list with a warning.  On POST, all five form fields are required;
    each missing field is flashed as an error and the form is shown again.
    A valid submission is stored as a ``Job`` owned by the current user, with
    the skills saved as a JSON-encoded list.
    """
    if current_user.role not in ('recruiter', 'admin'):
        flash('You do not have permission to post jobs.', 'warning')
        return redirect(url_for('jobs'))

    # Anything other than a form submission just displays the empty form.
    if request.method != 'POST':
        return render_template('post_job.html')

    form = request.form
    role_title = form.get('role', '').strip()
    description = form.get('description', '').strip()
    seniority = form.get('seniority', '').strip()
    skills_input = form.get('skills', '').strip()
    company = form.get('company', '').strip()

    # Required fields paired with the message shown when they are blank, in
    # the order the messages are flashed.
    required_fields = (
        (role_title, 'Job title is required.'),
        (description, 'Job description is required.'),
        (seniority, 'Seniority level is required.'),
        (skills_input, 'Skills are required.'),
        (company, 'Company name is required.'),
    )
    problems = [message for value, message in required_fields if not value]
    if problems:
        for message in problems:
            flash(message, 'danger')
        return render_template('post_job.html')

    # Skills may be separated by commas, semicolons or newlines; blank
    # entries are dropped before the list is JSON encoded.
    raw_skills = re.split(r'[\n,;]+', skills_input)
    skills_list = [skill.strip() for skill in raw_skills if skill.strip()]

    new_job = Job(
        role=role_title,
        description=description,
        seniority=seniority,
        skills=json.dumps(skills_list),
        company=company,
        recruiter_id=current_user.id
    )
    db.session.add(new_job)
    db.session.commit()

    flash('Job posted successfully!', 'success')
    return redirect(url_for('jobs'))


# -----------------------------------------------------------------------------
# Recruiter dashboard route
#
# Displays a list of candidates who applied to jobs posted by the current
# recruiter.  Candidates are sorted by a simple skill match score computed
# against the job requirements.  A placeholder download button is provided
# for future PDF report functionality.
@app.route('/dashboard')
@login_required
def dashboard():
    """Render the recruiter dashboard with scored candidates.

    Users whose role is neither ``recruiter`` nor ``admin`` are redirected to
    the home page with a warning.  For every application to a job posted by
    the current user, a skill match score is computed and the applications
    are passed to ``dashboard.html`` sorted from highest to lowest score.
    """
    if current_user.role not in ('recruiter', 'admin'):
        flash('You do not have permission to access the dashboard.', 'warning')
        return redirect(url_for('index'))

    own_jobs = Job.query.filter_by(recruiter_id=current_user.id).all()
    job_ids = [posting.id for posting in own_jobs]
    if not job_ids:
        # Nothing posted yet, so there are no candidates to show.
        return render_template('dashboard.html', candidates=[])

    candidate_apps = Application.query.filter(Application.job_id.in_(job_ids)).all()

    # Minimum overlap ratio for each label, checked from best to worst.
    score_bands = (
        (0.75, 'Excellent', 4),
        (0.5, 'Good', 3),
        (0.25, 'Medium', 2),
    )

    def compute_score(application):
        """Return ``(label, value)`` from the skills overlap with the job.

        The ratio is the share of the job's skills (compared
        case-insensitively) that the candidate also lists.  A job without
        skills, or any error while reading the stored data, gives the
        neutral ``('Medium', 2)``.
        """
        try:
            stored = application.extracted_features
            candidate_features = json.loads(stored) if stored else {}
            candidate_skills = candidate_features.get('skills', [])
            posting = application.job
            job_skills = json.loads(posting.skills) if posting and posting.skills else []
            if not job_skills:
                return ('Medium', 2)

            candidate_set = {skill.lower() for skill in candidate_skills}
            job_set = {skill.lower() for skill in job_skills}
            matched = candidate_set & job_set
            ratio = len(matched) / len(job_set) if job_set else 0

            for threshold, label, value in score_bands:
                if ratio >= threshold:
                    return (label, value)
            return ('Poor', 1)
        except Exception:
            return ('Medium', 2)

    scored = [(record, compute_score(record)) for record in candidate_apps]
    candidates_with_scores = [
        {'application': record, 'score_label': label, 'score_value': value}
        for record, (label, value) in scored
    ]
    # Highest score first; ties keep their original relative order.
    candidates_with_scores.sort(key=lambda entry: entry['score_value'], reverse=True)

    return render_template('dashboard.html', candidates=candidates_with_scores)

if __name__ == '__main__':
    print("Starting Codingo application...")
    # Create any missing tables before the server starts handling requests.
    with app.app_context():
        db.create_all()

    # Use port from environment or default to 7860
    port = int(os.environ.get('PORT', 7860))

    # SECURITY: the server listens on every interface, so the interactive
    # debugger must not be on by default -- it allows code execution from the
    # browser.  Debug mode is now opt-in via ``FLASK_DEBUG`` (1/true/yes/on).
    debug_enabled = os.environ.get('FLASK_DEBUG', '').strip().lower() in ('1', 'true', 'yes', 'on')
    app.run(debug=debug_enabled, host='0.0.0.0', port=port)