Spaces:

drift-ai
/

recruiter-assistant

Runtime error

File size: 5,265 Bytes

import boto3
import os
import json
import pandas as pd
from urllib.parse import urlparse
import random

from langchain.chat_models import ChatOpenAI
from langchain.prompts import ChatPromptTemplate
from langchain.chains import LLMChain, SequentialChain

# Chat model instance passed as `llm=` to the LLMChain objects built below.
# It is created at import time with temperature=0.0, and the API key is read
# from the OPENAI environment variable; because the lookup uses
# os.environ[...], a missing variable raises KeyError as soon as this module
# is loaded.
llm = ChatOpenAI(temperature=0.0, openai_api_key=os.environ["OPENAI"])


def generate_skills() -> list:
    """Ask the LLM for the skills needed in a Data Scientist role.

    The prompt requests ten skills as a JSON list. The reply is parsed with
    ``json.loads`` and accepted in either of two shapes: a bare JSON list
    (what the prompt asks for) or a JSON object whose ``"skills"`` key holds
    the list.

    Returns:
        The parsed collection of skills.

    Raises:
        json.JSONDecodeError: If the model's reply is not valid JSON.
        KeyError: If the reply is a JSON object without a ``"skills"`` key.
    """
    template_generate_skills = """
    Can you generate me a list of skills you would need to be successfully employed in a Data Scientist role?
    Return 10 skills as a JSON list.
    """

    prompt_generate_skills = ChatPromptTemplate.from_template(
        template=template_generate_skills
    )
    role_skills = LLMChain(
        llm=llm, prompt=prompt_generate_skills, output_key="role_skills"
    )

    generate_skills_chain = SequentialChain(
        chains=[role_skills],
        input_variables=[],
        output_variables=["role_skills"],
        verbose=False,
    )

    result = generate_skills_chain({})
    parsed = json.loads(result["role_skills"])

    # The prompt asks for a bare list, so indexing the parsed value with
    # "skills" unconditionally would fail on a compliant reply. Only unwrap
    # when the model chose to return an object instead.
    if isinstance(parsed, dict):
        return parsed["skills"]
    return parsed


def generate_resume(skills: list) -> str:
    """Generate the resume of a data scientist who has the given skills.

    Args:
        skills: Skills to embed in the resume; they are interpolated into the
            prompt between triple backticks.

    Returns:
        The resume text produced by the model, i.e. the chain's ``"resume"``
        output.
    """
    template_generate_resume = """
    Given the following list of skills as an array delimited by three backticks, generate a resume of a data scientist with 3 years of experience.
    Make sure to include a section "skills" in the resume.

    ```
    {skills}
    ```
    """

    prompt_generate_resume = ChatPromptTemplate.from_template(
        template=template_generate_resume
    )
    resume = LLMChain(llm=llm, prompt=prompt_generate_resume, output_key="resume")

    generate_resume_chain = SequentialChain(
        chains=[resume],
        input_variables=["skills"],
        output_variables=["resume"],
        verbose=False,
    )

    result = generate_resume_chain({"skills": skills})

    # Calling the chain yields a mapping keyed by output name, not the text
    # itself. Return only the resume string so the value matches the declared
    # return type and callers do not forward the whole mapping (which, per
    # LangChain's chain-call semantics, also echoes the input skills) into a
    # follow-up prompt.
    return result["resume"]


def retrieve_skills(resume: str) -> list:
    """Extract the skills mentioned in a resume using the LLM.

    Args:
        resume: Resume text; it is interpolated into the prompt between
            triple backticks.

    Returns:
        The skills parsed from the model's JSON reply. A reply shaped as an
        object with a ``"skills"`` key is unwrapped to the value of that key;
        any other parsed value is returned unchanged.

    Raises:
        json.JSONDecodeError: If the model's reply is not valid JSON.
    """
    template_retrieve_skills = """
    Given the following resume delimited by three backticks, retrieve the skills this data scientist possesses.
    Return them as a JSON list.

    ```
    {resume}
    ```
    """

    prompt_retrieve_skills = ChatPromptTemplate.from_template(
        template=template_retrieve_skills
    )
    skills = LLMChain(llm=llm, prompt=prompt_retrieve_skills, output_key="skills")

    retrieve_skills_chain = SequentialChain(
        chains=[skills],
        input_variables=["resume"],
        output_variables=["skills"],
        verbose=False,
    )

    result = retrieve_skills_chain({"resume": resume})
    parsed = json.loads(result["skills"])

    # The prompt asks for a bare list; if the model wraps it in an object,
    # hand back the inner list so callers can compare skills element-wise.
    if isinstance(parsed, dict) and "skills" in parsed:
        return parsed["skills"]
    return parsed


def get_score(true_values: list, predicted_values: list) -> float:
    """Return the fraction of ``true_values`` matched by ``predicted_values``.

    Each distinct predicted value that also occurs in ``true_values`` counts
    once, so repeated predictions cannot inflate the score. The matched
    values are printed before the score is returned.

    Args:
        true_values: The expected values.
        predicted_values: The values to score against ``true_values``.

    Returns:
        ``len(matches) / len(true_values)``, or ``0.0`` when ``true_values``
        is empty.
    """
    # List membership (rather than sets) keeps this working for unhashable
    # elements and preserves the order in which matches were predicted.
    matched = []
    for value in predicted_values:
        if value in true_values and value not in matched:
            matched.append(value)
    print(matched)

    # Nothing was expected, so nothing can be matched; avoid dividing by zero.
    if not true_values:
        return 0.0
    return len(matched) / len(true_values)


if __name__ == "__main__":
    # Round trip: generate role skills, sample three of them, build a resume
    # from the sample, extract skills back out of the resume, then score the
    # extracted skills against the sample.
    candidate_skills = generate_skills()
    expected_skills = random.sample(candidate_skills, 3)
    generated_resume = generate_resume(expected_skills)
    extracted_skills = retrieve_skills(generated_resume)
    match_score = get_score(expected_skills, extracted_skills)

    # Report the sampled skills, the extracted skills and the score, in that
    # order, one per line.
    for report_item in (expected_skills, extracted_skills, match_score):
        print(report_item)

# def get_resumes() -> str:

#     s3 = boto3.client(
#         's3',
#         region_name='eu-west-1'
#     )

# resumes = s3.get_object(Bucket='ausy-datalake-drift-nonprod', Key='resume-matcher/raw/resume-dataset.csv')

# resumes_list = resumes['Body'].read().decode('utf-8').splitlines()
# resumes_list = resumes['Body'].read().decode('utf-8').splitlines()
# resumes_list = str(resumes_list).replace('. ', '.\n')
# resumes_list = str(resumes_list).replace('â¢', '\n - ')
# resumes_list = [s.replace('. ', '.\n') for s in resumes_list]
# resumes_list = [s.replace('â¢', '\n - ') for s in resumes_list]
# resume_string =''.join(resumes_list)
#     s3_uri = urlparse("s3://ausy-datalake-drift-nonprod/resume-matcher/raw/resume-dataset.csv", allow_fragments=False).geturl()
#     resumes_list = pd.read_csv(s3_uri, header=None, encoding='utf-8')[0].tolist()

#     return resumes_list

# def get_skills(resumes: str) -> list:

#     template_resumes_get_skills = """
#     Given the following string, delimited by <RESUMES> and </RESUMES> which contains resumes which are not properly formatted, categorize the resumes based on domain.
#     For each domain list the skills of the resumes that are part of that domain.

#     Create a JSON object where the keys are the domains and the values are a list containing the skills.

#     Return that JSON object only.

#     <RESUMES>
#     {resumes}
#     </RESUMES>
#     """

#     prompt_vacancy_get_skills = ChatPromptTemplate.from_template(template=template_resumes_get_skills)
#     resume_skills = LLMChain(llm=llm, prompt=prompt_vacancy_get_skills, output_key="resume_skills")

#     get_skills_resumes_chain = SequentialChain(
#         chains=[resume_skills],
#         input_variables=["resumes"],
#         output_variables=["resume_skills"],
#         verbose=False
#     )

#     result = get_skills_resumes_chain({"resumes": resumes})
#     # print(result)
#     resume_skills = json.loads(result['resume_skills'])
#     print(resume_skills)

# if __name__ == "__main__":
#     resumes = get_resumes()
#     print(resumes)
# for x in resumes:
#     get_skills(x)