# Spaces: Runtime error
import boto3 | |
import os | |
import json | |
import pandas as pd | |
from urllib.parse import urlparse | |
import random | |
from langchain.chat_models import ChatOpenAI | |
from langchain.prompts import ChatPromptTemplate | |
from langchain.chains import LLMChain, SequentialChain | |
# Shared chat model used by every chain in this module: temperature 0.0,
# API key read from the OPENAI environment variable at import time.
llm = ChatOpenAI(
    openai_api_key=os.environ["OPENAI"],
    temperature=0.0,
)
def generate_skills() -> list:
    """Ask the chat model for skills needed in a Data Scientist role.

    Runs a one-step ``SequentialChain`` on the module-level ``llm`` and parses
    the model's reply as JSON.

    The prompt requests a JSON list, so a bare list is accepted. A JSON object
    wrapping the list under a ``"skills"`` key is accepted as well.

    Returns:
        The list of skills parsed from the model's reply.

    Raises:
        json.JSONDecodeError: If the reply is not valid JSON.
        KeyError: If the reply is a JSON object without a ``"skills"`` key.
        ValueError: If the parsed reply does not yield a JSON list.
    """
    template_generate_skills = """
    Can you generate me a list of skills you would need to be successfully employed in a Data Scientist role?
    Return 10 skills as a JSON list.
    """
    prompt_generate_skills = ChatPromptTemplate.from_template(
        template=template_generate_skills
    )
    role_skills = LLMChain(
        llm=llm, prompt=prompt_generate_skills, output_key="role_skills"
    )
    generate_skills_chain = SequentialChain(
        chains=[role_skills],
        input_variables=[],
        output_variables=["role_skills"],
        verbose=False,
    )
    result = generate_skills_chain({})

    parsed = json.loads(result["role_skills"])
    # Indexing a bare list with ["skills"] raises TypeError, so only unwrap
    # when the model actually returned an object.
    if isinstance(parsed, dict):
        parsed = parsed["skills"]
    if not isinstance(parsed, list):
        raise ValueError(
            f"Expected a JSON list of skills, got {type(parsed).__name__}"
        )
    return parsed
def generate_resume(skills: list) -> str:
    """Generate a data-scientist resume that showcases the given skills.

    Runs a one-step ``SequentialChain`` on the module-level ``llm`` with the
    skills substituted into the prompt.

    Args:
        skills: The skills to feed into the prompt's ``{skills}`` placeholder.

    Returns:
        The resume text produced by the model.
    """
    template_generate_resume = """
    Given the following list of skills as an array delimited by three backticks, generate a resume of a data scientist with 3 years of experience.
    Make sure to include a section "skills" in the resume.
    ```
    {skills}
    ```
    """
    prompt_generate_resume = ChatPromptTemplate.from_template(
        template=template_generate_resume
    )
    resume = LLMChain(llm=llm, prompt=prompt_generate_resume, output_key="resume")
    generate_resume_chain = SequentialChain(
        chains=[resume],
        input_variables=["skills"],
        output_variables=["resume"],
        verbose=False,
    )
    result = generate_resume_chain({"skills": skills})
    # The chain call yields a mapping keyed by output name. Return only the
    # generated text so the declared ``str`` return type holds and callers do
    # not pass the whole mapping into a later prompt.
    return result["resume"]
def retrieve_skills(resume: str) -> list:
    """Extract the skills mentioned in a resume using the chat model.

    Runs a one-step ``SequentialChain`` on the module-level ``llm`` and parses
    the model's reply as JSON.

    The prompt requests a JSON list, so a bare list is accepted. A JSON object
    wrapping the list under a ``"skills"`` key is accepted as well.

    Args:
        resume: The resume text substituted into the ``{resume}`` placeholder.

    Returns:
        The list of skills parsed from the model's reply.

    Raises:
        json.JSONDecodeError: If the reply is not valid JSON.
        ValueError: If the parsed reply does not yield a JSON list.
    """
    template_retrieve_skills = """
    Given the following resume delimited by three backticks, retrieve the skills this data scientist possesses.
    Return them as a JSON list.
    ```
    {resume}
    ```
    """
    prompt_retrieve_skills = ChatPromptTemplate.from_template(
        template=template_retrieve_skills
    )
    skills = LLMChain(llm=llm, prompt=prompt_retrieve_skills, output_key="skills")
    retrieve_skills_chain = SequentialChain(
        chains=[skills],
        input_variables=["resume"],
        output_variables=["skills"],
        verbose=False,
    )
    result = retrieve_skills_chain({"resume": resume})

    parsed = json.loads(result["skills"])
    # Unwrap an object-shaped reply such as {"skills": [...]} so callers
    # always receive the list itself.
    if isinstance(parsed, dict) and "skills" in parsed:
        parsed = parsed["skills"]
    if not isinstance(parsed, list):
        raise ValueError(
            f"Expected a JSON list of skills, got {type(parsed).__name__}"
        )
    return parsed
def get_score(true_values: list, predicted_values: list) -> float:
    """Score how many of the expected values were recovered.

    Counts the distinct entries of ``predicted_values`` that also occur in
    ``true_values`` and divides by ``len(true_values)``. Each match is counted
    once, so repeated predictions cannot push the score above 1.0.

    Args:
        true_values: The reference values.
        predicted_values: The values to evaluate against the reference.

    Returns:
        The number of distinct matches divided by ``len(true_values)``, or
        ``0.0`` when ``true_values`` is empty.
    """
    matched = []
    for value in predicted_values:
        # List membership (not a set) so unhashable JSON values still work.
        if value in true_values and value not in matched:
            matched.append(value)
    print(matched)
    if not true_values:
        # Nothing to recover; avoid dividing by zero.
        return 0.0
    return len(matched) / len(true_values)
if __name__ == "__main__":
    # Round trip: generate candidate skills, sample three, write a resume
    # from them, extract the skills back out, and score the extraction
    # against the sample.
    candidate_skills = generate_skills()
    expected = random.sample(candidate_skills, 3)
    extracted = retrieve_skills(generate_resume(expected))
    accuracy = get_score(expected, extracted)
    # Report the sampled skills, the extracted skills, then the score.
    for item in (expected, extracted, accuracy):
        print(item)
# def get_resumes() -> str: | |
# s3 = boto3.client( | |
# 's3', | |
# region_name='eu-west-1' | |
# ) | |
# resumes = s3.get_object(Bucket='ausy-datalake-drift-nonprod', Key='resume-matcher/raw/resume-dataset.csv') | |
# resumes_list = resumes['Body'].read().decode('utf-8').splitlines() | |
# resumes_list = resumes['Body'].read().decode('utf-8').splitlines() | |
# resumes_list = str(resumes_list).replace('. ', '.\n') | |
# resumes_list = str(resumes_list).replace('â¢', '\n - ') | |
# resumes_list = [s.replace('. ', '.\n') for s in resumes_list] | |
# resumes_list = [s.replace('â¢', '\n - ') for s in resumes_list] | |
# resume_string =''.join(resumes_list) | |
# s3_uri = urlparse("s3://ausy-datalake-drift-nonprod/resume-matcher/raw/resume-dataset.csv", allow_fragments=False).geturl() | |
# resumes_list = pd.read_csv(s3_uri, header=None, encoding='utf-8')[0].tolist() | |
# return resumes_list | |
# def get_skills(resumes: str) -> list: | |
# template_resumes_get_skills = """ | |
# Given the following string, delimited by <RESUMES> and </RESUMES> which contains resumes which are not properly formatted, categorize the resumes based on domain. | |
# For each domain list the skills of the resumes that are part of that domain. | |
# Create a JSON object where the keys are the domains and the values are a list containing the skills.
# Return that JSON object only. | |
# <RESUMES> | |
# {resumes} | |
# </RESUMES> | |
# """ | |
# prompt_vacancy_get_skills = ChatPromptTemplate.from_template(template=template_resumes_get_skills) | |
# resume_skills = LLMChain(llm=llm, prompt=prompt_vacancy_get_skills, output_key="resume_skills") | |
# get_skills_resumes_chain = SequentialChain( | |
# chains=[resume_skills], | |
# input_variables=["resumes"], | |
# output_variables=["resume_skills"], | |
# verbose=False | |
# ) | |
# result = get_skills_resumes_chain({"resumes": resumes}) | |
# # print(result) | |
# resume_skills = json.loads(result['resume_skills']) | |
# print(resume_skills) | |
# if __name__ == "__main__": | |
# resumes = get_resumes() | |
# print(resumes) | |
# for x in resumes: | |
# get_skills(x) | |