FilipinosRich committed on
Commit
5f111f9
1 Parent(s): e481d34
Files changed (1) hide show
  1. test.py +35 -28
test.py CHANGED
@@ -12,31 +12,32 @@ from langchain.chains import LLMChain, SequentialChain
12
  llm = ChatOpenAI(temperature=0.0, openai_api_key=os.environ["OPENAI"])
13
 
14
 
15
-
16
-
17
  def generate_skills() -> list:
18
-
19
  template_generate_skills = """
20
  Can you generate me a list of skills you would need to be successfully employed in a Data Scientist role?
21
  Return 10 skills as a JSON list.
22
  """
23
 
24
- prompt_generate_skills = ChatPromptTemplate.from_template(template=template_generate_skills)
25
- role_skills = LLMChain(llm=llm, prompt=prompt_generate_skills, output_key="role_skills")
 
 
 
 
26
 
27
  generate_skills_chain = SequentialChain(
28
  chains=[role_skills],
29
  input_variables=[],
30
  output_variables=["role_skills"],
31
- verbose=False
32
  )
33
 
34
  result = generate_skills_chain({})
35
  result_array = json.loads(result["role_skills"])["skills"]
36
  return result_array
37
 
38
- def generate_resume(skills: list) -> str:
39
 
 
40
  template_generate_resume = """
41
  Given the following list of skills as an array delimited by three backticks, generate a resume of a data scientist with 3 years of experience.
42
  Make sure to include a section "skills" in the resume.
@@ -46,22 +47,24 @@ def generate_resume(skills: list) -> str:
46
  ```
47
  """
48
 
49
- prompt_generate_resume = ChatPromptTemplate.from_template(template=template_generate_resume)
 
 
50
  resume = LLMChain(llm=llm, prompt=prompt_generate_resume, output_key="resume")
51
 
52
  generate_resume_chain = SequentialChain(
53
  chains=[resume],
54
  input_variables=["skills"],
55
  output_variables=["resume"],
56
- verbose=False
57
  )
58
 
59
  result = generate_resume_chain({"skills": skills})
60
 
61
  return result
62
 
 
63
  def retrieve_skills(resume: str) -> str:
64
-
65
  template_retrieve_skills = """
66
  Given the following resume delimited by three backticks, retrieve the skills this data scientist possesses.
67
  Return them as a JSON list.
@@ -71,14 +74,16 @@ def retrieve_skills(resume: str) -> str:
71
  ```
72
  """
73
 
74
- prompt_retrieve_skills = ChatPromptTemplate.from_template(template=template_retrieve_skills)
 
 
75
  skills = LLMChain(llm=llm, prompt=prompt_retrieve_skills, output_key="skills")
76
 
77
  retrieve_skills_chain = SequentialChain(
78
  chains=[skills],
79
  input_variables=["resume"],
80
  output_variables=["skills"],
81
- verbose=False
82
  )
83
 
84
  result = retrieve_skills_chain({"resume": resume})
@@ -86,10 +91,12 @@ def retrieve_skills(resume: str) -> str:
86
 
87
  return result_array
88
 
89
- def get_score(true_values:list, predicted_values:list) -> float:
 
90
  intersection_list = [value for value in predicted_values if value in true_values]
91
  print(intersection_list)
92
- return len(intersection_list)/len(true_values)
 
93
 
94
  if __name__ == "__main__":
95
  role_skills = generate_skills()
@@ -107,16 +114,16 @@ if __name__ == "__main__":
107
  # 's3',
108
  # region_name='eu-west-1'
109
  # )
110
-
111
- # resumes = s3.get_object(Bucket='ausy-datalake-drift-nonprod', Key='resume-matcher/raw/resume-dataset.csv')
112
-
113
- # resumes_list = resumes['Body'].read().decode('utf-8').splitlines()
114
- # resumes_list = resumes['Body'].read().decode('utf-8').splitlines()
115
- # resumes_list = str(resumes_list).replace('. ', '.\n')
116
- # resumes_list = str(resumes_list).replace('•', '\n - ')
117
- # resumes_list = [s.replace('. ', '.\n') for s in resumes_list]
118
- # resumes_list = [s.replace('•', '\n - ') for s in resumes_list]
119
- # resume_string =''.join(resumes_list)
120
  # s3_uri = urlparse("s3://ausy-datalake-drift-nonprod/resume-matcher/raw/resume-dataset.csv", allow_fragments=False).geturl()
121
  # resumes_list = pd.read_csv(s3_uri, header=None, encoding='utf-8')[0].tolist()
122
 
@@ -125,9 +132,9 @@ if __name__ == "__main__":
125
  # def get_skills(resumes: str) -> list:
126
 
127
  # template_resumes_get_skills = """
128
- # Given the following string, delimited by <RESUMES> and </RESUMES> which contains resumes which are not properly formatted, categorize the resumes based on domain.
129
  # For each domain list the skills of the resumes that are part of that domain.
130
-
131
  # Create a JSON object where the keys are the domains and the values are a list containing the skills.
132
 
133
  # Return that JSON object only.
@@ -155,5 +162,5 @@ if __name__ == "__main__":
155
  # if __name__ == "__main__":
156
  # resumes = get_resumes()
157
  # print(resumes)
158
- # for x in resumes:
159
- # get_skills(x)
 
12
  llm = ChatOpenAI(temperature=0.0, openai_api_key=os.environ["OPENAI"])
13
 
14
 
 
 
15
  def generate_skills() -> list:
 
16
  template_generate_skills = """
17
  Can you generate me a list of skills you would need to be successfully employed in a Data Scientist role?
18
  Return 10 skills as a JSON list.
19
  """
20
 
21
+ prompt_generate_skills = ChatPromptTemplate.from_template(
22
+ template=template_generate_skills
23
+ )
24
+ role_skills = LLMChain(
25
+ llm=llm, prompt=prompt_generate_skills, output_key="role_skills"
26
+ )
27
 
28
  generate_skills_chain = SequentialChain(
29
  chains=[role_skills],
30
  input_variables=[],
31
  output_variables=["role_skills"],
32
+ verbose=False,
33
  )
34
 
35
  result = generate_skills_chain({})
36
  result_array = json.loads(result["role_skills"])["skills"]
37
  return result_array
38
 
 
39
 
40
+ def generate_resume(skills: list) -> str:
41
  template_generate_resume = """
42
  Given the following list of skills as an array delimited by three backticks, generate a resume of a data scientist with 3 years of experience.
43
  Make sure to include a section "skills" in the resume.
 
47
  ```
48
  """
49
 
50
+ prompt_generate_resume = ChatPromptTemplate.from_template(
51
+ template=template_generate_resume
52
+ )
53
  resume = LLMChain(llm=llm, prompt=prompt_generate_resume, output_key="resume")
54
 
55
  generate_resume_chain = SequentialChain(
56
  chains=[resume],
57
  input_variables=["skills"],
58
  output_variables=["resume"],
59
+ verbose=False,
60
  )
61
 
62
  result = generate_resume_chain({"skills": skills})
63
 
64
  return result
65
 
66
+
67
  def retrieve_skills(resume: str) -> str:
 
68
  template_retrieve_skills = """
69
  Given the following resume delimited by three backticks, retrieve the skills this data scientist possesses.
70
  Return them as a JSON list.
 
74
  ```
75
  """
76
 
77
+ prompt_retrieve_skills = ChatPromptTemplate.from_template(
78
+ template=template_retrieve_skills
79
+ )
80
  skills = LLMChain(llm=llm, prompt=prompt_retrieve_skills, output_key="skills")
81
 
82
  retrieve_skills_chain = SequentialChain(
83
  chains=[skills],
84
  input_variables=["resume"],
85
  output_variables=["skills"],
86
+ verbose=False,
87
  )
88
 
89
  result = retrieve_skills_chain({"resume": resume})
 
91
 
92
  return result_array
93
 
94
+
95
def get_score(true_values: list, predicted_values: list) -> float:
    """Return the fraction of ``true_values`` found in ``predicted_values``.

    Args:
        true_values: The reference values.
        predicted_values: The values to score against the reference.

    Returns:
        The number of distinct predicted values that also occur in
        ``true_values``, divided by ``len(true_values)``. Returns ``0.0``
        when ``true_values`` is empty instead of dividing by zero.
    """
    intersection_list = []
    for value in predicted_values:
        # Skip repeats so a duplicated prediction cannot push the score
        # above 1.0; list membership keeps unhashable values usable.
        if value in true_values and value not in intersection_list:
            intersection_list.append(value)
    print(intersection_list)

    if not true_values:
        return 0.0
    return len(intersection_list) / len(true_values)
99
+
100
 
101
  if __name__ == "__main__":
102
  role_skills = generate_skills()
 
114
  # 's3',
115
  # region_name='eu-west-1'
116
  # )
117
+
118
+ # resumes = s3.get_object(Bucket='ausy-datalake-drift-nonprod', Key='resume-matcher/raw/resume-dataset.csv')
119
+
120
+ # resumes_list = resumes['Body'].read().decode('utf-8').splitlines()
121
+ # resumes_list = resumes['Body'].read().decode('utf-8').splitlines()
122
+ # resumes_list = str(resumes_list).replace('. ', '.\n')
123
+ # resumes_list = str(resumes_list).replace('•', '\n - ')
124
+ # resumes_list = [s.replace('. ', '.\n') for s in resumes_list]
125
+ # resumes_list = [s.replace('•', '\n - ') for s in resumes_list]
126
+ # resume_string =''.join(resumes_list)
127
  # s3_uri = urlparse("s3://ausy-datalake-drift-nonprod/resume-matcher/raw/resume-dataset.csv", allow_fragments=False).geturl()
128
  # resumes_list = pd.read_csv(s3_uri, header=None, encoding='utf-8')[0].tolist()
129
 
 
132
  # def get_skills(resumes: str) -> list:
133
 
134
  # template_resumes_get_skills = """
135
+ # Given the following string, delimited by <RESUMES> and </RESUMES> which contains resumes which are not properly formatted, categorize the resumes based on domain.
136
  # For each domain list the skills of the resumes that are part of that domain.
137
+
138
  # Create a JSON object where the keys are the domains and the values are a list containing the skills.
139
 
140
  # Return that JSON object only.
 
162
  # if __name__ == "__main__":
163
  # resumes = get_resumes()
164
  # print(resumes)
165
+ # for x in resumes:
166
+ # get_skills(x)