Tao Wu committed on
Commit
d0208b3
1 Parent(s): f08f602

update metadata

Browse files
Files changed (3) hide show
  1. app/app.py +2 -2
  2. app/data_process.py +2 -0
  3. app/embedding_setup.py +2 -8
app/app.py CHANGED
@@ -13,7 +13,7 @@ with open('/app/data/redis_data.json', 'r') as file:
13
 
14
  skill_details_mapping = {}
15
 
16
- df_course = pd.read_csv('/app/data/all_course_info.csv')
17
  # Function to retrieve documents based on selected skills
18
  def retrieve_documents(occupation,skills):
19
  output = []
@@ -36,7 +36,7 @@ def retrieve_documents(occupation,skills):
36
  print(query)
37
  docs = retriever.get_relevant_documents(query)
38
 
39
- partial_compare_docs = functools.partial(compare_docs_with_context, df_course=df_course, target_occupation_name=target_occupation_name, target_occupation_dsp=target_occupation_dsp,skill_gap = skill_query)
40
  sorted_docs = sorted(docs, key=functools.cmp_to_key(partial_compare_docs), reverse=True)
41
 
42
 
 
13
 
14
  skill_details_mapping = {}
15
 
16
+
17
  # Function to retrieve documents based on selected skills
18
  def retrieve_documents(occupation,skills):
19
  output = []
 
36
  print(query)
37
  docs = retriever.get_relevant_documents(query)
38
 
39
+ partial_compare_docs = functools.partial(compare_docs_with_context, target_occupation_name=target_occupation_name, target_occupation_dsp=target_occupation_dsp,skill_gap = skill_query)
40
  sorted_docs = sorted(docs, key=functools.cmp_to_key(partial_compare_docs), reverse=True)
41
 
42
 
app/data_process.py CHANGED
@@ -35,11 +35,13 @@ def build_occupation_query(occupation):
35
  occupation_name_de = occupation['preferredLabel'].get('de','')
36
  occupation_dsp = occupation['description'].get('de','').get('literal','')
37
  occupation_query = occupation_name_de +" " + occupation['preferredLabel'].get('en','')+" "+ occupation['description'].get('de','').get('literal','') + " "+ occupation_dsp
 
38
  if occupation['_links']['broaderIscoGroup']:
39
  for group in occupation['_links']['broaderIscoGroup']:
40
  occupation_query += " " + group['title']
41
  else:
42
  pass
 
43
  return occupation_name_de,occupation_dsp,occupation_query
44
 
45
  # Get occupations from a CSV
 
35
  occupation_name_de = occupation['preferredLabel'].get('de','')
36
  occupation_dsp = occupation['description'].get('de','').get('literal','')
37
  occupation_query = occupation_name_de +" " + occupation['preferredLabel'].get('en','')+" "+ occupation['description'].get('de','').get('literal','') + " "+ occupation_dsp
38
+ '''
39
  if occupation['_links']['broaderIscoGroup']:
40
  for group in occupation['_links']['broaderIscoGroup']:
41
  occupation_query += " " + group['title']
42
  else:
43
  pass
44
+ '''
45
  return occupation_name_de,occupation_dsp,occupation_query
46
 
47
  # Get occupations from a CSV
app/embedding_setup.py CHANGED
@@ -126,15 +126,9 @@ def evaluate(
126
  output = [_.split('Response:\n')[-1] for _ in output]
127
  return output, logits.tolist()
128
 
129
- def compare_docs_with_context(doc_a, doc_b, df_course, target_occupation_name, target_occupation_dsp,skill_gap):
130
- # Extract course details from the data frame
131
- course_a = df_course[df_course['course_id'] == int(doc_a.metadata['id'])].iloc[0]
132
- course_b = df_course[df_course['course_id'] == int(doc_b.metadata['id'])].iloc[0]
133
- print('comapring...')
134
- print(course_a['course_name'], course_b['course_name'])
135
- # Prepare the input for chain_re.invoke
136
 
137
- courses = f"First: name: {course_a['course_name']} description:{course_a['course_content_limited']} Second: name: {course_b['course_name']} description:{course_b['course_content_limited']}"
138
  #courses = f"First: name: {course_a['course_name']} skills:{course_a['course_skills_edu']} Second: name: {course_b['course_name']} skills:{course_b['course_skills_edu']}"
139
  target_occupation = f"name: {target_occupation_name} description: {target_occupation_dsp}"
140
  skill_gap = skill_gap
 
126
  output = [_.split('Response:\n')[-1] for _ in output]
127
  return output, logits.tolist()
128
 
129
+ def compare_docs_with_context(doc_a, doc_b, target_occupation_name, target_occupation_dsp,skill_gap):
 
 
 
 
 
 
130
 
131
+ courses = f"First: name: {doc_a.metadata['name']} description:{doc_a.metadata['description']} Second: name: {doc_b.metadata['name']} description:{doc_b.metadata['description']}"
132
  #courses = f"First: name: {course_a['course_name']} skills:{course_a['course_skills_edu']} Second: name: {course_b['course_name']} skills:{course_b['course_skills_edu']}"
133
  target_occupation = f"name: {target_occupation_name} description: {target_occupation_dsp}"
134
  skill_gap = skill_gap