Tao Wu committed on
Commit
6fc2fca
1 Parent(s): a5dc95f

add skills query

Browse files
Files changed (1) hide show
  1. app/app.py +6 -6
app/app.py CHANGED
@@ -20,7 +20,7 @@ def retrieve_documents(occupation,skills):
20
  output.append(f"<div style=\"text-align: center; font-size: 24px;\">Empfehlungsergebnisse:</div>")
21
  oc_uri = occupations.get(occupation, "")
22
  skill_query = ''
23
- candidate_doc = []
24
  if isinstance(oc_uri, int):
25
  df = pd.read_csv("/app/data/berufe_info.csv")
26
  target_occupation = df[df['id'] == oc_uri]
@@ -33,20 +33,20 @@ def retrieve_documents(occupation,skills):
33
  target_occupation_name, target_occupation_dsp, target_occupation_query = build_occupation_query(target_occupation)
34
  for german_label in skills:
35
  skill_query += german_label + ' '
36
- skills_doc = retriever.get_relevant_documents(german_label)
37
- candidate_doc.extend(skills_doc[:2])
38
  query = 'target occupation: ' + target_occupation_query + ' Skills gap:' + skill_query
39
  llama_query = 'info:' + target_occupation_name + ' ' + 'Skills gap:' + skill_query
40
  print(query)
41
  docs = retriever.get_relevant_documents(query)
42
- candidate_doc.extend(docs[:5])
43
 
44
  #remove duplicates
45
  seen_course_ids = set()
46
  candidate_doc_unique = []
47
 
48
- for doc in candidate_doc:
49
- course_id = doc.metadata.get('course_id','')
50
  if course_id not in seen_course_ids:
51
  candidate_doc_unique.append(doc)
52
  seen_course_ids.add(course_id)
 
20
  output.append(f"<div style=\"text-align: center; font-size: 24px;\">Empfehlungsergebnisse:</div>")
21
  oc_uri = occupations.get(occupation, "")
22
  skill_query = ''
23
+ candidate_docs = []
24
  if isinstance(oc_uri, int):
25
  df = pd.read_csv("/app/data/berufe_info.csv")
26
  target_occupation = df[df['id'] == oc_uri]
 
33
  target_occupation_name, target_occupation_dsp, target_occupation_query = build_occupation_query(target_occupation)
34
  for german_label in skills:
35
  skill_query += german_label + ' '
36
+ skills_docs = retriever.get_relevant_documents(german_label)
37
+ candidate_docs.extend(skills_docs[:2])
38
  query = 'target occupation: ' + target_occupation_query + ' Skills gap:' + skill_query
39
  llama_query = 'info:' + target_occupation_name + ' ' + 'Skills gap:' + skill_query
40
  print(query)
41
  docs = retriever.get_relevant_documents(query)
42
+ candidate_docs.extend(docs[:5])
43
 
44
  #remove duplicates
45
  seen_course_ids = set()
46
  candidate_doc_unique = []
47
 
48
+ for doc in candidate_docs:
49
+ course_id = doc.metadata.get('id','')
50
  if course_id not in seen_course_ids:
51
  candidate_doc_unique.append(doc)
52
  seen_course_ids.add(course_id)