Tao Wu committed on
Commit
e789d9b
1 Parent(s): ffd107a

add skills query

Browse files
Files changed (1) hide show
  1. app/app.py +15 -1
app/app.py CHANGED
@@ -20,6 +20,7 @@ def retrieve_documents(occupation,skills):
20
  output.append(f"<div style=\"text-align: center; font-size: 24px;\">Empfehlungsergebnisse:</div>")
21
  oc_uri = occupations.get(occupation, "")
22
  skill_query = ''
 
23
  if isinstance(oc_uri, int):
24
  df = pd.read_csv("/app/data/berufe_info.csv")
25
  target_occupation = df[df['id'] == oc_uri]
@@ -32,13 +33,26 @@ def retrieve_documents(occupation,skills):
32
  target_occupation_name, target_occupation_dsp, target_occupation_query = build_occupation_query(target_occupation)
33
  for german_label in skills:
34
  skill_query += german_label + ' '
 
 
35
  query = 'target occupation: ' + target_occupation_query + ' Skills gap:' + skill_query
36
  llama_query = 'info:' + target_occupation_name + ' ' + 'Skills gap:' + skill_query
37
  print(query)
38
  docs = retriever.get_relevant_documents(query)
 
 
 
 
 
 
 
 
 
 
 
39
 
40
  partial_compare_docs = functools.partial(compare_docs_with_context, target_occupation_name=target_occupation_name, target_occupation_dsp=target_occupation_dsp,skill_gap = skill_query)
41
- sorted_docs = sorted(docs, key=functools.cmp_to_key(partial_compare_docs), reverse=True)
42
 
43
 
44
  batch_prompts = []
 
20
  output.append(f"<div style=\"text-align: center; font-size: 24px;\">Empfehlungsergebnisse:</div>")
21
  oc_uri = occupations.get(occupation, "")
22
  skill_query = ''
23
+ candidate_doc = []
24
  if isinstance(oc_uri, int):
25
  df = pd.read_csv("/app/data/berufe_info.csv")
26
  target_occupation = df[df['id'] == oc_uri]
 
33
  target_occupation_name, target_occupation_dsp, target_occupation_query = build_occupation_query(target_occupation)
34
  for german_label in skills:
35
  skill_query += german_label + ' '
36
+ skills_doc = retriever.get_relevant_documents(german_label)
37
+ candidate_doc.extend(skills_doc[:2])
38
  query = 'target occupation: ' + target_occupation_query + ' Skills gap:' + skill_query
39
  llama_query = 'info:' + target_occupation_name + ' ' + 'Skills gap:' + skill_query
40
  print(query)
41
  docs = retriever.get_relevant_documents(query)
42
+ candidate_doc.extend(docs[:5])
43
+
44
+ #remove duplicates
45
+ seen_course_ids = set()
46
+ candidate_doc_unique = []
47
+
48
+ for doc in candidate_doc:
49
+ course_id = doc['metadata'].get('course_id')
50
+ if course_id not in seen_course_ids:
51
+ candidate_doc_unique.append(doc)
52
+ seen_course_ids.add(course_id)
53
 
54
  partial_compare_docs = functools.partial(compare_docs_with_context, target_occupation_name=target_occupation_name, target_occupation_dsp=target_occupation_dsp,skill_gap = skill_query)
55
+ sorted_docs = sorted(candidate_doc, key=functools.cmp_to_key(partial_compare_docs), reverse=True)
56
 
57
 
58
  batch_prompts = []