Spaces:
Paused
Paused
Tao Wu
committed on
Commit
•
e789d9b
1
Parent(s):
ffd107a
add skills query
Browse files - app/app.py +15 -1
app/app.py
CHANGED
@@ -20,6 +20,7 @@ def retrieve_documents(occupation,skills):
|
|
20 |
output.append(f"<div style=\"text-align: center; font-size: 24px;\">Empfehlungsergebnisse:</div>")
|
21 |
oc_uri = occupations.get(occupation, "")
|
22 |
skill_query = ''
|
|
|
23 |
if isinstance(oc_uri, int):
|
24 |
df = pd.read_csv("/app/data/berufe_info.csv")
|
25 |
target_occupation = df[df['id'] == oc_uri]
|
@@ -32,13 +33,26 @@ def retrieve_documents(occupation,skills):
|
|
32 |
target_occupation_name, target_occupation_dsp, target_occupation_query = build_occupation_query(target_occupation)
|
33 |
for german_label in skills:
|
34 |
skill_query += german_label + ' '
|
|
|
|
|
35 |
query = 'target occupation: ' + target_occupation_query + ' Skills gap:' + skill_query
|
36 |
llama_query = 'info:' + target_occupation_name + ' ' + 'Skills gap:' + skill_query
|
37 |
print(query)
|
38 |
docs = retriever.get_relevant_documents(query)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
39 |
|
40 |
partial_compare_docs = functools.partial(compare_docs_with_context, target_occupation_name=target_occupation_name, target_occupation_dsp=target_occupation_dsp,skill_gap = skill_query)
|
41 |
-
sorted_docs = sorted(
|
42 |
|
43 |
|
44 |
batch_prompts = []
|
|
|
20 |
output.append(f"<div style=\"text-align: center; font-size: 24px;\">Empfehlungsergebnisse:</div>")
|
21 |
oc_uri = occupations.get(occupation, "")
|
22 |
skill_query = ''
|
23 |
+
candidate_doc = []
|
24 |
if isinstance(oc_uri, int):
|
25 |
df = pd.read_csv("/app/data/berufe_info.csv")
|
26 |
target_occupation = df[df['id'] == oc_uri]
|
|
|
33 |
target_occupation_name, target_occupation_dsp, target_occupation_query = build_occupation_query(target_occupation)
|
34 |
for german_label in skills:
|
35 |
skill_query += german_label + ' '
|
36 |
+
skills_doc = retriever.get_relevant_documents(german_label)
|
37 |
+
candidate_doc.extend(skills_doc[:2])
|
38 |
query = 'target occupation: ' + target_occupation_query + ' Skills gap:' + skill_query
|
39 |
llama_query = 'info:' + target_occupation_name + ' ' + 'Skills gap:' + skill_query
|
40 |
print(query)
|
41 |
docs = retriever.get_relevant_documents(query)
|
42 |
+
candidate_doc.extend(docs[:5])
|
43 |
+
|
44 |
+
#remove duplicates
|
45 |
+
seen_course_ids = set()
|
46 |
+
candidate_doc_unique = []
|
47 |
+
|
48 |
+
for doc in candidate_doc:
|
49 |
+
course_id = doc['metadata'].get('course_id')
|
50 |
+
if course_id not in seen_course_ids:
|
51 |
+
candidate_doc_unique.append(doc)
|
52 |
+
seen_course_ids.add(course_id)
|
53 |
|
54 |
partial_compare_docs = functools.partial(compare_docs_with_context, target_occupation_name=target_occupation_name, target_occupation_dsp=target_occupation_dsp,skill_gap = skill_query)
|
55 |
+
sorted_docs = sorted(candidate_doc, key=functools.cmp_to_key(partial_compare_docs), reverse=True)
|
56 |
|
57 |
|
58 |
batch_prompts = []
|