"""Gradio front end for the MyEduLife course recommendation system.

The user picks a target occupation, marks the skills they already have, and
selects the remaining skills (the "skill gap") they want to learn.  Courses
are then retrieved for that gap, ranked, and shown together with a generated
explanation for each recommendation.
"""

import functools
import json
from typing import List, Optional

import gradio as gr
import pandas as pd
import redis
import requests

from config import *
from embedding_setup import (
    retriever,
    find_similar_occupation,
    compare_docs_with_context,
    generate_exp,
    generate_prompt_exp,
)
from data_process import (
    get_occupations_from_csv,
    get_courses_from_BA,
    get_occupation_detial,
    build_occupation_query,
)

# Data files read by this module.
_BERUFE_INFO_CSV = "/app/data/berufe_info.csv"
_KLDB_ISCO_LOOKUP_CSV = "/app/data/kldb_isco_lookup.csv"
_SKILLS_SNAPSHOT_JSON = "/app/data/redis_data.json"

# Upper bound for every outgoing HTTP call so a stalled remote API cannot
# block a UI callback forever.
_HTTP_TIMEOUT_SECONDS = 30

# Number of courses shown to the user.
_MAX_RECOMMENDATIONS = 5

# Occupation-URI -> JSON string of skills.  Read from a JSON file instead of
# the Redis lookup that is commented out below.
with open(_SKILLS_SNAPSHOT_JSON, 'r', encoding='utf-8') as file:
    data_dict = json.load(file)
#r = redis.Redis(host=REDIS_HOST, port=REDIS_PORT, db=REDIS_DB, decode_responses=True)

# German skill label -> raw skill record; filled as a side effect of
# get_occupation_skills() / get_occupation_skills_BA().
skill_details_mapping = {}


def _resolve_target_occupation(oc_uri):
    """Return ``(name, description, query)`` for the selected occupation.

    Integer ids are looked up in the ``berufe_info.csv`` table; every other
    value is resolved through ``get_occupation_detial`` and
    ``build_occupation_query``.
    """
    if isinstance(oc_uri, int):
        df = pd.read_csv(_BERUFE_INFO_CSV)
        row = df[df['id'] == oc_uri]
        name = row['short name'].values[0]
        description = row['description'].values[0]
        return name, description, name + ' ' + description
    target_occupation = get_occupation_detial(oc_uri)
    return build_occupation_query(target_occupation)


def _dedupe_by_course_id(docs):
    """Keep the first document for each ``metadata['id']``, preserving order."""
    seen_course_ids = set()
    unique_docs = []
    for doc in docs:
        course_id = doc.metadata.get('id', '')
        if course_id not in seen_course_ids:
            seen_course_ids.add(course_id)
            unique_docs.append(doc)
    return unique_docs


# Function to retrieve documents based on selected skills
def retrieve_documents(occupation: str, skills: Optional[List[str]]) -> str:
    """Recommend courses for the selected occupation and skill gap.

    Candidate courses are collected from the retriever (two per selected
    skill, plus five for the combined occupation/skill-gap query),
    de-duplicated by course id, ranked with ``compare_docs_with_context`` and
    the best ``_MAX_RECOMMENDATIONS`` are listed with a generated explanation.

    Args:
        occupation: Label chosen in the occupation dropdown; resolved through
            the module-level ``occupations`` mapping that the ``__main__``
            block builds.
        skills: Labels of the skills the user wants to learn.  ``None`` is
            treated like an empty selection.

    Returns:
        The result text, one entry per line.
    """
    skills = skills or []
    # NOTE(review): this text is rendered through gr.HTML, where plain
    # newlines are not line breaks -- confirm the intended separators.
    output = ["\nEmpfehlungsergebnisse:\n"]

    oc_uri = occupations.get(occupation, "")
    (target_occupation_name,
     target_occupation_dsp,
     target_occupation_query) = _resolve_target_occupation(oc_uri)

    skill_query = ''.join(german_label + ' ' for german_label in skills)

    candidate_docs = []
    for german_label in skills:
        candidate_docs.extend(retriever.get_relevant_documents(german_label)[:2])

    query = 'target occupation: ' + target_occupation_query + ' Skills gap:' + skill_query
    llama_query = 'info:' + target_occupation_name + ' ' + 'Skills gap:' + skill_query
    print(query)
    candidate_docs.extend(retriever.get_relevant_documents(query)[:5])

    compare = functools.partial(
        compare_docs_with_context,
        target_occupation_name=target_occupation_name,
        target_occupation_dsp=target_occupation_dsp,
        skill_gap=skill_query,
    )
    ranked_docs = sorted(
        _dedupe_by_course_id(candidate_docs),
        key=functools.cmp_to_key(compare),
        reverse=True,
    )
    top_docs = ranked_docs[:_MAX_RECOMMENDATIONS]

    batch_prompts = []
    for doc in top_docs:
        doc_name = doc.metadata.get('name', 'Unnamed Document')
        doc_skill = doc.metadata.get('skills', '')
        input_text = f"target occupation: {llama_query}\n Recommended course: name: {doc_name}, learning objectives: {doc_skill[:2000]}"
        batch_prompts.append(generate_prompt_exp(input_text))

    # Evaluate the current batch of prompts (assumed to yield one explanation
    # per prompt, in order).  Skipped when nothing was retrieved.
    batch_output = generate_exp(batch_prompts) if batch_prompts else []

    output.append(f"Zielberuf: {target_occupation_name}")
    output.append(f"Qualifikationslücke: {skill_query}")
    output.append("Empfohlene Kurse:")
    # Pair each course with its explanation instead of indexing 0..4, which
    # raised IndexError whenever fewer than five courses were found.
    for doc, explanation in zip(top_docs, batch_output):
        doc_name = doc.metadata.get('name', 'Unnamed Document')
        output.append(f"{doc_name}")
        output.append(f"Empfehlungsgrund: {explanation}")
        output.append("\n")
    return "\n".join(output)


def get_candidate_courses(occupation: str, skills: Optional[List[str]]) -> str:
    """List course titles reachable from the occupations related to ``skills``.

    For every selected skill the ESCO API is asked which occupations need it.
    The ISCO codes of those occupations are mapped to KldB codes through the
    lookup CSV, the matching rows of ``berufe_info.csv`` are ranked against
    the target occupation with ``find_similar_occupation``, and the entry
    requirements of the top five are passed to ``get_courses_from_BA``.

    Args:
        occupation: Label chosen in the occupation dropdown.
        skills: Labels of the selected skills; each must have been registered
            in ``skill_details_mapping`` with a ``"uri"`` to be considered.

    Returns:
        The header followed by one course title per line.  Only the header is
        returned when no matching occupation could be found.
    """
    output = ["\nEmpfehlungsergebnisse:\n"]
    df_lookup = pd.read_csv(_KLDB_ISCO_LOOKUP_CSV)
    df_berufe = pd.read_csv(_BERUFE_INFO_CSV)

    oc_uri = occupations.get(occupation, "")
    target_occupation = get_occupation_detial(oc_uri)
    # build_occupation_query is unpacked as (name, description, query) in
    # retrieve_documents; only the query text is needed here.
    _, _, target_occupation_query = build_occupation_query(target_occupation)

    # 1) Skills -> links of the ESCO occupations that use them.
    occupation_hrefs = set()
    for german_label in skills or []:
        skill_uri = skill_details_mapping.get(german_label, {}).get("uri")
        if not skill_uri:
            # Unknown skill, or a record without a URI: nothing to query.
            continue
        uri = f'https://ec.europa.eu/esco/api/resource/skill?selectedVersion=v1.0.9&language=en&uri={skill_uri}'
        try:
            skill_response = requests.get(uri, timeout=_HTTP_TIMEOUT_SECONDS)
            skill_response.raise_for_status()
            links = skill_response.json().get('_links', {})
        except requests.RequestException as e:
            print(f"Error while fetching skill details: {e}")
            continue
        # Combine essential and optional occupations
        skill_related_occupations = (links.get('isEssentialForOccupation', []) +
                                     links.get('isOptionalForOccupation', []))
        for related_occupation in skill_related_occupations:
            href = related_occupation.get('href')
            if href:
                occupation_hrefs.add(href)

    # 2) Occupation links -> ISCO codes (the part before the first dot).
    occupation_codes = set()
    for href in occupation_hrefs:
        try:
            occupation_response = requests.get(href, timeout=_HTTP_TIMEOUT_SECONDS)
            occupation_response.raise_for_status()
            occupation_details = occupation_response.json()
        except requests.RequestException as e:
            print(f"Error while fetching occupation details: {e}")
            continue
        code = occupation_details.get('code')
        if code:
            occupation_codes.add(code.split('.')[0])

    # 3) ISCO codes -> KldB codes.
    kldB_set = set()
    for isco_code in occupation_codes:
        kldB_codes = df_lookup[df_lookup['isco08'] == int(isco_code)]['kldb2010'].values
        kldB_set.update(str(code) for code in kldB_codes)

    # 4) KldB codes -> candidate rows of the occupation table.
    dfs = [df_berufe[df_berufe['KldB codes'] == 'B ' + kldb] for kldb in kldB_set]
    if not dfs:
        # pd.concat raises ValueError on an empty list.
        return "\n".join(output)
    merged_df = pd.concat(dfs, ignore_index=True)

    # 5) Most similar occupations -> ids referenced by their entry requirements.
    BA_berufe = set()
    top_k_berufe = find_similar_occupation(target_occupation_query, merged_df, 5, 'cosine')
    for beruf in top_k_berufe:
        entry_requirement = beruf.metadata['entry_requirements']
        # Stored with single quotes; swap them so json can parse the text.
        entry_requirement_json = json.loads(entry_requirement.replace("'", '"'))
        for js in entry_requirement_json:
            BA_berufe.add(str(js['data_idref']))

    # 6) Course lookup.
    courses = get_courses_from_BA(BA_berufe)
    for course in courses.get('_embedded', {}).get('termine', []):
        output.append(f"{course['angebot']['titel']}")
    return "\n".join(output)


def get_occupation_skills(oc_uri) -> List[str]:
    """Return the German skill labels stored for ``oc_uri`` in ``data_dict``.

    Every skill record is also stored in ``skill_details_mapping`` under its
    German label.  An unknown occupation yields an empty list.
    """
    #skills_json = r.get(oc_uri)
    skills_json = data_dict.get(oc_uri, None)
    skill_labels = []
    if not skills_json:
        return skill_labels
    for skill in json.loads(skills_json):
        german_label = skill['preferredLabel']['de']
        skill_details_mapping[german_label] = skill
        skill_labels.append(german_label)
    return skill_labels


def _parse_skill_list(raw) -> list:
    """Parse one skills cell of ``berufe_info.csv`` into a list.

    The cell holds a list written with single quotes.  Missing cells (read by
    pandas as NaN) and blank cells yield an empty list.
    """
    if not isinstance(raw, str) or not raw.strip():
        return []
    return json.loads(raw.replace("'", "\""))


def get_occupation_skills_BA(oc_uri) -> List[str]:
    """Return the skill labels of occupation ``oc_uri`` from ``berufe_info.csv``.

    Essential skills come first, followed by optional skills.  Every record
    is also stored in ``skill_details_mapping`` under its label.  An id that
    is not in the table yields an empty list.
    """
    df = pd.read_csv(_BERUFE_INFO_CSV)
    row = df[df['id'] == oc_uri]
    if row.empty:
        return []
    # Parse both lists separately: splicing the two strings together breaks
    # as soon as either list is empty ("[,]" is not valid JSON).
    skills = (_parse_skill_list(row['essential skills'].values[0]) +
              _parse_skill_list(row['optional skills'].values[0]))
    skill_labels = []
    for skill in skills:
        german_label = skill['skill']
        skill_details_mapping[german_label] = skill
        skill_labels.append(german_label)
    return skill_labels


def _skills_for_occupation(oc_uri) -> List[str]:
    """Fetch skill labels from the source that matches the id type.

    Integer ids are resolved through the CSV table, everything else through
    the ``data_dict`` lookup.
    """
    if isinstance(oc_uri, int):
        return get_occupation_skills_BA(oc_uri)
    return get_occupation_skills(oc_uri)


# Function to update the skills dropdown
def update_skills(occupation: str):
    """Return the "current skills" dropdown filled for ``occupation``."""
    oc_uri = occupations.get(occupation, "")
    skills = _skills_for_occupation(oc_uri)
    return gr.Dropdown(skills, label="aktuelle Fähigkeiten", multiselect=True, info='Bitte wählen Sie die Fähigkeiten aus, die Sie derzeit besitzen')


def update_skillgap(occupation: str, current_skills: Optional[List[str]]):
    """Return the skill-gap dropdown for ``occupation``.

    The choices are the occupation's skills, in their original order, minus
    the ones in ``current_skills``.  ``None`` is treated as "no skills yet".
    """
    oc_uri = occupations.get(occupation, "")
    owned_skills = set(current_skills or [])
    skill_gap = [skill for skill in _skills_for_occupation(oc_uri)
                 if skill not in owned_skills]
    return gr.Dropdown(skill_gap, label="Qualifikationslücke", multiselect=True, info='Bitte wählen Sie die Fähigkeiten aus, die Sie lernen möchten.')


if __name__ == "__main__":
    # Load occupations from CSV
    occupations_esco = get_occupations_from_csv(CSV_FILE_PATH)
    df = pd.read_csv(_BERUFE_INFO_CSV)
    occupations_BA = df[['short name', 'id']].set_index('short name').to_dict()['id']
    # On a label clash the entry from berufe_info.csv wins.
    occupations = {**occupations_esco, **occupations_BA}

    # Gradio interface
    with gr.Blocks(title="MyEduLife Kursempfehlungssystem") as demo:
        occupation_dropdown = gr.Dropdown(list(occupations.keys()), label="Zielberuf", info='Bitte wählen Sie Ihren Zielberuf aus.')
        currentskill_dropdown = gr.Dropdown([], label="aktuelle Fähigkeiten", multiselect=True, info='Bitte wählen Sie die Fähigkeiten aus, die Sie derzeit besitzen')
        sb_btn = gr.Button("Absenden")
        skillgap_dropdown = gr.Dropdown([], label="Fähigkeiten", multiselect=True, info='Bitte wählen Sie die Fähigkeiten aus, die Sie lernen möchten.')
        # Use gr.HTML to display the HTML content
        button = gr.Button("Kursempfehlungen")
        documents_output = gr.HTML()

        # Occupation choice -> refresh the list of skills to tick off.
        occupation_dropdown.change(update_skills, inputs=occupation_dropdown, outputs=currentskill_dropdown)
        # "Absenden" -> compute the remaining skill gap.
        sb_btn.click(
            update_skillgap,
            inputs=[occupation_dropdown, currentskill_dropdown],
            outputs=skillgap_dropdown
        )
        # "Kursempfehlungen" -> run the recommendation pipeline.
        button.click(
            retrieve_documents,
            inputs=[occupation_dropdown, skillgap_dropdown],
            outputs=documents_output
        )
    print('Initialization completed')
    demo.launch(server_name="0.0.0.0", server_port=7860)