import ast
import datetime
import hmac
import http.client
import json
import os
import time
from collections import Counter
from operator import itemgetter

from langchain_community.document_loaders import PyPDFLoader, TextLoader
from chainlit.types import AskFileResponse
from langchain.text_splitter import CharacterTextSplitter, RecursiveCharacterTextSplitter
from langchain.schema.runnable import Runnable, RunnablePassthrough, RunnableLambda
from langchain.schema.runnable.config import RunnableConfig
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.chains import ConversationalRetrievalChain, create_extraction_chain
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain_community.llms import HuggingFaceEndpoint
from langchain.chains import LLMChain
from langchain_core.prompts import PromptTemplate
from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain.schema import StrOutputParser
from langchain.chains.conversational_retrieval.prompts import CONDENSE_QUESTION_PROMPT
from langchain.chains.question_answering import load_qa_chain
from langchain.chains.qa_with_sources import load_qa_with_sources_chain
from langchain_pinecone import PineconeVectorStore
from pinecone import Pinecone
from langchain.memory import ChatMessageHistory, ConversationBufferMemory
import pandas as pd
import numpy as np
import chainlit as cl
from chainlit.input_widget import Select, TextInput
from chainlit import user_session
from offres_emploi import Api
from offres_emploi.utils import dt_to_str_iso
import plotly.express as px
import bcrypt
import requests
from bs4 import BeautifulSoup
from literalai import LiteralClient

from homeskills import homeRome, homeEsco

literal_client = LiteralClient(api_key=os.getenv("LITERAL_API_KEY"))
literal_client.instrument_openai()

# Author label used on every message this module sends.
_AUTHOR = "Datapcc : 🌐🌐🌐"
# Seconds to wait for the scraped pages before giving up instead of hanging.
_HTTP_TIMEOUT = 30
# Deletes the list/dict punctuation left over from str() of job-offer records.
_BRACKETS = str.maketrans("", "", "[]{}")
# Job-offer columns, in the positional order _format_emploi relies on.
_EMPLOI_COLUMNS = [
    'intitule', 'typeContratLibelle', 'experienceLibelle', 'competences',
    'description', 'qualitesProfessionnelles', 'salaire', 'lieuTravail',
    'formations',
]
# (ID, Longitude, Latitude) rows returned by localisation().
_CENTROIDS = (
    ("01", 5.3245259, 46.0666003), ("02", 3.5960246, 49.5519632),
    ("03", 3.065278, 46.4002783), ("04", 6.2237688, 44.1105837),
    ("05", 6.2018836, 44.6630487), ("06", 7.0755745, 43.9463082),
    ("07", 4.3497308, 44.7626044), ("08", 4.6234893, 49.6473884),
    ("09", 1.6037147, 42.9696091), ("10", 4.1394954, 48.2963286),
    ("11", 2.3140163, 43.1111427), ("12", 2.7365234, 44.2786323),
    ("13", 5.0515492, 43.5539098), ("14", -0.3930779, 49.1024215),
    ("15", 2.6367657, 44.9643217), ("16", 0.180475, 45.706264),
    ("17", -0.7082589, 45.7629699), ("18", 2.5292424, 47.0926687),
    ("19", 1.8841811, 45.3622055), ("2A", 8.9906834, 41.8619761),
    ("2B", 9.275489, 42.372014), ("21", 4.7870471, 47.4736746),
    ("22", -2.9227591, 48.408402), ("23", 2.0265508, 46.0837382),
    ("24", 0.7140145, 45.1489678), ("25", 6.3991355, 47.1879451),
    ("26", 5.1717552, 44.8055408), ("27", 0.9488116, 49.1460288),
    ("28", 1.2793491, 48.3330017), ("29", -4.1577074, 48.2869945),
    ("30", 4.2650329, 43.9636468), ("31", 1.2728958, 43.3671081),
    ("32", 0.4220039, 43.657141), ("33", -0.5760716, 44.8406068),
    ("34", 3.4197556, 43.62585), ("35", -1.6443812, 48.1801254),
    ("36", 1.6509938, 46.7964222), ("37", 0.7085619, 47.2802601),
    ("38", 5.6230772, 45.259805), ("39", 5.612871, 46.7398138),
    ("40", -0.8771738, 44.0161251), ("41", 1.3989178, 47.5866519),
    ("42", 4.2262355, 45.7451186), ("43", 3.8118151, 45.1473029),
    ("44", -1.7642949, 47.4616509), ("45", 2.2372695, 47.8631395),
    ("46", 1.5732157, 44.6529284), ("47", 0.4788052, 44.4027215),
    ("48", 3.4991239, 44.5191573), ("49", -0.5136056, 47.3945201),
    ("50", -1.3203134, 49.0162072), ("51", 4.2966555, 48.9479636),
    ("52", 5.1325796, 48.1077196), ("53", -0.7073921, 48.1225795),
    ("54", 6.144792, 48.7995163), ("55", 5.2888292, 49.0074545),
    ("56", -2.8746938, 47.9239486), ("57", 6.5610683, 49.0399233),
    ("58", 3.5544332, 47.1122301), ("59", 3.2466616, 50.4765414),
    ("60", 2.4161734, 49.3852913), ("61", 0.2248368, 48.5558919),
    ("62", 2.2555152, 50.4646795), ("63", 3.1322144, 45.7471805),
    ("64", -0.793633, 43.3390984), ("65", 0.1478724, 43.0526238),
    ("66", 2.5239855, 42.5825094), ("67", 7.5962225, 48.662515),
    ("68", 7.2656284, 47.8586205), ("69", 4.6859896, 45.8714754),
    ("70", 6.1388571, 47.5904191), ("71", 4.6394021, 46.5951234),
    ("72", 0.1947322, 48.0041421), ("73", 6.4662232, 45.4956055),
    ("74", 6.3609606, 46.1045902), ("75", 2.3416082, 48.8626759),
    ("76", 1.025579, 49.6862911), ("77", 2.8977309, 48.5957831),
    ("78", 1.8080138, 48.7831982), ("79", -0.3159014, 46.5490257),
    ("80", 2.3380595, 49.9783317), ("81", 2.2072751, 43.8524305),
    ("82", 1.2649374, 44.1254902), ("83", 6.1486127, 43.5007903),
    ("84", 5.065418, 44.0001599), ("85", -1.3956692, 46.5929102),
    ("86", 0.4953679, 46.5719095), ("87", 1.2500647, 45.9018644),
    ("88", 6.349702, 48.1770451), ("89", 3.5634078, 47.8474664),
    ("90", 6.9498114, 47.6184394), ("91", 2.2714555, 48.5203114),
    ("92", 2.2407148, 48.835321), ("93", 2.4811577, 48.9008719),
    ("94", 2.4549766, 48.7832368), ("95", 2.1802056, 49.076488),
    ("974", 55.536384, -21.115141), ("973", -53.125782, 3.933889),
    ("972", -61.024174, 14.641528), ("971", -61.551, 16.265),
)


@cl.password_auth_callback
def auth_callback(username: str, password: str):
    """Authenticate a user against the JSON list in CHAINLIT_AUTH_LOGIN.

    Each entry of that list is read through its 'ident', 'pwd' and 'role'
    keys. Returns a ``cl.User`` for the roles "admindatapcc" and
    "userdatapcc"; returns ``None`` for an unknown user, a wrong password or
    any other role.
    """
    auth = json.loads(os.environ['CHAINLIT_AUTH_LOGIN'])
    # A default avoids the StopIteration the bare next() raised for an
    # unknown username.
    entry = next((d for d in auth if d['ident'] == username), None)
    if entry is None:
        return None
    # The stored password is plaintext, so hashing it with a fresh salt on
    # every login only amounted to an equality test; compare in constant time.
    if not hmac.compare_digest(password.encode('utf-8'), entry['pwd'].encode('utf-8')):
        return None
    ident = entry['ident']
    if entry['role'] == "admindatapcc":
        return cl.User(
            identifier=ident + " : đŸ§‘â€đŸ’Œ Admin Datapcc",
            metadata={"role": "admin", "provider": "credentials"},
        )
    if entry['role'] == "userdatapcc":
        return cl.User(
            identifier=ident + " : 🧑‍🎓 User Datapcc",
            metadata={"role": "user", "provider": "credentials"},
        )
    return None


def process_file(file: AskFileResponse):
    """Load an uploaded text or PDF file and split it into chunks.

    Raises:
        ValueError: if ``file.type`` is neither "text/plain" nor
            "application/pdf" (the original failed on an unbound name).
    """
    if file.type == "text/plain":
        Loader = TextLoader
    elif file.type == "application/pdf":
        Loader = PyPDFLoader
    else:
        raise ValueError("Unsupported file type: " + str(file.type))
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
    documents = Loader(file.path).load()
    return text_splitter.split_documents(documents)


def removeTags(all):
    """Return the text of a BeautifulSoup node without style/script content.

    The <style> and <script> descendants are removed from the node in place,
    then the stripped strings are concatenated with no separator. The file
    defined this function twice; the later definition (``''.join``) replaced
    the earlier one (``' '.join``) at import time, so it is the one kept.
    """
    for data in all(['style', 'script']):
        data.decompose()
    return ''.join(all.stripped_strings)


def localisation():
    """Return a new list of {"ID", "Longitude", "Latitude"} dicts."""
    return [
        {"ID": ident, "Longitude": longitude, "Latitude": latitude}
        for ident, longitude, latitude in _CENTROIDS
    ]


def plotDemandeur(dataframe, coderome):
    """Build the histogram of 'Valeur' per 'Indicateur' for one ROME code."""
    df = dataframe.sort_values(by=['Indicateur'])
    fig_demandeur = px.histogram(
        df, x='Indicateur', y='Valeur', height=1000,
        title="Demandeurs d'emploi et offres d'emploi du code ROME : " + coderome,
        color='Indicateur', labels={'Valeur':'Nombre'}, text_auto=True,
    ).update_layout(font=dict(size=9,color="RebeccaPurple"),autosize=True)
    return fig_demandeur


def plotSalaire(dataframe):
    """Build the grouped histogram of 'salaire' per 'emploi', coloured by 'categorie'."""
    df = dataframe.sort_values(by=['salaire'])
    fig_demandeur = px.histogram(
        df, x='emploi', y='salaire', barmode='group', title="Salaires mĂ©dians",
        color='categorie', text_auto=True,
    ).update_layout(font=dict(size=9,color="RebeccaPurple"),autosize=True)
    return fig_demandeur


def plotDifficulte(dataframe):
    """Build the recruitment-difficulty histogram; the title says so when there is no data."""
    if len(dataframe) == 0:
        title = "Aucune donnĂ©e difficultĂ© de recrutement renseignĂ©e!"
    else:
        title = "DifficultĂ© de recrutement"
    df = dataframe.sort_values(by=['Valeur'])
    fig_demandeur = px.histogram(
        df, x='Indicateur', y='Valeur', title=title, color='Indicateur',
        labels={'Valeur':'Pourcentage'}, text_auto=True,
    ).update_layout(font=dict(size=9,color="RebeccaPurple"),autosize=True)
    return fig_demandeur


def plotRepartition(dataframe,title):
    """Build a pie chart of 'Valeur' per 'Indicateur' with the given title."""
    df = dataframe.sort_values(by=['Valeur'])
    fig_repartition = px.pie(
        df, names='Indicateur', values='Valeur', color='Indicateur', title=title,
        labels={'Valeur':'pourcentage'},
        color_discrete_sequence=px.colors.qualitative.Safe,
    ).update_traces(
        textposition='inside', textinfo='percent+label'
    ).update_layout(font=dict(size=10,color="RebeccaPurple"))
    return fig_repartition


def htmlToDataframe(htmlTable):
    """Convert the first <table> of an HTML string into a DataFrame.

    The children of the first <tr> become the column names; every following
    <tr> becomes one row. Children without ``get_text`` are skipped.
    """
    soup = BeautifulSoup(htmlTable,'html.parser')
    table = soup.find_all("table")[0]
    list_header = []
    for items in table.find("tr"):
        try:
            list_header.append(items.get_text())
        except AttributeError:
            continue
    data = []
    for element in table.find_all("tr")[1:]:
        sub_data = []
        for sub_element in element:
            try:
                sub_data.append(sub_element.get_text())
            except AttributeError:
                continue
        data.append(sub_data)
    return pd.DataFrame(data = data, columns = list_header)


def _html_table(headers, rows):
    """Serialise headers and rows as the <table> markup htmlToDataframe reads."""
    lines = [headers] + list(rows)
    return "<table>" + "".join(
        "<tr>" + "".join("<td>" + cell + "</td>" for cell in line) + "</tr>"
        for line in lines
    ) + "</table>"


def _salaire(node):
    """Return a salary cell's text without non-breaking spaces, spaces and the euro sign."""
    return removeTags(node).replace('\xa0','').replace(' ','').replace('€','')


def datavisualisation_chiffres_cles_emplois(url):
    """Scrape one key-figures page into five HTML table strings.

    Returns ``[demandeurs, salaires, difficultes, repartitions, entreprises]``.
    ``salaires`` is the empty string when fewer than three salary values are
    found; the other four are always tables (possibly header-only).

    NOTE(review): the HTML literals of this function were destroyed in the
    copy under review (tags stripped). The markup is rebuilt from the column
    names and cell expressions that survived and from what htmlToDataframe
    requires; confirm the exact tags against version control.
    """
    response = requests.get(url, timeout=_HTTP_TIMEOUT)
    soup = BeautifulSoup(response.text, "lxml")

    # Job seekers / job offers: one row each, with an optional trend suffix.
    allembauches = soup.select('p.population_category')
    allnumembauchesfirst = soup.select('p.population_main-num.data')
    allnumembauches = ''.join(removeTags(allnumembauchesfirst[0]).split('\xa0'))
    allnumoffres = ''.join(removeTags(allnumembauchesfirst[1]).split('\xa0'))
    alldetailembauches = soup.select('p.hiring_text.ng-star-inserted')
    allnumevolutionembauches = soup.select('p.main.ng-star-inserted')
    alldetailevolutionembauches = soup.select('p.population_bubble-title')
    trend_labels = ("\nÉvolution demandeurs d'emploi (", "\nÉvolution offres d'emploi (")
    demandeur_rows = []
    for rank, valeur in enumerate((allnumembauches, allnumoffres)):
        indicateur = removeTags(allembauches[rank]) + " (" + removeTags(alldetailembauches[rank]) + ");"
        if len(alldetailevolutionembauches) >= rank + 1 and len(allnumevolutionembauches) >= rank + 1:
            indicateur += trend_labels[rank] + removeTags(alldetailevolutionembauches[rank]) + ": " + removeTags(allnumevolutionembauches[rank]) + ")"
        demandeur_rows.append([indicateur, valeur])
    alldemandeurs = _html_table(["Indicateur", "Valeur"], demandeur_rows)

    # Median salaries: three values for the first category, three more for
    # the second one when the page lists exactly six values.
    allFAP = soup.select('tr.sectorTable__line.ng-star-inserted')
    allcategorie = soup.select('td.sectorTable__cell')
    alltypesalaires = soup.select('th.sectorTable__cell')
    allFAPsalaires = soup.select('p.sectorTable__cellValue')
    allsalaires = ''
    if len(allFAPsalaires) >= 3:
        salaire_rows = [
            [removeTags(alltypesalaires[k + 1]), removeTags(allcategorie[0]), _salaire(allFAPsalaires[k])]
            for k in range(3)
        ]
        if len(allFAP) >= 2 and len(allFAPsalaires) == 6:
            salaire_rows += [
                [removeTags(alltypesalaires[k + 1]), removeTags(allcategorie[4]), _salaire(allFAPsalaires[k + 3])]
                for k in range(3)
            ]
        allsalaires = _html_table(["categorie", "emploi", "salaire"], salaire_rows)

    # Recruitment difficulties.
    alltypedifficultes = soup.select('.tabs-main-content_persp-col2-bar.ng-star-inserted')
    alldifficulte = soup.select('p.horizontal-graph_title')
    allpcdifficulte = soup.select('div.horizontal-graph_data')
    difficulte_rows = [
        [
            removeTags(alldifficulte[i]),
            removeTags(allpcdifficulte[i]).replace('Pour le territoire principal FRANCE pour les ' + removeTags(alldifficulte[i]),'').replace('%',''),
        ]
        for i in range(0,len(alltypedifficultes))
    ]
    alldifficultes = _html_table(["Indicateur", "Valeur"], difficulte_rows)

    # Hirings by contract type.
    alltyperepartitions = soup.select('div.hiring-contract_legende_item.ng-star-inserted')
    allrepartition = soup.select('p.hiring-contract_legende_item_label')
    allpcrepartition = soup.select('span.hiring-contract_legende_item-first')
    repartition_rows = [
        [
            removeTags(allrepartition[i]).replace('(' + removeTags(allpcrepartition[i]) + ')',''),
            removeTags(allpcrepartition[i]).replace('%','').replace(',','.'),
        ]
        for i in range(0,len(alltyperepartitions))
    ]
    allrepartitions = _html_table(["Indicateur", "Valeur"], repartition_rows)

    # Hirings by company size: the last four characters hold the percentage.
    allentrepriserepartitions = soup.select('div.horizontal-graph_pattern.sm-bubble_wrapper > span')
    entreprise_rows = [
        [
            removeTags(allentrepriserepartitions[i])[0:-4],
            removeTags(allentrepriserepartitions[i])[-4:].replace('%','').replace(',','.'),
        ]
        for i in range(0,len(allentrepriserepartitions))
    ]
    allentreprises = _html_table(["Indicateur", "Valeur"], entreprise_rows)

    return [alldemandeurs, allsalaires, alldifficultes, allrepartitions, allentreprises]


def listToString(list):
    """Return ``str()`` of the argument."""
    return str(list)


def arrayOfSecteur(array):
    """Return the NAF sector labels matching the given ROME codes.

    Reads ./public/secteur_naf_v01.csv, joins the labels of each ROME code
    with '; ' and returns one single-item list per code found in ``array``.
    """
    df_naf = pd.read_csv("./public/secteur_naf_v01.csv")
    df_naf = df_naf[['libelle_secteur_naf', 'code_rome']].copy()
    df_naf = df_naf.groupby(df_naf['code_rome'], as_index=False).agg({'libelle_secteur_naf': '; '.join})
    df_rome = pd.DataFrame({'code_rome': array})
    df_romeNaf = pd.merge(df_rome, df_naf, on="code_rome")
    return df_romeNaf[['libelle_secteur_naf']].copy().values.tolist()


def arrayToString(array):
    """Join the 'libelle' of each item with ', ' and append '; '.

    Items whose string form does not contain "libelle" contribute "; ".
    """
    arrayList = [
        item['libelle'] if listToString(item).find("libelle") != -1 else "; "
        for item in array
    ]
    return ', '.join(arrayList) + '; '


def searchByRome(rome,index):
    """Return the ROME labels for one code or a comma-separated list of codes.

    With a comma-separated list, only the first six positions are considered
    and only codes of exactly five characters are looked up. Each label is
    prefixed with a space. Codes without any match in the index are skipped
    (the original raised IndexError).
    """
    if rome.find(',') != -1:
        codes = [
            code.strip()
            for position, code in enumerate(rome.split(','))
            if position <= 5 and len(code.strip()) == 5
        ]
    else:
        codes = [rome]
    libelle = ''
    for codeRome in codes:
        all_docs = index.query(
            top_k=1,
            vector=[0] * 768,  # embedding dimension
            namespace='',
            filter={"categorie": {"$eq": "rome"}, "rome":{"$eq": codeRome}},
            include_metadata=True,
        )
        if all_docs['matches']:
            libelle = libelle + " " + all_docs['matches'][0]['metadata']['libelle_rome']
    return libelle


@cl.author_rename
def rename(orig_author: str):
    """Map internal author names to the display names shown in the chat."""
    rename_dict = {
        "ConversationalRetrievalChain": "💬 Assistant conversationnel",
        "Retriever": "Agent conversationnel",
        "StuffDocumentsChain": "ChaĂźne de documents",
        "LLMChain": "Agent",
        "HuggingFaceEndpoint": "Mistral AI đŸ€–",
    }
    return rename_dict.get(orig_author, orig_author)


@cl.action_callback("datavizChiffresClesMetiers")
async def on_action(action):
    """Send the key-figure charts for every ROME code carried by the action."""
    romeListArray = ast.literal_eval(action.value)
    elements = []
    for code in romeListArray:
        table = datavisualisation_chiffres_cles_emplois("https://dataemploi.pole-emploi.fr/metier/chiffres-cles/NAT/FR/" + code)
        plot_demandeur = plotDemandeur(htmlToDataframe(table[0]), code)
        elements.append(cl.Plotly(name="chart_demandeur", figure=plot_demandeur, display="inline", size="large"))
        if len(table[1]) > 0:
            plot_salaire = plotSalaire(htmlToDataframe(table[1]))
            elements.append(cl.Plotly(name="chart_salaire", figure=plot_salaire, display="inline", size="large"))
        plot_difficulte = plotDifficulte(htmlToDataframe(table[2]))
        elements.append(cl.Plotly(name="chart_difficulte", figure=plot_difficulte, display="inline", size="large"))
        plot_repartitionContrat = plotRepartition(htmlToDataframe(table[3]), "RĂ©partition des embauches du mĂ©tier : type de contrat")
        elements.append(cl.Plotly(name="chart_repatitionContrat", figure=plot_repartitionContrat, display="inline", size="large"))
        plot_repartitionEntreprise = plotRepartition(htmlToDataframe(table[4]), "RĂ©partition des embauches du mĂ©tier : type entreprise")
        elements.append(cl.Plotly(name="chart_repartitionEntreprise", figure=plot_repartitionEntreprise, display="inline", size="large"))
    # NOTE(review): a single message after the loop is assumed; the collapsed
    # source no longer shows whether the send was inside the loop.
    await cl.Message(content="Datavisualisation des chiffres clĂ©s des MĂ©tiers", elements=elements).send()


@cl.action_callback("download")
async def on_action(action):
    """Write the action value to ./<description>.txt and send it as a file."""
    # NOTE(review): action.description is used unchecked in the file path.
    path = './' + action.description + '.txt'
    df = pd.DataFrame([action.value])
    with open(path, 'wb') as csv_file:
        df.to_csv(path_or_buf=csv_file, index=False,header=False, encoding='utf-8')
    elements = [
        cl.File(
            name= action.description + ".txt",
            path="./" + action.description + ".txt",
            display="inline",
        ),
    ]
    await cl.Message(
        author=_AUTHOR, content="[Lien] 🔗", elements=elements
    ).send()
    await action.remove()


@cl.action_callback("saveMemory")
async def on_action(action):
    """Append the action value to the session's "saveMemory" buffer."""
    # The buffer is absent until a chat has started; treat that as empty.
    buffer = cl.user_session.get("saveMemory") or ""
    cl.user_session.set("saveMemory", buffer + action.value)
    await cl.Message(
        author=_AUTHOR, content="đŸ—ƒïž Document sauvegardĂ© dans le buffer Memory!"
    ).send()
    await action.remove()


@cl.cache
def to_cache(file):
    """Return the CSV URL for the given document name."""
    return "https://cipen.univ-gustave-eiffel.fr/fileadmin/CIPEN/datas/assets/docs/" + file + ".csv"


@cl.set_chat_profiles
async def chat_profile():
    """Declare the two chat profiles (ROME catalogue and ESCO classification)."""
    return [
        cl.ChatProfile(name="Catalogue ROME - ROMESKILLS",markdown_description="Les compĂ©tences du catalogue ROME",icon="./public/favicon.png",),
        cl.ChatProfile(name="Classification ESCO - ESCOSKILLS",markdown_description="Les compĂ©tences de la classification ESCO",icon="./public/favicon.png",),
    ]


@cl.on_chat_start
async def start():
    """Initialise the session: avatar, profile context, model, memory and runnable."""
    await cl.Avatar(
        name="You", path="./public/logo-ofipe.jpg",
    ).send()
    chat_profile = cl.user_session.get("chat_profile")
    chatProfile = chat_profile.split(' - ')
    if chatProfile[1] == 'ROMESKILLS':
        contextChat = await homeRome()
        cl.user_session.set("categorie", os.environ['PINECONE_API_KEYROME'])
    else:
        contextChat = await homeEsco()
        cl.user_session.set("categorie", os.environ['PINECONE_API_KEYESCO'])
    # Self-assignment kept from the original: it changes nothing but raises
    # KeyError here, before the model is built, when the token is missing.
    os.environ['HUGGINGFACEHUB_API_TOKEN'] = os.environ['HUGGINGFACEHUB_API_TOKEN']
    repo_id = "mistralai/Mistral-7B-Instruct-v0.3"
    # Alternative model: "mistralai/Mistral-Small-Instruct-2409"
    model = HuggingFaceEndpoint(
        repo_id=repo_id, max_new_tokens=6000, temperature=1.0, streaming=True
    )
    if not cl.user_session.get("saveMemory"):
        cl.user_session.set("saveMemory", "")
    # NOTE(review): a new memory per chat start is assumed; the collapsed
    # source no longer shows whether this line was under the `if` above.
    cl.user_session.set("memory", ConversationBufferMemory(return_messages=True))
    memory = cl.user_session.get("memory")
    # The context is pasted into a prompt template, where braces would be
    # parsed as template variables, so they are doubled to stay literal.
    contexte_prompt = contextChat[0:26500].replace("{", "{{").replace("}", "}}")
    prompt = ChatPromptTemplate.from_messages(
        [
            (
                "system",
                "Contexte : Vous ĂȘtes un spĂ©cialiste du marchĂ© de l'emploi en fonction du niveau de qualification, des compĂ©tences professionnelles, des compĂ©tences transversales, du salaire et de l'expĂ©rience. "
                "Vous ĂȘtes douĂ© pour faire des analyses du systĂšme travail sur les mĂ©tiers les plus demandĂ©s grĂące Ă  votre aptitude Ă  synthĂ©tiser les informations en fonction des critĂšres dĂ©finis ci-avant. En fonction des informations suivantes et du contexte suivant seulement et strictement. "
                f"Contexte : {contexte_prompt}. "
                "RĂ©ponds Ă  la question suivante de la maniĂšre la plus pertinente, la plus exhaustive et la plus dĂ©taillĂ©e possible, avec au minimum 3000 tokens jusqu'Ă  4000 tokens, seulement et strictement dans le contexte et les informations fournies. Essayez donc de comprendre en profondeur le contexte et rĂ©pondez uniquement en vous basant sur les informations fournies.",
            ),
            MessagesPlaceholder(variable_name="history"),
            ("human", "{question}, dans le contexte fourni."),
        ]
    )
    runnable = (
        RunnablePassthrough.assign(
            history=RunnableLambda(memory.load_memory_variables) | itemgetter("history")
        )
        | prompt
        | model
    )
    cl.user_session.set("runnable", runnable)


def _format_emploi(row):
    """Render one job-offer row (ordered as _EMPLOI_COLUMNS) as display text."""
    return (
        "\n✔ Emploi : " + str(row[0])
        + ";\n◉ Contrat : " + str(row[1])
        + ";\n◉ CompĂ©tences professionnelles : " + str(row[3])
        + ";\n" + "◉ Salaire : " + str(row[6])
        + ";\n◉ Qualification : " + str(row[5]).replace("'libelle'","\n‱ 'libelle")
        + ";\n◉ Localisation : " + str(row[7])
        + ";\n◉ ExpĂ©rience : " + str(row[2])
        + ";\n◉ Niveau de qualification : " + str(row[8])
        + ";\n◉ Description de l'emploi : " + str(row[4]) + "\n"
    )


async def _send_answer(question, answer, name, source_label):
    """Send one message carrying a question/answer text element called ``name``."""
    element = cl.Text(content="Question : " + question + "\n\nRĂ©ponse :\n" + answer, name=name)
    await cl.Message(author=_AUTHOR,content=source_label + name, elements=[element]).send()


@literal_client.step(type="run")
async def construction_NCS(competenceList, chatProfile):
    """Fetch the job offers for a question and publish them in the chat.

    For the 'ROMESKILLS' profile the company list, the key figures, the
    dataviz action and one job-sheet link per ROME code are sent as well.
    The chatbot context is stored in the session under "contextChatBot",
    truncated to 28875 characters.
    """
    context = await contexte(competenceList, chatProfile)
    emploisST = context.to_string(index = False)
    is_rome = chatProfile == 'ROMESKILLS'
    if is_rome:
        romeListArray = cl.user_session.get("codeRomeArray")
        stringLsitOfEntreprise = await creation_liste_entreprises(arrayOfSecteur(romeListArray))
        ficheClesMetier = await document_chiffres_cles_emplois("https://dataemploi.francetravail.fr/metier/chiffres-cles/NAT/FR/", romeListArray)
    contentChatBot = str(emploisST).translate(_BRACKETS)
    if is_rome:
        contentChatBot += ficheClesMetier

    # Job listing, common to both profiles.
    listEmplois = context[_EMPLOI_COLUMNS].copy().values.tolist()
    stringEmplois = ''.join(_format_emploi(row) for row in listEmplois)
    await cl.sleep(1)
    await _send_answer(
        competenceList,
        stringEmplois.translate(_BRACKETS).replace("'code'","\n‱ 'code'"),
        "Liste des emplois",
        "đŸ‘šâ€đŸ’Œ Source France Travail : ",
    )

    if is_rome:
        await cl.sleep(1)
        await _send_answer(
            competenceList, stringLsitOfEntreprise, "Liste des entreprises",
            "🏭 Source Registre National des Entreprises : ",
        )
        await cl.sleep(1)
        await _send_answer(
            competenceList, ficheClesMetier, "Chiffres clĂ©s des emplois",
            "📈 Source France Travail : ",
        )
        await cl.sleep(1)
        datavizChiffresClesMetiers = [
            cl.Action(name="datavizChiffresClesMetiers", value=str(romeListArray), description="Afficher la datavisualisation des chiffres clĂ©s des mĂ©tiers")
        ]
        await cl.Message(author=_AUTHOR,content="📊 Afficher la datavisualisation des chiffres clĂ©s des mĂ©tiers", actions=datavizChiffresClesMetiers).send()
        await cl.sleep(1)
        # NOTE(review): one message per job sheet is assumed; the collapsed
        # source no longer shows whether the send was inside the loop.
        for code in romeListArray:
            ficheMetiers = [
                cl.File(name= "Fiche mĂ©tier " + code,url="https://www.soi-tc.fr/assets/fiches_pe/FEM_" + code + ".pdf",display="inline",)
            ]
            await cl.Message(
                author=_AUTHOR, content="[Fiches mĂ©tiers] 🔗", elements=ficheMetiers
            ).send()

    cl.user_session.set("contextChatBot", contentChatBot[0:28875])
    await datavisualisation_statistiques_emplois(context)
    return "datavisualisation des statistiques de l'emploi"


@cl.step(type="run")
async def recuperation_contexte(getNote):
    """Return the session value stored under ``getNote``, prefixed by that key."""
    getContext = cl.user_session.get(getNote)
    return getNote + " :\n" + getContext
# Author label shared by every bot message sent from the functions below.
_AUTEUR_DATAPCC = "Datapcc : 🌐🌐🌐"

# Three-character "lieuTravail" prefixes that are excluded from the map.
_LIEUX_EXCLUS = ['Fra', 'FRA', 'Ile', 'Mar', 'Bou', '976']

# Single-pass equivalent of .replace('/', '').replace('-', '').replace(',', '').replace(' ', ',').
_TABLE_MOTS_CLES = str.maketrans({'/': None, '-': None, ',': None, ' ': ','})


@cl.step(type="retrieval")
async def contexte(competence, chatProfile):
    """Resolve the user's query to a code list, then fetch the matching job offers.

    Args:
        competence: Free-text query forwarded to the semantic search.
        chatProfile: Profile name; 'ROMESKILLS' selects the Rome search, any
            other value selects the ESCO search.

    Returns:
        The DataFrame returned by ``API_France_Travail`` for the list stored
        under the "codeRomeArray" session key.
    """
    categorie = cl.user_session.get("categorie")
    if chatProfile == 'ROMESKILLS':
        await creation_liste_code_Rome(competence, categorie)
    else:
        await creation_liste_skills_Esco(competence, categorie)
    await cl.sleep(1)
    # Both search functions publish their selection through the session.
    romeListArray = cl.user_session.get("codeRomeArray")
    return await API_France_Travail(romeListArray)


def _select_first_text(soup, selector):
    """Return the cleaned text of the first CSS match, or "" when nothing matches."""
    matches = soup.select(selector)
    return removeTags(matches[0]) if matches else ""


@cl.step(type="tool")
async def document_chiffres_cles_emplois(url, codes):
    """Scrape one page per code and build a text summary of its key figures.

    Args:
        url: URL prefix; each code is appended to it to form the page address.
        codes: Sequence of code strings.

    Returns:
        One concatenated string with a "Chiffres-clés" section per code.
        Sections whose HTML element is missing are left empty.
    """
    fiches = []
    for code in codes:
        response = requests.get(url + code)
        soup = BeautifulSoup(response.text, "html.parser")

        titre = _select_first_text(soup, 'h1#titreMetier')
        embauches = _select_first_text(soup, 'div.jobs_item-container-flex')
        salaires = _select_first_text(soup, 'div.key-number_block.shadow.inset')
        salaires_median = _select_first_text(soup, 'tbody.sectorTable__body')
        difficultes = _select_first_text(soup, 'div.dynamism_canvas-wrapper > p.sr-only')
        origine_difficultes = _select_first_text(soup, 'div.tabs-main-data_persp-col2')

        contrats = soup.find_all("div", class_="hiring-contract_legende_item ng-star-inserted")
        types_contrat = "".join(removeTags(contrat) + ", " for contrat in contrats)

        entreprises = soup.find_all("div", class_="horizontal-graph_patterns")
        taille_entreprise = removeTags(entreprises[0]) if entreprises else ""

        fiches.append(
            "\n\nChiffres-clĂ©s MĂ©tier : \n**" + titre
            + "**:\n◉ Demandeurs d'emploi et Offres d'emploi : "
            + embauches.replace("Plus de donnĂ©es sur les Demandeurs d'emploi","").replace("Plus de donnĂ©es","")
            + ".\n◉ Salaires proposĂ©s dans les offres : " + salaires
            + ".\n◉ Salaires mĂ©dians constatĂ©s : " + salaires_median
            + ".\n◉ DifficultĂ©s de recrutement pour les entreprises : " + difficultes
            + ".\n◉ Origine des difficultĂ©s : " + origine_difficultes
            + ".\n◉ RĂ©partition des embauches par type de contrat : " + types_contrat
            + ".\n◉ RĂ©partition des embauches par taille d'entreprise : " + taille_entreprise + "."
        )
    return "".join(fiches)


def _code_lieu(lieu):
    """Return the stripped first three characters of a lieuTravail 'libelle'."""
    return lieu['libelle'][0:3].strip()


def _top_counts(df, column, top_n):
    """Count rows per value of `column` and keep the `top_n` most frequent, ascending."""
    counts = df.groupby(column).size().reset_index(name='obs')
    return counts.sort_values(by=['obs']).iloc[-top_n:]


def _explode_semicolon(df, column):
    """Split a ';'-joined text column into one stripped item per row."""
    # The items were joined with '; ', so stripping keeps " X" and "X" from
    # being counted as two different labels.
    return df[column].str.split(';').explode().str.strip().to_frame(name=column)


def _bar_chart(counts, column, title):
    """Build the horizontal bar chart used for every 'top values' figure."""
    return (
        px.bar(counts, x='obs', y=column, orientation='h', color='obs', title=title,
               labels={'obs':'nombre'}, color_continuous_scale="Teal", text_auto=True)
        .update_layout(font=dict(size=10,color="RebeccaPurple"),autosize=True)
        .update_traces(
            # NOTE(review): the line break inside this literal was lost in the
            # collapsed source; '<br>' is assumed - confirm against the repo.
            hovertemplate=counts[column] + '<br>Nombre : %{x}',
            y=[label[:100] + "..." for label in counts[column]],
            showlegend=False,
        )
    )


def _pie_chart(df, column, title):
    """Build the pie chart of row counts per value of `column`."""
    counts = df.groupby(column).size().reset_index(name='obs')
    return (
        px.pie(counts, names=column, values='obs', color='obs', title=title,
               labels={'obs':'nombre'}, color_discrete_sequence=px.colors.qualitative.Safe)
        .update_traces(textposition='inside', textinfo='percent+label')
        .update_layout(font=dict(size=10,color="RebeccaPurple"))
    )


@cl.step(type="tool")
async def datavisualisation_statistiques_emplois(results_df):
    """Send a message holding the Plotly charts that summarise the job offers.

    Args:
        results_df: DataFrame of job offers. The code reads the columns
            intitule, typeContratLibelle, experienceLibelle, competences,
            qualitesProfessionnelles, salaire, lieuTravail, formations and
            secteurActiviteLibelle.

    Returns:
        None. When `results_df` is empty a short notice is sent instead of the
        charts. The charts that need complete rows (skills, qualifications,
        experience) are skipped when no row has all of competences,
        qualitesProfessionnelles and formations.
    """
    if results_df.empty:
        # Previously pd.concat([]) raised ValueError on this path.
        await cl.Message(author=_AUTEUR_DATAPCC, content="Il n'y a pas d'offres d'emploi.").send()
        return

    finals_df = results_df[['intitule','typeContratLibelle','experienceLibelle','competences','qualitesProfessionnelles','salaire','lieuTravail','formations']].copy()
    finals_df["lieuTravail"] = finals_df["lieuTravail"].apply(_code_lieu)
    finals_df.dropna(subset=['qualitesProfessionnelles','formations','competences'], inplace=True)
    finals_df["competences"] = finals_df["competences"].apply(
        lambda items: '; '.join(str(e['libelle']) for e in items))
    finals_df["qualitesProfessionnelles"] = finals_df["qualitesProfessionnelles"].apply(
        lambda items: '; '.join(str(e['libelle']) + ": " + str(e['description']) for e in items))
    finals_df["formations"] = finals_df["formations"].apply(
        lambda items: '; '.join(str(e['niveauLibelle']) for e in items))
    finals_df = finals_df.sort_values(by=['lieuTravail'])

    localisation_df = results_df[['lieuTravail']].copy()
    localisation_df["lieuTravail"] = localisation_df["lieuTravail"].apply(_code_lieu)
    localisation_df = localisation_df[~localisation_df['lieuTravail'].isin(_LIEUX_EXCLUS)]

    # Charts computed on every offer.
    fig_intitule = _bar_chart(_top_counts(results_df, 'intitule', 25), 'intitule', "Les principaux emplois")
    fig_contrat = _pie_chart(results_df, 'typeContratLibelle', "Les types de contrat")
    fig_secteur = _bar_chart(_top_counts(results_df, 'secteurActiviteLibelle', 25), 'secteurActiviteLibelle', "Les principaux secteurs d'activités")

    elements = [
        cl.Plotly(name="chart_intitule", figure=fig_intitule, display="inline", size="large"),
        cl.Plotly(name="chart_contrat", figure=fig_contrat, display="inline", size="large"),
    ]

    # Charts computed only on rows that survived the dropna above.
    if not finals_df.empty:
        fig_competences = _bar_chart(
            _top_counts(_explode_semicolon(finals_df, 'competences'), 'competences', 20),
            'competences', "Les principales compétences professionnelles")
        fig_transversales = _bar_chart(
            _top_counts(_explode_semicolon(finals_df, 'qualitesProfessionnelles'), 'qualitesProfessionnelles', 20),
            'qualitesProfessionnelles', "Les principales compétences transversales")
        fig_formations = _pie_chart(finals_df, 'formations', "Les niveaux de qualification")
        fig_experience = _pie_chart(finals_df, 'experienceLibelle', "Les expĂ©riences professionnelles")
        elements.append(cl.Plotly(name="chart_competences", figure=fig_competences, display="inline", size="large"))
        elements.append(cl.Plotly(name="chart_transversales", figure=fig_transversales, display="inline", size="large"))
        elements.append(cl.Plotly(name="chart_formations", figure=fig_formations, display="inline", size="large"))
        elements.append(cl.Plotly(name="chart_experience", figure=fig_experience, display="inline", size="large"))

    elements.append(cl.Plotly(name="chart_secteur", figure=fig_secteur, display="inline", size="large"))

    # NOTE(review): this GeoJSON is the Spanish provinces file and is drawn
    # with a zero-width line on a map centred on lon 2 / lat 47 - confirm it
    # is the intended layer.
    res = requests.get(
        "https://raw.githubusercontent.com/codeforgermany/click_that_hood/main/public/data/spain-provinces.geojson"
    )

    # Map: one bubble per code, positioned on the centroid with the same ID.
    ListCentroids = localisation()
    df_localisation = localisation_df.groupby('lieuTravail').size().reset_index(name='obs')
    df_localisation = df_localisation.sort_values(by=['lieuTravail'])
    for colonne, champ in (("longitude", 'Longitude'), ("latitude", 'Latitude')):
        # Codes without a centroid give '' here, as in the original lookup.
        df_localisation[colonne] = df_localisation['lieuTravail'].apply(
            lambda code, champ=champ: ''.join(str(loc[champ]) for loc in ListCentroids if loc['ID'] == code))
        df_localisation[colonne] = pd.to_numeric(df_localisation[colonne], downcast="float")

    fig_localisation = px.scatter_mapbox(df_localisation, lat="latitude", lon="longitude", hover_name="lieuTravail", size="obs").update_layout(
        mapbox={
            "style": "carto-positron",
            "center": {"lon": 2, "lat" : 47},
            "zoom": 4.5,
            "layers": [
                {
                    "source": res.json(),
                    "type": "line",
                    "color": "green",
                    "line": {"width": 0},
                }
            ],
        }
    )
    elements.append(cl.Plotly(name="chart_localisation", figure=fig_localisation, display="inline", size="large"))

    await cl.Message(content="Datavisualisation du marchĂ© de l'emploi", elements=elements).send()


@cl.step(type="tool")
async def API_France_Travail(romeListArray):
    """Search job offers for each entry, from the first day of last month to now.

    Args:
        romeListArray: Iterable of strings (codes or job titles) used as
            keywords, one search per entry.

    Returns:
        A DataFrame built from the concatenated "resultats" of every search
        that succeeded; empty when none did.
    """
    client = await connexion_France_Travail()
    todayDate = datetime.datetime.today()
    month, year = (todayDate.month-1, todayDate.year) if todayDate.month != 1 else (12, todayDate.year-1)
    start_dt = todayDate.replace(day=1, month=month, year=year)
    end_dt = datetime.datetime.today()
    results = []
    for k in romeListArray:
        if k[0:1] == ' ':
            k = k[1:]
        params = {"motsCles": k.translate(_TABLE_MOTS_CLES),'minCreationDate': dt_to_str_iso(start_dt),'maxCreationDate': dt_to_str_iso(end_dt),'range':'0-149'}
        try:
            search_on_big_data = client.search(params=params)
            results += search_on_big_data["resultats"]
        except Exception:
            # Best effort per keyword. A bare `except:` would also swallow
            # task cancellation and KeyboardInterrupt.
            print("Il n'y a pas d'offres d'emploi.")
    return pd.DataFrame(results)


def _fiche_entreprise(content):
    """Format one registry record as text, or return '' when it is skipped.

    A record is skipped when its text mentions 'cessation', or when it has no
    'personneMorale' entry carrying an 'adresseEntreprise'.
    """
    if not isinstance(content, dict) or str(content).find('cessation') != -1:
        return ''
    personne = content.get('personneMorale')
    if not isinstance(personne, dict) or 'adresseEntreprise' not in personne:
        return ''

    entreprise = personne['identite']['entreprise']
    fiche = "\n🏭 DĂ©nomination : " + str(entreprise['denomination']) + "; Code SIREN : " + str(entreprise['siren']) + "\n\tAdresse : "

    adresse = personne['adresseEntreprise'].get('adresse') or {}
    if 'numVoie' in adresse:
        fiche += "n° " + str(adresse['numVoie'])
    if 'typeVoie' in adresse:
        fiche += " " + str(adresse['typeVoie'])
    if 'voie' in adresse:
        fiche += " " + str(adresse['voie'])
    if 'complementLocalisation' in adresse:
        fiche += ", " + str(adresse['complementLocalisation'])+ ","
    if 'codePostal' in adresse:
        fiche += " " + str(adresse['codePostal'])
    if 'commune' in adresse:
        fiche += " " + str(adresse['commune'])

    etablissements = personne.get('autresEtablissements')
    if etablissements:
        # Read this record's own first establishment; the previous code read
        # row 101 for every company.
        activite = etablissements[0]['activites'][0]
        fiche += "\n\tActivitĂ©s : " + str(activite['descriptionDetaillee']) + "\n\tCode APE : " + str(activite['codeApe'])
    return fiche


@cl.step(type="tool")
async def creation_liste_entreprises(arrayOfsecteur):
    """List companies registered for each requested sector.

    Args:
        arrayOfsecteur: Sequence whose items expose the sector label at index 0.

    Returns:
        A text listing (name, SIREN, address, activity when available)
        accumulated over every sector; a sector whose request does not return
        HTTP 200 contributes a "no companies" sentence instead.
    """
    docsearch = await connexion_vector_database()
    ficheEntreprise = ''
    for secteur in arrayOfsecteur:
        retrieve_comp = docsearch.similarity_search(secteur[0], k=1, filter={"categorie": {"$eq": "inpiSecteur"}})
        # int() drops leading zeros; a 7-digit result gets one "0" back.
        codeSecteurSTR = str(int(retrieve_comp[0].metadata['codefinal']))
        if len(codeSecteurSTR) == 7:
            codeSecteurSTR = "0" + codeSecteurSTR

        token = await connexion_registre_national_entreprises()
        url = f"https://registre-national-entreprises.inpi.fr/api/companies?page=1&pageSize=500&codeCategory={codeSecteurSTR}"
        print(url)
        headers = {"Authorization": f"Bearer {token}"}
        response = requests.get(url, headers=headers)

        if response.status_code != 200:
            ficheEntreprise += "Il n'y a pas d'entreprises pour le secteur sĂ©lectionnĂ© : " + secteur[0]
            continue

        print('OK')
        df = pd.DataFrame(response.json())
        for i in range(len(df)):
            ficheEntreprise += _fiche_entreprise(df['formality'][i]['content'])
    return ficheEntreprise


@cl.step(type="tool")
async def creation_liste_code_Rome(competence, categorie):
    """Run the Rome semantic search and let the user confirm the codes to use.

    Args:
        competence: Query text for the similarity search.
        categorie: Value matched against the "categorie" metadata filter.

    Returns:
        None. The chosen code list is stored under the "codeRomeArray"
        session key: the five automatic codes, or the user's comma-separated
        entry when one is typed without any forbidden separator.
    """
    docsearch = await connexion_vector_database()
    retrieve_comp = docsearch.similarity_search(competence, k=30, filter={"categorie": {"$eq": categorie}})

    codes = [doc.metadata['code_rome'] for doc in retrieve_comp]
    competences = [doc.metadata['libelle_competence'] for doc in retrieve_comp]
    metiers = [doc.metadata['libelle_appellation_long'] for doc in retrieve_comp]
    results_df = pd.DataFrame({'codeRome': codes,'competence': competences, 'metier': metiers})

    arrayresults = results_df.values.tolist()
    displayresults = '| Code Rome | CompĂ©tence | MĂ©tier |\n| -------- | ------- | ------- |'
    displayresults += ''.join(
        '\n| ' + row[0] + ' | ' + row[1] + ' | ' + row[2] + ' |' for row in arrayresults)
    if arrayresults:
        # Debug trace of the best match; guarded so an empty search cannot crash.
        print(arrayresults[0][0] + arrayresults[0][1] + arrayresults[0][2])
    await cl.Message(author=_AUTEUR_DATAPCC,content="Voici le rĂ©sultat de la recherche sĂ©mantique sur le catalogue Rome :\n" + displayresults).send()

    results_df = results_df.drop_duplicates(subset=["codeRome"]).head(5)
    codeRomeString = results_df["codeRome"].to_string(index = False)
    codeRome_list = results_df["codeRome"].tolist()

    actionRome = await cl.AskActionMessage(
        content="Etes-vous d'accord avec la sĂ©lection des 5 codes Rome automatiques issus de la recherche sĂ©mantique ? :\n" + codeRomeString.replace(' ',','),
        actions=[
            cl.Action(name="continue", value="Offres d'emploi en temps rĂ©el", label="✅ Oui, je veux continuer vers l'extraction en temps rĂ©el des offres d'emploi"),
            cl.Action(name="cancel", value="Saisie des codes Rome", label="❌ Non, je veux saisir ma liste de codes Rome, sĂ©parĂ©s par des virgules"),
        ],
        timeout=3600
    ).send()

    if actionRome and actionRome.get("name") == "continue":
        await cl.Message(
            content="Connexion Ă  France Travail, et rĂ©cupĂ©ration des offres d'emploi",
        ).send()
        cl.user_session.set("codeRomeArray", codeRome_list)
        return

    actionsaisierome = await cl.AskUserMessage(content="Saisissez vos codes Rome dans le prompt? ⚠ Attention, indiquez seulement des codes Rome sĂ©parĂ©s par des virgules", timeout=3600).send()
    # Default to the automatic selection so the session key is always set,
    # including when the prompt times out without an answer.
    arrayCodeRome = codeRome_list
    if actionsaisierome:
        await cl.Message(
            content=f"Votre saisie est : {actionsaisierome['output']}",
        ).send()
        stringCodeRome = actionsaisierome['output'].replace(' ','')
        stopWords = [';','.',':','!','|']
        if any(ele in stringCodeRome for ele in stopWords):
            await cl.Message(author=_AUTEUR_DATAPCC,content="Votre saisie est erronĂ©e. Nous continuons l'action avec les codes Rome sĂ©lectionnĂ©s automatiquement pour vous : " + codeRomeString).send()
        else:
            arrayCodeRome = stringCodeRome.split(',')
    cl.user_session.set("codeRomeArray", arrayCodeRome)


@cl.step(type="tool")
async def creation_liste_skills_Esco(competence, categorie):
    """Run the ESCO semantic search and let the user confirm the jobs to use.

    Args:
        competence: Query text for the similarity search.
        categorie: Value matched against the "categorie" metadata filter.

    Returns:
        None. The chosen job list is stored under the "codeRomeArray" session
        key: the ten automatic jobs, or the user's entry split on ';' when it
        contains at least one ';'.
    """
    docsearch = await connexion_vector_database()
    retrieve_comp = docsearch.similarity_search(competence, k=40, filter={"categorie": {"$eq": categorie}})

    competences = [doc.metadata['compĂ©tence'] for doc in retrieve_comp]
    descriptions_competence = [doc.metadata['description_compĂ©tence'] for doc in retrieve_comp]
    metiers = [doc.metadata['mĂ©tier'] for doc in retrieve_comp]
    descriptions_metier = [doc.metadata['description_mĂ©tier'] for doc in retrieve_comp]
    results_df = pd.DataFrame({'compĂ©tence': competences,'description_compĂ©tence': descriptions_competence, 'mĂ©tier': metiers, 'description_mĂ©tier': descriptions_metier})

    arrayresults = results_df.values.tolist()
    displayresults = '| CompĂ©tence | Description CompĂ©tence | MĂ©tier | Description MĂ©tier |\n| -------- | ------- | ------- | ------- |'
    displayresults += ''.join(
        '\n| ' + row[0] + ' | ' + row[1] + ' | ' + row[2] + ' | ' + row[3] + ' |' for row in arrayresults)
    await cl.Message(author=_AUTEUR_DATAPCC,content="Voici le rĂ©sultat de la recherche sĂ©mantique sur la classification ESCO :\n" + displayresults).send()

    results_df = results_df.drop_duplicates(subset=["mĂ©tier"]).head(10)
    codeRomeString = results_df["mĂ©tier"].to_string(index = False)
    codeRome_list = results_df["mĂ©tier"].tolist()

    actionRome = await cl.AskActionMessage(
        content="Etes-vous d'accord avec la sĂ©lection des 10 mĂ©tiers automatiques pour complĂ©ter la liste des compĂ©tences avec celles attendues sur le marchĂ© du travail? :\n" + codeRomeString,
        actions=[
            cl.Action(name="continue", value="Offres d'emploi en temps rĂ©el", label="✅ Oui, je veux continuer vers l'extraction en temps rĂ©el des offres d'emploi"),
            cl.Action(name="cancel", value="Saisie des codes Rome", label="❌ Non, je veux saisir ma liste de mĂ©tiers, sĂ©parĂ©s par des points-virgules"),
        ],
        timeout=3600
    ).send()

    if actionRome and actionRome.get("name") == "continue":
        await cl.Message(
            content="Connexion Ă  France Travail, et rĂ©cupĂ©ration des offres d'emploi",
        ).send()
        cl.user_session.set("codeRomeArray", codeRome_list)
        return

    actionsaisierome = await cl.AskUserMessage(content="Saisissez vos mĂ©tiers dans le prompt? ⚠ Attention, indiquez seulement des mĂ©tiers sĂ©parĂ©s par des points-virgules", timeout=3600).send()
    # Default to the automatic selection so the session key is always set,
    # including when the prompt times out without an answer.
    arrayCodeRome = codeRome_list
    if actionsaisierome:
        await cl.Message(
            content=f"Votre saisie est : {actionsaisierome['output']}",
        ).send()
        stringCodeRome = actionsaisierome['output']
        if ';' in stringCodeRome:
            arrayCodeRome = stringCodeRome.split(';')
        else:
            await cl.Message(author=_AUTEUR_DATAPCC,content="Votre saisie est erronĂ©e. Nous continuons l'action avec les mĂ©tiers sĂ©lectionnĂ©s automatiquement pour vous : " + codeRomeString).send()
    cl.user_session.set("codeRomeArray", arrayCodeRome)


@cl.step(type="tool")
async def connexion_registre_national_entreprises():
    """Log in to the registry API and return its bearer token.

    Reads RNE_CLIENT_ID and RNE_CLIENT_SECRET from the environment.

    Raises:
        Exception: when the login request does not return HTTP 200.
    """
    url = "https://registre-national-entreprises.inpi.fr/api/sso/login"
    headers = {"Content-Type": "application/json"}
    data = {"username": os.environ['RNE_CLIENT_ID'], "password": os.environ['RNE_CLIENT_SECRET']}
    response = requests.post(url, json=data, headers=headers)
    if response.status_code != 200:
        raise Exception(f"Échec de l'authentification. Code d'erreur : {response.status_code}")
    return response.json()["token"]


@cl.step(type="tool")
async def connexion_France_Travail():
    """Build the job-offers API client from the POLE_EMPLOI_* environment variables."""
    return Api(client_id=os.environ['POLE_EMPLOI_CLIENT_ID'],
               client_secret=os.environ['POLE_EMPLOI_CLIENT_SECRET'])


@cl.step(type="tool")
async def connexion_vector_database():
    """Open the existing Pinecone index named by PINECONE_INDEX_NAME.

    Raises:
        KeyError: when PINECONE_API_KEY or PINECONE_INDEX_NAME is not set.
    """
    # Lookup only: fails fast with KeyError when the key is missing, as the
    # former self-assignment did.
    os.environ['PINECONE_API_KEY']
    index_name = os.environ['PINECONE_INDEX_NAME']
    embeddings = HuggingFaceEmbeddings()
    return PineconeVectorStore.from_existing_index(index_name, embeddings)


@cl.step(type="llm")
async def IA():
    """Return a streaming HuggingFaceEndpoint for Mistral-7B-Instruct-v0.3.

    Raises:
        KeyError: when HUGGINGFACEHUB_API_TOKEN is not set.
    """
    # Lookup only: fails fast with KeyError when the token is missing.
    os.environ['HUGGINGFACEHUB_API_TOKEN']
    repo_id = "mistralai/Mistral-7B-Instruct-v0.3"
    return HuggingFaceEndpoint(
        repo_id=repo_id, max_new_tokens=5000, temperature=1.0, task="text2text-generation", streaming=True
    )


@cl.on_settings_update
async def setup_agent(settings):
    """Rebuild the context and the LLM chain after a settings change.

    Args:
        settings: Mapping with the 'competence' and 'competenceInput' entries;
            'competenceInput' takes precedence when it is set.

    Returns:
        None. On success the chain is stored under the "runnable" session
        key. When neither entry is set, a warning is sent and nothing else
        happens.
    """
    if not settings['competence'] and not settings['competenceInput']:
        await cl.Message(
            author=_AUTEUR_DATAPCC,content=f"⚠ Pas de contexte : {settings['competence']}\n⛔ Vous ne pouvez pas Ă©laborer de note sectorielle!"
        ).send()
        # No query to work with: stop here instead of failing further down
        # on an unbound competenceList.
        return

    if settings['competence'] and not settings['competenceInput']:
        competenceList = settings['competence']
    else:
        competenceList = settings['competenceInput']
    await cl.Message(
        author=_AUTEUR_DATAPCC,content=f"👍 Changement de contexte : {competenceList}"
    ).send()
    cl.user_session.set("competenceFree", competenceList)

    if not cl.user_session.get("saveMemory"):
        cl.user_session.set("saveMemory", "")

    chat_profile = cl.user_session.get("chat_profile")
    chatProfile = chat_profile.split(' - ')
    await construction_NCS(competenceList, chatProfile[1])

    contextChat = cl.user_session.get("contextChatBot")
    if not contextChat:
        contextChat = "Il n'y a pas de contexte."

    # Lookup only: fails fast with KeyError when the token is missing.
    os.environ['HUGGINGFACEHUB_API_TOKEN']
    repo_id = "mistralai/Mistral-7B-Instruct-v0.3"
    model = HuggingFaceEndpoint(
        repo_id=repo_id, max_new_tokens=3600, temperature=0.5, streaming=True
    )

    memory = cl.user_session.get("memory")
    prompt = ChatPromptTemplate.from_messages(
        [
            (
                "system",
                f"Contexte : Vous ĂȘtes un spĂ©cialiste du marchĂ© de l'emploi en fonction du niveau de qualification, des compĂ©tences professionnelles, des compĂ©tences transversales, du salaire et de l'expĂ©rience. Vous ĂȘtes douĂ© pour faire des analyses du systĂšme travail sur les mĂ©tiers les plus demandĂ©s grĂące Ă  votre aptitude Ă  synthĂ©tiser les informations en fonction des critĂšres dĂ©finis ci-avant. En fonction des informations suivantes et du contexte suivant seulement et strictement. Contexte : {contextChat[0:28875]}. RĂ©ponds Ă  la question suivante de la maniĂšre la plus pertinente, la plus exhaustive et la plus dĂ©taillĂ©e possible, avec au minimum 3000 tokens jusqu'Ă  3600 tokens, seulement et strictement dans le contexte et les informations fournies. Essayez donc de comprendre en profondeur le contexte et rĂ©pondez uniquement en vous basant sur les informations fournies.",
            ),
            MessagesPlaceholder(variable_name="history"),
            ("human", "{question}, dans le contexte fourni."),
        ]
    )
    runnable = (
        RunnablePassthrough.assign(
            history=RunnableLambda(memory.load_memory_variables) | itemgetter("history")
        )
        | prompt
        | model
    )
    cl.user_session.set("runnable", runnable)


@cl.on_message
async def main(message: cl.Message):
    """Stream the chain's answer to the user's message, then offer follow-ups.

    Args:
        message: The incoming Chainlit message; its content is the question.

    Returns:
        None. After streaming, sends the download / save-to-memory actions
        and the question-answer text element, then records the exchange in
        the session memory.
    """
    async with cl.Step(root=True, name="RĂ©ponse de Mistral", type="llm") as parent_step:
        parent_step.input = message.content

        memory = cl.user_session.get("memory")
        runnable = cl.user_session.get("runnable")  # type: Runnable

        msg = cl.Message(author=_AUTEUR_DATAPCC,content="")
        stream = runnable.astream(
            {"question": message.content},
            config=RunnableConfig(callbacks=[cl.AsyncLangchainCallbackHandler(stream_final_answer=True)]),
        )
        async for chunk in stream:
            await parent_step.stream_token(chunk)
            await msg.stream_token(chunk)

        QA_context_name = f"Question-rĂ©ponse sur le contexte"
        text_elements = [
            cl.Text(content="Question : " + message.content + "\n\nRĂ©ponse :\n" + msg.content, name=QA_context_name)
        ]
        question_reponse = "Question : " + message.content + "\n\nRĂ©ponse : " + msg.content

        actions = [
            cl.Action(name="download", value=question_reponse, description="download_QA_emplois")
        ]
        await cl.Message(author=_AUTEUR_DATAPCC,content="Download", actions=actions).send()
        await cl.sleep(2)

        saves = [
            cl.Action(name="saveToMemory", value=question_reponse, description="Mettre en mĂ©moire la rĂ©ponse Ă  votre requĂȘte")
        ]
        await cl.Message(author=_AUTEUR_DATAPCC,content="Mettre en mĂ©moire la rĂ©ponse Ă  votre requĂȘte", actions=saves).send()
        await cl.sleep(2)

        memories = [
            cl.Action(name="download", value=cl.user_session.get('saveMemory'), description="download_referentiel")
        ]
        await cl.Message(author=_AUTEUR_DATAPCC,content="TĂ©lĂ©charger la mise en mĂ©moire de vos fiches", actions=memories).send()
        await cl.sleep(1.5)

        await cl.Message(author=_AUTEUR_DATAPCC,content="Contexte : " + QA_context_name, elements=text_elements).send()

        memory.chat_memory.add_user_message(message.content)
        memory.chat_memory.add_ai_message(msg.content)