import os
import time
from operator import itemgetter
from collections import Counter
from langchain_community.document_loaders import PyPDFLoader, TextLoader
from chainlit.types import AskFileResponse
from langchain.text_splitter import CharacterTextSplitter, RecursiveCharacterTextSplitter
from langchain.schema.runnable import Runnable, RunnablePassthrough, RunnableLambda
from langchain.schema.runnable.config import RunnableConfig
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.chains import ConversationalRetrievalChain, create_extraction_chain
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain_community.llms import HuggingFaceEndpoint
from langchain.chains import LLMChain
from langchain_core.prompts import PromptTemplate
from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain.schema import StrOutputParser
from langchain.chains.conversational_retrieval.prompts import CONDENSE_QUESTION_PROMPT
from langchain.chains.question_answering import load_qa_chain
from langchain.chains.qa_with_sources import load_qa_with_sources_chain
from langchain_pinecone import PineconeVectorStore
from pinecone import Pinecone
from langchain.memory import ChatMessageHistory, ConversationBufferMemory
import pandas as pd
import numpy as np
import chainlit as cl
from chainlit.input_widget import Select, TextInput
from chainlit import user_session
from homeskills import homeRome, homeEsco
from offres_emploi import Api
from offres_emploi.utils import dt_to_str_iso
import datetime
import plotly.express as px
import bcrypt
import ast
import json
import requests
import http.client
from bs4 import BeautifulSoup
from literalai import LiteralClient
literal_client = LiteralClient(api_key=os.getenv("LITERAL_API_KEY"))
literal_client.instrument_openai()
@cl.password_auth_callback
def auth_callback(username: str, password: str):
auth = json.loads(os.environ['CHAINLIT_AUTH_LOGIN'])
ident = next(d['ident'] for d in auth if d['ident'] == username)
pwd = next(d['pwd'] for d in auth if d['ident'] == username)
resultLogAdmin = bcrypt.checkpw(username.encode('utf-8'), bcrypt.hashpw(ident.encode('utf-8'), bcrypt.gensalt()))
resultPwdAdmin = bcrypt.checkpw(password.encode('utf-8'), bcrypt.hashpw(pwd.encode('utf-8'), bcrypt.gensalt()))
resultRole = next(d['role'] for d in auth if d['ident'] == username)
if resultLogAdmin and resultPwdAdmin and resultRole == "admindatapcc":
return cl.User(
identifier=ident + " : đ§âđŒ Admin Datapcc", metadata={"role": "admin", "provider": "credentials"}
)
elif resultLogAdmin and resultPwdAdmin and resultRole == "userdatapcc":
return cl.User(
identifier=ident + " : đ§âđ User Datapcc", metadata={"role": "user", "provider": "credentials"}
)
def process_file(file: AskFileResponse):
if file.type == "text/plain":
Loader = TextLoader
elif file.type == "application/pdf":
Loader = PyPDFLoader
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
loader = Loader(file.path)
documents = loader.load()
docs = text_splitter.split_documents(documents)
return docs
def removeTags(all):
for data in all(['style', 'script']):
data.decompose()
return ' '.join(all.stripped_strings)
def localisation():
ListCentroids = [
{ "ID": "01", "Longitude": 5.3245259, "Latitude":46.0666003 },
{ "ID": "02", "Longitude": 3.5960246, "Latitude": 49.5519632 },
{ "ID": "03", "Longitude": 3.065278, "Latitude": 46.4002783 },
{ "ID": "04", "Longitude": 6.2237688, "Latitude": 44.1105837 },
{ "ID": "05", "Longitude": 6.2018836, "Latitude": 44.6630487 },
{ "ID": "06", "Longitude": 7.0755745, "Latitude":43.9463082 },
{ "ID": "07", "Longitude": 4.3497308, "Latitude": 44.7626044 },
{ "ID": "08", "Longitude": 4.6234893, "Latitude": 49.6473884 },
{ "ID": "09", "Longitude": 1.6037147, "Latitude": 42.9696091 },
{ "ID": "10", "Longitude": 4.1394954, "Latitude": 48.2963286 },
{ "ID": "11", "Longitude": 2.3140163, "Latitude": 43.1111427 },
{ "ID": "12", "Longitude": 2.7365234, "Latitude": 44.2786323 },
{ "ID": "13", "Longitude": 5.0515492, "Latitude": 43.5539098 },
{ "ID": "14", "Longitude": -0.3930779, "Latitude": 49.1024215 },
{ "ID": "15", "Longitude": 2.6367657, "Latitude": 44.9643217 },
{ "ID": "16", "Longitude": 0.180475, "Latitude": 45.706264 },
{ "ID": "17", "Longitude": -0.7082589, "Latitude": 45.7629699 },
{ "ID": "18", "Longitude": 2.5292424, "Latitude": 47.0926687 },
{ "ID": "19", "Longitude": 1.8841811, "Latitude": 45.3622055 },
{ "ID": "2A", "Longitude": 8.9906834, "Latitude": 41.8619761 },
{ "ID": "2B", "Longitude": 9.275489, "Latitude": 42.372014 },
{ "ID": "21", "Longitude": 4.7870471, "Latitude": 47.4736746 },
{ "ID": "22", "Longitude": -2.9227591, "Latitude": 48.408402 },
{ "ID": "23", "Longitude": 2.0265508, "Latitude": 46.0837382 },
{ "ID": "24", "Longitude": 0.7140145, "Latitude": 45.1489678 },
{ "ID": "25", "Longitude": 6.3991355, "Latitude": 47.1879451 },
{ "ID": "26", "Longitude": 5.1717552, "Latitude": 44.8055408 },
{ "ID": "27", "Longitude": 0.9488116, "Latitude": 49.1460288 },
{ "ID": "28", "Longitude": 1.2793491, "Latitude": 48.3330017 },
{ "ID": "29", "Longitude": -4.1577074, "Latitude": 48.2869945 },
{ "ID": "30", "Longitude": 4.2650329, "Latitude": 43.9636468 },
{ "ID": "31", "Longitude": 1.2728958, "Latitude": 43.3671081 },
{ "ID": "32", "Longitude": 0.4220039, "Latitude": 43.657141 },
{ "ID": "33", "Longitude": -0.5760716, "Latitude": 44.8406068 },
{ "ID": "34", "Longitude": 3.4197556, "Latitude": 43.62585 },
{ "ID": "35", "Longitude": -1.6443812, "Latitude": 48.1801254 },
{ "ID": "36", "Longitude": 1.6509938, "Latitude": 46.7964222 },
{ "ID": "37", "Longitude": 0.7085619, "Latitude": 47.2802601 },
{ "ID": "38", "Longitude": 5.6230772, "Latitude": 45.259805 },
{ "ID": "39", "Longitude": 5.612871, "Latitude": 46.7398138 },
{ "ID": "40", "Longitude": -0.8771738, "Latitude": 44.0161251 },
{ "ID": "41", "Longitude": 1.3989178, "Latitude": 47.5866519 },
{ "ID": "42", "Longitude": 4.2262355, "Latitude": 45.7451186 },
{ "ID": "43", "Longitude": 3.8118151, "Latitude": 45.1473029 },
{ "ID": "44", "Longitude": -1.7642949, "Latitude": 47.4616509 },
{ "ID": "45", "Longitude": 2.2372695, "Latitude": 47.8631395 },
{ "ID": "46", "Longitude": 1.5732157, "Latitude": 44.6529284 },
{ "ID": "47", "Longitude": 0.4788052, "Latitude": 44.4027215 },
{ "ID": "48", "Longitude": 3.4991239, "Latitude": 44.5191573 },
{ "ID": "49", "Longitude": -0.5136056, "Latitude": 47.3945201 },
{ "ID": "50", "Longitude": -1.3203134, "Latitude": 49.0162072 },
{ "ID": "51", "Longitude": 4.2966555, "Latitude": 48.9479636 },
{ "ID": "52", "Longitude": 5.1325796, "Latitude": 48.1077196 },
{ "ID": "53", "Longitude": -0.7073921, "Latitude": 48.1225795 },
{ "ID": "54", "Longitude": 6.144792, "Latitude": 48.7995163 },
{ "ID": "55", "Longitude": 5.2888292, "Latitude": 49.0074545 },
{ "ID": "56", "Longitude": -2.8746938, "Latitude": 47.9239486 },
{ "ID": "57", "Longitude": 6.5610683, "Latitude": 49.0399233 },
{ "ID": "58", "Longitude": 3.5544332, "Latitude": 47.1122301 },
{ "ID": "59", "Longitude": 3.2466616, "Latitude": 50.4765414 },
{ "ID": "60", "Longitude": 2.4161734, "Latitude": 49.3852913 },
{ "ID": "61", "Longitude": 0.2248368, "Latitude": 48.5558919 },
{ "ID": "62", "Longitude": 2.2555152, "Latitude": 50.4646795 },
{ "ID": "63", "Longitude": 3.1322144, "Latitude": 45.7471805 },
{ "ID": "64", "Longitude": -0.793633, "Latitude": 43.3390984 },
{ "ID": "65", "Longitude": 0.1478724, "Latitude": 43.0526238 },
{ "ID": "66", "Longitude": 2.5239855, "Latitude": 42.5825094 },
{ "ID": "67", "Longitude": 7.5962225, "Latitude": 48.662515 },
{ "ID": "68", "Longitude": 7.2656284, "Latitude": 47.8586205 },
{ "ID": "69", "Longitude": 4.6859896, "Latitude": 45.8714754 },
{ "ID": "70", "Longitude": 6.1388571, "Latitude": 47.5904191 },
{ "ID": "71", "Longitude": 4.6394021, "Latitude": 46.5951234 },
{ "ID": "72", "Longitude": 0.1947322, "Latitude": 48.0041421 },
{ "ID": "73", "Longitude": 6.4662232, "Latitude": 45.4956055 },
{ "ID": "74", "Longitude": 6.3609606, "Latitude": 46.1045902 },
{ "ID": "75", "Longitude": 2.3416082, "Latitude": 48.8626759 },
{ "ID": "76", "Longitude": 1.025579, "Latitude": 49.6862911 },
{ "ID": "77", "Longitude": 2.8977309, "Latitude": 48.5957831 },
{ "ID": "78", "Longitude": 1.8080138, "Latitude": 48.7831982 },
{ "ID": "79", "Longitude": -0.3159014, "Latitude": 46.5490257 },
{ "ID": "80", "Longitude": 2.3380595, "Latitude": 49.9783317 },
{ "ID": "81", "Longitude": 2.2072751, "Latitude": 43.8524305 },
{ "ID": "82", "Longitude": 1.2649374, "Latitude": 44.1254902 },
{ "ID": "83", "Longitude": 6.1486127, "Latitude": 43.5007903 },
{ "ID": "84", "Longitude": 5.065418, "Latitude": 44.0001599 },
{ "ID": "85", "Longitude": -1.3956692, "Latitude": 46.5929102 },
{ "ID": "86", "Longitude": 0.4953679, "Latitude": 46.5719095 },
{ "ID": "87", "Longitude": 1.2500647, "Latitude": 45.9018644 },
{ "ID": "88", "Longitude": 6.349702, "Latitude": 48.1770451 },
{ "ID": "89", "Longitude": 3.5634078, "Latitude": 47.8474664 },
{ "ID": "90", "Longitude": 6.9498114, "Latitude": 47.6184394 },
{ "ID": "91", "Longitude": 2.2714555, "Latitude": 48.5203114 },
{ "ID": "92", "Longitude": 2.2407148, "Latitude": 48.835321 },
{ "ID": "93", "Longitude": 2.4811577, "Latitude": 48.9008719 },
{ "ID": "94", "Longitude": 2.4549766, "Latitude": 48.7832368 },
{ "ID": "95", "Longitude": 2.1802056, "Latitude": 49.076488 },
{ "ID": "974", "Longitude": 55.536384, "Latitude": -21.115141 },
{ "ID": "973", "Longitude": -53.125782, "Latitude": 3.933889 },
{ "ID": "972", "Longitude": -61.024174, "Latitude": 14.641528 },
{ "ID": "971", "Longitude": -61.551, "Latitude": 16.265 }
]
return ListCentroids
def plotDemandeur(dataframe, coderome):
df = dataframe.sort_values(by=['Indicateur'])
fig_demandeur = px.histogram(df, x='Indicateur', y='Valeur', height=1000, title="Demandeurs d'emploi et offres d'emploi du code ROME : " + coderome, color='Indicateur', labels={'Valeur':'Nombre'}, text_auto=True).update_layout(font=dict(size=9,color="RebeccaPurple"),autosize=True)
return fig_demandeur
def plotSalaire(dataframe):
df = dataframe.sort_values(by=['salaire'])
fig_demandeur = px.histogram(df, x='emploi', y='salaire', barmode='group', title="Salaires médians", color='categorie', text_auto=True).update_layout(font=dict(size=9,color="RebeccaPurple"),autosize=True)
return fig_demandeur
def plotDifficulte(dataframe):
if len(dataframe) == 0:
title = "Aucune donnée difficulté de recrutement renseignée!"
else:
title = "Difficulté de recrutement"
df = dataframe.sort_values(by=['Valeur'])
fig_demandeur = px.histogram(df, x='Indicateur', y='Valeur', title=title, color='Indicateur', labels={'Valeur':'Pourcentage'}, text_auto=True).update_layout(font=dict(size=9,color="RebeccaPurple"),autosize=True)
return fig_demandeur
def plotRepartition(dataframe,title):
df = dataframe.sort_values(by=['Valeur'])
fig_repartition = px.pie(df, names='Indicateur', values='Valeur', color='Indicateur', title=title, labels={'Valeur':'pourcentage'}, color_discrete_sequence=px.colors.qualitative.Safe).update_traces(textposition='inside', textinfo='percent+label').update_layout(font=dict(size=10,color="RebeccaPurple"))
return fig_repartition
def removeTags(all):
for data in all(['style', 'script']):
data.decompose()
return ''.join(all.stripped_strings)
def htmlToDataframe(htmlTable):
data = []
list_header = []
soup = BeautifulSoup(htmlTable,'html.parser')
header = soup.find_all("table")[0].find("tr")
for items in header:
try:
list_header.append(items.get_text())
except:
continue
HTML_data = soup.find_all("table")[0].find_all("tr")[1:]
for element in HTML_data:
sub_data = []
for sub_element in element:
try:
sub_data.append(sub_element.get_text())
except:
continue
data.append(sub_data)
dataFrame = pd.DataFrame(data = data, columns = list_header)
return dataFrame
def datavisualisation_chiffres_cles_emplois(url):
response = requests.get(url)
soup = BeautifulSoup(response.text, "lxml")
alldemandeurs = ''
allsalaires = ''
alldifficultes = ''
allrepartitions = ''
allentreprises = ''
allembauches = soup.select('p.population_category')
allnumembauchesfirst = soup.select('p.population_main-num.data')
allnumembauches = removeTags(allnumembauchesfirst[0]).split('\xa0')
allnumembauches = ''.join(allnumembauches)
allnumoffres = removeTags(allnumembauchesfirst[1]).split('\xa0')
allnumoffres = ''.join(allnumoffres)
alldetailembauches = soup.select('p.hiring_text.ng-star-inserted')
allnumevolutionembauches = soup.select('p.main.ng-star-inserted')
alldetailevolutionembauches = soup.select('p.population_bubble-title')
alldemandeurs = "
Indicateur | Valeur |
" + removeTags(allembauches[0]) + " (" + removeTags(alldetailembauches[0]) + ");"
if len(alldetailevolutionembauches) >= 1 and len(allnumevolutionembauches) >= 1:
alldemandeurs += "\nĂvolution demandeurs d'emploi (" + removeTags(alldetailevolutionembauches[0]) + ": " + removeTags(allnumevolutionembauches[0]) + ") | "
else:
alldemandeurs += ""
alldemandeurs += "" + allnumembauches + " |
"
alldemandeurs += "" + removeTags(allembauches[1]) + " (" + removeTags(alldetailembauches[1]) + ");"
if len(alldetailevolutionembauches) >= 2 and len(allnumevolutionembauches) >= 2:
alldemandeurs += "\nĂvolution offres d'emploi (" + removeTags(alldetailevolutionembauches[1]) + ": " + removeTags(allnumevolutionembauches[1]) + ") | "
else:
alldemandeurs += ""
alldemandeurs += "" + allnumoffres + " |
"
alldemandeurs += "
"
allFAP = soup.select('tr.sectorTable__line.ng-star-inserted')
allcategorie = soup.select('td.sectorTable__cell')
alltypesalaires = soup.select('th.sectorTable__cell')
allFAPsalaires = soup.select('p.sectorTable__cellValue')
if len(allFAPsalaires) >= 3:
allsalaires = "categorie | emploi | salaire |
"
allsalaires += "" + removeTags(alltypesalaires[1]) + " | " + removeTags(allcategorie[0]) + " | " + removeTags(allFAPsalaires[0]).replace('\xa0','').replace(' ','').replace('âŹ','') + " |
"
allsalaires += "" + removeTags(alltypesalaires[2]) + " | " + removeTags(allcategorie[0]) + " | " + removeTags(allFAPsalaires[1]).replace('\xa0','').replace(' ','').replace('âŹ','') + " |
"
allsalaires += "" + removeTags(alltypesalaires[3]) + " | " + removeTags(allcategorie[0]) + " | " + removeTags(allFAPsalaires[2]).replace('\xa0','').replace(' ','').replace('âŹ','') + " |
"
if len(allFAP) >= 2 and len(allFAPsalaires) == 6:
allsalaires += "" + removeTags(alltypesalaires[1]) + " | " + removeTags(allcategorie[4]) + " | " + removeTags(allFAPsalaires[3]).replace('\xa0','').replace(' ','').replace('âŹ','') + " |
"
allsalaires += "" + removeTags(alltypesalaires[2]) + " | " + removeTags(allcategorie[4]) + " | " + removeTags(allFAPsalaires[4]).replace('\xa0','').replace(' ','').replace('âŹ','') + " |
"
allsalaires += "" + removeTags(alltypesalaires[3]) + " | " + removeTags(allcategorie[4]) + " | " + removeTags(allFAPsalaires[5]).replace('\xa0','').replace(' ','').replace('âŹ','') + " |
"
allsalaires += "
"
alltypedifficultes = soup.select('.tabs-main-content_persp-col2-bar.ng-star-inserted')
alldifficulte = soup.select('p.horizontal-graph_title')
allpcdifficulte = soup.select('div.horizontal-graph_data')
alldifficultes = "Indicateur | Valeur |
"
for i in range(0,len(alltypedifficultes)):
alldifficultes += "" + removeTags(alldifficulte[i]) + " | " + removeTags(allpcdifficulte[i]).replace('Pour le territoire principal FRANCE pour les ' + removeTags(alldifficulte[i]),'').replace('%','') + " |
"
alldifficultes += "
"
alltyperepartitions = soup.select('div.hiring-contract_legende_item.ng-star-inserted')
allrepartition = soup.select('p.hiring-contract_legende_item_label')
allpcrepartition = soup.select('span.hiring-contract_legende_item-first')
allrepartitions = "Indicateur | Valeur |
"
for i in range(0,len(alltyperepartitions)):
allrepartitions += "" + removeTags(allrepartition[i]).replace('(' + removeTags(allpcrepartition[i]) + ')','') + " | " + removeTags(allpcrepartition[i]).replace('%','').replace(',','.') + " |
"
allrepartitions += "
"
allentrepriserepartitions = soup.select('div.horizontal-graph_pattern.sm-bubble_wrapper > span')
allentreprise = soup.select('span.sr-only')
allpcentreprise = soup.select('span.data.ng-star-inserted')
allentreprises = "Indicateur | Valeur |
"
for i in range(0,len(allentrepriserepartitions)):
allentreprises += "" + removeTags(allentrepriserepartitions[i])[0:-4] + " | " + removeTags(allentrepriserepartitions[i])[-4:].replace('%','').replace(',','.') + " |
"
allentreprises += "
"
return [alldemandeurs, allsalaires, alldifficultes, allrepartitions, allentreprises]
def listToString(list):
return str(list)
def arrayOfSecteur(array):
df_naf = pd.read_csv("./public/secteur_naf_v01.csv")
df_naf = df_naf[['libelle_secteur_naf', 'code_rome']].copy()
df_naf = df_naf.groupby(df_naf['code_rome'], as_index=False).agg({'libelle_secteur_naf': '; '.join})
df_rome = pd.DataFrame({'code_rome': array})
df_romeNaf = pd.merge(df_rome, df_naf, on="code_rome")
df_secteur = df_romeNaf[['libelle_secteur_naf']].copy()
return df_secteur.values.tolist()
def arrayToString(array):
arrayList = []
for i in range(0,len(array)):
if listToString(array[i]).find("libelle")!=-1:
arrayList.append(array[i]['libelle'])
else:
arrayList.append("; ")
string = ', '.join(arrayList)
return string + '; '
def searchByRome(rome,index):
libelle = ''
if rome.find(',') != -1:
romeArray = rome.split(',')
for i in range(0,len(romeArray)):
codeRome = romeArray[i].strip()
if i <= 5 and len(codeRome) == 5:
all_docs = index.query(
top_k=1,
vector= [0] * 768, # embedding dimension
namespace='',
filter={"categorie": {"$eq": "rome"}, "rome":{"$eq": codeRome}},
include_metadata=True
)
libelle = libelle + " " + all_docs['matches'][0]['metadata']['libelle_rome']
else:
all_docs = index.query(
top_k=1,
vector= [0] * 768, # embedding dimension
namespace='',
filter={"categorie": {"$eq": "rome"}, "rome":{"$eq": rome}},
include_metadata=True
)
libelle = libelle + " " + all_docs['matches'][0]['metadata']['libelle_rome']
return libelle
@cl.author_rename
def rename(orig_author: str):
rename_dict = {"ConversationalRetrievalChain": "đŹ Assistant conversationnel", "Retriever": "Agent conversationnel", "StuffDocumentsChain": "ChaĂźne de documents", "LLMChain": "Agent", "HuggingFaceEndpoint": "Mistral AI đ€"}
return rename_dict.get(orig_author, orig_author)
@cl.action_callback("datavizChiffresClesMetiers")
async def on_action(action):
romeListArray = ast.literal_eval(action.value)
elements = []
for j in range(0, len(romeListArray)):
table = datavisualisation_chiffres_cles_emplois("https://dataemploi.pole-emploi.fr/metier/chiffres-cles/NAT/FR/" + romeListArray[j])
plot_demandeur = plotDemandeur(htmlToDataframe(table[0]), romeListArray[j])
elements.append(cl.Plotly(name="chart_demandeur", figure=plot_demandeur, display="inline", size="large"))
if len(table[1]) > 0:
plot_salaire = plotSalaire(htmlToDataframe(table[1]))
elements.append(cl.Plotly(name="chart_salaire", figure=plot_salaire, display="inline", size="large"))
plot_difficulte = plotDifficulte(htmlToDataframe(table[2]))
elements.append(cl.Plotly(name="chart_difficulte", figure=plot_difficulte, display="inline", size="large"))
plot_repartitionContrat = plotRepartition(htmlToDataframe(table[3]), "Répartition des embauches du métier : type de contrat")
elements.append(cl.Plotly(name="chart_repatitionContrat", figure=plot_repartitionContrat, display="inline", size="large"))
plot_repartitionEntreprise = plotRepartition(htmlToDataframe(table[4]), "Répartition des embauches du métier : type entreprise")
elements.append(cl.Plotly(name="chart_repartitionEntreprise", figure=plot_repartitionEntreprise, display="inline", size="large"))
await cl.Message(content="Datavisualisation des chiffres clés des Métiers", elements=elements).send()
@cl.action_callback("download")
async def on_action(action):
content = []
content.append(action.value)
arrayContent = np.array(content)
df = pd.DataFrame(arrayContent)
with open('./' + action.description + '.txt', 'wb') as csv_file:
df.to_csv(path_or_buf=csv_file, index=False,header=False, encoding='utf-8')
elements = [
cl.File(
name= action.description + ".txt",
path="./" + action.description + ".txt",
display="inline",
),
]
await cl.Message(
author="Datapcc : đđđ", content="[Lien] đ", elements=elements
).send()
await action.remove()
@cl.action_callback("saveMemory")
async def on_action(action):
buffer = cl.user_session.get("saveMemory")
cl.user_session.set("saveMemory", buffer + action.value)
await cl.Message(
author="Datapcc : đđđ", content="đïž Document sauvegardĂ© dans le buffer Memory!"
).send()
await action.remove()
@cl.cache
def to_cache(file):
#time.sleep(5) # Simulate a time-consuming process
return "https://cipen.univ-gustave-eiffel.fr/fileadmin/CIPEN/datas/assets/docs/" + file + ".csv"
@cl.set_chat_profiles
async def chat_profile():
return [
cl.ChatProfile(name="Catalogue ROME - ROMESKILLS",markdown_description="Les compétences du catalogue ROME",icon="./public/favicon.png",),
cl.ChatProfile(name="Classification ESCO - ESCOSKILLS",markdown_description="Les compétences de la classification ESCO",icon="./public/favicon.png",),
]
@cl.on_chat_start
async def start():
await cl.Avatar(
name="You",
path="./public/logo-ofipe.jpg",
).send()
chat_profile = cl.user_session.get("chat_profile")
chatProfile = chat_profile.split(' - ')
if chatProfile[1] == 'ROMESKILLS':
contextChat = await homeRome()
categorie = cl.user_session.set("categorie", os.environ['PINECONE_API_KEYROME'])
else:
contextChat = await homeEsco()
categorie = cl.user_session.set("categorie", os.environ['PINECONE_API_KEYESCO'])
os.environ['HUGGINGFACEHUB_API_TOKEN'] = os.environ['HUGGINGFACEHUB_API_TOKEN']
repo_id = "mistralai/Mistral-7B-Instruct-v0.3"
#repo_id = "mistralai/Mistral-Small-Instruct-2409"
model = HuggingFaceEndpoint(
repo_id=repo_id,
max_new_tokens=6000,
temperature=1.0,
streaming=True
)
if not cl.user_session.get("saveMemory"):
cl.user_session.set("saveMemory", "")
cl.user_session.set("memory", ConversationBufferMemory(return_messages=True))
memory = cl.user_session.get("memory")
prompt = ChatPromptTemplate.from_messages(
[
(
"system",
f"Contexte : Vous ĂȘtes un spĂ©cialiste du marchĂ© de l'emploi en fonction du niveau de qualification, des compĂ©tences professionnelles, des compĂ©tences transversales, du salaire et de l'expĂ©rience. Vous ĂȘtes douĂ© pour faire des analyses du systĂšme travail sur les mĂ©tiers les plus demandĂ©s grĂące Ă votre aptitude Ă synthĂ©tiser les informations en fonction des critĂšres dĂ©finis ci-avant. En fonction des informations suivantes et du contexte suivant seulement et strictement. Contexte : {contextChat[0:26500]}. RĂ©ponds Ă la question suivante de la maniĂšre la plus pertinente, la plus exhaustive et la plus dĂ©taillĂ©e possible, avec au minimum 3000 tokens jusqu'Ă 4000 tokens, seulement et strictement dans le contexte et les informations fournies. Essayez donc de comprendre en profondeur le contexte et rĂ©pondez uniquement en vous basant sur les informations fournies.",
),
MessagesPlaceholder(variable_name="history"),
("human", "{question}, dans le contexte fourni."),
]
)
runnable = (
RunnablePassthrough.assign(
history=RunnableLambda(memory.load_memory_variables) | itemgetter("history")
)
| prompt
| model
)
cl.user_session.set("runnable", runnable)
@literal_client.step(type="run")
async def construction_NCS(competenceList, chatProfile):
context = await contexte(competenceList, chatProfile)
emploisST = context.to_string(index = False)
if chatProfile == 'ROMESKILLS':
romeListArray = cl.user_session.get("codeRomeArray")
stringLsitOfEntreprise = await creation_liste_entreprises(arrayOfSecteur(romeListArray))
ficheClesMetier = await document_chiffres_cles_emplois("https://dataemploi.francetravail.fr/metier/chiffres-cles/NAT/FR/", romeListArray)
contentChatBot = str(emploisST).replace('[','').replace(']','').replace('{','').replace('}','') + ficheClesMetier
finals_df = context[['intitule','typeContratLibelle','experienceLibelle','competences','description','qualitesProfessionnelles','salaire','lieuTravail','formations']].copy()
listEmplois = finals_df.values.tolist()
stringEmplois = ''
for i in range(0,len(listEmplois)):
stringEmplois += "\nâïž Emploi : " + str(listEmplois[i][0]) + ";\nâ Contrat : " + str(listEmplois[i][1]) + ";\nâ CompĂ©tences professionnelles : " + str(listEmplois[i][3]) + ";\n" + "â Salaire : " + str(listEmplois[i][6]) + ";\nâ Qualification : " + str(listEmplois[i][5]).replace("'libelle'","\nâą 'libelle") + ";\nâ Localisation : " + str(listEmplois[i][7]) + ";\nâ ExpĂ©rience : " + str(listEmplois[i][2]) + ";\nâ Niveau de qualification : " + str(listEmplois[i][8]) + ";\nâ Description de l'emploi : " + str(listEmplois[i][4]) + "\n"
await cl.sleep(1)
listEmplois_name = f"Liste des emplois"
text_elements = []
text_elements.append(
cl.Text(content="Question : " + competenceList + "\n\nRĂ©ponse :\n" + stringEmplois.replace('[','').replace(']','').replace('{','').replace('}','').replace("'code'","\nâą 'code'"), name=listEmplois_name)
)
await cl.Message(author="Datapcc : đđđ",content="đšâđŒ Source France Travail : " + listEmplois_name, elements=text_elements).send()
await cl.sleep(1)
listEntreprise_name = f"Liste des entreprises"
entreprise_elements = []
entreprise_elements.append(
cl.Text(content="Question : " + competenceList + "\n\nRĂ©ponse :\n" + stringLsitOfEntreprise, name=listEntreprise_name)
)
await cl.Message(author="Datapcc : đđđ",content="đ Source Registre National des Entreprises : " + listEntreprise_name, elements=entreprise_elements).send()
await cl.sleep(1)
listClesMetier_name = f"Chiffres clés des emplois"
text_ClesMetier = []
text_ClesMetier.append(
cl.Text(content="Question : " + competenceList + "\n\nRĂ©ponse :\n" + ficheClesMetier, name=listClesMetier_name)
)
await cl.Message(author="Datapcc : đđđ",content="đ Source France Travail : " + listClesMetier_name, elements=text_ClesMetier).send()
await cl.sleep(1)
datavizChiffresClesMetiers = [
cl.Action(name="datavizChiffresClesMetiers", value=str(romeListArray), description="Afficher la datavisualisation des chiffres clés des métiers")
]
await cl.Message(author="Datapcc : đđđ",content="đ Afficher la datavisualisation des chiffres clĂ©s des mĂ©tiers", actions=datavizChiffresClesMetiers).send()
await cl.sleep(1)
codeArray = romeListArray
ficheMetiers = []
for i in range(0,len(codeArray)):
ficheMetiers = [
cl.File(name= "Fiche métier " + codeArray[i],url="https://www.soi-tc.fr/assets/fiches_pe/FEM_" + codeArray[i] + ".pdf",display="inline",)
]
await cl.Message(
author="Datapcc : đđđ", content="[Fiches mĂ©tiers] đ", elements=ficheMetiers
).send()
else:
contentChatBot = str(emploisST).replace('[','').replace(']','').replace('{','').replace('}','')
finals_df = context[['intitule','typeContratLibelle','experienceLibelle','competences','description','qualitesProfessionnelles','salaire','lieuTravail','formations']].copy()
listEmplois = finals_df.values.tolist()
stringEmplois = ''
for i in range(0,len(listEmplois)):
stringEmplois += "\nâïž Emploi : " + str(listEmplois[i][0]) + ";\nâ Contrat : " + str(listEmplois[i][1]) + ";\nâ CompĂ©tences professionnelles : " + str(listEmplois[i][3]) + ";\n" + "â Salaire : " + str(listEmplois[i][6]) + ";\nâ Qualification : " + str(listEmplois[i][5]).replace("'libelle'","\nâą 'libelle") + ";\nâ Localisation : " + str(listEmplois[i][7]) + ";\nâ ExpĂ©rience : " + str(listEmplois[i][2]) + ";\nâ Niveau de qualification : " + str(listEmplois[i][8]) + ";\nâ Description de l'emploi : " + str(listEmplois[i][4]) + "\n"
await cl.sleep(1)
listEmplois_name = f"Liste des emplois"
text_elements = []
text_elements.append(
cl.Text(content="Question : " + competenceList + "\n\nRĂ©ponse :\n" + stringEmplois.replace('[','').replace(']','').replace('{','').replace('}','').replace("'code'","\nâą 'code'"), name=listEmplois_name)
)
await cl.Message(author="Datapcc : đđđ",content="đšâđŒ Source France Travail : " + listEmplois_name, elements=text_elements).send()
cl.user_session.set("contextChatBot", contentChatBot[0:28875])
await datavisualisation_statistiques_emplois(context)
return "datavisualisation des statistiques de l'emploi"
@cl.step(type="run")
async def recuperation_contexte(getNote):
getContext = cl.user_session.get(getNote)
return getNote + " :\n" + getContext
@cl.step(type="retrieval")
async def contexte(competence, chatProfile):
#chat_profile = cl.user_session.get("chat_profile")
#chatProfile = chat_profile.split(' - ')
if chatProfile == 'ROMESKILLS':
results = await creation_liste_code_Rome(competence, cl.user_session.get("categorie"))
else:
results = await creation_liste_skills_Esco(competence, cl.user_session.get("categorie"))
await cl.sleep(1)
romeListArray = cl.user_session.get("codeRomeArray")
df_emplois = await API_France_Travail(romeListArray)
return df_emplois
@cl.step(type="tool")
async def document_chiffres_cles_emplois(url, codes):
all = ""
codeArray = codes
for i in range(0,len(codeArray)):
response = requests.get(url + codeArray[i])
soup = BeautifulSoup(response.text, "html.parser")
if soup.select('h1#titreMetier'):
alltitre = soup.select('h1#titreMetier')
allTitre = removeTags(alltitre[0])
else:
allTitre = ""
if soup.select('div.jobs_item-container-flex'):
allembauches = soup.select('div.jobs_item-container-flex')
allEmbauches = removeTags(allembauches[0])
else:
allEmbauches = ""
if soup.select('div.key-number_block.shadow.inset'):
allsalaires = soup.select('div.key-number_block.shadow.inset')
allSalaires = removeTags(allsalaires[0])
else:
allSalaires = ""
if soup.select('tbody.sectorTable__body'):
allsalairesMedian = soup.select('tbody.sectorTable__body')
allSalairesMedian = removeTags(allsalairesMedian[0])
else:
allSalairesMedian = ""
if soup.select('div.dynamism_canvas-wrapper > p.sr-only'):
allDiff = soup.select('div.dynamism_canvas-wrapper > p.sr-only')
alldiff = removeTags(allDiff[0])
else:
alldiff = ""
if soup.select('div.tabs-main-data_persp-col2'):
allDiffOrigin = soup.select('div.tabs-main-data_persp-col2')
alldiffOrigin = removeTags(allDiffOrigin[0])
else:
alldiffOrigin = ""
allTypeContrat = ""
if soup.find_all("div", class_="hiring-contract_legende_item ng-star-inserted"):
allContrat = soup.find_all("div", class_="hiring-contract_legende_item ng-star-inserted")
for j in range(0,len(allContrat)):
allTypeContrat = allTypeContrat + removeTags(allContrat[j]) + ", "
if soup.find_all("div", class_="horizontal-graph_patterns"):
allEntreprise = soup.find_all("div", class_="horizontal-graph_patterns")
allentreprise = removeTags(allEntreprise[0])
else:
allentreprise = ""
all = all + "\n\nChiffres-clĂ©s MĂ©tier : \n**" + allTitre + "**:\nâ Demandeurs d'emploi et Offres d'emploi : " + allEmbauches.replace("Plus de donnĂ©es sur les Demandeurs d'emploi","").replace("Plus de donnĂ©es","") + ".\nâ Salaires proposĂ©s dans les offres : " + allSalaires + ".\nâ Salaires mĂ©dians constatĂ©s : " + allSalairesMedian + ".\nâ DifficultĂ©s de recrutement pour les entreprises : " + alldiff + ".\nâ Origine des difficultĂ©s : " + alldiffOrigin + ".\nâ RĂ©partition des embauches par type de contrat : " + allTypeContrat + ".\nâ RĂ©partition des embauches par taille d'entreprise : " + allentreprise + "."
return all
@cl.step(type="tool")
async def datavisualisation_statistiques_emplois(results_df):
arraydataframe = []
arrayfirstdataframe = []
arraylocalisationdataframe = []
results = []
count = 0
if results_df.empty == False:
count = count + 1
finals = results_df[['intitule','typeContratLibelle','experienceLibelle','competences','qualitesProfessionnelles','salaire','lieuTravail','formations']].copy()
finals["lieuTravail"] = finals["lieuTravail"].apply(lambda x: x['libelle']).apply(lambda x: x[0:3]).apply(lambda x: x.strip())
finals_df = finals
finals_df.dropna(subset=['qualitesProfessionnelles','formations','competences'], inplace=True)
finals_df["competences"] = finals_df["competences"].apply(lambda x:[str(e['libelle']) for e in x]).apply(lambda x:'; '.join(map(str, x)))
finals_df["qualitesProfessionnelles"] = finals_df["qualitesProfessionnelles"].apply(lambda x:[str(e['libelle']) + ": " + str(e['description']) for e in x]).apply(lambda x:'; '.join(map(str, x)))
finals_df["formations"] = finals_df["formations"].apply(lambda x:[str(e['niveauLibelle']) for e in x]).apply(lambda x:'; '.join(map(str, x)))
finals_df = finals_df.sort_values(by=['lieuTravail'])
finals_localisation = results_df[['lieuTravail']].copy()
finals_localisation["lieuTravail"] = finals_localisation["lieuTravail"].apply(lambda x: np.array(x)).apply(lambda x: x['libelle']).apply(lambda x: x[0:3]).apply(lambda x: x.strip())
finals_localisation.drop(finals_localisation[finals_localisation['lieuTravail'] == 'Fra'].index, inplace = True)
finals_localisation.drop(finals_localisation[finals_localisation['lieuTravail'] == 'FRA'].index, inplace = True)
finals_localisation.drop(finals_localisation[finals_localisation['lieuTravail'] == 'Ile'].index, inplace = True)
finals_localisation.drop(finals_localisation[finals_localisation['lieuTravail'] == 'Mar'].index, inplace = True)
finals_localisation.drop(finals_localisation[finals_localisation['lieuTravail'] == 'Bou'].index, inplace = True)
finals_localisation.drop(finals_localisation[finals_localisation['lieuTravail'] == '976'].index, inplace = True)
arraylocalisationdataframe.append(finals_localisation)
arrayfirstdataframe.append(results_df)
if len(finals_df) != 0:
arraydataframe.append(finals_df)
first_df = pd.concat(arrayfirstdataframe)
finals_df = pd.concat(arraydataframe)
localisation_df = pd.concat(arraylocalisationdataframe)
######## Emplois ########
df_intitule = first_df.groupby('intitule').size().reset_index(name='obs')
df_intitule = df_intitule.sort_values(by=['obs'])
df_intitule = df_intitule.iloc[-25:]
fig_intitule = px.bar(df_intitule, x='obs', y='intitule', orientation='h', color='obs', title="Les principaux emplois", labels={'obs':'nombre'}, color_continuous_scale="Teal", text_auto=True).update_layout(font=dict(size=10,color="RebeccaPurple"),autosize=True).update_traces(hovertemplate=df_intitule["intitule"] + '
Nombre : %{x}', y=[y[:100] + "..." for y in df_intitule["intitule"]], showlegend=False)
######## Types de contrat ########
df_contrat = first_df.groupby('typeContratLibelle').size().reset_index(name='obs')
fig_contrat = px.pie(df_contrat, names='typeContratLibelle', values='obs', color='obs', title="Les types de contrat", labels={'obs':'nombre'}, color_discrete_sequence=px.colors.qualitative.Safe).update_traces(textposition='inside', textinfo='percent+label').update_layout(font=dict(size=10,color="RebeccaPurple"))
df_secteur = first_df.groupby('secteurActiviteLibelle').size().reset_index(name='obs')
df_secteur = df_secteur.sort_values(by=['obs'])
df_secteur = df_secteur.iloc[-25:]
fig_secteur = px.bar(df_secteur, x='obs', y='secteurActiviteLibelle', orientation='h', color='obs', title="Les principaux secteurs d'activités", labels={'obs':'nombre'}, color_continuous_scale="Teal", text_auto=True).update_layout(font=dict(size=10,color="RebeccaPurple"),autosize=True).update_traces(hovertemplate=df_secteur["secteurActiviteLibelle"] + '
Nombre : %{x}', y=[y[:100] + "..." for y in df_secteur["secteurActiviteLibelle"]], showlegend=False)
######## Compétences professionnelles ########
df1 = finals_df
df1['competences'] = finals_df['competences'].str.split(';')
df2 = df1.explode('competences')
df2 = df2.groupby('competences').size().reset_index(name='obs')
df2 = df2.sort_values(by=['obs'])
df2 = df2.iloc[-20:]
fig_competences = px.bar(df2, x='obs', y='competences', orientation='h', color='obs', title="Les principales compétences professionnelles", labels={'obs':'nombre'}, color_continuous_scale="Teal", text_auto=True).update_layout(font=dict(size=10,color="RebeccaPurple"),autosize=True).update_traces(hovertemplate=df2["competences"] + '
Nombre : %{x}', y=[y[:100] + "..." for y in df2['competences']], showlegend=False)
######## Compétences transversales ########
df_transversales = finals_df
df_transversales['qualitesProfessionnelles'] = finals_df['qualitesProfessionnelles'].str.split(';')
df_comptransversales = df_transversales.explode('qualitesProfessionnelles')
df_comptransversales = df_comptransversales.groupby('qualitesProfessionnelles').size().reset_index(name='obs')
df_comptransversales = df_comptransversales.sort_values(by=['obs'])
df_comptransversales = df_comptransversales.iloc[-20:]
fig_transversales = px.bar(df_comptransversales, x='obs', y='qualitesProfessionnelles', orientation='h', color='obs', title="Les principales compétences transversales", labels={'obs':'nombre'}, color_continuous_scale="Teal", text_auto=True).update_layout(font=dict(size=10,color="RebeccaPurple"),autosize=True).update_traces(hovertemplate=df_comptransversales["qualitesProfessionnelles"] + '
Nombre : %{x}', y=[y[:100] + "..." for y in df_comptransversales["qualitesProfessionnelles"]], showlegend=False)
######## Niveaux de qualification ########
df_formations = finals_df.groupby('formations').size().reset_index(name='obs')
fig_formations = px.pie(df_formations, names='formations', values='obs', color='obs', title="Les niveaux de qualification", labels={'obs':'nombre'}, color_discrete_sequence=px.colors.qualitative.Safe).update_traces(textposition='inside', textinfo='percent+label').update_layout(font=dict(size=10,color="RebeccaPurple"))
######## Expériences professionnelles ########
df_experience = finals_df.groupby('experienceLibelle').size().reset_index(name='obs')
fig_experience = px.pie(df_experience, names='experienceLibelle', values='obs', color='obs', title="Les expériences professionnelles", labels={'obs':'nombre'}, color_discrete_sequence=px.colors.qualitative.Safe).update_traces(textposition='inside', textinfo='percent+label').update_layout(font=dict(size=10,color="RebeccaPurple"))
res = requests.get(
"https://raw.githubusercontent.com/codeforgermany/click_that_hood/main/public/data/spain-provinces.geojson"
)
######## localisation ########
ListCentroids = localisation()
df_localisation = localisation_df.groupby('lieuTravail').size().reset_index(name='obs')
df_localisation = df_localisation.sort_values(by=['lieuTravail'])
df_localisation['longitude'] = df_localisation['lieuTravail']
df_localisation['latitude'] = df_localisation['lieuTravail']
df_localisation["longitude"] = df_localisation['longitude'].apply(lambda x:[loc['Longitude'] for loc in ListCentroids if loc['ID'] == x]).apply(lambda x:''.join(map(str, x)))
df_localisation["longitude"] = pd.to_numeric(df_localisation["longitude"], downcast="float")
df_localisation["latitude"] = df_localisation['latitude'].apply(lambda x:[loc['Latitude'] for loc in ListCentroids if loc['ID'] == x]).apply(lambda x:''.join(map(str, x)))
df_localisation["latitude"] = pd.to_numeric(df_localisation["latitude"], downcast="float")
fig_localisation = px.scatter_mapbox(df_localisation, lat="latitude", lon="longitude", hover_name="lieuTravail", size="obs").update_layout(
mapbox={
"style": "carto-positron",
"center": {"lon": 2, "lat" : 47},
"zoom": 4.5,
"layers": [
{
"source": res.json(),
"type": "line",
"color": "green",
"line": {"width": 0},
}
],
}
)
elements = []
elements.append(cl.Plotly(name="chart_intitule", figure=fig_intitule, display="inline", size="large"))
elements.append(cl.Plotly(name="chart_contrat", figure=fig_contrat, display="inline", size="large"))
elements.append(cl.Plotly(name="chart_competences", figure=fig_competences, display="inline", size="large"))
elements.append(cl.Plotly(name="chart_transversales", figure=fig_transversales, display="inline", size="large"))
elements.append(cl.Plotly(name="chart_formations", figure=fig_formations, display="inline", size="large"))
elements.append(cl.Plotly(name="chart_experience", figure=fig_experience, display="inline", size="large"))
elements.append(cl.Plotly(name="chart_secteur", figure=fig_secteur, display="inline", size="large"))
elements.append(cl.Plotly(name="chart_localisation", figure=fig_localisation, display="inline", size="large"))
await cl.Message(content="Datavisualisation du marché de l'emploi", elements=elements).send()
@cl.step(type="tool")
async def API_France_Travail(romeListArray):
client = await connexion_France_Travail()
todayDate = datetime.datetime.today()
month, year = (todayDate.month-1, todayDate.year) if todayDate.month != 1 else (12, todayDate.year-1)
start_dt = todayDate.replace(day=1, month=month, year=year)
end_dt = datetime.datetime.today()
results = []
for k in romeListArray:
if k[0:1] == ' ':
k = k[1:]
params = {"motsCles": k.replace('/', '').replace('-', '').replace(',', '').replace(' ', ','),'minCreationDate': dt_to_str_iso(start_dt),'maxCreationDate': dt_to_str_iso(end_dt),'range':'0-149'}
try:
search_on_big_data = client.search(params=params)
results += search_on_big_data["resultats"]
except:
print("Il n'y a pas d'offres d'emploi.")
results_df = pd.DataFrame(results)
return results_df
@cl.step(type="tool")
async def creation_liste_entreprises(arrayOfsecteur):
docsearch = await connexion_vector_database()
for j in range(0, len(arrayOfsecteur)):
retrieve_comp = docsearch.similarity_search(arrayOfsecteur[j][0], k=1, filter={"categorie": {"$eq": "inpiSecteur"}})
codeSecteur = int(retrieve_comp[0].metadata['codefinal'])
if len(str(codeSecteur)) == 7:
codeSecteurSTR = "0" + str(codeSecteur)
else:
codeSecteurSTR = str(codeSecteur)
token = await connexion_registre_national_entreprises()
url = f"https://registre-national-entreprises.inpi.fr/api/companies?page=1&pageSize=500&codeCategory={codeSecteurSTR}"
print(url)
headers = {"Authorization": f"Bearer {token}"}
response = requests.get(url, headers=headers)
if response.status_code == 200:
print('OK')
documents = response.json()
df = pd.DataFrame(documents)
ficheEntreprise = ''
for i in range (0, len(df)):
if str(df['formality'][i]['content']).find('cessation') == -1 and str(df['formality'][i]['content']).find('personneMorale') != -1:
if str(df['formality'][i]['content']['personneMorale']).find('adresseEntreprise') != -1:
ficheEntreprise += "\nđ DĂ©nomination : " + str(df['formality'][i]['content']['personneMorale']['identite']['entreprise']['denomination']) + "; Code SIREN : " + str(df['formality'][i]['content']['personneMorale']['identite']['entreprise']['siren']) + "\n\tAdresse : "
if str(df['formality'][i]['content']['personneMorale']['adresseEntreprise']['adresse']).find("'numVoie'") != -1:
ficheEntreprise += "n° " + str(df['formality'][i]['content']['personneMorale']['adresseEntreprise']['adresse']['numVoie'])
if str(df['formality'][i]['content']['personneMorale']['adresseEntreprise']['adresse']).find("'typeVoie'") != -1:
ficheEntreprise += " " + str(df['formality'][i]['content']['personneMorale']['adresseEntreprise']['adresse']['typeVoie'])
if str(df['formality'][i]['content']['personneMorale']['adresseEntreprise']['adresse']).find("'voie'") != -1:
ficheEntreprise += " " + str(df['formality'][i]['content']['personneMorale']['adresseEntreprise']['adresse']['voie'])
if str(df['formality'][i]['content']['personneMorale']['adresseEntreprise']['adresse']).find("'complementLocalisation'") != -1:
ficheEntreprise += ", " + str(df['formality'][i]['content']['personneMorale']['adresseEntreprise']['adresse']['complementLocalisation'])+ ","
if str(df['formality'][i]['content']['personneMorale']['adresseEntreprise']['adresse']).find("'codePostal'") != -1:
ficheEntreprise += " " + str(df['formality'][i]['content']['personneMorale']['adresseEntreprise']['adresse']['codePostal'])
if str(df['formality'][i]['content']['personneMorale']['adresseEntreprise']['adresse']).find("'commune'") != -1:
ficheEntreprise += " " + str(df['formality'][i]['content']['personneMorale']['adresseEntreprise']['adresse']['commune'])
if str(df['formality'][i]['content']['personneMorale']).find("autresEtablissements") != -1:
ficheEntreprise += "\n\tActivités : " + str(df['formality'][i]['content']['personneMorale']['autresEtablissements'][0]['activites'][0]['descriptionDetaillee']) + "\n\tCode APE : " + str(df['formality'][i]['content']['personneMorale']['autresEtablissements'][0]['activites'][0]['codeApe'])
else:
ficheEntreprise += "Il n'y a pas d'entreprises pour le secteur sélectionné : " + arrayOfsecteur[j][0]
return ficheEntreprise
@cl.step(type="tool")
async def creation_liste_code_Rome(competence, categorie):
docsearch = await connexion_vector_database()
retrieve_comp = docsearch.similarity_search(competence, k=30, filter={"categorie": {"$eq": categorie}})
retrieve = pd.DataFrame(retrieve_comp)
codeRome = []
competence = []
metier = []
for i in range(0,len(retrieve_comp)):
codeRome.append(retrieve_comp[i].metadata['code_rome'])
competence.append(retrieve_comp[i].metadata['libelle_competence'])
metier.append(retrieve_comp[i].metadata['libelle_appellation_long'])
results_df = pd.DataFrame({'codeRome': codeRome,'competence': competence, 'metier': metier})
arrayresults = results_df.values.tolist()
displayresults = '| Code Rome | Compétence | Métier |\n| -------- | ------- | ------- |'
for j in range(0, len(arrayresults)):
displayresults += '\n| ' + arrayresults[j][0] + ' | ' + arrayresults[j][1] + ' | ' + arrayresults[j][2] + ' |'
print(arrayresults[0][0] + arrayresults[0][1] + arrayresults[0][2])
await cl.Message(author="Datapcc : đđđ",content="Voici le rĂ©sultat de la recherche sĂ©mantique sur le catalogue Rome :\n" + displayresults).send()
results_df = results_df.drop_duplicates(subset=["codeRome"])
results_df = results_df.head(5)
codeRomeString = results_df["codeRome"].to_string(index = False)
codeRome_list = results_df["codeRome"].tolist()
actionRome = await cl.AskActionMessage(
content="Etes-vous d'accord avec la sélection des 5 codes Rome automatiques issus de la recherche sémantique ? :\n" + codeRomeString.replace(' ',','),
actions=[
cl.Action(name="continue", value="Offres d'emploi en temps rĂ©el", label="â
Oui, je veux continuer vers l'extraction en temps réel des offres d'emploi"),
cl.Action(name="cancel", value="Saisie des codes Rome", label="â Non, je veux saisir ma liste de codes Rome, sĂ©parĂ©s par des virgules"),
], timeout=3600
).send()
if actionRome and actionRome.get("name") == "continue":
await cl.Message(
content="Connexion à France Travail, et récupération des offres d'emploi",
).send()
cl.user_session.set("codeRomeArray", codeRome_list)
else:
actionsaisierome = await cl.AskUserMessage(content="Saisissez vos codes Rome dans le prompt? â ïž Attention, indiquez seulement des codes Rome sĂ©parĂ©s par des virgules", timeout=3600).send()
if actionsaisierome:
await cl.Message(
content=f"Votre saisie est : {actionsaisierome['output']}",
).send()
stringCodeRome = actionsaisierome['output'].replace(' ','')
stopWords = [';','.',':','!','|']
teststringCodeRome = [ele for ele in stopWords if(ele in stringCodeRome)]
teststringCodeRome = bool(teststringCodeRome)
if teststringCodeRome == False:
arrayCodeRome = stringCodeRome.split(',')
else:
arrayCodeRome = codeRome_list
await cl.Message(author="Datapcc : đđđ",content="Votre saisie est erronĂ©e. Nous continuons l'action avec les codes Rome sĂ©lectionnĂ©s automatiquement pour vous : " + codeRomeString).send()
cl.user_session.set("codeRomeArray", arrayCodeRome)
@cl.step(type="tool")
async def creation_liste_skills_Esco(competence, categorie):
docsearch = await connexion_vector_database()
retrieve_comp = docsearch.similarity_search(competence, k=40, filter={"categorie": {"$eq": categorie}})
retrieve = pd.DataFrame(retrieve_comp)
competence = []
description_competence = []
metier = []
description_metier = []
for i in range(0,len(retrieve_comp)):
competence.append(retrieve_comp[i].metadata['compétence'])
description_competence.append(retrieve_comp[i].metadata['description_compétence'])
metier.append(retrieve_comp[i].metadata['métier'])
description_metier.append(retrieve_comp[i].metadata['description_métier'])
results_df = pd.DataFrame({'compétence': competence,'description_compétence': description_competence, 'métier': metier, 'description_métier': description_metier})
arrayresults = results_df.values.tolist()
displayresults = '| Compétence | Description Compétence | Métier | Description Métier |\n| -------- | ------- | ------- | ------- |'
for j in range(0, len(arrayresults)):
displayresults += '\n| ' + arrayresults[j][0] + ' | ' + arrayresults[j][1] + ' | ' + arrayresults[j][2] + ' | ' + arrayresults[j][3] + ' |'
await cl.Message(author="Datapcc : đđđ",content="Voici le rĂ©sultat de la recherche sĂ©mantique sur la classification ESCO :\n" + displayresults).send()
results_df = results_df.drop_duplicates(subset=["métier"])
results_df = results_df.head(10)
codeRomeString = results_df["métier"].to_string(index = False)
codeRome_list = results_df["métier"].tolist()
actionRome = await cl.AskActionMessage(
content="Etes-vous d'accord avec la sélection des 10 métiers automatiques pour compléter la liste des compétences avec celles attendues sur le marché du travail? :\n" + codeRomeString,
actions=[
cl.Action(name="continue", value="Offres d'emploi en temps rĂ©el", label="â
Oui, je veux continuer vers l'extraction en temps réel des offres d'emploi"),
cl.Action(name="cancel", value="Saisie des codes Rome", label="â Non, je veux saisir ma liste de mĂ©tiers, sĂ©parĂ©s par des points-virgules"),
], timeout=3600
).send()
if actionRome and actionRome.get("name") == "continue":
await cl.Message(
content="Connexion à France Travail, et récupération des offres d'emploi",
).send()
cl.user_session.set("codeRomeArray", codeRome_list)
else:
actionsaisierome = await cl.AskUserMessage(content="Saisissez vos mĂ©tiers dans le prompt? â ïž Attention, indiquez seulement des mĂ©tiers sĂ©parĂ©s par des points-virgules", timeout=3600).send()
if actionsaisierome:
await cl.Message(
content=f"Votre saisie est : {actionsaisierome['output']}",
).send()
stringCodeRome = actionsaisierome['output']
stopWords = [';']
teststringCodeRome = [ele for ele in stopWords if(ele in stringCodeRome)]
teststringCodeRome = bool(teststringCodeRome)
if teststringCodeRome == True:
arrayCodeRome = stringCodeRome.split(';')
else:
arrayCodeRome = codeRome_list
await cl.Message(author="Datapcc : đđđ",content="Votre saisie est erronĂ©e. Nous continuons l'action avec les mĂ©tiers sĂ©lectionnĂ©s automatiquement pour vous : " + codeRomeString).send()
cl.user_session.set("codeRomeArray", arrayCodeRome)
@cl.step(type="tool")
async def connexion_registre_national_entreprises():
url = "https://registre-national-entreprises.inpi.fr/api/sso/login"
headers = {"Content-Type": "application/json"}
data = {"username": os.environ['RNE_CLIENT_ID'], "password": os.environ['RNE_CLIENT_SECRET']}
response = requests.post(url, json=data, headers=headers)
if response.status_code == 200:
return response.json()["token"]
else:
raise Exception(f"Ăchec de l'authentification. Code d'erreur : {response.status_code}")
@cl.step(type="tool")
async def connexion_France_Travail():
client = Api(client_id=os.environ['POLE_EMPLOI_CLIENT_ID'],
client_secret=os.environ['POLE_EMPLOI_CLIENT_SECRET'])
return client
@cl.step(type="tool")
async def connexion_vector_database():
os.environ['PINECONE_API_KEY'] = os.environ['PINECONE_API_KEY']
os.environ['PINECONE_INDEX_NAME'] = os.environ['PINECONE_INDEX_NAME']
embeddings = HuggingFaceEmbeddings()
docsearch = PineconeVectorStore.from_existing_index(os.environ['PINECONE_INDEX_NAME'], embeddings)
return docsearch
@cl.step(type="llm")
async def IA():
os.environ['HUGGINGFACEHUB_API_TOKEN'] = os.environ['HUGGINGFACEHUB_API_TOKEN']
repo_id = "mistralai/Mistral-7B-Instruct-v0.3"
llm = HuggingFaceEndpoint(
repo_id=repo_id, max_new_tokens=5000, temperature=1.0, task="text2text-generation", streaming=True
)
return llm
@cl.on_settings_update
async def setup_agent(settings):
if not settings['competence'] and not settings['competenceInput']:
await cl.Message(
author="Datapcc : đđđ",content=f"â ïž Pas de contexte : {settings['competence']}\nâ Vous ne pouvez pas Ă©laborer de note sectorielle!"
).send()
elif settings['competence'] and not settings['competenceInput']:
await cl.Message(
author="Datapcc : đđđ",content=f"đ Changement de contexte : {settings['competence']}"
).send()
competenceList = settings['competence']
cl.user_session.set("competenceFree", competenceList)
else:
await cl.Message(
author="Datapcc : đđđ",content=f"đ Changement de contexte : {settings['competenceInput']}"
).send()
competenceList = settings['competenceInput']
cl.user_session.set("competenceFree", competenceList)
if not cl.user_session.get("saveMemory"):
cl.user_session.set("saveMemory", "")
chat_profile = cl.user_session.get("chat_profile")
chatProfile = chat_profile.split(' - ')
await construction_NCS(competenceList, chatProfile[1])
contextChat = cl.user_session.get("contextChatBot")
if not contextChat:
contextChat = "Il n'y a pas de contexte."
os.environ['HUGGINGFACEHUB_API_TOKEN'] = os.environ['HUGGINGFACEHUB_API_TOKEN']
repo_id = "mistralai/Mistral-7B-Instruct-v0.3"
model = HuggingFaceEndpoint(
repo_id=repo_id,
max_new_tokens=3600,
temperature=0.5,
streaming=True
)
memory = cl.user_session.get("memory")
prompt = ChatPromptTemplate.from_messages(
[
(
"system",
f"Contexte : Vous ĂȘtes un spĂ©cialiste du marchĂ© de l'emploi en fonction du niveau de qualification, des compĂ©tences professionnelles, des compĂ©tences transversales, du salaire et de l'expĂ©rience. Vous ĂȘtes douĂ© pour faire des analyses du systĂšme travail sur les mĂ©tiers les plus demandĂ©s grĂące Ă votre aptitude Ă synthĂ©tiser les informations en fonction des critĂšres dĂ©finis ci-avant. En fonction des informations suivantes et du contexte suivant seulement et strictement. Contexte : {contextChat[0:28875]}. RĂ©ponds Ă la question suivante de la maniĂšre la plus pertinente, la plus exhaustive et la plus dĂ©taillĂ©e possible, avec au minimum 3000 tokens jusqu'Ă 3600 tokens, seulement et strictement dans le contexte et les informations fournies. Essayez donc de comprendre en profondeur le contexte et rĂ©pondez uniquement en vous basant sur les informations fournies.",
),
MessagesPlaceholder(variable_name="history"),
("human", "{question}, dans le contexte fourni."),
]
)
runnable = (
RunnablePassthrough.assign(
history=RunnableLambda(memory.load_memory_variables) | itemgetter("history")
)
| prompt
| model
)
cl.user_session.set("runnable", runnable)
@cl.on_message
async def main(message: cl.Message):
async with cl.Step(root=True, name="RĂ©ponse de Mistral", type="llm") as parent_step:
parent_step.input = message.content
chat_profile = cl.user_session.get("chat_profile")
chatProfile = chat_profile.split(' - ')
memory = cl.user_session.get("memory")
runnable = cl.user_session.get("runnable") # type: Runnable
msg = cl.Message(author="Datapcc : đđđ",content="")
text_elements = []
answer = []
async for chunk in runnable.astream({"question": message.content},
config=RunnableConfig(callbacks=[cl.AsyncLangchainCallbackHandler(stream_final_answer=True)])):
await parent_step.stream_token(chunk)
await msg.stream_token(chunk)
QA_context_name = f"Question-réponse sur le contexte"
text_elements.append(
cl.Text(content="Question : " + message.content + "\n\nRĂ©ponse :\n" + msg.content, name=QA_context_name)
)
actions = [
cl.Action(name="download", value="Question : " + message.content + "\n\nRĂ©ponse : " + msg.content, description="download_QA_emplois")
]
await cl.Message(author="Datapcc : đđđ",content="Download", actions=actions).send()
await cl.sleep(2)
saves = [
cl.Action(name="saveToMemory", value="Question : " + message.content + "\n\nRĂ©ponse : " + msg.content, description="Mettre en mĂ©moire la rĂ©ponse Ă votre requĂȘte")
]
await cl.Message(author="Datapcc : đđđ",content="Mettre en mĂ©moire la rĂ©ponse Ă votre requĂȘte", actions=saves).send()
await cl.sleep(2)
memories = [
cl.Action(name="download", value=cl.user_session.get('saveMemory'), description="download_referentiel")
]
await cl.Message(author="Datapcc : đđđ",content="TĂ©lĂ©charger la mise en mĂ©moire de vos fiches", actions=memories).send()
await cl.sleep(1.5)
await cl.Message(author="Datapcc : đđđ",content="Contexte : " + QA_context_name, elements=text_elements).send()
memory.chat_memory.add_user_message(message.content)
memory.chat_memory.add_ai_message(msg.content)