File size: 3,829 Bytes
f762e1b
cf47d83
f762e1b
2c1b805
 
69e3a41
 
3472573
 
 
 
 
85ec4d4
5e5f699
2c1b805
f762e1b
ea3c34e
275dee5
70ed6f0
2c1b805
 
f762e1b
a3bc7ec
f762e1b
2c1b805
763be08
f762e1b
 
a3bc7ec
f762e1b
 
 
ea3c34e
f762e1b
c2b4dad
f762e1b
105c4c8
2c1b805
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d37a28d
2c1b805
 
 
 
 
763be08
2c1b805
 
 
f762e1b
2c1b805
f762e1b
2c1b805
2b5a681
d37a28d
 
2b5a681
f762e1b
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
import pandas as pd
import streamlit as st
from langchain.llms import HuggingFacePipeline
from langchain import PromptTemplate, LLMChain
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
from huggingface_hub import login

# Look up the API key in Streamlit's secrets store.
# The key's value is deliberately never written to the page: anything passed
# to st.write is visible to every visitor of the app.
# NOTE(review): no later code visible in this file reads `fireworks_token`.
try:
    fireworks_token = st.secrets["HUGGINGFACE_FIREWORKS_APIKEY"]
except (KeyError, FileNotFoundError):
    # KeyError: the secrets store exists but lacks the key.
    # FileNotFoundError: no secrets.toml is available at all.
    # Bind the name either way so later references do not raise NameError.
    fireworks_token = None
    st.error("La clave 'HUGGINGFACE_FIREWORKS_APIKEY' no está configurada en secrets.toml.")
else:
    # Confirm presence only; do not echo the secret itself.
    st.write("Token encontrado.")


# Configure the Llama model served through a local transformers pipeline.
# NOTE(review): confirm this repository id exists on the Hugging Face Hub;
# Llama 3.1 is published in 8B/70B/405B sizes, while the 1B checkpoint is
# part of the Llama 3.2 family.
model_id = "meta-llama/Llama-3.1-1B"


@st.cache_resource
def _load_generation_stack(model_name):
    """Load the tokenizer, model and generation pipeline for ``model_name``.

    Streamlit re-executes the whole script on every widget interaction, so
    without caching the weights would be reloaded on each button click or
    file upload. ``st.cache_resource`` keeps one shared instance per
    ``model_name`` for the lifetime of the server process.

    Args:
        model_name: Hugging Face Hub repository id of a causal language model.

    Returns:
        A ``(tokenizer, model, pipe, llm)`` tuple, where ``pipe`` is the
        transformers text-generation pipeline and ``llm`` is the LangChain
        ``HuggingFacePipeline`` wrapper around it.
    """
    loaded_tokenizer = AutoTokenizer.from_pretrained(model_name)
    loaded_model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto")

    # Build a local text-generation pipeline and wrap it for LangChain.
    # NOTE(review): `max_length` counts prompt tokens plus generated tokens,
    # so a long list of job titles leaves little or no room for the answer;
    # consider `max_new_tokens` if replies come back truncated.
    generation_pipe = pipeline(
        "text-generation",
        model=loaded_model,
        tokenizer=loaded_tokenizer,
        max_length=1024,
    )
    return (
        loaded_tokenizer,
        loaded_model,
        generation_pipe,
        HuggingFacePipeline(pipeline=generation_pipe),
    )


# Keep the original module-level names so the rest of the script is unaffected.
tokenizer, model, pipe, llm_pipeline = _load_generation_stack(model_id)

# Streamlit user interface
st.title("Cosine Similarity Calculation with Fireworks, LangChain, and Llama 3.1")

# Upload a CSV file
uploaded_file = st.file_uploader("Sube un archivo CSV con la columna 'job_title':", type=["csv"])

if uploaded_file is not None:
    # Load the CSV into a DataFrame. The upload is user-supplied, so an empty
    # or malformed file is reported in the page instead of surfacing as an
    # unhandled traceback.
    try:
        df = pd.read_csv(uploaded_file)
    except (pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError) as exc:
        st.error(f"No se pudo leer el archivo CSV: {exc}")
        st.stop()

    if 'job_title' in df.columns:
        query = 'aspiring human resources specialist'

        # Drop empty cells and coerce to str so the prompt never contains
        # "nan" entries, then render one title per line; interpolating the
        # raw list would put a Python list repr into the prompt.
        job_titles = df['job_title'].dropna().astype(str).tolist()
        job_titles_text = "\n".join(f"- {title}" for title in job_titles)

        # Prompt that asks the model for a similarity score per job title.
        # NOTE(review): the prompt tells the model to call an external
        # embeddings service, but nothing in this script gives it such
        # access; the reply is free-form generated text, not a computed
        # cosine similarity.
        prompt_template = PromptTemplate(
            template=(
                "You are an AI model with access to external embeddings services. Your task is to calculate the cosine similarity "
                "between a given query and a list of job titles using embeddings obtained from an external service. "
                "Follow these steps to complete the task:\n\n"
                "1. Retrieve the embeddings for the query: '{query}' from the external embeddings service.\n"
                "2. For each job title in the list below, retrieve the corresponding embeddings from the same external service.\n"
                "3. Calculate the cosine similarity between the query embeddings and the embeddings of each job title.\n"
                "4. Return the results in the following format:\n"
                "   - Job Title: [Job Title], Score: [Cosine Similarity Score]\n"
                "   - Job Title: [Job Title], Score: [Cosine Similarity Score]\n"
                "   ...\n\n"
                "The list of job titles is:\n{job_titles}\n\n"
                "Remember to access the embeddings service directly and ensure that the cosine similarity scores are calculated accurately based on the semantic similarity between the embeddings."
            ),
            input_variables=["query", "job_titles"],
        )

        # Chain that fills the prompt and sends it to the LLM pipeline.
        llm_chain = LLMChain(
            llm=llm_pipeline,
            prompt=prompt_template,
        )

        # Run the generation only when the user asks for it.
        if st.button("Calcular Similitud de Coseno"):
            with st.spinner("Calculando similitudes con Fireworks y Llama 3.1..."):
                try:
                    result = llm_chain.run({"query": query, "job_titles": job_titles_text})
                except Exception as e:
                    # Top-level UI boundary: show any generation failure in
                    # the page rather than crashing the app.
                    st.error(f"Error durante la generación: {e}")
                else:
                    st.write("Respuesta del modelo:")
                    st.write(result)

                    # Placeholder for the demo: every row gets the same
                    # constant score. The value is NOT parsed from `result`.
                    df['Score'] = [0.95] * len(df)

                    # Show the updated DataFrame.
                    st.write("DataFrame con los puntajes de similitud:")
                    st.write(df)
    else:
        st.error("La columna 'job_title' no se encuentra en el archivo CSV.")