import gradio as gr
from huggingface_hub import InferenceClient
import pandas as pd
import transformers
import torch
from sentence_transformers import SentenceTransformer, util

"""
For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
"""

# Hosted Inference API client (instantiated here but not used by the local pipeline below).
client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
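
# A minimal sketch of how `client` could be called in place of the local
# pipeline below, assuming the hosted text-generation endpoint; this helper
# is hypothetical and is not wired into the app.
def remote_generate(prompt: str) -> str:
    # InferenceClient.text_generation sends the prompt to the hosted
    # zephyr-7b-beta model and returns the generated string.
    return client.text_generation(prompt, max_new_tokens=500)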

# Sentence-embedding model used to match the user query against the stored CV questions.
sbert_model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')


def initiate_pipeline():
    # Load Meta-Llama-3-8B-Instruct as a local text-generation pipeline,
    # in bfloat16, on the first GPU if one is available.
    model = "meta-llama/Meta-Llama-3-8B-Instruct"
    device = "cuda:0" if torch.cuda.is_available() else "cpu"
    return transformers.pipeline(
        "text-generation",
        model=model,
        model_kwargs={"torch_dtype": torch.bfloat16},
        device=device,
    )


llama_model = initiate_pipeline()

# Question/answer pairs about the CV, with QUESTION and ANSWER columns.
qa_data = pd.read_csv("rag_juri_cv.csv")


def retrieve_top_k(query, k=5):
    # Embed every stored question and the user query, then rank the stored
    # questions by cosine similarity to the query. Note that the question
    # embeddings are recomputed on every call; for a larger corpus they
    # should be precomputed once at startup.
    questions = qa_data['QUESTION'].tolist()
    question_embeddings = sbert_model.encode(questions, convert_to_tensor=True)
    query_embedding = sbert_model.encode(query, convert_to_tensor=True)

    cosine_scores = util.pytorch_cos_sim(query_embedding, question_embeddings).flatten()
    top_k_indices = torch.topk(cosine_scores, k=k).indices.cpu().numpy()

    # Return the k most similar question/answer rows.
    return qa_data.iloc[top_k_indices]
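
# Example with a hypothetical query:
#   retrieve_top_k("Where did Juri study?")  # -> DataFrame of the 5 closest Q/A rows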


def chatbot(query):
    top_k_qa = retrieve_top_k(query)

    # Build a Llama-3 chat prompt by hand: a user turn containing the
    # instructions, the question, and the retrieved context, followed by
    # the assistant header so the model answers next.
    prefix = """<|begin_of_text|><|start_header_id|>user<|end_header_id|>You are a chatbot specialized in answering questions about Juri Grosjean's CV.
Please only use the information provided in the context to answer the question.
Here is the question to answer:
""" + query + "\n\n"

    context = "This is the context information to answer the question:\n"
    for i, (_, row) in enumerate(top_k_qa.iterrows(), start=1):
        context += f"Information {i}: {row['ANSWER']}\n\n"

    suffix = "<|eot_id|><|start_header_id|>assistant<|end_header_id|>"
    prompt = prefix + context + suffix

    # Stop generation at either the regular EOS token or Llama-3's
    # end-of-turn token <|eot_id|>.
    terminators = [
        llama_model.tokenizer.eos_token_id,
        llama_model.tokenizer.convert_tokens_to_ids("<|eot_id|>"),
    ]

    outputs = llama_model(
        prompt,
        max_new_tokens=500,
        do_sample=True,
        temperature=0.6,
        top_p=0.9,
        eos_token_id=terminators,
        return_full_text=False,  # return only the completion, not the prompt
    )

    # With return_full_text=False, generated_text contains only the model's answer.
    return outputs[0]["generated_text"].strip()
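
# The same prompt could also be built from the tokenizer's chat template
# instead of hand-written special tokens; a sketch (not used above), where
# `user_message` stands for the instructions, question, and context text:
#
#   messages = [{"role": "user", "content": user_message}]
#   prompt = llama_model.tokenizer.apply_chat_template(
#       messages, tokenize=False, add_generation_prompt=True
#   )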


# Minimal Gradio UI: one textbox in, generated answer out.
demo = gr.Interface(
    fn=chatbot,
    inputs=gr.Textbox(lines=5, placeholder="Ask a question about Juri Grosjean's CV"),
    outputs="text",
)

if __name__ == "__main__":
    demo.launch()