import gradio as gr
import requests
import torch
import transformers
from huggingface_hub import InferenceClient
from transformers import AutoTokenizer
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma
from langchain.document_loaders import PyPDFLoader

"""
For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
"""


from huggingface_hub import login

# Authenticate with Hugging Face so gated model weights can be downloaded.
# login() with no argument reuses a cached token or prompts for one; in a
# hosted Space it is more robust to read a token from a secret instead,
# e.g. login(token=os.environ["HF_API_TOKEN"]).
login()

model_id = 'mistralai/Mistral-7B-Instruct-v0.1'
model_config = transformers.AutoConfig.from_pretrained(
    model_id,
    max_new_tokens=200
)

# 4-bit quantization so the 7B model fits in limited GPU memory. The exact
# settings below are typical bitsandbytes choices, not requirements.
bnb_config = transformers.BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
)

model = transformers.AutoModelForCausalLM.from_pretrained(
    model_id,
    trust_remote_code=True,
    config=model_config,
    quantization_config=bnb_config,
    device_map='auto',
)
tokenizer = AutoTokenizer.from_pretrained(model_id)
query_pipeline = transformers.pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    torch_dtype=torch.float16,
    device_map="auto",
    max_new_tokens=200,
)


def respond(message, history, system_message, max_tokens, temperature, top_p):
    # system_message is accepted to match the ChatInterface signature but is
    # not used by the RAG prompt below.
    # NOTE: the PDF is downloaded and the vector store rebuilt on every call;
    # see the sketch after this function for doing that work once at startup.
    URL = "https://www.esmo.org/content/download/6594/114963/1/ES-Cancer-de-Mama-Guia-para-Pacientes.pdf"
    response = requests.get(URL)
    with open("ES-Cancer-de-Mama-Guia-para-Pacientes.pdf", "wb") as f:
        f.write(response.content)
    loader = PyPDFLoader("ES-Cancer-de-Mama-Guia-para-Pacientes.pdf")
    documents = loader.load()

    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=20)
    all_splits = text_splitter.split_documents(documents)

    model_name = "sentence-transformers/paraphrase-multilingual-mpnet-base-v2"
    embeddings = HuggingFaceEmbeddings(model_name=model_name)

    vectordb = Chroma.from_documents(documents=all_splits, embedding=embeddings, persist_directory="chroma_db")

    query = message
    # similarity_search_with_score returns (document, distance) pairs; lower
    # distance means a closer match, so keep only sufficiently close chunks.
    docs = vectordb.similarity_search_with_score(query)
    context = [doc.page_content for doc, score in docs if score < 7]
    if context:
        # Prompt (Spanish): "Based on the following information: <context>
        # Answer the question in Spanish: <query>"
        messages = [
            {"role": "user", "content": "Basándote en la siguiente información: " + "\n".join(context) + "\n Responde en castellano a la pregunta: " + query}]
        prompt = query_pipeline.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
        outputs = query_pipeline(prompt, max_new_tokens=max_tokens, do_sample=True,
                                 temperature=temperature, top_k=50, top_p=top_p)
        answer = outputs[0]["generated_text"]
        # ChatInterface expects a string, so return only the model's reply
        # (the text after the final [/INST] tag of the echoed prompt).
        return answer[answer.rfind("[/INST]") + 8:]
    else:
        # "I don't have information to answer this question."
        return "No tengo información para responder a esta pregunta"




"""
For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
"""
demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.95,
            step=0.05,
            label="Top-p (nucleus sampling)",
        ),
    ],
)


if __name__ == "__main__":
    demo.launch()