import os
import requests
import torch
import transformers
from torch import bfloat16
from transformers import AutoTokenizer, BitsAndBytesConfig
from huggingface_hub import login
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma
from langchain.document_loaders import PyPDFLoader
import gradio as gr

"""
For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
"""
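# As an alternative to running the model locally (as this app does below), the
# same generation could go through the hosted Inference API. A minimal sketch,
# kept commented out and assuming the HF_API_TOKEN read below grants access to
# the model:
#
#   from huggingface_hub import InferenceClient
#   client = InferenceClient("mistralai/Mistral-7B-Instruct-v0.1", token=api_token)
#   reply = client.chat_completion(
#       messages=[{"role": "user", "content": "¿Qué es el cáncer de mama?"}],
#       max_tokens=200,
#   )
#   print(reply.choices[0].message.content)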


# In a Hugging Face Space, secrets are injected as environment variables,
# so the HF_API_TOKEN secret configured in the Space settings is read here.
api_token = os.environ.get("HF_API_TOKEN")

# Check that the API token is set
if api_token is None:
    raise ValueError("Failed to read the HF_API_TOKEN secret; set it in the Space settings.")

# Authenticate with the Hugging Face Hub using the API token
login(token=api_token)



model_id = 'mistralai/Mistral-7B-Instruct-v0.1'
model_config = transformers.AutoConfig.from_pretrained(
    model_id,
    max_new_tokens=200
)

# 4-bit NF4 quantization (a standard bitsandbytes setup) so the 7B model
# fits in limited GPU memory; compute is done in bfloat16.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type='nf4',
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=bfloat16,
)

model = transformers.AutoModelForCausalLM.from_pretrained(
    model_id,
    trust_remote_code=True,
    config=model_config,
    quantization_config=bnb_config,
    device_map='auto',
)
tokenizer = AutoTokenizer.from_pretrained(model_id)
query_pipeline = transformers.pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    torch_dtype=torch.float16,
    device_map="auto",
    max_new_tokens=200,
)
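
# Quick smoke test for the local pipeline, kept commented out to avoid an
# extra generation at startup (the prompt here is only illustrative):
#   print(query_pipeline("Hola, ¿qué es el cáncer de mama?")[0]["generated_text"])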


def respond(message, history, system_message, max_tokens, temperature, top_p):
    # system_message is exposed in the Gradio UI but not used by this prompt format.
    # Download the ESMO patient guide once; reuse it if it is already on disk.
    pdf_path = "ES-Cancer-de-Mama-Guia-para-Pacientes.pdf"
    if not os.path.exists(pdf_path):
        URL = "https://www.esmo.org/content/download/6594/114963/1/ES-Cancer-de-Mama-Guia-para-Pacientes.pdf"
        response = requests.get(URL)
        response.raise_for_status()
        with open(pdf_path, "wb") as f:
            f.write(response.content)
    loader = PyPDFLoader(pdf_path)
    documents = loader.load()

    # Split the PDF into overlapping chunks for retrieval.
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=20)
    all_splits = text_splitter.split_documents(documents)

    # Multilingual sentence embeddings, since the source document is in Spanish.
    model_name = "sentence-transformers/paraphrase-multilingual-mpnet-base-v2"
    embeddings = HuggingFaceEmbeddings(model_name=model_name)

    vectordb = Chroma.from_documents(documents=all_splits, embedding=embeddings, persist_directory="chroma_db")

    pipeline = query_pipeline
    query = message
    # Chroma returns (document, distance) pairs; lower scores mean closer
    # matches, so the threshold below keeps only reasonably relevant chunks.
    docs = vectordb.similarity_search_with_score(query)
    context = [doc.page_content for doc, score in docs if score < 7]
    if context:
        # Prompt (in Spanish): "Based on the following information: <context>
        # Answer the question in Spanish: <query>"
        messages = [{
            "role": "user",
            "content": "Basándote en la siguiente información: " + "\n".join(context)
                       + "\n Responde en castellano a la pregunta: " + query,
        }]
        prompt = pipeline.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
        outputs = pipeline(prompt, max_new_tokens=max_tokens, do_sample=True,
                           temperature=temperature, top_k=50, top_p=top_p)
        answer = outputs[0]["generated_text"]
        # Keep only the text after the final [/INST] tag; gr.ChatInterface
        # expects a plain string, not a (text, docs) tuple.
        return answer[answer.rfind("[/INST]") + len("[/INST]"):].strip()
    else:
        # Spanish for "I have no information to answer this question."
        return "No tengo información para responder a esta pregunta"




"""
For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
"""
demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.95,
            step=0.05,
            label="Top-p (nucleus sampling)",
        ),
    ],
)


if __name__ == "__main__":
    demo.launch()