import gradio as gr
import torch
import transformers
from transformers import AutoTokenizer
from huggingface_hub import login
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma
from langchain.document_loaders import PyPDFLoader
import requests
"""
For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
"""
#client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
#notebook_login()
import os
#from huggingface_hub import HfApi
from huggingface_hub import login
#notebook_login()
username = 'islasher'
# Authenticate with Hugging Face
login()
# Fetch the API token secret
#secret_name = "HF_API_TOKEN"
#secret_value = api.secrets.get(username, secret_name)
# Retrieve the API token
#api_token = secret_value["value"]
# Check if the API token is set
#if api_token is None:
# raise ValueError(f"Failed to retrieve API token from Hugging Face secret {secret_name}")
# Authenticate with Hugging Face using the API token
#login(token=api_token)
#token_access = HF_API_TOKEN
#headers = {"Authorization": f"Bearer {token_access}"}
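# Generator model: Mistral-7B-Instruct, loaded in 4 bits so the 7B weights
# fit in modest GPU memory. The exact BitsAndBytes settings below are a
# common default, not something the model itself requires.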
model_id = 'mistralai/Mistral-7B-Instruct-v0.1'

model_config = transformers.AutoConfig.from_pretrained(
    model_id,
    max_new_tokens=200,
)

bnb_config = transformers.BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
)

model = transformers.AutoModelForCausalLM.from_pretrained(
    model_id,
    trust_remote_code=True,
    config=model_config,
    quantization_config=bnb_config,
    device_map='auto',
)
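# Wrap model and tokenizer in a text-generation pipeline. max_new_tokens=200
# is only a default; the per-request value from the UI slider overrides it.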
tokenizer = AutoTokenizer.from_pretrained(model_id)

query_pipeline = transformers.pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    torch_dtype=torch.float16,
    device_map="auto",
    max_new_tokens=200,
)
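# respond() runs the full RAG loop on every message: download and chunk the
# PDF, embed the chunks into Chroma, retrieve the passages closest to the
# question, and ask the model to answer in Spanish grounded on them.
# (Building the index once at module level would be much cheaper.)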
def respond(message, history, system_message, max_tokens, temperature, top_p):
    # NOTE: system_message arrives from the UI but is not used in the prompt.
    URL = "https://www.esmo.org/content/download/6594/114963/1/ES-Cancer-de-Mama-Guia-para-Pacientes.pdf"
    response = requests.get(URL)
    with open("ES-Cancer-de-Mama-Guia-para-Pacientes.pdf", "wb") as f:
        f.write(response.content)

    loader = PyPDFLoader("ES-Cancer-de-Mama-Guia-para-Pacientes.pdf")
    documents = loader.load()

    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=20)
    all_splits = text_splitter.split_documents(documents)

    model_name = "sentence-transformers/paraphrase-multilingual-mpnet-base-v2"
    embeddings = HuggingFaceEmbeddings(model_name=model_name)
    vectordb = Chroma.from_documents(
        documents=all_splits, embedding=embeddings, persist_directory="chroma_db"
    )

    pipeline = query_pipeline
    query = message

    # Keep only passages whose distance score is below 7 (lower = closer).
    docs = vectordb.similarity_search_with_score(query)
    context = []
    for doc, score in docs:
        if score < 7:
            doc_details = doc.to_json()['kwargs']
            context.append(doc_details['page_content'])

    if len(context) != 0:
        messages = [{
            "role": "user",
            "content": "Basándote en la siguiente información: " + "\n".join(context)
            + "\n Responde en castellano a la pregunta: " + query,
        }]
        prompt = pipeline.tokenizer.apply_chat_template(
            messages, tokenize=False, add_generation_prompt=True
        )
        outputs = pipeline(
            prompt,
            max_new_tokens=max_tokens,
            do_sample=True,
            temperature=temperature,
            top_k=50,
            top_p=top_p,
        )
        answer = outputs[0]["generated_text"]
        # gr.ChatInterface expects a plain string, so return only the text
        # after the final "[/INST] " tag, i.e. the model's reply.
        return answer[answer.rfind("[/INST]") + len("[/INST] "):]
    else:
        return "No tengo información para responder a esta pregunta"
"""
For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
"""
demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.95,
            step=0.05,
            label="Top-p (nucleus sampling)",
        ),
    ],
)
if __name__ == "__main__":
    demo.launch()
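# Local usage sketch (assumed dependency set; the source pins no versions):
#   pip install gradio transformers accelerate bitsandbytes langchain \
#       chromadb pypdf sentence-transformers requests
#   python app.py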