# NOTE: the comment lines below are residue from the web file viewer this
# source was copied from; they are not part of the program.
# File size: 4,688 Bytes
# Commit hashes: fd308b2 bd17978 fd308b2 f205119 ea0b14f 54e5254 f51926a 54e5254 78be166 95ae9d4 270c181 95ae9d4 78be166 95ae9d4 fd308b2
# (line-number gutter 1-137 omitted)
import gradio as gr
from huggingface_hub import InferenceClient
from torch import cuda, bfloat16
import torch
import transformers
from transformers import AutoTokenizer
from time import time
import chromadb
from chromadb.config import Settings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.chains import RetrievalQA
from langchain.vectorstores import Chroma
from langchain.document_loaders import PyPDFLoader
import requests
"""
For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
"""
#client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
#notebook_login()
import os

from huggingface_hub import HfApi, login

# Kept for reference only; the token lookup below does not need the username.
username = 'islasher'

# Name of the secret configured for this app. Hugging Face Spaces expose
# secrets to the running process as environment variables, so the token is
# read from the environment instead of being fetched over HTTP.
api_token_secret_name = "HF_API_TOKEN"
api_token = os.environ.get(api_token_secret_name)

# Fail fast with a clear message when the secret is missing or empty; the
# model download further down would otherwise fail with a less obvious error.
if not api_token:
    raise ValueError(
        f"Environment variable {api_token_secret_name} is not set; "
        "add it as a secret in the Space settings."
    )

# Register the token for every subsequent Hub request made by this process.
login(token=api_token)

# Authenticated client, kept under the original module-level name `api`.
api = HfApi(token=api_token)
# Hub identifier of the instruction-tuned model used to generate answers.
model_id = 'mistralai/Mistral-7B-Instruct-v0.1'

# The original script referenced `bnb_config` without defining it, which raised
# NameError at import time.
# NOTE(review): the 4-bit NF4 settings below are the usual bitsandbytes setup
# for this model family; confirm they match the intended configuration.
# Quantization is skipped when no CUDA device is available, since this setup
# assumes a GPU (the model then loads unquantized).
if cuda.is_available():
    bnb_config = transformers.BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type='nf4',
        bnb_4bit_use_double_quant=True,
        bnb_4bit_compute_dtype=bfloat16,
    )
else:
    bnb_config = None

# Model configuration; `max_new_tokens` is forwarded exactly as in the
# original call.
model_config = transformers.AutoConfig.from_pretrained(
    model_id,
    max_new_tokens=200,
)

# Load the causal LM, letting `device_map='auto'` decide weight placement.
model = transformers.AutoModelForCausalLM.from_pretrained(
    model_id,
    trust_remote_code=True,
    config=model_config,
    quantization_config=bnb_config,
    device_map='auto',
)

tokenizer = AutoTokenizer.from_pretrained(model_id)

# Text-generation pipeline shared by every chat request.
query_pipeline = transformers.pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    torch_dtype=torch.float16,
    device_map="auto",
    max_new_tokens=200,
)
# Source document for the retrieval index and the local path it is saved to.
_PDF_URL = "https://www.esmo.org/content/download/6594/114963/1/ES-Cancer-de-Mama-Guia-para-Pacientes.pdf"
_PDF_PATH = "ES-Cancer-de-Mama-Guia-para-Pacientes.pdf"
_EMBEDDING_MODEL_NAME = "sentence-transformers/paraphrase-multilingual-mpnet-base-v2"
_CHROMA_DIR = "chroma_db"
# Retrieved chunks are used only when their score is below this value.
# NOTE(review): the score is presumably a distance (lower = closer) - confirm.
_MAX_SCORE = 7
_NO_CONTEXT_ANSWER = "No tengo información para responder a esta pregunta"

# Lazily built vector store, shared by all requests in this process.
_vectordb = None


def _get_vectordb():
    """Return the vector store for the guide PDF, building it on first use.

    The original code downloaded the PDF and re-indexed it on every chat
    message. Building once per process avoids that cost and avoids adding the
    same chunks to the persisted collection again on each request.

    Raises:
        requests.HTTPError: if the PDF download returns an error status.
    """
    global _vectordb
    if _vectordb is not None:
        return _vectordb

    # Download only when the file is not already on disk.
    if not os.path.exists(_PDF_PATH):
        download = requests.get(_PDF_URL, timeout=60)
        download.raise_for_status()
        # `with` guarantees the handle is closed (the original leaked it).
        with open(_PDF_PATH, "wb") as pdf_file:
            pdf_file.write(download.content)

    documents = PyPDFLoader(_PDF_PATH).load()
    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=20)
    chunks = splitter.split_documents(documents)
    embeddings = HuggingFaceEmbeddings(model_name=_EMBEDDING_MODEL_NAME)
    _vectordb = Chroma.from_documents(
        documents=chunks,
        embedding=embeddings,
        persist_directory=_CHROMA_DIR,
    )
    return _vectordb


def _answer_with_sources(query, max_tokens, temperature, top_p):
    """Answer `query` from the guide and return `(answer, scored_docs)`.

    `scored_docs` is the raw result of `similarity_search_with_score`, i.e. the
    `(document, score)` pairs the original `respond` returned as its second
    element.
    """
    scored_docs = _get_vectordb().similarity_search_with_score(query)
    context = [doc.page_content for doc, score in scored_docs if score < _MAX_SCORE]
    if not context:
        return _NO_CONTEXT_ANSWER, scored_docs

    joined_context = "\n".join(context)
    messages = [
        {
            "role": "user",
            "content": (
                "Basándote en la siguiente información: "
                + joined_context
                + "\n Responde en castellano a la pregunta: "
                + query
            ),
        }
    ]
    prompt = query_pipeline.tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    # `return_full_text=False` makes the pipeline return only the newly
    # generated text, replacing the fragile `rfind("[/INST]") + 8` slice, which
    # miscounted the 7-character marker and cut the answer when it was absent.
    outputs = query_pipeline(
        prompt,
        max_new_tokens=int(max_tokens),  # slider values may arrive as floats
        do_sample=True,
        temperature=temperature,
        top_k=50,
        top_p=top_p,
        return_full_text=False,
    )
    return outputs[0]["generated_text"].strip(), scored_docs


def respond(message, history, system_message, max_tokens, temperature, top_p):
    """Gradio chat callback: answer `message` using the breast-cancer guide.

    Args:
        message: The user's question.
        history: Previous chat turns supplied by Gradio; not used.
        system_message: Value of the "System message" textbox; not used.
        max_tokens: Maximum number of new tokens to generate.
        temperature: Sampling temperature.
        top_p: Nucleus-sampling probability mass.

    Returns:
        The answer text, or a fixed Spanish fallback sentence when no retrieved
        chunk is close enough. The original returned an `(answer, docs)` tuple,
        but the chat interface is configured without additional outputs and
        expects a single string.
    """
    answer, _ = _answer_with_sources(message, max_tokens, temperature, top_p)
    return answer
"""
For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
"""
# Extra controls shown alongside the chat box. Their values are passed to
# `respond` after `message` and `history`, in the order listed below.
_system_message_box = gr.Textbox(
    value="You are a friendly Chatbot.",
    label="System message",
)
_max_tokens_slider = gr.Slider(
    minimum=1,
    maximum=2048,
    value=512,
    step=1,
    label="Max new tokens",
)
_temperature_slider = gr.Slider(
    minimum=0.1,
    maximum=4.0,
    value=0.7,
    step=0.1,
    label="Temperature",
)
_top_p_slider = gr.Slider(
    minimum=0.1,
    maximum=1.0,
    value=0.95,
    step=0.05,
    label="Top-p (nucleus sampling)",
)

# Chat UI wired to the `respond` callback.
demo = gr.ChatInterface(
    fn=respond,
    additional_inputs=[
        _system_message_box,
        _max_tokens_slider,
        _temperature_slider,
        _top_p_slider,
    ],
)
# Start the Gradio server only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()