islasher committed on
Commit
95ae9d4
1 Parent(s): 029fedb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +38 -31
app.py CHANGED
@@ -6,38 +6,45 @@ For more information on `huggingface_hub` Inference API support, please check th
6
  """
7
  client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
 
10
def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    """Stream a chat reply for ``message``, yielding the text accumulated so far.

    Args:
        message: The user's newest message.
        history: Earlier turns as ``(user_text, assistant_text)`` pairs; empty
            entries within a pair are skipped.
        system_message: Content of the leading ``system`` message.
        max_tokens: Forwarded to ``client.chat_completion`` as ``max_tokens``.
        temperature: Forwarded to ``client.chat_completion``.
        top_p: Forwarded to ``client.chat_completion``.

    Yields:
        The reply accumulated so far, once per streamed chunk.
    """
    messages = [{"role": "system", "content": system_message}]

    for turn in history:
        user_text, assistant_text = turn[0], turn[1]
        if user_text:
            messages.append({"role": "user", "content": user_text})
        if assistant_text:
            messages.append({"role": "assistant", "content": assistant_text})

    messages.append({"role": "user", "content": message})

    response = ""

    # The loop variable is named ``chunk`` so it no longer shadows the
    # ``message`` parameter.
    for chunk in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        # A streamed delta may carry no text (``content`` is None); treat that
        # as an empty token instead of raising TypeError on ``str += None``.
        token = chunk.choices[0].delta.content
        response += token or ""
        yield response
41
 
42
  """
43
  For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
 
6
  """
7
  client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
8
 
9
# Source document for the retrieval index and the local paths it is cached at.
_PDF_URL = "https://www.esmo.org/content/download/6594/114963/1/ES-Cancer-de-Mama-Guia-para-Pacientes.pdf"
_PDF_PATH = "ES-Cancer-de-Mama-Guia-para-Pacientes.pdf"
_CHROMA_DIR = "chroma_db"
_EMBEDDING_MODEL = "sentence-transformers/paraphrase-multilingual-mpnet-base-v2"

# Retrieved chunks are kept only when their score is below this value.
# NOTE(review): presumably the score is a distance (lower = closer) -- confirm.
_MAX_SCORE = 7

# Marker after which the generated answer is expected to start.
_INST_END = "[/INST]"

_NO_ANSWER = "No tengo información para responder a esta pregunta"

# Vector store built on first use and then shared by every call.
_vectordb = None


def _download_pdf():
    """Save the guide PDF to ``_PDF_PATH`` unless a copy is already on disk."""
    import os

    import requests

    if os.path.exists(_PDF_PATH):
        return
    reply = requests.get(_PDF_URL, timeout=60)
    # Fail loudly rather than saving an HTTP error page as if it were the PDF.
    reply.raise_for_status()
    with open(_PDF_PATH, "wb") as pdf_file:
        pdf_file.write(reply.content)


def _build_vectordb():
    """Create the Chroma store, reusing the persisted one when it exists."""
    import os

    import torch
    from pdf.loader import PyPDFLoader

    # NOTE(review): RecursiveCharacterTextSplitter, HuggingFaceEmbeddings and
    # Chroma are not defined in the visible part of this file -- confirm they
    # are imported at module level.
    device = "cuda" if torch.cuda.is_available() else "cpu"
    embeddings = HuggingFaceEmbeddings(
        model_name=_EMBEDDING_MODEL,
        model_kwargs={"device": device},
    )

    # Re-running from_documents into an existing persist directory would insert
    # every chunk again, so open the stored collection instead.
    if os.path.isdir(_CHROMA_DIR) and os.listdir(_CHROMA_DIR):
        return Chroma(persist_directory=_CHROMA_DIR, embedding_function=embeddings)

    _download_pdf()
    documents = PyPDFLoader(_PDF_PATH).load()
    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=20)
    return Chroma.from_documents(
        documents=splitter.split_documents(documents),
        embedding=embeddings,
        persist_directory=_CHROMA_DIR,
    )


def _get_vectordb():
    """Return the shared vector store, building it on the first call only."""
    global _vectordb
    if _vectordb is None:
        _vectordb = _build_vectordb()
    return _vectordb


def _extract_answer(generated, prompt):
    """Return the part of ``generated`` that follows the prompt."""
    marker_at = generated.rfind(_INST_END)
    if marker_at != -1:
        # Skip the marker plus the single character that follows it.
        return generated[marker_at + len(_INST_END) + 1:]
    # Without the marker, blind slicing would cut off the start of the answer;
    # drop the echoed prompt if it is there, otherwise keep the whole text.
    if generated.startswith(prompt):
        return generated[len(prompt):].lstrip()
    return generated


def respond(message, history, system_message, max_tokens, temperature, top_p):
    """Answer ``message`` using passages retrieved from the guide PDF.

    Args:
        message: The user's question; used as the retrieval query and embedded
            in the prompt.
        history: Accepted for interface compatibility; not used.
        system_message: Accepted for interface compatibility; not used.
        max_tokens: Passed to ``pipeline`` as ``max_new_tokens``.
        temperature: Sampling temperature passed to ``pipeline``.
        top_p: Nucleus-sampling value passed to ``pipeline``.

    Returns:
        A ``(answer, docs)`` tuple, where ``docs`` is the raw result of
        ``similarity_search_with_score``. ``answer`` is a fixed Spanish
        fallback sentence when no retrieved passage passes the score filter.
    """
    docs = _get_vectordb().similarity_search_with_score(message)
    context = [doc.page_content for doc, score in docs if score < _MAX_SCORE]
    if not context:
        return _NO_ANSWER, docs

    messages = [
        {
            "role": "user",
            "content": "Basándote en la siguiente información: "
            + "\n".join(context)
            + "\n Responde en castellano a la pregunta: "
            + message,
        }
    ]
    # NOTE(review): `pipeline` is not defined in the visible part of this file;
    # confirm a text-generation pipeline is created at module level.
    prompt = pipeline.tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    outputs = pipeline(
        prompt,
        max_new_tokens=max_tokens,
        do_sample=True,
        temperature=temperature,
        top_k=50,
        top_p=top_p,
    )
    return _extract_answer(outputs[0]["generated_text"], prompt), docs
45
+
46
+
47
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48
 
49
  """
50
  For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface