Upload 21 files
Browse files- .gitattributes +2 -0
- README.md +1 -10
- app.py +11 -12
- ingest2.py +38 -0
- pdf_folder/Circular25julio2023ReligionyAtencionEducativa.pdf +0 -0
- pdf_folder/Instruccion26septiembre2023PremiosExtraordinariosMusica.pdf +0 -0
- requirements.txt +1 -2
- stores/ConserGPT/Circular25julio2023ReligionyAtencionEducativa.pdf_store/8bfd0a18-a06b-40ba-91ae-3f3ed1b0d1a3/data_level0.bin +3 -0
- stores/ConserGPT/Circular25julio2023ReligionyAtencionEducativa.pdf_store/8bfd0a18-a06b-40ba-91ae-3f3ed1b0d1a3/header.bin +0 -0
- stores/ConserGPT/Circular25julio2023ReligionyAtencionEducativa.pdf_store/8bfd0a18-a06b-40ba-91ae-3f3ed1b0d1a3/length.bin +0 -0
- stores/ConserGPT/Circular25julio2023ReligionyAtencionEducativa.pdf_store/8bfd0a18-a06b-40ba-91ae-3f3ed1b0d1a3/link_lists.bin +0 -0
- stores/ConserGPT/Circular25julio2023ReligionyAtencionEducativa.pdf_store/chroma.sqlite3 +0 -0
- stores/ConserGPT/Instruccion26septiembre2023PremiosExtraordinariosMusica.pdf_store/61c07000-ae3f-47ca-94a7-80c0ed5dfd53/data_level0.bin +3 -0
- stores/ConserGPT/Instruccion26septiembre2023PremiosExtraordinariosMusica.pdf_store/61c07000-ae3f-47ca-94a7-80c0ed5dfd53/header.bin +0 -0
- stores/ConserGPT/Instruccion26septiembre2023PremiosExtraordinariosMusica.pdf_store/61c07000-ae3f-47ca-94a7-80c0ed5dfd53/length.bin +0 -0
- stores/ConserGPT/Instruccion26septiembre2023PremiosExtraordinariosMusica.pdf_store/61c07000-ae3f-47ca-94a7-80c0ed5dfd53/link_lists.bin +0 -0
- stores/ConserGPT/Instruccion26septiembre2023PremiosExtraordinariosMusica.pdf_store/chroma.sqlite3 +0 -0
.gitattributes
CHANGED
@@ -1,3 +1,5 @@
|
|
1 |
# Auto detect text files and perform LF normalization
|
2 |
* text=auto
|
|
|
|
|
3 |
zephyr-7b-alpha.Q5_K_S.gguf filter=lfs diff=lfs merge=lfs -text
|
|
|
1 |
# Auto detect text files and perform LF normalization
|
2 |
* text=auto
|
3 |
+
stores/ConserGPT/Circular25julio2023ReligionyAtencionEducativa.pdf_store/8bfd0a18-a06b-40ba-91ae-3f3ed1b0d1a3/data_level0.bin filter=lfs diff=lfs merge=lfs -text
|
4 |
+
stores/ConserGPT/Instruccion26septiembre2023PremiosExtraordinariosMusica.pdf_store/61c07000-ae3f-47ca-94a7-80c0ed5dfd53/data_level0.bin filter=lfs diff=lfs merge=lfs -text
|
5 |
zephyr-7b-alpha.Q5_K_S.gguf filter=lfs diff=lfs merge=lfs -text
|
README.md
CHANGED
@@ -1,15 +1,6 @@
|
|
1 |
-
---
|
2 |
-
title: ConserGPT
|
3 |
-
sdk: gradio
|
4 |
-
emoji: 🗿
|
5 |
-
colorFrom: red
|
6 |
-
colorTo: yellow
|
7 |
-
pinned: true
|
8 |
-
---
|
9 |
-
|
10 |
# Zephyr-7B-beta-RAG-Demo
|
11 |
Zephyr 7B beta RAG Demo inside a Gradio app powered by BGE Embeddings, ChromaDB, and Zephyr 7B Alpha.
|
12 |
|
13 |
Download zephyr-7b-alpha.Q5_K_S.gguf from this link: https://huggingface.co/TheBloke/zephyr-7B-alpha-GGUF/tree/main
|
14 |
|
15 |
-
https://view.genial.ly/65805d10850fa600146ed98b/presentation-consergpt
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
# Zephyr-7B-beta-RAG-Demo
|
2 |
Zephyr 7B beta RAG Demo inside a Gradio app powered by BGE Embeddings, ChromaDB, and Zephyr 7B Alpha.
|
3 |
|
4 |
Download zephyr-7b-alpha.Q5_K_S.gguf from this link: https://huggingface.co/TheBloke/zephyr-7B-alpha-GGUF/tree/main
|
5 |
|
6 |
+
https://view.genial.ly/65805d10850fa600146ed98b/presentation-consergpt
|
app.py
CHANGED
@@ -26,7 +26,7 @@ config = {
|
|
26 |
|
27 |
llm = CTransformers(
|
28 |
model=local_llm,
|
29 |
-
model_type="
|
30 |
lib="avx2", # for CPU use
|
31 |
**config
|
32 |
)
|
@@ -34,7 +34,6 @@ llm = CTransformers(
|
|
34 |
print("LLM Initialized...")
|
35 |
|
36 |
|
37 |
-
|
38 |
prompt_template = """Utiliza la siguiente información para responder a la pregunta del usuario.
|
39 |
Si no sabes la respuesta, di simplemente que no la sabes, no intentes inventarte una respuesta.
|
40 |
|
@@ -55,23 +54,23 @@ embeddings = HuggingFaceBgeEmbeddings(
|
|
55 |
encode_kwargs=encode_kwargs
|
56 |
)
|
57 |
|
58 |
-
loader = PyPDFLoader(
|
59 |
-
|
60 |
-
documents = loader.load()
|
61 |
-
text_splitter = RecursiveCharacterTextSplitter(
|
62 |
-
|
63 |
-
texts = text_splitter.split_documents(documents)
|
64 |
|
65 |
-
vector_store = Chroma.from_documents(texts, embeddings, collection_metadata={
|
66 |
-
|
67 |
|
68 |
-
print("Vector Store Created.......")
|
69 |
|
70 |
|
71 |
prompt = PromptTemplate(template=prompt_template,
|
72 |
input_variables=['context', 'question'])
|
73 |
load_vector_store = Chroma(
|
74 |
-
persist_directory="stores/ConserGPT", embedding_function=embeddings)
|
75 |
retriever = load_vector_store.as_retriever(search_kwargs={"k": 1})
|
76 |
|
77 |
print("######################################################################")
|
|
|
26 |
|
27 |
llm = CTransformers(
|
28 |
model=local_llm,
|
29 |
+
model_type="zephyr",
|
30 |
lib="avx2", # for CPU use
|
31 |
**config
|
32 |
)
|
|
|
34 |
print("LLM Initialized...")
|
35 |
|
36 |
|
|
|
37 |
prompt_template = """Utiliza la siguiente información para responder a la pregunta del usuario.
|
38 |
Si no sabes la respuesta, di simplemente que no la sabes, no intentes inventarte una respuesta.
|
39 |
|
|
|
54 |
encode_kwargs=encode_kwargs
|
55 |
)
|
56 |
|
57 |
+
# loader = PyPDFLoader(
|
58 |
+
# "./Instruccion26septiembre2023PremiosExtraordinariosMusica.pdf")
|
59 |
+
# documents = loader.load()
|
60 |
+
# text_splitter = RecursiveCharacterTextSplitter(
|
61 |
+
# chunk_size=1000, chunk_overlap=100)
|
62 |
+
# texts = text_splitter.split_documents(documents)
|
63 |
|
64 |
+
# vector_store = Chroma.from_documents(texts, embeddings, collection_metadata={
|
65 |
+
# "hnsw:space": "cosine"}, persist_directory="stores/ConserGPT")
|
66 |
|
67 |
+
# print("Vector Store Created.......")
|
68 |
|
69 |
|
70 |
prompt = PromptTemplate(template=prompt_template,
|
71 |
input_variables=['context', 'question'])
|
72 |
load_vector_store = Chroma(
|
73 |
+
persist_directory="stores/ConserGPT/", embedding_function=embeddings)
|
74 |
retriever = load_vector_store.as_retriever(search_kwargs={"k": 1})
|
75 |
|
76 |
print("######################################################################")
|
ingest2.py
ADDED
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
3 |
+
from langchain.vectorstores import Chroma
|
4 |
+
from langchain.embeddings import HuggingFaceBgeEmbeddings
|
5 |
+
from langchain.document_loaders import PyPDFLoader
|
6 |
+
|
7 |
+
# Embedding model configuration. The same embedding function must be used
# when the stores are queried, otherwise similarity search is meaningless.
# NOTE(review): "bge-large-en" looks like an English-oriented model while the
# PDFs in pdf_folder appear to be Spanish documents - confirm this is intended.
model_name = "BAAI/bge-large-en"
model_kwargs = {'device': 'cpu'}
encode_kwargs = {'normalize_embeddings': False}
embeddings = HuggingFaceBgeEmbeddings(
    model_name=model_name,
    model_kwargs=model_kwargs,
    encode_kwargs=encode_kwargs
)

pdf_folder = "./pdf_folder"  # Path to the folder that contains the PDF files
output_folder = "stores/ConserGPT"  # Output folder for the vector stores

# Create the output directory if it does not exist
os.makedirs(output_folder, exist_ok=True)

# The splitter does not depend on the file being processed, so build it once.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000, chunk_overlap=100)

# Iterate over the PDF files in the folder. Sorting makes the processing
# order (and therefore the log output) deterministic across runs.
# NOTE(review): app.py in this same change opens
# Chroma(persist_directory="stores/ConserGPT/"), i.e. the parent folder,
# while this script writes one store per PDF into "<name>_store" sub-folders.
# Confirm the app actually sees these collections.
for pdf_file in sorted(os.listdir(pdf_folder)):
    pdf_path = os.path.join(pdf_folder, pdf_file)

    # Match the extension case-insensitively (".PDF" is common) and ignore
    # directories that merely happen to be named like a PDF.
    if not pdf_file.lower().endswith(".pdf") or not os.path.isfile(pdf_path):
        continue

    loader = PyPDFLoader(pdf_path)
    documents = loader.load()
    texts = text_splitter.split_documents(documents)

    # A PDF without extractable text (e.g. a scanned image) produces no
    # chunks; building a collection from an empty list is expected to fail,
    # which would abort the whole run - TODO confirm against the installed
    # chromadb version. Skip such files instead.
    if not texts:
        print(f"Skipping {pdf_file}: no extractable text")
        continue

    # One persistent store per PDF, using cosine distance for the HNSW index.
    vector_store = Chroma.from_documents(
        texts,
        embeddings,
        collection_metadata={"hnsw:space": "cosine"},
        persist_directory=os.path.join(output_folder, f"{pdf_file}_store"),
    )

    print(f"Vector Store created for {pdf_file}")

print("All Vector Stores Created.......")
|
pdf_folder/Circular25julio2023ReligionyAtencionEducativa.pdf
ADDED
Binary file (230 kB). View file
|
|
pdf_folder/Instruccion26septiembre2023PremiosExtraordinariosMusica.pdf
ADDED
Binary file (143 kB). View file
|
|
requirements.txt
CHANGED
@@ -2,8 +2,7 @@ chainlit
|
|
2 |
ctransformers
|
3 |
torch
|
4 |
sentence_transformers
|
5 |
-
chromadb
|
6 |
-
langchain-community
|
7 |
langchain
|
8 |
pypdf
|
9 |
PyPDF2
|
|
|
2 |
ctransformers
|
3 |
torch
|
4 |
sentence_transformers
|
5 |
+
chromadb
|
|
|
6 |
langchain
|
7 |
pypdf
|
8 |
PyPDF2
|
stores/ConserGPT/Circular25julio2023ReligionyAtencionEducativa.pdf_store/8bfd0a18-a06b-40ba-91ae-3f3ed1b0d1a3/data_level0.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:95042e844cfb77b20e578cf65635282a99d7c4dd20e589ac062f38bc389f8e58
|
3 |
+
size 4236000
|
stores/ConserGPT/Circular25julio2023ReligionyAtencionEducativa.pdf_store/8bfd0a18-a06b-40ba-91ae-3f3ed1b0d1a3/header.bin
ADDED
Binary file (100 Bytes). View file
|
|
stores/ConserGPT/Circular25julio2023ReligionyAtencionEducativa.pdf_store/8bfd0a18-a06b-40ba-91ae-3f3ed1b0d1a3/length.bin
ADDED
Binary file (4 kB). View file
|
|
stores/ConserGPT/Circular25julio2023ReligionyAtencionEducativa.pdf_store/8bfd0a18-a06b-40ba-91ae-3f3ed1b0d1a3/link_lists.bin
ADDED
File without changes
|
stores/ConserGPT/Circular25julio2023ReligionyAtencionEducativa.pdf_store/chroma.sqlite3
ADDED
Binary file (651 kB). View file
|
|
stores/ConserGPT/Instruccion26septiembre2023PremiosExtraordinariosMusica.pdf_store/61c07000-ae3f-47ca-94a7-80c0ed5dfd53/data_level0.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:95042e844cfb77b20e578cf65635282a99d7c4dd20e589ac062f38bc389f8e58
|
3 |
+
size 4236000
|
stores/ConserGPT/Instruccion26septiembre2023PremiosExtraordinariosMusica.pdf_store/61c07000-ae3f-47ca-94a7-80c0ed5dfd53/header.bin
ADDED
Binary file (100 Bytes). View file
|
|
stores/ConserGPT/Instruccion26septiembre2023PremiosExtraordinariosMusica.pdf_store/61c07000-ae3f-47ca-94a7-80c0ed5dfd53/length.bin
ADDED
Binary file (4 kB). View file
|
|
stores/ConserGPT/Instruccion26septiembre2023PremiosExtraordinariosMusica.pdf_store/61c07000-ae3f-47ca-94a7-80c0ed5dfd53/link_lists.bin
ADDED
File without changes
|
stores/ConserGPT/Instruccion26septiembre2023PremiosExtraordinariosMusica.pdf_store/chroma.sqlite3
ADDED
Binary file (496 kB). View file
|
|