lruizap committed on
Commit
7dfd89d
1 Parent(s): eb9c5ff

Upload 21 files

Browse files
Files changed (17) hide show
  1. .gitattributes +2 -0
  2. README.md +1 -10
  3. app.py +11 -12
  4. ingest2.py +38 -0
  5. pdf_folder/Circular25julio2023ReligionyAtencionEducativa.pdf +0 -0
  6. pdf_folder/Instruccion26septiembre2023PremiosExtraordinariosMusica.pdf +0 -0
  7. requirements.txt +1 -2
  8. stores/ConserGPT/Circular25julio2023ReligionyAtencionEducativa.pdf_store/8bfd0a18-a06b-40ba-91ae-3f3ed1b0d1a3/data_level0.bin +3 -0
  9. stores/ConserGPT/Circular25julio2023ReligionyAtencionEducativa.pdf_store/8bfd0a18-a06b-40ba-91ae-3f3ed1b0d1a3/header.bin +0 -0
  10. stores/ConserGPT/Circular25julio2023ReligionyAtencionEducativa.pdf_store/8bfd0a18-a06b-40ba-91ae-3f3ed1b0d1a3/length.bin +0 -0
  11. stores/ConserGPT/Circular25julio2023ReligionyAtencionEducativa.pdf_store/8bfd0a18-a06b-40ba-91ae-3f3ed1b0d1a3/link_lists.bin +0 -0
  12. stores/ConserGPT/Circular25julio2023ReligionyAtencionEducativa.pdf_store/chroma.sqlite3 +0 -0
  13. stores/ConserGPT/Instruccion26septiembre2023PremiosExtraordinariosMusica.pdf_store/61c07000-ae3f-47ca-94a7-80c0ed5dfd53/data_level0.bin +3 -0
  14. stores/ConserGPT/Instruccion26septiembre2023PremiosExtraordinariosMusica.pdf_store/61c07000-ae3f-47ca-94a7-80c0ed5dfd53/header.bin +0 -0
  15. stores/ConserGPT/Instruccion26septiembre2023PremiosExtraordinariosMusica.pdf_store/61c07000-ae3f-47ca-94a7-80c0ed5dfd53/length.bin +0 -0
  16. stores/ConserGPT/Instruccion26septiembre2023PremiosExtraordinariosMusica.pdf_store/61c07000-ae3f-47ca-94a7-80c0ed5dfd53/link_lists.bin +0 -0
  17. stores/ConserGPT/Instruccion26septiembre2023PremiosExtraordinariosMusica.pdf_store/chroma.sqlite3 +0 -0
.gitattributes CHANGED
@@ -1,3 +1,5 @@
1
  # Auto detect text files and perform LF normalization
2
  * text=auto
 
 
3
  zephyr-7b-alpha.Q5_K_S.gguf filter=lfs diff=lfs merge=lfs -text
 
1
  # Auto detect text files and perform LF normalization
2
  * text=auto
3
+ stores/ConserGPT/Circular25julio2023ReligionyAtencionEducativa.pdf_store/8bfd0a18-a06b-40ba-91ae-3f3ed1b0d1a3/data_level0.bin filter=lfs diff=lfs merge=lfs -text
4
+ stores/ConserGPT/Instruccion26septiembre2023PremiosExtraordinariosMusica.pdf_store/61c07000-ae3f-47ca-94a7-80c0ed5dfd53/data_level0.bin filter=lfs diff=lfs merge=lfs -text
5
  zephyr-7b-alpha.Q5_K_S.gguf filter=lfs diff=lfs merge=lfs -text
README.md CHANGED
@@ -1,15 +1,6 @@
1
- ---
2
- title: ConserGPT
3
- sdk: gradio
4
- emoji: 🗿
5
- colorFrom: red
6
- colorTo: yellow
7
- pinned: true
8
- ---
9
-
10
  # Zephyr-7B-beta-RAG-Demo
11
  Zephyr 7B beta RAG Demo inside a Gradio app powered by BGE Embeddings, ChromaDB, and Zephyr 7B Alpha.
12
 
13
  Download zephyr-7b-alpha.Q5_K_S.gguf in this link : https://huggingface.co/TheBloke/zephyr-7B-alpha-GGUF/tree/main
14
 
15
- https://view.genial.ly/65805d10850fa600146ed98b/presentation-consergpt
 
 
 
 
 
 
 
 
 
 
1
  # Zephyr-7B-beta-RAG-Demo
2
  Zephyr 7B beta RAG Demo inside a Gradio app powered by BGE Embeddings, ChromaDB, and Zephyr 7B Alpha.
3
 
4
  Download zephyr-7b-alpha.Q5_K_S.gguf in this link : https://huggingface.co/TheBloke/zephyr-7B-alpha-GGUF/tree/main
5
 
6
+ https://view.genial.ly/65805d10850fa600146ed98b/presentation-consergpt
app.py CHANGED
@@ -26,7 +26,7 @@ config = {
26
 
27
  llm = CTransformers(
28
  model=local_llm,
29
- model_type="mistral",
30
  lib="avx2", # for CPU use
31
  **config
32
  )
@@ -34,7 +34,6 @@ llm = CTransformers(
34
  print("LLM Initialized...")
35
 
36
 
37
-
38
  prompt_template = """Utiliza la siguiente información para responder a la pregunta del usuario.
39
  Si no sabes la respuesta, di simplemente que no la sabes, no intentes inventarte una respuesta.
40
 
@@ -55,23 +54,23 @@ embeddings = HuggingFaceBgeEmbeddings(
55
  encode_kwargs=encode_kwargs
56
  )
57
 
58
- loader = PyPDFLoader(
59
- "./Instruccion26septiembre2023PremiosExtraordinariosMusica.pdf")
60
- documents = loader.load()
61
- text_splitter = RecursiveCharacterTextSplitter(
62
- chunk_size=1000, chunk_overlap=100)
63
- texts = text_splitter.split_documents(documents)
64
 
65
- vector_store = Chroma.from_documents(texts, embeddings, collection_metadata={
66
- "hnsw:space": "cosine"}, persist_directory="stores/ConserGPT")
67
 
68
- print("Vector Store Created.......")
69
 
70
 
71
  prompt = PromptTemplate(template=prompt_template,
72
  input_variables=['context', 'question'])
73
  load_vector_store = Chroma(
74
- persist_directory="stores/ConserGPT", embedding_function=embeddings)
75
  retriever = load_vector_store.as_retriever(search_kwargs={"k": 1})
76
 
77
  print("######################################################################")
 
26
 
27
  llm = CTransformers(
28
  model=local_llm,
29
+ model_type="zephyr",
30
  lib="avx2", # for CPU use
31
  **config
32
  )
 
34
  print("LLM Initialized...")
35
 
36
 
 
37
  prompt_template = """Utiliza la siguiente información para responder a la pregunta del usuario.
38
  Si no sabes la respuesta, di simplemente que no la sabes, no intentes inventarte una respuesta.
39
 
 
54
  encode_kwargs=encode_kwargs
55
  )
56
 
57
+ # loader = PyPDFLoader(
58
+ # "./Instruccion26septiembre2023PremiosExtraordinariosMusica.pdf")
59
+ # documents = loader.load()
60
+ # text_splitter = RecursiveCharacterTextSplitter(
61
+ # chunk_size=1000, chunk_overlap=100)
62
+ # texts = text_splitter.split_documents(documents)
63
 
64
+ # vector_store = Chroma.from_documents(texts, embeddings, collection_metadata={
65
+ # "hnsw:space": "cosine"}, persist_directory="stores/ConserGPT")
66
 
67
+ # print("Vector Store Created.......")
68
 
69
 
70
  prompt = PromptTemplate(template=prompt_template,
71
  input_variables=['context', 'question'])
72
  load_vector_store = Chroma(
73
+ persist_directory="stores/ConserGPT/", embedding_function=embeddings)
74
  retriever = load_vector_store.as_retriever(search_kwargs={"k": 1})
75
 
76
  print("######################################################################")
ingest2.py ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
3
+ from langchain.vectorstores import Chroma
4
+ from langchain.embeddings import HuggingFaceBgeEmbeddings
5
+ from langchain.document_loaders import PyPDFLoader
6
+
7
# Build one persisted Chroma vector store per PDF found in `pdf_folder`.
#
# Each PDF is loaded with PyPDFLoader, split into overlapping character
# chunks, embedded with the BGE model configured below, and written to its
# own sub-directory `<output_folder>/<pdf file name>_store`.
#
# NOTE(review): app.py in this same commit opens
# persist_directory="stores/ConserGPT/" (the parent folder), not one of the
# per-PDF sub-directories written here -- confirm the app really reads the
# stores produced by this script.
# NOTE(review): re-running the script presumably adds the same chunks again
# to an already existing store -- confirm, and clear the store first if a
# clean rebuild is wanted.

model_name = "BAAI/bge-large-en"
model_kwargs = {'device': 'cpu'}
encode_kwargs = {'normalize_embeddings': False}
embeddings = HuggingFaceBgeEmbeddings(
    model_name=model_name,
    model_kwargs=model_kwargs,
    encode_kwargs=encode_kwargs
)

pdf_folder = "./pdf_folder"  # Path to the folder that contains the PDF files
output_folder = "stores/ConserGPT"  # Output folder for the vector stores

# Create the output directory if it does not exist
os.makedirs(output_folder, exist_ok=True)

# The splitter does not depend on the file being processed, so build it once
# instead of once per PDF.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000, chunk_overlap=100)

# Iterate over the PDF files in the folder. os.listdir() returns entries in
# arbitrary order; sorting makes the processing order reproducible.
for pdf_file in sorted(os.listdir(pdf_folder)):
    # Match the extension case-insensitively so "X.PDF" is not skipped.
    if not pdf_file.lower().endswith(".pdf"):
        continue

    pdf_path = os.path.join(pdf_folder, pdf_file)

    loader = PyPDFLoader(pdf_path)
    documents = loader.load()
    texts = text_splitter.split_documents(documents)

    # A PDF without extractable text (e.g. a scanned document) yields no
    # chunks; skip it rather than hand an empty batch to the vector store,
    # which may fail and abort the remaining files.
    if not texts:
        print(f"Skipping {pdf_file}: no text could be extracted")
        continue

    vector_store = Chroma.from_documents(
        texts,
        embeddings,
        collection_metadata={"hnsw:space": "cosine"},
        persist_directory=os.path.join(output_folder, f"{pdf_file}_store"),
    )

    print(f"Vector Store created for {pdf_file}")

print("All Vector Stores Created.......")
pdf_folder/Circular25julio2023ReligionyAtencionEducativa.pdf ADDED
Binary file (230 kB). View file
 
pdf_folder/Instruccion26septiembre2023PremiosExtraordinariosMusica.pdf ADDED
Binary file (143 kB). View file
 
requirements.txt CHANGED
@@ -2,8 +2,7 @@ chainlit
2
  ctransformers
3
  torch
4
  sentence_transformers
5
- chromadb
6
- langchain-community
7
  langchain
8
  pypdf
9
  PyPDF2
 
2
  ctransformers
3
  torch
4
  sentence_transformers
5
+ chromadb
 
6
  langchain
7
  pypdf
8
  PyPDF2
stores/ConserGPT/Circular25julio2023ReligionyAtencionEducativa.pdf_store/8bfd0a18-a06b-40ba-91ae-3f3ed1b0d1a3/data_level0.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:95042e844cfb77b20e578cf65635282a99d7c4dd20e589ac062f38bc389f8e58
3
+ size 4236000
stores/ConserGPT/Circular25julio2023ReligionyAtencionEducativa.pdf_store/8bfd0a18-a06b-40ba-91ae-3f3ed1b0d1a3/header.bin ADDED
Binary file (100 Bytes). View file
 
stores/ConserGPT/Circular25julio2023ReligionyAtencionEducativa.pdf_store/8bfd0a18-a06b-40ba-91ae-3f3ed1b0d1a3/length.bin ADDED
Binary file (4 kB). View file
 
stores/ConserGPT/Circular25julio2023ReligionyAtencionEducativa.pdf_store/8bfd0a18-a06b-40ba-91ae-3f3ed1b0d1a3/link_lists.bin ADDED
File without changes
stores/ConserGPT/Circular25julio2023ReligionyAtencionEducativa.pdf_store/chroma.sqlite3 ADDED
Binary file (651 kB). View file
 
stores/ConserGPT/Instruccion26septiembre2023PremiosExtraordinariosMusica.pdf_store/61c07000-ae3f-47ca-94a7-80c0ed5dfd53/data_level0.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:95042e844cfb77b20e578cf65635282a99d7c4dd20e589ac062f38bc389f8e58
3
+ size 4236000
stores/ConserGPT/Instruccion26septiembre2023PremiosExtraordinariosMusica.pdf_store/61c07000-ae3f-47ca-94a7-80c0ed5dfd53/header.bin ADDED
Binary file (100 Bytes). View file
 
stores/ConserGPT/Instruccion26septiembre2023PremiosExtraordinariosMusica.pdf_store/61c07000-ae3f-47ca-94a7-80c0ed5dfd53/length.bin ADDED
Binary file (4 kB). View file
 
stores/ConserGPT/Instruccion26septiembre2023PremiosExtraordinariosMusica.pdf_store/61c07000-ae3f-47ca-94a7-80c0ed5dfd53/link_lists.bin ADDED
File without changes
stores/ConserGPT/Instruccion26septiembre2023PremiosExtraordinariosMusica.pdf_store/chroma.sqlite3 ADDED
Binary file (496 kB). View file