lruizap committed on
Commit
56ea8b1
1 Parent(s): 531b87a

Upload ConserGPT

.gitattributes CHANGED
@@ -1,35 +1,2 @@
- *.7z filter=lfs diff=lfs merge=lfs -text
- *.arrow filter=lfs diff=lfs merge=lfs -text
- *.bin filter=lfs diff=lfs merge=lfs -text
- *.bz2 filter=lfs diff=lfs merge=lfs -text
- *.ckpt filter=lfs diff=lfs merge=lfs -text
- *.ftz filter=lfs diff=lfs merge=lfs -text
- *.gz filter=lfs diff=lfs merge=lfs -text
- *.h5 filter=lfs diff=lfs merge=lfs -text
- *.joblib filter=lfs diff=lfs merge=lfs -text
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
- *.model filter=lfs diff=lfs merge=lfs -text
- *.msgpack filter=lfs diff=lfs merge=lfs -text
- *.npy filter=lfs diff=lfs merge=lfs -text
- *.npz filter=lfs diff=lfs merge=lfs -text
- *.onnx filter=lfs diff=lfs merge=lfs -text
- *.ot filter=lfs diff=lfs merge=lfs -text
- *.parquet filter=lfs diff=lfs merge=lfs -text
- *.pb filter=lfs diff=lfs merge=lfs -text
- *.pickle filter=lfs diff=lfs merge=lfs -text
- *.pkl filter=lfs diff=lfs merge=lfs -text
- *.pt filter=lfs diff=lfs merge=lfs -text
- *.pth filter=lfs diff=lfs merge=lfs -text
- *.rar filter=lfs diff=lfs merge=lfs -text
- *.safetensors filter=lfs diff=lfs merge=lfs -text
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
- *.tar.* filter=lfs diff=lfs merge=lfs -text
- *.tar filter=lfs diff=lfs merge=lfs -text
- *.tflite filter=lfs diff=lfs merge=lfs -text
- *.tgz filter=lfs diff=lfs merge=lfs -text
- *.wasm filter=lfs diff=lfs merge=lfs -text
- *.xz filter=lfs diff=lfs merge=lfs -text
- *.zip filter=lfs diff=lfs merge=lfs -text
- *.zst filter=lfs diff=lfs merge=lfs -text
- *tfevents* filter=lfs diff=lfs merge=lfs -text
+ # Auto detect text files and perform LF normalization
+ * text=auto
.gitignore ADDED
@@ -0,0 +1,3 @@
+ stores
+ ConserGPT
+ zephyr-7b-alpha.Q5_K_S.gguf
Instruccion26septiembre2023PremiosExtraordinariosMusica.pdf ADDED
Binary file (143 kB)
README.md CHANGED
@@ -1,13 +1,6 @@
- ---
- title: ConserGPT
- emoji: 📈
- colorFrom: indigo
- colorTo: red
- sdk: gradio
- sdk_version: 4.13.0
- app_file: app.py
- pinned: false
- license: other
- ---
-
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+ # Zephyr-7B-alpha-RAG-Demo
+ A Zephyr 7B Alpha RAG demo inside a Gradio app, powered by BGE embeddings and ChromaDB.
+
+ Download zephyr-7b-alpha.Q5_K_S.gguf from this link: https://huggingface.co/TheBloke/zephyr-7B-alpha-GGUF/tree/main
+
+ https://view.genial.ly/65805d10850fa600146ed98b/presentation-consergpt
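The README's download step can also be scripted. A minimal sketch using huggingface_hub (an assumption on my part; the repo itself only documents the manual link):

    # Sketch: fetch the GGUF weights referenced in the README.
    # Assumes `pip install huggingface_hub`; not part of this commit.
    from huggingface_hub import hf_hub_download

    hf_hub_download(
        repo_id="TheBloke/zephyr-7B-alpha-GGUF",
        filename="zephyr-7b-alpha.Q5_K_S.gguf",
        local_dir=".",
    )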
app.py ADDED
@@ -0,0 +1,111 @@
+ import os
+ import gradio as gr
+
+ from langchain.llms import CTransformers
+ from langchain.prompts import PromptTemplate
+
+ from langchain.vectorstores import Chroma
+ from langchain.chains import RetrievalQA
+ from langchain.embeddings import HuggingFaceBgeEmbeddings
+
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
+ from langchain.document_loaders import PyPDFLoader
+
+
+ local_llm = "zephyr-7b-alpha.Q5_K_S.gguf"
+
+ config = {
+     'max_new_tokens': 1024,
+     'repetition_penalty': 1.1,
+     'temperature': 0.1,
+     'top_k': 50,
+     'top_p': 0.9,
+     'stream': True,
+     'threads': int(os.cpu_count() / 2)
+ }
+
+ llm = CTransformers(
+     model=local_llm,
+     model_type="mistral",
+     lib="avx2",  # for CPU use
+     **config
+ )
+
+ print("LLM Initialized...")
+
+
+ # Spanish RAG prompt: answer only from the retrieved context, admit when the
+ # answer is unknown, and always reply in Spanish.
+ prompt_template = """Utiliza la siguiente información para responder a la pregunta del usuario.
+ Si no sabes la respuesta, di simplemente que no la sabes, no intentes inventarte una respuesta.
+
+ Contexto: {context}
+ Pregunta: {question}
+
+ Devuelve sólo la respuesta útil que aparece a continuación y nada más.
+ Responde siempre en castellano
+ Respuesta útil:
+ """
+
+ model_name = "BAAI/bge-large-en"
+ model_kwargs = {'device': 'cpu'}
+ encode_kwargs = {'normalize_embeddings': False}
+ embeddings = HuggingFaceBgeEmbeddings(
+     model_name=model_name,
+     model_kwargs=model_kwargs,
+     encode_kwargs=encode_kwargs
+ )
+
+ loader = PyPDFLoader(
+     "./Instruccion26septiembre2023PremiosExtraordinariosMusica.pdf")
+ documents = loader.load()
+ text_splitter = RecursiveCharacterTextSplitter(
+     chunk_size=1000, chunk_overlap=100)
+ texts = text_splitter.split_documents(documents)
+
+ vector_store = Chroma.from_documents(texts, embeddings, collection_metadata={
+     "hnsw:space": "cosine"}, persist_directory="stores/ConserGPT")
+
+ print("Vector Store Created.......")
+
+
+ prompt = PromptTemplate(template=prompt_template,
+                         input_variables=['context', 'question'])
+ load_vector_store = Chroma(
+     persist_directory="stores/ConserGPT", embedding_function=embeddings)
+ retriever = load_vector_store.as_retriever(search_kwargs={"k": 1})
+
+ print("######################################################################")
+
+ chain_type_kwargs = {"prompt": prompt}
+
+
+ sample_prompts = ["En caso de empate entre el alumnado de alguna especialidad de la enseñanza profesionales de música, ¿Qué criterios se aplicarían para dar el premio?",
+                   "¿Qué requisitos debe reunir un alumno candidato al premio extraordinario de enseñanzas profesionales de música?",
+                   "¿Cuál es la fecha de publicación en el BOE de la Orden ECD/1611/2015, del 29 de julio, del Ministerio de Educación, Cultura y Deporte?"]
+
+
+ def get_response(input):
+     query = input
+     chain_type_kwargs = {"prompt": prompt}
+     qa = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=retriever,
+                                      return_source_documents=True, chain_type_kwargs=chain_type_kwargs, verbose=True)
+     response = qa(query)
+     return response["result"]
+
+
+ input = gr.Text(
+     label="Prompt",
+     show_label=False,
+     max_lines=1,
+     placeholder="Enter your prompt",
+     container=False,
+ )
+
+ iface = gr.Interface(fn=get_response,
+                      inputs=input,
+                      outputs="text",
+                      title="ConserGPT",
+                      description="This is a RAG implementation based on Zephyr 7B Alpha LLM.",
+                      examples=sample_prompts,
+                      allow_flagging='never'
+                      )
+
+ iface.launch(share=True)
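One design note on app.py: get_response rebuilds the RetrievalQA chain on every request. A minimal sketch of constructing it once at startup instead, reusing the llm, retriever, and prompt defined above (same API, per-call behavior otherwise unchanged):

    # Sketch: build the chain once and reuse it per request.
    qa_chain = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=retriever,
        return_source_documents=True,
        chain_type_kwargs={"prompt": prompt},
    )

    def get_response(user_query):
        # Same result extraction as above, without re-creating the chain.
        return qa_chain(user_query)["result"]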
ingest.py ADDED
@@ -0,0 +1,26 @@
+ import os
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
+ from langchain.vectorstores import Chroma
+ from langchain.embeddings import HuggingFaceBgeEmbeddings
+ from langchain.document_loaders import PyPDFLoader
+
+ model_name = "BAAI/bge-large-en"
+ model_kwargs = {'device': 'cpu'}
+ encode_kwargs = {'normalize_embeddings': False}
+ embeddings = HuggingFaceBgeEmbeddings(
+     model_name=model_name,
+     model_kwargs=model_kwargs,
+     encode_kwargs=encode_kwargs
+ )
+
+ loader = PyPDFLoader(
+     "./Instruccion26septiembre2023PremiosExtraordinariosMusica.pdf")
+ documents = loader.load()
+ text_splitter = RecursiveCharacterTextSplitter(
+     chunk_size=1000, chunk_overlap=100)
+ texts = text_splitter.split_documents(documents)
+
+ vector_store = Chroma.from_documents(texts, embeddings, collection_metadata={
+     "hnsw:space": "cosine"}, persist_directory="stores/ConserGPT")
+
+ print("Vector Store Created.......")
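Since ingest.py persists the Chroma index to stores/ConserGPT, retrieval can be smoke-tested by loading the store back without re-parsing the PDF. A minimal sketch under the same embedding settings (the query string is only an example):

    # Sketch: load the store persisted by ingest.py and run one search.
    from langchain.embeddings import HuggingFaceBgeEmbeddings
    from langchain.vectorstores import Chroma

    embeddings = HuggingFaceBgeEmbeddings(
        model_name="BAAI/bge-large-en",
        model_kwargs={'device': 'cpu'},
        encode_kwargs={'normalize_embeddings': False}
    )
    store = Chroma(persist_directory="stores/ConserGPT",
                   embedding_function=embeddings)
    docs = store.similarity_search("premios extraordinarios", k=1)  # example query
    print(docs[0].page_content[:200])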
requirements.txt ADDED
@@ -0,0 +1,11 @@
+ chainlit
+ ctransformers
+ torch
+ sentence_transformers
+ chromadb
+ langchain
+ pypdf
+ PyPDF2
+ gradio
+ transformers
+ accelerate