acumplid committed
Commit a8bf50c
1 parent: a6487f4
Files changed (9)
  1. .gitignore +5 -0
  2. README.md +6 -5
  3. app.py +266 -0
  4. handler.py +14 -0
  5. input_reader.py +22 -0
  6. rag.py +165 -0
  7. rag_image.jpg +0 -0
  8. requirements.txt +14 -0
  9. utils.py +33 -0
.gitignore ADDED
@@ -0,0 +1,5 @@
+/venv
+/venv/*
+.env
+__pycache__
+__pycache__/*
README.md CHANGED
@@ -1,12 +1,13 @@
 ---
-title: MLhouse RAG
-emoji: 😻
-colorFrom: blue
-colorTo: pink
+title: MLhouse-RAG
+emoji: 💻
+colorFrom: indigo
+colorTo: yellow
 sdk: gradio
-sdk_version: 5.8.0
+sdk_version: 4.24.0
 app_file: app.py
 pinned: false
+license: apache-2.0
 ---
 
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,266 @@
+import os
+import gradio as gr
+from gradio.components import Textbox, Button, Slider, Checkbox
+from AinaTheme import theme
+from urllib.error import HTTPError
+
+from rag import RAG
+from utils import setup
+
+MAX_NEW_TOKENS = 700
+SHOW_MODEL_PARAMETERS_IN_UI = os.environ.get("SHOW_MODEL_PARAMETERS_IN_UI", default="True") == "True"
+
+setup()
+
+
+rag = RAG(
+    hf_token=os.getenv("HF_TOKEN"),
+    embeddings_model=os.getenv("EMBEDDINGS"),
+    repo_name=os.getenv("REPO_NAME"),
+)
+
+# model_name=os.getenv("MODEL"),
+# rerank_model=os.getenv("RERANK_MODEL"),
+# rerank_number_contexts=int(os.getenv("RERANK_NUMBER_CONTEXTS"))
+
+def generate(prompt, model_parameters):
+    try:
+        output, context, source = rag.get_response(prompt, model_parameters)
+        return output, context, source
+    except HTTPError as err:
+        if err.code == 400:
+            gr.Warning(
+                "The inference endpoint is only available Monday through Friday, from 08:00 to 20:00 CET."
+            )
+    except Exception:
+        gr.Warning(
+            "Inference endpoint is not available right now. Please try again later."
+        )
+    return None, None, None
+
+
+def submit_input(input_, num_chunks, max_new_tokens, repetition_penalty, top_k, top_p, do_sample, temperature):
+    if input_.strip() == "":
+        gr.Warning("Cannot run inference on an empty input")
+        return None, None, None
+
+
+    model_parameters = {
+        "NUM_CHUNKS": num_chunks,
+        "max_new_tokens": max_new_tokens,
+        "repetition_penalty": repetition_penalty,
+        "top_k": top_k,
+        "top_p": top_p,
+        "do_sample": do_sample,
+        "temperature": temperature
+    }
+
+    output, context, source = generate(input_, model_parameters)
+    sources_markup = ""
+
+    for url in source or []:
+        sources_markup += f'<a href="{url}" target="_blank">{url}</a><br>'
+
+    return output, sources_markup, context
+    # return output.strip(), sources_markup, context
+
+
+def change_interactive(text):
+    if len(text) == 0:
+        return gr.update(interactive=True), gr.update(interactive=False)
+    return gr.update(interactive=True), gr.update(interactive=True)
+
+
+def clear():
+    return (
+        None,
+        None,
+        None,
+        None,
+        gr.Slider(value=2.0),
+        gr.Slider(value=MAX_NEW_TOKENS),
+        gr.Slider(value=1.0),
+        gr.Slider(value=50),
+        gr.Slider(value=0.99),
+        gr.Checkbox(value=False),
+        gr.Slider(value=0.35),
+    )
+
+
+def gradio_app():
+    with gr.Blocks(theme=theme) as demo:
+        with gr.Row():
+            with gr.Column(scale=0.1):
+                gr.Image("rag_image.jpg", elem_id="flor-banner", scale=1, height=256, width=256, show_label=False, show_download_button=False, show_share_button=False)
+            with gr.Column():
+                gr.Markdown(
+                    """# Demo de Retrieval-Augmented Generation per a documents legals
+🔍 **Retrieval-Augmented Generation** (RAG) és una tecnologia d'IA que permet interrogar un repositori de documents amb preguntes
+en llenguatge natural, i combina tècniques de recuperació d'informació avançades amb models generatius per redactar una resposta
+fent servir només la informació existent en els documents del repositori.
+
+🎯 **Objectiu:** Aquest és un demostrador amb la normativa vigent publicada al Diari Oficial de la Generalitat de Catalunya, en el
+repositori de l'EADOP (Entitat Autònoma del Diari Oficial i de Publicacions). Aquesta versió explora prop de 2000 documents en català,
+i genera la resposta fent servir el model Salamandra-7b-aligned-EADOP, el model BSC-LT/salamandra-7b-instruct alineat amb el dataset d'alinia/EADOP-RAG-out-of-domain.
+
+⚠️ **Advertències**: Aquesta versió és experimental. El contingut generat per aquest model no està supervisat i pot ser incorrecte.
+Si us plau, tingueu-ho en compte quan exploreu aquest recurs.
+"""
+                )
+        with gr.Row(equal_height=True):
+            with gr.Column(variant="panel"):
+                input_ = Textbox(
+                    lines=11,
+                    label="Input",
+                    placeholder="Quina és la finalitat del Servei Meteorològic de Catalunya?",
+                    # value = "Quina és la finalitat del Servei Meteorològic de Catalunya?"
+                )
+                with gr.Row(variant="panel"):
+                    clear_btn = Button(
+                        "Clear",
+                    )
+                    submit_btn = Button("Submit", variant="primary", interactive=False)
+
+                with gr.Row(variant="panel"):
+                    with gr.Accordion("Model parameters", open=False, visible=SHOW_MODEL_PARAMETERS_IN_UI):
+                        num_chunks = Slider(
+                            minimum=1,
+                            maximum=6,
+                            step=1,
+                            value=2,
+                            label="Number of chunks"
+                        )
+                        max_new_tokens = Slider(
+                            minimum=50,
+                            maximum=2000,
+                            step=1,
+                            value=MAX_NEW_TOKENS,
+                            label="Max tokens"
+                        )
+                        repetition_penalty = Slider(
+                            minimum=0.1,
+                            maximum=2.0,
+                            step=0.1,
+                            value=1.0,
+                            label="Repetition penalty"
+                        )
+                        top_k = Slider(
+                            minimum=1,
+                            maximum=100,
+                            step=1,
+                            value=50,
+                            label="Top k"
+                        )
+                        top_p = Slider(
+                            minimum=0.01,
+                            maximum=0.99,
+                            value=0.99,
+                            label="Top p"
+                        )
+                        do_sample = Checkbox(
+                            value=False,
+                            label="Do sample"
+                        )
+                        temperature = Slider(
+                            minimum=0.1,
+                            maximum=1,
+                            value=0.35,
+                            label="Temperature"
+                        )
+
+                parameters_components = [num_chunks, max_new_tokens, repetition_penalty, top_k, top_p, do_sample, temperature]
+
+            with gr.Column(variant="panel"):
+                output = Textbox(
+                    lines=10,
+                    label="Output",
+                    interactive=False,
+                    show_copy_button=True
+                )
+                with gr.Accordion("Sources and context:", open=False):
+                    source_context = gr.Markdown(
+                        label="Sources",
+                        show_label=False,
+                    )
+                with gr.Accordion("See full context evaluation:", open=False):
+                    context_evaluation = gr.Markdown(
+                        label="Full context",
+                        show_label=False,
+                        # interactive=False,
+                        # autoscroll=False,
+                        # show_copy_button=True
+                    )
+
+
+        input_.change(
+            fn=change_interactive,
+            inputs=[input_],
+            outputs=[clear_btn, submit_btn],
+            api_name=False,
+        )
+
+        input_.change(
+            fn=None,
+            inputs=[input_],
+            api_name=False,
+            js="""(i, m) => {
+                document.getElementById('inputlenght').textContent = i.length + ' '
+                document.getElementById('inputlenght').style.color = (i.length > m) ? "#ef4444" : "";
+            }""",
+        )
+
+        clear_btn.click(
+            fn=clear,
+            inputs=[],
+            outputs=[input_, output, source_context, context_evaluation] + parameters_components,
+            queue=False,
+            api_name=False
+        )
+
+        submit_btn.click(
+            fn=submit_input,
+            inputs=[input_] + parameters_components,
+            outputs=[output, source_context, context_evaluation],
+            api_name="get-results"
+        )
+
+        with gr.Row():
+            with gr.Column(scale=0.5):
+                gr.Examples(
+                    examples=[
+                        ["""Què és l'EADOP (Entitat Autònoma del Diari Oficial i de Publicacions)?"""],
+                    ],
+                    inputs=input_,
+                    outputs=[output, source_context, context_evaluation],
+                    fn=submit_input,
+                )
+                gr.Examples(
+                    examples=[
+                        ["""Què diu el decret sobre la senyalització de les begudes alcohòliques i el tabac a Catalunya?"""],
+                    ],
+                    inputs=input_,
+                    outputs=[output, source_context, context_evaluation],
+                    fn=submit_input,
+                )
+                gr.Examples(
+                    examples=[
+                        ["""Com es pot inscriure una persona al Registre de catalans i catalanes residents a l'exterior?"""],
+                    ],
+                    inputs=input_,
+                    outputs=[output, source_context, context_evaluation],
+                    fn=submit_input,
+                )
+                gr.Examples(
+                    examples=[
+                        ["""Quina és la finalitat del Servei Meteorològic de Catalunya?"""],
+                    ],
+                    inputs=input_,
+                    outputs=[output, source_context, context_evaluation],
+                    fn=submit_input,
+                )
+
+    demo.launch(show_api=True)
+
+
+if __name__ == "__main__":
+    gradio_app()
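
Because submit_btn.click is registered with api_name="get-results" and the app launches with show_api=True, the Space can also be driven programmatically. A minimal client-side sketch using gradio_client; the Space id "user/mlhouse-rag" is a placeholder, and the positional arguments mirror [input_] + parameters_components:

# Hypothetical caller for the "get-results" endpoint; the Space id is a placeholder.
from gradio_client import Client

client = Client("user/mlhouse-rag")  # replace with the real Space id
output, sources, context = client.predict(
    "Quina és la finalitat del Servei Meteorològic de Catalunya?",  # input_
    2,       # num_chunks
    700,     # max_new_tokens
    1.0,     # repetition_penalty
    50,      # top_k
    0.99,    # top_p
    False,   # do_sample
    0.35,    # temperature
    api_name="/get-results",
)
print(output)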
handler.py ADDED
@@ -0,0 +1,14 @@
+import json
+
+class ContentHandler:
+    content_type = "application/json"
+    accepts = "application/json"
+
+    def transform_input(self, prompt: str, model_kwargs: dict) -> bytes:
+        input_str = json.dumps({'inputs': prompt, 'parameters': model_kwargs})
+        return input_str.encode('utf-8')
+
+    def transform_output(self, output) -> str:  # `output` is a file-like response body, not raw bytes
+        response_json = json.loads(output.read().decode("utf-8"))
+        return response_json[0]["generated_text"]
+
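
ContentHandler mirrors the serialization contract of a text-generation endpoint: transform_input packs the prompt and parameters into JSON bytes, and transform_output reads a file-like response body and extracts generated_text. A quick round-trip sketch, using io.BytesIO to stand in for the streaming body:

# Round-trip sketch; the response payload below is fabricated for illustration.
import io
from handler import ContentHandler

handler = ContentHandler()

body = handler.transform_input("Què és l'EADOP?", {"max_new_tokens": 700})
print(body)  # JSON bytes: {"inputs": ..., "parameters": {"max_new_tokens": 700}}

fake_body = io.BytesIO(b'[{"generated_text": "Resposta de prova"}]')
print(handler.transform_output(fake_body))  # "Resposta de prova"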
input_reader.py ADDED
@@ -0,0 +1,22 @@
+from typing import List
+
+from llama_index.core.constants import DEFAULT_CHUNK_OVERLAP, DEFAULT_CHUNK_SIZE
+from llama_index.core.readers import SimpleDirectoryReader
+from llama_index.core.schema import Document
+from llama_index.core import Settings
+
+
+class InputReader:
+    def __init__(self, input_dir: str) -> None:
+        self.reader = SimpleDirectoryReader(input_dir=input_dir)
+
+    def parse_documents(
+        self,
+        show_progress: bool = True,
+        chunk_size: int = DEFAULT_CHUNK_SIZE,
+        chunk_overlap: int = DEFAULT_CHUNK_OVERLAP,
+    ) -> List[Document]:
+        Settings.chunk_size = chunk_size
+        Settings.chunk_overlap = chunk_overlap
+        documents = self.reader.load_data(show_progress=show_progress)
+        return documents
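
InputReader wraps llama_index's SimpleDirectoryReader and stores the chunking configuration on the global Settings object, so the chunk size and overlap take effect later, when the loaded documents are split and indexed. A hypothetical usage sketch; "documents/" is a placeholder directory:

from input_reader import InputReader

reader = InputReader(input_dir="documents/")  # placeholder path with source files
docs = reader.parse_documents(show_progress=True, chunk_size=512, chunk_overlap=64)
print(f"Loaded {len(docs)} documents")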
rag.py ADDED
@@ -0,0 +1,165 @@
+import logging
+import os
+import requests
+from transformers import AutoTokenizer, AutoModelForSequenceClassification
+import torch
+from openai import OpenAI
+from huggingface_hub import snapshot_download
+
+from langchain_community.vectorstores import FAISS
+from langchain_community.embeddings import HuggingFaceEmbeddings
+
+
+class RAG:
+    NO_ANSWER_MESSAGE: str = "Ho sento, no he pogut respondre la teva pregunta."
+
+
+
+    # Download the vectorstore from the Hugging Face Hub
+
+    def __init__(self, hf_token, embeddings_model, repo_name):
+
+        vectorstore = snapshot_download(repo_name)
+
+        # self.model_name = model_name
+        self.hf_token = hf_token
+        # self.rerank_model = rerank_model
+        # self.rerank_number_contexts = rerank_number_contexts
+
+        # load vector store
+        embeddings = HuggingFaceEmbeddings(model_name=embeddings_model, model_kwargs={'device': 'cpu'})
+        self.vector_store = FAISS.load_local(vectorstore, embeddings, allow_dangerous_deserialization=True)
+
+        logging.info("RAG loaded!")
+
+    # def rerank_contexts(self, instruction, contexts, number_of_contexts=1):
+    #     """
+    #     Rerank the contexts based on their relevance to the given instruction.
+    #     """
+
+    #     rerank_model = self.rerank_model
+
+
+    #     tokenizer = AutoTokenizer.from_pretrained(rerank_model)
+    #     model = AutoModelForSequenceClassification.from_pretrained(rerank_model)
+
+    #     def get_score(query, passage):
+    #         """Calculate the relevance score of a passage with respect to a query."""
+
+
+    #         inputs = tokenizer(query, passage, return_tensors='pt', truncation=True, padding=True, max_length=512)
+
+
+    #         with torch.no_grad():
+    #             outputs = model(**inputs)
+
+
+    #         logits = outputs.logits
+
+
+    #         score = logits.view(-1, ).float()
+
+
+    #         return score
+
+    #     scores = [get_score(instruction, c[0].page_content) for c in contexts]
+    #     combined = list(zip(contexts, scores))
+    #     sorted_combined = sorted(combined, key=lambda x: x[1], reverse=True)
+    #     sorted_texts, _ = zip(*sorted_combined)
+
+    #     return sorted_texts[:number_of_contexts]
+
+    def get_context(self, instruction, number_of_contexts=2):
+        """Retrieve the most relevant contexts for a given instruction."""
+        documentos = self.vector_store.similarity_search_with_score(instruction, k=number_of_contexts)
+
+        # documentos = self.rerank_contexts(instruction, documentos, number_of_contexts=number_of_contexts)
+
+        # reranking is disabled; return the raw similarity-search results
+        return documentos
+
+    def predict_dolly(self, instruction, context, model_parameters):
+
+        api_key = os.getenv("HF_TOKEN")
+
+
+        headers = {
+            "Accept": "application/json",
+            "Authorization": f"Bearer {api_key}",
+            "Content-Type": "application/json"
+        }
+
+        query = f"### Instruction\n{instruction}\n\n### Context\n{context}\n\n### Answer\n "
+        # prompt = "You are a helpful assistant. Answer the question using only the context you are provided with. If it is not possible to do it with the context, just say 'I can't answer'. <|endoftext|>"
+
+
+        payload = {
+            "inputs": query,
+            "parameters": model_parameters
+        }
+
+        response = requests.post(self.model_name, headers=headers, json=payload)  # requires self.model_name (currently commented out in __init__)
+
+        return response.json()[0]["generated_text"].split("###")[-1][8:]
+
+    def predict_completion(self, instruction, context, model_parameters):
+
+        client = OpenAI(
+            base_url=os.getenv("MODEL"),
+            api_key=os.getenv("HF_TOKEN")
+        )
+
+        query = f"Context:\n{context}\n\nQuestion:\n{instruction}"
+
+        chat_completion = client.chat.completions.create(
+            model="tgi",
+            messages=[
+                {"role": "user", "content": query}  # send the context-augmented prompt, not just the instruction
+            ],
+            temperature=model_parameters["temperature"],
+            max_tokens=model_parameters["max_new_tokens"],
+            stream=False,
+            stop=["<|im_end|>"],
+            extra_body={
+                "presence_penalty": model_parameters["repetition_penalty"] - 2,
+                "do_sample": False
+            }
+        )
+
+        response = chat_completion.choices[0].message.content
+
+        return response
+
+
+    def beautiful_context(self, docs):
+
+        text_context = ""
+
+        full_context = ""
+        source_context = []
+        for doc in docs:
+            text_context += doc[0].page_content
+            full_context += doc[0].page_content + "\n"
+            full_context += doc[0].metadata["Títol de la norma"] + "\n\n"
+            full_context += doc[0].metadata["url"] + "\n\n"
+            source_context.append(doc[0].metadata["url"])
+
+        return text_context, full_context, source_context
+
+    def get_response(self, prompt: str, model_parameters: dict):
+        try:
+            docs = self.get_context(prompt, model_parameters["NUM_CHUNKS"])
+            text_context, full_context, source = self.beautiful_context(docs)
+
+            del model_parameters["NUM_CHUNKS"]
+
+            # response = self.predict_completion(prompt, text_context, model_parameters)
+            response = "Output"  # placeholder while the completion call above is disabled
+
+            if not response:
+                return self.NO_ANSWER_MESSAGE, full_context, source
+
+            return response, full_context, source
+        except Exception as err:
+            logging.error(err)
+            raise  # let callers (see generate() in app.py) handle the failure
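
Putting the pieces together: RAG only needs an embeddings model id and a Hub repo holding the serialized FAISS index; retrieval runs locally on CPU while generation is delegated to a remote endpoint (currently stubbed out). A minimal wiring sketch that mirrors app.py, assuming the same environment variables are set:

import os
from rag import RAG

rag = RAG(
    hf_token=os.getenv("HF_TOKEN"),
    embeddings_model=os.getenv("EMBEDDINGS"),  # e.g. a sentence-transformers model id
    repo_name=os.getenv("REPO_NAME"),          # Hub repo containing the FAISS index
)

# NUM_CHUNKS is consumed by get_response; the rest would go to the generation endpoint.
answer, full_context, sources = rag.get_response(
    "Què és l'EADOP?",
    {"NUM_CHUNKS": 2, "max_new_tokens": 700, "repetition_penalty": 1.0,
     "top_k": 50, "top_p": 0.99, "do_sample": False, "temperature": 0.35},
)
print(answer)
print(sources)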
rag_image.jpg ADDED
requirements.txt ADDED
@@ -0,0 +1,14 @@
+gradio==4.29.0
+huggingface-hub==0.23.4
+python-dotenv==1.0.0
+llama-index==0.10.14
+llama-index-embeddings-huggingface==0.2.2
+llama-index-llms-huggingface==0.2.4
+sentence-transformers==2.7.0
+langchain
+faiss-cpu
+aina-gradio-theme==2.3
+
+langchain-community==0.2.1
+langchain-core==0.2.1
+openai==1.35.12
utils.py ADDED
@@ -0,0 +1,33 @@
+import logging
+import warnings
+
+from dotenv import load_dotenv
+
+
+from rag import RAG
+
+USER_INPUT = 100
+
+
+def setup():
+    load_dotenv()
+    warnings.filterwarnings("ignore")
+
+    logging.addLevelName(USER_INPUT, "USER_INPUT")
+    logging.basicConfig(format="[%(levelname)s]: %(message)s", level=logging.INFO)
+
+
+def interactive(model: RAG):
+    logging.info("Write `exit` when you want to stop the model.")
+    print()
+
+    query = ""
+    while query.lower() != "exit":
+        logging.log(USER_INPUT, "Write the query or `exit`:")
+        query = input()
+
+        if query.lower() == "exit":
+            break
+
+        response, _, _ = model.get_response(query, {"NUM_CHUNKS": 2})  # get_response expects a parameters dict
+        print(response, end="\n\n")
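
setup() loads the .env file and registers the custom USER_INPUT log level before anything else touches the environment, which is why app.py calls it before constructing RAG. For completeness, a hypothetical console driver for the interactive loop; the environment variable values are placeholders:

import os
from rag import RAG
from utils import setup, interactive

setup()  # load .env and configure logging first
rag = RAG(
    hf_token=os.getenv("HF_TOKEN"),
    embeddings_model=os.getenv("EMBEDDINGS"),
    repo_name=os.getenv("REPO_NAME"),
)
interactive(rag)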