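"""Gradio demo for Arabic RAG: retrieve Arabic Wikipedia passages and answer queries with jais-13b-chat."""
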
import logging
from functools import partial
from pathlib import Path
from time import perf_counter

import gradio as gr
from gradio_rich_textbox import RichTextbox
from jinja2 import Environment, FileSystemLoader
from transformers import AutoTokenizer

from backend.query_llm import check_endpoint_status, generate
from backend.semantic_search import retriever

proj_dir = Path(__file__).parent
# Set up logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Set up the template environment with the templates directory
env = Environment(loader=FileSystemLoader(proj_dir / 'templates'))

# Load the templates directly from the environment
template = env.get_template('template.j2')
template_html = env.get_template('template_html.j2')

# Initialize tokenizer
tokenizer = AutoTokenizer.from_pretrained('derek-thomas/jais-13b-chat-hf')
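# Note: the same tokenizer is used below to budget prompt length against the model's context window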

# Example queries (English translations in inline comments)
examples = ['من كان طرفي معركة اكتيوم البحرية؟',  # Who were the two sides in the naval Battle of Actium?
            'لم السماء زرقاء؟',  # Why is the sky blue?
            "من فاز بكأس العالم للرجال في عام 2014؟", ]  # Who won the men's World Cup in 2014?


def add_text(history, text):
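    """Append the user's message to the chat history and disable the textbox while the bot responds."""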
    history = [] if history is None else history
    history = history + [[text, None]]  # use a list so bot() can assign the response in place
    return history, gr.Textbox(value="", interactive=False)


def bot(history, hyde=False):
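    """Answer the latest user query: retrieve context, build the prompt, and generate a response.

    If hyde=True, first generate a hypothetical answer (HyDE) and retrieve against it
    instead of the raw query.
    """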
    top_k = 5
    query = history[-1][0]

    logger.info('Retrieving documents...')
    # Retrieve documents relevant to query
    document_start = perf_counter()
    if hyde:
        hyde_document = generate(f"Write a wikipedia article intro paragraph to answer this query: {query}").split(
                '### Response: [|AI|]')[-1]

        logger.info(hyde_document)
        documents = retriever(hyde_document, top_k=top_k)
    else:
        documents = retriever(query, top_k=top_k)
    document_time = perf_counter() - document_start
    logger.info(f'Finished retrieving documents in {round(document_time, 2)} seconds')

    # Function to count tokens
    def count_tokens(text):
        return len(tokenizer.encode(text))

    # Create Prompt
    prompt = template.render(documents=documents, query=query)

    # Check if the prompt is too long
    token_count = count_tokens(prompt)
    while token_count > 2048 and documents:
        # Drop the lowest-ranked document and re-check until the prompt fits the context budget
        documents.pop()
        prompt = template.render(documents=documents, query=query)  # Re-render the prompt
        token_count = count_tokens(prompt)  # Re-count tokens

    prompt_html = template_html.render(documents=documents, query=query)

    history[-1][1] = ""
    response = generate(prompt)
    history[-1][1] = response.split('### Response: [|AI|]')[-1]
    return history, prompt_html


intro_md = """
# Arabic RAG
This is a project to demonstrate Retreiver Augmented Generation (RAG) in Arabic and English. It uses 
[Arabic Wikipedia](https://ar.wikipedia.org/wiki) as a base to answer questions you have. 
A retriever ([sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2](https://huggingface.co/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2/discussions/8))
 will find the articles relevant to your query and include them in a prompt so the reader ([core42/jais-13b-chat](https://huggingface.co/core42/jais-13b-chat)) 
 can then answer your questions on it.
 
You can see the prompt clearly displayed below the chatbot to understand what is going to the LLM. 

# Read this if you get an error
I'm using [Inference Endpoint's](https://huggingface.co/inference-endpoints) 
[Scale to Zero](https://huggingface.co/docs/inference-endpoints/main/en/autoscaling#scaling-to-0) to save money on GPUs.
If the staus is "scaledToZero" click **Wake Up Endpoint** to wake it up. You will get an `error` and it will take
 ~4 minutes to wake up. This is expected, if you dont like it please give me a free GPU with enough VRAM. 
"""


def process_example(text, history=None):
    """Run an example query through the full pipeline (used to cache example outputs)."""
    history = [] if history is None else history
    history = history + [[text, None]]
    return bot(history)



with gr.Blocks() as demo:
    gr.Markdown(intro_md)
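    # Poll the Inference Endpoint status every second so users can see when it has scaled to zero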
    endpoint_status = RichTextbox(check_endpoint_status, label="Inference Endpoint Status", every=1)
    wakeup_endpoint = gr.Button('Click to Wake Up Endpoint')
    with gr.Tab("Arabic-RAG"):
        chatbot = gr.Chatbot(
                [],
                elem_id="chatbot",
                avatar_images=('https://aui.atlassian.com/aui/8.8/docs/images/avatar-person.svg',
                               'https://huggingface.co/datasets/huggingface/brand-assets/resolve/main/hf-logo.svg'),
                bubble_full_width=False,
                show_copy_button=True,
                show_share_button=True,
                )

        with gr.Row():
            txt = gr.Textbox(
                    scale=3,
                    show_label=False,
                    placeholder="Enter query in Arabic or English and press enter",
                    container=False,
                    )
            txt_btn = gr.Button(value="Submit text", scale=1)

        # gr.Examples(examples, txt)
        prompt_html = gr.HTML()
        gr.Examples(
                examples=examples,
                inputs=txt,
                outputs=[chatbot, prompt_html],
                fn=process_example,
                cache_examples=True, )
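        # cache_examples=True runs each example once at startup, so clicking one returns a cached response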
        # prompt_html.render()

        # Turn off interactivity while generating if you click
        txt_msg = txt_btn.click(add_text, [chatbot, txt], [chatbot, txt], queue=False).then(
                bot, chatbot, [chatbot, prompt_html])

        # Turn it back on
        txt_msg.then(lambda: gr.Textbox(interactive=True), None, [txt], queue=False)

        # Turn off interactivity while generating if you hit enter
        txt_msg = txt.submit(add_text, [chatbot, txt], [chatbot, txt], queue=False).then(
                bot, chatbot, [chatbot, prompt_html])

        # Turn it back on
        txt_msg.then(lambda: gr.Textbox(interactive=True), None, [txt], queue=False)

    # Easy to turn this on when I want to
    # with gr.Tab("Arabic-RAG + HyDE"):
    #     hyde_chatbot = gr.Chatbot(
    #             [],
    #             elem_id="chatbot",
    #             avatar_images=('https://aui.atlassian.com/aui/8.8/docs/images/avatar-person.svg',
    #                            'https://huggingface.co/datasets/huggingface/brand-assets/resolve/main/hf-logo.svg'),
    #             bubble_full_width=False,
    #             show_copy_button=True,
    #             show_share_button=True,
    #             )
    #
    #     with gr.Row():
    #         hyde_txt = gr.Textbox(
    #                 scale=3,
    #                 show_label=False,
    #                 placeholder="Enter text and press enter",
    #                 container=False,
    #                 )
    #         hyde_txt_btn = gr.Button(value="Submit text", scale=1)
    #
    #     hyde_prompt_html = gr.HTML()
    #     gr.Examples(
    #             examples=examples,
    #             inputs=hyde_txt,
    #             outputs=[hyde_chatbot, hyde_prompt_html],
    #             fn=process_example,
    #             cache_examples=True, )
    #     # prompt_html.render()
    #     # Turn off interactivity while generating if you click
    #     hyde_txt_msg = hyde_txt_btn.click(add_text, [hyde_chatbot, hyde_txt], [hyde_chatbot, hyde_txt],
    #                                       queue=False).then(
    #             partial(bot, hyde=True), [hyde_chatbot], [hyde_chatbot, hyde_prompt_html])
    #
    #     # Turn it back on
    #     hyde_txt_msg.then(lambda: gr.Textbox(interactive=True), None, [hyde_txt], queue=False)
    #
    #     # Turn off interactivity while generating if you hit enter
    #     hyde_txt_msg = hyde_txt.submit(add_text, [hyde_chatbot, hyde_txt], [hyde_chatbot, hyde_txt], queue=False).then(
    #             partial(bot, hyde=True), [hyde_chatbot], [hyde_chatbot, hyde_prompt_html])
    #
    #     # Turn it back on
    #     hyde_txt_msg.then(lambda: gr.Textbox(interactive=True), None, [hyde_txt], queue=False)
    # Fire a throwaway generation request to wake the scaled-to-zero endpoint
    wakeup_endpoint.click(partial(generate, 'Wakeup'))

demo.queue()
demo.launch(debug=True)