Spaces:
Build error
Build error
Commit
•
3cb8374
1
Parent(s):
9a4e478
Making updates to be more Arabic
Browse files
- app.py +9 -13
- templates/template.j2 +2 -2
- templates/template_html.j2 +9 -9
app.py
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
-
from functools import partial
|
2 |
import logging
|
|
|
3 |
from pathlib import Path
|
4 |
from time import perf_counter
|
5 |
|
@@ -26,9 +26,9 @@ template_html = env.get_template('template_html.j2')
|
|
26 |
tokenizer = AutoTokenizer.from_pretrained('derek-thomas/jais-13b-chat-hf')
|
27 |
|
28 |
# Examples
|
29 |
-
examples = ['
|
30 |
-
'
|
31 |
-
|
32 |
|
33 |
|
34 |
def add_text(history, text):
|
@@ -45,10 +45,7 @@ def bot(history, hyde=False):
|
|
45 |
# Retrieve documents relevant to query
|
46 |
document_start = perf_counter()
|
47 |
if hyde:
|
48 |
-
hyde_document = ""
|
49 |
-
generator = generate(f"Write a wikipedia article intro paragraph to answer this query: {query}")
|
50 |
-
for output_chunk in generator:
|
51 |
-
hyde_document = output_chunk
|
52 |
|
53 |
logger.warning(hyde_document)
|
54 |
documents = retriever(hyde_document, top_k=top_k)
|
@@ -57,14 +54,13 @@ def bot(history, hyde=False):
|
|
57 |
document_time = perf_counter() - document_start
|
58 |
logger.warning(f'Finished Retrieving documents in {round(document_time, 2)} seconds...')
|
59 |
|
60 |
-
|
61 |
# Function to count tokens
|
62 |
def count_tokens(text):
|
63 |
return len(tokenizer.encode(text))
|
64 |
-
|
65 |
# Create Prompt
|
66 |
prompt = template.render(documents=documents, query=query)
|
67 |
-
|
68 |
# Check if the prompt is too long
|
69 |
token_count = count_tokens(prompt)
|
70 |
while token_count > 2048:
|
@@ -119,7 +115,6 @@ with gr.Blocks() as demo:
|
|
119 |
# Turn it back on
|
120 |
txt_msg.then(lambda: gr.Textbox(interactive=True), None, [txt], queue=False)
|
121 |
|
122 |
-
|
123 |
with gr.Tab("Arabic-RAG + HyDE"):
|
124 |
hyde_chatbot = gr.Chatbot(
|
125 |
[],
|
@@ -143,7 +138,8 @@ with gr.Blocks() as demo:
|
|
143 |
gr.Examples(examples, hyde_txt)
|
144 |
hyde_prompt_html = gr.HTML()
|
145 |
# Turn off interactivity while generating if you click
|
146 |
-
hyde_txt_msg = hyde_txt_btn.click(add_text, [hyde_chatbot, hyde_txt], [hyde_chatbot, hyde_txt],
|
|
|
147 |
partial(bot, hyde=True), [hyde_chatbot], [hyde_chatbot, hyde_prompt_html])
|
148 |
|
149 |
# Turn it back on
|
|
|
|
|
1 |
import logging
|
2 |
+
from functools import partial
|
3 |
from pathlib import Path
|
4 |
from time import perf_counter
|
5 |
|
|
|
26 |
tokenizer = AutoTokenizer.from_pretrained('derek-thomas/jais-13b-chat-hf')
|
27 |
|
28 |
# Examples
|
29 |
+
examples = ['ما هي عاصمة الصين؟',
|
30 |
+
'لم السماء زرقاء؟',
|
31 |
+
"من فاز بكأس العالم للرجال في عام 2014؟",]
|
32 |
|
33 |
|
34 |
def add_text(history, text):
|
|
|
45 |
# Retrieve documents relevant to query
|
46 |
document_start = perf_counter()
|
47 |
if hyde:
|
48 |
+
hyde_document = generate(f"Write a wikipedia article intro paragraph to answer this query: {query}")[-1]
|
|
|
|
|
|
|
49 |
|
50 |
logger.warning(hyde_document)
|
51 |
documents = retriever(hyde_document, top_k=top_k)
|
|
|
54 |
document_time = perf_counter() - document_start
|
55 |
logger.warning(f'Finished Retrieving documents in {round(document_time, 2)} seconds...')
|
56 |
|
|
|
57 |
# Function to count tokens
|
58 |
def count_tokens(text):
|
59 |
return len(tokenizer.encode(text))
|
60 |
+
|
61 |
# Create Prompt
|
62 |
prompt = template.render(documents=documents, query=query)
|
63 |
+
|
64 |
# Check if the prompt is too long
|
65 |
token_count = count_tokens(prompt)
|
66 |
while token_count > 2048:
|
|
|
115 |
# Turn it back on
|
116 |
txt_msg.then(lambda: gr.Textbox(interactive=True), None, [txt], queue=False)
|
117 |
|
|
|
118 |
with gr.Tab("Arabic-RAG + HyDE"):
|
119 |
hyde_chatbot = gr.Chatbot(
|
120 |
[],
|
|
|
138 |
gr.Examples(examples, hyde_txt)
|
139 |
hyde_prompt_html = gr.HTML()
|
140 |
# Turn off interactivity while generating if you click
|
141 |
+
hyde_txt_msg = hyde_txt_btn.click(add_text, [hyde_chatbot, hyde_txt], [hyde_chatbot, hyde_txt],
|
142 |
+
queue=False).then(
|
143 |
partial(bot, hyde=True), [hyde_chatbot], [hyde_chatbot, hyde_prompt_html])
|
144 |
|
145 |
# Turn it back on
|
templates/template.j2
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
-
### Instruction:
|
2 |
-
### Context
|
3 |
{% for doc in documents %}
|
4 |
---
|
5 |
{{ doc.content }}
|
|
|
1 |
+
### Instruction: استخدم المستندات الفريدة التالية في قسم السياق للإجابة على الاستعلام في النهاية. إذا كنت لا تعرف الإجابة، قل فقط أنك لا تعرف، ولا تحاول اختلاق إجابة.
|
2 |
+
### Context
|
3 |
{% for doc in documents %}
|
4 |
---
|
5 |
{{ doc.content }}
|
templates/template_html.j2
CHANGED
@@ -70,25 +70,25 @@
|
|
70 |
</head>
|
71 |
<body>
|
72 |
<div class="prose svelte-1ybaih5" id="component-6">
|
73 |
-
<h2
|
74 |
-
|
75 |
<hr>
|
76 |
-
<h2
|
77 |
-
<span class="instructions"
|
78 |
-
style="font-weight: bold;"
|
79 |
-
<h2
|
80 |
{% for doc in documents %}
|
81 |
<details class="doc-box" dir="rtl">
|
82 |
<summary>
|
83 |
<b>وثيقة_{{ loop.index }} | <a href="{{ doc.meta.url }}">{{ doc.meta.title }}</a>:</b> <span
|
84 |
class="doc-short">{{ doc.content[:50] }}...</span>
|
85 |
</summary>
|
86 |
-
<div class="doc-full"
|
87 |
</details>
|
88 |
{% endfor %}
|
89 |
|
90 |
-
<h2
|
91 |
-
<span class="query">{{ query }}</span>
|
92 |
</div>
|
93 |
|
94 |
<script>
|
|
|
70 |
</head>
|
71 |
<body>
|
72 |
<div class="prose svelte-1ybaih5" id="component-6">
|
73 |
+
<h2>اِسْتَدْعَى</h2>
|
74 |
+
فيما يلي الموجه الذي تم تقديمه للنموذج.
|
75 |
<hr>
|
76 |
+
<h2>تعليمات</h2>
|
77 |
+
<span class="instructions" dir="rtl">استخدم الأجزاء التالية من السياق للإجابة على السؤال في النهاية. <br>إذا كنت لا تعرف الإجابة، فقط قل أنك لا تعرف،<span
|
78 |
+
style="font-weight: bold;">لا تحاول اختلاق إجابة.</span></span><br>
|
79 |
+
<h2>سياق</h2>
|
80 |
{% for doc in documents %}
|
81 |
<details class="doc-box" dir="rtl">
|
82 |
<summary>
|
83 |
<b>وثيقة_{{ loop.index }} | <a href="{{ doc.meta.url }}">{{ doc.meta.title }}</a>:</b> <span
|
84 |
class="doc-short">{{ doc.content[:50] }}...</span>
|
85 |
</summary>
|
86 |
+
<div class="doc-full">{{ doc.content }}</div>
|
87 |
</details>
|
88 |
{% endfor %}
|
89 |
|
90 |
+
<h2>استفسار</h2>
|
91 |
+
<span class="query" dir="rtl">{{ query }}</span>
|
92 |
</div>
|
93 |
|
94 |
<script>
|