derek-thomas HF staff committed on
Commit
3cb8374
1 Parent(s): 9a4e478

Making updates to be more Arabic

Browse files
Files changed (3) hide show
  1. app.py +9 -13
  2. templates/template.j2 +2 -2
  3. templates/template_html.j2 +9 -9
app.py CHANGED
@@ -1,5 +1,5 @@
1
- from functools import partial
2
  import logging
 
3
  from pathlib import Path
4
  from time import perf_counter
5
 
@@ -26,9 +26,9 @@ template_html = env.get_template('template_html.j2')
26
  tokenizer = AutoTokenizer.from_pretrained('derek-thomas/jais-13b-chat-hf')
27
 
28
  # Examples
29
- examples = ['What is the capital of China?',
30
- 'Why is the sky blue?',
31
- 'Who won the mens world cup in 2014?', ]
32
 
33
 
34
  def add_text(history, text):
@@ -45,10 +45,7 @@ def bot(history, hyde=False):
45
  # Retrieve documents relevant to query
46
  document_start = perf_counter()
47
  if hyde:
48
- hyde_document = ""
49
- generator = generate(f"Write a wikipedia article intro paragraph to answer this query: {query}")
50
- for output_chunk in generator:
51
- hyde_document = output_chunk
52
 
53
  logger.warning(hyde_document)
54
  documents = retriever(hyde_document, top_k=top_k)
@@ -57,14 +54,13 @@ def bot(history, hyde=False):
57
  document_time = perf_counter() - document_start
58
  logger.warning(f'Finished Retrieving documents in {round(document_time, 2)} seconds...')
59
 
60
-
61
  # Function to count tokens
62
  def count_tokens(text):
63
  return len(tokenizer.encode(text))
64
-
65
  # Create Prompt
66
  prompt = template.render(documents=documents, query=query)
67
-
68
  # Check if the prompt is too long
69
  token_count = count_tokens(prompt)
70
  while token_count > 2048:
@@ -119,7 +115,6 @@ with gr.Blocks() as demo:
119
  # Turn it back on
120
  txt_msg.then(lambda: gr.Textbox(interactive=True), None, [txt], queue=False)
121
 
122
-
123
  with gr.Tab("Arabic-RAG + HyDE"):
124
  hyde_chatbot = gr.Chatbot(
125
  [],
@@ -143,7 +138,8 @@ with gr.Blocks() as demo:
143
  gr.Examples(examples, hyde_txt)
144
  hyde_prompt_html = gr.HTML()
145
  # Turn off interactivity while generating if you click
146
- hyde_txt_msg = hyde_txt_btn.click(add_text, [hyde_chatbot, hyde_txt], [hyde_chatbot, hyde_txt], queue=False).then(
 
147
  partial(bot, hyde=True), [hyde_chatbot], [hyde_chatbot, hyde_prompt_html])
148
 
149
  # Turn it back on
 
 
1
  import logging
2
+ from functools import partial
3
  from pathlib import Path
4
  from time import perf_counter
5
 
 
26
  tokenizer = AutoTokenizer.from_pretrained('derek-thomas/jais-13b-chat-hf')
27
 
28
  # Examples
29
+ examples = ['ما هي عاصمة الصين؟',
30
+ 'لم السماء زرقاء؟',
31
+ "من فاز بكأس العالم للرجال في عام 2014؟",]
32
 
33
 
34
  def add_text(history, text):
 
45
  # Retrieve documents relevant to query
46
  document_start = perf_counter()
47
  if hyde:
48
+ hyde_document = generate(f"Write a wikipedia article intro paragraph to answer this query: {query}")[-1]
 
 
 
49
 
50
  logger.warning(hyde_document)
51
  documents = retriever(hyde_document, top_k=top_k)
 
54
  document_time = perf_counter() - document_start
55
  logger.warning(f'Finished Retrieving documents in {round(document_time, 2)} seconds...')
56
 
 
57
  # Function to count tokens
58
  def count_tokens(text):
59
  return len(tokenizer.encode(text))
60
+
61
  # Create Prompt
62
  prompt = template.render(documents=documents, query=query)
63
+
64
  # Check if the prompt is too long
65
  token_count = count_tokens(prompt)
66
  while token_count > 2048:
 
115
  # Turn it back on
116
  txt_msg.then(lambda: gr.Textbox(interactive=True), None, [txt], queue=False)
117
 
 
118
  with gr.Tab("Arabic-RAG + HyDE"):
119
  hyde_chatbot = gr.Chatbot(
120
  [],
 
138
  gr.Examples(examples, hyde_txt)
139
  hyde_prompt_html = gr.HTML()
140
  # Turn off interactivity while generating if you click
141
+ hyde_txt_msg = hyde_txt_btn.click(add_text, [hyde_chatbot, hyde_txt], [hyde_chatbot, hyde_txt],
142
+ queue=False).then(
143
  partial(bot, hyde=True), [hyde_chatbot], [hyde_chatbot, hyde_prompt_html])
144
 
145
  # Turn it back on
templates/template.j2 CHANGED
@@ -1,5 +1,5 @@
1
- ### Instruction: Use the following unique documents in the Context section to answer the Query at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.
2
- ### Context
3
  {% for doc in documents %}
4
  ---
5
  {{ doc.content }}
 
1
+ ### Instruction: استخدم المستندات الفريدة التالية في قسم السياق للإجابة على الاستعلام في النهاية. إذا كنت لا تعرف الإجابة، قل فقط أنك لا تعرف، ولا تحاول اختلاق إجابة.
2
+ ### Context
3
  {% for doc in documents %}
4
  ---
5
  {{ doc.content }}
templates/template_html.j2 CHANGED
@@ -70,25 +70,25 @@
70
  </head>
71
  <body>
72
  <div class="prose svelte-1ybaih5" id="component-6">
73
- <h2>Prompt</h2>
74
- Below is the prompt that is given to the model.
75
  <hr>
76
- <h2>Instructions</h2>
77
- <span class="instructions">Use the following pieces of context to answer the question at the end.<br>If you don't know the answer, just say that you don't know, <span
78
- style="font-weight: bold;">don't try to make up an answer.</span></span><br>
79
- <h2>Context</h2>
80
  {% for doc in documents %}
81
  <details class="doc-box" dir="rtl">
82
  <summary>
83
  <b>وثيقة_{{ loop.index }} | <a href="{{ doc.meta.url }}">{{ doc.meta.title }}</a>:</b> <span
84
  class="doc-short">{{ doc.content[:50] }}...</span>
85
  </summary>
86
- <div class="doc-full"><a href="{{ doc.meta.url }}">{{ doc.meta.title }}</a>: {{ doc.content }}</div>
87
  </details>
88
  {% endfor %}
89
 
90
- <h2>Query</h2>
91
- <span class="query">{{ query }}</span>
92
  </div>
93
 
94
  <script>
 
70
  </head>
71
  <body>
72
  <div class="prose svelte-1ybaih5" id="component-6">
73
+ <h2>اِسْتَدْعَى</h2>
74
+ فيما يلي الموجه الذي تم تقديمه للنموذج.
75
  <hr>
76
+ <h2>تعليمات</h2>
77
+ <span class="instructions" dir="rtl">استخدم الأجزاء التالية من السياق للإجابة على السؤال في النهاية. <br>إذا كنت لا تعرف الإجابة، فقط قل أنك لا تعرف،<span
78
+ style="font-weight: bold;">لا تحاول اختلاق إجابة.</span></span><br>
79
+ <h2>سياق</h2>
80
  {% for doc in documents %}
81
  <details class="doc-box" dir="rtl">
82
  <summary>
83
  <b>وثيقة_{{ loop.index }} | <a href="{{ doc.meta.url }}">{{ doc.meta.title }}</a>:</b> <span
84
  class="doc-short">{{ doc.content[:50] }}...</span>
85
  </summary>
86
+ <div class="doc-full">{{ doc.content }}</div>
87
  </details>
88
  {% endfor %}
89
 
90
+ <h2>استفسار</h2>
91
+ <span class="query" dir="rtl">{{ query }}</span>
92
  </div>
93
 
94
  <script>