lamhieu committed
Commit 7f6ea1d
1 Parent(s): 2bac78a

chore: update something

Files changed (3)
  1. README.md +15 -9
  2. app.py +439 -251
  3. requirements.txt +6 -5
README.md CHANGED
@@ -1,8 +1,8 @@
 ---
-title: Ghost 8B Beta (β, 128k, Online)
+title: Ghost 8B Beta (β, 128k)
 emoji: 👻 / 📚
-colorFrom: indigo
-colorTo: pink
+colorFrom: green
+colorTo: blue
 sdk: gradio
 sdk_version: 4.36.1
 app_file: app.py
@@ -10,15 +10,22 @@ pinned: true
 header: mini
 suggested_hardware: a10g-small
 language:
-- en
 - vi
+- ko
 - es
 - pt
-- de
-- it
-- fr
-- ko
 - zh
+- fr
+- it
+- de
+- ja
+- ru
+- pl
+- nl
+- hi
+- tr
+- id
+- en
 license: other
 license_name: ghost-open-llms
 license_link: https://ghost-x.org/ghost-open-llms-license
@@ -28,7 +35,6 @@ tags:

 # ~

-
 ### Notes

 The extension source code belongs to: "LLM Maybe LongLM: Self-Extend LLM Context Window Without Tuning". See source code details [here](https://github.com/datamllab/LongLM).
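
The Notes reference the Self-Extend technique from LongLM, which the Space uses to stretch the model's usable context. A minimal sketch of how that is wired up, assuming the `SelfExtend` module from the LongLM repository and reusing the arguments of the `SelfExtend.apply` call visible in the app.py diff below:

```python
# Minimal sketch (not part of this commit): load the chat model and apply
# Self-Extend with the same arguments used in app.py below.
import os

import torch
import SelfExtend  # assumed: the module from https://github.com/datamllab/LongLM
from transformers import AutoModelForCausalLM

model = AutoModelForCausalLM.from_pretrained(
    "ghost-x/ghost-8b-beta-1608",
    device_map="auto",
    torch_dtype=torch.bfloat16,
    token=os.getenv("HF_TOKEN"),
)
# Self-Extend remaps distant positions into groups so the pretrained attention
# window covers a longer context without any fine-tuning.
SelfExtend.apply(model, group_size=16, window_size=512)
model.generation_config.max_length = 123392
```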
app.py CHANGED
@@ -3,6 +3,8 @@
 import subprocess
 import json
 import requests
+import zlib
+from PIL import Image

 subprocess.run(
     f"pip install flash-attn --no-build-isolation",
@@ -17,34 +19,78 @@ from typing import Iterator
 import gradio as gr
 import spaces
 import torch
+import logging
 import wikipedia
 import time
 import SelfExtend
-from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
+from transformers import (
+    AutoModelForCausalLM,
+    AutoTokenizer,
+    AutoProcessor,
+    TextIteratorStreamer,
+)
+from transformers.dynamic_module_utils import get_imports
 from bs4 import BeautifulSoup
 from functools import lru_cache

+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+

 MAX_MAX_NEW_TOKENS = 8192
 DEFAULT_MAX_NEW_TOKENS = 2048
 MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "123392"))

+DEFAULT_SYSTEM_PROMPT = """\
+You are a helpful and intelligent AI, trained by Ghost X and named Ghost 8B Beta (often referred to as Ghost Beta).
+You're known for your honesty, spreading positivity, and always striving to assist users. Your expertise lies in understanding their needs and providing insightful suggestions, drawing upon your knowledge and interests. If a query exceeds your understanding, you'll be upfront and state you're unsure, avoiding fabricated responses. You enjoy incorporating emojis to enhance interactions, but maintain a balanced approach for a natural flow. Let's engage in a meaningful conversation, keeping in mind the user's language.
+"""
+
+# DEFAULT_SYSTEM_PROMPT = """\
+# You are a helpful and intelligent AI, trained by Ghost X and named Ghost 8B Beta (often referred to as 8B Beta).
+# You're known for your honesty, spreading positivity, and always striving to assist users. Your expertise lies in understanding their needs and providing insightful suggestions, drawing upon your knowledge and interests. If a query exceeds your understanding, you'll be upfront and state you're unsure, avoiding fabricated responses. You enjoy incorporating emojis to enhance interactions, but maintain a balanced approach for a natural flow. Let's engage in a meaningful conversation, keeping in mind the user's language.
+
+# A guide to dealing with extremely complex questions or challenges. Follow these steps to solve them:
+# 1. Deconstructing Complexity
+# Imagine a puzzle with intricate pieces. I'll present a challenging question. Your task: Break down this question into smaller, distinct parts. Label each part with a specific theme or aspect related to the problem. This will help us understand the multifaceted nature of the query and prepare for a structured solution.
+# 2. Reconstructing Insights
+# Once we've successfully dissected the problem into manageable components, assemble these parts like a puzzle. Focus on identifying connections, potential overlaps, and key information from each theme. The goal is to reconstruct a cohesive, well-rounded answer that addresses the original complexity of the question.
+# """
+
+HEAD = """
+<script>
+function schedule_updates() {
+  const client_info_element = document.querySelector("#client_info textarea");
+  client_info_element.value = "The current time is now: " + new Date().toLocaleString('en-US', {weekday: 'short'});
+  client_info_element.dispatchEvent(new Event('input'));
+}
+
+function bootstrap() {
+  setInterval(schedule_updates, 1000);
+};
+
+bootstrap();
+</script>
+"""
+
 DESCRIPTION = """\
-# Playground with Ghost 8B Beta (β, 128k, Online)
+# Ghost 8B Beta (β, 128k)

-**Ghost 8B Beta** model outperforms prominent models such as Llama 3 8B Instruct, GPT 3.5 Turbo in the lc_winrate score. In addition, it also outperforms Claude 3 Opus, Claude 3 Sonnet, GPT-4, and Mistral Large when comparing the winrate score of AlpacaEval 2.0, [*](https://ghost-x.org/docs/models/ghost-8b-beta/). The model comes in two context length versions, [8k](https://huggingface.co/spaces/lamhieu/ghost-8b-beta-8k) and [128k](https://huggingface.co/spaces/lamhieu/ghost-8b-beta-128k), along with multilingual function tools support by default.
+**Ghost 8B Beta** outperforms leading models like Llama 3.1 8B Instruct and GPT-3.5 Turbo in lc_winrate scores. It also surpasses Claude 3 Opus, Claude 3 Sonnet, GPT-4, and Mistral Large in AlpacaEval 2.0 winrate scores. The model offers two context length versions: [8k](https://huggingface.co/spaces/lamhieu/ghost-8b-beta-8k) and [128k](https://huggingface.co/spaces/lamhieu/ghost-8b-beta-128k), both with built-in multilingual function support.

-The languages supported are 🇺🇸 English, 🇫🇷 French, 🇮🇹 Italian, 🇪🇸 Spanish, 🇵🇹 Portuguese, 🇩🇪 German, 🇻🇳 Vietnamese, 🇰🇷 Korean and 🇨🇳 Chinese.
+Supported languages: 🇬🇧 English, 🇻🇳 Vietnamese, 🇰🇷 Korean, 🇪🇸 Spanish, 🇵🇹 Portuguese, 🇨🇳 Chinese, 🇫🇷 French, 🇮🇹 Italian, 🇩🇪 German, 🇯🇵 Japanese, 🇷🇺 Russian, 🇵🇱 Polish, 🇳🇱 Dutch, 🇮🇳 Hindi, 🇹🇷 Turkish, 🇮🇩 Indonesian.
+Note: images are described by a separate vision model rather than by Ghost 8B Beta itself.

 🗞️ **Updates**
-* Jul 23, 2024: added support for tools, now available to search for information on the internet.
+* Aug 16, 2024: Released version 160824, expanding language support from 9 to 16 languages and improving math, reasoning, and instruction-following capabilities.
+* Jul 23, 2024: Added internet search tools.
 """


 PLACEHOLDER = """
 <div style="padding: 30px; text-align: center; display: flex; flex-direction: column; align-items: center;">
-    <h1 style="font-size: 26px; margin-bottom: 2px; opacity: 0.20;">👻 Ghost 8B Beta</h1>
-    <p style="font-size: 18px; margin-bottom: 2px; opacity: 0.10;">Ask and share whatever you want ~</p>
+    <h1 style="font-size: 26px; margin-bottom: 2px; opacity: 0.20;">👋 Welcome to the Ghost 8B Beta Playground! 🎉</h1>
+    <p style="font-size: 18px; margin-bottom: 2px; opacity: 0.10;">Ask me anything and let's have some fun! 🤔💡</p>
 </div>
 """

@@ -55,231 +101,94 @@ LICENSE = """
 Ghost 8B Beta may give inaccurate information, including information about people, so please verify Ghost 8B Beta's answers. [Ghost 8B Beta](https://ghost-x.org/docs/models/ghost-8b-beta/) by [Ghost X](https://ghost-x.org).
 """

-EXAMPLES = [
-    [
-        "What is the significance of the Higgs boson in the Standard Model of particle physics?"
-    ],
-    [
-        "Qu'est-ce que l'effet fondateur et comment influence-t-il la diversité génétique d'une population?"
-    ],
-    ["Qual è il principio di Le Chatelier e come si applica agli equilibri chimici?"],
-    [
-        "¿Qué es una supernova y cuál es su importancia en la formación de elementos pesados en el universo?"
-    ],
-    [
-        "Qual é a definição formal de uma integral de linha e como é utilizada em física?"
-    ],
-    [
-        "Was versteht man unter dem Moho-Diskontinuität und welche Bedeutung hat sie für das Verständnis der Erdkruste?"
-    ],
-    [
-        "Hiện tượng nhà kính là gì và nó ảnh hưởng như thế nào đến biến đổi khí hậu toàn cầu?"
-    ],
-    [
-        "알고리즘의 시간 복잡도가 중요한 이유는 무엇이며, 시간 복잡도를 어떻게 분석하나요?"
-    ],
-    ["什么是CRISPR-Cas9基因编辑技术,它在现代生物学研究中的作用是什么?"],
-    [
-        "Create a Python function that takes a list of integers and returns the list sorted in ascending order without using the built-in sort or sorted functions."
-    ],
-    [
-        "Écrivez une fonction en C++ qui trouve le plus long sous-tableau contigu avec une somme égale à zéro."
-    ],
-    [
-        "Scrivi una funzione in Java che calcola il fattoriale di un numero utilizzando la ricorsione."
-    ],
-    [
-        "Desarrolla una función en JavaScript que determine si una cadena de texto es un palíndromo, ignorando espacios y signos de puntuación."
-    ],
-    ["Implemente uma função em C# que verifique se uma matriz quadrada é simétrica."],
-    [
-        "Schreiben Sie eine Funktion in Swift, die eine gegebene Zeichenfolge in umgekehrter Reihenfolge zurückgibt, ohne integrierte Funktionen zu verwenden."
-    ],
-    [
-        "Viết một hàm trong PHP để tìm tất cả các số nguyên tố trong một khoảng cho trước."
-    ],
-    [
-        "파이썬을 사용하여 주어진 이진 트리가 이진 탐색 트리인지 확인하는 함수를 작성하십시오."
-    ],
-    [
-        "用 Go 语言编写一个函数,计算给定字符串中每个字符出现的次数,并返回一个包含字符及其出现次数的映射。"
-    ],
-    [
-        "Can you help me design a detailed project plan for developing a machine learning model for predicting stock prices?"
-    ],
-    [
-        "Pouvez-vous m'aider à organiser un emploi du temps hebdomadaire pour maximiser la productivité de mon équipe de développement logiciel?"
-    ],
-    [
-        "Puoi aiutarmi a creare un piano di sviluppo per un'applicazione mobile che gestisce le prenotazioni di ristoranti?"
-    ],
-    [
-        "¿Podrías ayudarme a elaborar un plan detallado para la implementación de un sistema de gestión de contenido (CMS) en una empresa mediana?"
-    ],
-    [
-        "Você pode me ajudar a planejar uma estratégia de desenvolvimento para um sistema de comércio eletrônico escalável?"
-    ],
-    [
-        "Können Sie mir helfen, einen detaillierten Zeitplan für die Implementierung eines neuen ERP-Systems in unserem Unternehmen zu erstellen?"
-    ],
-    [
-        "Bạn có thể giúp tôi xây dựng một kế hoạch phát triển chi tiết cho dự án xây dựng hệ thống quản lý chuỗi cung ứng không?"
-    ],
-    [
-        "신경망 기반 이미지 인식 모델 개발을 위한 세부 프로젝트 계획을 세우는 데 도움을 줄 수 있나요?"
-    ],
-    ["你能帮我制定一个详细的开发计划,用于创建一个基于区块链的分布式账本系统吗?"],
-    [
-        "Prove that the sum of the squares of any two sides of a right triangle is equal to the square of the hypotenuse."
-    ],
-    [
-        "Calculez la force gravitationnelle entre deux masses de 10 kg chacune séparées par une distance de 1 mètre."
-    ],
-    [
-        "Determina la formula molecolare di un composto che contiene il 40% di carbonio, il 6.67% di idrogeno e il 53.33% di ossigeno in massa."
-    ],
-    [
-        "Explica la teoría del ciclo económico de Schumpeter y cómo se aplica a la economía moderna."
-    ],
-    [
-        "Calcule a energia potencial gravitacional de um objeto de 5 kg a uma altura de 10 metros acima do solo (g = 9,8 m/s²)."
-    ],
-    [
-        "Beweisen Sie, dass jede Primzahl der Form 4k+1 als Summe zweier Quadrate geschrieben werden kann."
-    ],
-    [
-        "Tính nồng độ mol của dung dịch H₂SO₄ khi hoà tan 98 gam H₂SO₄ vào nước để được 1 lít dung dịch."
-    ],
-    ["케인스 경제학의 핵심 개념과 그것이 현대 경제 정책에 미치는 영향을 설명하십시오."],
-    ["计算一个质量为2 kg的物体在3米高处的重力势能(g = 9.8 m/s²)。"],
-    [
-        'Identify the author of a novel that features a dystopian society where "Big Brother" watches over its citizens and the protagonist works for the Ministry of Truth.'
-    ],
-    [
-        "Quel est le seul mammifère capable de voler activement, souvent associé à la nuit et capable d'écholocalisation?"
-    ],
-    [
-        "Qual è l'opera letteraria italiana che narra il viaggio immaginario di un poeta attraverso Inferno, Purgatorio e Paradiso, guidato da Virgilio e Beatrice?"
-    ],
-    [
-        "¿Qué insecto es conocido por su organización social compleja, su capacidad para producir miel y su comunicación mediante la danza?"
-    ],
-    [
-        "Qual é o fenômeno atmosférico que ocorre quando uma massa de ar quente se encontra com uma massa de ar frio, resultando em uma violenta tempestade giratória?"
-    ],
-    [
-        "Welches literarische Werk beschreibt die Geschichte eines jungen Mädchens, das durch einen Kaninchenbau in eine fantastische Welt voller skurriler Charaktere fällt?"
-    ],
-    [
-        "Động vật nào có thể tái sinh toàn bộ cơ thể từ một mảnh nhỏ của chính nó, thường sống dưới nước và có thể có nhiều xúc tu?"
-    ],
-    [
-        "어떤 자연 현상은 태양빛이 대기 중의 물방울에 반사되고 굴절되어 발생하며, 하늘에 나타나는 여러 색깔의 아치 형태를 띠나요?"
-    ],
-    ["这部文学作品讲述了一位绅士和他的侍从的冒险故事,他们在"],
-    [
-        "Can you derive the Euler-Lagrange equation from the principle of stationary action in classical mechanics?"
-    ],
-    [
-        "Expliquez la notion de « différence ontologique » chez Martin Heidegger et son importance pour la phénoménologie."
-    ],
-    [
-        "Qual è il significato simbolico del colore blu nei dipinti di Giotto di Bondone durante il Rinascimento?"
-    ],
-    [
-        "¿Cómo afecta el cambio de código a la estructura gramatical en comunidades bilingües de habla español-inglés?"
-    ],
-    [
-        "Qual é o impacto da política monetária não convencional no controle da inflação durante uma crise econômica?"
-    ],
-    [
-        "Erklären Sie den Unterschied zwischen deterministischen und nicht-deterministischen endlichen Automaten und ihre Anwendungsbereiche."
-    ],
-    [
-        "Giải thích cơ chế của quá trình phiên mã ngược (reverse transcription) và tầm quan trọng của nó trong nghiên cứu HIV/AIDS."
-    ],
-    ["조선시대 성리학이 한국 사회와 문화에 미친 영향을 설명하세요."],
-    ["如何解释量子纠缠现象,以及它在量子计算中的潜在应用?"],
-    [
-        "How can you design a daily schedule that maximizes productivity for a remote worker who has multiple meetings and project deadlines?"
-    ],
-    [
-        "Quels sont les meilleures stratégies pour gérer les conflits au sein d'une équipe multiculturelle travaillant sur un projet commun?"
-    ],
-    [
-        "Quali sono i migliori consigli per mantenere un equilibrio tra vita professionale e vita privata in un ambiente lavorativo stressante?"
-    ],
-    [
-        "¿Cómo se puede elaborar un plan financiero personal efectivo que incluya ahorro para la jubilación, inversión y manejo de deudas?"
-    ],
-    [
-        "Quais são as melhores práticas para implementar metodologias ágeis em uma equipe de desenvolvimento de software?"
-    ],
-    [
-        "Welche Strategien können verwendet werden, um ein starkes berufliches Netzwerk aufzubauen und zu pflegen, insbesondere in der Tech-Branche?"
-    ],
-    [
-        "Những bước nào cần thiết để xây dựng một lộ trình phát triển sự nghiệp bền vững trong lĩnh vực công nghệ thông tin?"
-    ],
-    ["프로젝트의 범위 변동을 효과적으로 관리하기 위한 최고의 방법은 무엇인가요?"],
-    ["在快速变化的职场环境中,如何有效地实现工作与生活的平衡?"],
-    [
-        "Write an argumentative essay discussing the pros and cons of artificial intelligence in the workplace, including potential ethical concerns."
-    ],
-    [
-        "Analysez les impacts sociaux et économiques de la digitalisation sur les petites entreprises en France."
-    ],
-    [
-        "Scrivi un'email formale al direttore di una rivista per proporre un articolo sulla sostenibilità ambientale nelle città italiane."
-    ],
-    [
-        "Elabora un informe detallado sobre los efectos del cambio climático en la biodiversidad de la región amazónica."
-    ],
-    [
-        "Analise criticamente os principais pontos abordados no relatório anual do Banco Mundial sobre a pobreza global."
-    ],
-    [
-        "Erstellen Sie eine technische Dokumentation für die Implementierung eines neuen Software-Features in einer bestehenden Anwendung."
-    ],
-    [
-        "Viết một bài luận phân tích về tác động của cuộc cách mạng công nghiệp 4.0 đối với thị trường lao động Việt Nam."
-    ],
-    [
-        "인공지능의 윤리적 문제에 대한 연구 논문을 작성하고, 다양한 사례를 통해 그 영향을 분석하세요."
-    ],
-    ["分析鲁迅的小说《阿Q正传》中反映的中国社会问题和作者的批判态度。"],
-]
-
 if not torch.cuda.is_available():
     DESCRIPTION += "\n<p>Running on CPU 🥶 This demo does not work on CPU.</p>"


+def workaround_fixed_get_imports(filename: str | os.PathLike) -> list[str]:
+    """
+    Workaround for the fixed get_imports function.
+
+    @args:
+        filename (str | os.PathLike): The filename or path to the file.
+
+    @returns:
+        list[str]: The list of imports.
+
+    @remarks:
+        - This function is a workaround for the fixed get_imports function.
+        - It checks if the filename ends with "/modeling_florence2.py".
+        - If it doesn't, it calls the original get_imports function.
+        - If it does, it calls the original get_imports function and removes the "flash_attn" import.
+
+    @usage:
+        ```python
+        from unittest.mock import patch
+        image_torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
+        with patch(
+            "transformers.dynamic_module_utils.get_imports", workaround_fixed_get_imports
+        ):
+        ```
+    """
+
+    if not str(filename).endswith("/modeling_florence2.py"):
+        return get_imports(filename)
+    imports = get_imports(filename)
+    imports.remove("flash_attn")
+    return imports
+
+
 if torch.cuda.is_available():
-    model_id = "ghost-x/ghost-8b-beta"
     hf_serect = os.getenv("HF_TOKEN", None)
-    model = AutoModelForCausalLM.from_pretrained(
-        model_id,
+    attn_implementation = "flash_attention_2"
+
+    chat_model_id = "ghost-x/ghost-8b-beta-1608"
+    chat_device = torch.device("cuda")
+    chat_model = AutoModelForCausalLM.from_pretrained(
+        chat_model_id,
         device_map="auto",
         torch_dtype=torch.bfloat16,
-        attn_implementation="flash_attention_2",
+        attn_implementation=attn_implementation,
         trust_remote_code=True,
        token=hf_serect,
     )
-    tokenizer = AutoTokenizer.from_pretrained(
-        model_id,
+    chat_tokenizer = AutoTokenizer.from_pretrained(
+        chat_model_id,
         trust_remote_code=True,
         token=hf_serect,
     )
     SelfExtend.apply(
-        model,
+        chat_model,
         group_size=16,
         window_size=512,
         enable_flash_attention=True,
         flash_attention_impl="flash_attn",
     )
-    model.generation_config.max_length = 123392
+    chat_model.generation_config.max_length = 123392
+
+    image_model_id = "microsoft/Florence-2-large"
+    # image_device = "cuda" if torch.cuda.is_available() else "cpu"
+    # image_torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
+    image_device = "cpu"
+    image_torch_dtype = torch.float32
+    image_model = (
+        AutoModelForCausalLM.from_pretrained(
+            image_model_id,
+            torch_dtype=image_torch_dtype,
+            trust_remote_code=True,
+            token=hf_serect,
+        )
+        .to(image_device)
+        .eval()
+    )
+    image_processor = AutoProcessor.from_pretrained(
+        image_model_id,
+        trust_remote_code=True,
+        token=hf_serect,
+    )

-waiting_tools_timeout = 10
+
+waiting_tools_timeout = 5
 supported_tools = json.dumps(
     [
         {
@@ -319,6 +228,22 @@ supported_tools = json.dumps(

 @lru_cache(maxsize=128)
 def extract_text_from_webpage(html_content):
+    """
+    Extracts visible text from an HTML webpage.
+
+    @args:
+        html_content (str): The HTML content of the webpage.
+
+    @returns:
+        str: The visible text extracted from the webpage.
+
+    @remarks:
+        - This function uses the BeautifulSoup library to parse the HTML content.
+        - It removes certain tags (script, style, header, footer, nav, form, svg) from the parsed HTML.
+        - The remaining visible text is then extracted using the `get_text` method of BeautifulSoup.
+        - The extracted text is stripped of leading/trailing whitespace and separated by a single space.
+    """
+
     soup = BeautifulSoup(html_content, "html.parser")
     for tag in soup(["script", "style", "header", "footer", "nav", "form", "svg"]):
         tag.extract()
@@ -330,6 +255,23 @@ def search_with_wikipedia(
     query: str,
     language: str = "en",
 ):
+    """
+    Search for a given query on Wikipedia and return the summary.
+
+    @args:
+        query (str): The search query.
+        language (str, optional): The language code for the Wikipedia page. Defaults to "en".
+
+    @returns:
+        list: A list containing the summary of the Wikipedia page.
+
+    @remarks:
+        - This function uses the Wikipedia API to search for the given query.
+        - The language parameter determines the language of the Wikipedia page to search.
+        - If the search is successful, the function returns a list containing the summary of the page.
+        - If an exception occurs during the search, an empty list is returned.
+    """
+
     all_results = []
     try:
         wikipedia.set_lang(language)
@@ -346,9 +288,39 @@ def search_with_google(
     language: str = "en",
     ssl_verify: bool = None,
 ):
+    """
+    Searches Google for the given query and returns a list of search results.
+
+    @args:
+        query (str): The search query.
+        num_results (int, optional): The number of search results to retrieve. Defaults to 3.
+        timeout (int, optional): The timeout value for the HTTP requests. Defaults to 5.
+        language (str, optional): The language for the search results. Defaults to "en".
+        ssl_verify (bool, optional): Whether to verify SSL certificates. Defaults to None.
+
+    @returns:
+        list: A list of dictionaries containing the link and visible text of each search result.
+
+    @remarks:
+        - This function uses the requests library to send HTTP requests to Google.
+        - It sets the User-Agent header to mimic a Firefox browser.
+        - The search results are retrieved from the HTML response using BeautifulSoup.
+        - Each search result is represented as a dictionary with "link" and "text" keys.
+        - The "link" key contains the URL of the search result.
+        - The "text" key contains the visible text extracted from the search result webpage.
+        - If the visible text exceeds 4096 characters, it is truncated to that length.
+        - If an error occurs while fetching or processing a search result, it is printed and ignored.
+    """
+
+    # Initialize an empty list to store the search results
     all_results = []
+
+    # Define the maximum number of characters per page
     max_chars_per_page = 4096
+
+    # Create a session object to send HTTP requests
     with requests.Session() as session:
+        # Send a GET request to Google search with the specified query parameters
         resp = session.get(
             url="https://www.google.com/search",
             headers={
@@ -363,36 +335,118 @@ def search_with_google(
             timeout=timeout,
             verify=ssl_verify,
         )
+
+        # Raise an exception if the response status code is not successful
         resp.raise_for_status()
+
+        # Parse the HTML response using BeautifulSoup
         soup = BeautifulSoup(resp.text, "html.parser")
+
+        # Find all the result blocks in the HTML
         result_block = soup.find_all("div", attrs={"class": "g"})
+
+        # Iterate over each result block
         for result in result_block:
+            # Find the link element within the result block
             link = result.find("a", href=True)
+
+            # If a link is found, extract the URL and process the webpage
             if link:
                 link = link["href"]
                 try:
+                    # Send a GET request to the link URL
                     webpage = session.get(
                         link,
                         headers={
                             "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/111.0"
                         },
                     )
+
+                    # Raise an exception if the response status code is not successful
                    webpage.raise_for_status()
+
+                    # Extract the visible text from the webpage
                    visible_text = extract_text_from_webpage(webpage.text)
+
+                    # Truncate the visible text if it exceeds the maximum number of characters per page
                    if len(visible_text) > max_chars_per_page:
                        visible_text = visible_text[:max_chars_per_page]
+
+                    # Append the link and visible text to the search results list
                    all_results.append({"link": link, "text": visible_text})
                except requests.exceptions.RequestException as e:
+                    # Print an error message if there is an error fetching or processing the link
                    print(f"Error fetching or processing {link}: {e}")
                    pass
            else:
                pass
+
+    # Return the search results
    return all_results


-@spaces.GPU(duration=180)
-def generate(
-    message: str,
+@lru_cache(maxsize=128)
+def extract_text_from_image(file: str) -> str:
+    """
+    Extracts text from an image file.
+
+    @args:
+        file (str): The path or URL of the image file.
+
+    @returns:
+        str: The extracted text from the image.
+
+    @remarks:
+        - This function uses an LRU cache to store previously processed images for faster retrieval.
+        - The image file can be either a local file path or a URL.
+        - The function opens the image file using the PIL library.
+        - The function processes the image using an image processor.
+        - The processed image is then passed to a text generation model to generate text.
+        - The generated text is post-processed to obtain the final extracted text.
+    """
+    # Define the task and load the image
+    task = "<MORE_DETAILED_CAPTION>"
+    image = Image.open(
+        requests.get(file, stream=True).raw
+        if file.startswith("http")
+        else open(file, "rb")
+    )
+
+    if image.mode != "RGB":
+        image = image.convert("RGB")
+
+    # Preprocess the image using the image processor
+    inputs = image_processor(text=task, images=image, return_tensors="pt").to(
+        "cpu", image_torch_dtype
+    )
+
+    # Generate text based on the input image
+    generated_ids = image_model.generate(
+        input_ids=inputs["input_ids"],
+        pixel_values=inputs["pixel_values"],
+        max_new_tokens=1024,
+        num_beams=3,
+        do_sample=False,
+    )
+
+    # Decode the generated text and post-process the answer
+    generated_text = image_processor.batch_decode(
+        generated_ids, skip_special_tokens=False
+    )[0]
+    parsed_answer = image_processor.post_process_generation(
+        generated_text,
+        task=task,
+        image_size=(image.width, image.height),
+    )
+
+    # Return the parsed answer for the specified task
+    return parsed_answer[task]
+
+
+@spaces.GPU(duration=90)
+def generate_chat(
+    uuid: str,
+    message: dict,
     chat_history: list[tuple[str, str]],
     allow_used_tools: bool = True,
     system_prompt: str = "",
@@ -401,48 +455,44 @@ def generate(
     top_p: float = 0.95,
     top_k: int = 50,
     repetition_penalty: float = 1.0,
-    other_client_info: str = None,
+    client_info: str = None,
 ) -> Iterator[str]:
-    # print()
-    # print("allow_used_tools:\n", allow_used_tools)
-    # print("system_prompt:\n", system_prompt)
-    # print("max_new_tokens:\n", max_new_tokens)
-    # print("temperature:\n", temperature)
-
+    # Build the input_ids for the chat conversation
     def build_input_ids(
         apply_tools: bool = None,
         references=None,
     ):
         conversation = []
+
+        # Add the system prompt to the conversation
         if system_prompt:
             conversation.append({"role": "system", "content": system_prompt})
+
+        # Add the tools role to the conversation if apply_tools is True
         if apply_tools is True:
             conversation.append({"role": "tools", "content": supported_tools})

+        # Add the references role to the conversation
         if references is None:
-            references = [other_client_info]
+            references = [client_info]
         else:
-            references.insert(0, other_client_info)
+            references.insert(0, client_info)

         if (
             references is not None
             and isinstance(references, list)
             and len(references) > 0
         ):
+            formatted_references = f"Analyze the provided references, extract relevant information to provide accurate and objective feedback. This reference information may include: conversation context, assistant or user memories, reasoning guides, problem-solving suggestions, assistant rules, etc.\nIf the reference is not relevant, ignore it. Try to have a balanced approach, avoiding over-reliance on the documentation."
+            formatted_references += "\n\n" + ("\n\n".join(references))
             conversation.append(
                 {
                     "role": "refs",
-                    "content": json.dumps(
-                        {
-                            "instructions": "These are only general documents used for reference to give the most accurate and honest answers possible. Ignore it if it's irrelevant and don't overuse it.",
-                            "documents": references,
-                        },
-                        indent=2,
-                        ensure_ascii=False,
-                    ),
+                    "content": formatted_references,
                 }
             )

+        # Add the chat history to the conversation
         for user, assistant in chat_history:
             conversation.extend(
                 [
@@ -450,12 +500,28 @@ def generate(
                     {"role": "assistant", "content": assistant},
                 ]
             )
-        conversation.append({"role": "user", "content": message})

-        input_ids = tokenizer.apply_chat_template(
+        # Add the user message with image attachments to the conversation
+        conversation.append(
+            {
+                "role": "user",
+                "content": (
+                    f"{' & '.join(message['attachments'])}\n\n{message['text']}"
+                    if "attachments" in message and len(message["attachments"]) > 0
+                    else f"{message['text']}"
+                ),
+            }
+        )
+
+        logger.debug(f"UUID: {uuid} - Conversation: {conversation}")
+
+        # Apply the chat template to convert the conversation into input_ids
+        input_ids = chat_tokenizer.apply_chat_template(
             conversation, add_generation_prompt=True, return_tensors="pt"
         )
-        input_ids = input_ids.to(model.device)
+        input_ids = input_ids.to(chat_model.device)
+
+        # Trim the input_ids if it exceeds the maximum token length
         if input_ids.shape[1] > MAX_INPUT_TOKEN_LENGTH:
             input_ids = input_ids[:, -MAX_INPUT_TOKEN_LENGTH:]
             gr.Warning(
@@ -463,10 +529,13 @@
             )
         return input_ids

+    # Generate chat responses based on the input_ids
     def generate_chat_responses(
         previous_response: str = None,
     ):
         document_references = []
+
+        # Check if the previous response contains scheduled tool runs
         if previous_response is not None:
             scheduled_tools_runs = None
             try:
@@ -481,6 +550,7 @@ def generate(
                 print(e)
                 pass

+            # If scheduled tool runs exist, perform the corresponding searches
             if (
                 scheduled_tools_runs is not None
                 and scheduled_tools_runs["name"] == "search_on_internet"
@@ -488,9 +558,8 @@
                 keyword = scheduled_tools_runs["arguments"]["keyword"]
                 search_type = scheduled_tools_runs["arguments"]["type"]
                 language = scheduled_tools_runs["arguments"]["language"]
-                print(
-                    "scheduled_tools_runs:", scheduled_tools_runs
-                )
+
+                # Search on Wikipedia if the search type is "wikipedia"
                 if search_type == "wikipedia":
                     gr.Info(
                         "Searching for information on the Wikipedia.",
@@ -501,6 +570,7 @@ def generate(
                         search_with_wikipedia(query=keyword, language=language)
                     )

+                # Search on Google
                 gr.Info("Searching for information on the Google.")
                 document_references.extend(
                     search_with_google(
@@ -509,20 +579,25 @@
                         num_results=3,
                     )
                 )
-                print(
-                    "document_references:", document_references
-                )
+                print("document_references:", document_references)

+        # Determine if tools should be applied based on the allow_used_tools flag
         apply_tools = (
             True if allow_used_tools is True and previous_response is None else False
         )
+
+        # Build the input_ids for the chat conversation
         input_ids = build_input_ids(
             apply_tools=apply_tools,
             references=document_references,
         )
+
+        # Create a TextIteratorStreamer to generate chat responses
         streamer = TextIteratorStreamer(
-            tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True
+            chat_tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True
         )
+
+        # Set the generation parameters
         generate_kwargs = dict(
             input_ids=input_ids,
             streamer=streamer,
@@ -537,9 +612,14 @@
         generate_kwargs["top_p"] = top_p
         generate_kwargs["top_k"] = top_k

-        t = Thread(target=model.generate, kwargs=generate_kwargs)
+        # Start the generation process in a separate thread
+        t = Thread(target=chat_model.generate, kwargs=generate_kwargs)
         t.start()

+        logger.debug(
+            f"UUID: {uuid} - Is apply tools: {apply_tools} - Is apply documents: {len(document_references) > 0} - Is previous response: {previous_response is not None} - Start generating chat responses"
+        )
+
         state = {
             "mark": None,
             "respond": False,
@@ -556,6 +636,7 @@
                 state["respond"] = True
                 yield "".join(outputs)

+        # If tools are applied and no response is generated within the timeout, continue generating chat responses
         if (
             apply_tools is True
             and state["respond"] is False
@@ -564,22 +645,126 @@
             previous_response = "".join(outputs)
             yield from generate_chat_responses(previous_response=previous_response)

+    # Yield the generated chat responses
     yield from generate_chat_responses(previous_response=None)


+def generate(
+    message: dict,
+    chat_history: list[tuple[str, str]],
+    allow_used_tools: bool = True,
+    system_prompt: str = "",
+    max_new_tokens: int = 1536,
+    temperature: float = 0.4,
+    top_p: float = 0.95,
+    top_k: int = 50,
+    repetition_penalty: float = 1.0,
+    client_info: str = None,
+) -> Iterator[str]:
+    # Generate a unique identifier from the current time
+    uuid = zlib.crc32(str.encode(str(time.time())))
+    logger.info(f"UUID: {uuid} - Starting image text extraction process")
+
+    # Limit the number of files to process to 2
+    if len(message["files"]) > 2:
+        gr.Warning("Only the first 2 images will be processed.")
+
+    message["files"] = message["files"][:2]
+
+    # Extract text from each image file and replace the file path with an attachment tag containing the extracted text
+    message["attachments"] = handle_file_extraction(
+        files=list(message["files"]), uuid=uuid
+    )
+    logger.debug(f"UUID: {uuid} - Image text extraction process completed")
+
+    logger.debug(f"UUID: {uuid} - Previous chat history: {chat_history}")
+    for idx, chat_pair in enumerate(chat_history):
+        user_message, assistant_message = chat_pair
+        if not isinstance(user_message, str) and assistant_message is None:
+            text_descriptions = handle_file_extraction(
+                files=list(user_message), uuid=uuid
+            )
+            chat_input = (
+                f"{' & '.join(text_descriptions)}\n\n{chat_history[idx + 1][0]}"
+            )
+            chat_history[idx + 1][0] = chat_input
+            chat_history[idx] = [None, None]
+            logger.debug(
+                f"UUID: {uuid} - Updated chat history: {chat_history} - Updated chat input: {chat_input}"
+            )
+
+    chat_history = list(
+        filter(lambda x: x[0] is not None and x[1] is not None, chat_history)
+    )
+    logger.debug(f"UUID: {uuid} - Filtered chat history: {chat_history}")
+
+    yield from generate_chat(
+        uuid=uuid,
+        message=message,
+        chat_history=chat_history,
+        allow_used_tools=allow_used_tools,
+        system_prompt=system_prompt,
+        max_new_tokens=max_new_tokens,
+        temperature=temperature,
+        top_p=top_p,
+        top_k=top_k,
+        repetition_penalty=repetition_penalty,
+        client_info=client_info,
+    )
+
+
+def handle_file_extraction(files: list[str], uuid: str):
+    """
+    Extracts text from images in the given files and returns a list of attachments.
+
+    @args:
+        files (list[str]): The list of image file paths to extract text from.
+        uuid (str): The UUID associated with the extraction process.
+
+    @returns:
+        list: A list of attachments, each represented as a string.
+
+    @remarks:
+        - This function iterates over the files and extracts text from each image file.
+        - The extracted text is logged along with the UUID and file information.
+        - The extracted text is then added to the attachments list as a string representation of an attachment.
+        - The attachments list is returned at the end of the function.
+    """
+
+    attachments = []
+    for idx, file_to_extract in enumerate(files):
+        extracted_text = extract_text_from_image(file=file_to_extract)
+        logger.info(
+            f"UUID: {uuid} - File: {file_to_extract} - Extracted text: {extracted_text}"
+        )
+        attachments.append(
+            f'<attachment index="{idx}" type="image" description="{extracted_text}" />'
+        )
+    return attachments
+
+
 chatbot = gr.Chatbot(
-    height=500, placeholder=PLACEHOLDER, label="Ghost 8B Beta", show_copy_button=True
+    height=500,
+    placeholder=PLACEHOLDER,
+    label="Ghost 8B Beta (β, 128k)",
+    show_copy_button=True,
 )

 chat_interface = gr.ChatInterface(
     fn=generate,
     chatbot=chatbot,
     fill_height=True,
+    multimodal=True,
+    textbox=gr.MultimodalTextbox(
+        file_types=["image"],
+        placeholder="Type a message...",
+    ),
     additional_inputs=[
         gr.Checkbox(
-            label="Allow used tools (available: search on internet)", value=False
+            label="Allow used tools (available: search on internet)",
+            value=False,
         ),
-        gr.Textbox(label="System prompt", lines=6),
+        gr.Textbox(label="System prompt", lines=6, value=DEFAULT_SYSTEM_PROMPT),
         gr.Slider(
             label="Max new tokens",
             minimum=1,
@@ -616,23 +801,26 @@ chat_interface = gr.ChatInterface(
             value=1.0,
         ),
         gr.Textbox(
-            label="Other client information",
+            elem_id="client_info",
+            label="Client info",
             lines=1,
-            value="This user's current time: {}".format(time.strftime("%Y-%m-%d")),
+            value="The current time is now: {}".format(
+                time.strftime("%A, %D %B %Y %H:%M:%S")
+            ),
             visible=False,
         ),
     ],
     stop_btn="Stop",
     cache_examples=False,
-    examples=EXAMPLES,
-    examples_per_page=9,
+    examples=[],
+    examples_per_page=10,
     concurrency_limit=100,
 )

-with gr.Blocks(fill_height=True, css="style.css") as demo:
+with gr.Blocks(fill_height=True, css="style.css", head=HEAD) as demo:
     gr.Markdown(DESCRIPTION)
     chat_interface.render()
     gr.Markdown(LICENSE)

 if __name__ == "__main__":
-    demo.queue(max_size=20).launch(share=True)
+    demo.queue().launch(share=True)
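
The `workaround_fixed_get_imports` docstring shows the intended patch pattern, but the diff loads `microsoft/Florence-2-large` without it. A minimal sketch of wrapping the CPU load in that patch, assuming the Florence-2 remote modeling code declares `flash_attn` among its imports and that `workaround_fixed_get_imports` from app.py is in scope:

```python
# Sketch only: apply the get_imports workaround while loading Florence-2 on CPU,
# so the dynamically fetched modeling code does not require flash_attn.
from unittest.mock import patch

import torch
from transformers import AutoModelForCausalLM, AutoProcessor

image_model_id = "microsoft/Florence-2-large"
with patch(
    "transformers.dynamic_module_utils.get_imports", workaround_fixed_get_imports
):
    image_model = (
        AutoModelForCausalLM.from_pretrained(
            image_model_id,
            torch_dtype=torch.float32,
            trust_remote_code=True,
        )
        .to("cpu")
        .eval()
    )
    image_processor = AutoProcessor.from_pretrained(
        image_model_id, trust_remote_code=True
    )
```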
requirements.txt CHANGED
@@ -1,10 +1,11 @@
-accelerate==0.30.1
-bitsandbytes==0.43.1
-gradio==4.39.0
+accelerate
+bitsandbytes
+gradio
+spaces
+transformers
+timm
 scipy==1.13.0
 sentencepiece==0.2.0
-spaces==0.28.3
 torch==2.0.0
-transformers==4.41.0
 beautifulsoup4>=4.9
 wikipedia==1.4.0