chore: update something

Files changed:
- README.md (+15 -9)
- app.py (+439 -251)
- requirements.txt (+6 -5)

README.md
CHANGED

@@ -1,8 +1,8 @@
 ---
-title: Ghost 8B Beta (β, 128k
+title: Ghost 8B Beta (β, 128k)
 emoji: 👻 / 📚
-colorFrom:
-colorTo:
+colorFrom: green
+colorTo: blue
 sdk: gradio
 sdk_version: 4.36.1
 app_file: app.py
@@ -10,15 +10,22 @@ pinned: true
 header: mini
 suggested_hardware: a10g-small
 language:
-- en
 - vi
+- ko
 - es
 - pt
-- de
-- it
-- fr
-- ko
 - zh
+- fr
+- it
+- de
+- ja
+- ru
+- pl
+- nl
+- hi
+- tr
+- id
+- en
 license: other
 license_name: ghost-open-llms
 license_link: https://ghost-x.org/ghost-open-llms-license
@@ -28,7 +35,6 @@ tags:
 
 # ~
 
-
 ### Notes
 
 The extension source code belongs to: "LLM Maybe LongLM: Self-Extend LLM Context Window Without Tuning". See source code details [here](https://github.com/datamllab/LongLM).

app.py
CHANGED

@@ -3,6 +3,8 @@
 import subprocess
 import json
 import requests
+import zlib
+from PIL import Image
 
 subprocess.run(
     f"pip install flash-attn --no-build-isolation",
@@ -17,34 +19,78 @@ from typing import Iterator
 import gradio as gr
 import spaces
 import torch
+import logging
 import wikipedia
 import time
 import SelfExtend
-from transformers import
+from transformers import (
+    AutoModelForCausalLM,
+    AutoTokenizer,
+    AutoProcessor,
+    TextIteratorStreamer,
+)
+from transformers.dynamic_module_utils import get_imports
 from bs4 import BeautifulSoup
 from functools import lru_cache
 
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
 
 MAX_MAX_NEW_TOKENS = 8192
 DEFAULT_MAX_NEW_TOKENS = 2048
 MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "123392"))
 
+DEFAULT_SYSTEM_PROMPT = """\
+You are a helpful and intelligent AI, trained by Ghost X and named Ghost 8B Beta (often referred to as Ghost Beta).
+You're known for your honesty, spreading positivity, and always striving to assist users. Your expertise lies in understanding their needs and providing insightful suggestions, drawing upon your knowledge and interests. If a query exceeds your understanding, you'll be upfront and state you're unsure, avoiding fabricated responses. You enjoy incorporating emojis to enhance interactions, but maintain a balanced approach for a natural flow. Let's engage in a meaningful conversation, keeping in mind the user's language.
+"""
+
+# DEFAULT_SYSTEM_PROMPT = """\
+# You are a helpful and intelligent AI, trained by Ghost X and named Ghost 8B Beta (often referred to as 8B Beta).
+# You're known for your honesty, spreading positivity, and always striving to assist users. Your expertise lies in understanding their needs and providing insightful suggestions, drawing upon your knowledge and interests. If a query exceeds your understanding, you'll be upfront and state you're unsure, avoiding fabricated responses. You enjoy incorporating emojis to enhance interactions, but maintain a balanced approach for a natural flow. Let's engage in a meaningful conversation, keeping in mind the user's language.
+
+# A guide to dealing with extremely complex questions or challenges. Follow these steps to solve them:
+# 1. Deconstructing Complexity
+# Imagine a puzzle with intricate pieces. I'll present a challenging question. Your task: Break down this question into smaller, distinct parts. Label each part with a specific theme or aspect related to the problem. This will help us understand the multifaceted nature of the query and prepare for a structured solution.
+# 2. Reconstructing Insights
+# Once we've successfully dissected the problem into manageable components, assemble these parts like a puzzle. Focus on identifying connections, potential overlaps, and key information from each theme. The goal is to reconstruct a cohesive, well-rounded answer that addresses the original complexity of the question.
+# """
+
+HEAD = """
+<script>
+  function schedule_updates() {
+    const client_info_element = document.querySelector("#client_info textarea");
+    client_info_element.value = "The current time is now: " + new Date().toLocaleString('en-US', {weekday: 'short'});
+    client_info_element.dispatchEvent(new Event('input'));
+  }
+
+  function bootstrap() {
+    setInterval(schedule_updates, 1000);
+  };
+
+  bootstrap();
+</script>
+"""
+
 DESCRIPTION = """\
-#
+# Ghost 8B Beta (β, 128k)
 
-**Ghost 8B Beta**
+**Ghost 8B Beta** outperforms leading models like Llama 3.1 8B Instruct and GPT-3.5 Turbo in lc_winrate scores. It also surpasses Claude 3 Opus, Claude 3 Sonnet, GPT-4, and Mistral Large in AlpacaEval 2.0 winrate scores. The model offers two context length versions: [8k](https://huggingface.co/spaces/lamhieu/ghost-8b-beta-8k) and [128k](https://huggingface.co/spaces/lamhieu/ghost-8b-beta-128k), both with built-in multilingual function support.
 
-
+Supported languages: 🇬🇧 English, 🇻🇳 Vietnamese, 🇰🇷 Korean, 🇪🇸 Spanish, 🇵🇹 Portuguese, 🇨🇳 Chinese, 🇫🇷 French, 🇮🇹 Italian, 🇩🇪 German, 🇯🇵 Japanese, 🇷🇺 Russian, 🇵🇱 Polish, 🇳🇱 Dutch, 🇮🇳 Hindi, 🇹🇷 Turkish, 🇮🇩 Indonesian.
+Note: with the image will be used another model to explain rather than using directly the Ghost 8B Beta model.
 
 🗞️ **Updates**
-*
+* Aug 16, 2024: Released version 160824, expanding language support from 9 to 16 languages and improving math, reasoning, and instruction-following capabilities.
+* Jul 23, 2024: Added internet search tools.
 """
 
 
 PLACEHOLDER = """
 <div style="padding: 30px; text-align: center; display: flex; flex-direction: column; align-items: center;">
-
-
+   <h1 style="font-size: 26px; margin-bottom: 2px; opacity: 0.20;">👋 Welcome to the Ghost 8B Beta Playground! 🎉</h1>
+   <p style="font-size: 18px; margin-bottom: 2px; opacity: 0.10;">Ask me anything and let's have some fun! 🤔💡</p>
 </div>
 """
 
@@ -55,231 +101,94 @@ LICENSE = """
 Ghost 8B Beta may give inaccurate information, including information about people, so please verify Ghost 8B Beta's answers. [Ghost 8B Beta](https://ghost-x.org/docs/models/ghost-8b-beta/) by [Ghost X](https://ghost-x.org).
 """
 
-EXAMPLES = [
-    [
-        "What is the significance of the Higgs boson in the Standard Model of particle physics?"
-    ],
-    [
-        "Qu'est-ce que l'effet fondateur et comment influence-t-il la diversité génétique d'une population?"
-    ],
-    ["Qual è il principio di Le Chatelier e come si applica agli equilibri chimici?"],
-    [
-        "¿Qué es una supernova y cuál es su importancia en la formación de elementos pesados en el universo?"
-    ],
-    [
-        "Qual é a definição formal de uma integral de linha e como é utilizada em física?"
-    ],
-    [
-        "Was versteht man unter dem Moho-Diskontinuität und welche Bedeutung hat sie für das Verständnis der Erdkruste?"
-    ],
-    [
-        "Hiện tượng nhà kính là gì và nó ảnh hưởng như thế nào đến biến đổi khí hậu toàn cầu?"
-    ],
-    [
-        "알고리즘의 시간 복잡도가 중요한 이유는 무엇이며, 시간 복잡도를 어떻게 분석하나요?"
-    ],
-    ["什么是CRISPR-Cas9基因编辑技术,它在现代生物学研究中的作用是什么?"],
-    [
-        "Create a Python function that takes a list of integers and returns the list sorted in ascending order without using the built-in sort or sorted functions."
-    ],
-    [
-        "Écrivez une fonction en C++ qui trouve le plus long sous-tableau contigu avec une somme égale à zéro."
-    ],
-    [
-        "Scrivi una funzione in Java che calcola il fattoriale di un numero utilizzando la ricorsione."
-    ],
-    [
-        "Desarrolla una función en JavaScript que determine si una cadena de texto es un palíndromo, ignorando espacios y signos de puntuación."
-    ],
-    ["Implemente uma função em C# que verifique se uma matriz quadrada é simétrica."],
-    [
-        "Schreiben Sie eine Funktion in Swift, die eine gegebene Zeichenfolge in umgekehrter Reihenfolge zurückgibt, ohne integrierte Funktionen zu verwenden."
-    ],
-    [
-        "Viết một hàm trong PHP để tìm tất cả các số nguyên tố trong một khoảng cho trước."
-    ],
-    [
-        "파이썬을 사용하여 주어진 이진 트리가 이진 탐색 트리인지 확인하는 함수를 작성하십시오."
-    ],
-    [
-        "用 Go 语言编写一个函数,计算给定字符串中每个字符出现的次数,并返回一个包含字符及其出现次数的映射。"
-    ],
-    [
-        "Can you help me design a detailed project plan for developing a machine learning model for predicting stock prices?"
-    ],
-    [
-        "Pouvez-vous m'aider à organiser un emploi du temps hebdomadaire pour maximiser la productivité de mon équipe de développement logiciel?"
-    ],
-    [
-        "Puoi aiutarmi a creare un piano di sviluppo per un'applicazione mobile che gestisce le prenotazioni di ristoranti?"
-    ],
-    [
-        "¿Podrías ayudarme a elaborar un plan detallado para la implementación de un sistema de gestión de contenido (CMS) en una empresa mediana?"
-    ],
-    [
-        "Você pode me ajudar a planejar uma estratégia de desenvolvimento para um sistema de comércio eletrônico escalável?"
-    ],
-    [
-        "Können Sie mir helfen, einen detaillierten Zeitplan für die Implementierung eines neuen ERP-Systems in unserem Unternehmen zu erstellen?"
-    ],
-    [
-        "Bạn có thể giúp tôi xây dựng một kế hoạch phát triển chi tiết cho dự án xây dựng hệ thống quản lý chuỗi cung ứng không?"
-    ],
-    [
-        "신경망 기반 이미지 인식 모델 개발을 위한 세부 프로젝트 계획을 세우는 데 도움을 줄 수 있나요?"
-    ],
-    ["你能帮我制定一个详细的开发计划,用于创建一个基于区块链的分布式账本系统吗?"],
-    [
-        "Prove that the sum of the squares of any two sides of a right triangle is equal to the square of the hypotenuse."
-    ],
-    [
-        "Calculez la force gravitationnelle entre deux masses de 10 kg chacune séparées par une distance de 1 mètre."
-    ],
-    [
-        "Determina la formula molecolare di un composto che contiene il 40% di carbonio, il 6.67% di idrogeno e il 53.33% di ossigeno in massa."
-    ],
-    [
-        "Explica la teoría del ciclo económico de Schumpeter y cómo se aplica a la economía moderna."
-    ],
-    [
-        "Calcule a energia potencial gravitacional de um objeto de 5 kg a uma altura de 10 metros acima do solo (g = 9,8 m/s²)."
-    ],
-    [
-        "Beweisen Sie, dass jede Primzahl der Form 4k+1 als Summe zweier Quadrate geschrieben werden kann."
-    ],
-    [
-        "Tính nồng độ mol của dung dịch H₂SO₄ khi hoà tan 98 gam H₂SO₄ vào nước để được 1 lít dung dịch."
-    ],
-    ["케인스 경제학의 핵심 개념과 그것이 현대 경제 정책에 미치는 영향을 설명하십시오."],
-    ["计算一个质量为2 kg的物体在3米高处的重力势能(g = 9.8 m/s²)。"],
-    [
-        'Identify the author of a novel that features a dystopian society where "Big Brother" watches over its citizens and the protagonist works for the Ministry of Truth.'
-    ],
-    [
-        "Quel est le seul mammifère capable de voler activement, souvent associé à la nuit et capable d'écholocalisation?"
-    ],
-    [
-        "Qual è l'opera letteraria italiana che narra il viaggio immaginario di un poeta attraverso Inferno, Purgatorio e Paradiso, guidato da Virgilio e Beatrice?"
-    ],
-    [
-        "¿Qué insecto es conocido por su organización social compleja, su capacidad para producir miel y su comunicación mediante la danza?"
-    ],
-    [
-        "Qual é o fenômeno atmosférico que ocorre quando uma massa de ar quente se encontra com uma massa de ar frio, resultando em uma violenta tempestade giratória?"
-    ],
-    [
-        "Welches literarische Werk beschreibt die Geschichte eines jungen Mädchens, das durch einen Kaninchenbau in eine fantastische Welt voller skurriler Charaktere fällt?"
-    ],
-    [
-        "Động vật nào có thể tái sinh toàn bộ cơ thể từ một mảnh nhỏ của chính nó, thường sống dưới nước và có thể có nhiều xúc tu?"
-    ],
-    [
-        "어떤 자연 현상은 태양빛이 대기 중의 물방울에 반사되고 굴절되어 발생하며, 하늘에 나타나는 여러 색깔의 아치 형태를 띠나요?"
-    ],
-    ["这部文学作品讲述了一位绅士和他的侍从的冒险故事,他们在"],
-    [
-        "Can you derive the Euler-Lagrange equation from the principle of stationary action in classical mechanics?"
-    ],
-    [
-        "Expliquez la notion de « différence ontologique » chez Martin Heidegger et son importance pour la phénoménologie."
-    ],
-    [
-        "Qual è il significato simbolico del colore blu nei dipinti di Giotto di Bondone durante il Rinascimento?"
-    ],
-    [
-        "¿Cómo afecta el cambio de código a la estructura gramatical en comunidades bilingües de habla español-inglés?"
-    ],
-    [
-        "Qual é o impacto da política monetária não convencional no controle da inflação durante uma crise econômica?"
-    ],
-    [
-        "Erklären Sie den Unterschied zwischen deterministischen und nicht-deterministischen endlichen Automaten und ihre Anwendungsbereiche."
-    ],
-    [
-        "Giải thích cơ chế của quá trình phiên mã ngược (reverse transcription) và tầm quan trọng của nó trong nghiên cứu HIV/AIDS."
-    ],
-    ["조선시대 성리학이 한국 사회와 문화에 미친 영향을 설명하세요."],
-    ["如何解释量子纠缠现象,以及它在量子计算中的潜在应用?"],
-    [
-        "How can you design a daily schedule that maximizes productivity for a remote worker who has multiple meetings and project deadlines?"
-    ],
-    [
-        "Quels sont les meilleures stratégies pour gérer les conflits au sein d'une équipe multiculturelle travaillant sur un projet commun?"
-    ],
-    [
-        "Quali sono i migliori consigli per mantenere un equilibrio tra vita professionale e vita privata in un ambiente lavorativo stressante?"
-    ],
-    [
-        "¿Cómo se puede elaborar un plan financiero personal efectivo que incluya ahorro para la jubilación, inversión y manejo de deudas?"
-    ],
-    [
-        "Quais são as melhores práticas para implementar metodologias ágeis em uma equipe de desenvolvimento de software?"
-    ],
-    [
-        "Welche Strategien können verwendet werden, um ein starkes berufliches Netzwerk aufzubauen und zu pflegen, insbesondere in der Tech-Branche?"
-    ],
-    [
-        "Những bước nào cần thiết để xây dựng một lộ trình phát triển sự nghiệp bền vững trong lĩnh vực công nghệ thông tin?"
-    ],
-    ["프로젝트의 범위 변동을 효과적으로 관리하기 위한 최고의 방법은 무엇인가요?"],
-    ["在快速变化的职场环境中,如何有效地实现工作与生活的平衡?"],
-    [
-        "Write an argumentative essay discussing the pros and cons of artificial intelligence in the workplace, including potential ethical concerns."
-    ],
-    [
-        "Analysez les impacts sociaux et économiques de la digitalisation sur les petites entreprises en France."
-    ],
-    [
-        "Scrivi un'email formale al direttore di una rivista per proporre un articolo sulla sostenibilità ambientale nelle città italiane."
-    ],
-    [
-        "Elabora un informe detallado sobre los efectos del cambio climático en la biodiversidad de la región amazónica."
-    ],
-    [
-        "Analise criticamente os principais pontos abordados no relatório anual do Banco Mundial sobre a pobreza global."
-    ],
-    [
-        "Erstellen Sie eine technische Dokumentation für die Implementierung eines neuen Software-Features in einer bestehenden Anwendung."
-    ],
-    [
-        "Viết một bài luận phân tích về tác động của cuộc cách mạng công nghiệp 4.0 đối với thị trường lao động Việt Nam."
-    ],
-    [
-        "인공지능의 윤리적 문제에 대한 연구 논문을 작성하고, 다양한 사례를 통해 그 영향을 분석하세요."
-    ],
-    ["分析鲁迅的小说《阿Q正传》中反映的中国社会问题和作者的批判态度。"],
-]
-
 if not torch.cuda.is_available():
     DESCRIPTION += "\n<p>Running on CPU 🥶 This demo does not work on CPU.</p>"
 
 
+def workaround_fixed_get_imports(filename: str | os.PathLike) -> list[str]:
+    """
+    Workaround for fixed get_imports function.
+
+    @args:
+        filename (str | os.PathLike): The filename or path to the file.
+
+    @returns:
+        list[str]: The list of imports.
+
+    @remarks:
+    - This function is a workaround for the fixed get_imports function.
+    - It checks if the filename ends with "/modeling_florence2.py".
+    - If it doesn't, it calls the original get_imports function.
+    - If it does, it calls the original get_imports function and removes the "flash_attn" import.
+
+    @usage:
+        ```python
+        from unittest.mock import patch
+        image_torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
+        with patch(
+            "transformers.dynamic_module_utils.get_imports", workaround_fixed_get_imports
+        ):
+        ```
+    """
+
+    if not str(filename).endswith("/modeling_florence2.py"):
+        return get_imports(filename)
+    imports = get_imports(filename)
+    imports.remove("flash_attn")
+    return imports
+
+
 if torch.cuda.is_available():
-    model_id = "ghost-x/ghost-8b-beta"
     hf_serect = os.getenv("HF_TOKEN", None)
-
-
+    attn_implementation = "flash_attention_2"
+
+    chat_model_id = "ghost-x/ghost-8b-beta-1608"
+    chat_device = torch.device("cuda")
+    chat_model = AutoModelForCausalLM.from_pretrained(
+        chat_model_id,
        device_map="auto",
        torch_dtype=torch.bfloat16,
-        attn_implementation=
+        attn_implementation=attn_implementation,
        trust_remote_code=True,
        token=hf_serect,
    )
-
-
+    chat_tokenizer = AutoTokenizer.from_pretrained(
+        chat_model_id,
        trust_remote_code=True,
        token=hf_serect,
    )
    SelfExtend.apply(
-
+        chat_model,
        group_size=16,
        window_size=512,
        enable_flash_attention=True,
        flash_attention_impl="flash_attn",
    )
-
+    chat_model.generation_config.max_length = 123392
+
+    image_model_id = "microsoft/Florence-2-large"
+    # image_device = "cuda" if torch.cuda.is_available() else "cpu"
+    # image_torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
+    image_device = "cpu"
+    image_torch_dtype = torch.float32
+    image_model = (
+        AutoModelForCausalLM.from_pretrained(
+            image_model_id,
+            torch_dtype=image_torch_dtype,
+            trust_remote_code=True,
+            token=hf_serect,
+        )
+        .to(image_device)
+        .eval()
+    )
+    image_processor = AutoProcessor.from_pretrained(
+        image_model_id,
+        trust_remote_code=True,
+        token=hf_serect,
+    )
 
-
+
+waiting_tools_timeout = 5
 supported_tools = json.dumps(
     [
         {
@@ -319,6 +228,22 @@ supported_tools = json.dumps(
 
 @lru_cache(maxsize=128)
 def extract_text_from_webpage(html_content):
+    """
+    Extracts visible text from an HTML webpage.
+
+    @args:
+        html_content (str): The HTML content of the webpage.
+
+    @returns:
+        str: The visible text extracted from the webpage.
+
+    @remarks:
+    - This function uses the BeautifulSoup library to parse the HTML content.
+    - It removes certain tags (script, style, header, footer, nav, form, svg) from the parsed HTML.
+    - The remaining visible text is then extracted using the `get_text` method of BeautifulSoup.
+    - The extracted text is stripped of leading/trailing whitespace and separated by a single space.
+    """
+
     soup = BeautifulSoup(html_content, "html.parser")
     for tag in soup(["script", "style", "header", "footer", "nav", "form", "svg"]):
         tag.extract()
@@ -330,6 +255,23 @@ def search_with_wikipedia(
     query: str,
     language: str = "en",
 ):
+    """
+    Search for a given query on Wikipedia and return the summary.
+
+    @args:
+        query (str): The search query.
+        language (str, optional): The language code for the Wikipedia page. Defaults to "en".
+
+    @returns:
+        list: A list containing the summary of the Wikipedia page.
+
+    @remarks:
+    - This function uses the Wikipedia API to search for the given query.
+    - The language parameter determines the language of the Wikipedia page to search.
+    - If the search is successful, the function returns a list containing the summary of the page.
+    - If an exception occurs during the search, an empty list is returned.
+    """
+
     all_results = []
     try:
         wikipedia.set_lang(language)
@@ -346,9 +288,39 @@ def search_with_google(
     language: str = "en",
     ssl_verify: bool = None,
 ):
+    """
+    Searches Google for the given query and returns a list of search results.
+
+    @args:
+        query (str): The search query.
+        num_results (int, optional): The number of search results to retrieve. Defaults to 3.
+        timeout (int, optional): The timeout value for the HTTP requests. Defaults to 5.
+        language (str, optional): The language for the search results. Defaults to "en".
+        ssl_verify (bool, optional): Whether to verify SSL certificates. Defaults to None.
+
+    @returns:
+        list: A list of dictionaries containing the link and visible text of each search result.
+
+    @remarks:
+    - This function uses the requests library to send HTTP requests to Google.
+    - It sets the User-Agent header to mimic a Firefox browser.
+    - The search results are retrieved from the HTML response using BeautifulSoup.
+    - Each search result is represented as a dictionary with "link" and "text" keys.
+    - The "link" key contains the URL of the search result.
+    - The "text" key contains the visible text extracted from the search result webpage.
+    - If the visible text exceeds 4096 characters, it is truncated to that length.
+    - If an error occurs while fetching or processing a search result, it is printed and ignored.
+    """
+
+    # Initialize an empty list to store the search results
     all_results = []
+
+    # Define the maximum number of characters per page
     max_chars_per_page = 4096
+
+    # Create a session object to send HTTP requests
     with requests.Session() as session:
+        # Send a GET request to Google search with the specified query parameters
         resp = session.get(
             url="https://www.google.com/search",
             headers={
@@ -363,36 +335,118 @@ def search_with_google(
             timeout=timeout,
             verify=ssl_verify,
         )
+
+        # Raise an exception if the response status code is not successful
         resp.raise_for_status()
+
+        # Parse the HTML response using BeautifulSoup
         soup = BeautifulSoup(resp.text, "html.parser")
+
+        # Find all the result blocks in the HTML
         result_block = soup.find_all("div", attrs={"class": "g"})
+
+        # Iterate over each result block
         for result in result_block:
+            # Find the link element within the result block
            link = result.find("a", href=True)
+
+            # If a link is found, extract the URL and process the webpage
            if link:
                link = link["href"]
                try:
+                    # Send a GET request to the link URL
                    webpage = session.get(
                        link,
                        headers={
                            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/111.0"
                        },
                    )
+
+                    # Raise an exception if the response status code is not successful
                    webpage.raise_for_status()
+
+                    # Extract the visible text from the webpage
                    visible_text = extract_text_from_webpage(webpage.text)
+
+                    # Truncate the visible text if it exceeds the maximum number of characters per page
                    if len(visible_text) > max_chars_per_page:
                        visible_text = visible_text[:max_chars_per_page]
+
+                    # Append the link and visible text to the search results list
                    all_results.append({"link": link, "text": visible_text})
                except requests.exceptions.RequestException as e:
+                    # Print an error message if there is an error fetching or processing the link
                    print(f"Error fetching or processing {link}: {e}")
                    pass
            else:
                pass
+
+    # Return the search results
    return all_results
 
 
-@
-def
-
+@lru_cache(maxsize=128)
+def extract_text_from_image(file: str) -> str:
+    """
+    Extracts text from an image file.
+
+    @args:
+        file (str): The path or URL of the image file.
+
+    @returns:
+        str: The extracted text from the image.
+
+    @remarks:
+    - This function uses an LRU cache to store previously processed images for faster retrieval.
+    - The image file can be either a local file path or a URL.
+    - The function opens the image file using the PIL library.
+    - The function processes the image using an image processor.
+    - The processed image is then passed to a text generation model to generate text.
+    - The generated text is post-processed to obtain the final extracted text.
+    """
+    # Define the task and load the image
+    task = "<MORE_DETAILED_CAPTION>"
+    image = Image.open(
+        requests.get(file, stream=True).raw
+        if file.startswith("http")
+        else open(file, "rb")
+    )
+
+    if image.mode != "RGB":
+        image = image.convert("RGB")
+
+    # Preprocess the image using the image processor
+    inputs = image_processor(text=task, images=image, return_tensors="pt").to(
+        "cpu", image_torch_dtype
+    )
+
+    # Generate text based on the input image
+    generated_ids = image_model.generate(
+        input_ids=inputs["input_ids"],
+        pixel_values=inputs["pixel_values"],
+        max_new_tokens=1024,
+        num_beams=3,
+        do_sample=False,
+    )
+
+    # Decode the generated text and post-process the answer
+    generated_text = image_processor.batch_decode(
+        generated_ids, skip_special_tokens=False
+    )[0]
+    parsed_answer = image_processor.post_process_generation(
+        generated_text,
+        task=task,
+        image_size=(image.width, image.height),
+    )
+
+    # Return the parsed answer for the specified task
+    return parsed_answer[task]
+
+
+@spaces.GPU(duration=90)
+def generate_chat(
+    uuid: str,
+    message: dict,
     chat_history: list[tuple[str, str]],
     allow_used_tools: bool = True,
     system_prompt: str = "",
@@ -401,48 +455,44 @@ def generate(
     top_p: float = 0.95,
     top_k: int = 50,
     repetition_penalty: float = 1.0,
-
+    client_info: str = None,
 ) -> Iterator[str]:
-    #
-    # print("allow_used_tools:\n", allow_used_tools)
-    # print("system_prompt:\n", system_prompt)
-    # print("max_new_tokens:\n", max_new_tokens)
-    # print("temperature:\n", temperature)
-
+    # Build the input_ids for the chat conversation
    def build_input_ids(
        apply_tools: bool = None,
        references=None,
    ):
        conversation = []
+
+        # Add the system prompt to the conversation
        if system_prompt:
            conversation.append({"role": "system", "content": system_prompt})
+
+        # Add the tools role to the conversation if apply_tools is True
        if apply_tools is True:
            conversation.append({"role": "tools", "content": supported_tools})
 
+        # Add the references role to the conversation
        if references is None:
-            references = [
+            references = [client_info]
        else:
-            references.insert(0,
+            references.insert(0, client_info)
 
        if (
            references is not None
            and isinstance(references, list)
            and len(references) > 0
        ):
+            formatted_references = f"Analyze the provided references, extract relevant information to provide accurate and objective feedback. This reference information may include: conversation context, assistant or user memories, reasoning guides, problem-solving suggestions, assistant rules, etc.\nIf the reference is not relevant, ignore it. Try to have a balanced approach, avoiding over-reliance on the documentation."
+            formatted_references += "\n\n" + ("\n\n".join(references))
            conversation.append(
                {
                    "role": "refs",
-                    "content":
-                        {
-                            "instructions": "These are only general documents used for reference to give the most accurate and honest answers possible. Ignore it if it's irrelevant and don't overuse it.",
-                            "documents": references,
-                        },
-                        indent=2,
-                        ensure_ascii=False,
-                    ),
+                    "content": formatted_references,
                }
            )
 
+        # Add the chat history to the conversation
        for user, assistant in chat_history:
            conversation.extend(
                [
@@ -450,12 +500,28 @@ def generate(
                    {"role": "assistant", "content": assistant},
                ]
            )
-        conversation.append({"role": "user", "content": message})
 
-
+        # Add the user message with image attachments to the conversation
+        conversation.append(
+            {
+                "role": "user",
+                "content": (
+                    f"{' & '.join(message['attachments'])}\n\n{message['text']}"
+                    if "attachments" in message and len(message["attachments"]) > 0
+                    else f"{message['text']}"
+                ),
+            }
+        )
+
+        logger.debug(f"UUID: {uuid} - Conversation: {conversation}")
+
+        # Apply the chat template to convert the conversation into input_ids
+        input_ids = chat_tokenizer.apply_chat_template(
            conversation, add_generation_prompt=True, return_tensors="pt"
        )
-        input_ids = input_ids.to(
+        input_ids = input_ids.to(chat_model.device)
+
+        # Trim the input_ids if it exceeds the maximum token length
        if input_ids.shape[1] > MAX_INPUT_TOKEN_LENGTH:
            input_ids = input_ids[:, -MAX_INPUT_TOKEN_LENGTH:]
            gr.Warning(
@@ -463,10 +529,13 @@ def generate(
            )
        return input_ids
 
+    # Generate chat responses based on the input_ids
    def generate_chat_responses(
        previous_response: str = None,
    ):
        document_references = []
+
+        # Check if the previous response contains scheduled tool runs
        if previous_response is not None:
            scheduled_tools_runs = None
            try:
@@ -481,6 +550,7 @@ def generate(
                print(e)
                pass
 
+            # If scheduled tool runs exist, perform the corresponding searches
            if (
                scheduled_tools_runs is not None
                and scheduled_tools_runs["name"] == "search_on_internet"
@@ -488,9 +558,8 @@ def generate(
                keyword = scheduled_tools_runs["arguments"]["keyword"]
                search_type = scheduled_tools_runs["arguments"]["type"]
                language = scheduled_tools_runs["arguments"]["language"]
-
-
-                )
+
+                # Search on Wikipedia if the search type is "wikipedia"
                if search_type == "wikipedia":
                    gr.Info(
                        "Searching for information on the Wikipedia.",
@@ -501,6 +570,7 @@ def generate(
                        search_with_wikipedia(query=keyword, language=language)
                    )
 
+                # Search on Google
                gr.Info("Searching for information on the Google.")
                document_references.extend(
                    search_with_google(
@@ -509,20 +579,25 @@ def generate(
                        num_results=3,
                    )
                )
-                print(
-                    "document_references:", document_references
-                )
+                print("document_references:", document_references)
 
+        # Determine if tools should be applied based on the allow_used_tools flag
        apply_tools = (
            True if allow_used_tools is True and previous_response is None else False
        )
+
+        # Build the input_ids for the chat conversation
        input_ids = build_input_ids(
            apply_tools=apply_tools,
            references=document_references,
        )
+
+        # Create a TextIteratorStreamer to generate chat responses
        streamer = TextIteratorStreamer(
-
+            chat_tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True
        )
+
+        # Set the generation parameters
        generate_kwargs = dict(
            input_ids=input_ids,
            streamer=streamer,
@@ -537,9 +612,14 @@ def generate(
            generate_kwargs["top_p"] = top_p
            generate_kwargs["top_k"] = top_k
 
-
+        # Start the generation process in a separate thread
+        t = Thread(target=chat_model.generate, kwargs=generate_kwargs)
        t.start()
 
+        logger.debug(
+            f"UUID: {uuid} - Is apply tools: {apply_tools} - Is apply documents: {len(document_references) > 0} - Is previous response: {previous_response is not None} - Start generating chat responses"
+        )
+
        state = {
            "mark": None,
            "respond": False,
@@ -556,6 +636,7 @@ def generate(
                state["respond"] = True
                yield "".join(outputs)
 
+        # If tools are applied and no response is generated within the timeout, continue generating chat responses
        if (
            apply_tools is True
            and state["respond"] is False
@@ -564,22 +645,126 @@ def generate(
            previous_response = "".join(outputs)
            yield from generate_chat_responses(previous_response=previous_response)
 
+    # Yield the generated chat responses
    yield from generate_chat_responses(previous_response=None)
 
 
+def generate(
+    message: dict,
+    chat_history: list[tuple[str, str]],
+    allow_used_tools: bool = True,
+    system_prompt: str = "",
+    max_new_tokens: int = 1536,
+    temperature: float = 0.4,
+    top_p: float = 0.95,
+    top_k: int = 50,
+    repetition_penalty: float = 1.0,
+    client_info: str = None,
+) -> Iterator[str]:
+    # Generate a unique identifier using the current time
+    uuid = zlib.crc32(str.encode(str(time.time())))
+    logger.info(f"UUID: {uuid} - Starting image text extraction process")
+
+    # Limit the number of files to process to 2
+    if len(message["files"]) > 2:
+        gr.Warning("Only the first 2 images will be processed.")
+
+    message["files"] = message["files"][:2]
+
+    # Extract text from each image file and replace the file path with an attachment tag containing the extracted text
+    message["attachments"] = handle_file_extraction(
+        files=list(message["files"]), uuid=uuid
+    )
+    logger.debug(f"UUID: {uuid} - Image text extraction process completed")
+
+    logger.debug(f"UUID: {uuid} - Previous chat history: {chat_history}")
+    for idx, chat_pair in enumerate(chat_history):
+        user_message, assistant_message = chat_pair
+        if not isinstance(user_message, str) and assistant_message is None:
+            text_descriptions = handle_file_extraction(
+                files=list(user_message), uuid=uuid
+            )
+            chat_input = (
+                f"{' & '.join(text_descriptions)}\n\n{chat_history[idx + 1][0]}"
+            )
+            chat_history[idx + 1][0] = chat_input
+            chat_history[idx] = [None, None]
+            logger.debug(
+                f"UUID: {uuid} - Updated chat history: {chat_history} - Updated chat input: {chat_input}"
+            )
+
+    chat_history = list(
+        filter(lambda x: x[0] is not None and x[1] is not None, chat_history)
+    )
+    logger.debug(f"UUID: {uuid} - Filtered chat history: {chat_history}")
+
+    yield from generate_chat(
+        uuid=uuid,
+        message=message,
+        chat_history=chat_history,
+        allow_used_tools=allow_used_tools,
+        system_prompt=system_prompt,
+        max_new_tokens=max_new_tokens,
+        temperature=temperature,
+        top_p=top_p,
+        top_k=top_k,
+        repetition_penalty=repetition_penalty,
+        client_info=client_info,
+    )
+
+
+def handle_file_extraction(files: list[str], uuid: str):
+    """
+    Extracts text from images in the given message's files and returns a list of attachments.
+
+    @args:
+        files (list[str]): The files to extract text from.
+        uuid (str): The UUID associated with the extraction process.
+
+    @returns:
+        list: A list of attachments, each represented as a string.
+
+    @remarks:
+    - This function iterates over the files in the message and extracts text from each image file.
+    - The extracted text is logged along with the UUID and file information.
+    - The extracted text is then added to the attachments list as a string representation of an attachment.
+    - The attachments list is returned at the end of the function.
+    """
+
+    attachments = []
+    for idx, file_to_extract in enumerate(files):
+        extracted_text = extract_text_from_image(file=file_to_extract)
+        logger.info(
+            f"UUID: {uuid} - File: {file_to_extract} - Extracted text: {extracted_text}"
+        )
+        attachments.append(
+            f'<attachment index="{idx}" type="image" description="{extracted_text}" />'
+        )
+    return attachments
+
+
 chatbot = gr.Chatbot(
-    height=500,
+    height=500,
+    placeholder=PLACEHOLDER,
+    label="Ghost 8B Beta (β, 128k)",
+    show_copy_button=True,
 )
 
 chat_interface = gr.ChatInterface(
     fn=generate,
     chatbot=chatbot,
     fill_height=True,
+    multimodal=True,
+    textbox=gr.MultimodalTextbox(
+        file_types=["image"],
+        placeholder="Type a message...",
+    ),
     additional_inputs=[
         gr.Checkbox(
-            label="Allow used tools (available: search on internet)",
+            label="Allow used tools (available: search on internet)",
+            value=False,
        ),
-        gr.Textbox(label="System prompt", lines=6),
+        gr.Textbox(label="System prompt", lines=6, value=DEFAULT_SYSTEM_PROMPT),
        gr.Slider(
            label="Max new tokens",
            minimum=1,
@@ -616,23 +801,26 @@ chat_interface = gr.ChatInterface(
             value=1.0,
         ),
         gr.Textbox(
-
+            elem_id="client_info",
+            label="Client info",
            lines=1,
-            value="
+            value="The current time is now: {}".format(
+                time.strftime("%A, %D %B %Y %H:%M:%S")
+            ),
            visible=False,
        ),
    ],
    stop_btn="Stop",
    cache_examples=False,
-    examples=
-    examples_per_page=
+    examples=[],
+    examples_per_page=10,
    concurrency_limit=100,
 )
 
-with gr.Blocks(fill_height=True, css="style.css") as demo:
+with gr.Blocks(fill_height=True, css="style.css", head=HEAD) as demo:
     gr.Markdown(DESCRIPTION)
     chat_interface.render()
     gr.Markdown(LICENSE)
 
 if __name__ == "__main__":
-    demo.queue(
+    demo.queue().launch(share=True)
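
For context on `workaround_fixed_get_imports`: Florence-2's remote modeling code declares a hard `flash_attn` import, which fails on machines without that package; stripping the import lets the model load on CPU. A minimal sketch of applying the workaround, following the function's own @usage note (the patch target and model id match the diff; the surrounding scaffolding is illustrative):

```python
# Sketch: load Florence-2 on CPU by patching transformers' dynamic-module
# import scanner so "flash_attn" is not required.
from unittest.mock import patch

import torch
from transformers import AutoModelForCausalLM
from transformers.dynamic_module_utils import get_imports


def workaround_fixed_get_imports(filename) -> list[str]:
    # Only touch Florence-2's modeling file; leave everything else as-is.
    if not str(filename).endswith("/modeling_florence2.py"):
        return get_imports(filename)
    imports = get_imports(filename)
    imports.remove("flash_attn")
    return imports


with patch(
    "transformers.dynamic_module_utils.get_imports", workaround_fixed_get_imports
):
    image_model = AutoModelForCausalLM.from_pretrained(
        "microsoft/Florence-2-large",
        torch_dtype=torch.float32,  # CPU-friendly dtype, as in the diff
        trust_remote_code=True,
    ).eval()
```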
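
The streaming output in `generate_chat` follows the standard transformers pattern: `model.generate` blocks, so it runs on a worker thread while the caller iterates over a `TextIteratorStreamer` that yields decoded text chunks as they are produced. A standalone sketch of that pattern, using "gpt2" as a small stand-in model (the app passes `chat_model` and `chat_tokenizer` with the same streamer settings):

```python
# Sketch: threaded generation with incremental token streaming.
from threading import Thread

from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

tokenizer = AutoTokenizer.from_pretrained("gpt2")
model = AutoModelForCausalLM.from_pretrained("gpt2")

input_ids = tokenizer("The quick brown fox", return_tensors="pt").input_ids
streamer = TextIteratorStreamer(
    tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True
)

# Run the blocking generate() call on its own thread.
thread = Thread(
    target=model.generate,
    kwargs=dict(input_ids=input_ids, streamer=streamer, max_new_tokens=32),
)
thread.start()

# Consume chunks on the main thread as they arrive.
outputs = []
for text_chunk in streamer:
    outputs.append(text_chunk)
thread.join()
print("".join(outputs))
```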
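
The image support added here never feeds pixels to the language model: each uploaded image is captioned by Florence-2, the caption is wrapped in an `<attachment/>` tag, and the tags are joined onto the user's text before templating. A small sketch of that assembly, with a hypothetical caption standing in for Florence-2's output:

```python
# Sketch: how captions become chat context (mirrors handle_file_extraction
# and build_input_ids in this commit). The caption below is hypothetical.
captions = ["A photo of a red bicycle leaning against a brick wall."]

attachments = [
    f'<attachment index="{idx}" type="image" description="{caption}" />'
    for idx, caption in enumerate(captions)
]

message = {"text": "What color is the bike?", "attachments": attachments}

# Attachments are joined with " & " and prepended to the user's text.
content = (
    f"{' & '.join(message['attachments'])}\n\n{message['text']}"
    if message["attachments"]
    else message["text"]
)
print(content)
```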

requirements.txt
CHANGED

@@ -1,10 +1,11 @@
-accelerate
-bitsandbytes
-gradio
+accelerate
+bitsandbytes
+gradio
+spaces
+transformers
+timm
 scipy==1.13.0
 sentencepiece==0.2.0
-spaces==0.28.3
 torch==2.0.0
-transformers==4.41.0
 beautifulsoup4>=4.9
 wikipedia==1.4.0