Spaces:
Sleeping
Sleeping
Upload folder using huggingface_hub
Browse files- .gitattributes +1 -0
- README.md +8 -6
- app.py +64 -0
- chatacter/__init__.py +32 -0
- chatacter/agents/__init__.py +1 -0
- chatacter/agents/crawler.py +27 -0
- chatacter/agents/llm.py +49 -0
- chatacter/agents/search.py +24 -0
- chatacter/db/__init__.py +1 -0
- chatacter/db/vector_database.py +48 -0
- data/image/Einstein.jpg +3 -0
- data/image/Napoleon.jpg +0 -0
- requirements.txt +70 -0
.gitattributes
CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
+
data/image/Einstein.jpg filter=lfs diff=lfs merge=lfs -text
|
README.md
CHANGED
@@ -1,12 +1,14 @@
|
|
1 |
---
|
2 |
-
title:
|
3 |
-
emoji:
|
4 |
colorFrom: gray
|
5 |
-
colorTo:
|
6 |
sdk: gradio
|
7 |
-
sdk_version: 5.23.1
|
8 |
app_file: app.py
|
9 |
-
pinned:
|
|
|
10 |
---
|
11 |
|
12 |
-
|
|
|
|
|
|
1 |
---
|
2 |
+
title: Chatacter
|
3 |
+
emoji: 👀
|
4 |
colorFrom: gray
|
5 |
+
colorTo: blue
|
6 |
sdk: gradio
|
|
|
7 |
app_file: app.py
|
8 |
+
pinned: true
|
9 |
+
short_description: Chat with Characters
|
10 |
---
|
11 |
|
12 |
+
# App part of the Chatacter Backend
|
13 |
+
|
14 |
+
[](https://microsoft.github.io/pyright/)
|
app.py
ADDED
@@ -0,0 +1,64 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
from huggingface_hub import InferenceClient
|
3 |
+
|
4 |
+
"""
|
5 |
+
For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
|
6 |
+
"""
|
7 |
+
client = InferenceClient(model="HuggingFaceH4/zephyr-7b-beta")
|
8 |
+
|
9 |
+
|
10 |
+
def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    """Stream a chat completion for *message* given the conversation history.

    Args:
        message: The latest user message.
        history: Prior ``(user, assistant)`` turn pairs from the ChatInterface.
        system_message: System prompt prepended to the conversation.
        max_tokens: Maximum number of new tokens to generate.
        temperature: Sampling temperature.
        top_p: Nucleus-sampling probability mass.

    Yields:
        The accumulated response text after each streamed chunk.
    """
    messages = [{"role": "system", "content": system_message}]

    for user_turn, assistant_turn in history:
        if user_turn:
            messages.append({"role": "user", "content": user_turn})
        if assistant_turn:
            messages.append({"role": "assistant", "content": assistant_turn})

    messages.append({"role": "user", "content": message})

    response = ""

    # ``chunk`` (not ``message``) avoids shadowing the parameter above.
    for chunk in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        token = chunk.choices[0].delta.content
        # Streamed deltas may carry no content (e.g. role-only chunks);
        # guard against ``None`` before concatenating.
        if token:
            response += token
        yield response
|
41 |
+
|
42 |
+
|
43 |
+
"""
|
44 |
+
For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
|
45 |
+
"""
|
46 |
+
# Build the Gradio chat UI around ``respond``; the extra controls map, in
# order, onto the generation parameters ``respond`` accepts after ``history``.
system_box = gr.Textbox(value="You are a friendly Chatbot.", label="System message")
tokens_slider = gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens")
temperature_slider = gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature")
top_p_slider = gr.Slider(
    minimum=0.1,
    maximum=1.0,
    value=0.95,
    step=0.05,
    label="Top-p (nucleus sampling)",
)

demo = gr.ChatInterface(
    fn=respond,
    additional_inputs=[system_box, tokens_slider, temperature_slider, top_p_slider],
)


if __name__ == "__main__":
    demo.launch()
|
chatacter/__init__.py
ADDED
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from functools import lru_cache

from pydantic import BaseModel


class AssetsSettings(BaseModel):
    """Filesystem locations of the media assets the backend reads/writes."""

    # Output path for the generated speech track.
    audio: str = "./assets/audio/AUDIO.wav"
    # Directory holding character portrait images.
    image: str = "./assets/image/"
    # Output path for the generated video.
    video: str = "./assets/video/VIDEO.mp4"


class HostSettings(BaseModel):
    """Base URLs of the companion generator services."""

    voice_generator: str = "http://localhost:8001/"
    video_generator: str = "http://localhost:8002/"


class Settings(BaseModel):
    """Top-level application settings for the Chatacter backend."""

    app_name: str = "Chatacter"
    assets: AssetsSettings = AssetsSettings()
    # Currently selected character; empty string until one is chosen.
    character: str = ""
    host: HostSettings = HostSettings()
    vector_database_name: str = "chatacter"


@lru_cache
def load_settings() -> Settings:
    """Return the process-wide ``Settings`` instance (cached after first call)."""
    return Settings()


if __name__ == "__main__":
    settings: Settings = load_settings()
    print(settings.model_dump_json(indent=4))
|
chatacter/agents/__init__.py
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
|
chatacter/agents/crawler.py
ADDED
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from typing import List

from langchain_community.document_loaders import RecursiveUrlLoader
from langchain_core.documents.base import Document
from pydantic import StrictStr

# NOTE(review): ``get_search_results`` is not defined in .search in this
# commit (search.py only builds an ``agent``) — confirm against the repo.
from .search import get_search_results


def crawl(query: str) -> List[StrictStr]:
    """Return a deduplicated list of URLs related to *query*.

    Seeds from the search-engine results for *query*, then shallowly loads
    each link (depth 1, 5s timeout) and collects the ``source`` URL of every
    document found. Links that fail to load are skipped (best effort).
    """
    # Seed URLs from the search results.
    links_search_engine: List[StrictStr] = get_search_results(query=query)
    links_crawler: List[StrictStr] = []
    # Shallowly load each seed link and harvest discovered source URLs.
    for link in links_search_engine:
        try:
            html_loader = RecursiveUrlLoader(url=link, max_depth=1, timeout=5)
            docs: List[Document] = html_loader.load()
            for doc in docs:
                source = doc.metadata.get("source")
                # ``source`` may be missing from the metadata; skip rather
                # than propagate ``None`` into the returned URL list.
                if source:
                    links_crawler.append(source)
        except Exception as e:  # best-effort crawl: report and move on
            print(f"Error: {e}")
    # Deduplicate; note ``set`` makes the ordering arbitrary.
    return list(set(links_crawler + links_search_engine))


if __name__ == "__main__":
    print(crawl("What is the capital of France"))
|
chatacter/agents/llm.py
ADDED
@@ -0,0 +1,49 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import time
from typing import List

from langchain_core.prompts import ChatPromptTemplate
from langchain_groq import ChatGroq
from pydantic import StrictStr
from qdrant_client.fastembed_common import QueryResponse

from ..db.vector_database import add_data, get_chunks, query_db
from .crawler import crawl

chat = ChatGroq(
    model="llama3-70b-8192",
    verbose=True,
)  # type: ignore


def get_response(query: str, character: str) -> tuple[str, str]:
    """Answer *query* in the voice of *character*, using web-retrieved context.

    Pipeline: crawl links for the query, chunk each page, index the chunks in
    the vector database, retrieve the best match, and prompt the LLM with it.

    Returns:
        A ``(answer, elapsed_seconds)`` pair, both rendered as strings.
    """
    start_time: float = time.time()
    print("Query:", query, "Character:", character)
    print("start crawling")
    links: List[StrictStr] = crawl(query=query)
    print("start getting chunks")
    # Chunks are the ``unstructured`` Element objects returned by get_chunks.
    chunks: list = []
    for link in links:
        chunks.extend(get_chunks(url=link))
    print("start adding data to db")
    add_data(chunks=chunks)
    print("start querying db")
    results: List[QueryResponse] = query_db(query=query)
    # QueryResponse is not subscriptable; the matched text is ``.document``.
    # Guard against an empty result set instead of raising IndexError.
    # TODO(review): confirm ``.document`` is the intended context field.
    context: str = results[0].document if results else ""
    prompt: ChatPromptTemplate = ChatPromptTemplate.from_messages(
        messages=[
            (
                "system",
                "Act as {character}. Answer in one statement. Answer the question using the provided context. Context: {context}",
            ),
            ("human", "{text}"),
        ]
    )
    # Compose prompt and model as a runnable: ``invoke`` returns an AIMessage
    # whose ``.content`` carries the answer. (LLMChain is deprecated and its
    # ``invoke`` returns a dict, which has no ``.content`` attribute.)
    chain = prompt | chat
    response = chain.invoke(
        {"text": query, "character": character, "context": context}
    )
    end_time: float = time.time()
    return str(object=response.content), str(object=end_time - start_time)
|
chatacter/agents/search.py
ADDED
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from agno.agent import Agent
from agno.models.groq import Groq
from agno.tools.googlesearch import GoogleSearchTools
from pydantic import StrictInt, StrictStr

# Behavioral instructions for the news-search agent, kept separate for
# readability; passed verbatim to the Agent below.
_news_instructions = [
    "Given a topic by the user, respond with 4 latest news items about that topic.",
    "Search for 10 news items and select the top 4 unique items.",
    "Search in English and in French.",
]

# News agent: Groq-hosted Llama 3.3 with Google Search tooling, verbose
# (tool calls shown, debug mode on).
agent = Agent(
    model=Groq(id="llama-3.3-70b-versatile"),
    tools=[GoogleSearchTools()],
    description="You are a news agent that helps users find the latest news.",
    instructions=_news_instructions,
    show_tool_calls=True,
    debug_mode=True,
)

if __name__ == "__main__":
    agent.print_response(
        message="Mistral AI",
        markdown=True,
        show_full_reasoning=True,
    )
|
chatacter/db/__init__.py
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
|
chatacter/db/vector_database.py
ADDED
@@ -0,0 +1,48 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from typing import Any, List

from pydantic import StrictStr
from qdrant_client import QdrantClient
from qdrant_client.fastembed_common import QueryResponse
from unstructured.chunking.title import chunk_by_title
from unstructured.cleaners.core import (
    bytes_string_to_string,
    clean_extra_whitespace,
    clean_non_ascii_chars,
    replace_unicode_quotes,
)
from unstructured.documents.elements import Element
from unstructured.partition.auto import partition

from .. import Settings, load_settings

# Module-level settings/client shared by all helpers below. The original
# referenced an undefined ``settings``; obtain it from the package's cached
# loader.
settings: Settings = load_settings()
client = QdrantClient(host="localhost", port=6333)


def get_chunks(url: StrictStr) -> List[Element]:
    """Partition the page at *url*, clean each element's text, and return
    title-based chunks suitable for indexing."""
    elements: List[Element] = partition(url=url)
    for i in range(len(elements)):
        elements[i].text = clean_non_ascii_chars(text=elements[i].text)
        elements[i].text = replace_unicode_quotes(text=elements[i].text)
        elements[i].text = clean_extra_whitespace(text=elements[i].text)
        elements[i].text = bytes_string_to_string(text=elements[i].text)
    return chunk_by_title(elements=elements)


def add_data(chunks: List[Element]) -> None:
    """Index *chunks* (text + element metadata) into the configured collection.

    NOTE(review): ids restart at 1 on every call, so repeated calls overwrite
    earlier points — confirm this is intended.
    """
    docs: List[StrictStr] = [chunk.text for chunk in chunks]
    metadata: List[dict[str, Any]] = [chunk.metadata.to_dict() for chunk in chunks]
    ids = list(range(1, len(chunks) + 1))
    client.add(
        collection_name=settings.vector_database_name,
        documents=docs,
        metadata=metadata,
        ids=ids,
    )


def query_db(query: StrictStr) -> List[QueryResponse]:
    """Run a text query against the collection and return scored matches."""
    return client.query(
        collection_name=settings.vector_database_name,
        query_text=query,
    )


if __name__ == "__main__":
    url = "https://en.wikipedia.org/wiki/Napoleon"
    chunks: List[Element] = get_chunks(url=url)
    add_data(chunks=chunks)
    r: List[QueryResponse] = query_db(query="Napoleon Bonaparte")
    print(len(r))
    print(r)
|
data/image/Einstein.jpg
ADDED
![]() |
Git LFS Details
|
data/image/Napoleon.jpg
ADDED
![]() |
requirements.txt
ADDED
@@ -0,0 +1,70 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# This file was autogenerated by uv via the following command:
|
2 |
+
# uv export --no-dev --frozen --no-hashes --output-file requirements.txt
|
3 |
+
agno==1.2.4
|
4 |
+
aiofiles==23.2.1
|
5 |
+
annotated-types==0.7.0
|
6 |
+
anyio==4.9.0
|
7 |
+
audioop-lts==0.2.1 ; python_full_version >= '3.13'
|
8 |
+
beautifulsoup4==4.13.3
|
9 |
+
certifi==2025.1.31
|
10 |
+
charset-normalizer==3.4.1
|
11 |
+
click==8.1.8
|
12 |
+
colorama==0.4.6 ; sys_platform == 'win32'
|
13 |
+
distro==1.9.0
|
14 |
+
docstring-parser==0.16
|
15 |
+
fastapi==0.115.12
|
16 |
+
ffmpy==0.5.0
|
17 |
+
filelock==3.18.0
|
18 |
+
fsspec==2025.3.0
|
19 |
+
gitdb==4.0.12
|
20 |
+
gitpython==3.1.44
|
21 |
+
googlesearch-python==1.3.0
|
22 |
+
gradio==5.23.1
|
23 |
+
gradio-client==1.8.0
|
24 |
+
groovy==0.1.2
|
25 |
+
groq==0.20.0
|
26 |
+
h11==0.14.0
|
27 |
+
httpcore==1.0.7
|
28 |
+
httpx==0.28.1
|
29 |
+
huggingface-hub==0.29.3
|
30 |
+
idna==3.10
|
31 |
+
jinja2==3.1.6
|
32 |
+
markdown-it-py==3.0.0
|
33 |
+
markupsafe==3.0.2
|
34 |
+
mdurl==0.1.2
|
35 |
+
numpy==2.2.4
|
36 |
+
orjson==3.10.16
|
37 |
+
packaging==24.2
|
38 |
+
pandas==2.2.3
|
39 |
+
pillow==10.4.0
|
40 |
+
pycountry==24.6.1
|
41 |
+
pydantic==2.10.6
|
42 |
+
pydantic-core==2.27.2
|
43 |
+
pydantic-settings==2.8.1
|
44 |
+
pydub==0.25.1
|
45 |
+
pygments==2.19.1
|
46 |
+
python-dateutil==2.9.0.post0
|
47 |
+
python-dotenv==1.1.0
|
48 |
+
python-multipart==0.0.20
|
49 |
+
pytz==2025.2
|
50 |
+
pyyaml==6.0.2
|
51 |
+
requests==2.32.3
|
52 |
+
rich==13.9.4
|
53 |
+
ruff==0.11.2 ; sys_platform != 'emscripten'
|
54 |
+
safehttpx==0.1.6
|
55 |
+
semantic-version==2.10.0
|
56 |
+
shellingham==1.5.4
|
57 |
+
six==1.17.0
|
58 |
+
smmap==5.0.2
|
59 |
+
sniffio==1.3.1
|
60 |
+
soupsieve==2.6
|
61 |
+
starlette==0.46.1
|
62 |
+
tomli==2.2.1
|
63 |
+
tomlkit==0.13.2
|
64 |
+
tqdm==4.67.1
|
65 |
+
typer==0.15.2
|
66 |
+
typing-extensions==4.12.2
|
67 |
+
tzdata==2025.2
|
68 |
+
urllib3==2.3.0
|
69 |
+
uvicorn==0.34.0 ; sys_platform != 'emscripten'
|
70 |
+
websockets==15.0.1
|