MH0386 committed on
Commit
68051dd
·
verified ·
1 Parent(s): 3a89493

Upload folder using huggingface_hub

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ data/image/Einstein.jpg filter=lfs diff=lfs merge=lfs -text
README.md CHANGED
@@ -1,12 +1,14 @@
1
  ---
2
- title: Chatacter
3
- emoji: 💻
4
  colorFrom: gray
5
- colorTo: purple
6
  sdk: gradio
7
- sdk_version: 5.23.1
8
  app_file: app.py
9
- pinned: false
 
10
  ---
11
 
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
1
  ---
2
+ title: Chatacter
3
+ emoji: 👀
4
  colorFrom: gray
5
+ colorTo: blue
6
  sdk: gradio
 
7
  app_file: app.py
8
+ pinned: true
9
+ short_description: Chat with Characters
10
  ---
11
 
12
+ # App part of the Chatacter Backend
13
+
14
+ [![Checked with pyright](https://microsoft.github.io/pyright/img/pyright_badge.svg)](https://microsoft.github.io/pyright/)
app.py ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import gradio as gr
from huggingface_hub import InferenceClient

"""
For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
"""
# Serverless Inference API client for the zephyr-7b-beta chat model.
client = InferenceClient(model="HuggingFaceH4/zephyr-7b-beta")


def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    """Stream a chat completion for ``message``.

    Builds an OpenAI-style message list from the system prompt and the
    (user, assistant) turn history, then yields the progressively
    accumulated response text as tokens stream back from the model.
    """
    messages = [{"role": "system", "content": system_message}]

    for user_turn, assistant_turn in history:
        # Either side of a turn may be empty (e.g. a pending reply).
        if user_turn:
            messages.append({"role": "user", "content": user_turn})
        if assistant_turn:
            messages.append({"role": "assistant", "content": assistant_turn})

    messages.append({"role": "user", "content": message})

    response = ""

    # NOTE: iterate under a distinct name; the original reused `message`,
    # shadowing the function parameter.
    for chunk in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        token = chunk.choices[0].delta.content

        # BUG FIX: the final streamed chunk may carry a None/empty delta;
        # the original did `response += token` and raised TypeError on None.
        if token:
            response += token
        yield response


"""
For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
"""
# Chat UI wired to `respond`; the extra inputs surface the generation knobs.
demo = gr.ChatInterface(
    fn=respond,
    additional_inputs=[
        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.95,
            step=0.05,
            label="Top-p (nucleus sampling)",
        ),
    ],
)


if __name__ == "__main__":
    demo.launch()
chatacter/__init__.py ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
from functools import lru_cache

from pydantic import BaseModel


class AssetsSettings(BaseModel):
    """Filesystem locations for generated media assets."""

    # Default output paths, relative to the project root.
    audio: str = "./assets/audio/AUDIO.wav"
    image: str = "./assets/image/"
    video: str = "./assets/video/VIDEO.mp4"


class HostSettings(BaseModel):
    """Base URLs of the companion generator services."""

    voice_generator: str = "http://localhost:8001/"
    video_generator: str = "http://localhost:8002/"


class Settings(BaseModel):
    """Top-level application configuration for Chatacter."""

    app_name: str = "Chatacter"
    assets: AssetsSettings = AssetsSettings()
    # Currently selected character; empty until one is chosen.
    character: str = ""
    host: HostSettings = HostSettings()
    vector_database_name: str = "chatacter"


@lru_cache
def load_settings() -> Settings:
    """Build the settings object once and memoise it for all callers."""
    return Settings()


if __name__ == "__main__":
    # Quick manual check: dump the effective configuration as JSON.
    print(load_settings().model_dump_json(indent=4))
chatacter/agents/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+
chatacter/agents/crawler.py ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
from typing import List

from .search import get_search_results
from langchain_community.document_loaders import RecursiveUrlLoader
from langchain_core.documents.base import Document
from pydantic import StrictStr


def crawl(query: str) -> List[StrictStr]:
    """Return a de-duplicated list of links relevant to ``query``.

    Seeds from the search engine results, then shallowly crawls each seed
    (depth 1) and collects the ``source`` URL of every loaded document.
    Failures on individual sites are logged and skipped.
    """
    # NOTE(review): `get_search_results` is imported from `.search`, which
    # does not visibly define it in this commit — confirm the module layout.
    links_search_engine: List[StrictStr] = get_search_results(query=query)
    links_crawler: List[StrictStr] = []
    for link in links_search_engine:
        try:
            html_loader = RecursiveUrlLoader(url=link, max_depth=1, timeout=5)
            docs: List[Document] = html_loader.load()
            for doc in docs:
                source = doc.metadata.get("source")
                # BUG FIX: metadata may lack "source"; the original appended
                # None into a list typed List[StrictStr].
                if source:
                    links_crawler.append(source)
        except Exception as e:
            # Best-effort crawl: one failing site must not abort the rest.
            print(f"Error: {e}")
    # Set union removes duplicates; ordering is unspecified.
    return list(set(links_crawler + links_search_engine))


if __name__ == "__main__":
    print(crawl("What is the capital of France"))
chatacter/agents/llm.py ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import time
from typing import List

from .crawler import crawl
from .vector_database import add_data, get_chunks, query_db
from langchain.chains import LLMChain
from langchain_core.prompts import ChatPromptTemplate
from langchain_groq import ChatGroq
from pydantic import StrictStr
from qdrant_client.fastembed_common import QueryResponse

# Groq-hosted chat model used to generate the character's answers.
chat = ChatGroq(
    model="llama3-70b-8192",
    verbose=True,
)  # type: ignore


def get_response(query: str, character: str) -> tuple[str, str]:
    """Answer ``query`` in the voice of ``character`` using RAG.

    Pipeline: crawl the web for links, chunk and store the pages in the
    vector database, retrieve the best-matching context, then prompt the
    LLM with it. Returns ``(answer, elapsed_seconds_as_str)``.
    """
    start_time: float = time.time()
    print("Query:", query, "Character:", character)
    print("start crawling")
    links: List[StrictStr] = crawl(query=query)
    print("start getting chunks")
    # BUG FIX: the original annotated this as List[Element] but `Element`
    # was never imported; keep the list untyped here instead.
    chunks = []
    for link in links:
        chunks.extend(get_chunks(url=link))
    print("start adding data to db")
    add_data(chunks=chunks)
    print("start querying db")
    results: List[QueryResponse] = query_db(query=query)
    # BUG FIX: QueryResponse is not subscriptable (`results[0]["text"]`
    # raised TypeError); use its `document` field, and fall back to an
    # empty context when retrieval returned nothing instead of crashing.
    # TODO(review): confirm the field name against the qdrant-client version.
    context = results[0].document if results else ""
    prompt: ChatPromptTemplate = ChatPromptTemplate.from_messages(
        messages=[
            (
                "system",
                "Act as {character}. Answer in one statement. Answer the question using the provided context. Context: {context}",
            ),
            ("human", "{text}"),
        ]
    )
    chain = LLMChain(
        prompt=prompt,
        llm=chat,
        verbose=True,
    )
    response = chain.invoke(
        {"text": query, "character": character, "context": context}
    )
    end_time: float = time.time()
    # BUG FIX: LLMChain.invoke returns a dict (output under "text");
    # the original accessed a non-existent `.content` attribute.
    return str(response["text"]), str(end_time - start_time)
chatacter/agents/search.py ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
from agno.agent import Agent
from agno.models.groq import Groq
from agno.tools.googlesearch import GoogleSearchTools
from pydantic import StrictInt, StrictStr

# Directives handed to the agent, one instruction per entry.
_AGENT_INSTRUCTIONS = [
    "Given a topic by the user, respond with 4 latest news items about that topic.",
    "Search for 10 news items and select the top 4 unique items.",
    "Search in English and in French.",
]

# Google-search-backed news agent running on Groq's Llama 3.3 model.
agent = Agent(
    model=Groq(id="llama-3.3-70b-versatile"),
    tools=[GoogleSearchTools()],
    description="You are a news agent that helps users find the latest news.",
    instructions=_AGENT_INSTRUCTIONS,
    show_tool_calls=True,
    debug_mode=True,
)

if __name__ == "__main__":
    # Smoke test: ask the agent for news about a fixed topic.
    agent.print_response(
        message="Mistral AI",
        markdown=True,
        show_full_reasoning=True,
    )
chatacter/db/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+
chatacter/db/vector_database.py ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
from typing import Any, List

from pydantic import StrictStr
from qdrant_client import QdrantClient
from qdrant_client.fastembed_common import QueryResponse
from unstructured.chunking.title import chunk_by_title
from unstructured.cleaners.core import (
    bytes_string_to_string,
    clean_extra_whitespace,
    clean_non_ascii_chars,
    replace_unicode_quotes,
)
from unstructured.documents.elements import Element
from unstructured.partition.auto import partition

from chatacter import load_settings

# BUG FIX: the original referenced `settings`, `Element`, `partition`, the
# cleaner helpers and `chunk_by_title` without importing or defining any of
# them, so every function raised NameError when called.
settings = load_settings()

# Local Qdrant instance holding the crawled context chunks.
client = QdrantClient(host="localhost", port=6333)


def get_chunks(url: StrictStr) -> List[Element]:
    """Download ``url``, clean each element's text, and chunk by title."""
    elements: List[Element] = partition(url=url)
    for element in elements:
        element.text = clean_non_ascii_chars(text=element.text)
        element.text = replace_unicode_quotes(text=element.text)
        element.text = clean_extra_whitespace(text=element.text)
        element.text = bytes_string_to_string(text=element.text)
    return chunk_by_title(elements=elements)


def add_data(chunks: List[Element]) -> None:
    """Insert chunk texts and their metadata into the vector collection.

    NOTE(review): ids restart at 1 on every call, so repeated calls
    overwrite earlier points — confirm this is intended.
    """
    docs: List[StrictStr] = [chunk.text for chunk in chunks]
    metadata: List[dict[str, Any]] = [chunk.metadata.to_dict() for chunk in chunks]
    ids = list(range(1, len(chunks) + 1))
    client.add(
        collection_name=settings.vector_database_name,
        documents=docs,
        metadata=metadata,
        ids=ids,
    )


def query_db(query: StrictStr) -> List[QueryResponse]:
    """Run a semantic search and return the ranked hits."""
    return client.query(
        collection_name=settings.vector_database_name,
        query_text=query,
    )


if __name__ == "__main__":
    # Manual end-to-end check against a known page.
    url = "https://en.wikipedia.org/wiki/Napoleon"
    chunks: List[Element] = get_chunks(url=url)
    add_data(chunks=chunks)
    r: List[QueryResponse] = query_db(query="Napoleon Bonaparte")
    print(len(r))
    print(r)
data/image/Einstein.jpg ADDED

Git LFS Details

  • SHA256: 4c97e39f0682557315535871d13878aef8b89cd16edfd657c9c528a30ba25eb4
  • Pointer size: 131 Bytes
  • Size of remote file: 161 kB
data/image/Napoleon.jpg ADDED
requirements.txt ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # This file was autogenerated by uv via the following command:
2
+ # uv export --no-dev --frozen --no-hashes --output-file requirements.txt
3
+ agno==1.2.4
4
+ aiofiles==23.2.1
5
+ annotated-types==0.7.0
6
+ anyio==4.9.0
7
+ audioop-lts==0.2.1 ; python_full_version >= '3.13'
8
+ beautifulsoup4==4.13.3
9
+ certifi==2025.1.31
10
+ charset-normalizer==3.4.1
11
+ click==8.1.8
12
+ colorama==0.4.6 ; sys_platform == 'win32'
13
+ distro==1.9.0
14
+ docstring-parser==0.16
15
+ fastapi==0.115.12
16
+ ffmpy==0.5.0
17
+ filelock==3.18.0
18
+ fsspec==2025.3.0
19
+ gitdb==4.0.12
20
+ gitpython==3.1.44
21
+ googlesearch-python==1.3.0
22
+ gradio==5.23.1
23
+ gradio-client==1.8.0
24
+ groovy==0.1.2
25
+ groq==0.20.0
26
+ h11==0.14.0
27
+ httpcore==1.0.7
28
+ httpx==0.28.1
29
+ huggingface-hub==0.29.3
30
+ idna==3.10
31
+ jinja2==3.1.6
32
+ markdown-it-py==3.0.0
33
+ markupsafe==3.0.2
34
+ mdurl==0.1.2
35
+ numpy==2.2.4
36
+ orjson==3.10.16
37
+ packaging==24.2
38
+ pandas==2.2.3
39
+ pillow==10.4.0
40
+ pycountry==24.6.1
41
+ pydantic==2.10.6
42
+ pydantic-core==2.27.2
43
+ pydantic-settings==2.8.1
44
+ pydub==0.25.1
45
+ pygments==2.19.1
46
+ python-dateutil==2.9.0.post0
47
+ python-dotenv==1.1.0
48
+ python-multipart==0.0.20
49
+ pytz==2025.2
50
+ pyyaml==6.0.2
51
+ requests==2.32.3
52
+ rich==13.9.4
53
+ ruff==0.11.2 ; sys_platform != 'emscripten'
54
+ safehttpx==0.1.6
55
+ semantic-version==2.10.0
56
+ shellingham==1.5.4
57
+ six==1.17.0
58
+ smmap==5.0.2
59
+ sniffio==1.3.1
60
+ soupsieve==2.6
61
+ starlette==0.46.1
62
+ tomli==2.2.1
63
+ tomlkit==0.13.2
64
+ tqdm==4.67.1
65
+ typer==0.15.2
66
+ typing-extensions==4.12.2
67
+ tzdata==2025.2
68
+ urllib3==2.3.0
69
+ uvicorn==0.34.0 ; sys_platform != 'emscripten'
70
+ websockets==15.0.1