Spaces:
Sleeping
Sleeping
Upload folder using huggingface_hub
Browse files- .gitattributes +1 -0
- README.md +8 -6
- app.py +64 -0
- chatacter/__init__.py +32 -0
- chatacter/agents/__init__.py +1 -0
- chatacter/agents/crawler.py +27 -0
- chatacter/agents/llm.py +49 -0
- chatacter/agents/search.py +24 -0
- chatacter/db/__init__.py +1 -0
- chatacter/db/vector_database.py +48 -0
- data/image/Einstein.jpg +3 -0
- data/image/Napoleon.jpg +0 -0
- requirements.txt +70 -0
.gitattributes
CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
+
data/image/Einstein.jpg filter=lfs diff=lfs merge=lfs -text
|
README.md
CHANGED
@@ -1,12 +1,14 @@
|
|
1 |
---
|
2 |
-
title:
|
3 |
-
emoji:
|
4 |
colorFrom: gray
|
5 |
-
colorTo:
|
6 |
sdk: gradio
|
7 |
-
sdk_version: 5.23.1
|
8 |
app_file: app.py
|
9 |
-
pinned:
|
|
|
10 |
---
|
11 |
|
12 |
-
|
|
|
|
|
|
1 |
---
|
2 |
+
title: Chatacter
|
3 |
+
emoji: 👀
|
4 |
colorFrom: gray
|
5 |
+
colorTo: blue
|
6 |
sdk: gradio
|
|
|
7 |
app_file: app.py
|
8 |
+
pinned: true
|
9 |
+
short_description: Chat with Characters
|
10 |
---
|
11 |
|
12 |
+
# App part of the Chatacter Backend
|
13 |
+
|
14 |
+
[](https://microsoft.github.io/pyright/)
|
app.py
ADDED
@@ -0,0 +1,64 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
from huggingface_hub import InferenceClient
|
3 |
+
|
4 |
+
"""
|
5 |
+
For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
|
6 |
+
"""
|
7 |
+
client = InferenceClient(model="HuggingFaceH4/zephyr-7b-beta")
|
8 |
+
|
9 |
+
|
10 |
+
def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    """Stream a chat completion for *message* given the conversation history.

    Args:
        message: The latest user message.
        history: Prior ``(user, assistant)`` turn pairs from the ChatInterface.
        system_message: System prompt prepended to the conversation.
        max_tokens: Maximum number of new tokens to generate.
        temperature: Sampling temperature.
        top_p: Nucleus-sampling probability mass.

    Yields:
        The accumulated response text after each streamed chunk.
    """
    messages = [{"role": "system", "content": system_message}]

    for user_turn, assistant_turn in history:
        if user_turn:
            messages.append({"role": "user", "content": user_turn})
        if assistant_turn:
            messages.append({"role": "assistant", "content": assistant_turn})

    messages.append({"role": "user", "content": message})

    response = ""

    # ``chunk`` (not ``message``) avoids shadowing the parameter above.
    for chunk in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        token = chunk.choices[0].delta.content
        # Streamed deltas may carry no content (e.g. role-only chunks);
        # guard against ``None`` before concatenating.
        if token:
            response += token
        yield response
|
41 |
+
|
42 |
+
|
43 |
+
"""
|
44 |
+
For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
|
45 |
+
"""
|
46 |
+
# Build the Gradio chat UI around ``respond``; the extra controls map, in
# order, onto the generation parameters ``respond`` accepts after ``history``.
system_box = gr.Textbox(value="You are a friendly Chatbot.", label="System message")
tokens_slider = gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens")
temperature_slider = gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature")
top_p_slider = gr.Slider(
    minimum=0.1,
    maximum=1.0,
    value=0.95,
    step=0.05,
    label="Top-p (nucleus sampling)",
)

demo = gr.ChatInterface(
    fn=respond,
    additional_inputs=[system_box, tokens_slider, temperature_slider, top_p_slider],
)


if __name__ == "__main__":
    demo.launch()
|
chatacter/__init__.py
ADDED
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from functools import lru_cache

from pydantic import BaseModel


class AssetsSettings(BaseModel):
    """Filesystem locations of the media assets the backend reads/writes."""

    # Output path for the generated speech track.
    audio: str = "./assets/audio/AUDIO.wav"
    # Directory holding character portrait images.
    image: str = "./assets/image/"
    # Output path for the generated video.
    video: str = "./assets/video/VIDEO.mp4"


class HostSettings(BaseModel):
    """Base URLs of the companion generator services."""

    voice_generator: str = "http://localhost:8001/"
    video_generator: str = "http://localhost:8002/"


class Settings(BaseModel):
    """Top-level application settings for the Chatacter backend."""

    app_name: str = "Chatacter"
    assets: AssetsSettings = AssetsSettings()
    # Currently selected character; empty string until one is chosen.
    character: str = ""
    host: HostSettings = HostSettings()
    vector_database_name: str = "chatacter"


@lru_cache
def load_settings() -> Settings:
    """Return the process-wide ``Settings`` instance (cached after first call)."""
    return Settings()


if __name__ == "__main__":
    settings: Settings = load_settings()
    print(settings.model_dump_json(indent=4))
|
chatacter/agents/__init__.py
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
|
chatacter/agents/crawler.py
ADDED
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from typing import List

from langchain_community.document_loaders import RecursiveUrlLoader
from langchain_core.documents.base import Document
from pydantic import StrictStr

# NOTE(review): ``get_search_results`` is not defined in .search in this
# commit (search.py only builds an ``agent``) — confirm against the repo.
from .search import get_search_results


def crawl(query: str) -> List[StrictStr]:
    """Return a deduplicated list of URLs related to *query*.

    Seeds from the search-engine results for *query*, then shallowly loads
    each link (depth 1, 5s timeout) and collects the ``source`` URL of every
    document found. Links that fail to load are skipped (best effort).
    """
    # Seed URLs from the search results.
    links_search_engine: List[StrictStr] = get_search_results(query=query)
    links_crawler: List[StrictStr] = []
    # Shallowly load each seed link and harvest discovered source URLs.
    for link in links_search_engine:
        try:
            html_loader = RecursiveUrlLoader(url=link, max_depth=1, timeout=5)
            docs: List[Document] = html_loader.load()
            for doc in docs:
                source = doc.metadata.get("source")
                # ``source`` may be missing from the metadata; skip rather
                # than propagate ``None`` into the returned URL list.
                if source:
                    links_crawler.append(source)
        except Exception as e:  # best-effort crawl: report and move on
            print(f"Error: {e}")
    # Deduplicate; note ``set`` makes the ordering arbitrary.
    return list(set(links_crawler + links_search_engine))


if __name__ == "__main__":
    print(crawl("What is the capital of France"))
|
chatacter/agents/llm.py
ADDED
@@ -0,0 +1,49 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import time
from typing import List

from langchain_core.prompts import ChatPromptTemplate
from langchain_groq import ChatGroq
from pydantic import StrictStr
from qdrant_client.fastembed_common import QueryResponse

from ..db.vector_database import add_data, get_chunks, query_db
from .crawler import crawl

chat = ChatGroq(
    model="llama3-70b-8192",
    verbose=True,
)  # type: ignore


def get_response(query: str, character: str) -> tuple[str, str]:
    """Answer *query* in the voice of *character*, using web-retrieved context.

    Pipeline: crawl links for the query, chunk each page, index the chunks in
    the vector database, retrieve the best match, and prompt the LLM with it.

    Returns:
        A ``(answer, elapsed_seconds)`` pair, both rendered as strings.
    """
    start_time: float = time.time()
    print("Query:", query, "Character:", character)
    print("start crawling")
    links: List[StrictStr] = crawl(query=query)
    print("start getting chunks")
    # Chunks are the ``unstructured`` Element objects returned by get_chunks.
    chunks: list = []
    for link in links:
        chunks.extend(get_chunks(url=link))
    print("start adding data to db")
    add_data(chunks=chunks)
    print("start querying db")
    results: List[QueryResponse] = query_db(query=query)
    # QueryResponse is not subscriptable; the matched text is ``.document``.
    # Guard against an empty result set instead of raising IndexError.
    # TODO(review): confirm ``.document`` is the intended context field.
    context: str = results[0].document if results else ""
    prompt: ChatPromptTemplate = ChatPromptTemplate.from_messages(
        messages=[
            (
                "system",
                "Act as {character}. Answer in one statement. Answer the question using the provided context. Context: {context}",
            ),
            ("human", "{text}"),
        ]
    )
    # Compose prompt and model as a runnable: ``invoke`` returns an AIMessage
    # whose ``.content`` carries the answer. (LLMChain is deprecated and its
    # ``invoke`` returns a dict, which has no ``.content`` attribute.)
    chain = prompt | chat
    response = chain.invoke(
        {"text": query, "character": character, "context": context}
    )
    end_time: float = time.time()
    return str(object=response.content), str(object=end_time - start_time)
|
chatacter/agents/search.py
ADDED
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from agno.agent import Agent
from agno.models.groq import Groq
from agno.tools.googlesearch import GoogleSearchTools
from pydantic import StrictInt, StrictStr

# Behavioral instructions for the news-search agent, kept separate for
# readability; passed verbatim to the Agent below.
_news_instructions = [
    "Given a topic by the user, respond with 4 latest news items about that topic.",
    "Search for 10 news items and select the top 4 unique items.",
    "Search in English and in French.",
]

# News agent: Groq-hosted Llama 3.3 with Google Search tooling, verbose
# (tool calls shown, debug mode on).
agent = Agent(
    model=Groq(id="llama-3.3-70b-versatile"),
    tools=[GoogleSearchTools()],
    description="You are a news agent that helps users find the latest news.",
    instructions=_news_instructions,
    show_tool_calls=True,
    debug_mode=True,
)

if __name__ == "__main__":
    agent.print_response(
        message="Mistral AI",
        markdown=True,
        show_full_reasoning=True,
    )
|
chatacter/db/__init__.py
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
|
chatacter/db/vector_database.py
ADDED
@@ -0,0 +1,48 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from typing import Any, List

from pydantic import StrictStr
from qdrant_client import QdrantClient
from qdrant_client.fastembed_common import QueryResponse
from unstructured.chunking.title import chunk_by_title
from unstructured.cleaners.core import (
    bytes_string_to_string,
    clean_extra_whitespace,
    clean_non_ascii_chars,
    replace_unicode_quotes,
)
from unstructured.documents.elements import Element
from unstructured.partition.auto import partition

from .. import Settings, load_settings

# Module-level settings/client shared by all helpers below. The original
# referenced an undefined ``settings``; obtain it from the package's cached
# loader.
settings: Settings = load_settings()
client = QdrantClient(host="localhost", port=6333)


def get_chunks(url: StrictStr) -> List[Element]:
    """Partition the page at *url*, clean each element's text, and return
    title-based chunks suitable for indexing."""
    elements: List[Element] = partition(url=url)
    for i in range(len(elements)):
        elements[i].text = clean_non_ascii_chars(text=elements[i].text)
        elements[i].text = replace_unicode_quotes(text=elements[i].text)
        elements[i].text = clean_extra_whitespace(text=elements[i].text)
        elements[i].text = bytes_string_to_string(text=elements[i].text)
    return chunk_by_title(elements=elements)


def add_data(chunks: List[Element]) -> None:
    """Index *chunks* (text + element metadata) into the configured collection.

    NOTE(review): ids restart at 1 on every call, so repeated calls overwrite
    earlier points — confirm this is intended.
    """
    docs: List[StrictStr] = [chunk.text for chunk in chunks]
    metadata: List[dict[str, Any]] = [chunk.metadata.to_dict() for chunk in chunks]
    ids = list(range(1, len(chunks) + 1))
    client.add(
        collection_name=settings.vector_database_name,
        documents=docs,
        metadata=metadata,
        ids=ids,
    )


def query_db(query: StrictStr) -> List[QueryResponse]:
    """Run a text query against the collection and return scored matches."""
    return client.query(
        collection_name=settings.vector_database_name,
        query_text=query,
    )


if __name__ == "__main__":
    url = "https://en.wikipedia.org/wiki/Napoleon"
    chunks: List[Element] = get_chunks(url=url)
    add_data(chunks=chunks)
    r: List[QueryResponse] = query_db(query="Napoleon Bonaparte")
    print(len(r))
    print(r)
|
data/image/Einstein.jpg
ADDED
![]() |
Git LFS Details
|
data/image/Napoleon.jpg
ADDED
![]() |
requirements.txt
ADDED
@@ -0,0 +1,70 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# This file was autogenerated by uv via the following command:
|
2 |
+
# uv export --no-dev --frozen --no-hashes --output-file requirements.txt
|
3 |
+
agno==1.2.4
|
4 |
+
aiofiles==23.2.1
|
5 |
+
annotated-types==0.7.0
|
6 |
+
anyio==4.9.0
|
7 |
+
audioop-lts==0.2.1 ; python_full_version >= '3.13'
|
8 |
+
beautifulsoup4==4.13.3
|
9 |
+
certifi==2025.1.31
|
10 |
+
charset-normalizer==3.4.1
|
11 |
+
click==8.1.8
|
12 |
+
colorama==0.4.6 ; sys_platform == 'win32'
|
13 |
+
distro==1.9.0
|
14 |
+
docstring-parser==0.16
|
15 |
+
fastapi==0.115.12
|
16 |
+
ffmpy==0.5.0
|
17 |
+
filelock==3.18.0
|
18 |
+
fsspec==2025.3.0
|
19 |
+
gitdb==4.0.12
|
20 |
+
gitpython==3.1.44
|
21 |
+
googlesearch-python==1.3.0
|
22 |
+
gradio==5.23.1
|
23 |
+
gradio-client==1.8.0
|
24 |
+
groovy==0.1.2
|
25 |
+
groq==0.20.0
|
26 |
+
h11==0.14.0
|
27 |
+
httpcore==1.0.7
|
28 |
+
httpx==0.28.1
|
29 |
+
huggingface-hub==0.29.3
|
30 |
+
idna==3.10
|
31 |
+
jinja2==3.1.6
|
32 |
+
markdown-it-py==3.0.0
|
33 |
+
markupsafe==3.0.2
|
34 |
+
mdurl==0.1.2
|
35 |
+
numpy==2.2.4
|
36 |
+
orjson==3.10.16
|
37 |
+
packaging==24.2
|
38 |
+
pandas==2.2.3
|
39 |
+
pillow==10.4.0
|
40 |
+
pycountry==24.6.1
|
41 |
+
pydantic==2.10.6
|
42 |
+
pydantic-core==2.27.2
|
43 |
+
pydantic-settings==2.8.1
|
44 |
+
pydub==0.25.1
|
45 |
+
pygments==2.19.1
|
46 |
+
python-dateutil==2.9.0.post0
|
47 |
+
python-dotenv==1.1.0
|
48 |
+
python-multipart==0.0.20
|
49 |
+
pytz==2025.2
|
50 |
+
pyyaml==6.0.2
|
51 |
+
requests==2.32.3
|
52 |
+
rich==13.9.4
|
53 |
+
ruff==0.11.2 ; sys_platform != 'emscripten'
|
54 |
+
safehttpx==0.1.6
|
55 |
+
semantic-version==2.10.0
|
56 |
+
shellingham==1.5.4
|
57 |
+
six==1.17.0
|
58 |
+
smmap==5.0.2
|
59 |
+
sniffio==1.3.1
|
60 |
+
soupsieve==2.6
|
61 |
+
starlette==0.46.1
|
62 |
+
tomli==2.2.1
|
63 |
+
tomlkit==0.13.2
|
64 |
+
tqdm==4.67.1
|
65 |
+
typer==0.15.2
|
66 |
+
typing-extensions==4.12.2
|
67 |
+
tzdata==2025.2
|
68 |
+
urllib3==2.3.0
|
69 |
+
uvicorn==0.34.0 ; sys_platform != 'emscripten'
|
70 |
+
websockets==15.0.1
|