Finance

Running on Zero

File size: 5,771 Bytes

5416372
a2e6c05
 
d81ed7c
a2e6c05
d81ed7c
a2e6c05
 
 
20636b9
 
d777e50
 
a2e6c05
 
 
 
 
 
 
5fe2c9a
 
3b39700
a2e6c05
5fe2c9a
63ba25e
a2e6c05
e976361
63ba25e
a2e6c05
d81ed7c
 
90e8d67
 
 
8b77502
90e8d67
1240624
2a18dea
36f3f97
e1de38d
1240624
90e8d67
 
583461f
c2e1d70
d81ed7c
 
 
 
 
 
e976361
 
a2e6c05
d81ed7c
a2e6c05
 
 
 
 
 
0a910e6
 
 
a2e6c05
 
e976361
 
a2e6c05
e976361
4993069
a2e6c05
 
d81ed7c
a2e6c05
 
db00aa2
a2e6c05
 
 
 
 
e976361
 
 
a2e6c05
e976361
a2e6c05
d81ed7c
a2e6c05
d81ed7c
a2e6c05
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d81ed7c
 
 
 
e976361
d81ed7c
 
 
 
 
 
e976361
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d81ed7c
20636b9
d81ed7c
b23a519
 
 
88a7fc3
18e5a55
5619915
18e5a55
3b39700
014d21e
 
a2e6c05
 
 
 
 
 
09dc1cd
08a4d08
d81ed7c
 
2c8259d

import spaces
import json
import subprocess
import gradio as gr
from huggingface_hub import hf_hub_download

# Runtime dependencies are installed at startup. llama-cpp-python is pulled
# with the extra "cu124" wheel index so a prebuilt wheel from that index can
# be used.
_PIP_INSTALLS = (
    [
        "pip",
        "install",
        "llama-cpp-python==0.2.75",
        "--extra-index-url",
        "https://abetlen.github.io/llama-cpp-python/whl/cu124",
    ],
    ["pip", "install", "llama-cpp-agent==0.2.10"],
)
for _pip_command in _PIP_INSTALLS:
    # Argument lists avoid going through a shell, and check=True aborts
    # startup when an install fails instead of continuing and hitting an
    # ImportError later, on the first chat request.
    subprocess.run(_pip_command, check=True)

# (repo_id, filename) pairs of the GGUF weights fetched into ./models.
_MODEL_FILES = (
    (
        "baconnier/Finance_dolphin-2.9.1-yi-1.5-34b_GGUF",
        "Finance_dolphin-2.9.1-yi-1.5-34b-Q8_0.gguf",
    ),
    (
        "baconnier/Finance_dolphin-2.9.1-yi-1.5-9b_GGUF",
        "Finance_dolphin-2.9.1-yi-1.5-9b_Q8_0.gguf",
    ),
)
for _repo_id, _filename in _MODEL_FILES:
    hf_hub_download(repo_id=_repo_id, filename=_filename, local_dir="./models")

# Disabled alternatives, kept for reference (repo_id / filename):
#   baconnier/finance_dolphin_orpo_llama3_8B_r64_51K_GGUF / finance_dolphin_orpo_llama3_8B_r64_51K_GGUF-unsloth.Q8_0.gguf
#   crusoeai/dolphin-2.9.1-llama-3-8b-GGUF / dolphin-2.9.1-llama-3-8b.Q6_K.gguf

# Custom stylesheet passed to gr.ChatInterface through its `css` argument.
# It adjusts message-row alignment and bubble borders, and sets colours for
# the `.dark` user/assistant variants; every declaration is `!important`.
css = """
.message-row {
    justify-content: space-evenly !important;
}
.message-bubble-border {
    border-radius: 6px !important;
}
.dark.message-bubble-border {
    border-color: #21293b !important;
}
.dark.user {
    background: #0a1120 !important;
}
.dark.assistant {
    background: transparent !important;
}
"""

# HTML snippet used as the `placeholder` of the gr.Chatbot component: a card
# with two images hosted in the Space repository and a short disclaimer.
PLACEHOLDER = """
<div class="message-bubble-border" style="display:flex; max-width: 600px; border-radius: 8px; box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1); backdrop-filter: blur(10px);">
    <figure style="margin: 0;">
        <img src="https://huggingface.co/spaces/baconnier/Finance/resolve/main/banker.jpg" style="width: 100%; height: 100%; border-radius: 8px;">
    </figure>
    <div style="padding: .5rem 1.5rem;">
        <img src="https://huggingface.co/spaces/baconnier/Finance/resolve/main/banker_plus.jpg" style="width: 100%; height: 10%; border-radius: 8px;">    
        <h2 style="text-align: left; font-size: 1.5rem; font-weight: 700; margin-bottom: 0.5rem;"> </h2>
        <p style="text-align: left; font-size: 16px; line-height: 1.5; margin-bottom: 15px;">Banker++ is trained to act like a Senior Banker. Use this template for learning purposes only.</p>
    </div>    
</div>
"""

@spaces.GPU(duration=120)
def respond(
    message,
    history: list[tuple[str, str]],
    max_tokens,
    temperature,
    top_p,
    top_k,
    repeat_penalty,
    model,
):
    """Stream a chat reply to ``message`` from the selected GGUF model.

    Args:
        message: The latest user message.
        history: Earlier turns; element 0 of each entry is replayed as the
            user message and element 1 as the assistant message.
        max_tokens: Copied onto the sampling settings as ``max_tokens``.
        temperature: Copied onto the sampling settings as ``temperature``.
        top_p: Copied onto the sampling settings as ``top_p``.
        top_k: Copied onto the sampling settings as ``top_k``.
        repeat_penalty: Copied onto the sampling settings as
            ``repeat_penalty``.
        model: File name of the model, loaded from ``models/<model>``.

    Yields:
        The reply text accumulated so far, once per streamed chunk.
    """
    # Imported inside the function rather than at module level; these
    # packages are pip-installed when the script starts.
    from llama_cpp import Llama
    from llama_cpp_agent import LlamaCppAgent, MessagesFormatterType
    from llama_cpp_agent.providers import LlamaCppPythonProvider
    from llama_cpp_agent.chat_history import BasicChatHistory
    from llama_cpp_agent.chat_history.messages import Roles

    print(message)
    print(history)

    # A fresh model instance is created on every call.
    provider = LlamaCppPythonProvider(
        Llama(
            model_path=f"models/{model}",
            flash_attn=True,
            n_threads=40,
            n_gpu_layers=81,
            n_batch=1024,
            n_ctx=8192,
        )
    )
    agent = LlamaCppAgent(
        provider,
        system_prompt="You are Alan, a financial analyst.",
        predefined_messages_formatter_type=MessagesFormatterType.CHATML,
        debug_output=True,
    )

    # Start from the provider defaults and overlay the UI-selected values.
    sampling = provider.get_provider_default_settings()
    overrides = {
        "temperature": temperature,
        "top_k": top_k,
        "top_p": top_p,
        "max_tokens": max_tokens,
        "repeat_penalty": repeat_penalty,
        "stream": True,
    }
    for field_name, value in overrides.items():
        setattr(sampling, field_name, value)

    # Replay the earlier turns into the agent's chat history.
    past_turns = BasicChatHistory()
    for turn in history:
        user_text, assistant_text = turn[0], turn[1]
        past_turns.add_message({'role': Roles.user, 'content': user_text})
        past_turns.add_message({'role': Roles.assistant, 'content': assistant_text})

    chunks = agent.get_chat_response(
        message,
        llm_sampling_settings=sampling,
        chat_history=past_turns,
        returns_streaming_generator=True,
        print_output=False,
    )

    # Yield the growing reply so the UI can redraw the whole message.
    reply_so_far = ""
    for chunk in chunks:
        reply_so_far += chunk
        yield reply_so_far

# Model files offered in the UI; the second entry (34b) is preselected.
_MODEL_CHOICES = [
    "Finance_dolphin-2.9.1-yi-1.5-9b_Q8_0.gguf",
    "Finance_dolphin-2.9.1-yi-1.5-34b-Q8_0.gguf",
]

# Extra controls, listed in the same order as respond()'s parameters that
# follow (message, history).
_extra_inputs = [
    gr.Slider(minimum=1, maximum=8192, value=8192, step=1, label="Max tokens"),
    gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
    gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p"),
    gr.Slider(minimum=0, maximum=100, value=40, step=1, label="Top-k"),
    gr.Slider(minimum=0.0, maximum=2.0, value=1.1, step=0.1, label="Repetition penalty"),
    gr.Dropdown(_MODEL_CHOICES, value=_MODEL_CHOICES[1], label="Model"),
]

# Soft theme with overrides, most of them for the dark colour scheme.
_theme = gr.themes.Soft(
    primary_hue="indigo",
    secondary_hue="blue",
    neutral_hue="gray",
    font=[gr.themes.GoogleFont("Exo"), "ui-sans-serif", "system-ui", "sans-serif"],
).set(
    body_background_fill_dark="#0f172a",
    block_background_fill_dark="#0f172a",
    block_border_width="1px",
    block_title_background_fill_dark="#070d1b",
    # input_background_fill_dark="#0c1425",  (override currently disabled)
    button_secondary_background_fill_dark="#070d1b",
    border_color_primary_dark="#21293b",
    background_fill_secondary_dark="#0f172a",
    color_accent_soft_dark="transparent",
)

# Chat area showing the PLACEHOLDER card.
_chatbot = gr.Chatbot(scale=1, placeholder=PLACEHOLDER)

# The chat application: respond() produces the replies.
demo = gr.ChatInterface(
    respond,
    additional_inputs=_extra_inputs,
    theme=_theme,
    css=css,
    retry_btn="Retry",
    undo_btn="Undo",
    clear_btn="Clear",
    submit_btn="Send",
    description="BANKER++ is fine-tuned on Cognitive Computation: Chat Dolphin 🐬 2.9.1-yi-1.5-34b",
    chatbot=_chatbot,
)

if __name__ == "__main__":
    demo.launch()