import os

import hydra

import aiflows
from aiflows.backends.api_info import ApiInfo
from aiflows.utils.general_helpers import read_yaml_file, quick_load_api_keys

from aiflows import logging
from aiflows.flow_cache import CACHING_PARAMETERS, clear_cache

from aiflows.utils import serving
from aiflows.workers import run_dispatch_worker_thread
from aiflows.messages import FlowMessage
from aiflows.interfaces import KeyInterface
from aiflows.utils.colink_utils import start_colink_server

CACHING_PARAMETERS.do_caching = False  # Set to True to enable caching
# clear_cache() # Uncomment this line to clear the cache

logging.set_verbosity_debug()
# Uncomment the lines below if using the huggingface backend:
# litellm.set_verbose=True enables debug logging of requests and responses, and
# litellm.drop_params=True automatically drops parameters that litellm does not support
# (the OpenAI and Huggingface response formats differ, so this is needed to translate
# Huggingface responses into the OpenAI format).
# import litellm
# litellm.set_verbose=True
# litellm.drop_params=True

dependencies = [
    {"url": "aiflows/ChatFlowModule", "revision": os.getcwd()},
]
from aiflows import flow_verse
flow_verse.sync_dependencies(dependencies)
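# flow_verse.sync_dependencies pulls aiflows/ChatFlowModule into the local
# flow_modules/ package (here from the current working directory, since
# revision=os.getcwd()); that is where the fully qualified class name
# "flow_modules.aiflows.ChatFlowModule.ChatAtomicFlow" used in step 3 resolves from.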

if __name__ == "__main__":
   
    #1. ~~~~~ Set up a colink server ~~~~
    FLOW_MODULES_PATH = "./"
    
    cl = start_colink_server()
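    # start_colink_server() spins up a local colink server and returns a client
    # handle (cl); the serving utilities and the worker thread below all attach to it.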
    

    #2. ~~~~~Load flow config~~~~~~
    root_dir = "."
    cfg_path = os.path.join(root_dir, "demo.yaml")
    cfg = read_yaml_file(cfg_path)
    
    #2.1 ~~~ Set the API information ~~~
    # OpenAI backend
    api_information = [ApiInfo(backend_used="openai",
                              api_key = os.getenv("OPENAI_API_KEY"))]
    # Huggingface backend
    # Here, API_BASE is the URL of your self-hosted inference server. This way, you can run any flow against your own server.
    # api_information = [ApiInfo(backend_used="huggingface",
    #                           api_key = os.getenv("HUGGINGFACE_API_KEY"), api_base="http://0.0.0.0:5000/v1/completions")]
    # # Azure backend
    # api_information = [ApiInfo(backend_used = "azure",
    #                            api_base = os.getenv("AZURE_API_BASE"),
    #                            api_key = os.getenv("AZURE_OPENAI_KEY"),
    #                            api_version = os.getenv("AZURE_API_VERSION"))]
    
    quick_load_api_keys(cfg, api_information, key="api_infos")
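    # quick_load_api_keys injects the ApiInfo objects above into the config wherever
    # the "api_infos" key appears, so the backend defined in demo.yaml picks up the
    # provider and credentials selected here.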

    
    #3. ~~~~ Serve The Flow ~~~~
    serving.serve_flow(
        cl=cl,
        flow_class_name="flow_modules.aiflows.ChatFlowModule.ChatAtomicFlow",
        flow_endpoint="ChatAtomicFlow",
    )
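    # serve_flow registers ChatAtomicFlow with the colink server under the
    # "ChatAtomicFlow" endpoint, so that instances of it can be requested via
    # get_flow_instance below.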

    #4. ~~~~~Start A Worker Thread~~~~~
    run_dispatch_worker_thread(cl)
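    # The dispatch worker runs in a background thread and executes the flow
    # computations for messages arriving at the colink server; without at least one
    # worker, the input message sent below would never be processed.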

    #5. ~~~~~Mount the flow and get its proxy~~~~~~
    proxy_flow = serving.get_flow_instance(
        cl=cl,
        flow_endpoint="ChatAtomicFlow",
        user_id="local",
        config_overrides=cfg,
    )
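    # get_flow_instance mounts an instance of the served flow (with demo.yaml applied
    # as config overrides) and returns a local proxy; calls on proxy_flow are routed
    # through the colink server to that instance.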
    

    #6. ~~~ Get the data ~~~
    data = {"id": 0, "question": "What is the capital of France?"}  # This can be a list of samples
    # data = {"id": 0, "question": "Who was the NBA champion in 2023?"}  # This can be a list of samples
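    # A batched input (hypothetical example, following the "list of samples" note above)
    # could look like:
    # data = [
    #     {"id": 0, "question": "What is the capital of France?"},
    #     {"id": 1, "question": "What is the capital of Switzerland?"},
    # ]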
    
 
    # Package the data into an input message via the proxy flow
    input_message = proxy_flow.package_input_message(data = data)
    
    #7. ~~~ Run inference ~~~
    future = proxy_flow.get_reply_future(input_message)
    
    # Uncomment the line below to get the full FlowMessage back instead of just its data
    #reply_message = future.get_message()
    reply_data = future.get_data()
    
    # ~~~ Print the output ~~~
    print("~~~~~~Reply~~~~~~")
    print(reply_data)
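    # reply_data is a plain dict; for ChatAtomicFlow the model response is typically
    # found under the "api_output" key, which is why the optional interface below
    # renames it to "answer".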
    
    
    #8. ~~~~ (Optional) apply output interface on reply ~~~~
    # output_interface = KeyInterface(
    #     keys_to_rename={"api_output": "answer"},
    # )
    # print("Output: ", output_interface(reply_data))
    
    
    #9. ~~~~~Optional: Unserve Flow~~~~~~
    # serving.delete_served_flow(cl, "ChatAtomicFlow")