import gradio as gr
from threading import Thread
import random
import llama_cpp
import os
import spaces
randtxt = ""
print("downloading!")
#os.system("wget https://huggingface.co/QuantFactory/Meta-Llama-3-8B-Instruct-GGUF/resolve/main/Meta-Llama-3-8B-Instruct.Q3_K_L.gguf")
llama = llama_cpp.Llama("Meta-Llama-3-8B-Instruct.Q4_0.gguf", chat_format="llama-3")
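# Note: the commented-out wget above fetches the Q3_K_L quant, while the
# loader expects Meta-Llama-3-8B-Instruct.Q4_0.gguf to already exist in the
# working directory (e.g. committed to the Space); the two filenames differ.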

def randomize():
    # Endless background self-play loop (unused by default; see the
    # commented-out Thread(...) call at the bottom of the file).
    global randtxt
    while True:
        print("generating")
        genTurn()


#chat = [{"role": "system", "content": "The following is a never-ending chat between Berry and Llama. Berry is the personification of a raspberry. Llama is Berry's best friend. They already know each other well. The chat will not end but may cut to a later date after a chat. They try to use relatively short responses no longer than 5 sentences. "},
#         {"role": "user", "content": "berry: Good morning"}] # POV: llama is "assistant" 
#print(chat[len(chat)-1]["content"])


#llama

def reversechat(chat):
    # Swap user/assistant roles so the model answers from the other
    # character's point of view. Each message is copied so the shared chat
    # history is not mutated in place (flipping roles on the live dicts
    # would corrupt the history on every call). The system prompt is kept.
    nchat = []
    for msg in chat:
        nmsg = dict(msg)
        if nmsg["role"] == "user":
            nmsg["role"] = "assistant"
        elif nmsg["role"] == "assistant":
            nmsg["role"] = "user"
        nchat.append(nmsg)
    return nchat
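# Example (sketch of the intended behaviour):
#   reversechat([{"role": "system", "content": "..."},
#                {"role": "user", "content": "berry: Good morning"}])
#   -> [{"role": "system", "content": "..."},
#       {"role": "assistant", "content": "berry: Good morning"}]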

chara = "a"  # whose turn it is; only ever set to "a" in this file,
             # so the else branch below is effectively unused
def genTurn():
    global chara
    global chat
    try:
        if chara == "a":
            msg = llama.create_chat_completion(chat, max_tokens=200)["choices"][0]["message"]["content"]
            chat.append({"role": "assistant", "content": msg.removesuffix("<|eot_id|>")})
        else:
            #Arteex
            msg = llama.create_chat_completion(reversechat(chat), max_tokens=200)["choices"][0]["message"]["content"]
            chat.append({"role": "user", "content": msg.removesuffix("<|eot_id|>")})
        print(msg)
    except Exception:
        # On any failure (commonly a context-window overflow) restart the chat.
        print("this chat is over now :(")
        chara = "a"
        chat = [{"role": "system", "content": "The following is a never-ending chat between Berry and Llama. Berry is the personification of a raspberry. Llama is Berry's best friend. They already know each other well. The chat will not end but may cut to a later date after a chat. They try to use relatively short responses no longer than 5 sentences."},
                {"role": "user", "content": "berry: Good afternoon!"}]





#demo = gr.Interface(watch,inputs=None, outputs=gr.Chatbot(), live=True, description="click generate to show latest chat!", title="LlamaLive, watch an llm conversation!")

#randomize()

with gr.Blocks() as demo:
    chatbot = gr.Chatbot()
    clear = gr.ClearButton([chatbot])
    btn = gr.Button("Generate")
    stopbtn = gr.Button("Stop")
    iprompt = ""
    stop = 0
    def stp():
        global stop
        stop = 1
    # "Stop" simply reloads the page client-side; the stp()/stop flag above
    # is not wired to anything.
    stopbtn.click(None, js="window.location.reload()")

    @spaces.GPU
    def watch(prompt):
        global chara
        global chat
        c1 = "berry"
        c2 = "llama"
        msg1 = "Good Morning!"
        system_prompt = "The following is a never-ending chat between Berry and Llama. Berry is the personification of a raspberry. Llama is Berry's best friend. They already know each other well. The chat will not end but may cut to a later date after a chat. They try to use relatively short responses no longer than 5 sentences."

        # Rebuild the message history from the Chatbot's (user, assistant) pairs.
        chat = [{"role": "system", "content": system_prompt},
                {"role": "user", "content": c1 + ": " + msg1}]
        for i in prompt:
            if i[0] is not None:
                chat.append({"role": "user", "content": i[0]})
            if i[1] is not None:
                chat.append({"role": "assistant", "content": i[1]})

        # Generate the next message.
        try:
            if chara == "a":
                msg = llama.create_chat_completion(chat, max_tokens=200)["choices"][0]["message"]["content"]
                chat.append({"role": "assistant", "content": msg.removesuffix("<|eot_id|>")})
            else:
                #Arteex
                msg = llama.create_chat_completion(reversechat(chat), max_tokens=200)["choices"][0]["message"]["content"]
                chat.append({"role": "user", "content": msg.removesuffix("<|eot_id|>")})
            print(msg)
        except Exception:
            # On failure (e.g. context overflow), restart with the system prompt.
            print("this chat is over now :(")
            chara = "a"
            chat = [{"role": "system", "content": system_prompt},
                    {"role": "user", "content": c2 + ": " + msg1}]

        # Messages starting with "llama:" render on the assistant side,
        # everything else on the user side.
        msgsview = []
        for msg in chat:
            if msg["role"] == "system":
                continue
            if not msg["content"].lower().startswith("llama:"):
                msgsview.append((msg["content"], None))
            else:
                msgsview.append((None, msg["content"]))
        return msgsview
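    # One click generates one turn; because watch() is also bound to
    # chatbot.change below, each update re-triggers generation, so the two
    # characters keep talking on their own until "Stop" reloads the page.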
    btn.click(watch, [chatbot], [chatbot])
    chatbot.change(watch, [chatbot], [chatbot])

if __name__ == "__main__":
    demo.launch()

exit()  # everything below this point is an earlier version, kept for reference and never executed







print(chat)

if __name__ == "__main__":
    
    #Thread(target=randomize).start() bad idea running llm 24/7 for no reason
    with gr.Blocks() as demo:
        gr.Markdown("# LlamaLive\nwatch a live interaction between 2 chatbots!")
        cb = gr.Chatbot()
        cb.value = [(None, "testing")]
        btn = gr.Button()
        btn.click(watch, inputs=[cb], outputs=[cb])
    demo.launch()