FM-1976 committed
Commit 56bdf8f • 1 Parent(s): 6ac4c3d

Create app.py

Files changed (1)
  1. app.py +185 -0
app.py ADDED
@@ -0,0 +1,185 @@
import streamlit as st
from llama_cpp import Llama
import warnings
warnings.filterwarnings(action='ignore')
import datetime
import random
import string
from time import sleep
import tiktoken
import os
from huggingface_hub import hf_hub_download

# for counting the tokens in the prompt and in the result:
#   context_count = len(encoding.encode(yourtext))
encoding = tiktoken.get_encoding("r50k_base")
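# Note: "r50k_base" is a GPT-era tiktoken encoding, not this model's own
# tokenizer, so the token counts (and the t/s speed derived from them below)
# are approximate.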

nCTX = 2048
sTOPS = ['</s>']
modelname = "Lite-Mistral-150M-v2-Instruct"
# Set the webpage title
st.set_page_config(
    page_title=f"Your LocalGPT ✨ with {modelname}",
    page_icon="🌟",
    layout="wide")

if "hf_model" not in st.session_state:
    st.session_state.hf_model = "Lite-Mistral-150M-v2-Instruct"

# Initialize chat history
if "messages" not in st.session_state:
    st.session_state.messages = []

if "repeat" not in st.session_state:
    st.session_state.repeat = 1.35

if "temperature" not in st.session_state:
    st.session_state.temperature = 0.1

if "maxlength" not in st.session_state:
    st.session_state.maxlength = 500

if "speed" not in st.session_state:
    st.session_state.speed = 0.0

if "modelfile" not in st.session_state:
    modelfile = hf_hub_download(
        repo_id=os.environ.get("REPO_ID", "OuteAI/Lite-Mistral-150M-v2-Instruct-GGUF"),
        filename=os.environ.get("MODEL_FILE", "Lite-Mistral-150M-v2-Instruct-Q8_0.gguf"),
    )
    st.session_state.modelfile = modelfile
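# hf_hub_download stores the file in the local Hugging Face cache and returns
# its path, so the GGUF weights are fetched from the Hub only once; the
# REPO_ID / MODEL_FILE environment variables let you point at a different
# GGUF repo without editing the code.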

def writehistory(filename, text):
    # Append one entry to the session log file; the with-block closes the
    # file, so no explicit close() is needed
    with open(filename, 'a', encoding='utf-8') as f:
        f.write(text)
        f.write('\n')

def genRANstring(n):
    """
    n = int number of chars to randomize
    """
    res = ''.join(random.choices(string.ascii_uppercase +
                                 string.digits, k=n))
    return res
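# For example, genRANstring(5) might return 'K7Q2Z', giving each session
# its own log file name.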

@st.cache_resource
def create_chat():
    # Load the GGUF model with llama-cpp-python
    client = Llama(
        model_path=st.session_state.modelfile,
        # n_gpu_layers=0,
        temperature=0.1,
        top_p=0.5,
        n_ctx=nCTX,
        max_tokens=600,
        repeat_penalty=1.18,
        stop=sTOPS,
        verbose=False,
    )
    print('loading Lite-Mistral-150M-v2-Instruct with LlamaCPP...')
    return client
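# @st.cache_resource keeps one Llama instance alive across Streamlit reruns,
# so the model is loaded once rather than on every user interaction.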

# Create the session states
if "logfilename" not in st.session_state:
    ## Logger file
    logfile = f'{genRANstring(5)}_log.txt'
    st.session_state.logfilename = logfile
    # Write the first two messages into the history log
    writehistory(st.session_state.logfilename, f'{str(datetime.datetime.now())}\n\nYour own LocalGPT with 🌀 {modelname}\n---\n🧠🫡: You are a helpful assistant.')
    writehistory(st.session_state.logfilename, f'🌀: How may I help you today?')

# AVATARS: local image files; a single emoji (e.g. "🧑‍💻", "🤖", "🦖") also
# works, but shortcodes are not supported
av_us = 'user.png'            # alternative: './man.png'
av_ass = 'assistant3002.png'  # alternative: './robot.png'

### START STREAMLIT UI
# Create a header element
mytitle = '# 🔳 OuteAI Local GPT'
st.markdown(mytitle, unsafe_allow_html=True)
st.markdown(f'> *🌟 {modelname} with {nCTX} tokens context window*')
st.markdown('---')

# CREATE THE SIDEBAR
with st.sidebar:
    st.image('logo300.png', use_column_width=True)
    st.session_state.temperature = st.slider('Temperature:', min_value=0.0, max_value=1.0, value=0.1, step=0.02)
    st.session_state.maxlength = st.slider('Length reply:', min_value=150, max_value=2000,
                                           value=500, step=50)
    st.session_state.repeat = st.slider('Repeat Penalty:', min_value=0.0, max_value=2.0, value=1.35, step=0.01)
    st.markdown(f"**Logfile**: {st.session_state.logfilename}")
    statspeed = st.markdown(f'💫 speed: {st.session_state.speed} t/s')
    btnClear = st.button("Clear History", type="primary", use_container_width=True)
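# Note: btnClear is rendered but never checked afterwards; wiring it up
# (e.g. resetting st.session_state.messages when clicked) is left
# unimplemented in this version.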

llm = create_chat()

# Display chat messages from history on app rerun
for message in st.session_state.messages:
    if message["role"] == "user":
        with st.chat_message(message["role"], avatar=av_us):
            st.markdown(message["content"])
    else:
        with st.chat_message(message["role"], avatar=av_ass):
            st.markdown(message["content"])

# Accept user input
if myprompt := st.chat_input("What is an AI model?"):
    # Add user message to chat history
    st.session_state.messages.append({"role": "user", "content": myprompt})
    # Display user message in chat message container
    with st.chat_message("user", avatar=av_us):
        st.markdown(myprompt)
        usertext = f"user: {myprompt}"
        writehistory(st.session_state.logfilename, usertext)
    # Display assistant response in chat message container
    with st.chat_message("assistant", avatar=av_ass):
        message_placeholder = st.empty()
        with st.spinner("Thinking..."):
            start = datetime.datetime.now()
            # Send only the latest user message: the model is prompted
            # without multi-turn history
            conv_messages = []
            conv_messages.append(st.session_state.messages[-1])
            full_response = ""
            for chunk in llm.create_chat_completion(
                messages=conv_messages,
                temperature=st.session_state.temperature,
                repeat_penalty=st.session_state.repeat,
                stop=sTOPS,
                max_tokens=st.session_state.maxlength,
                stream=True,
            ):
                try:
                    if chunk["choices"][0]["delta"]["content"]:
                        full_response += chunk["choices"][0]["delta"]["content"]
                        message_placeholder.markdown(full_response + "🔳")
                        delta = datetime.datetime.now() - start
                        totalseconds = delta.total_seconds()
                        prompttokens = len(encoding.encode(myprompt))
                        assistanttokens = len(encoding.encode(full_response))
                        totaltokens = prompttokens + assistanttokens
                        st.session_state.speed = totaltokens / totalseconds
                        statspeed.markdown(f'💫 speed: {st.session_state.speed:.2f} t/s')
                except KeyError:
                    # some chunks carry no "content" delta; skip them
                    pass
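            # Each streamed chunk is an OpenAI-style dict, roughly
            #   {"choices": [{"delta": {"content": "..."}}]}
            # the first chunk typically carries only a "role" delta, which is
            # why missing keys are tolerated above.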

            delta = datetime.datetime.now() - start
            totalseconds = delta.total_seconds()
            prompttokens = len(encoding.encode(myprompt))
            assistanttokens = len(encoding.encode(full_response))
            totaltokens = prompttokens + assistanttokens
            st.session_state.speed = totaltokens / totalseconds
            statspeed.markdown(f'💫 speed: {st.session_state.speed:.3f} t/s')
            # Append the final generation statistics to the reply before it
            # is displayed, logged, and stored in the chat history
            toregister = full_response + f"""
```

🧾 prompt tokens: {prompttokens}
📈 generated tokens: {assistanttokens}
⏳ generation time: {delta}
💫 speed: {st.session_state.speed:.2f} t/s
```"""
            message_placeholder.markdown(toregister)
            asstext = f"assistant: {toregister}"
            writehistory(st.session_state.logfilename, asstext)
            st.session_state.messages.append({"role": "assistant", "content": toregister})
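
# To try the app locally (a minimal sketch; assumes the image assets
# user.png, assistant3002.png and logo300.png sit next to app.py):
#   pip install streamlit llama-cpp-python tiktoken huggingface_hub
#   streamlit run app.py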