awacke1 committed on
Commit
82cc38a
•
1 Parent(s): 8e4132b

Create backup14.addedArxivOnly.app.py

Files changed (1)
  1. backup14.addedArxivOnly.app.py +382 -0
backup14.addedArxivOnly.app.py ADDED
@@ -0,0 +1,382 @@
import streamlit as st
import anthropic, openai, base64, cv2, glob, json, math, os, pytz, random, re, requests, textract, time, zipfile
import plotly.graph_objects as go
import streamlit.components.v1 as components
from datetime import datetime
from audio_recorder_streamlit import audio_recorder
from bs4 import BeautifulSoup
from collections import deque
from dotenv import load_dotenv
from gradio_client import Client
from huggingface_hub import InferenceClient
from io import BytesIO
from PIL import Image
from PyPDF2 import PdfReader
from urllib.parse import quote
from xml.etree import ElementTree as ET
from openai import OpenAI
import extra_streamlit_components as stx
from streamlit.runtime.scriptrunner import get_script_run_ctx

# 🔧 Config & Setup
st.set_page_config(
    page_title="🚲BikeAI🏆 Claude/GPT Research",
    page_icon="🚲🏆",
    layout="wide",
    initial_sidebar_state="auto",
    menu_items={
        'Get Help': 'https://huggingface.co/awacke1',
        'Report a bug': 'https://huggingface.co/spaces/awacke1',
        'About': "🚲BikeAI🏆 Claude/GPT Research AI"
    }
)
load_dotenv()
openai.api_key = os.getenv('OPENAI_API_KEY') or st.secrets['OPENAI_API_KEY']
anthropic_key = os.getenv("ANTHROPIC_API_KEY_3") or st.secrets["ANTHROPIC_API_KEY"]
claude_client = anthropic.Anthropic(api_key=anthropic_key)
openai_client = OpenAI(api_key=openai.api_key, organization=os.getenv('OPENAI_ORG_ID'))
HF_KEY = os.getenv('HF_KEY')
API_URL = os.getenv('API_URL')

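# st.session_state.setdefault keeps chat history, the chosen model, and the
# last voice input across Streamlit's script reruns, which re-execute this
# whole file top to bottom on every user interaction.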
st.session_state.setdefault('transcript_history', [])
st.session_state.setdefault('chat_history', [])
st.session_state.setdefault('openai_model', "gpt-4o-2024-05-13")
st.session_state.setdefault('messages', [])
st.session_state.setdefault('last_voice_input', "")

# 🎨 Minimal Custom CSS
st.markdown("""
<style>
.main { background: linear-gradient(to right, #1a1a1a, #2d2d2d); color: #fff; }
.stMarkdown { font-family: 'Helvetica Neue', sans-serif; }
</style>
""", unsafe_allow_html=True)

# 🔑 Common Utilities
def generate_filename(prompt, file_type="md"):
    ctz = pytz.timezone('US/Central')
    date_str = datetime.now(ctz).strftime("%m%d_%H%M")
    safe = re.sub(r'[<>:"/\\|?*\n]', ' ', prompt)
    safe = re.sub(r'\s+', ' ', safe).strip()[:90]
    return f"{date_str}_{safe}.{file_type}"

def create_file(filename, prompt, response):
    with open(filename, 'w', encoding='utf-8') as f:
        f.write(prompt + "\n\n" + response)

def get_download_link(file):
    with open(file, "rb") as f:
        b64 = base64.b64encode(f.read()).decode()
    return f'<a href="data:file/txt;base64,{b64}" download="{os.path.basename(file)}">📂 Download {os.path.basename(file)}</a>'

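# Browser-side text-to-speech: the text is injected into a zero-height HTML
# component that calls the Web Speech API (window.speechSynthesis), so audio
# plays in the visitor's browser with no server-side TTS dependency.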
def speech_synthesis_html(result):
    # Note: st.cache_resource was dropped here; caching a function whose only
    # effect is rendering a component would make repeated utterances of the
    # same text silently no-ops. Quotes and newlines are stripped so the text
    # stays a valid single-line JavaScript string literal.
    clean = result.replace('"', '').replace('\n', ' ')
    html_code = f"""
    <html><body>
    <script>
    var msg = new SpeechSynthesisUtterance("{clean}");
    window.speechSynthesis.speak(msg);
    </script>
    </body></html>
    """
    components.html(html_code, height=0)

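# Vision call: the image is inlined as a base64 data URL inside the chat
# message content, the format the OpenAI chat completions API accepts for
# image_url parts.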
def process_image(image_path, user_prompt):
    with open(image_path, "rb") as imgf:
        image_data = imgf.read()
    b64img = base64.b64encode(image_data).decode("utf-8")
    resp = openai_client.chat.completions.create(
        model=st.session_state["openai_model"],
        messages=[
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": [
                {"type": "text", "text": user_prompt},
                {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{b64img}"}}
            ]}
        ],
        temperature=0.0,
    )
    return resp.choices[0].message.content

def process_audio(audio_path):
    with open(audio_path, "rb") as f:
        transcription = openai_client.audio.transcriptions.create(model="whisper-1", file=f)
    st.session_state.messages.append({"role": "user", "content": transcription.text})
    return transcription.text

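# Video handling: sample one frame every `seconds_per_frame` seconds, encode
# each as a base64 JPEG, and send the batch of frames as image_url parts in a
# single chat completion request.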
def process_video(video_path, seconds_per_frame=1):
    vid = cv2.VideoCapture(video_path)
    total = int(vid.get(cv2.CAP_PROP_FRAME_COUNT))
    fps = vid.get(cv2.CAP_PROP_FPS)
    # Guard: fps can be 0 for some containers, and range() rejects a 0 step.
    skip = max(int(fps * seconds_per_frame), 1)
    frames_b64 = []
    for i in range(0, total, skip):
        vid.set(cv2.CAP_PROP_POS_FRAMES, i)
        ret, frame = vid.read()
        if not ret:
            break
        _, buf = cv2.imencode(".jpg", frame)
        frames_b64.append(base64.b64encode(buf).decode("utf-8"))
    vid.release()
    return frames_b64

def process_video_with_gpt(video_path, prompt):
    frames = process_video(video_path)
    resp = openai_client.chat.completions.create(
        model=st.session_state["openai_model"],
        messages=[
            {"role": "system", "content": "Analyze video frames."},
            {"role": "user", "content": [
                {"type": "text", "text": prompt},
                *[{"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{fr}"}} for fr in frames]
            ]}
        ]
    )
    return resp.choices[0].message.content

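# ArXiv search goes through the public Gradio Space
# awacke1/Arxiv-Paper-Search-And-QA-RAG-Pattern; the keyword arguments are
# assumed to mirror that Space's /ask_llm endpoint signature.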
def search_arxiv(query):
    st.write("🔍 Searching ArXiv...")
    client = Client("awacke1/Arxiv-Paper-Search-And-QA-RAG-Pattern")
    r1 = client.predict(prompt=query, llm_model_picked="mistralai/Mixtral-8x7B-Instruct-v0.1", stream_outputs=True, api_name="/ask_llm")
    st.markdown("### Mixtral-8x7B-Instruct-v0.1 Result")
    st.markdown(r1)
    r2 = client.predict(prompt=query, llm_model_picked="mistralai/Mistral-7B-Instruct-v0.2", stream_outputs=True, api_name="/ask_llm")
    st.markdown("### Mistral-7B-Instruct-v0.2 Result")
    st.markdown(r2)
    return f"{r1}\n\n{r2}"

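# Full RAG lookup: /update_with_rag_md is assumed to return a tuple whose
# first element is the reference list in Markdown (hence r[0]); /ask_llm then
# answers the question, and both results are spoken aloud and saved to a .md
# file via create_file.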
def perform_ai_lookup(q):
    start = time.time()
    client = Client("awacke1/Arxiv-Paper-Search-And-QA-RAG-Pattern")
    # Perform a RAG-based search
    r = client.predict(q, 20, "Semantic Search", "mistralai/Mixtral-8x7B-Instruct-v0.1", api_name="/update_with_rag_md")
    refs = r[0]
    # Ask the model for an answer
    r2 = client.predict(q, "mistralai/Mixtral-8x7B-Instruct-v0.1", True, api_name="/ask_llm")
    result = f"### 🔎 {q}\n\n{r2}\n\n{refs}"
    # Speak the answer
    speech_synthesis_html(r2)

    # Speak summaries and titles from refs. refs is assumed to contain a set
    # of references in Markdown with possible titles; we simply re-speak refs
    # as the "summaries".
    summaries_text = "Here are the summaries from the references: " + refs.replace('"', '')
    speech_synthesis_html(summaries_text)

    # Extract titles from refs (looking for Markdown links: [Title](URL))
    titles = []
    for line in refs.split('\n'):
        m = re.search(r"\[([^\]]+)\]", line)
        if m:
            titles.append(m.group(1))
    if titles:
        titles_text = "Here are the titles of the papers: " + ", ".join(titles)
        speech_synthesis_html(titles_text)

    st.markdown(result)
    elapsed = time.time() - start
    st.write(f"Elapsed: {elapsed:.2f} s")
    fn = generate_filename(q, "md")
    create_file(fn, q, result)
    return result

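# Chat helpers: the GPT path resends the whole st.session_state.messages
# history each turn, while the Claude path sends only the current prompt and
# logs the exchange to st.session_state.chat_history.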
def process_with_gpt(text):
    if not text:
        return
    st.session_state.messages.append({"role": "user", "content": text})
    with st.chat_message("user"):
        st.markdown(text)
    with st.chat_message("assistant"):
        c = openai_client.chat.completions.create(
            model=st.session_state["openai_model"],
            messages=st.session_state.messages,
            stream=False
        )
        ans = c.choices[0].message.content
        st.write("GPT-4o: " + ans)
        create_file(generate_filename(text, "md"), text, ans)
        st.session_state.messages.append({"role": "assistant", "content": ans})
    return ans

def process_with_claude(text):
    if not text:
        return
    with st.chat_message("user"):
        st.markdown(text)
    with st.chat_message("assistant"):
        r = claude_client.messages.create(
            model="claude-3-sonnet-20240229",
            max_tokens=1000,
            messages=[{"role": "user", "content": text}]
        )
        ans = r.content[0].text
        st.write("Claude: " + ans)
        create_file(generate_filename(text, "md"), text, ans)
        st.session_state.chat_history.append({"user": text, "claude": ans})
    return ans

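# Media is embedded directly in the page as base64 data URIs, which avoids
# static-file hosting but grows the page by roughly 4/3 of each raw file size.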
def create_zip_of_files(files):
    zip_name = "all_files.zip"
    with zipfile.ZipFile(zip_name, 'w') as z:
        for f in files:
            z.write(f)
    return zip_name

def get_media_html(p, typ="video", w="100%"):
    with open(p, 'rb') as f:
        d = base64.b64encode(f.read()).decode()
    if typ == "video":
        return f'<video width="{w}" controls autoplay muted loop><source src="data:video/mp4;base64,{d}" type="video/mp4"></video>'
    else:
        return f'<audio controls style="width:{w};"><source src="data:audio/mpeg;base64,{d}" type="audio/mpeg"></audio>'

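# Gallery: each Analyze/Transcribe button triggers a full Streamlit rerun,
# and the result is rendered inline under the corresponding media item.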
def create_media_gallery():
    st.header("🎬 Media Gallery")
    tabs = st.tabs(["🖼️ Images", "🎵 Audio", "🎥 Video"])
    with tabs[0]:
        imgs = glob.glob("*.png") + glob.glob("*.jpg")
        if imgs:
            c = st.slider("Cols", 1, 5, 3)
            cols = st.columns(c)
            for i, f in enumerate(imgs):
                with cols[i % c]:
                    st.image(Image.open(f), use_container_width=True)
                    if st.button(f"👀 Analyze {os.path.basename(f)}"):
                        a = process_image(f, "Describe this image.")
                        st.markdown(a)
    with tabs[1]:
        auds = glob.glob("*.mp3") + glob.glob("*.wav")
        for a in auds:
            with st.expander(f"🎵 {os.path.basename(a)}"):
                st.markdown(get_media_html(a, "audio"), unsafe_allow_html=True)
                if st.button(f"Transcribe {os.path.basename(a)}"):
                    t = process_audio(a)
                    st.write(t)
    with tabs[2]:
        vids = glob.glob("*.mp4")
        for v in vids:
            with st.expander(f"🎥 {os.path.basename(v)}"):
                st.markdown(get_media_html(v, "video"), unsafe_allow_html=True)
                if st.button(f"Analyze {os.path.basename(v)}"):
                    a = process_video_with_gpt(v, "Describe video.")
                    st.markdown(a)

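# Sidebar file manager: view (🌐), download, load into the editor (📂), or
# delete (🗑) each saved .md transcript.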
def display_file_manager():
    st.sidebar.title("📁 File Management")
    files = sorted(glob.glob("*.md"), reverse=True)
    if st.sidebar.button("🗑 Delete All"):
        for f in files:
            os.remove(f)
        st.rerun()  # st.experimental_rerun() was removed in newer Streamlit releases
    if st.sidebar.button("⬇️ Download All"):
        z = create_zip_of_files(files)
        st.sidebar.markdown(get_download_link(z), unsafe_allow_html=True)
    for f in files:
        col1, col2, col3, col4 = st.sidebar.columns([1, 3, 1, 1])
        with col1:
            if st.button("🌐", key="v" + f):
                st.session_state.current_file = f
                with open(f, 'r', encoding='utf-8') as fh:
                    st.write(fh.read())
        with col2:
            st.markdown(get_download_link(f), unsafe_allow_html=True)
        with col3:
            if st.button("📂", key="e" + f):
                st.session_state.current_file = f
                with open(f, 'r', encoding='utf-8') as fh:
                    st.session_state.file_content = fh.read()
        with col4:
            if st.button("🗑", key="d" + f):
                os.remove(f)
                st.rerun()

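# Entry point: a sidebar radio picks the model route (Arxiv is the default)
# and a main radio picks one of four actions; results are written to .md
# files that the sidebar file manager picks up.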
def main():
    st.sidebar.markdown("### 🚲BikeAI🏆 Multi-Agent Research AI")
    tab_main = st.radio("Action:", ["🎤 Voice Input", "📸 Media Gallery", "🔍 Search ArXiv", "📝 File Editor"], horizontal=True)

    # Changed model order and default: Arxiv is now first.
    model_choice = st.sidebar.radio("AI Model:", ["Arxiv", "GPT-4o", "Claude-3", "GPT+Claude+Arxiv"], index=0)

    # Speech-to-text component placeholder (example); expects a local
    # "mycomponent" directory containing the component's frontend build.
    mycomponent = components.declare_component("mycomponent", path="mycomponent")
    val = mycomponent(my_input_value="Hello")
    if val:
        user_input = val
        if model_choice == "GPT-4o":
            process_with_gpt(user_input)
        elif model_choice == "Claude-3":
            process_with_claude(user_input)
        elif model_choice == "Arxiv":
            # Just Arxiv on its own, full column, speak results
            st.subheader("Arxiv Only Results:")
            perform_ai_lookup(user_input)
        else:
            # GPT+Claude+Arxiv
            col1, col2, col3 = st.columns(3)
            with col1:
                st.subheader("GPT-4o Omni:")
                try:
                    process_with_gpt(user_input)
                except Exception:
                    st.write('GPT-4o error')
            with col2:
                st.subheader("Claude-3 Sonnet:")
                try:
                    process_with_claude(user_input)
                except Exception:
                    st.write('Claude error')
            with col3:
                st.subheader("Arxiv + Mistral:")
                try:
                    r = perform_ai_lookup(user_input)
                    st.markdown(r)
                except Exception:
                    st.write("Arxiv error")

    if tab_main == "🎤 Voice Input":
        st.subheader("🎤 Voice Recognition")
        user_text = st.text_area("Message:", height=100)
        if st.button("Send 📨"):
            if user_text:
                if model_choice == "GPT-4o":
                    process_with_gpt(user_text)
                elif model_choice == "Claude-3":
                    process_with_claude(user_text)
                elif model_choice == "Arxiv":
                    st.subheader("Arxiv Only Results:")
                    perform_ai_lookup(user_text)
                else:
                    # GPT+Claude+Arxiv
                    col1, col2, col3 = st.columns(3)
                    with col1:
                        st.subheader("GPT-4o Omni:")
                        process_with_gpt(user_text)
                    with col2:
                        st.subheader("Claude-3 Sonnet:")
                        process_with_claude(user_text)
                    with col3:
                        st.subheader("Arxiv & Mistral:")
                        res = perform_ai_lookup(user_text)
                        st.markdown(res)
        st.subheader("📜 Chat History")
        t1, t2 = st.tabs(["Claude History", "GPT-4o History"])
        with t1:
            for c in st.session_state.chat_history:
                st.write("**You:**", c["user"])
                st.write("**Claude:**", c["claude"])
        with t2:
            for m in st.session_state.messages:
                with st.chat_message(m["role"]):
                    st.markdown(m["content"])

    elif tab_main == "📸 Media Gallery":
        create_media_gallery()

    elif tab_main == "🔍 Search ArXiv":
        q = st.text_input("Research query:")
        if q:
            r = search_arxiv(q)
            st.markdown(r)

    elif tab_main == "📝 File Editor":
        if getattr(st.session_state, 'current_file', None):
            st.subheader(f"Editing: {st.session_state.current_file}")
            new_text = st.text_area("Content:", st.session_state.get('file_content', ''), height=300)
            if st.button("Save"):
                with open(st.session_state.current_file, 'w', encoding='utf-8') as f:
                    f.write(new_text)
                st.success("Updated!")

    display_file_manager()

if __name__ == "__main__":
    main()