Spaces:
Sleeping
Sleeping
import streamlit as st | |
import streamlit.components.v1 as components | |
import anthropic | |
import openai | |
import base64 | |
from datetime import datetime | |
import plotly.graph_objects as go | |
import cv2 | |
import glob | |
import json | |
import math | |
import os | |
import pytz | |
import random | |
import re | |
import requests | |
import textract | |
import time | |
import zipfile | |
from audio_recorder_streamlit import audio_recorder | |
from bs4 import BeautifulSoup | |
from collections import deque | |
from dotenv import load_dotenv | |
from gradio_client import Client | |
from huggingface_hub import InferenceClient | |
from io import BytesIO | |
from PIL import Image | |
from PyPDF2 import PdfReader | |
from urllib.parse import quote | |
from xml.etree import ElementTree as ET | |
from openai import OpenAI | |
import extra_streamlit_components as stx | |
from streamlit.runtime.scriptrunner import get_script_run_ctx | |
import extra_streamlit_components as stx | |
# 1. BikeAI configuration and setup.
# Site_Name and title hold the same display text; only `title` is used below.
# NOTE(review): the non-ASCII characters in these strings look mis-encoded
# (probably emoji) -- confirm against the original file.
Site_Name = 'π²BikeAIπ Claude and GPT Multi-Agent Research AI'
title = "π²BikeAIπ Claude and GPT Multi-Agent Research AI"
# Targets for the "Get Help" and "Report a bug" entries of the page menu.
helpURL = 'https://huggingface.co/awacke1'
bugURL = 'https://huggingface.co/spaces/awacke1'
icons = 'π²π'
# Page-level settings: title, icon, wide layout, sidebar state and menu links.
st.set_page_config(
    page_title=title,
    page_icon=icons,
    layout="wide",
    initial_sidebar_state="auto",
    menu_items={
        'Get Help': helpURL,
        'Report a bug': bugURL,
        'About': title
    }
)
def create_speech_component():
    """Render the browser speech-recognition widget and return the render call's result.

    The embedded script keeps a running transcript in the page, shows it in
    the ``output`` element and posts it to the parent window as a
    ``transcript_update`` message (every 3 seconds, on final results, on stop
    and on clear). Recognition is started automatically about one second
    after the frame loads.

    NOTE(review): nothing visible in this file receives those messages on the
    Python side -- confirm whether the value returned here carries any data.
    """
    markup = """
<div style="padding: 20px;">
<div class="controls">
<button id="start">Start Listening</button>
<button id="stop" disabled>Stop Listening</button>
<button id="clear">Clear Text</button>
</div>
<div id="status" style="margin: 10px 0; padding: 10px; background: #e8f5e9;">Ready</div>
<div id="output" style="white-space: pre-wrap; padding: 15px; background: #f5f5f5; min-height: 100px; max-height: 400px; overflow-y: auto;"></div>
<div id="debug" style="margin-top: 10px; color: #666;"></div>
<script>
let currentTranscript = '';
const debug = document.getElementById('debug');
function sendTranscriptUpdate() {
// Send transcript to parent (Streamlit)
window.parent.postMessage({
type: 'transcript_update',
data: currentTranscript
}, '*');
debug.textContent = `Last update: ${new Date().toLocaleTimeString()} - Length: ${currentTranscript.length}`;
}
// Set up periodic updates
setInterval(sendTranscriptUpdate, 3000); // Send update every 3 seconds
const recognition = new (window.SpeechRecognition || window.webkitSpeechRecognition)();
const startButton = document.getElementById('start');
const stopButton = document.getElementById('stop');
const clearButton = document.getElementById('clear');
const status = document.getElementById('status');
const output = document.getElementById('output');
recognition.continuous = true;
recognition.interimResults = true;
startButton.onclick = () => {
recognition.start();
status.textContent = 'π€ Listening...';
startButton.disabled = true;
stopButton.disabled = false;
};
stopButton.onclick = () => {
recognition.stop();
status.textContent = 'Stopped';
startButton.disabled = false;
stopButton.disabled = true;
sendTranscriptUpdate(); // Send final update when stopped
};
clearButton.onclick = () => {
currentTranscript = '';
output.textContent = '';
sendTranscriptUpdate(); // Send empty transcript
};
recognition.onresult = (event) => {
let interimTranscript = '';
let finalTranscript = '';
for (let i = event.resultIndex; i < event.results.length; i++) {
const transcript = event.results[i][0].transcript;
if (event.results[i].isFinal) {
finalTranscript += transcript + ' ';
currentTranscript += transcript + ' ';
} else {
interimTranscript += transcript;
}
}
output.textContent = currentTranscript + (interimTranscript ? '... ' + interimTranscript : '');
output.scrollTop = output.scrollHeight;
if (finalTranscript) {
sendTranscriptUpdate(); // Send update when we have final transcript
}
};
recognition.onend = () => {
if (!stopButton.disabled) {
recognition.start();
}
};
// Auto-start on load
window.addEventListener('load', () => {
setTimeout(() => startButton.click(), 1000);
});
</script>
</div>
"""
    # Hand back whatever the HTML component call yields.
    rendered = components.html(markup, height=400)
    return rendered
def integrate_speech_component():
    """Render the speech widget plus a transcript view and return the stored transcript.

    Seeds ``voice_transcript`` and ``last_update`` in the session state when
    they are missing, renders the speech component, then shows the stored
    transcript in a text area and the time of the last update below it.

    Returns:
        The current value of ``st.session_state.voice_transcript``.
    """
    state = st.session_state
    if "voice_transcript" not in state:
        state.voice_transcript = ""
    if "last_update" not in state:
        state.last_update = time.time()

    # Reserve the display slots before the component so they appear above it.
    transcript_slot = st.empty()
    status_slot = st.empty()

    create_speech_component()

    transcript_text = state.voice_transcript
    transcript_slot.text_area(
        "Voice Transcript:",
        value=transcript_text,
        height=100,
        # The key embeds the current second, so it differs between reruns.
        key=f"transcript_display_{int(time.time())}",
    )

    updated_at = datetime.fromtimestamp(state.last_update).strftime('%H:%M:%S')
    status_slot.text(f"Last updated: {updated_at}")
    return transcript_text
# 2. Load environment variables and initialize API clients.
load_dotenv()

# OpenAI setup: prefer the environment variable, fall back to Streamlit secrets.
openai.api_key = os.getenv('OPENAI_API_KEY')
if openai.api_key is None:
    openai.api_key = st.secrets['OPENAI_API_KEY']
# Pass the resolved key (not a second os.getenv lookup) so the secrets
# fallback above is honoured by the client as well.
openai_client = OpenAI(
    api_key=openai.api_key,
    organization=os.getenv('OPENAI_ORG_ID')
)

# 3. Claude setup.
# NOTE(review): the environment variable is ANTHROPIC_API_KEY_3 while the
# secrets entry is ANTHROPIC_API_KEY -- confirm the differing names are intended.
anthropic_key = os.getenv("ANTHROPIC_API_KEY_3")
if anthropic_key is None:
    anthropic_key = st.secrets["ANTHROPIC_API_KEY"]
claude_client = anthropic.Anthropic(api_key=anthropic_key)

# 4. Initialize session state entries used across reruns.
if 'transcript_history' not in st.session_state:
    st.session_state.transcript_history = []
if "chat_history" not in st.session_state:
    st.session_state.chat_history = []
if "openai_model" not in st.session_state:
    st.session_state["openai_model"] = "gpt-4o-2024-05-13"
if "messages" not in st.session_state:
    st.session_state.messages = []
if 'last_voice_input' not in st.session_state:
    st.session_state.last_voice_input = ""

# 5. HuggingFace inference configuration.
API_URL = os.getenv('API_URL')
HF_KEY = os.getenv('HF_KEY')
MODEL1 = "meta-llama/Llama-2-7b-chat-hf"
MODEL2 = "openai/whisper-small.en"
headers = {
    "Authorization": f"Bearer {HF_KEY}",
    "Content-Type": "application/json"
}
# 6. Custom CSS.
# Injects a <style> block through st.markdown; unsafe_allow_html is required
# for the raw HTML to be emitted. It styles the main area and the
# category-header, scene-card, media-gallery and bike-card classes.
st.markdown("""
<style>
.main {
background: linear-gradient(to right, #1a1a1a, #2d2d2d);
color: #ffffff;
}
.stMarkdown {
font-family: 'Helvetica Neue', sans-serif;
}
.category-header {
background: linear-gradient(45deg, #2b5876, #4e4376);
padding: 20px;
border-radius: 10px;
margin: 10px 0;
}
.scene-card {
background: rgba(0,0,0,0.3);
padding: 15px;
border-radius: 8px;
margin: 10px 0;
border: 1px solid rgba(255,255,255,0.1);
}
.media-gallery {
display: grid;
gap: 1rem;
padding: 1rem;
}
.bike-card {
background: rgba(255,255,255,0.05);
border-radius: 10px;
padding: 15px;
transition: transform 0.3s;
}
.bike-card:hover {
transform: scale(1.02);
}
</style>
""", unsafe_allow_html=True)
# 7. Helper functions
def generate_filename(prompt, file_type):
    """Build a timestamped file name from ``prompt``.

    The name is ``<MMDD_HHMM>_<prompt>.<file_type>`` where the timestamp is
    taken in the US/Central zone and the prompt has the characters
    ``<>:"/\\|?*`` and newlines replaced by spaces, whitespace runs collapsed,
    outer whitespace stripped and the result cut to 230 characters.

    NOTE: this name is defined again further down in the file; the last
    definition is the one in effect once the module has loaded.
    """
    stamp = datetime.now(pytz.timezone('US/Central')).strftime("%m%d_%H%M")
    without_reserved = re.sub(r'[<>:"/\\|?*\n]', ' ', prompt)
    single_spaced = re.sub(r'\s+', ' ', without_reserved)
    label = single_spaced.strip()[:230]
    return f"{stamp}_{label}.{file_type}"
# 8. Persist a prompt/response pair to disk.
def create_file(filename, prompt, response, should_save=True):
    """Write ``prompt``, a blank line and ``response`` to ``filename`` as UTF-8.

    Nothing is written when ``should_save`` is false. Returns None.

    NOTE: this name is defined again further down in the file without the
    ``should_save`` parameter; the later definition is the one in effect.
    """
    if should_save:
        with open(filename, 'w', encoding='utf-8') as handle:
            handle.write("\n\n".join((prompt, response)))
def create_and_save_file(content, file_type="md", prompt=None, is_image=False, should_save=True):
    """Save ``content`` to a generated file name and return that name.

    Args:
        content: Text (``str``) or raw data (``bytes``/``bytearray``) to store.
        file_type: Extension used for the generated file name.
        prompt: Optional text used for the file name; for text content it is
            also written ahead of the content unless ``is_image`` is set.
        is_image: When true, only ``content`` is written (no prompt header).
        should_save: When false nothing is written and None is returned.

    Returns:
        The name of the written file, or None when ``should_save`` is false.
    """
    if not should_save:
        return None

    is_binary = isinstance(content, (bytes, bytearray))
    if prompt:
        name_source = prompt
    elif is_binary:
        # Raw bytes cannot be fed to the text-based file name builder.
        name_source = "binary_content"
    else:
        name_source = content
    filename = generate_filename(name_source, file_type)

    if is_binary:
        # Binary payloads (audio, images) must be written in binary mode;
        # writing them to a text-mode file raises TypeError.
        with open(filename, "wb") as f:
            f.write(content)
        return filename

    with open(filename, "w", encoding="utf-8") as f:
        if is_image or not prompt:
            f.write(content)
        else:
            f.write(prompt + "\n\n" + content)
    return filename
def get_download_link(file_path):
    """Return an HTML anchor that downloads ``file_path`` via a base64 data URI.

    The whole file is read and embedded in the link; the link text and the
    ``download`` attribute both use the file's base name.
    """
    with open(file_path, "rb") as source:
        encoded = base64.b64encode(source.read()).decode()
    name = os.path.basename(file_path)
    return f'<a href="data:file/txt;base64,{encoded}" download="{name}">Download {name}π</a>'
def SpeechSynthesis(result):
    """Render a small HTML page that reads ``result`` aloud in the browser.

    The text is placed in a textarea and a button triggers the browser's
    ``speechSynthesis`` API on the textarea's current value.

    Args:
        result: Text to read; it is converted with ``str`` and HTML-escaped
            before being embedded, so markup in the text (for example a
            literal ``</textarea>``) cannot break out of the textarea.
    """
    import html  # local import: keeps this block self-contained

    # Entities are decoded again by the browser, so the spoken text is unchanged.
    safe_text = html.escape(str(result))
    documentHTML5 = f'''
<!DOCTYPE html>
<html>
<head>
<title>Read It Aloud</title>
<script type="text/javascript">
function readAloud() {{
const text = document.getElementById("textArea").value;
const speech = new SpeechSynthesisUtterance(text);
window.speechSynthesis.speak(speech);
}}
</script>
</head>
<body>
<h1>π Read It Aloud</h1>
<textarea id="textArea" rows="10" cols="80">{safe_text}</textarea>
<br>
<button onclick="readAloud()">π Read Aloud</button>
</body>
</html>
'''
    components.html(documentHTML5, width=1280, height=300)
# Media processing functions
def process_image(image_input, user_prompt):
    """Ask the configured OpenAI chat model about an image and return its reply.

    Args:
        image_input: Path to an image file (``str``) or the image bytes.
        user_prompt: Question or instruction sent alongside the image.

    Returns:
        The text content of the first completion choice.
    """
    if isinstance(image_input, str):
        with open(image_input, "rb") as image_file:
            image_input = image_file.read()
    encoded_image = base64.b64encode(image_input).decode("utf-8")

    system_message = {
        "role": "system",
        "content": "You are a helpful assistant that responds in Markdown.",
    }
    user_message = {
        "role": "user",
        "content": [
            {"type": "text", "text": user_prompt},
            {
                "type": "image_url",
                # The data URI is always labelled image/png.
                "image_url": {"url": f"data:image/png;base64,{encoded_image}"},
            },
        ],
    }
    reply = openai_client.chat.completions.create(
        model=st.session_state["openai_model"],
        messages=[system_message, user_message],
        temperature=0.0,
    )
    return reply.choices[0].message.content
def process_audio(audio_input, text_input=''):
    """Transcribe audio with Whisper, show and speak the text, and save the audio.

    Args:
        audio_input: Path to an audio file (``str``), raw audio bytes, or an
            open binary file-like object.
        text_input: Unused; kept for interface compatibility.

    Returns:
        The transcribed text, so callers can display it.
    """
    # Normalise every accepted input to (name, bytes): the bytes are needed
    # both for the upload and for saving a copy afterwards.
    if isinstance(audio_input, str):
        upload_name = os.path.basename(audio_input)
        with open(audio_input, "rb") as file:
            audio_bytes = file.read()
    elif isinstance(audio_input, (bytes, bytearray)):
        upload_name = "audio.wav"
        audio_bytes = bytes(audio_input)
    else:
        upload_name = os.path.basename(str(getattr(audio_input, "name", "") or "audio.wav"))
        audio_bytes = audio_input.read()

    # A (filename, content) tuple gives the upload a file name.
    # NOTE(review): confirm the service relies on it to recognise the format.
    transcription = openai_client.audio.transcriptions.create(
        model="whisper-1",
        file=(upload_name, audio_bytes),
    )
    transcript_text = transcription.text

    st.session_state.messages.append({"role": "user", "content": transcript_text})
    with st.chat_message("assistant"):
        st.markdown(transcript_text)
        SpeechSynthesis(transcript_text)

    # Save the audio in binary mode under a name derived from the transcript.
    filename = generate_filename(transcript_text, "wav")
    with open(filename, "wb") as out:
        out.write(audio_bytes)
    return transcript_text
# Video processing without a moviepy dependency
def process_video(video_path, seconds_per_frame=1):
    """Sample frames from a video and return them as base64-encoded JPEGs.

    Args:
        video_path: Path of the video file to open with OpenCV.
        seconds_per_frame: Spacing between sampled frames, in seconds.

    Returns:
        A tuple ``(frames, None)`` where ``frames`` is a list of base64
        strings; the second element is always None. The list is empty when
        the capture reports no frames.
    """
    base64Frames = []
    video = cv2.VideoCapture(video_path)
    try:
        total_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
        fps = video.get(cv2.CAP_PROP_FPS)
        # A reported fps of 0 (or a tiny seconds_per_frame) would give a step
        # of 0, which range() rejects; always advance at least one frame.
        frames_to_skip = max(1, int(fps * seconds_per_frame))
        for frame_idx in range(0, total_frames, frames_to_skip):
            video.set(cv2.CAP_PROP_POS_FRAMES, frame_idx)
            success, frame = video.read()
            if not success:
                break
            encoded_ok, buffer = cv2.imencode(".jpg", frame)
            if not encoded_ok:
                continue  # skip frames that could not be encoded
            base64Frames.append(base64.b64encode(buffer).decode("utf-8"))
    finally:
        # Release the capture even if reading or encoding raised.
        video.release()
    return base64Frames, None
def process_video_with_gpt(video_input, user_prompt):
    """Send sampled video frames plus a prompt to the chat model and return its reply.

    Frames come from ``process_video`` and are attached as JPEG data URIs
    after the text part of the user message.
    """
    frames, _ = process_video(video_input)

    user_parts = [{"type": "text", "text": user_prompt}]
    for frame in frames:
        user_parts.append(
            {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{frame}"}}
        )

    reply = openai_client.chat.completions.create(
        model=st.session_state["openai_model"],
        messages=[
            {"role": "system", "content": "Analyze the video frames and provide a detailed description."},
            {"role": "user", "content": user_parts},
        ],
    )
    return reply.choices[0].message.content
def extract_urls(text):
    """Extract date, title, abstract link and PDF link entries as markdown.

    The input is scanned for ``### DD Mon YYYY | [title]`` headings, arXiv
    ``abs`` links and arXiv ``pdf`` links; matches are paired up in order of
    appearance and one markdown section is produced per complete entry.

    Args:
        text: Markdown text to scan.

    Returns:
        The generated markdown, or an empty string when nothing matched or
        ``text`` is not a string.
    """
    date_pattern = re.compile(r'### (\d{2} \w{3} \d{4})')
    abs_link_pattern = re.compile(r'\[(.*?)\]\((https://arxiv\.org/abs/\d+\.\d+)\)')
    pdf_link_pattern = re.compile(r'\[β¬οΈ\]\((https://arxiv\.org/pdf/\d+\.\d+)\)')
    title_pattern = re.compile(r'### \d{2} \w{3} \d{4} \| \[(.*?)\]')
    try:
        date_matches = date_pattern.findall(text)
        abs_link_matches = abs_link_pattern.findall(text)
        pdf_link_matches = pdf_link_pattern.findall(text)
        title_matches = title_pattern.findall(text)
    except TypeError:
        # Non-text input: keep the best-effort behaviour of returning nothing.
        st.write('.')
        return ''

    # zip stops at the shortest list, so a missing link on one entry no
    # longer raises; only complete entries are emitted.
    sections = []
    for date, title, (_, abs_link), pdf_link in zip(
            date_matches, title_matches, abs_link_matches, pdf_link_matches):
        sections.append(
            f"**Date:** {date}\n\n"
            f"**Title:** {title}\n\n"
            f"**Abstract Link:** [{abs_link}]({abs_link})\n\n"
            f"**PDF Link:** [{pdf_link}]({pdf_link})\n\n"
            "---\n\n"
        )
    return "".join(sections)
def search_arxiv(query):
    """Ask two hosted LLMs the same question and return both answers.

    Each model is queried through the ``/ask_llm`` endpoint of the
    ``awacke1/Arxiv-Paper-Search-And-QA-RAG-Pattern`` client; every answer is
    shown under its own heading as soon as it arrives.

    Returns:
        Both answers joined by a blank line, in query order.
    """
    st.write("Performing AI Lookup...")
    client = Client("awacke1/Arxiv-Paper-Search-And-QA-RAG-Pattern")

    runs = (
        ("mistralai/Mixtral-8x7B-Instruct-v0.1", "### Mixtral-8x7B-Instruct-v0.1 Result"),
        ("mistralai/Mistral-7B-Instruct-v0.2", "### Mistral-7B-Instruct-v0.2 Result"),
    )
    answers = []
    for model_id, heading in runs:
        answer = client.predict(
            prompt=query,
            llm_model_picked=model_id,
            stream_outputs=True,
            api_name="/ask_llm",
        )
        st.markdown(heading)
        st.markdown(answer)
        answers.append(answer)

    first, second = answers
    return f"{first}\n\n{second}"
# Build a file name from the prompt and the current time.
def generate_filename(prompt, file_type):
    """Return ``<MMDD_HHMM>_<prompt>.<file_type>`` with a sanitised prompt.

    The timestamp uses the US/Central zone. Every run of non-word characters
    in ``prompt`` becomes a single underscore and the result is cut to 90
    characters.

    NOTE: this name is defined again further down in the file; the last
    definition is the one in effect once the module has loaded.
    """
    now_central = datetime.now(pytz.timezone('US/Central'))
    stamp = now_central.strftime("%m%d_%H%M")
    label = re.sub(r'\W+', '_', prompt)[:90]
    return f"{stamp}_{label}.{file_type}"
# Persist a prompt/response pair to disk.
def create_file(filename, prompt, response):
    """Write ``prompt``, a blank line and ``response`` to ``filename`` as UTF-8."""
    with open(filename, 'w', encoding='utf-8') as handle:
        handle.write("\n\n".join((prompt, response)))
def perform_ai_lookup(query):
    """Run the arXiv RAG lookup for ``query``, display the result and save it.

    Two calls are made on the ``awacke1/Arxiv-Paper-Search-And-QA-RAG-Pattern``
    client: ``/update_with_rag_md`` for the reference list and ``/ask_llm``
    for the answer. The answer is read aloud when it is longer than 10
    characters. The combined markdown (question, answer, references,
    extracted links) is rendered, timing information is shown, and the result
    is written to a generated ``.md`` file.

    Returns:
        The combined markdown text.
    """
    # Keep a wall-clock stamp for display and a separate high-resolution
    # counter for the elapsed time; the formatted stamps only carry whole
    # seconds, so deriving the duration from them loses the fraction.
    start_time = time.strftime("%Y-%m-%d %H:%M:%S")
    started = time.perf_counter()

    client = Client("awacke1/Arxiv-Paper-Search-And-QA-RAG-Pattern")
    response1 = client.predict(
        query,
        20,
        "Semantic Search",
        "mistralai/Mixtral-8x7B-Instruct-v0.1",
        api_name="/update_with_rag_md"
    )
    Question = '### π ' + query + '\r\n'  # Format for markdown display with links
    References = response1[0]
    ReferenceLinks = extract_urls(References)

    # Search 2 - retrieve the summary with papers context and original query
    response2 = client.predict(
        query,
        "mistralai/Mixtral-8x7B-Instruct-v0.1",
        True,
        api_name="/ask_llm"
    )
    # Default to an empty answer so a short reply cannot leave the name unbound.
    Answer = ''
    if len(response2) > 10:
        Answer = response2
        SpeechSynthesis(Answer)

    # Restructure results to follow format of Question, Answer, References, ReferenceLinks
    results = Question + '\r\n' + Answer + '\r\n' + References + '\r\n' + ReferenceLinks
    st.markdown(results)
    st.write('πRun of Multi-Agent System Paper Summary Spec is Complete')

    end_time = time.strftime("%Y-%m-%d %H:%M:%S")
    elapsed_seconds = time.perf_counter() - started
    st.write(f"Start time: {start_time}")
    st.write(f"Finish time: {end_time}")
    st.write(f"Elapsed time: {elapsed_seconds:.2f} seconds")

    filename = generate_filename(query, "md")
    create_file(filename, query, results)
    return results
# Chat processing functions
def process_with_gpt(text_input):
    """Send ``text_input`` plus the stored conversation to the OpenAI chat model.

    The user turn is appended to ``st.session_state.messages`` and echoed in a
    chat bubble; the reply is shown, saved to a generated ``.md`` file and
    appended to the same history.

    Returns:
        The reply text, or None when ``text_input`` is empty.
    """
    if not text_input:
        return None

    st.session_state.messages.append({"role": "user", "content": text_input})
    with st.chat_message("user"):
        st.markdown(text_input)

    with st.chat_message("assistant"):
        conversation = []
        for message in st.session_state.messages:
            conversation.append({"role": message["role"], "content": message["content"]})
        completion = openai_client.chat.completions.create(
            model=st.session_state["openai_model"],
            messages=conversation,
            stream=False,
        )
        return_text = completion.choices[0].message.content
        labelled_reply = "GPT-4o: " + return_text
        st.write(labelled_reply)
        # The file name is derived from the labelled reply, not from the prompt.
        create_file(generate_filename(labelled_reply, "md"), text_input, return_text)
        st.session_state.messages.append({"role": "assistant", "content": return_text})
    return return_text
def process_with_claude(text_input):
    """Send ``text_input`` to Claude as a single user message and return the reply.

    The prompt is echoed in a chat bubble; the reply is shown, saved to a
    generated ``.md`` file and recorded in ``st.session_state.chat_history``.

    Returns:
        The reply text, or None when ``text_input`` is empty.
    """
    if not text_input:
        return None

    with st.chat_message("user"):
        st.markdown(text_input)

    with st.chat_message("assistant"):
        response = claude_client.messages.create(
            model="claude-3-sonnet-20240229",
            max_tokens=1000,
            messages=[{"role": "user", "content": text_input}],
        )
        response_text = response.content[0].text
        labelled_reply = "Claude: " + response_text
        st.write(labelled_reply)
        # The file name is derived from the labelled reply, not from the prompt.
        create_file(generate_filename(labelled_reply, "md"), text_input, response_text)
        exchange = {"user": text_input, "claude": response_text}
        st.session_state.chat_history.append(exchange)
    return response_text
# File management functions
def load_file(file_name):
    """Return the full text of ``file_name``, decoded as UTF-8."""
    with open(file_name, "r", encoding='utf-8') as handle:
        return handle.read()
def create_zip_of_files(files):
    """Pack ``files`` into ``all_files.zip`` in the working directory.

    Returns:
        The archive name, ``"all_files.zip"``.
    """
    archive_name = "all_files.zip"
    archive = zipfile.ZipFile(archive_name, 'w')
    with archive:
        for member in files:
            archive.write(member)
    return archive_name
def get_media_html(media_path, media_type="video", width="100%"):
    """Return an HTML ``<video>`` or ``<audio>`` tag with the media embedded as base64.

    Args:
        media_path: Path of the media file; the whole file is read and inlined.
        media_type: ``"video"`` for a video tag; any other value yields an
            audio tag.
        width: Value used for the video ``width`` attribute or the audio CSS width.

    Returns:
        The HTML snippet as a string.
    """
    # Use a context manager so the file handle is closed promptly.
    with open(media_path, 'rb') as media_file:
        media_data = base64.b64encode(media_file.read()).decode()
    if media_type == "video":
        return f'''
<video width="{width}" controls autoplay muted loop>
<source src="data:video/mp4;base64,{media_data}" type="video/mp4">
Your browser does not support the video tag.
</video>
'''
    else:  # audio
        return f'''
<audio controls style="width: {width};">
<source src="data:audio/mpeg;base64,{media_data}" type="audio/mpeg">
Your browser does not support the audio element.
</audio>
'''
def create_media_gallery():
    """Create the media gallery interface.

    Shows three tabs built from files in the working directory: images
    (``*.png``, ``*.jpg``) in a column grid, audio (``*.mp3``, ``*.wav``) and
    video (``*.mp4``) in expanders. Each item has a button that runs the
    matching analysis helper and displays its result.

    NOTE: this name is defined again further down in the file; the later
    definition is the one in effect once the module has loaded.
    """
    st.header("π¬ Media Gallery")
    tabs = st.tabs(["πΌοΈ Images", "π΅ Audio", "π₯ Video"])
    with tabs[0]:
        image_files = glob.glob("*.png") + glob.glob("*.jpg")
        if image_files:
            num_cols = st.slider("Number of columns", 1, 5, 3)
            cols = st.columns(num_cols)
            for idx, image_file in enumerate(image_files):
                # Fill the columns left to right, wrapping around.
                with cols[idx % num_cols]:
                    img = Image.open(image_file)
                    st.image(img, use_container_width=True)
                    # Add GPT vision analysis option
                    if st.button(f"Analyze {os.path.basename(image_file)}"):
                        analysis = process_image(image_file,
                                                 "Describe this image in detail and identify key elements.")
                        st.markdown(analysis)
    with tabs[1]:
        audio_files = glob.glob("*.mp3") + glob.glob("*.wav")
        for audio_file in audio_files:
            with st.expander(f"π΅ {os.path.basename(audio_file)}"):
                st.markdown(get_media_html(audio_file, "audio"), unsafe_allow_html=True)
                if st.button(f"Transcribe {os.path.basename(audio_file)}"):
                    # The open file object (not the path) is handed to process_audio.
                    with open(audio_file, "rb") as f:
                        transcription = process_audio(f)
                        st.write(transcription)
    with tabs[2]:
        video_files = glob.glob("*.mp4")
        for video_file in video_files:
            with st.expander(f"π₯ {os.path.basename(video_file)}"):
                st.markdown(get_media_html(video_file, "video"), unsafe_allow_html=True)
                if st.button(f"Analyze {os.path.basename(video_file)}"):
                    analysis = process_video_with_gpt(video_file,
                                                      "Describe what's happening in this video.")
                    st.markdown(analysis)
def display_file_manager():
    """Display file management sidebar with guaranteed unique button keys.

    Lists every ``*.md`` file in the working directory (newest name first by
    reverse sort) with view, download, edit and delete controls, plus
    "Delete All" and "Download All" buttons.

    NOTE: this name is defined again further down in the file; the later
    definition is the one in effect once the module has loaded.
    """
    st.sidebar.title("π File Management")
    all_files = glob.glob("*.md")
    all_files.sort(reverse=True)
    if st.sidebar.button("π Delete All", key="delete_all_files_button"):
        # Removes every listed markdown file, then reruns to refresh the list.
        for file in all_files:
            os.remove(file)
        st.rerun()
    if st.sidebar.button("β¬οΈ Download All", key="download_all_files_button"):
        zip_file = create_zip_of_files(all_files)
        st.sidebar.markdown(get_download_link(zip_file), unsafe_allow_html=True)
    # Create unique keys using file attributes
    for idx, file in enumerate(all_files):
        # Get file stats for unique identification
        file_stat = os.stat(file)
        # Key combines list position, size and modification time.
        unique_id = f"{idx}_{file_stat.st_size}_{file_stat.st_mtime}"
        col1, col2, col3, col4 = st.sidebar.columns([1,3,1,1])
        with col1:
            if st.button("π", key=f"view_{unique_id}"):
                st.session_state.current_file = file
                st.session_state.file_content = load_file(file)
        with col2:
            st.markdown(get_download_link(file), unsafe_allow_html=True)
        with col3:
            # Edit stores the same session state as view.
            if st.button("π", key=f"edit_{unique_id}"):
                st.session_state.current_file = file
                st.session_state.file_content = load_file(file)
        with col4:
            if st.button("π", key=f"delete_{unique_id}"):
                os.remove(file)
                st.rerun()
# Speech Recognition HTML Component
# Stand-alone HTML page for continuous speech recognition: start/stop/clear
# buttons, a status line, a transcript area and a hidden input
# ("streamlit-data") that mirrors the accumulated transcript. The script uses
# webkitSpeechRecognition, restarts recognition when it ends while listening,
# and posts a 'clear_transcript' message to the parent window on clear.
# NOTE(review): no code visible in this file reads this constant
# (create_speech_component builds its own markup) -- confirm whether it is
# still needed.
speech_recognition_html = """
<!DOCTYPE html>
<html>
<head>
<title>Continuous Speech Demo</title>
<style>
body {
font-family: sans-serif;
padding: 20px;
max-width: 800px;
margin: 0 auto;
}
button {
padding: 10px 20px;
margin: 10px 5px;
font-size: 16px;
}
#status {
margin: 10px 0;
padding: 10px;
background: #e8f5e9;
border-radius: 4px;
}
#output {
white-space: pre-wrap;
padding: 15px;
background: #f5f5f5;
border-radius: 4px;
margin: 10px 0;
min-height: 100px;
max-height: 400px;
overflow-y: auto;
}
.controls {
margin: 10px 0;
}
</style>
</head>
<body>
<div class="controls">
<button id="start">Start Listening</button>
<button id="stop" disabled>Stop Listening</button>
<button id="clear">Clear Text</button>
</div>
<div id="status">Ready</div>
<div id="output"></div>
<!-- Add the hidden input here -->
<input type="hidden" id="streamlit-data" value="">
<script>
if (!('webkitSpeechRecognition' in window)) {
alert('Speech recognition not supported');
} else {
const recognition = new webkitSpeechRecognition();
const startButton = document.getElementById('start');
const stopButton = document.getElementById('stop');
const clearButton = document.getElementById('clear');
const status = document.getElementById('status');
const output = document.getElementById('output');
let fullTranscript = '';
let lastUpdateTime = Date.now();
// Configure recognition
recognition.continuous = true;
recognition.interimResults = true;
// Function to start recognition
const startRecognition = () => {
try {
recognition.start();
status.textContent = 'Listening...';
startButton.disabled = true;
stopButton.disabled = false;
} catch (e) {
console.error(e);
status.textContent = 'Error: ' + e.message;
}
};
// Auto-start on load
window.addEventListener('load', () => {
setTimeout(startRecognition, 1000);
});
startButton.onclick = startRecognition;
stopButton.onclick = () => {
recognition.stop();
status.textContent = 'Stopped';
startButton.disabled = false;
stopButton.disabled = true;
};
clearButton.onclick = () => {
fullTranscript = '';
output.textContent = '';
window.parent.postMessage({
type: 'clear_transcript',
}, '*');
};
recognition.onresult = (event) => {
let interimTranscript = '';
let finalTranscript = '';
for (let i = event.resultIndex; i < event.results.length; i++) {
const transcript = event.results[i][0].transcript;
if (event.results[i].isFinal) {
finalTranscript += transcript + '\\n';
} else {
interimTranscript += transcript;
}
}
if (finalTranscript || (Date.now() - lastUpdateTime > 5000)) {
if (finalTranscript) {
fullTranscript += finalTranscript;
// Update the hidden input value
document.getElementById('streamlit-data').value = fullTranscript;
}
lastUpdateTime = Date.now();
}
output.textContent = fullTranscript + (interimTranscript ? '... ' + interimTranscript : '');
output.scrollTop = output.scrollHeight;
document.getElementById('streamlit-data').value = fullTranscript;
};
recognition.onend = () => {
if (!stopButton.disabled) {
try {
recognition.start();
console.log('Restarted recognition');
} catch (e) {
console.error('Failed to restart recognition:', e);
status.textContent = 'Error restarting: ' + e.message;
startButton.disabled = false;
stopButton.disabled = true;
}
}
};
recognition.onerror = (event) => {
console.error('Recognition error:', event.error);
status.textContent = 'Error: ' + event.error;
if (event.error === 'not-allowed' || event.error === 'service-not-allowed') {
startButton.disabled = false;
stopButton.disabled = true;
}
};
}
</script>
</body>
</html>
"""
# Helper functions
def generate_filename(prompt, file_type):
    """Build a timestamped file name from ``prompt``.

    The name is ``<MMDD_HHMM>_<prompt>.<file_type>`` where the timestamp is
    taken in the US/Central zone and the prompt has the characters
    ``<>:"/\\|?*`` and newlines replaced by spaces, whitespace runs collapsed,
    outer whitespace stripped and the result cut to 230 characters.
    """
    zone = pytz.timezone('US/Central')
    stamp = datetime.now(zone).strftime("%m%d_%H%M")
    cleaned = re.sub(r'\s+', ' ', re.sub(r'[<>:"/\\|?*\n]', ' ', prompt))
    label = cleaned.strip()[:230]
    return f"{stamp}_{label}.{file_type}"
# File management functions
def load_file(file_name):
    """Read ``file_name`` as UTF-8 text and return its entire content."""
    with open(file_name, "r", encoding='utf-8') as source:
        text = source.read()
    return text
def create_zip_of_files(files):
    """Write every path in ``files`` into ``all_files.zip`` and return that name."""
    target = "all_files.zip"
    with zipfile.ZipFile(target, 'w') as bundle:
        for path in files:
            bundle.write(path)
    return target
def get_download_link(file):
    """Return an HTML anchor that downloads ``file`` via a base64 data URI.

    The file is read in full and inlined; its base name is used for both the
    ``download`` attribute and the link text.
    """
    with open(file, "rb") as source:
        payload = base64.b64encode(source.read()).decode()
    label = os.path.basename(file)
    return f'<a href="data:file/txt;base64,{payload}" download="{label}">Download {label}π</a>'
def display_file_manager():
    """Display file management sidebar.

    Lists every ``*.md`` file in the working directory (reverse-sorted by
    name) with view, download, edit and delete controls, plus "Delete All"
    and "Download All" buttons. Per-file button keys are built from the file
    name.
    """
    st.sidebar.title("π File Management")
    all_files = glob.glob("*.md")
    all_files.sort(reverse=True)
    if st.sidebar.button("π Delete All"):
        # Removes every listed markdown file, then reruns to refresh the list.
        for file in all_files:
            os.remove(file)
        st.rerun()
    if st.sidebar.button("β¬οΈ Download All"):
        zip_file = create_zip_of_files(all_files)
        st.sidebar.markdown(get_download_link(zip_file), unsafe_allow_html=True)
    for file in all_files:
        col1, col2, col3, col4 = st.sidebar.columns([1,3,1,1])
        with col1:
            if st.button("π", key="view_"+file):
                st.session_state.current_file = file
                st.session_state.file_content = load_file(file)
        with col2:
            st.markdown(get_download_link(file), unsafe_allow_html=True)
        with col3:
            # Edit stores the same session state as view.
            if st.button("π", key="edit_"+file):
                st.session_state.current_file = file
                st.session_state.file_content = load_file(file)
        with col4:
            if st.button("π", key="delete_"+file):
                os.remove(file)
                st.rerun()
def create_media_gallery():
    """Create the media gallery interface.

    Shows three tabs built from files in the working directory: images
    (``*.png``, ``*.jpg``) in a column grid, audio (``*.mp3``, ``*.wav``) and
    video (``*.mp4``) in expanders. Each item has a button that runs the
    matching analysis helper and displays its result.
    """
    st.header("π¬ Media Gallery")
    tabs = st.tabs(["πΌοΈ Images", "π΅ Audio", "π₯ Video"])
    with tabs[0]:
        image_files = glob.glob("*.png") + glob.glob("*.jpg")
        if image_files:
            num_cols = st.slider("Number of columns", 1, 5, 3)
            cols = st.columns(num_cols)
            for idx, image_file in enumerate(image_files):
                # Fill the columns left to right, wrapping around.
                with cols[idx % num_cols]:
                    img = Image.open(image_file)
                    st.image(img, use_container_width=True)
                    # Add GPT vision analysis option
                    if st.button(f"Analyze {os.path.basename(image_file)}"):
                        analysis = process_image(image_file,
                                                 "Describe this image in detail and identify key elements.")
                        st.markdown(analysis)
    with tabs[1]:
        audio_files = glob.glob("*.mp3") + glob.glob("*.wav")
        for audio_file in audio_files:
            with st.expander(f"π΅ {os.path.basename(audio_file)}"):
                st.markdown(get_media_html(audio_file, "audio"), unsafe_allow_html=True)
                if st.button(f"Transcribe {os.path.basename(audio_file)}"):
                    # The open file object (not the path) is handed to process_audio.
                    with open(audio_file, "rb") as f:
                        transcription = process_audio(f)
                        st.write(transcription)
    with tabs[2]:
        video_files = glob.glob("*.mp4")
        for video_file in video_files:
            with st.expander(f"π₯ {os.path.basename(video_file)}"):
                st.markdown(get_media_html(video_file, "video"), unsafe_allow_html=True)
                if st.button(f"Analyze {os.path.basename(video_file)}"):
                    analysis = process_video_with_gpt(video_file,
                                                      "Describe what's happening in this video.")
                    st.markdown(analysis)
def get_media_html(media_path, media_type="video", width="100%"):
    """Return an HTML ``<video>`` or ``<audio>`` tag with the media embedded as base64.

    Args:
        media_path: Path of the media file; the whole file is read and inlined.
        media_type: ``"video"`` for a video tag; any other value yields an
            audio tag.
        width: Value used for the video ``width`` attribute or the audio CSS width.

    Returns:
        The HTML snippet as a string.
    """
    # Use a context manager so the file handle is closed promptly.
    with open(media_path, 'rb') as media_file:
        media_data = base64.b64encode(media_file.read()).decode()
    if media_type == "video":
        return f'''
<video width="{width}" controls autoplay muted loop>
<source src="data:video/mp4;base64,{media_data}" type="video/mp4">
Your browser does not support the video tag.
</video>
'''
    else:  # audio
        return f'''
<audio controls style="width: {width};">
<source src="data:audio/mpeg;base64,{media_data}" type="audio/mpeg">
Your browser does not support the audio element.
</audio>
'''
def set_transcript(text):
    """Store ``text`` as the ``voice_transcript`` entry of the session state."""
    setattr(st.session_state, "voice_transcript", text)
def main():
    """Render the app: action selector, voice-input workflow and file sidebar.

    Only the voice-input choice has handling in this function; the other
    radio options select nothing here.
    """
    st.sidebar.markdown("### π²BikeAIπ Claude and GPT Multi-Agent Research AI")
    tab_main = st.radio("Choose Action:",
                        ["π€ Voice Input", "π¬ Chat", "πΈ Media Gallery", "π Search ArXiv", "π File Editor"],
                        horizontal=True)
    if tab_main == "π€ Voice Input":
        st.subheader("Voice Recognition")
        try:
            # Initialize speech component
            current_transcript = integrate_speech_component()
            # Show last update time
            st.text(f"Last updated: {datetime.fromtimestamp(st.session_state.last_update).strftime('%H:%M:%S')}")
            # Process buttons if we have a transcript
            if current_transcript:
                col1, col2, col3 = st.columns(3)
                with col1:
                    if st.button("Process with GPT"):
                        with st.spinner("Processing with GPT..."):
                            response = process_with_gpt(current_transcript)
                            st.markdown(response)
                with col2:
                    if st.button("Process with Claude"):
                        with st.spinner("Processing with Claude..."):
                            response = process_with_claude(current_transcript)
                            st.markdown(response)
                with col3:
                    if st.button("Search ArXiv"):
                        with st.spinner("Searching ArXiv..."):
                            results = perform_ai_lookup(current_transcript)
                            st.markdown(results)
        except Exception as e:
            # Any failure in the voice workflow is reported in the page
            # instead of aborting the run.
            st.error(f"Error in voice input: {str(e)}")
    # Always show file manager in sidebar
    display_file_manager()


# Run the app when the file is executed as a script.
if __name__ == "__main__":
    main()