# Spaces: Running
import soundfile as sf | |
import json | |
import os | |
import uuid | |
import numpy as np | |
from flask import Flask, request, jsonify | |
import sys | |
import requests | |
import yt_dlp | |
from google import genai | |
import logging | |
import subprocess | |
from retrying import retry | |
import time | |
from google.genai import types | |
from PIL import Image | |
from io import BytesIO | |
import PIL.Image | |
import base64 | |
# Send all log records (INFO and above) to stdout with a timestamped format.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s [%(levelname)s] %(name)s:%(lineno)d - %(message)s',
    handlers=[
        logging.StreamHandler(sys.stdout)
    ]
)
logger = logging.getLogger(__name__)
# Gemini API client; the key is read from the API_KEY environment variable.
client = genai.Client(api_key=os.getenv("API_KEY"))
# Default model name passed to generate_content_with_retry by the handlers.
gemini_model = "gemini-2.0-flash"
app = Flask(__name__)
# Working directory for uploads, downloads and segments.
# NOTE: clear_temp_dir() deletes every regular file directly inside this directory.
output_dir = "/tmp"
def auth():
    """Reject requests that do not carry the shared secret token.

    Requests to the root path ``/`` are let through unchecked. Every other
    request must send an ``x-secret-token`` header whose value equals the
    ``GOOGLE_SECRET`` environment variable.

    Returns:
        ``None`` when the request is allowed, otherwise a
        ``(json_response, 403)`` tuple.

    NOTE(review): no registration decorator is visible in this view;
    presumably this runs as a Flask ``before_request`` hook -- confirm.
    """
    import hmac  # local import: constant-time comparison of secrets

    if request.path == '/':
        return None

    supplied = request.headers.get('x-secret-token')
    expected = os.getenv('GOOGLE_SECRET')
    # Both values must be non-empty, so an unset secret never authorises
    # anyone. compare_digest avoids leaking the secret through timing.
    authorised = (
        bool(supplied)
        and bool(expected)
        and hmac.compare_digest(supplied.encode('utf-8'), expected.encode('utf-8'))
    )
    if not authorised:
        logger.info("Invalid token")
        return jsonify({"error": "Invalid token"}), 403
    return None
def hello():
    """Liveness probe: return a fixed plain-text body."""
    alive_message = "Server is alive"
    return alive_message
def proxy():
    """Forward a prompt (plus optional uploaded media) to Gemini.

    The request body is read as JSON when the request is JSON, otherwise as
    form data. Recognised fields:

    * ``prompt`` -- text prompt.
    * ``file_type`` -- ``'image'`` (multiple files under ``image``),
      ``'audio'`` (one file under ``audio``) or ``'video'`` (one file under
      ``video``); anything else means "no files".
    * ``generate_images`` -- ``1`` or ``'1'`` requests text + image output.

    Uploaded files are stored in ``output_dir``, audio is converted to MP3,
    and each file is uploaded to the Gemini Files API before generation.

    Returns:
        ``(json, 200)`` with ``{"status": "ok", "response": {...}}`` on
        success, ``(json, 400)`` when neither files nor a prompt are given,
        ``(json, 500)`` on any processing error.

    Local files and remote uploads are released in ``finally`` so that error
    paths and early returns do not leak them.
    """
    clear_temp_dir()
    body = request.get_json() if request.is_json else request.form

    uploaded_files = []
    saved_paths = []
    try:
        prompt = body.get('prompt')
        logger.info(f"prompt: {prompt}")
        file_type = body.get('file_type')
        generate_images = body.get('generate_images')

        if file_type == 'image':
            files = request.files.getlist('image')
        elif file_type in ('audio', 'video'):
            single_file = request.files.get(file_type)
            files = [single_file] if single_file else []
        else:
            files = []
        logger.info(f"files: {files}")

        for file in files:
            if not file:
                continue
            # The filename comes from the client: keep only alphanumeric
            # characters of the extension so it cannot inject path separators.
            raw_extension = (file.filename or '').rsplit('.', 1)[-1].lower()
            file_extension = ''.join(ch for ch in raw_extension if ch.isalnum()) or 'bin'
            saved_path = os.path.join(output_dir, f"{uuid.uuid4()}.{file_extension}")
            logger.info(f"Saving file {saved_path}")
            file.save(saved_path)
            saved_paths.append(saved_path)
            if file_type == 'audio':
                saved_path = convert_to_mp3(input_path=saved_path, output_dir=output_dir)
                if saved_path not in saved_paths:
                    # Track the converted file too so it is removed afterwards.
                    saved_paths.append(saved_path)
            if not os.path.exists(saved_path):
                return jsonify({"error": "Error while processing file"}), 500
            logger.info(f"Uploading {saved_path}")
            uploaded_files.append(client.files.upload(file=saved_path))

        if uploaded_files:
            logger.info(f"Uploaded {len(uploaded_files)} file(s)")
            response = generate_content_with_retry(
                client,
                model=gemini_model,
                contents=[prompt] + uploaded_files,
                generate_images=generate_images,
            )
        elif prompt is not None:
            response = generate_content_with_retry(
                client,
                model=gemini_model,
                contents=prompt,
                generate_images=generate_images,
            )
        else:
            return jsonify({"error": "No prompt provided"}), 400

        if generate_images == 1 or generate_images == '1':
            result = {'text': '', 'images': []}
            for part in response.candidates[0].content.parts:
                if part.text:
                    result['text'] += part.text
                elif part.inline_data:
                    # Re-encode whatever image format came back as PNG and
                    # ship it as a base64 data URI.
                    img_io = BytesIO()
                    with Image.open(BytesIO(part.inline_data.data)) as img:
                        img.save(img_io, 'PNG')
                    img_base64 = base64.b64encode(img_io.getvalue()).decode('utf-8')
                    result['images'].append({
                        'data': f"data:image/png;base64,{img_base64}",
                        'format': 'png',
                    })
        else:
            result = {'text': response.text}
        return jsonify({"status": "ok", "response": result}), 200
    except Exception as e:
        logger.error(str(e), exc_info=True)
        return jsonify({"error": str(e)}), 500
    finally:
        # Best-effort cleanup: a failed delete must not mask the real result.
        for myfile in uploaded_files:
            try:
                client.files.delete(name=myfile.name)
            except Exception as cleanup_error:
                logger.warning(f"Could not delete remote file {myfile.name}: {cleanup_error}")
        for path in saved_paths:
            if os.path.exists(path):
                logger.info(f"Removing local file {path}")
                os.remove(path)
        clear_temp_dir()
def notifier():
    """Download a stream's audio, summarise it in 2-hour segments via Gemini.

    Expects a JSON body with ``prompt`` and ``url``. The audio is downloaded
    with ``download_audio``, split by ``create_and_trim_segments``, and every
    segment is uploaded to Gemini together with the prompt. Each segment
    summary is POSTed to ``GOOGLE_SCRIPT_URL`` as soon as it is available.
    Between segments the handler sleeps for 10 minutes.

    Returns:
        ``(json, 200)`` with ``{"status": "ok", "response": <text>}`` where
        the text is the last processed segment's model output, or a fixed
        fallback message when no segment was produced; ``(json, 500)`` on any
        error.

    NOTE(review): only the last segment's text is returned to the caller
    (the per-segment summaries are delivered via the POST) -- confirm that a
    joined summary was not intended.
    """
    segment_seconds = 7200  # 2 hours per segment
    try:
        clear_temp_dir()
        body = request.get_json()
        logger.info(f"body: {body}")
        prompt = body['prompt']
        url = body['url']
        unique_filename = f"audio_{uuid.uuid4()}"
        # Start from a clean slate remotely: drop every previously uploaded file.
        for f in client.files.list():
            client.files.delete(name=f.name)
        file_path = download_audio(url, os.path.join(output_dir, unique_filename))
        duration, _ = get_audio_duration(file_path)
        logger.info(f"file duration: {duration}")
        segments = create_and_trim_segments(file_path, segment_duration=segment_seconds)
        logger.info(f"segments: {segments}")

        responses = []
        last_response_text = None
        for idx, segment in enumerate(segments):
            if idx >= 1:
                # 10-minute pause before every segment after the first.
                logger.info(f"Waiting 10 minutes before processing segment {idx+1}")
                time.sleep(600)
            hours, remainder = divmod(idx * segment_seconds, 3600)
            minutes, seconds = divmod(remainder, 60)
            start_time = f"{hours:02d}:{minutes:02d}:{seconds:02d}"
            myfile = None
            try:
                segment_duration, _ = get_audio_duration(segment)
                logger.info(f"Processing segment {idx+1}, start: {start_time}, duration: {segment_duration}")
                file_size = os.path.getsize(segment)
                logger.info(f"Uploading segment {segment}, size: {file_size / (1024 * 1024):.2f} MB")
                myfile = client.files.upload(file=segment)
                logger.info(f"Uploaded segment: {myfile.name}, URI: {myfile.uri}")
                if idx == 0:
                    logger.info(f"Prompt: {prompt}")
                full_prompt = f"Этот аудиофайл — сегмент стрима по GTA 5 RP, начиная с {start_time}." + prompt
                response = generate_content_with_retry(client, model=gemini_model, contents=[full_prompt, myfile])
                logger.info(f"Segment {idx+1}")
                last_response_text = response.text
                segment_response = f"Сегмент {start_time} ({segment_duration}):\n{last_response_text}\n"
                responses.append(segment_response)
                payload = {"summary": segment_response, "token": os.getenv("GOOGLE_SECRET")}
                headers = {'Content-Type': 'application/json'}
                # A timeout keeps a stalled receiver from hanging the worker forever.
                requests.post(os.getenv("GOOGLE_SCRIPT_URL"), json=payload, headers=headers, timeout=30)
            except Exception as e:
                logger.error(f"Failed to process segment {idx+1}: {str(e)}", exc_info=True)
                raise
            finally:
                if myfile is not None:
                    # Best-effort: release the remote copy even when generation failed.
                    try:
                        client.files.delete(name=myfile.name)
                    except Exception as cleanup_error:
                        logger.warning(f"Could not delete remote file {myfile.name}: {cleanup_error}")
                if os.path.exists(segment):
                    logger.info(f"Removing segment: {segment}")
                    os.remove(segment)

        clear_temp_dir()
        # Without any processed segment there is no model response to return.
        response_text = last_response_text if responses else "Нет данных для хард RP."
        return jsonify({"status": "ok", "response": response_text}), 200
    except Exception as e:
        logger.error(str(e), exc_info=True)
        return jsonify({"error": str(e)}), 500
def download_audio(url, output_path):
    """Download the best audio stream of *url* and extract it as MP3.

    Args:
        url: Media URL handed to yt-dlp.
        output_path: Destination path without extension.

    Returns:
        ``output_path`` with ``.mp3`` appended.

    Raises:
        FileNotFoundError: if the expected MP3 file is missing after the
            download has finished.
    """
    output_path_with_ext = f"{output_path}.mp3"
    extract_audio_step = {
        'key': 'FFmpegExtractAudio',
        'preferredcodec': 'mp3',
        'preferredquality': '192'
    }
    ydl_opts = {
        'format': 'bestaudio',
        'outtmpl': f"{output_path}.%(ext)s",
        'postprocessors': [extract_audio_step],
        'postprocessor_args': {
            'FFmpegExtractAudio': ['-b:a', '192k'],
        },
        'quiet': True,
    }

    logger.info("Starting download")
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        ydl.download([url])

    if os.path.exists(output_path_with_ext):
        logger.info(f"Download finished to {output_path_with_ext}")
        return output_path_with_ext
    raise FileNotFoundError(f"Expected output file {output_path_with_ext} not found")
def remove_temp_file(path):
    """Delete *path* if it exists; do nothing otherwise."""
    if not os.path.exists(path):
        return
    logger.info(f"Removing {path}")
    os.remove(path)
def clear_temp_dir():
    """Delete every regular file directly inside ``output_dir``.

    Sub-directories are left alone. Failures are logged and never raised:
    a file that cannot be deleted is skipped, and an error while listing the
    directory aborts the sweep.
    """
    try:
        for entry_name in os.listdir(output_dir):
            candidate = os.path.join(output_dir, entry_name)
            if not os.path.isfile(candidate):
                continue
            try:
                os.remove(candidate)
                logger.info(f"Removed {candidate}")
            except Exception as e:
                logger.info(f"Error while deleting file {e}")
    except Exception as e:
        logger.info(f"An error occured: {e}")
def get_auido_duration(file):
    """Return the duration of *file* as an ``HH:MM:SS`` string (via ffprobe).

    Raises whatever ``subprocess.check_output`` raises when ffprobe fails,
    and ``ValueError`` when its output is not a number.
    """
    probe = subprocess.check_output(
        [
            'ffprobe', '-v', 'error', '-show_entries', 'format=duration',
            '-of', 'default=noprint_wrappers=1:nokey=1', file
        ],
        text=True,
    )
    duration_seconds = float(probe.strip())
    hours = int(duration_seconds // 3600)
    minutes = int((duration_seconds % 3600) // 60)
    seconds = int(duration_seconds % 60)
    return f"{hours:02}:{minutes:02}:{seconds:02}"
def is_503_error(exception):
    """Return True when *exception* is an Exception whose text contains "503"."""
    if not isinstance(exception, Exception):
        return False
    return str(exception).find("503") != -1
def generate_content_with_retry(client, model, contents, generate_images=0):
    """Call ``client.models.generate_content`` in text or text+image mode.

    Args:
        client: Object exposing ``models.generate_content``.
        model: Model name used for plain text generation.
        contents: Prompt and/or uploaded files forwarded unchanged.
        generate_images: ``1`` or ``'1'`` requests text + image output.

    Returns:
        Whatever ``generate_content`` returns.

    Image output always uses ``gemini-2.0-flash-preview-image-generation``;
    ``model`` is ignored in that mode.

    NOTE(review): despite the name, nothing in this body retries. A retry
    decorator may be applied outside this view -- confirm.
    """
    wants_images = generate_images == 1 or generate_images == '1'
    if not wants_images:
        return client.models.generate_content(model=model, contents=contents)
    return client.models.generate_content(
        model="gemini-2.0-flash-preview-image-generation",
        contents=contents,
        config=types.GenerateContentConfig(response_modalities=['TEXT', 'IMAGE']),
    )
def get_audio_duration(file_path):
    """Probe *file_path* with ffprobe and return its duration.

    Returns:
        ``(formatted, seconds)`` where ``formatted`` is ``HH:MM:SS`` and
        ``seconds`` is the duration as a float.

    Raises:
        subprocess.CalledProcessError: ffprobe exited with an error (logged
            first, including ffprobe's stderr).
        ValueError: ffprobe printed something that is not a number.
    """
    cmd = [
        'ffprobe', '-v', 'error', '-show_entries', 'format=duration',
        '-of', 'default=noprint_wrappers=1:nokey=1', file_path
    ]
    try:
        # stderr must be captured explicitly, otherwise e.stderr is None and
        # the error log below carries no diagnostic.
        probe_output = subprocess.check_output(cmd, text=True, stderr=subprocess.PIPE)
    except subprocess.CalledProcessError as e:
        logger.error(f"Error getting duration: {e.stderr}", exc_info=True)
        raise
    duration_seconds = float(probe_output.strip())
    hours = int(duration_seconds // 3600)
    minutes = int((duration_seconds % 3600) // 60)
    seconds = int(duration_seconds % 60)
    duration_formatted = f"{hours:02d}:{minutes:02d}:{seconds:02d}"
    return duration_formatted, duration_seconds
def create_and_trim_segments(file_path, segment_duration=7200):
    """Split an audio file into consecutive segments, consuming the source.

    Repeatedly copies the first ``segment_duration`` seconds of *file_path*
    into ``<base>_segment_NNN.mp3`` (next to the source) and then rewrites
    the source without that leading part. The source file is deleted once
    the last segment has been produced.

    Args:
        file_path: Audio file to split. It is modified and finally removed.
        segment_duration: Maximum length of one segment, in seconds.

    Returns:
        List of created segment paths, in playback order. Empty when
        *file_path* does not exist.

    Raises:
        subprocess.CalledProcessError: ffmpeg failed while cutting/trimming.
        RuntimeError: an expected output file is missing, or trimming made
            no progress.

    If the source can no longer be probed (ffprobe error) it is treated as
    corrupted: it is deleted and the segments produced so far are returned.
    """
    segments = []
    temp_file = None
    try:
        work_dir = os.path.dirname(file_path)
        base_name = os.path.splitext(os.path.basename(file_path))[0]
        temp_file = os.path.join(work_dir, f"{base_name}_temp.mp3")
        segment_index = 0
        while os.path.exists(file_path):
            try:
                _, remaining_seconds = get_audio_duration(file_path)
            except subprocess.CalledProcessError as e:
                logger.warning(f"Cannot get duration: {e.stderr}. File may be corrupted - deleting")
                os.remove(file_path)
                break

            # Actual length of the segment about to be cut.
            current_segment_duration = min(segment_duration, remaining_seconds)
            if current_segment_duration < 1:
                # Less than one second left: nothing worth keeping.
                logger.info(f"Remaining duration too short ({remaining_seconds:.2f}s), deleting source file")
                os.remove(file_path)
                break

            segment_path = os.path.join(work_dir, f"{base_name}_segment_{segment_index:03d}.mp3")
            cmd_segment = [
                'ffmpeg', '-y', '-i', file_path,
                '-t', str(current_segment_duration),
                '-c', 'copy', segment_path
            ]
            try:
                subprocess.run(cmd_segment, check=True, capture_output=True, text=True)
                logger.info(f"Created segment: {segment_path} ({current_segment_duration:.2f}s)")
            except subprocess.CalledProcessError as e:
                logger.error(f"Error creating segment: {e.stderr}", exc_info=True)
                if os.path.exists(segment_path):
                    os.remove(segment_path)
                raise
            if not os.path.exists(segment_path):
                raise RuntimeError(f"Segment {segment_path} was not created")
            segments.append(segment_path)

            # That was the last segment: drop the source and stop.
            if remaining_seconds <= segment_duration:
                logger.info("Last segment created, deleting source file")
                os.remove(file_path)
                break

            # More audio remains: cut the part just exported off the source.
            cmd_trim = [
                'ffmpeg', '-y',
                '-ss', str(current_segment_duration),
                '-i', file_path,
                '-c', 'copy', temp_file
            ]
            try:
                subprocess.run(cmd_trim, check=True, capture_output=True, text=True)
            except subprocess.CalledProcessError as e:
                if os.path.exists(temp_file):
                    os.remove(temp_file)
                logger.error(f"Error trimming file: {e.stderr}")
                raise
            if not os.path.exists(temp_file):
                raise RuntimeError("Temp file was not created")
            # Atomic swap: the source path never points at a missing file.
            os.replace(temp_file, file_path)

            # Make sure the rewritten source is still a readable audio file.
            try:
                _, new_duration = get_audio_duration(file_path)
            except Exception:
                logger.error("Trimmed file is invalid")
                raise
            logger.info(f"Trimmed original file, new duration: {new_duration:.2f}s")
            if new_duration >= remaining_seconds:
                # Without progress the loop would emit segments forever.
                raise RuntimeError("Trimming did not shorten the source file")
            segment_index += 1
        return segments
    except Exception as e:
        logger.error(f"Fatal error: {str(e)}", exc_info=True)
        # Remove the intermediate file if an error left it behind.
        if temp_file and os.path.exists(temp_file):
            os.remove(temp_file)
        raise
def convert_to_mp3(input_path: str, output_dir: str = None, bitrate: str = '192k') -> str:
    """Convert an audio file to mono 44.1 kHz MP3 with ffmpeg.

    Args:
        input_path: Existing source file.
        output_dir: Directory for the result (created if missing). Defaults
            to the directory of *input_path*.
        bitrate: Target audio bitrate in ffmpeg notation, e.g. ``'192k'``.

    Returns:
        *input_path* unchanged when its first audio stream is already MP3,
        otherwise the path of ``<name>_converted.mp3``.

    Raises:
        FileNotFoundError: *input_path* does not exist.
        RuntimeError: ffmpeg exited with an error (partial output is removed).
    """
    if not os.path.exists(input_path):
        raise FileNotFoundError(f"Input file not found: {input_path}")
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    def is_mp3(filepath):
        """Check the first audio stream's codec; fall back to the extension."""
        cmd = ['ffprobe', '-v', 'error', '-select_streams', 'a:0',
               '-show_entries', 'stream=codec_name', '-of',
               'default=noprint_wrappers=1:nokey=1', filepath]
        try:
            result = subprocess.run(cmd, capture_output=True, text=True)
        except (OSError, subprocess.SubprocessError, UnicodeDecodeError):
            # ffprobe could not be run or its output could not be decoded.
            return filepath.lower().endswith('.mp3')
        return 'mp3' in result.stdout.lower()

    if is_mp3(input_path):
        logger.info(f"File {input_path} is already MP3, skipping conversion")
        return input_path

    original_name = os.path.splitext(os.path.basename(input_path))[0]
    output_path = os.path.join(
        output_dir or os.path.dirname(input_path),
        f"{original_name}_converted.mp3"  # suffix keeps the source file intact
    )
    command = [
        'ffmpeg',
        '-i', input_path,
        '-codec:a', 'libmp3lame',
        '-b:a', bitrate,                          # honour the requested bitrate
        '-ar', '44100',                           # sample rate
        '-ac', '1',                               # mono
        '-af', 'highpass=f=200,lowpass=f=3000',   # keep the speech band only
        '-metadata:s:a:0', 'language=rus',        # tag the audio stream language
        '-fflags', '+bitexact',
        '-map_metadata', '0',                     # carry over source metadata
        '-y',
        output_path
    ]
    logger.info(f"Converting {input_path} to MP3...")
    try:
        subprocess.run(command, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
    except subprocess.CalledProcessError as e:
        error_msg = f"FFmpeg conversion failed: {e.stderr}"
        logger.error(error_msg)
        if os.path.exists(output_path):
            os.remove(output_path)
        raise RuntimeError(error_msg) from e
    logger.info(f"Successfully converted to {output_path}")

    # Best-effort sanity check of the resulting bitrate: unparsable probe
    # output must not turn a successful conversion into a failure.
    check_cmd = ['ffprobe', '-v', 'error', '-select_streams', 'a',
                 '-show_entries', 'format=bit_rate', '-of',
                 'default=noprint_wrappers=1:nokey=1', output_path]
    result = subprocess.run(check_cmd, capture_output=True, text=True)
    try:
        actual_bitrate = int(result.stdout.strip()) // 1000
        requested_bitrate = int(bitrate.rstrip('kK'))
    except ValueError:
        logger.warning(f"Could not verify bitrate of {output_path}")
    else:
        if actual_bitrate != requested_bitrate:
            logger.warning(f"Requested {bitrate}, but got {actual_bitrate}k")
    return output_path
if __name__ == "__main__":
    # The server listens on all interfaces, so the interactive Werkzeug
    # debugger (which allows code execution from the browser) must not be on
    # by default. Enable it explicitly with FLASK_DEBUG=1 for local work.
    debug_enabled = os.getenv("FLASK_DEBUG", "").lower() in ("1", "true")
    app.run(host="0.0.0.0", port=7860, debug=debug_enabled)