gemini-proxy / main.py
dtfyu3's picture
Init
3c2a312
import base64
import hmac
import json
import logging
import os
import subprocess
import sys
import time
import uuid
from io import BytesIO

import numpy as np
import PIL.Image
import requests
import soundfile as sf
import yt_dlp
from flask import Flask, request, jsonify
from google import genai
from google.genai import types
from PIL import Image
from retrying import retry
# Route every log record to stdout, prefixed with timestamp, level,
# logger name and source line number.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s [%(levelname)s] %(name)s:%(lineno)d - %(message)s',
    handlers=[
        logging.StreamHandler(sys.stdout)
    ]
)
logger = logging.getLogger(__name__)
# Gemini API client; the key is read from the API_KEY environment variable.
client = genai.Client(api_key=os.getenv("API_KEY"))
# Default model name passed to generate_content_with_retry by the routes.
gemini_model = "gemini-2.0-flash"
app = Flask(__name__)
# Scratch directory for uploaded, downloaded and converted files.
# NOTE: clear_temp_dir() deletes every regular file found directly in it.
output_dir = "/tmp"
@app.before_request
def auth():
    """Reject requests that do not carry the shared secret token.

    Runs before every request. The root path ``/`` is exempt. Every other
    path must send an ``x-secret-token`` header whose value equals the
    ``GOOGLE_SECRET`` environment variable.

    Returns:
        ``None`` to let the request continue, or a ``(response, 403)`` tuple
        with a JSON error body when the token is missing or does not match.
    """
    if request.path == '/':
        return None

    supplied = request.headers.get('x-secret-token')
    expected = os.getenv('GOOGLE_SECRET')

    # A missing header or an unset/empty secret never authenticates.
    # compare_digest keeps the comparison time independent of how many
    # leading characters match; bytes are used because the str form of
    # compare_digest only accepts ASCII.
    authorized = (
        bool(supplied)
        and bool(expected)
        and hmac.compare_digest(supplied.encode('utf-8'), expected.encode('utf-8'))
    )
    if not authorized:
        logger.info("Invalid token")
        return jsonify({"error": "Invalid token"}), 403
    return None
@app.route('/', methods=['GET'])
def hello():
    """Liveness endpoint: answer GET / with a fixed plain-text message."""
    alive_message = "Server is alive"
    return alive_message
def _safe_extension(filename):
    """Return a lowercase, alphanumeric-only file extension for *filename*.

    The name comes from the client, so directory components and any
    non-alphanumeric characters are dropped before the value is used to
    build a path on disk. Falls back to ``'bin'`` when no usable extension
    is present.
    """
    base = os.path.basename(filename or '')
    _, dot, ext = base.rpartition('.')
    cleaned = ''.join(ch for ch in ext.lower() if ch.isalnum()) if dot else ''
    return cleaned or 'bin'


def _collect_text_and_images(response):
    """Fold the parts of the first candidate into ``{'text', 'images'}``.

    Text parts are concatenated; inline-data parts are decoded with PIL,
    re-encoded as PNG and returned as base64 data URIs.
    """
    result = {'text': '', 'images': []}
    for part in response.candidates[0].content.parts:
        if part.text:
            result['text'] += part.text
        elif part.inline_data:
            # Context managers guarantee the image and buffer are released
            # even if decoding or re-encoding fails.
            with Image.open(BytesIO(part.inline_data.data)) as img, BytesIO() as img_io:
                img.save(img_io, 'PNG')
                img_base64 = base64.b64encode(img_io.getvalue()).decode('utf-8')
            result['images'].append({
                'data': f"data:image/png;base64,{img_base64}", 'format': 'png'})
    return result


@app.route('/proxy', methods=['POST'])
def proxy():
    """Forward a prompt, optionally with uploaded files, to Gemini.

    Request fields (JSON body or form data):
        prompt: text prompt.
        file_type: ``'image'``, ``'audio'`` or ``'video'``; selects which
            multipart field (``image`` list, ``audio`` or ``video``) is read.
        generate_images: ``1`` / ``'1'`` to request text+image output.

    Uploaded files are saved under ``output_dir``, audio is passed through
    ``convert_to_mp3``, and each file is uploaded to the Gemini Files API.

    Returns:
        ``({"status": "ok", "response": {...}}, 200)`` on success,
        ``400`` when neither files nor a prompt were supplied,
        ``500`` with the error text on any other failure.

    Remote uploads and local temp files are always cleaned up, including on
    error paths.
    """
    clear_temp_dir()
    body = request.get_json() if request.is_json else request.form
    uploaded_files = []
    try:
        prompt = body.get('prompt')
        logger.info(f"prompt: {prompt}")
        file_type = body.get('file_type')
        generate_images = body.get('generate_images')

        if file_type == 'image':
            files = request.files.getlist('image')
        elif file_type in ('audio', 'video'):
            single_file = request.files.get(file_type)
            files = [single_file] if single_file else []
        else:
            files = []
        logger.info(f"files: {files}")

        for file in files:
            if not file:
                continue
            saved_path = os.path.join(
                output_dir, f"{uuid.uuid4()}.{_safe_extension(file.filename)}")
            logger.info(f"Saving file {saved_path}")
            file.save(saved_path)
            if file_type == 'audio':
                saved_path = convert_to_mp3(input_path=saved_path, output_dir=output_dir)
            if not os.path.exists(saved_path):
                return jsonify({"error": "Error while processing file"}), 500
            logger.info(f"Uploading {saved_path}")
            uploaded_files.append(client.files.upload(file=saved_path))

        if uploaded_files:
            logger.info(f"Uploaded {len(uploaded_files)} file(s)")
            response = generate_content_with_retry(
                client,
                model=gemini_model,
                contents=[prompt] + uploaded_files,
                generate_images=generate_images
            )
        elif prompt is not None:
            response = generate_content_with_retry(
                client, model=gemini_model, contents=prompt,
                generate_images=generate_images)
        else:
            return jsonify({"error": "No prompt provided"}), 400

        if generate_images == 1 or generate_images == '1':
            result = _collect_text_and_images(response)
        else:
            result = {'text': response.text}
        return jsonify({"status": "ok", "response": result}), 200
    except Exception as e:
        logger.error(str(e), exc_info=True)
        return jsonify({"error": str(e)}), 500
    finally:
        # Best-effort cleanup: a failed delete must not mask the real result.
        for remote_file in uploaded_files:
            try:
                client.files.delete(name=remote_file.name)
            except Exception as cleanup_error:
                logger.warning(f"Could not delete remote file {remote_file.name}: {cleanup_error}")
        # Removes the saved uploads and any converted MP3 in output_dir.
        clear_temp_dir()
@app.route('/notifier', methods=['POST'])
def notifier():
    """Download a stream's audio, summarise it per segment, and report back.

    Expects a JSON body with ``prompt`` and ``url``. The audio at ``url`` is
    downloaded, split into 2-hour segments, and each segment is uploaded to
    Gemini together with the prompt. Every segment summary is POSTed to the
    URL in the ``GOOGLE_SCRIPT_URL`` environment variable as soon as it is
    ready. The handler sleeps 10 minutes between segments.

    Returns:
        ``({"status": "ok", "response": <all segment summaries joined>}, 200)``
        on success, ``400`` when ``prompt``/``url`` are missing, ``500`` with
        the error text otherwise.
    """
    segment_length = 7200  # seconds of audio per segment (2 hours)
    try:
        clear_temp_dir()
        body = request.get_json(silent=True)
        logger.info(f"notifier request body: {body}")
        if not isinstance(body, dict) or 'prompt' not in body or 'url' not in body:
            return jsonify({"error": "JSON body with 'prompt' and 'url' is required"}), 400
        prompt = body['prompt']
        url = body['url']
        unique_filename = f"audio_{uuid.uuid4()}"

        # Remove every file currently stored in the Gemini Files API for
        # this key before uploading new segments.
        for f in client.files.list():
            client.files.delete(name=f.name)

        file_path = download_audio(url, os.path.join(output_dir, unique_filename))
        duration = get_auido_duration(file_path)
        logger.info(f"file duration: {duration}")
        segments = create_and_trim_segments(file_path, segment_duration=segment_length)
        logger.info(f"segments: {segments}")

        responses = []
        for idx, segment in enumerate(segments):
            if idx >= 1:
                # 10-minute pause before each segment after the first.
                logger.info(f"Waiting 10 minutes before processing segment {idx+1}")
                time.sleep(600)
            myfile = None
            try:
                segment_duration, _ = get_audio_duration(segment)
                hours, remainder = divmod(idx * segment_length, 3600)
                minutes, seconds = divmod(remainder, 60)
                start_time = f"{hours:02d}:{minutes:02d}:{seconds:02d}"
                logger.info(f"Processing segment {idx+1}, start: {start_time}, duration: {segment_duration}")
                file_size = os.path.getsize(segment)
                logger.info(f"Uploading segment {segment}, size: {file_size / (1024 * 1024):.2f} MB")
                myfile = client.files.upload(file=segment)
                logger.info(f"Uploaded segment: {myfile.name}, URI: {myfile.uri}")
                if idx == 0:
                    logger.info(f"Prompt: {prompt}")
                full_prompt = f"Этот аудиофайл — сегмент стрима по GTA 5 RP, начиная с {start_time}." + prompt
                response = generate_content_with_retry(
                    client, model=gemini_model, contents=[full_prompt, myfile])
                logger.info(f"Segment {idx+1}")
                segment_response = f"Сегмент {start_time} ({segment_duration}):\n{response.text}\n"
                responses.append(segment_response)
                payload = {"summary": segment_response, "token": os.getenv("GOOGLE_SECRET")}
                headers = {'Content-Type': 'application/json'}
                # A timeout keeps a stalled webhook from hanging the worker forever.
                requests.post(os.getenv("GOOGLE_SCRIPT_URL"), json=payload,
                              headers=headers, timeout=60)
            except Exception as e:
                logger.error(f"Failed to process segment {idx+1}: {str(e)}", exc_info=True)
                raise
            finally:
                # Release the remote upload even when generation failed.
                if myfile is not None:
                    try:
                        client.files.delete(name=myfile.name)
                    except Exception as cleanup_error:
                        logger.warning(f"Could not delete remote file {myfile.name}: {cleanup_error}")
                if os.path.exists(segment):
                    logger.info(f"Removing segment: {segment}")
                    os.remove(segment)

        # Return every segment summary; the fallback text covers the case
        # where no segment was produced at all.
        combined_response = "\n\n".join(responses) if responses else "Нет данных для хард RP."
        return jsonify({"status": "ok", "response": combined_response}), 200
    except Exception as e:
        logger.error(str(e), exc_info=True)
        return jsonify({"error": str(e)}), 500
    finally:
        clear_temp_dir()
def download_audio(url, output_path):
    """Download the best audio stream of *url* and extract it to MP3.

    Args:
        url: media URL handed to yt-dlp.
        output_path: destination path without extension.

    Returns:
        ``output_path`` with ``.mp3`` appended.

    Raises:
        FileNotFoundError: if the expected ``.mp3`` file does not exist once
            yt-dlp has finished.
    """
    mp3_path = f"{output_path}.mp3"
    extract_audio_step = {
        'key': 'FFmpegExtractAudio',
        'preferredcodec': 'mp3',
        'preferredquality': '192'
    }
    downloader_options = {
        'format': 'bestaudio',
        'outtmpl': f"{output_path}.%(ext)s",
        'postprocessors': [extract_audio_step],
        'postprocessor_args': {
            'FFmpegExtractAudio': ['-b:a', '192k'],
        },
        'quiet': True,
    }

    logger.info("Starting download")
    with yt_dlp.YoutubeDL(downloader_options) as downloader:
        downloader.download([url])

    if os.path.exists(mp3_path):
        logger.info(f"Download finished to {mp3_path}")
        return mp3_path
    raise FileNotFoundError(f"Expected output file {mp3_path} not found")
def remove_temp_file(path):
    """Delete the file at *path* if it exists; otherwise do nothing."""
    if not os.path.exists(path):
        return
    logger.info(f"Removing {path}")
    os.remove(path)
def clear_temp_dir():
    """Delete every regular file directly inside ``output_dir``.

    Sub-directories are left alone. Failures are logged and swallowed, so
    this function never raises for filesystem errors.
    """
    try:
        candidate_paths = [
            os.path.join(output_dir, entry) for entry in os.listdir(output_dir)
        ]
        for candidate in candidate_paths:
            if not os.path.isfile(candidate):
                continue
            try:
                os.remove(candidate)
                logger.info(f"Removed {candidate}")
            except Exception as removal_error:
                logger.info(f"Error while deleting file {removal_error}")
    except Exception as listing_error:
        logger.info(f"An error occured: {listing_error}")
def get_auido_duration(file):
    """Return the duration of *file* as an ``HH:MM:SS`` string.

    The duration in seconds is read from ``ffprobe``'s ``format=duration``
    entry. Errors from ``ffprobe`` or from parsing its output propagate.
    """
    probe_command = [
        'ffprobe', '-v', 'error', '-show_entries', 'format=duration',
        '-of', 'default=noprint_wrappers=1:nokey=1', file
    ]
    raw_output = subprocess.check_output(probe_command, text=True)
    total_seconds = float(raw_output.strip())

    hours = int(total_seconds // 3600)
    minutes = int((total_seconds % 3600) // 60)
    seconds = int(total_seconds % 60)
    return f"{hours:02}:{minutes:02}:{seconds:02}"
def is_503_error(exception):
    """Tell whether *exception* is an Exception whose message contains "503".

    Used as the ``retry_on_exception`` predicate of the retry decorator.
    """
    if not isinstance(exception, Exception):
        return False
    message = str(exception)
    return "503" in message
@retry(
    stop_max_attempt_number=3,
    wait_fixed=600000,  # 10-minute delay (600 seconds), given in milliseconds
    retry_on_exception=is_503_error
)
def generate_content_with_retry(client, model, contents, generate_images=0):
    """Call ``client.models.generate_content``, retrying on 503 errors.

    Args:
        client: Gemini client whose ``models.generate_content`` is called.
        model: model name used for plain (non-image) generation.
        contents: prompt and/or uploaded files forwarded to the API.
        generate_images: ``1`` or ``'1'`` switches to the image-generation
            model with TEXT+IMAGE response modalities; *model* is ignored
            in that case.

    Returns:
        Whatever ``generate_content`` returns.
    """
    image_requested = generate_images in (1, '1')
    if not image_requested:
        return client.models.generate_content(model=model, contents=contents)

    image_config = types.GenerateContentConfig(response_modalities=['TEXT', 'IMAGE'])
    return client.models.generate_content(
        model="gemini-2.0-flash-preview-image-generation",
        contents=contents,
        config=image_config,
    )
def get_audio_duration(file_path):
    """Return the duration of *file_path* as ``(HH:MM:SS string, seconds)``.

    The duration is read from ``ffprobe``'s ``format=duration`` entry.

    Raises:
        subprocess.CalledProcessError: when ``ffprobe`` exits non-zero; the
            captured stderr is logged first and is available on the
            exception's ``stderr`` attribute.
        ValueError: when ``ffprobe`` prints something that is not a number.
    """
    cmd = [
        'ffprobe', '-v', 'error', '-show_entries', 'format=duration',
        '-of', 'default=noprint_wrappers=1:nokey=1', file_path
    ]
    try:
        # stderr must be captured explicitly; otherwise e.stderr is None and
        # the log line below carries no diagnostic text.
        output = subprocess.check_output(cmd, text=True, stderr=subprocess.PIPE)
    except subprocess.CalledProcessError as e:
        logger.error(f"Error getting duration: {e.stderr}", exc_info=True)
        raise

    duration_seconds = float(output.strip())
    duration_formatted = (
        f"{int(duration_seconds // 3600):02d}:"
        f"{int((duration_seconds % 3600) // 60):02d}:"
        f"{int(duration_seconds % 60):02d}"
    )
    return duration_formatted, duration_seconds
def create_and_trim_segments(file_path, segment_duration=7200):
    """Split an audio file into consecutive segments, consuming the source.

    On each pass the first ``segment_duration`` seconds of ``file_path`` are
    stream-copied into ``<base>_segment_NNN.mp3`` next to the source, and the
    source is then replaced by the remainder. The source file is deleted
    once the last segment has been written.

    Args:
        file_path: path of the audio file to split; it no longer exists when
            the function returns successfully.
        segment_duration: maximum length of each segment, in seconds.

    Returns:
        List of created segment paths, in order.

    Raises:
        subprocess.CalledProcessError: when an ffmpeg call fails.
        RuntimeError: when ffmpeg succeeds but an expected file is missing.
    """
    segments = []
    try:
        output_dir = os.path.dirname(file_path)
        base_name = os.path.splitext(os.path.basename(file_path))[0]
        segment_index = 0
        # The loop ends through one of the `break`s below, each of which
        # deletes the source file first.
        while os.path.exists(file_path):
            try:
                duration_formatted, remaining_seconds = get_audio_duration(file_path)
            except subprocess.CalledProcessError as e:
                logger.warning(f"Cannot get duration: {e.stderr}. File may be corrupted - deleting")
                os.remove(file_path)
                break
            # Determine the actual duration of the current segment
            current_segment_duration = min(segment_duration, remaining_seconds)
            if current_segment_duration < 1: # Less than 1 second left - stop
                logger.info(f"Remaining duration too short ({remaining_seconds:.2f}s), deleting source file")
                os.remove(file_path)
                break
            segment_path = os.path.join(output_dir, f"{base_name}_segment_{segment_index:03d}.mp3")
            temp_file = os.path.join(output_dir, f"{base_name}_temp.mp3")
            # Create the segment with the actual duration
            cmd_segment = [
                'ffmpeg', '-y', '-i', file_path,
                '-t', str(current_segment_duration),
                '-c', 'copy', segment_path
            ]
            try:
                subprocess.run(cmd_segment, check=True, capture_output=True, text=True)
                logger.info(f"Created segment: {segment_path} ({current_segment_duration:.2f}s)")
            except subprocess.CalledProcessError as e:
                logger.error(f"Error creating segment: {e.stderr}",exc_info=True)
                # Do not leave a partially written segment behind.
                if os.path.exists(segment_path):
                    os.remove(segment_path)
                raise
            if not os.path.exists(segment_path):
                raise RuntimeError(f"Segment {segment_path} was not created")
            segments.append(segment_path)
            # If this was the last segment, delete the source file and stop
            if remaining_seconds <= segment_duration:
                logger.info("Last segment created, deleting source file")
                os.remove(file_path)
                break
            # Trim the source file only if more than segment_duration remains
            cmd_trim = [
                'ffmpeg', '-y',
                '-ss', str(current_segment_duration),
                '-i', file_path,
                '-c', 'copy', temp_file
            ]
            try:
                subprocess.run(cmd_trim, check=True, capture_output=True, text=True)
                # Check the result of trimming
                if os.path.exists(temp_file):
                    # Replace the source with the remainder just written.
                    os.remove(file_path)
                    os.rename(temp_file, file_path)
                    # Verify that the new file is valid
                    try:
                        _, new_duration = get_audio_duration(file_path)
                        logger.info(f"Trimmed original file, new duration: {new_duration:.2f}s")
                    except:
                        logger.error("Trimmed file is invalid")
                        raise
                else:
                    raise RuntimeError("Temp file was not created")
            except subprocess.CalledProcessError as e:
                if os.path.exists(temp_file):
                    os.remove(temp_file)
                logger.error(f"Error trimming file: {e.stderr}")
                raise
            segment_index += 1
        return segments
    except Exception as e:
        logger.error(f"Fatal error: {str(e)}",exc_info=True)
        # Clean up temporary files on error
        # (temp_file is only bound once the loop has reached that assignment)
        if 'temp_file' in locals() and os.path.exists(temp_file):
            os.remove(temp_file)
        raise
def convert_to_mp3(input_path: str, output_dir: str = None, bitrate: str = '192k') -> str:
    """Convert an audio file to mono 44.1 kHz MP3 with a speech band-pass filter.

    Args:
        input_path: path of the source audio file.
        output_dir: directory for the converted file; created if missing.
            Defaults to the directory of *input_path*.
        bitrate: target audio bitrate in ffmpeg notation, e.g. ``'192k'``.

    Returns:
        *input_path* unchanged when its first audio stream is already MP3,
        otherwise the path of the new ``<name>_converted.mp3`` file.

    Raises:
        FileNotFoundError: if *input_path* does not exist.
        RuntimeError: if ffmpeg exits with an error; any partial output
            file is removed first.
    """
    # Make sure the source file exists
    if not os.path.exists(input_path):
        raise FileNotFoundError(f"Input file not found: {input_path}")
    # Create the output directory if requested
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    def is_mp3(filepath):
        """Detect MP3 via ffprobe's codec name; fall back to the extension."""
        cmd = ['ffprobe', '-v', 'error', '-select_streams', 'a:0',
               '-show_entries', 'stream=codec_name', '-of',
               'default=noprint_wrappers=1:nokey=1', filepath]
        try:
            result = subprocess.run(cmd, capture_output=True, text=True)
        except (OSError, subprocess.SubprocessError):
            # ffprobe could not be started: decide by file name instead.
            return filepath.lower().endswith('.mp3')
        return 'mp3' in result.stdout.lower()

    if is_mp3(input_path):
        logger.info(f"File {input_path} is already MP3, skipping conversion")
        return input_path

    # Build the output path
    original_name = os.path.splitext(os.path.basename(input_path))[0]
    output_path = os.path.join(
        output_dir or os.path.dirname(input_path),
        f"{original_name}_converted.mp3"  # suffix keeps it distinct from the source
    )
    # Conversion command
    command = [
        'ffmpeg',
        '-i', input_path,
        '-codec:a', 'libmp3lame',
        '-b:a', bitrate,  # honour the caller's bitrate instead of a fixed value
        '-ar', '44100',  # sample rate
        '-ac', '1',  # mono
        '-af', 'highpass=f=200,lowpass=f=3000',  # keep the speech frequency band
        '-metadata:s:a:0', 'language=rus',  # explicit language tag
        '-fflags', '+bitexact',
        '-map_metadata', '0',  # carry metadata over from the input
        '-y',
        output_path
    ]
    logger.info(f"Converting {input_path} to MP3...")
    try:
        subprocess.run(command, check=True, stdout=subprocess.PIPE,
                       stderr=subprocess.PIPE, text=True)
    except subprocess.CalledProcessError as e:
        error_msg = f"FFmpeg conversion failed: {e.stderr}"
        logger.error(error_msg)
        if os.path.exists(output_path):
            os.remove(output_path)
        raise RuntimeError(error_msg) from e
    logger.info(f"Successfully converted to {output_path}")

    # Advisory bitrate check: it only produces warnings and must never turn
    # a successful conversion into a failure.
    check_cmd = ['ffprobe', '-v', 'error', '-select_streams', 'a',
                 '-show_entries', 'format=bit_rate', '-of',
                 'default=noprint_wrappers=1:nokey=1', output_path]
    try:
        result = subprocess.run(check_cmd, capture_output=True, text=True)
        actual_bitrate = int(result.stdout.strip()) // 1000
        requested_bitrate = int(bitrate.rstrip('kK'))
    except (OSError, subprocess.SubprocessError, ValueError) as check_error:
        logger.warning(f"Could not verify bitrate of {output_path}: {check_error}")
    else:
        if actual_bitrate != requested_bitrate:
            logger.warning(f"Requested {bitrate}, but got {actual_bitrate}k")
    return output_path
if __name__ == "__main__":
    # Debug mode enables the Werkzeug interactive debugger, which lets anyone
    # who can reach the port execute arbitrary code. The server binds to all
    # interfaces, so debug stays off unless FLASK_DEBUG=1 is set explicitly.
    debug_enabled = os.getenv("FLASK_DEBUG", "0") == "1"
    app.run(host="0.0.0.0", port=7860, debug=debug_enabled)