Spaces:

dtfyu3
/

gemini-proxy

Running

App Files Files Community

gemini-proxy / main.py

dtfyu3

Init

3c2a312 14 days ago

raw

history blame contribute delete

19.5 kB

	import soundfile as sf
	import json
	import os
	import uuid
	import numpy as np
	from flask import Flask, request, jsonify
	import sys
	import requests
	import yt_dlp
	from google import genai
	import logging
	import subprocess
	from retrying import retry
	import time
	from google.genai import types
	from PIL import Image
	from io import BytesIO
	import PIL.Image
	import base64

	# Send INFO-and-above log records to stdout, formatted with timestamp,
	# level, logger name and source line number.
	logging.basicConfig(
	level=logging.INFO,
	format='%(asctime)s [%(levelname)s] %(name)s:%(lineno)d - %(message)s',
	handlers=[
	logging.StreamHandler(sys.stdout)
	]
	)
	# Module-level logger used by every handler and helper in this file.
	logger = logging.getLogger(__name__)

	# Gemini API client; the key is read from the API_KEY environment variable.
	client = genai.Client(api_key=os.getenv("API_KEY"))
	# Default model name the route handlers pass to generate_content_with_retry.
	gemini_model = "gemini-2.0-flash"
	# Flask application object that the route decorators below register on.
	app = Flask(__name__)
	# Working directory for uploads, downloads and audio segments.
	# NOTE: clear_temp_dir() deletes every regular file found directly in it.
	output_dir = "/tmp"

	@app.before_request
	def auth():
	    """Reject requests that do not carry the shared secret token.

	    Runs before every request. The root path ``/`` is exempt. Any other
	    path must send an ``x-secret-token`` header equal to the
	    ``GOOGLE_SECRET`` environment variable.

	    Returns:
	        ``None`` to let the request proceed, or a ``(json, 403)`` response
	        when the token is missing or wrong (also when ``GOOGLE_SECRET`` is
	        unset or empty).
	    """
	    # Local import keeps this block self-contained.
	    import hmac

	    if request.path == '/':
	        return None

	    supplied = request.headers.get('x-secret-token') or ''
	    expected = os.getenv('GOOGLE_SECRET') or ''
	    # compare_digest avoids leaking how many leading characters matched
	    # through response timing; bytes are used because the str form only
	    # accepts ASCII and header values may contain other characters.
	    if supplied and expected and hmac.compare_digest(
	        supplied.encode('utf-8'), expected.encode('utf-8')
	    ):
	        return None

	    logger.info("Invalid token")
	    return jsonify({"error": "Invalid token"}), 403

	@app.route('/', methods=['GET'])
	def hello():
	    """Liveness probe: answer ``GET /`` with a fixed plain-text message."""
	    alive_message = "Server is alive"
	    return alive_message

	@app.route('/proxy', methods=['POST'])
	def proxy():
	    """Forward a prompt, optionally with uploaded media, to Gemini.

	    Request fields (read from the JSON body, or from form data otherwise):
	        prompt: text prompt.
	        file_type: ``'image'`` (multipart field ``image``, several files
	            allowed), ``'audio'`` (field ``audio``) or ``'video'`` (field
	            ``video``). Any other value means no files are read.
	        generate_images: ``1`` or ``'1'`` to request text plus images.

	    Returns:
	        ``({"status": "ok", "response": {...}}, 200)`` on success, where
	        the inner object has ``text`` and, in image mode, ``images`` (a
	        list of base64 PNG data URLs). ``400`` when there is neither a
	        prompt nor a file, ``500`` on any processing error.
	    """
	    clear_temp_dir()
	    body = request.get_json() if request.is_json else request.form

	    # Tracked outside the try so the finally clause can always clean up.
	    uploaded_files = []
	    saved_paths = []
	    try:
	        prompt = body.get('prompt')
	        logger.info(f"prompt: {prompt}")
	        file_type = body.get('file_type')
	        generate_images = body.get('generate_images')

	        if file_type == 'image':
	            files = request.files.getlist('image')
	        elif file_type in ('audio', 'video'):
	            single_file = request.files.get(file_type)
	            files = [single_file] if single_file else []
	        else:
	            files = []
	        logger.info(f"files: {files}")

	        for file in files:
	            if not file:
	                continue
	            # The extension comes from the client: keep only alphanumerics
	            # so it can never inject path separators into the saved path.
	            raw_extension = (file.filename or '').rsplit('.', 1)[-1].lower()
	            file_extension = ''.join(ch for ch in raw_extension if ch.isalnum())
	            saved_path = os.path.join(output_dir, f"{uuid.uuid4()}.{file_extension}")
	            logger.info(f"Saving file {saved_path}")
	            file.save(saved_path)
	            saved_paths.append(saved_path)

	            if file_type == 'audio':
	                saved_path = convert_to_mp3(input_path=saved_path, output_dir=output_dir)
	                if saved_path not in saved_paths:
	                    saved_paths.append(saved_path)

	            if not os.path.exists(saved_path):
	                return jsonify({"error": "Error while processing file"}), 500
	            logger.info(f"Uploading {saved_path}")
	            uploaded_files.append(client.files.upload(file=saved_path))

	        if uploaded_files:
	            logger.info(f"Uploaded {len(uploaded_files)} file(s)")
	            response = generate_content_with_retry(
	                client,
	                model=gemini_model,
	                contents=[prompt] + uploaded_files,
	                generate_images=generate_images,
	            )
	        elif prompt is not None:
	            response = generate_content_with_retry(
	                client,
	                model=gemini_model,
	                contents=prompt,
	                generate_images=generate_images,
	            )
	        else:
	            return jsonify({"error": "No prompt provided"}), 400

	        if generate_images == 1 or generate_images == '1':
	            result = {'text': '', 'images': []}
	            for part in response.candidates[0].content.parts:
	                if part.text:
	                    result['text'] += part.text
	                elif part.inline_data:
	                    # Re-encode whatever image format came back as PNG.
	                    with Image.open(BytesIO(part.inline_data.data)) as img, BytesIO() as img_io:
	                        img.save(img_io, 'PNG')
	                        img_base64 = base64.b64encode(img_io.getvalue()).decode('utf-8')
	                    result['images'].append({
	                        'data': f"data:image/png;base64,{img_base64}",
	                        'format': 'png',
	                    })
	        else:
	            result = {'text': response.text}
	        return jsonify({"status": "ok", "response": result}), 200
	    except Exception as e:
	        logger.error(str(e), exc_info=True)
	        return jsonify({"error": str(e)}), 500
	    finally:
	        # Cleanup is best-effort and runs on success and on failure, so a
	        # failed generation no longer leaves files behind remotely or
	        # locally. Cleanup errors are logged, never raised, so they cannot
	        # replace the response chosen above.
	        for myfile in uploaded_files:
	            try:
	                client.files.delete(name=myfile.name)
	            except Exception as cleanup_error:
	                logger.warning(f"Failed to delete uploaded file {myfile.name}: {cleanup_error}")
	        for path in saved_paths:
	            try:
	                if os.path.exists(path):
	                    logger.info(f"Removing local file {path}")
	                    os.remove(path)
	            except OSError as cleanup_error:
	                logger.warning(f"Failed to remove local file {path}: {cleanup_error}")
	        clear_temp_dir()


	@app.route('/notifier', methods=['POST'])
	def notifier():
	    """Summarize the audio of a stream URL in two-hour segments.

	    Expects a JSON body with ``prompt`` and ``url``. The audio is
	    downloaded, cut into segments, and each segment is sent to Gemini
	    with the prompt. Every segment summary is POSTed to the URL in the
	    ``GOOGLE_SCRIPT_URL`` environment variable as soon as it is ready,
	    with a ten-minute pause between segments.

	    Returns:
	        ``({"status": "ok", "response": <all summaries joined>}, 200)`` on
	        success, ``400`` when ``prompt`` or ``url`` is missing, ``500`` on
	        any processing error.
	    """
	    segment_seconds = 7200  # two hours of audio per Gemini request
	    pause_seconds = 600  # ten-minute pause before each later segment
	    try:
	        clear_temp_dir()
	        body = request.get_json(silent=True)
	        if not isinstance(body, dict):
	            body = {}
	        logger.info(f"body: {body}")
	        prompt = body.get('prompt')
	        url = body.get('url')
	        if prompt is None or not url:
	            return jsonify({"error": "Both 'prompt' and 'url' are required"}), 400

	        # Start from a clean slate: drop every file still stored remotely.
	        for f in client.files.list():
	            client.files.delete(name=f.name)

	        file_path = download_audio(url, os.path.join(output_dir, f"audio_{uuid.uuid4()}"))
	        duration, _ = get_audio_duration(file_path)
	        logger.info(f"file duration: {duration}")
	        segments = create_and_trim_segments(file_path, segment_duration=segment_seconds)
	        logger.info(f"segments: {segments}")

	        responses = []
	        for idx, segment in enumerate(segments):
	            if idx >= 1:
	                logger.info(f"Waiting 10 minutes before processing segment {idx+1}")
	                time.sleep(pause_seconds)

	            myfile = None
	            try:
	                segment_duration, _ = get_audio_duration(segment)
	                start_time_seconds = idx * segment_seconds
	                start_time = f"{int(start_time_seconds // 3600):02d}:{int((start_time_seconds % 3600) // 60):02d}:{int(start_time_seconds % 60):02d}"
	                logger.info(f"Processing segment {idx+1}, start: {start_time}, duration: {segment_duration}")
	                file_size = os.path.getsize(segment)
	                logger.info(f"Uploading segment {segment}, size: {file_size / (1024 * 1024):.2f} MB")
	                myfile = client.files.upload(file=segment)
	                logger.info(f"Uploaded segment: {myfile.name}, URI: {myfile.uri}")
	                if idx == 0:
	                    logger.info(f"Prompt: {prompt}")
	                full_prompt = f"Этот аудиофайл — сегмент стрима по GTA 5 RP, начиная с {start_time}." + prompt

	                response = generate_content_with_retry(client, model=gemini_model, contents=[full_prompt, myfile])
	                logger.info(f"Segment {idx+1}")
	                segment_response = f"Сегмент {start_time} ({segment_duration}):\n{response.text}\n"
	                responses.append(segment_response)

	                # Push each summary as soon as it exists so earlier
	                # segments are delivered even if a later one fails.
	                payload = {"summary": segment_response, "token": os.getenv("GOOGLE_SECRET")}
	                headers = {'Content-Type': 'application/json'}
	                requests.post(os.getenv("GOOGLE_SCRIPT_URL"), json=payload, headers=headers, timeout=60)
	            except Exception as e:
	                logger.error(f"Failed to process segment {idx+1}: {str(e)}", exc_info=True)
	                raise
	            finally:
	                # Release the remote copy and the local segment whether or
	                # not the segment was processed successfully.
	                if myfile is not None:
	                    try:
	                        client.files.delete(name=myfile.name)
	                    except Exception as cleanup_error:
	                        logger.warning(f"Failed to delete uploaded file {myfile.name}: {cleanup_error}")
	                if os.path.exists(segment):
	                    logger.info(f"Removing segment: {segment}")
	                    os.remove(segment)

	        # Return every summary; the fallback text covers the case where
	        # no segment was produced at all.
	        combined_response = "\n\n".join(responses) if responses else "Нет данных для хард RP."
	        return jsonify({"status": "ok", "response": combined_response}), 200
	    except Exception as e:
	        logger.error(str(e), exc_info=True)
	        return jsonify({"error": str(e)}), 500
	    finally:
	        clear_temp_dir()

	def download_audio(url,output_path):
	    """Download the best audio stream of *url* and extract it to MP3.

	    Args:
	        url: media URL handed to yt-dlp.
	        output_path: destination path without extension.

	    Returns:
	        ``f"{output_path}.mp3"``, the path of the extracted file.

	    Raises:
	        FileNotFoundError: if that file does not exist after the download.
	    """
	    target_file = f"{output_path}.mp3"
	    extract_to_mp3 = {
	        'key': 'FFmpegExtractAudio',
	        'preferredcodec': 'mp3',
	        'preferredquality': '192'
	    }
	    options = {
	        'format': 'bestaudio',
	        'outtmpl': f"{output_path}.%(ext)s",
	        'postprocessors': [extract_to_mp3],
	        'postprocessor_args': {
	            'FFmpegExtractAudio': ['-b:a', '192k'],
	        },
	        'quiet': True,
	    }

	    logger.info("Starting download")
	    with yt_dlp.YoutubeDL(options) as downloader:
	        downloader.download([url])

	    if os.path.exists(target_file):
	        logger.info(f"Download finished to {target_file}")
	        return target_file
	    raise FileNotFoundError(f"Expected output file {target_file} not found")
	def remove_temp_file(path):
	    """Delete *path* if it exists, logging the removal; otherwise do nothing."""
	    if not os.path.exists(path):
	        return
	    logger.info(f"Removing {path}")
	    os.remove(path)

	def clear_temp_dir():
	    """Delete every regular file directly inside ``output_dir``.

	    Subdirectories are left alone. Failures are logged and swallowed, both
	    per file and for the directory listing, so this never raises.
	    """
	    try:
	        for entry in os.listdir(output_dir):
	            candidate = os.path.join(output_dir, entry)
	            if not os.path.isfile(candidate):
	                continue
	            try:
	                os.remove(candidate)
	                logger.info(f"Removed {candidate}")
	            except Exception as remove_error:
	                logger.info(f"Error while deleting file {remove_error}")
	    except Exception as listing_error:
	        logger.info(f"An error occured: {listing_error}")

	def get_auido_duration(file):
	    """Return the duration of *file* as an ``HH:MM:SS`` string.

	    The duration is read by running ``ffprobe`` on the file; a non-zero
	    exit status raises ``subprocess.CalledProcessError``.
	    """
	    probe_output = subprocess.check_output(
	        [
	            'ffprobe', '-v', 'error', '-show_entries', 'format=duration',
	            '-of', 'default=noprint_wrappers=1:nokey=1', file,
	        ],
	        text=True,
	    )
	    total_seconds = float(probe_output.strip())
	    hours = int(total_seconds // 3600)
	    minutes = int((total_seconds % 3600) // 60)
	    seconds = int(total_seconds % 60)
	    return f"{hours:02}:{minutes:02}:{seconds:02}"

	def is_503_error(exception):
	    """Tell whether *exception* is an ``Exception`` whose text contains "503".

	    Used as the retry predicate: anything that is not an ``Exception``
	    instance, or whose message lacks "503", is not retried.
	    """
	    if not isinstance(exception, Exception):
	        return False
	    message = str(exception)
	    return "503" in message
	@retry(
	    stop_max_attempt_number=3,
	    wait_fixed=600000,  # 10-minute delay (600 seconds)
	    retry_on_exception=is_503_error
	)
	def generate_content_with_retry(client, model, contents, generate_images=0):
	    """Call Gemini ``generate_content``, retrying on 503-style errors.

	    Args:
	        client: object exposing ``models.generate_content``.
	        model: model name used for plain text generation.
	        contents: prompt and/or uploaded file handles to send.
	        generate_images: ``1`` or ``'1'`` switches to the image-generation
	            model and requests both text and image output. In that mode
	            ``model`` is ignored.

	    Returns:
	        Whatever ``client.models.generate_content`` returns.
	    """
	    if generate_images == 1 or generate_images == '1':
	        # Image output is only requested from this dedicated preview
	        # model, so the caller's ``model`` is deliberately not used here.
	        return client.models.generate_content(
	            model="gemini-2.0-flash-preview-image-generation",
	            contents=contents,
	            config=types.GenerateContentConfig(response_modalities=['TEXT', 'IMAGE']),
	        )
	    return client.models.generate_content(model=model, contents=contents)

	def get_audio_duration(file_path):
	    """Return the duration of *file_path* as reported by ``ffprobe``.

	    Returns:
	        A ``(formatted, seconds)`` tuple: the duration as an ``HH:MM:SS``
	        string and as a float number of seconds.

	    Raises:
	        subprocess.CalledProcessError: if ``ffprobe`` exits non-zero; its
	            stderr output is logged before the error is re-raised.
	    """
	    cmd = [
	        'ffprobe', '-v', 'error', '-show_entries', 'format=duration',
	        '-of', 'default=noprint_wrappers=1:nokey=1', file_path
	    ]
	    try:
	        # stderr must be captured explicitly, otherwise ``e.stderr`` below
	        # is always None and the log line carries no diagnostic.
	        probe_output = subprocess.check_output(cmd, text=True, stderr=subprocess.PIPE)
	    except subprocess.CalledProcessError as e:
	        logger.error(f"Error getting duration: {e.stderr}", exc_info=True)
	        raise
	    duration_seconds = float(probe_output.strip())
	    duration_formatted = f"{int(duration_seconds // 3600):02d}:{int((duration_seconds % 3600) // 60):02d}:{int(duration_seconds % 60):02d}"
	    return duration_formatted, duration_seconds

	def create_and_trim_segments(file_path, segment_duration=7200):
	    """Cut *file_path* into consecutive MP3 segments, consuming the source.

	    Each pass copies the first ``segment_duration`` seconds of the source
	    into ``<base>_segment_NNN.mp3`` next to it, then rewrites the source
	    without that leading part. The source file is deleted once the last
	    segment has been written, or if its duration cannot be read.

	    Args:
	        file_path: path of the audio file to split.
	        segment_duration: maximum segment length in seconds.

	    Returns:
	        List of created segment paths, in playback order.

	    Raises:
	        subprocess.CalledProcessError: if an ``ffmpeg`` call fails.
	        RuntimeError: if ``ffmpeg`` reports success but an expected output
	            file is missing.
	    """
	    segments = []
	    temp_file = None  # set once paths are known; cleaned up on fatal error
	    try:
	        work_dir = os.path.dirname(file_path)
	        base_name = os.path.splitext(os.path.basename(file_path))[0]
	        temp_file = os.path.join(work_dir, f"{base_name}_temp.mp3")
	        segment_index = 0

	        while os.path.exists(file_path):
	            try:
	                _, remaining_seconds = get_audio_duration(file_path)
	            except subprocess.CalledProcessError as e:
	                logger.warning(f"Cannot get duration: {e.stderr}. File may be corrupted - deleting")
	                os.remove(file_path)
	                break

	            # Actual length of this segment: the last one may be shorter.
	            current_segment_duration = min(segment_duration, remaining_seconds)

	            if current_segment_duration < 1:  # under one second left: stop
	                logger.info(f"Remaining duration too short ({remaining_seconds:.2f}s), deleting source file")
	                os.remove(file_path)
	                break

	            segment_path = os.path.join(work_dir, f"{base_name}_segment_{segment_index:03d}.mp3")

	            # Copy the leading part of the source into the segment file.
	            cmd_segment = [
	                'ffmpeg', '-y', '-i', file_path,
	                '-t', str(current_segment_duration),
	                '-c', 'copy', segment_path
	            ]
	            try:
	                subprocess.run(cmd_segment, check=True, capture_output=True, text=True)
	                logger.info(f"Created segment: {segment_path} ({current_segment_duration:.2f}s)")
	            except subprocess.CalledProcessError as e:
	                logger.error(f"Error creating segment: {e.stderr}", exc_info=True)
	                if os.path.exists(segment_path):
	                    os.remove(segment_path)
	                raise

	            if not os.path.exists(segment_path):
	                raise RuntimeError(f"Segment {segment_path} was not created")
	            segments.append(segment_path)

	            # That was the last segment: drop the source and finish.
	            if remaining_seconds <= segment_duration:
	                logger.info("Last segment created, deleting source file")
	                os.remove(file_path)
	                break

	            # More audio remains: rewrite the source without the part
	            # that was just extracted.
	            cmd_trim = [
	                'ffmpeg', '-y',
	                '-ss', str(current_segment_duration),
	                '-i', file_path,
	                '-c', 'copy', temp_file
	            ]
	            try:
	                subprocess.run(cmd_trim, check=True, capture_output=True, text=True)

	                if not os.path.exists(temp_file):
	                    raise RuntimeError("Temp file was not created")

	                # os.replace swaps the file in one step, so a failure can
	                # no longer leave the source deleted but not replaced.
	                os.replace(temp_file, file_path)

	                # Make sure the rewritten source is still readable.
	                try:
	                    _, new_duration = get_audio_duration(file_path)
	                    logger.info(f"Trimmed original file, new duration: {new_duration:.2f}s")
	                except Exception:
	                    logger.error("Trimmed file is invalid")
	                    raise
	            except subprocess.CalledProcessError as e:
	                if os.path.exists(temp_file):
	                    os.remove(temp_file)
	                logger.error(f"Error trimming file: {e.stderr}")
	                raise

	            segment_index += 1

	        return segments

	    except Exception as e:
	        logger.error(f"Fatal error: {str(e)}", exc_info=True)
	        # Remove the intermediate file left behind by a failed pass.
	        if temp_file and os.path.exists(temp_file):
	            os.remove(temp_file)
	        raise
	def convert_to_mp3(input_path: str, output_dir: str = None, bitrate: str = '192k') -> str:
	    """Convert an audio file to mono 44.1 kHz MP3 with ``ffmpeg``.

	    Args:
	        input_path: file to convert.
	        output_dir: directory for the result (created if missing);
	            defaults to the directory of ``input_path``.
	        bitrate: target audio bitrate in ffmpeg notation, e.g. ``'192k'``.

	    Returns:
	        ``input_path`` unchanged when its first audio stream is already
	        MP3, otherwise the path of ``<name>_converted.mp3``.

	    Raises:
	        FileNotFoundError: if ``input_path`` does not exist.
	        RuntimeError: if the ``ffmpeg`` conversion fails.
	    """
	    # The source file must exist before anything else is attempted.
	    if not os.path.exists(input_path):
	        raise FileNotFoundError(f"Input file not found: {input_path}")

	    # Create the destination directory when one was requested.
	    if output_dir:
	        os.makedirs(output_dir, exist_ok=True)

	    def is_mp3(filepath):
	        """Check the first audio stream's codec, else fall back to the extension."""
	        try:
	            cmd = ['ffprobe', '-v', 'error', '-select_streams', 'a:0',
	                   '-show_entries', 'stream=codec_name', '-of',
	                   'default=noprint_wrappers=1:nokey=1', filepath]
	            result = subprocess.run(cmd, capture_output=True, text=True)
	            return 'mp3' in result.stdout.lower()
	        except Exception:
	            # e.g. ffprobe is not installed: judge by the file name instead.
	            return filepath.lower().endswith('.mp3')

	    if is_mp3(input_path):
	        logger.info(f"File {input_path} is already MP3, skipping conversion")
	        return input_path

	    # Output path: same base name plus a suffix so it never collides with
	    # the input file.
	    original_name = os.path.splitext(os.path.basename(input_path))[0]
	    output_path = os.path.join(
	        output_dir or os.path.dirname(input_path),
	        f"{original_name}_converted.mp3"
	    )

	    command = [
	        'ffmpeg',
	        '-i', input_path,
	        '-codec:a', 'libmp3lame',
	        '-b:a', bitrate,  # honour the requested bitrate
	        '-ar', '44100',  # sample rate
	        '-ac', '1',  # mono
	        '-af', 'highpass=f=200,lowpass=f=3000',  # keep the speech band
	        '-metadata:s:a:0', 'language=rus',  # tag the stream language
	        '-fflags', '+bitexact',
	        '-map_metadata', '0',  # carry over source metadata
	        '-y',
	        output_path
	    ]

	    logger.info(f"Converting {input_path} to MP3...")

	    try:
	        subprocess.run(command, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
	    except subprocess.CalledProcessError as e:
	        error_msg = f"FFmpeg conversion failed: {e.stderr}"
	        logger.error(error_msg)
	        if os.path.exists(output_path):
	            os.remove(output_path)
	        raise RuntimeError(error_msg) from e

	    logger.info(f"Successfully converted to {output_path}")

	    # Advisory check only: an unreadable bitrate must not turn a
	    # successful conversion into a failure.
	    check_cmd = ['ffprobe', '-v', 'error', '-select_streams', 'a',
	                 '-show_entries', 'format=bit_rate', '-of',
	                 'default=noprint_wrappers=1:nokey=1', output_path]
	    result = subprocess.run(check_cmd, capture_output=True, text=True)
	    try:
	        actual_bitrate = int(result.stdout.strip()) // 1000
	        requested_bitrate = int(bitrate[:-1])
	    except ValueError:
	        logger.warning(f"Could not verify bitrate of {output_path}")
	    else:
	        if actual_bitrate != requested_bitrate:
	            logger.warning(f"Requested {bitrate}, but got {actual_bitrate}k")
	    return output_path

	if __name__ == "__main__":
	    # The interactive debugger lets anyone who can reach the port run
	    # arbitrary code, and this server listens on all interfaces, so debug
	    # mode is opt-in (FLASK_DEBUG=1) instead of always on.
	    app.run(host="0.0.0.0", port=7860, debug=os.getenv("FLASK_DEBUG") == "1")