# Sign_language_project / pages / Video Upload.py
import json
import logging
import os
import tempfile
from collections import deque
from concurrent.futures import ThreadPoolExecutor

import cv2
import openai
import streamlit as st

from model import Predictor

# Settings
DEFAULT_WIDTH = 50

# Read the API key from the environment instead of hardcoding it;
# never commit secrets to the repository.
openai.api_key = os.environ.get("OPENAI_API_KEY")

logger = logging.getLogger(__name__)

def correct_text_gpt3(input_text):
    """Use GPT-3.5 to fix grammatical errors in the recognized gesture string."""
    prompt = f"Fix the grammatical errors in this text: '{input_text}'"
    response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=[
            {"role": "system", "content": "You are a helpful assistant that corrects grammatical errors."},
            {"role": "user", "content": prompt},
        ],
        max_tokens=50,
        n=1,
        stop=None,
        temperature=0.5,
    )
    return response.choices[0].message["content"].strip()
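
# A minimal hardening sketch (not part of the original app): the OpenAI call
# above can fail on network or auth errors and would crash the page. The
# helper name `safe_correct_text` is hypothetical, introduced here only for
# illustration; main() below still calls correct_text_gpt3 directly.
def safe_correct_text(input_text):
    try:
        return correct_text_gpt3(input_text)
    except Exception as exc:  # openai exception classes differ across versions
        logger.warning("GPT correction failed, returning raw text: %s", exc)
        return input_text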

# Center the content column
width = DEFAULT_WIDTH
side = max((100 - width) / 1.2, 0.01)
_, container, _ = st.columns([side, width, side])

# Inference wrapper around the gesture-recognition model
class SLInference:
    def __init__(self, config_path):
        self.config = self.load_config(config_path)
        self.predictor = Predictor(self.config)
        self.input_queue = deque(maxlen=32)
        self.pred = ''

    def load_config(self, config_path):
        with open(config_path, 'r') as f:
            return json.load(f)
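    # For reference only: the JSON schema is defined by model.Predictor, not
    # here. A hypothetical minimal config (assumed keys, not the real schema)
    # might look like:
    #   {"model_path": "models/gesture.onnx", "topk": 1, "threshold": 0.5}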
    def start(self):
        # Kept for interface compatibility; nothing to warm up here.
        pass

    def predict(self, frames):
        # The model expects a fixed window of 32 frames at 224x224;
        # shorter clips are padded by repeating the last frame.
        frames_resized = [cv2.resize(frame, (224, 224)) for frame in frames]
        while len(frames_resized) < 32:
            frames_resized.append(frames_resized[-1])
        result = self.predictor.predict(frames_resized)
        if result:
            return result["labels"][0]
        return 'no'
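
# Usage sketch (assumption: frames are RGB numpy arrays of shape HxWx3):
#   inf = SLInference("configs/config.json")
#   label = inf.predict(frames)  # returns the top label, or 'no'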

def process_batch(inference_thread, frames, gestures):
    # Run inference on one batch and collect new, non-empty gestures.
    # list.append is atomic in CPython, so concurrent workers may append
    # out of order but will not corrupt the list.
    gesture = inference_thread.predict(frames)
    if gesture not in ['no', ''] and gesture not in gestures:
        gestures.append(gesture)

# Main page UI
def main(config_path):
    # Section header
    st.markdown("""
    <div class="upload-section">
        <h3>🎥 Sign Language Recognition Demo</h3>
        <p>Upload a short video clip to detect sign gestures:</p>
    </div>
    """, unsafe_allow_html=True)

    # File uploader with a hidden label inside a styled container
    with st.container():
        uploaded_file = st.file_uploader(" ", type=["mp4", "avi", "mov", "gif"], label_visibility="collapsed")

    if uploaded_file is not None:
        video_bytes = uploaded_file.read()
        container.video(data=video_bytes)

        inference_thread = SLInference(config_path)
        inference_thread.start()

        text_output = st.empty()

        if st.button("🔍 Predict Gestures"):
            # Write the upload to a temporary file so OpenCV can open it.
            tfile = tempfile.NamedTemporaryFile(delete=False)
            tfile.write(video_bytes)
            cap = cv2.VideoCapture(tfile.name)

            gestures = []
            frames = []
            batch_size = 32

            def process_frames(batch):
                process_batch(inference_thread, batch, gestures)

            # Read the video in 32-frame batches and run inference in
            # parallel; leaving the `with` block waits for all workers.
            with ThreadPoolExecutor() as executor:
                while cap.isOpened():
                    ret, frame = cap.read()
                    if not ret:
                        break
                    frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                    frames.append(frame)
                    if len(frames) == batch_size:
                        executor.submit(process_frames, frames)
                        frames = []
                if frames:
                    executor.submit(process_frames, frames)

            cap.release()

            # Show the detected gestures
            text_output.markdown(
                f'<div class="section"><p style="font-size:20px">🖐️ Detected gestures: <b>{" ".join(gestures)}</b></p></div>',
                unsafe_allow_html=True
            )

            # Grammar correction of the raw gesture sequence
            st.text(correct_text_gpt3(" ".join(gestures)))

if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)
    main("configs/config.json")