# AI_FOR_DISABLED / app.py
import gradio as gr
import whisper
from transformers import pipeline
import requests
import cv2
import string
import numpy as np
import tensorflow as tf

# Load models
whisper_model = whisper.load_model("base")
sentiment_analysis = pipeline(
    "sentiment-analysis", framework="pt", model="SamLowe/roberta-base-go_emotions")


def load_sign_language_model():
    return tf.keras.models.load_model('best_model.h5')


sign_language_model = load_sign_language_model()
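# NOTE: 'best_model.h5' is assumed to sit next to this script. Judging from the
# preprocessing in classify_sign_language below, it is a Keras classifier that
# takes 28x28 grayscale hand-sign images (Sign Language MNIST-style labels).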


# Audio-based functions
def analyze_sentiment(text):
    results = sentiment_analysis(text)
    sentiment_results = {result['label']: result['score']
                         for result in results}
    return sentiment_results
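# Illustrative return value of analyze_sentiment — the pipeline returns the
# single top-scoring emotion label by default, e.g. {'admiration': 0.94}.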


def display_sentiment_results(sentiment_results, option):
    sentiment_text = ""
    for sentiment, score in sentiment_results.items():
        if option == "Sentiment Only":
            sentiment_text += f"{sentiment}\n"
        elif option == "Sentiment + Score":
            sentiment_text += f"{sentiment}: {score}\n"
    return sentiment_text


def search_text(text, api_key):
    # Send the transcribed text to the Gemini API and return the generated reply.
    api_endpoint = "https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash-latest:generateContent"
    headers = {"Content-Type": "application/json"}
    payload = {"contents": [{"parts": [{"text": text}]}]}
    try:
        response = requests.post(
            api_endpoint, headers=headers, json=payload, params={"key": api_key})
        response.raise_for_status()
        response_json = response.json()
        if 'candidates' in response_json and len(response_json['candidates']) > 0:
            content_parts = response_json['candidates'][0]['content']['parts']
            if len(content_parts) > 0:
                return content_parts[0]['text'].strip()
        return "No relevant content found."
    except requests.exceptions.RequestException as e:
        # Return a plain string so the error displays cleanly in the output textbox.
        return f"Error: {e}"
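# Illustrative (abridged) response shape that search_text expects from the API:
#   {"candidates": [{"content": {"parts": [{"text": "..."}]}}]}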


def inference_audio(audio, sentiment_option, api_key):
    if audio is None:
        return "No audio file provided.", "", "", ""
    # Load the audio and pad/trim it to the 30-second window Whisper expects.
    audio = whisper.load_audio(audio)
    audio = whisper.pad_or_trim(audio)
    mel = whisper.log_mel_spectrogram(audio).to(whisper_model.device)
    # Detect the spoken language, then decode the transcription.
    _, probs = whisper_model.detect_language(mel)
    lang = max(probs, key=probs.get)
    options = whisper.DecodingOptions(fp16=False)
    result = whisper.decode(whisper_model, mel, options)
    # Run sentiment analysis and a Gemini search on the transcription.
    sentiment_results = analyze_sentiment(result.text)
    sentiment_output = display_sentiment_results(
        sentiment_results, sentiment_option)
    search_results = search_text(result.text, api_key)
    return lang.upper(), result.text, sentiment_output, search_results


# Image-based functions
def get_explanation(letter, api_key):
    url = "https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash-latest:generateContent"
    headers = {"Content-Type": "application/json"}
    data = {
        "contents": [
            {"parts": [{"text": f"Explain how the American Sign Language letter '{letter}' is shown, its significance, and why it is represented this way."}]}
        ]
    }
    params = {"key": api_key}
    try:
        response = requests.post(url, headers=headers,
                                 json=data, params=params)
        response.raise_for_status()
        response_data = response.json()
        # Parse the reply from the 'candidates' field, mirroring search_text above.
        candidates = response_data.get("candidates", [])
        if candidates:
            parts = candidates[0].get("content", {}).get("parts", [])
            if parts:
                return parts[0].get("text", "No explanation available.").strip()
        return "No explanation available."
    except requests.RequestException as e:
        return f"Error fetching explanation: {e}"


def classify_sign_language(image, api_key):
    # Gradio supplies a PIL image (RGB); convert it to a 28x28 grayscale array
    # normalised to [0, 1] to match the classifier's expected input.
    img = np.array(image)
    gray_img = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
    gray_img = cv2.resize(gray_img, (28, 28))
    normalized_img = gray_img / 255.0
    input_img = np.expand_dims(normalized_img, axis=0)
    output = sign_language_model.predict(input_img)
    output = np.argmax(output, axis=1).item()
    # Map the predicted class index to a letter, shifting past the letter the
    # label set skips (see the note below).
    uppercase_alphabet = string.ascii_uppercase
    output = output + 1 if output > 7 else output
    pred = uppercase_alphabet[output]
    explanation = get_explanation(pred, api_key)
    return pred, explanation
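# NOTE (assumption): the index shift above suggests the model was trained on a
# Sign Language MNIST-style label set, which covers only static letters — J and
# Z are excluded because signing them requires motion.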


# Gradio interface
def process_input(input_type, audio=None, image=None, sentiment_option=None, api_key=None):
    if input_type == "Audio":
        return inference_audio(audio, sentiment_option, api_key)
    elif input_type == "Image":
        pred, explanation = classify_sign_language(image, api_key)
        return "N/A", pred, "N/A", explanation
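# Both branches return a 4-tuple that maps onto the four output textboxes below:
# detected language, transcription/prediction, sentiment, explanation/search result.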


def main():
    with gr.Blocks() as demo:
        gr.Markdown("# 🤖 Speak & Sign AI Assistant")
        # Layout: Split user input and bot response sides
        with gr.Row():
            # User Input Side
            with gr.Column():
                gr.Markdown("### User Input")
                # Input selection
                input_type = gr.Radio(label="Choose Input Type", choices=[
                    "Audio", "Image"], value="Audio")
                # API key input
                api_key_input = gr.Textbox(
                    label="API Key", placeholder="Your API key here", type="password")
                # Audio input
                audio_input = gr.Audio(
                    label="Upload or Record Audio", type="filepath", visible=True)
                sentiment_option = gr.Radio(choices=[
                    "Sentiment Only", "Sentiment + Score"], label="Sentiment Output", value="Sentiment Only", visible=True)
                # Image input
                image_input = gr.Image(
                    label="Upload Image", type="pil", visible=False)

                # Change input visibility based on selection
                def update_visibility(input_type):
                    if input_type == "Audio":
                        return gr.update(visible=True), gr.update(visible=True), gr.update(visible=False)
                    else:
                        return gr.update(visible=False), gr.update(visible=False), gr.update(visible=True)

                input_type.change(update_visibility, inputs=input_type, outputs=[
                    audio_input, sentiment_option, image_input])

                # Submit button
                submit_btn = gr.Button("Submit")

            # Bot Response Side
            with gr.Column():
                gr.Markdown("### Bot Response")
                lang_str = gr.Textbox(
                    label="Detected Language", interactive=False)
                text = gr.Textbox(
                    label="Transcription or Prediction", interactive=False)
                sentiment_output = gr.Textbox(
                    label="Sentiment Analysis Results", interactive=False)
                search_results = gr.Textbox(
                    label="Explanation or API Search Results", interactive=False)

        submit_btn.click(process_input, inputs=[input_type, audio_input, image_input, sentiment_option, api_key_input], outputs=[
            lang_str, text, sentiment_output, search_results])

    demo.launch(share=True)


if __name__ == "__main__":
    main()
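
# Usage sketch (assumptions: a local Python environment with the dependencies
# installed and 'best_model.h5' in the working directory):
#   pip install gradio openai-whisper transformers torch tensorflow opencv-python requests
#   python app.py
# Whisper also needs ffmpeg on the PATH to load audio files, and the Gemini
# calls require a Google AI Studio API key pasted into the "API Key" box.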