Spaces:

wasmdashai
/

wasm-spad

Sleeping

wasm-spad / app.py

ASG Models

Update app.py

5808bd1 verified 3 months ago

4.91 kB

	import logging
	import os

	import google.generativeai as genai
	import gradio as gr
	import torch
	from transformers import AutoTokenizer,VitsModel

	# Checkpoint id shared by the tokenizer and the VITS speech model.
	TTS_CHECKPOINT = "asg2024/vits-ar-sa-huba"

	# Both secrets come from the environment; a missing variable yields None.
	api_key = os.getenv("id_gmkey")
	# Forwarded to `from_pretrained` below — presumably a Hugging Face access
	# token; confirm against the deployment's secret configuration.
	token = os.getenv("key_")
	genai.configure(api_key=api_key)

	# Run on the GPU when one is available, otherwise on the CPU.
	device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

	tokenizer = AutoTokenizer.from_pretrained(TTS_CHECKPOINT, token=token)
	model = VitsModel.from_pretrained(TTS_CHECKPOINT, token=token)
	model = model.to(device)


	# Sampling settings handed to the Gemini model when it is constructed.
	generation_config = dict(
	    temperature=1,
	    top_p=0.95,
	    top_k=64,
	    max_output_tokens=8192,
	    response_mime_type="text/plain",
	)

	# Gemini chat model. It is bound to its own name on purpose: assigning it to
	# `model` would overwrite the VITS speech model loaded above, which
	# `modelspeech` calls and whose `config.sampling_rate` it reads.
	chat_model = genai.GenerativeModel(
	    model_name="gemini-1.5-flash",
	    generation_config=generation_config,
	    # safety_settings = Adjust safety settings
	    # See https://ai.google.dev/gemini-api/docs/safety-settings
	)


	def create_chat_session():
	    """Start a new Gemini chat session seeded with a fixed priming dialogue.

	    The seeded history is a short Arabic exchange in which the user asks the
	    model to always answer in the Najdi Saudi dialect and to keep answers
	    brief (about two lines at most), and the model agrees.

	    Returns:
	        The chat session object returned by ``chat_model.start_chat``.
	    """
	    priming_history = [
	        {
	            "role": "user",
	            "parts": [
	                "السلام عليكم اريد منك ان ترد على اسئلتي دائما باللهجة السعودية النجدية \n\n",
	            ],
	        },
	        {
	            "role": "model",
	            "parts": [
	                "هلا والله، إسأل ما في خاطرك وأنا حاضر أساعدك، بس بشرط واحد، أسئلتك تكون واضحة عشان أفهم عليك عدل وأعطيك الجواب الزين. قل وش تبي وأنا حاضر! \n",
	            ],
	        },
	        {
	            "role": "user",
	            "parts": [
	                "كيف حالك اخبارك\n",
	            ],
	        },
	        {
	            "role": "model",
	            "parts": [
	                "هلا والله وغلا، أنا طيب وبخير الحمد لله، انت كيفك؟ عساك طيب؟ \n \n وش عندك أخبار؟ عسى كلها زينة. \n",
	            ],
	        },
	        {
	            "role": "user",
	            "parts": [
	                "اريد ايضا ان تكون اجابتك مختصره على سبيل المثال ااكثر اجابة سطرين\n",
	            ],
	        },
	        {
	            "role": "model",
	            "parts": [
	                "خلاص، فهمتك. من عيوني، أسئلتك من اليوم وطالع أجوبتها ما تتعدى سطرين. \n \n إسأل وشف! \n",
	            ],
	        },
	    ]
	    return chat_model.start_chat(history=priming_history)

	# Module-wide chat session shared by every request; replaced by
	# `get_answer_ai` when sending a message fails.
	AI = create_chat_session()


	def get_answer_ai(text):
	    """Send ``text`` to the shared chat session and return the streamed reply.

	    If the current session refuses the message, the failure is logged, the
	    session is replaced with a fresh one from ``create_chat_session`` and the
	    message is sent once more. A failure of that second attempt propagates
	    to the caller.

	    Args:
	        text: The user's message.

	    Returns:
	        The response object from ``send_message(..., stream=True)``; callers
	        iterate over it to receive the chunks.
	    """
	    global AI
	    try:
	        return AI.send_message(text, stream=True)
	    except Exception:
	        # `Exception` rather than a bare `except`, so KeyboardInterrupt and
	        # SystemExit are not swallowed by the retry.
	        logging.getLogger(__name__).warning(
	            "send_message failed; retrying on a fresh chat session",
	            exc_info=True,
	        )
	        AI = create_chat_session()
	        return AI.send_message(text, stream=True)

	def modelspeech(text):
	    """Synthesise ``text`` into a waveform.

	    Uses the module-level ``tokenizer``, ``model`` and ``device``.
	    NOTE(review): ``model`` is looked up when this function runs, so it must
	    still refer to the VITS speech model at that point.

	    Args:
	        text: The text to speak.

	    Returns:
	        A ``(sampling_rate, samples)`` tuple, where ``samples`` is the
	        waveform flattened to one dimension as a NumPy array.
	    """
	    with torch.no_grad():
	        encoded = tokenizer(text, return_tensors="pt")
	        token_ids = encoded["input_ids"].to(device)
	        synthesis = model(input_ids=token_ids)
	        samples = synthesis.waveform.cpu().numpy().reshape(-1)
	    return model.config.sampling_rate, samples

	import re
	def clean_text(text):
	    """Return ``text`` reduced to word characters separated by single spaces.

	    Every character that is neither a word character nor whitespace is
	    dropped, each run of whitespace becomes one space, and leading and
	    trailing whitespace is removed.
	    """
	    symbol_pattern = re.compile(r'[^\w\s]')
	    whitespace_run = re.compile(r'\s+')
	    without_symbols = symbol_pattern.sub('', text)
	    single_spaced = whitespace_run.sub(' ', without_symbols)
	    return single_spaced.strip()


	def text_to_speech(text):
	    """Ask the chat model about ``text`` and yield the spoken answer in pieces.

	    The streamed reply is buffered until its cleaned form is longer than ten
	    characters; that segment is then synthesised and yielded, and buffering
	    starts again. Whatever remains when the stream ends is synthesised too,
	    so the tail of the answer is not lost. Nothing is yielded for an answer
	    that is empty after cleaning.

	    Args:
	        text: The user's question.

	    Yields:
	        The ``(sampling_rate, samples)`` tuple returned by ``modelspeech``
	        for each segment.
	    """
	    response = get_answer_ai(text)
	    pending = ''
	    for chunk in response:
	        # Buffer the raw chunk text and clean the buffer as a whole: cleaning
	        # each chunk separately would strip the whitespace at chunk
	        # boundaries and glue neighbouring words together.
	        pending += chunk.text
	        speakable = clean_text(pending)
	        if len(speakable) > 10:
	            pending = ''
	            yield modelspeech(speakable)

	    # Flush the remainder that never crossed the length threshold.
	    leftover = clean_text(pending)
	    if leftover:
	        yield modelspeech(leftover)
	def dash(text):
	    """Stream the chat model's answer to ``text`` as growing text.

	    Each yielded value is the whole answer received so far rather than only
	    the newest chunk, because a yielded value replaces the content of the
	    output Textbox it is wired to.

	    Args:
	        text: The user's question.

	    Yields:
	        The accumulated answer text after each received chunk.
	    """
	    response = get_answer_ai(text)
	    answer_so_far = ''
	    for chunk in response:
	        answer_so_far += chunk.text
	        yield answer_so_far


	# demo = gr.Interface(fn=dash, inputs=["text"], outputs=['text'])
	# demo.launch()

	# Two-tab interface: the first tab streams the chat model's text answer,
	# the second streams the answer as synthesised audio.
	with gr.Blocks() as demo:
	    with gr.Tab("AI Text "):
	        gr.Markdown("# Text to Speech")
	        text_input = gr.Textbox(label="Enter Text")
	        text_out = gr.Textbox()
	        # Pressing Enter in the input box runs `dash` and fills `text_out`.
	        text_input.submit(fn=dash, inputs=text_input, outputs=text_out)

	    with gr.Tab("AI Speech"):
	        gr.Markdown("# Text to Speech")
	        text_input2 = gr.Textbox(label="Enter Text")
	        audio_output = gr.Audio(streaming=True)
	        # Pressing Enter runs `text_to_speech`; each yielded segment is sent
	        # to the streaming audio component.
	        text_input2.submit(
	            fn=text_to_speech,
	            inputs=text_input2,
	            outputs=audio_output,
	        )


	demo.launch(show_error=True)