# --- Earlier draft (kept commented out): saving audio flags to a Hugging Face dataset ---
# import gradio as gr
# import os
# HF_TOKEN = os.getenv('HF_TOKEN')
# hf_writer = gr.HuggingFaceDatasetSaver(HF_TOKEN, "save_audio")
# --- Earlier draft (kept commented out): line-by-line audio recorder ---
# import gradio as gr
# import os
#
# class TextFileReader:
#     def __init__(self):
#         self.lines = []
#         self.current_index = 0
#
#     def read_lines(self, file):
#         self.lines = file.decode('utf-8').splitlines()
#         self.current_index = 0
#         return self.get_current_line()
#
#     def get_current_line(self):
#         if 0 <= self.current_index < len(self.lines):
#             return self.lines[self.current_index]
#         else:
#             return "End of file reached."
#
#     def forward_line(self):
#         self.current_index = min(self.current_index + 1, len(self.lines) - 1)
#         return self.get_current_line()
#
#     def backward_line(self):
#         self.current_index = max(self.current_index - 1, 0)
#         return self.get_current_line()
#
# reader = TextFileReader()
#
# # Save the uploaded text lines to a file
# def save_text_lines(file):
#     lines = reader.read_lines(file)
#     with open("text_lines.txt", "w") as f:
#         f.write("\n".join(reader.lines))
#     return lines
#
# # Save the audio file and corresponding text
# def save_audio_text(audio, text):
#     if not os.path.exists("recordings"):
#         os.makedirs("recordings")
#     # Debugging: print out the structure of the audio variable
#     print("Received audio data:", audio)
#     # Check if audio is a dictionary and contains 'data'
#     if isinstance(audio, dict) and 'data' in audio:
#         audio_data = audio['data']
#         audio_path = f"recordings/line_{reader.current_index}.wav"
#         text_path = f"recordings/line_{reader.current_index}.txt"
#         with open(audio_path, "wb") as f:
#             f.write(audio_data)
#         with open(text_path, "w") as f:
#             f.write(text)
#         # Move to the next line after saving
#         next_line = reader.forward_line()
#         return next_line
#     else:
#         return "Audio data is not in the expected format."
#
# # Define the Gradio interface
# with gr.Blocks() as demo:
#     with gr.Row():
#         file_upload = gr.File(label="Upload a text file", type="binary")
#         generate_button = gr.Button("Generate Lines")
#         current_line = gr.Textbox(label="Current Line")
#
#         def update_output(file):
#             lines = reader.read_lines(file)
#             save_text_lines(file)  # Save the text lines to a file
#             return lines
#
#         generate_button.click(fn=update_output, inputs=file_upload, outputs=current_line)
#     with gr.Row():
#         audio_record = gr.Audio(sources=["microphone", "upload"], type="filepath")
#         save_button = gr.Button("Save Audio and Next Line")
#         save_button.click(fn=save_audio_text, inputs=[audio_record, current_line], outputs=current_line)
#
# demo.launch()
# --- Earlier draft (kept commented out): calculator with manual flagging ---
# import gradio as gr
#
# def calculator(num1, operation, num2):
#     if operation == "add":
#         return num1 + num2
#     elif operation == "subtract":
#         return num1 - num2
#     elif operation == "multiply":
#         return num1 * num2
#     elif operation == "divide":
#         return num1 / num2
#
# iface = gr.Interface(
#     calculator,
#     ["number", gr.Radio(["add", "subtract", "multiply", "divide"]), "number"],
#     "number",
#     allow_flagging="manual",
#     flagging_options=["correct", "wrong"]
# )
# iface.launch()
# --- Earlier draft (kept commented out): calculator flagging into a Hugging Face dataset ---
# import os
# HF_TOKEN = os.getenv('HF_TOKEN')
# hf_writer = gr.HuggingFaceDatasetSaver(HF_TOKEN, "crowdsourced")
#
# iface = gr.Interface(
#     calculator,
#     ["number", gr.Radio(["add", "subtract", "multiply", "divide"]), "number"],
#     "number",
#     description="Check out the crowd-sourced dataset at: [https://huggingface.co/Sajjo/crowdsourced](https://huggingface.co/Sajjo/crowdsourced)",
#     allow_flagging="manual",
#     flagging_options=["wrong sign", "off by one", "other"],
#     flagging_callback=hf_writer
# )
# iface.launch()
# --- Earlier draft (kept commented out): sepia filter with CSV flagging ---
# import numpy as np
# import gradio as gr
#
# def sepia(input_img, strength):
#     sepia_filter = strength * np.array(
#         [[0.393, 0.769, 0.189], [0.349, 0.686, 0.168], [0.272, 0.534, 0.131]]
#     ) + (1 - strength) * np.identity(3)
#     sepia_img = input_img.dot(sepia_filter.T)
#     sepia_img /= sepia_img.max()
#     return sepia_img
#
# callback = gr.CSVLogger()
#
# with gr.Blocks() as demo:
#     with gr.Row():
#         with gr.Column():
#             img_input = gr.Image()
#             strength = gr.Slider(0, 1, 0.5)
#         img_output = gr.Image()
#     with gr.Row():
#         btn = gr.Button("Flag")
#
#     # This needs to be called at some point prior to the first call to callback.flag()
#     callback.setup([img_input, strength, img_output], "flagged_data_points")
#
#     img_input.change(sepia, [img_input, strength], img_output)
#     strength.change(sepia, [img_input, strength], img_output)
#
#     # We can choose which components to flag -- in this case, we'll flag all of them
#     btn.click(lambda *args: callback.flag(args), [img_input, strength, img_output], None, preprocess=False)
#
# demo.launch()
# --- Earlier draft (kept commented out): record audio for lines read from ./audio_samples ---
# import gradio as gr
# import os
# import wave
# import tempfile
# import numpy as np
#
# # Global variables to store file and line index
# file_index = 0
# line_index = 0
# lines = []
#
# # Hugging Face token and dataset saver
# HF_TOKEN = os.getenv('HF_TOKEN')
# hf_writer = gr.HuggingFaceDatasetSaver(HF_TOKEN, "crowdsourced-calculator-demo")
#
# # Read lines from a file
# def read_lines_from_file(file_path):
#     global lines
#     with open(file_path, 'r') as file:
#         lines = file.readlines()
#
# # Save audio to a WAV file
# def save_audio_to_file(audio):
#     sample_rate, data = audio  # audio is a tuple (sample_rate, data)
#     # Save the audio data as a WAV file in a temporary location
#     with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_file:
#         with wave.open(tmp_file.name, 'wb') as wav_file:
#             wav_file.setnchannels(1)  # Mono audio
#             wav_file.setsampwidth(2)  # 2 bytes per sample (16-bit PCM)
#             wav_file.setframerate(sample_rate)
#             wav_file.writeframes(data.tobytes())
#     # Return the path to the saved WAV file
#     return tmp_file.name
#
# # Save data to the Hugging Face dataset
# def save_to_hf_dataset(text, audio_path):
#     with open(audio_path, "rb") as f:
#         audio_data = f.read()
#     hf_writer.save({"text": text, "audio": audio_data})
#
# # Gradio interface function
# def audio_capture_interface():
#     global file_index, line_index, lines
#     # Initial file to read
#     files = os.listdir('./audio_samples')
#     read_lines_from_file(os.path.join('./audio_samples', files[file_index]))
#
#     # Define the interface components
#     audio_input = gr.Audio(source="microphone", type="numpy", label="Speak and click submit")
#     output_text = gr.Textbox(label="Status", placeholder="Status will appear here")
#
#     # Capture and process the audio input
#     def process_audio(audio):
#         global line_index, lines
#         try:
#             text_line = lines[line_index].strip()
#             file_path = save_audio_to_file(audio)
#             save_to_hf_dataset(text_line, file_path)
#             return f"Audio saved to {file_path} and uploaded to Hugging Face Dataset."
#         except Exception as e:
#             return f"Error saving audio: {str(e)}"
#
#     # Handle navigation buttons
#     def navigate_lines(button):
#         global line_index, lines
#         if button == 'forward':
#             line_index = min(line_index + 1, len(lines) - 1)
#         elif button == 'previous':
#             line_index = max(line_index - 1, 0)
#         output_text.value = lines[line_index]
#
#     # Create the Gradio interface
#     with gr.Blocks() as iface:
#         with gr.Row():
#             gr.Textbox(label="Text", value=lines[line_index], interactive=False)
#         with gr.Row():
#             audio_input.render()
#         with gr.Row():
#             gr.Button("Previous").click(lambda: navigate_lines('previous'), outputs=output_text)
#             gr.Button("Forward").click(lambda: navigate_lines('forward'), outputs=output_text)
#             gr.Button("Submit").click(process_audio, inputs=audio_input, outputs=output_text)
#     return iface
#
# # Launch the interface
# iface = audio_capture_interface()
# iface.launch()
# --- Current version: read lines from an uploaded text file, record audio for each line,
# --- and save the (text, audio) pairs to a Hugging Face dataset ---
import gradio as gr
import os
import wave
import tempfile
import numpy as np

# Global variables to store the current line index and the loaded lines
line_index = 0
lines = []

# Hugging Face token and dataset saver (flagging callback)
HF_TOKEN = os.getenv('HF_TOKEN')
hf_writer = gr.HuggingFaceDatasetSaver(HF_TOKEN, "crowdsourced-calculator-demo")
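
# Note on the callback's lifecycle (an assumption this file relies on):
# HuggingFaceDatasetSaver is a flagging callback, so it logs nothing until it
# has been bound to the components it will record. hf_writer.setup(...) is
# called inside audio_capture_interface() below, mirroring the CSVLogger
# example kept above, and hf_writer.flag(...) runs once per saved recording.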

# Read lines from an uploaded text file
def read_lines_from_file(file_path):
    global lines, line_index
    with open(file_path, 'r') as file:
        lines = file.readlines()
    line_index = 0  # Reset line index when a new file is loaded
    return lines[line_index].strip() if lines else "No lines found in the file."

# Save audio to a WAV file
def save_audio_to_file(audio):
    sample_rate, data = audio  # audio is a tuple (sample_rate, numpy data)
    data = np.asarray(data, dtype=np.int16)  # Gradio delivers 16-bit PCM samples
    channels = data.shape[1] if data.ndim > 1 else 1  # handle stereo input too
    # Save the audio data as a WAV file in a temporary location
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_file:
        with wave.open(tmp_file.name, 'wb') as wav_file:
            wav_file.setnchannels(channels)
            wav_file.setsampwidth(2)  # 2 bytes per sample (16-bit PCM)
            wav_file.setframerate(sample_rate)
            wav_file.writeframes(data.tobytes())
    # Return the path to the saved WAV file
    return tmp_file.name

# Save a (text, audio) pair to the Hugging Face dataset via the flagging callback
def save_to_hf_dataset(text, audio_path):
    # HuggingFaceDatasetSaver exposes flag(), not save(); the data list must
    # match the component order passed to hf_writer.setup() below, and
    # file-based components such as Audio expect a local file path here
    hf_writer.flag([text, audio_path])
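
# Sketch of the setup/flag pairing assumed here (component names as created in
# audio_capture_interface() below; the row data must match that order):
#   hf_writer.setup([text_display, audio_input], "flagged_audio")  # once
#   hf_writer.flag(["a sentence to read", "/tmp/clip.wav"])        # per recording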

# Capture and process the audio input
def process_audio(audio):
    global line_index, lines
    try:
        text_line = lines[line_index].strip()
        file_path = save_audio_to_file(audio)
        save_to_hf_dataset(text_line, file_path)
        return f"Audio saved to {file_path} and uploaded to the Hugging Face dataset."
    except Exception as e:
        return f"Error saving audio: {str(e)}"

# Handle the Previous/Forward navigation buttons
def navigate_lines(button):
    global line_index, lines
    if button == 'forward':
        line_index = min(line_index + 1, len(lines) - 1)
    elif button == 'previous':
        line_index = max(line_index - 1, 0)
    return lines[line_index].strip() if lines else "No lines found."

# Handle a new text file upload
def upload_file(file):
    if file is not None:
        file_path = file.name
        return read_lines_from_file(file_path)
    else:
        return "No file uploaded."

# Build the Gradio interface
def audio_capture_interface():
    with gr.Blocks() as iface:
        with gr.Row():
            file_upload = gr.File(label="Upload a text file", file_types=["text"])
            text_display = gr.Textbox(label="Text", value="Please upload a file to begin.", interactive=False)
        with gr.Row():
            # type="numpy" so save_audio_to_file() receives the (sample_rate, data) tuple it unpacks
            audio_input = gr.Audio(sources=["microphone", "upload"], type="numpy", label="Speak and click submit")
        with gr.Row():
            status_output = gr.Textbox(label="Status", placeholder="Status will appear here")
        with gr.Row():
            gr.Button("Previous").click(lambda: navigate_lines('previous'), None, text_display)
            gr.Button("Forward").click(lambda: navigate_lines('forward'), None, text_display)
            gr.Button("Submit").click(process_audio, inputs=audio_input, outputs=status_output)

        # The flagging callback must know which components it logs before flag() is called
        hf_writer.setup([text_display, audio_input], "flagged_audio")

        file_upload.upload(upload_file, inputs=file_upload, outputs=text_display)
    return iface
# Launch the interface
iface = audio_capture_interface()
iface.launch()