"""Gradio app that checks pasted or uploaded JSONL against fine-tuning formats.

Three formats are supported, selected in the UI:

* ``Base``            - each line is an object with a ``messages`` list whose
                        items all carry ``role`` and ``content``.
* ``Conversational``  - each line is an object with ``prompt`` and
                        ``completion`` keys.
* ``Multi-turn``      - like ``Base``, and any ``weight`` on an assistant
                        message must be 0 or 1.
"""

import json
import os
from typing import Any, Callable, Optional, Tuple

import gradio as gr

# (is_valid, 1-based number of the first offending line or None when valid)
ValidationResult = Tuple[bool, Optional[int]]


def _validate_jsonl(
    data: str, is_valid_entry: Callable[[Any], bool]
) -> ValidationResult:
    """Apply *is_valid_entry* to every non-blank line of *data* parsed as JSON.

    Lines are split on ``"\\n"`` only, and blank lines are skipped but still
    counted, so the reported line number matches what the user sees in the
    text box or file.
    """
    for line_number, entry in enumerate(data.split("\n"), start=1):
        if not entry.strip():
            continue
        try:
            record = json.loads(entry)
        except json.JSONDecodeError:
            return False, line_number
        if not is_valid_entry(record):
            return False, line_number
    return True, None


def _has_valid_messages(record: Any) -> bool:
    """Return True if *record* is an object with a well-formed ``messages`` list.

    Every message must itself be an object; otherwise ``"role" in message``
    would silently fall back to substring/list-membership semantics.
    """
    if not isinstance(record, dict):
        return False
    messages = record.get("messages")
    if not isinstance(messages, list):
        return False
    return all(
        isinstance(message, dict) and "role" in message and "content" in message
        for message in messages
    )


def _is_conversational_entry(record: Any) -> bool:
    """Return True if *record* is an object with ``prompt`` and ``completion``."""
    return (
        isinstance(record, dict)
        and "prompt" in record
        and "completion" in record
    )


def _is_multi_turn_entry(record: Any) -> bool:
    """Return True if *record* has valid messages and valid assistant weights."""
    if not _has_valid_messages(record):
        return False
    for message in record["messages"]:
        if message["role"] != "assistant" or "weight" not in message:
            continue
        if message["weight"] not in (0, 1):
            return False
    return True


def validate_base_format(data: str) -> ValidationResult:
    """Validate *data* as JSONL in the 'Base' format.

    Returns:
        ``(True, None)`` when every non-blank line is valid, otherwise
        ``(False, line_number)`` for the first invalid line (1-based).
    """
    return _validate_jsonl(data, _has_valid_messages)


def validate_conversational_format(data: str) -> ValidationResult:
    """Validate *data* as JSONL in the 'Conversational' format.

    Returns:
        ``(True, None)`` when every non-blank line is valid, otherwise
        ``(False, line_number)`` for the first invalid line (1-based).
    """
    return _validate_jsonl(data, _is_conversational_entry)


def validate_multi_turn_format(data: str) -> ValidationResult:
    """Validate *data* as JSONL in the 'Multi-turn' format.

    Returns:
        ``(True, None)`` when every non-blank line is valid, otherwise
        ``(False, line_number)`` for the first invalid line (1-based).
    """
    return _validate_jsonl(data, _is_multi_turn_entry)


# Maps the radio-button label to the validator that implements it.
_VALIDATORS = {
    "Base": validate_base_format,
    "Conversational": validate_conversational_format,
    "Multi-turn": validate_multi_turn_format,
}


def _read_uploaded_text(file: Any) -> str:
    """Return the text content of an uploaded file.

    Accepts a filesystem path, raw bytes, or a readable file-like object,
    because the value handed over by the upload component depends on its
    configuration and library version.

    Raises:
        TypeError: if *file* is none of the supported kinds.
        UnicodeDecodeError: if the content is not valid UTF-8.
    """
    # "utf-8-sig" decodes plain UTF-8 unchanged and drops a leading BOM,
    # which would otherwise make the first line fail to parse as JSON.
    if isinstance(file, (bytes, bytearray)):
        return bytes(file).decode("utf-8-sig")
    if isinstance(file, (str, os.PathLike)):
        with open(file, encoding="utf-8-sig") as handle:
            return handle.read()
    if hasattr(file, "read"):
        raw = file.read()
        if isinstance(raw, (bytes, bytearray)):
            return bytes(raw).decode("utf-8-sig")
        return raw
    raise TypeError(f"Unsupported upload type: {type(file).__name__}")


def process_data(text, file, option):
    """Validate the uploaded file (if any) or the pasted text for *option*.

    Args:
        text: Content of the text box; used only when no file is given.
        file: Uploaded file value, or a falsy value when nothing was uploaded.
        option: One of ``"Base"``, ``"Conversational"``, ``"Multi-turn"``.

    Returns:
        A human-readable status message for the output box. Unexpected
        failures are reported as a message rather than raised, since this
        is the UI boundary.
    """
    try:
        data = _read_uploaded_text(file) if file else (text or "")

        validator = _VALIDATORS.get(option)
        if validator is None:
            return (
                "Option selected but no specific format validation "
                "implemented for this option."
            )

        is_valid, line_error = validator(data)
        if not is_valid:
            return (
                f"Error: Input does not follow the required '{option}' "
                f"format at line {line_error}."
            )
        return f"Input follows the '{option}' format."
    except Exception as e:
        return f"An error occurred: {str(e)}"


with gr.Blocks(title="Fine-tuning Formatter") as demo:
    with gr.Row():
        text_input = gr.Textbox(
            label="Paste your text here",
            lines=10,
            placeholder="Enter text here or upload a file...",
        )
        file_input = gr.File(
            label="Upload CSV or JSONL file",
            file_types=['.csv', '.jsonl'],
        )

    with gr.Row():
        option = gr.Radio(
            choices=["Base", "Conversational", "Multi-turn"],
            label="Select the processing mode",
            value="Base",
        )

    with gr.Row():
        submit_button = gr.Button("Submit")

    output = gr.Textbox(label="Output", lines=2)

    submit_button.click(
        fn=process_data,
        inputs=[text_input, file_input, option],
        outputs=output,
    )

# `demo` stays at module level so tooling that imports this module can find
# it; the server itself only starts when the file is run as a script.
if __name__ == "__main__":
    demo.launch()