"""Gradio app that turns a spoken vehicle inspection into a JSON report.

The recording is transcribed with a speech-to-text pipeline, then an
extractive question-answering pipeline is asked one question per report
field, using the transcript as context.
"""

import json
import os
from datetime import datetime

import gradio as gr
import numpy as np
from transformers import pipeline

# Speech to text model and question-answering model
s2t_model = "openai/whisper-base.en"
qa_model = 'distilbert/distilbert-base-cased-distilled-squad'

transcriber = pipeline('automatic-speech-recognition', model=s2t_model)
qa_generator = pipeline('question-answering', model=qa_model)

# List of prompts for generating the report. Each entry pairs the question
# put to the QA model with the key its answer is stored under.
prompts = [
    {"question": "What is the business name?", "key": "business_name"},
    {"question": "Who is the technician name?", "key": "technician_name"},
    {"question": "Who is the customer name?", "key": "customer_name"},
    {"question": "When was the inspection conducted?", "key": "inspection_date"},
    {"question": "What was the vehicle inspected?", "key": "vehicle_inspected"},
    {"question": "What is the VIN of the vehicle?", "key": "vehicle_vin"},
    {"question": "What was the mileage of the vehicle?", "key": "vehicle_mileage"},
    {"question": "What is the repair order number?", "key": "repair_order_number"},
    {"question": "What was the condition of the left front tire?", "key": "left_front_tire_condition"},
    {"question": "What was the condition of the right front tire?", "key": "right_front_tire_condition"},
    {"question": "What was the condition of the left rear tire?", "key": "left_rear_tire_condition"},
    {"question": "What was the condition of the right rear tire?", "key": "right_rear_tire_condition"},
    {"question": "What was the engine oil level?", "key": "engine_oil_level"},
    {"question": "What was the engine oil viscosity?", "key": "engine_oil_viscosity"},
    {"question": "What was the inflation pressure of the left front tire?", "key": "left_front_inflation_pressure"},
    {"question": "What was the inflation pressure of the right front tire?", "key": "right_front_inflation_pressure"},
    {"question": "What was the inflation pressure of the left rear tire?", "key": "left_rear_inflation_pressure"},
    {"question": "What was the inflation pressure of the right rear tire?", "key": "right_rear_inflation_pressure"},
    {"question": "What was the wear pattern of the left front tire?", "key": "left_front_wear_pattern"},
    {"question": "What was the wear pattern of the right front tire?", "key": "right_front_wear_pattern"},
    {"question": "What was the wear pattern of the left rear tire?", "key": "left_rear_wear_pattern"},
    {"question": "What was the wear pattern of the right rear tire?", "key": "right_rear_wear_pattern"},
]

# Tire positions, in the order they appear in the report.
_TIRE_POSITIONS = ("left_front", "right_front", "left_rear", "right_rear")


def _prepare_waveform(samples):
    """Return *samples* as a mono float32 waveform scaled to a peak of 1.0."""
    waveform = np.asarray(samples).astype(np.float32)

    # Down-mix multi-channel input by averaging over the second axis.
    # NOTE(review): assumes 2-D input is laid out (samples, channels), as in
    # the Gradio speech-recognition guide -- confirm for other audio sources.
    if waveform.ndim > 1:
        waveform = waveform.mean(axis=1)

    # Guard the normalisation: an empty array has no max, and an all-zero
    # (silent) recording would otherwise be divided by zero and become NaN.
    peak = np.max(np.abs(waveform)) if waveform.size else 0.0
    if peak > 0:
        waveform /= peak
    return waveform


def _build_report(results):
    """Arrange the flat ``key -> answer`` mapping into the nested report."""
    tire_condition = {
        position: {
            # The "tread_depth" field is filled from the tire *condition*
            # answer; there is no dedicated tread-depth prompt.
            "tread_depth": results[f"{position}_tire_condition"],
            "inflation_pressure": results[f"{position}_inflation_pressure"],
            "wear_pattern": results[f"{position}_wear_pattern"],
        }
        for position in _TIRE_POSITIONS
    }

    return {
        "business_name": results["business_name"],
        "technician_name": results["technician_name"],
        "customer_name": results["customer_name"],
        "inspection_date": results["inspection_date"],
        "vehicle_inspected": results["vehicle_inspected"],
        "vehicle_details": {
            "year_make_model": results["vehicle_inspected"],
            "VIN": results["vehicle_vin"],
            "mileage": results["vehicle_mileage"],
        },
        "repair_order_number": results["repair_order_number"],
        "inspection_details": {
            "tire_condition": tire_condition,
            "under_hood": {
                "fluid_levels": {
                    "engine_oil": {
                        "level": results["engine_oil_level"],
                        "viscosity": results["engine_oil_viscosity"],
                    }
                }
            },
        },
    }


# NOTE: the misspelt name ("trascribe") is kept so existing references to
# this function continue to work.
def trascribe_and_answer(audio) -> str:
    """Transcribe an inspection recording and return the report as JSON text.

    Args:
        audio: A ``(sampling_rate, samples)`` pair, or ``None`` when nothing
            was recorded or uploaded.

    Returns:
        The report serialised as an indented JSON string. The same report is
        also written to a timestamped ``.json`` file in the current working
        directory.

    Raises:
        gr.Error: If no audio was supplied, the recording is empty, or no
            text could be transcribed from it.
    """
    if audio is None:
        raise gr.Error("No audio received. Please record or upload an inspection report.")

    sr, y = audio
    y = _prepare_waveform(y)
    if y.size == 0:
        raise gr.Error("The recording is empty. Please record the inspection report again.")

    context = transcriber({"sampling_rate": sr, "raw": y})["text"]
    # Stop before question answering when there is no transcript to search.
    if not context.strip():
        raise gr.Error("No speech could be transcribed from the recording.")

    # Ask every prompt against the transcript and keep the extracted answer.
    results = {
        prompt["key"]: qa_generator(question=prompt["question"], context=context)["answer"]
        for prompt in prompts
    }

    report = _build_report(results)

    # Add timestamp to the report
    now = datetime.now()
    report["timestamp"] = now.strftime("%Y-%m-%d %H:%M:%S")

    # The filename uses a stamp without spaces or colons, because colons are
    # not valid in Windows filenames.
    file_stamp = now.strftime("%Y-%m-%d_%H-%M-%S")
    filename = f"vehicle_inspection_report_{file_stamp}.json"

    # Save JSON report to a file
    with open(filename, "w", encoding="utf-8") as json_file:
        json.dump(report, json_file, indent=4)

    print("Generated Vehicle Inspection Report in JSON format.")

    # Return JSON-like formatted output
    return json.dumps(report, indent=4)


# Gradio interface for the audio input
qa_report = gr.Interface(
    fn=trascribe_and_answer,
    inputs=gr.Audio(label="Record your inspection report:"),
    outputs=gr.Textbox(label="Output in JSON-like format"),
    title="Vehicle Inspection Report Generator",
    description="This tool generates a Vehicle Inspection Report based on spoken inspection results.",
    examples=[
        # NOTE(review): os.path.dirname("audio") is "", so this resolves to
        # "Vehicle_Inspection_example.mp3" relative to the working directory,
        # not inside an "audio/" folder -- confirm which location is intended.
        [os.path.join(os.path.dirname("audio"), "Vehicle_Inspection_example.mp3")],
    ],
)

qa_report.launch()