# zewdu444's picture
# Update app.py
# 7ff5afd verified
import gradio as gr
from transformers import pipeline
import numpy as np
import json
from datetime import datetime
import os
# Model checkpoints: one for speech-to-text, one for extractive
# question answering over the resulting transcript.
s2t_model = "openai/whisper-base.en"
qa_model = "distilbert/distilbert-base-cased-distilled-squad"

# Both pipelines are created once at import time and reused for every request.
transcriber = pipeline(task="automatic-speech-recognition", model=s2t_model)
qa_generator = pipeline(task="question-answering", model=qa_model)
# Questions put to the question-answering model, in report order. Each entry
# pairs the question text with the key its answer is stored under.
prompts = [
    {"question": question_text, "key": result_key}
    for question_text, result_key in (
        ("What is the business name?", "business_name"),
        ("Who is the technician name?", "technician_name"),
        ("Who is the customer name?", "customer_name"),
        ("When was the inspection conducted?", "inspection_date"),
        ("What was the vehicle inspected?", "vehicle_inspected"),
        ("What is the VIN of the vehicle?", "vehicle_vin"),
        ("What was the mileage of the vehicle?", "vehicle_mileage"),
        ("What is the repair order number?", "repair_order_number"),
        ("What was the condition of the left front tire?", "left_front_tire_condition"),
        ("What was the condition of the right front tire?", "right_front_tire_condition"),
        ("What was the condition of the left rear tire?", "left_rear_tire_condition"),
        ("What was the condition of the right rear tire?", "right_rear_tire_condition"),
        ("What was the engine oil level?", "engine_oil_level"),
        ("What was the engine oil viscosity?", "engine_oil_viscosity"),
        ("What was the inflation pressure of the left front tire?", "left_front_inflation_pressure"),
        ("What was the inflation pressure of the right front tire?", "right_front_inflation_pressure"),
        ("What was the inflation pressure of the left rear tire?", "left_rear_inflation_pressure"),
        ("What was the inflation pressure of the right rear tire?", "right_rear_inflation_pressure"),
        ("What was the wear pattern of the left front tire?", "left_front_wear_pattern"),
        ("What was the wear pattern of the right front tire?", "right_front_wear_pattern"),
        ("What was the wear pattern of the left rear tire?", "left_rear_wear_pattern"),
        ("What was the wear pattern of the right rear tire?", "right_rear_wear_pattern"),
    )
]
def trascribe_and_answer(audio):
    """Transcribe a spoken inspection and return it as a JSON report.

    The audio is transcribed with the module-level ``transcriber``; every
    entry of ``prompts`` is then answered against the transcript with
    ``qa_generator``. The assembled report is written to a timestamped JSON
    file in the current working directory and also returned as text.

    Args:
        audio: ``(sampling_rate, samples)`` tuple, or ``None`` when nothing
            was recorded. ``samples`` is assumed to be a NumPy array shaped
            ``(n,)`` or ``(n, channels)`` (Gradio's numpy audio format) --
            confirm if the input component is ever reconfigured.

    Returns:
        str: The report serialized as indented JSON. When there is no usable
        audio, or nothing could be transcribed, a JSON object with a single
        ``"error"`` key is returned instead and no file is written.
    """
    no_audio = json.dumps({"error": "No audio was provided."}, indent=4)
    if audio is None:
        return no_audio

    sr, y = audio
    y = np.asarray(y)
    if y.size == 0:
        # np.max() raises on an empty array; treat it like a missing recording.
        return no_audio

    y = y.astype(np.float32)
    if y.ndim > 1:
        # Down-mix (samples, channels) to mono; the ASR pipeline expects 1-D input.
        y = y.mean(axis=1)

    # Peak-normalize, but leave all-zero (silent) audio alone so that a
    # division by zero does not fill the signal with NaNs.
    peak = np.max(np.abs(y))
    if peak > 0:
        y /= peak

    context = transcriber({"sampling_rate": sr, "raw": y})["text"]
    if not context.strip():
        # The question-answering pipeline rejects an empty context.
        return json.dumps(
            {"error": "No speech could be transcribed from the audio."},
            indent=4,
        )

    # Ask every prompt against the transcript and keep only the answer text.
    results = {
        prompt["key"]: qa_generator(
            question=prompt["question"], context=context
        )["answer"]
        for prompt in prompts
    }

    # NOTE(review): "tread_depth" is filled from the "..._tire_condition"
    # answers; there is no dedicated tread-depth prompt.
    tire_condition = {
        position: {
            "tread_depth": results[f"{position}_tire_condition"],
            "inflation_pressure": results[f"{position}_inflation_pressure"],
            "wear_pattern": results[f"{position}_wear_pattern"],
        }
        for position in ("left_front", "right_front", "left_rear", "right_rear")
    }

    report = {
        "business_name": results["business_name"],
        "technician_name": results["technician_name"],
        "customer_name": results["customer_name"],
        "inspection_date": results["inspection_date"],
        "vehicle_inspected": results["vehicle_inspected"],
        "vehicle_details": {
            "year_make_model": results["vehicle_inspected"],
            "VIN": results["vehicle_vin"],
            "mileage": results["vehicle_mileage"],
        },
        "repair_order_number": results["repair_order_number"],
        "inspection_details": {
            "tire_condition": tire_condition,
            "under_hood": {
                "fluid_levels": {
                    "engine_oil": {
                        "level": results["engine_oil_level"],
                        "viscosity": results["engine_oil_viscosity"],
                    }
                }
            },
        },
    }

    now = datetime.now()
    # Add timestamp to the report
    report["timestamp"] = now.strftime("%Y-%m-%d %H:%M:%S")

    # The file name uses a stamp without ':' or spaces so that it is a valid
    # file name on every platform (':' is rejected on Windows).
    file_stamp = now.strftime("%Y-%m-%d_%H-%M-%S")
    filename = f"vehicle_inspection_report_{file_stamp}.json"

    # Serialize once; the same text is saved to disk and returned to the UI.
    report_json = json.dumps(report, indent=4)
    with open(filename, "w", encoding="utf-8") as json_file:
        json_file.write(report_json)
    print("Generated Vehicle Inspection Report in JSON format.")

    return report_json
# Gradio UI: one audio input, one textbox that shows the JSON report.
qa_report = gr.Interface(
    title="Vehicle Inspection Report Generator",
    description="This tool generates a Vehicle Inspection Report based on spoken inspection results.",
    fn=trascribe_and_answer,
    inputs=gr.Audio(label="Record your inspection report:"),
    outputs=gr.Textbox(label="Output in JSON-like format"),
    # NOTE(review): os.path.dirname("audio") evaluates to "", so this example
    # path is just the bare file name, resolved against the working directory.
    examples=[
        [
            os.path.join(
                os.path.dirname("audio"),
                "Vehicle_Inspection_example.mp3",
            )
        ]
    ],
)

# Start the web app as soon as the module is executed.
qa_report.launch()