Spaces:
Sleeping
Sleeping
import os | |
import openai | |
import gradio as gr | |
from azure.cognitiveservices.vision.computervision import ComputerVisionClient | |
from msrest.authentication import CognitiveServicesCredentials | |
from azure.storage.blob import BlobClient | |
#import utils functions | |
from preprocessing_images import preprocessing_function | |
from extract_text import azure_ocr | |
from sentence_transformers import SentenceTransformer | |
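# NOTE: requires the sentence-transformers package ("pip install -U sentence-transformers", e.g. via requirements.txt); the "!pip" notebook shell magic is not valid syntax in a Python module.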
from numpy.linalg import norm | |
import numpy as np | |
# Runtime configuration comes from environment variables; a variable that is
# not set yields None here.
my_container = os.environ.get("AZURE_CONTAINER")
subscription_key = os.environ.get("SUB_KEY")
endpoint = os.environ.get("AZURE_ENDPOINT")
connection_string = os.environ.get("AZURE_CON_STRING")
openai.api_key = os.environ.get("OPENAI_API_KEY")

# Single Computer Vision client shared by every OCR call in this module.
computervision_client = ComputerVisionClient(
    endpoint,
    CognitiveServicesCredentials(subscription_key),
)
def ocr_pdf(pdf_url2):
    """Run OCR on the PDF referenced by ``pdf_url2`` and return its text.

    This function used to be defined twice with identical bodies (the
    parameter was called ``pdf_url1`` in the first copy and ``pdf_url2`` in
    the second), so the second definition silently replaced the first. The
    two copies are merged here; the parameter keeps the name of the
    definition that was actually in effect so existing callers keep working.

    Steps performed:
      1. ``preprocessing_function`` is called with the link.
      2. The local file ``answer_paper.pdf`` is uploaded to the configured
         Azure blob container, overwriting any blob with the same name.
      3. ``azure_ocr`` is called with the blob URL and the shared Computer
         Vision client.

    Args:
        pdf_url2: Link to the PDF. The text after the last ``/`` is used as
            the blob name.

    Returns:
        The text returned by ``azure_ocr`` with surrounding whitespace
        stripped.
    """
    preprocessing_function(pdf_url2)

    # The blob is named after the last path segment of the link.
    blob_name = pdf_url2.split('/')[-1]
    blob = BlobClient.from_connection_string(
        conn_str=connection_string,
        container_name=my_container,
        blob_name=blob_name,
    )

    # NOTE(review): presumably preprocessing_function writes
    # "answer_paper.pdf" to the working directory - confirm. The fixed file
    # name also means concurrent requests would share the same file.
    with open("answer_paper.pdf", "rb") as data:
        blob.upload_blob(data, overwrite=True)

    text = azure_ocr(blob.url, computervision_client)
    return text.strip()
def classify_cause(incident_description):
    """Ask the OpenAI completion API for the root cause of an incident.

    The description is embedded in a fixed prompt and sent to the
    ``text-davinci-003`` engine with temperature 0 and a 50-token limit.

    Args:
        incident_description: Text describing the incident.

    Returns:
        The text of the first returned choice, stripped of surrounding
        whitespace.
    """
    # NOTE(review): the prompt refers to "the below list" but no list is
    # included in it - confirm whether a list of causes is missing.
    prompt_text = f"Identify the root cause from the below list:\nincident_description:{incident_description}\n"

    completion = openai.Completion.create(
        engine="text-davinci-003",
        prompt=prompt_text,
        temperature=0,
        max_tokens=50,
        n=1,
        stop=None,
    )
    first_choice = completion.choices[0]
    return first_choice.text.strip()
def classify_class(incident_description):
    """Classify an incident description into one of the listed problem classes.

    The class list is part of the prompt itself; the request goes to the
    ``text-davinci-003`` engine with temperature 0 and a 50-token limit.

    Args:
        incident_description: Text describing the incident.

    Returns:
        The text of the first returned choice, stripped of surrounding
        whitespace.
    """
    # NOTE(review): "Engine Problem" and "Fuel System Problem" each appear
    # twice in the class list below; the prompt is kept unchanged here.
    request = {
        "engine": "text-davinci-003",
        "prompt": f"Classify the following incident description into one of the given classes:Aircraft Autopilot Problem, Auxiliary Power Problem,Cabin Pressure Problem, Engine Problem,Fuel System Problem,Avionics Problem,Communications Problem,Electrical System Problem,Engine Problem,Fire/Smoke Problem,Fuel System Problem,Ground Service Problem,Hydraulic System Problem,Ice/Frost Problem,Landing Gear Problem,Maintenance Problem,Oxygen System Problem,other problem\nincident_description:{incident_description}\n",
        "temperature": 0,
        "max_tokens": 50,
        "n": 1,
        "stop": None,
    }
    reply = openai.Completion.create(**request)
    return reply.choices[0].text.strip()
# Embedding model used by text_similarity; loaded lazily and then reused.
_SIMILARITY_MODEL_NAME = 'sentence-transformers/all-MiniLM-L6-v2'
_similarity_model = None


def _get_similarity_model():
    """Load the sentence-embedding model on first use and reuse it afterwards."""
    global _similarity_model
    if _similarity_model is None:
        _similarity_model = SentenceTransformer(_SIMILARITY_MODEL_NAME)
    return _similarity_model


def text_similarity(pdftext1, pdftext12):
    """Return the cosine similarity between the embeddings of two texts.

    Args:
        pdftext1: First text to compare.
        pdftext12: Second text to compare. (The name looks like a typo for
            ``pdftext2``; it is kept so keyword callers do not break.)

    Returns:
        The cosine of the angle between the two embedding vectors, or
        ``0.0`` if either embedding has zero norm.
    """
    # Encode the texts that were passed in. The previous code encoded the
    # string literals "pdftext1" and "pdftext2", so the score was a constant
    # that had nothing to do with the input.
    first_vec, second_vec = _get_similarity_model().encode([pdftext1, pdftext12])

    denominator = norm(first_vec) * norm(second_vec)
    if denominator == 0:
        # Avoid a division by zero (NaN result) for a degenerate embedding.
        return 0.0
    return np.dot(first_vec, second_vec) / denominator
def avatiation(pdf_url1, pdf_url2):
    """Analyse two log-report PDFs and compare their text.

    Both links are OCR'd first; each resulting text is then classified
    (category first, then main issue), and finally the two texts are
    compared with ``text_similarity``.

    Args:
        pdf_url1: Link to the first report.
        pdf_url2: Link to the second report.

    Returns:
        A 5-tuple ``(main_issue1, defect_class1, main_issue2, defect_class2,
        cosine)``.
    """
    report_texts = [ocr_pdf(link) for link in (pdf_url1, pdf_url2)]

    results = []
    for report_text in report_texts:
        category = classify_class(report_text)
        issue = classify_cause(report_text)
        # Output order per report is (main issue, category).
        results.extend((issue, category))

    results.append(text_similarity(*report_texts))
    return tuple(results)
# Two text inputs, one per report link.
# NOTE(review): the gr.inputs / gr.outputs namespaces are deprecated in newer
# Gradio releases - confirm against the Gradio version this app is pinned to.
inputs1 = gr.inputs.Textbox(label="Link for aviation log reports")
inputs2 = gr.inputs.Textbox(label="Link for aviation log reports 2")

# One output textbox per value returned by avatiation, in the same order.
outputs = [
    gr.outputs.Textbox(label=caption)
    for caption in (
        "Main Issue of the log report",
        "category of the log report",
        "Main Issue of the log report2",
        "category of the log report2",
        "log similarity",
    )
]

demo = gr.Interface(
    fn=avatiation,
    inputs=[inputs1, inputs2],
    outputs=outputs,
    title="ATA Auto classification using OCR and GPT3 ",
)
demo.launch()