"""Streamlit demo: ask a question about an uploaded document image (LayoutLMv2)."""
import json

import streamlit as st
import torch
from PIL import Image
from tqdm import tqdm
from transformers import AutoModelForQuestionAnswering, LayoutLMv2Processor, AutoTokenizer


class Config():
    """Plain settings container for the demo.

    Only ``device``, ``checkpoint`` and ``max_len`` are read in this file;
    the remaining fields are not referenced here.
    """

    def __init__(self):
        self.data_dir = "/opt/ml/input/data/"
        self.model = "layoutlmv2"
        self.device = "cpu"
        self.checkpoint = "microsoft/layoutlmv2-base-uncased"
        self.use_ocr_library = False
        self.debug = False
        self.batch_data = 1
        self.num_proc = 1
        self.shuffle = True
        self.lr = 5e-6
        self.seed = 42
        self.batch = 1
        self.max_len = 512
        self.epochs = 1000
        self.fuzzy = False
        self.model_name = ''


config = Config()


def predict_start_first(outputs):
    """Pick one answer span per batch item: best start, then best end at or after it.

    Args:
        outputs: Object exposing ``start_logits`` and ``end_logits``; both are
            assumed to be 2-D tensors indexed as ``[item][token]``.

    Returns:
        A ``(start_indices, end_indices)`` tuple of two lists of plain ``int``,
        one entry per batch item, with ``end >= start`` for every item.
    """
    predicted_start_idx_list = []
    predicted_end_idx_list = []
    for item_start_logits, item_end_logits in zip(outputs.start_logits,
                                                  outputs.end_logits):
        start = int(item_start_logits.argmax())
        # Search only [start, len) so the span can never be inverted; argmax
        # yields an offset into that slice, hence the re-basing by `start`.
        end = start + int(item_end_logits[start:].argmax())
        predicted_start_idx_list.append(start)
        predicted_end_idx_list.append(end)
    return predicted_start_idx_list, predicted_end_idx_list


# Define function to make predictions
def predict(config, model, image, question):
    """Answer ``question`` about ``image`` and return the decoded answer text.

    Args:
        config: Settings object; ``checkpoint`` selects the processor and
            ``max_len`` caps the encoded sequence length.
        model: Question-answering model called with the encoded inputs.
        image: RGB PIL image of the document.
        question: Question text.

    Returns:
        The answer string decoded from the predicted token span of the first
        (and only) batch item.
    """
    # NOTE: the processor is re-created on every call; cache it if prediction
    # latency becomes a concern.
    processor = LayoutLMv2Processor.from_pretrained(config.checkpoint)
    # Truncate only the document words (second sequence) so an over-long page
    # is clipped instead of overflowing the model's position limit, while the
    # question stays intact. Assumes config.max_len matches the checkpoint.
    encoding = processor(
        image,
        question,
        truncation="only_second",
        max_length=config.max_len,
        return_tensors="pt",
    )

    # The inputs must live on the same device as the model weights.
    device = model.device
    input_names = ('input_ids', 'attention_mask', 'token_type_ids', 'bbox', 'image')
    model_inputs = {name: encoding[name].to(device) for name in input_names}

    # model
    with torch.no_grad():
        output = model(**model_inputs)

    predicted_start_idx, predicted_end_idx = predict_start_first(output)
    answer_ids = encoding['input_ids'][0, predicted_start_idx[0]:predicted_end_idx[0] + 1]
    return processor.tokenizer.decode(answer_ids)


def main(config):
    """Build the Streamlit page: upload an image, ask a question, show the answer.

    Args:
        config: Settings object; ``checkpoint`` and ``device`` are used to
            build the model, and the object is forwarded to ``predict``.
    """
    # Load deep learning model
    model = AutoModelForQuestionAnswering.from_pretrained(config.checkpoint).to(config.device)
    # map_location lets weights saved on a GPU load on a CPU-only host.
    # NOTE: torch.load unpickles the file, so "model" must come from a trusted source.
    model.load_state_dict(torch.load("model", map_location=config.device))

    # Create Streamlit app
    st.title('Deep Learning Pipeline')
    st.write('Upload an image and ask a question to get a prediction')

    # Create file uploader and text input widgets
    uploaded_file = st.file_uploader("Choose an image", type=['jpg', 'jpeg', 'png'])
    question = st.text_input('Ask a question')

    # If file is uploaded, show the image
    image = None
    if uploaded_file is not None:
        image = Image.open(uploaded_file).convert("RGB")
        st.image(image, caption='Uploaded Image', use_column_width=True)

    # If question is asked and file is uploaded, make a prediction.
    # The button is evaluated first so it is rendered on every run.
    if st.button('Get Prediction') and uploaded_file is not None and question != '':
        # Make the prediction
        with st.spinner('Predicting...'):
            output = predict(config, model, image, question)
        # Show the output
        st.write('Output:', output)


if __name__ == '__main__':
    config = Config()
    main(config)