"""Streamlit app that extracts metadata from uploaded image, audio and PDF files."""

import base64
import os
import sqlite3
import time
from datetime import datetime

import exifread  # Extracts Meta data of images
import matplotlib.pyplot as plt
import mutagen  # Extracts Meta data of Audio
import numpy as np
import pandas as pd
import seaborn as sns
import streamlit as st
import streamlit.components.v1 as stc
from PIL import Image
from PIL.ExifTags import GPSTAGS, TAGS
from PyPDF2 import PdfReader

# Captured once at import time; used to name the downloadable CSV.
timestr = time.strftime("%Y%m%d-%H%M%S")

details = (
    " Metadata is defined as the data providing information about one or more"
    " aspects of the data; it is used to summarize basic information about data"
    " which can make tracking and working with specific data easier "
)

# NOTE(review): the banner holds plain text only; presumably HTML markup was
# lost at some point -- confirm against the intended design.
HTML_BANNER = """

MetaData Extractor App

"""

# EXIF tag id of the GPSInfo sub-directory (IFD).
_GPS_IFD_TAG = 0x8825


def file_download(data):
    """Render a link that downloads *data* (a DataFrame) as a CSV file.

    The CSV is embedded in the page as a base64 ``data:`` URI, so no file is
    written on the server.
    """
    csv_file = data.to_csv()
    b64 = base64.b64encode(csv_file.encode()).decode()
    new_filename = "result_{}.csv".format(timestr)
    st.markdown('### 🗃️ Download csv file ')
    # The link has to be an anchor carrying the payload; plain text would
    # leave b64/new_filename unused and give the user nothing to click.
    href = (
        f'<a href="data:text/csv;base64,{b64}" '
        f'download="{new_filename}">Click Here!</a>'
    )
    st.markdown(href, unsafe_allow_html=True)


conn = sqlite3.connect('data.db')
c = conn.cursor()


def create_filestable():
    """Create the ``filestable`` upload log if it does not exist yet."""
    c.execute(
        'CREATE TABLE IF NOT EXISTS filestable('
        'filename TEXT,filetype TEXT,filesize TEXT,uploadDate TIMESTAMP)'
    )


def add_file_details(filename, filetype, filesize, uploadDate):
    """Insert one upload record into ``filestable`` and commit it.

    Args:
        filename: Name of the uploaded file.
        filetype: Type reported for the upload.
        filesize: Size reported for the upload.
        uploadDate: Timestamp of the upload.
    """
    c.execute(
        'INSERT INTO filestable(filename, filetype, filesize, uploadDate) '
        'VALUES (?, ?, ?, ?)',
        (filename, filetype, filesize, uploadDate),
    )
    conn.commit()


def view_all_data():
    """Return every row of ``filestable`` as a list of tuples."""
    c.execute('SELECT * FROM filestable')
    return c.fetchall()


def load_image(file):
    """Open *file* (a path or file-like object) with Pillow and return the image."""
    return Image.open(file)


def get_readable_time(time):
    """Format a POSIX timestamp as ``YYYY-MM-DD-HH:MM`` in local time."""
    return datetime.fromtimestamp(time).strftime('%Y-%m-%d-%H:%M')


def get_exif(filename):
    """Return the EXIF tags of an image as a plain dict keyed by tag name.

    Numeric tag ids are replaced by their names from ``PIL.ExifTags.TAGS``
    (unknown ids are kept as-is). When the image carries a GPS directory it
    is decoded the same way with ``GPSTAGS`` and stored under ``'GPSInfo'``.

    Args:
        filename: Path or file-like object accepted by ``Image.open``.

    Returns:
        A dict of tag name -> value; empty when the image has no EXIF data.
    """
    with Image.open(filename) as img:
        exif = img.getexif()
        named = {TAGS.get(tag_id, tag_id): value for tag_id, value in exif.items()}
        # The top-level GPSInfo entry is only an offset; the actual GPS tags
        # live in a separate IFD that has to be fetched explicitly.
        gps_ifd = exif.get_ifd(_GPS_IFD_TAG)
    if gps_ifd:
        named['GPSInfo'] = {GPSTAGS.get(key, key): value for key, value in gps_ifd.items()}
    return named


def _upload_times(uploaded_file):
    """Return (accessed, created, modified) readable times for an upload.

    NOTE(review): ``readable()`` presumably returns a bool (standard io API),
    in which case ``os.stat`` is given a file-descriptor number rather than
    the uploaded file, and these times do not describe the upload. Kept
    unchanged because the intended data source is not visible here -- confirm.
    """
    statinfo = os.stat(uploaded_file.readable())
    return (
        get_readable_time(statinfo.st_atime),
        get_readable_time(statinfo.st_ctime),
        get_readable_time(statinfo.st_mtime),
    )


def _render_home():
    """Show the landing page: process diagram, description and feature cards."""
    st.image(load_image('extraction_process.png'))
    st.write(details)
    col1, col2, col3 = st.columns(3)
    with col1:
        with st.expander("Get Image Metadata 📷"):
            st.info("Image Metadata")
            st.markdown("📷")
            st.text("Upload JPEG,JPG,PNG Images")
    with col2:
        with st.expander("Get Audio Metadata 🔉"):
            st.info("Audio Metadata")
            st.markdown("🔉")
            st.text("Upload Mp3,Ogg")
    with col3:
        with st.expander("Get Document Metadata 📄📁"):
            st.info("Document Files Metadata")
            st.markdown("📄📁")
            st.text("Upload PDF,Docx")


def _render_image():
    """Show the image page: file stats, Pillow/ExifRead metadata and GPS tags."""
    st.subheader('Image MetaData Extractor')
    image_file = st.file_uploader("Upload Image", type=["png", "jpg", "jpeg"])
    if image_file is None:
        return

    with st.expander('File Stats'):
        accessed, created, modified = _upload_times(image_file)
        full_details = {
            'Filename': image_file.name,
            'Filesize': image_file.size,
            'Filetype': image_file.type,
            'Accessed Time': accessed,
            'Creation Time': created,
            'Modified Time': modified,
        }
        file_details_df = pd.DataFrame(
            list(full_details.items()), columns=["Meta Tags", "Value"]
        )
        st.dataframe(file_details_df)

    c1, c2 = st.columns(2)
    with c1:
        with st.expander("View Image"):
            st.image(load_image(image_file), width=250)
    with c2:
        with st.expander("Default(JPEG)"):
            st.info("Using PILLOW")
            img = load_image(image_file)
            img_details = {
                "format": img.format,
                "format_desc": img.format_description,
                "filename": img.filename,
                "size": img.size,
                "height": img.height,
                "width": img.width,
                "info": img.info,
            }
            df_img_details = pd.DataFrame(
                list(img_details.items()), columns=["Meta Tags", "Value"]
            )
            st.dataframe(df_img_details)

    c3, c4 = st.columns(2)
    with c3:
        with st.expander('Using ExifRead Tool'):
            # Earlier readers leave the stream mid-file; rewind so the parser
            # starts at the image header.
            image_file.seek(0)
            meta_data = exifread.process_file(image_file)
            meta_data_df = pd.DataFrame(
                list(meta_data.items()), columns=['Meta Data', 'Values']
            )
            st.dataframe(meta_data_df)
    with c4:
        with st.expander('Image geo Coordinates'):
            exif_tags = get_exif(image_file)
            gps_tags = exif_tags.get('GPSInfo') or {}
            st.write(exif_tags if exif_tags else "None Found")
            st.write(gps_tags.get('GPSLatitude'))
            st.write(gps_tags.get('GPSLongitude'))

    # Log the upload itself (name/type/byte size), matching the audio and
    # document pages; Pillow's size is a (width, height) tuple that sqlite3
    # cannot bind.
    add_file_details(image_file.name, image_file.type, image_file.size, datetime.now())

    with st.expander('Download Results'):
        final_df = pd.concat([file_details_df, df_img_details, meta_data_df])
        st.dataframe(final_df)
        file_download(final_df)


def _render_audio():
    """Show the audio page: player, file stats and Mutagen tags."""
    st.subheader('Audio MetaData Extractor')
    audio_file = st.file_uploader("Upload Audio", type=["mp3", "ogg"])
    if audio_file is None:
        return

    col1, col2 = st.columns(2)
    with col1:
        st.audio(audio_file.read())
    with col2:
        with st.expander("File Stats"):
            file_details = {
                "FileName": audio_file.name,
                "FileSize": audio_file.size,
                "FileType": audio_file.type,
            }
            add_file_details(
                audio_file.name, audio_file.type, audio_file.size, datetime.now()
            )
            st.write(file_details)
            accessed, created, modified = _upload_times(audio_file)
            stats_details = {
                "Accessed_Time": accessed,
                "Creation_Time": created,
                "Modified_Time": modified,
            }
            st.write(stats_details)
            file_details_combined = {**file_details, **stats_details}
            df_file_details = pd.DataFrame(
                list(file_details_combined.items()),
                columns=["Meta Tags", "Value"],
            )
            st.dataframe(df_file_details)

    with st.expander('Metadata using Mutagen'):
        # read() above consumed the stream; rewind before parsing tags.
        audio_file.seek(0)
        meta_data = mutagen.File(audio_file)
        # mutagen.File returns None when it cannot identify the format.
        tag_items = meta_data.items() if meta_data is not None else []
        meta_data_dict = {str(key): str(value) for key, value in tag_items}
        meta_data_audio_df = pd.DataFrame(
            list(meta_data_dict.items()), columns=['Tag', 'Values']
        )
        st.dataframe(meta_data_audio_df)

    with st.expander("Download Results"):
        combined_df = pd.concat([df_file_details, meta_data_audio_df])
        st.dataframe(combined_df)
        file_download(combined_df)


def _render_documents():
    """Show the document page: file stats and PDF document-information tags."""
    st.subheader('Document MetaData Extractor')
    text_file = st.file_uploader("Upload File", type=["PDF"])
    if text_file is None:
        return

    col1, col2 = st.columns([1, 2])
    with col1:
        with st.expander("File Stats"):
            file_details = {
                "FileName": text_file.name,
                "FileSize": text_file.size,
                "FileType": text_file.type,
            }
            add_file_details(
                text_file.name, text_file.type, text_file.size, datetime.now()
            )
            st.write(file_details)
            accessed, created, modified = _upload_times(text_file)
            stats_details = {
                "Accessed_Time": accessed,
                "Creation_Time": created,
                "Modified_Time": modified,
            }
            st.write(stats_details)
            file_details_combined = {**file_details, **stats_details}
            df_file_details = pd.DataFrame(
                list(file_details_combined.items()),
                columns=["Meta Tags", "Value"],
            )
    with col2:
        with st.expander("Metadata"):
            pdf_info = PdfReader(text_file).metadata
            # A PDF without a document-information dictionary yields None.
            pdf_rows = list(pdf_info.items()) if pdf_info else []
            df_file_details_with_pdf = pd.DataFrame(
                pdf_rows, columns=["Meta Tags", "Value"]
            )
            st.dataframe(df_file_details_with_pdf)

    with st.expander("Download Results"):
        pdf_combined_df = pd.concat([df_file_details, df_file_details_with_pdf])
        st.dataframe(pdf_combined_df)
        file_download(pdf_combined_df)


def _render_analytics():
    """Show the analytics page: a table of every logged upload."""
    st.subheader('Analytics')
    uploaded_files = view_all_data()
    df = pd.DataFrame(
        uploaded_files, columns=['Filename', 'Filetype', 'Filesize', 'UploadDate']
    )
    with st.expander('Monitor'):
        st.success('View all uploaded files')
        st.dataframe(df)  # Monitor uploads


def metadata():
    """Render the app: banner, sidebar menu and the page the user selected."""
    stc.html(HTML_BANNER)
    pages = {
        'Home': _render_home,
        'Image': _render_image,
        'Audio': _render_audio,
        'Document_Files': _render_documents,
        'Analytics': _render_analytics,
    }
    choice = st.sidebar.selectbox('Menu', list(pages))
    create_filestable()
    pages[choice]()