|
import streamlit as st |
|
import streamlit.components.v1 as stc |
|
import pandas as pd |
|
import numpy as np |
|
import seaborn as sns |
|
import matplotlib.pyplot as plt |
|
|
|
from PIL import Image |
|
import exifread |
|
import os |
|
from datetime import datetime |
|
|
|
import mutagen |
|
from PIL.ExifTags import TAGS, GPSTAGS |
|
import base64 |
|
import time |
|
from PyPDF2 import PdfReader |
|
# Timestamp taken once, when this module is executed; it is embedded in the
# name of every CSV offered for download during that run.
timestr = time.strftime("%Y%m%d-%H%M%S", time.localtime())
|
|
|
import sqlite3 |
|
|
|
# Introductory paragraph written to the Home page.
details = (
    "\n"
    "Metadata is defined as the data providing information about one or more "
    "aspects of the data; it is used to summarize basic information about data "
    "which can make tracking and working with specific data easier"
    "\n"
)
|
|
|
# HTML snippet rendered at the top of every page as the app title banner.
HTML_BANNER = "\n".join(
    [
        "",
        '<div style="background-color:violet;padding:10px;border-radius:10px">',
        '<h1 style="color:white;text-align:center;">MetaData Extractor App </h1>',
        "</div>",
        "",
    ]
)
|
|
|
def file_download(data):
    """Render a heading and an HTML link that downloads *data* as a CSV file.

    Args:
        data: Object exposing ``to_csv()`` (the callers in this file pass a
            pandas DataFrame). Its CSV text is base64-encoded and embedded
            in a ``data:`` URI.

    The suggested download name is ``result_<timestr>.csv``, where
    ``timestr`` is the module-level timestamp string.
    """
    encoded_csv = base64.b64encode(data.to_csv().encode()).decode()
    download_name = "result_{}.csv".format(timestr)
    link_html = (
        f'<a href="data:file/csv;base64,{encoded_csv}" '
        f'download="{download_name}"> Click Here! </a>'
    )
    st.markdown('### ποΈ Download csv file ')
    # The anchor is raw HTML, so Streamlit must be told not to escape it.
    st.markdown(link_html, unsafe_allow_html=True)
|
|
|
# Module-level SQLite connection and cursor shared by the table helpers below.
# The database lives in 'data.db', resolved relative to the working directory.
conn = sqlite3.connect('data.db')
c = conn.cursor()
|
|
|
def create_filestable():
    """Create the ``filestable`` table unless it already exists.

    Uses the module-level cursor ``c``. The table has four columns:
    filename, filetype, filesize (all TEXT) and uploadDate (TIMESTAMP).
    """
    ddl = 'CREATE TABLE IF NOT EXISTS filestable(filename TEXT,filetype TEXT,filesize TEXT,uploadDate TIMESTAMP)'
    c.execute(ddl)
|
|
|
def add_file_details(filename, filetype, filesize, uploadDate):
    """Insert one upload record into ``filestable`` and commit it.

    Uses the module-level cursor ``c`` and connection ``conn``.

    Args:
        filename: Name of the uploaded file.
        filetype: Type string of the uploaded file.
        filesize: Size of the uploaded file.
        uploadDate: When the file was uploaded. A ``datetime`` is stored as
            its ``isoformat(" ")`` text; any other value is bound as given.
    """
    if isinstance(uploadDate, datetime):
        # Convert explicitly rather than relying on sqlite3's implicit
        # datetime adapter, which is deprecated since Python 3.12.
        uploadDate = uploadDate.isoformat(" ")
    c.execute(
        'INSERT INTO filestable(filename, filetype, filesize, uploadDate) VALUES (?, ?, ?, ?)',
        (filename, filetype, filesize, uploadDate),
    )
    conn.commit()
|
|
|
def view_all_data():
    """Return every row of ``filestable`` as a list of tuples.

    Runs the query on the module-level cursor ``c``.
    """
    rows = c.execute('SELECT * FROM filestable').fetchall()
    return rows
|
|
|
|
|
def load_image(file):
    """Open *file* with Pillow and return the resulting image object.

    Args:
        file: Whatever ``Image.open`` accepts; callers in this file pass a
            path string and uploaded file objects.
    """
    return Image.open(file)
|
|
|
def get_readable_time(time):
    """Format a POSIX timestamp as a ``YYYY-MM-DD-HH:MM`` string.

    Args:
        time: Seconds since the epoch. Note that this parameter name hides
            the ``time`` module inside the function body.

    Returns:
        The timestamp rendered via ``datetime.fromtimestamp`` (local time).
    """
    moment = datetime.fromtimestamp(time)
    return moment.strftime('%Y-%m-%d-%H:%M')
|
|
|
|
|
def get_exif(filename):
    """Return the EXIF data of an image as a plain dict with readable keys.

    Args:
        filename: Path or file object accepted by ``Image.open``.

    Returns:
        A new ``dict`` mapping tag names (looked up in ``TAGS``; unknown tags
        keep their numeric id) to values. When a ``GPSInfo`` tag is present
        its value is a nested dict keyed by names from ``GPSTAGS``. An image
        without EXIF data yields an empty dict.
    """
    exif = Image.open(filename).getexif()
    if not exif:
        return {}

    # Build a fresh dict instead of renaming keys in place: changing a
    # mapping while iterating over it is an error.
    readable = {}
    for tag_id, value in exif.items():
        name = TAGS.get(tag_id, tag_id)
        if name == 'GPSInfo':
            # Depending on the Pillow version the top-level value is either
            # the GPS directory itself or just an offset to it; in the
            # latter case the directory has to be fetched with get_ifd().
            gps_ifd = value if isinstance(value, dict) else exif.get_ifd(tag_id)
            value = {GPSTAGS.get(gps_id, gps_id): gps_value
                     for gps_id, gps_value in gps_ifd.items()}
        readable[name] = value
    return readable
|
|
|
|
|
def _details_frame(mapping):
    """Return a two-column ("Meta Tags", "Value") DataFrame built from *mapping*."""
    return pd.DataFrame(list(mapping.items()), columns=["Meta Tags", "Value"])


def _upload_details(uploaded_file, keys=("FileName", "FileSize", "FileType")):
    """Return the name, size and type of an upload, keyed by *keys* in that order.

    No access/creation/modification times are reported: the upload is handed
    over as a file object, not a filesystem path, so there is nothing to
    ``os.stat``. Passing ``uploaded_file.readable()`` (a bool) to ``os.stat``
    would be interpreted as file descriptor 1 and describe an unrelated file.
    """
    name_key, size_key, type_key = keys
    return {
        name_key: uploaded_file.name,
        size_key: uploaded_file.size,
        type_key: uploaded_file.type,
    }


def _render_home():
    """Draw the Home page: process image, intro text and three info cards."""
    st.image(load_image('extraction_process.png'))
    st.write(details)
    # NOTE(review): the non-ASCII characters in the labels below look like
    # mis-decoded emoji; they are kept verbatim -- confirm the intended glyphs.
    col1, col2, col3 = st.columns(3)
    with col1:
        with st.expander("Get Image Metadata π·"):
            st.info("Image Metadata")
            st.markdown("π·")
            st.text("Upload JPEG,JPG,PNG Images")

    with col2:
        with st.expander("Get Audio Metadata π"):
            st.info("Audio Metadata")
            st.markdown("π")
            st.text("Upload Mp3,Ogg")

    with col3:
        with st.expander("Get Document Metadata ππ"):
            st.info("Document Files Metadata")
            st.markdown("ππ")
            st.text("Upload PDF,Docx")


def _render_image():
    """Draw the Image page: upload, file stats, Pillow/ExifRead/GPS views, download."""
    st.subheader('Image MetaData Extractor')
    image_file = st.file_uploader("Upload Image", type=["png", "jpg", "jpeg"])
    if image_file is None:
        return

    with st.expander('File Stats'):
        full_details = _upload_details(
            image_file, keys=('Filename', 'Filesize', 'Filetype')
        )
        file_details_df = _details_frame(full_details)
        st.dataframe(file_details_df)

    img = load_image(image_file)
    c1, c2 = st.columns(2)
    with c1:
        with st.expander("View Image"):
            st.image(img, width=250)
    with c2:
        with st.expander("Default(JPEG)"):
            st.info("Using PILLOW")
            img_details = {
                "format": img.format,
                "format_desc": img.format_description,
                "filename": img.filename,
                "size": img.size,
                "height": img.height,
                "width": img.width,
                "info": img.info,
            }
            df_img_details = _details_frame(img_details)
            st.dataframe(df_img_details)

    c3, c4 = st.columns(2)
    with c3:
        with st.expander('Using ExifRead Tool'):
            # Earlier readers may have advanced the stream; rewind so that
            # exifread parses from the first byte of the upload.
            image_file.seek(0)
            meta_data = exifread.process_file(image_file)
            meta_data_df = pd.DataFrame(
                list(meta_data.items()), columns=['Meta Data', 'Values']
            )
            st.dataframe(meta_data_df)
    with c4:
        with st.expander('Image geo Coordinates'):
            img_gps_details = get_exif(image_file)
            # Latitude/longitude are GPS sub-tags, so they are looked up
            # inside the 'GPSInfo' entry rather than at the top level.
            gps_block = img_gps_details.get('GPSInfo') if img_gps_details else None
            if not hasattr(gps_block, 'get'):
                gps_block = {}
            lat = gps_block.get('GPSLatitude')
            long = gps_block.get('GPSLongitude')
            gps_info = img_gps_details if img_gps_details else "None Found"
            st.write(gps_info)
            st.write(lat)
            st.write(long)

    # Log the upload's own name, type and byte size, matching the Audio and
    # Document pages. The Pillow image's ``size`` is a (width, height) tuple,
    # which cannot be bound as a single SQL parameter.
    add_file_details(image_file.name, image_file.type, image_file.size, datetime.now())
    with st.expander('Download Results'):
        final_df = pd.concat([file_details_df, df_img_details, meta_data_df])
        st.dataframe(final_df)
        file_download(final_df)


def _render_audio():
    """Draw the Audio page: player, file stats, Mutagen tags and download."""
    st.subheader('Audio MetaData Extractor')
    audio_file = st.file_uploader("Upload Audio", type=["mp3", "ogg"])
    if audio_file is None:
        return

    col1, col2 = st.columns(2)
    with col1:
        st.audio(audio_file.read())

    with col2:
        with st.expander("File Stats"):
            file_details = _upload_details(audio_file)
            add_file_details(audio_file.name, audio_file.type, audio_file.size, datetime.now())
            st.write(file_details)
            df_file_details = _details_frame(file_details)
            st.dataframe(df_file_details)

    with st.expander('Metadata using Mutagen'):
        # The player above consumed the stream with read(); rewind it so
        # Mutagen sees the file from the start.
        audio_file.seek(0)
        meta_data = mutagen.File(audio_file)
        # mutagen.File returns None when it cannot identify the file type.
        if meta_data is None:
            meta_data_dict = {}
        else:
            meta_data_dict = {str(key): str(value) for key, value in meta_data.items()}
        meta_data_audio_df = pd.DataFrame(
            list(meta_data_dict.items()), columns=['Tag', 'Values']
        )
        st.dataframe(meta_data_audio_df)

    with st.expander("Download Results"):
        combined_df = pd.concat([df_file_details, meta_data_audio_df])
        st.dataframe(combined_df)
        file_download(combined_df)


def _render_document():
    """Draw the Document page: file stats, PDF document info and download."""
    st.subheader('Document MetaData Extractor')
    text_file = st.file_uploader("Upload File", type=["PDF"])
    if text_file is None:
        return

    col1, col2 = st.columns([1, 2])
    with col1:
        with st.expander("File Stats"):
            file_details = _upload_details(text_file)
            add_file_details(text_file.name, text_file.type, text_file.size, datetime.now())
            st.write(file_details)
            df_file_details = _details_frame(file_details)

    with col2:
        with st.expander("Metadata"):
            pdf_file = PdfReader(text_file)
            # A PDF without a document-information dictionary has no
            # metadata object; treat that as "no tags" instead of failing.
            pdf_info = pdf_file.metadata or {}
            df_file_details_with_pdf = _details_frame(pdf_info)
            st.dataframe(df_file_details_with_pdf)

    with st.expander("Download Results"):
        pdf_combined_df = pd.concat([df_file_details, df_file_details_with_pdf])
        st.dataframe(pdf_combined_df)
        file_download(pdf_combined_df)


def _render_analytics():
    """Draw the Analytics page: a table of every upload recorded in the database."""
    st.subheader('Analytics')
    uploaded_files = view_all_data()
    df = pd.DataFrame(
        uploaded_files, columns=['Filename', 'Filetype', 'Filesize', 'UploadDate']
    )
    with st.expander('Monitor'):
        st.success('View all uploaded files')
        st.dataframe(df)


def metadata():
    """Run the app: show the banner, ensure the table exists, draw the chosen page.

    The page is selected from the sidebar menu; each option is drawn by its
    own private ``_render_*`` helper.
    """
    stc.html(HTML_BANNER)
    menu = ['Home', 'Image', 'Audio', 'Document_Files', 'Analytics']
    choice = st.sidebar.selectbox('Menu', menu)
    create_filestable()
    if choice == 'Home':
        _render_home()
    elif choice == 'Image':
        _render_image()
    elif choice == 'Audio':
        _render_audio()
    elif choice == 'Document_Files':
        _render_document()
    elif choice == 'Analytics':
        _render_analytics()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|