import streamlit as st
import streamlit.components.v1 as stc
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from PIL import Image
import exifread # Extracts Meta data of images
import os
from datetime import datetime
import mutagen # Extracts Meta data of Audio
from PIL.ExifTags import TAGS, GPSTAGS
import base64
import time
from PyPDF2 import PdfReader
# Timestamp string built once when this module is executed; file_download()
# embeds it in the name of the generated CSV file.
timestr = time.strftime("%Y%m%d-%H%M%S")
import sqlite3
# Short explanation of what metadata is; shown on the Home page via st.write().
details = """
Metadata is defined as the data providing information about one or more aspects of the data; it is used to summarize basic information about data which can make tracking and working with specific data easier
"""
# Banner content passed to stc.html() at the top of the app.
HTML_BANNER = """
MetaData Extractor App
"""
def file_download(data):
    """Render a link that downloads ``data`` as a CSV file.

    The frame is serialised with ``DataFrame.to_csv()``, base64-encoded and
    embedded in a ``data:`` URI so the browser can save it without a
    server-side file. The suggested file name is ``result_<timestr>.csv``,
    where ``timestr`` is the module-level timestamp.

    Args:
        data: A pandas DataFrame (anything exposing ``to_csv()``).
    """
    csv_file = data.to_csv()
    b64 = base64.b64encode(csv_file.encode()).decode()
    new_filename = "result_{}.csv".format(timestr)
    st.markdown('### 🗃️ Download csv file ')
    # The anchor must carry the encoded payload and the target file name;
    # without them the "link" is plain text and nothing can be downloaded.
    href = (
        f'<a href="data:text/csv;base64,{b64}" '
        f'download="{new_filename}"> Click Here! </a>'
    )
    st.markdown(href, unsafe_allow_html=True)
# Module-level SQLite connection to 'data.db' and the cursor that the
# filestable helper functions in this file execute their statements on.
conn=sqlite3.connect('data.db')
c=conn.cursor()
def create_filestable():
    """Create the ``filestable`` table through the shared cursor if absent."""
    schema_sql = 'CREATE TABLE IF NOT EXISTS filestable(filename TEXT,filetype TEXT,filesize TEXT,uploadDate TIMESTAMP)'
    c.execute(schema_sql)
def add_file_details(filename, filetype, filesize, uploadDate):
    """Insert one upload record into ``filestable`` and commit it.

    Args:
        filename: Name of the uploaded file.
        filetype: Type/format label of the file.
        filesize: Size of the file; stored as text.
        uploadDate: Moment of the upload (callers pass ``datetime.now()``).
    """
    # The column is declared TEXT, and callers may hand over non-scalar
    # values (e.g. an image's (width, height) tuple) that sqlite3 cannot
    # bind directly, so the size is always stored in its string form.
    record = (filename, filetype, str(filesize), uploadDate)
    c.execute(
        'INSERT INTO filestable(filename, filetype, filesize, uploadDate) VALUES (?, ?, ?, ?)',
        record,
    )
    conn.commit()
def view_all_data():
    """Return every row currently stored in ``filestable``."""
    # sqlite3's Cursor.execute() returns the cursor itself, so the fetch
    # can be chained directly onto the query.
    return c.execute('SELECT * FROM filestable').fetchall()
def load_image(file):
    """Open ``file`` with Pillow and return the resulting image object."""
    return Image.open(file)
def get_readable_time(time):
    """Format a POSIX timestamp as ``YYYY-MM-DD-HH:MM`` in local time."""
    moment = datetime.fromtimestamp(time)
    readable = moment.strftime('%Y-%m-%d-%H:%M')
    return readable
def get_exif(filename):
    """Return the image's EXIF data as a plain dict keyed by tag name.

    Numeric tag ids are translated through ``TAGS``; ids without a known
    name are kept as-is. When the image carries a GPS directory, it is
    stored under ``'GPSInfo'`` as a nested dict whose keys are translated
    through ``GPSTAGS`` in the same way.

    Args:
        filename: Path or file-like object accepted by ``Image.open``.

    Returns:
        dict: Named EXIF entries, or an empty dict when the image has none.
    """
    exif = Image.open(filename).getexif()
    if not exif:
        return {}

    # getexif() returns an Exif mapping rather than a dict, so a new dict is
    # built instead of renaming keys in place (which would also mutate the
    # mapping while it is being iterated).
    named = {TAGS.get(tag_id, tag_id): value for tag_id, value in exif.items()}

    # 0x8825 is the EXIF pointer tag of the GPS directory; get_ifd() resolves
    # it to the directory's own {tag id: value} mapping.
    gps_ifd = exif.get_ifd(0x8825)
    if gps_ifd:
        named['GPSInfo'] = {
            GPSTAGS.get(tag_id, tag_id): value for tag_id, value in gps_ifd.items()
        }
    return named
def _upload_stat_times(uploaded_file):
    """Return (accessed, created, modified) display strings for an upload.

    NOTE(review): ``readable()`` is expected to return a bool for a
    file-like upload, so ``os.stat`` receives an integer and reports on a
    file descriptor of the running process, not on the uploaded file. The
    call is kept as it was so the result tables keep these rows; confirm
    whether they should be dropped.
    """
    statinfo = os.stat(uploaded_file.readable())
    return (
        get_readable_time(statinfo.st_atime),
        get_readable_time(statinfo.st_ctime),
        get_readable_time(statinfo.st_mtime),
    )


def _items_frame(mapping, columns):
    """Build a two-column DataFrame holding the (key, value) pairs of a mapping."""
    return pd.DataFrame(list(mapping.items()), columns=columns)


def _combine_frames(frames):
    """Stack two-column frames under the column names of the first frame.

    The per-tool frames use different headers; concatenating them unchanged
    would yield a sparse table with one pair of columns per header set.
    """
    columns = list(frames[0].columns)
    aligned = []
    for frame in frames:
        frame = frame.copy()
        frame.columns = columns
        aligned.append(frame)
    return pd.concat(aligned)


def _home_page():
    """Show the intro image, the description and one info card per file kind."""
    st.image(load_image('extraction_process.png'))
    st.write(details)
    cards = [
        ("Get Image Metadata 📷", "Image Metadata", "📷", "Upload JPEG,JPG,PNG Images"),
        ("Get Audio Metadata 🔉", "Audio Metadata", "🔉", "Upload Mp3,Ogg"),
        ("Get Document Metadata 📄📁", "Document Files Metadata", "📄📁", "Upload PDF,Docx"),
    ]
    for column, (title, info, icon, hint) in zip(st.columns(3), cards):
        with column:
            with st.expander(title):
                st.info(info)
                st.markdown(icon)
                st.text(hint)


def _image_page():
    """Upload an image and show its file stats, Pillow info, EXIF tags and GPS data."""
    st.subheader('Image MetaData Extractor')
    image_file = st.file_uploader("Upload Image", type=["png", "jpg", "jpeg"])
    if image_file is None:
        return

    with st.expander('File Stats'):
        accessed, created, modified = _upload_stat_times(image_file)
        full_details = {
            'Filename': image_file.name,
            'Filesize': image_file.size,
            'Filetype': image_file.type,
            'Accessed Time': accessed,
            'Creation Time': created,
            'Modified Time': modified,
        }
        file_details_df = _items_frame(full_details, ["Meta Tags", "Value"])
        st.dataframe(file_details_df)

    c1, c2 = st.columns(2)
    with c1:
        with st.expander("View Image"):
            img = load_image(image_file)
            st.image(img, width=250)
    with c2:
        with st.expander("Default(JPEG)"):
            st.info("Using PILLOW")
            img = load_image(image_file)
            img_details = {
                "format": img.format,
                "format_desc": img.format_description,
                "filename": img.filename,
                "size": img.size,
                "height": img.height,
                "width": img.width,
                "info": img.info,
            }
            df_img_details = _items_frame(img_details, ["Meta Tags", "Value"])
            st.dataframe(df_img_details)

    c3, c4 = st.columns(2)
    with c3:
        with st.expander('Using ExifRead Tool'):
            # Earlier readers have advanced the upload stream; rewind it so
            # the parser sees the file from its first byte.
            image_file.seek(0)
            meta_data = exifread.process_file(image_file)
            meta_data_df = _items_frame(meta_data, ['Meta Data', 'Values'])
            st.dataframe(meta_data_df)
    with c4:
        with st.expander('Image geo Coordinates'):
            image_file.seek(0)
            img_gps_details = get_exif(image_file)
            # Coordinates live in the nested GPS block, not at the top level.
            gps_block = img_gps_details.get('GPSInfo') if img_gps_details else None
            if not isinstance(gps_block, dict):
                gps_block = {}
            st.write(img_gps_details if img_gps_details else "None Found")
            st.write(gps_block.get('GPSLatitude'))
            st.write(gps_block.get('GPSLongitude'))

    # Log the upload with the same fields the other pages record (name,
    # type, byte size) rather than the Pillow image's attributes.
    add_file_details(image_file.name, image_file.type, image_file.size, datetime.now())

    with st.expander('Download Results'):
        final_df = _combine_frames([file_details_df, df_img_details, meta_data_df])
        st.dataframe(final_df)
        file_download(final_df)


def _audio_page():
    """Upload an audio file, play it and show its file stats and mutagen tags."""
    st.subheader('Audio MetaData Extractor')
    audio_file = st.file_uploader("Upload Audio", type=["mp3", "ogg"])
    if audio_file is None:
        return

    col1, col2 = st.columns(2)
    with col1:
        st.audio(audio_file.read())
    with col2:
        with st.expander("File Stats"):
            file_details = {
                "FileName": audio_file.name,
                "FileSize": audio_file.size,
                "FileType": audio_file.type,
            }
            add_file_details(audio_file.name, audio_file.type, audio_file.size, datetime.now())
            st.write(file_details)
            accessed, created, modified = _upload_stat_times(audio_file)
            stats_details = {
                "Accessed_Time": accessed,
                "Creation_Time": created,
                "Modified_Time": modified,
            }
            st.write(stats_details)
            file_details_combined = {**file_details, **stats_details}
            df_file_details = _items_frame(file_details_combined, ["Meta Tags", "Value"])
            st.dataframe(df_file_details)

    with st.expander('Metadata using Mutagen'):
        # read() above left the stream at its end; rewind before parsing.
        audio_file.seek(0)
        meta_data = mutagen.File(audio_file)
        # mutagen.File() yields None when it cannot identify the format.
        tag_items = meta_data.items() if meta_data is not None else []
        meta_data_dict = {str(key): str(value) for key, value in tag_items}
        meta_data_audio_df = _items_frame(meta_data_dict, ['Tag', 'Values'])
        st.dataframe(meta_data_audio_df)

    with st.expander("Download Results"):
        combined_df = _combine_frames([df_file_details, meta_data_audio_df])
        st.dataframe(combined_df)
        file_download(combined_df)


def _document_page():
    """Upload a PDF and show its file stats and document-information entries."""
    st.subheader('Document MetaData Extractor')
    text_file = st.file_uploader("Upload File", type=["PDF"])
    if text_file is None:
        return

    col1, col2 = st.columns([1, 2])
    with col1:
        with st.expander("File Stats"):
            file_details = {
                "FileName": text_file.name,
                "FileSize": text_file.size,
                "FileType": text_file.type,
            }
            add_file_details(text_file.name, text_file.type, text_file.size, datetime.now())
            st.write(file_details)
            accessed, created, modified = _upload_stat_times(text_file)
            stats_details = {
                "Accessed_Time": accessed,
                "Creation_Time": created,
                "Modified_Time": modified,
            }
            st.write(stats_details)
            # Combine all details and convert them to a DataFrame.
            file_details_combined = {**file_details, **stats_details}
            df_file_details = _items_frame(file_details_combined, ["Meta Tags", "Value"])
    with col2:
        with st.expander("Metadata"):
            pdf_file = PdfReader(text_file)
            # A PDF without a document-information dictionary has no
            # metadata object; treat that as "no entries".
            pdf_info = pdf_file.metadata or {}
            df_file_details_with_pdf = _items_frame(pdf_info, ["Meta Tags", "Value"])
            st.dataframe(df_file_details_with_pdf)

    with st.expander("Download Results"):
        pdf_combined_df = _combine_frames([df_file_details, df_file_details_with_pdf])
        st.dataframe(pdf_combined_df)
        file_download(pdf_combined_df)


def _analytics_page():
    """List every upload recorded in the tracking table."""
    st.subheader('Analytics')
    uploaded_files = view_all_data()
    df = pd.DataFrame(uploaded_files, columns=['Filename', 'Filetype', 'Filesize', 'UploadDate'])
    # Monitor uploads
    with st.expander('Monitor'):
        st.success('View all uploaded files')
        st.dataframe(df)


def metadata():
    """Render the app: banner, sidebar menu and the page picked in the menu.

    The tracking table is created (if missing) on every run before the
    selected page is drawn.
    """
    # st.title('Meta-Data Extractor App')
    stc.html(HTML_BANNER)
    pages = {
        'Home': _home_page,
        'Image': _image_page,
        'Audio': _audio_page,
        'Document_Files': _document_page,
        'Analytics': _analytics_page,
    }
    menu = list(pages)
    choice = st.sidebar.selectbox('Menu', menu)
    create_filestable()
    render_page = pages.get(choice)
    if render_page is not None:
        render_page()