Spaces:
Sleeping
Sleeping
from enum import Enum | |
from types import MappingProxyType | |
from typing import List, Set, Mapping | |
""" | |
Base Enums/Consts | |
""" | |
class FileType(Enum):
    """File formats known to this module.

    Each member's value is an upper-case label for the format. The label is
    identical to the member name for every member except ``THREE_GPP``,
    whose value is ``"3GPP"`` (a Python identifier cannot start with a digit).
    """

    AAC = "AAC"
    CSV = "CSV"
    DOC = "DOC"
    DOCX = "DOCX"
    FLAC = "FLAC"
    FLV = "FLV"
    GIF = "GIF"
    GOOGLE_DOC = "GOOGLE_DOC"
    GOOGLE_DRAWINGS = "GOOGLE_DRAWINGS"
    GOOGLE_SHEETS = "GOOGLE_SHEETS"
    GOOGLE_SLIDES = "GOOGLE_SLIDES"
    HEIC = "HEIC"
    HEIF = "HEIF"
    HTML = "HTML"
    JPEG = "JPEG"
    JSON = "JSON"
    M4A = "M4A"
    M4V = "M4V"
    MOV = "MOV"
    MP3 = "MP3"
    MP4 = "MP4"
    MPEG = "MPEG"
    MPEGPS = "MPEGPS"
    MPG = "MPG"
    MPA = "MPA"
    MPGA = "MPGA"
    OGG = "OGG"
    OPUS = "OPUS"
    PDF = "PDF"
    PCM = "PCM"
    PNG = "PNG"
    PPT = "PPT"
    PPTX = "PPTX"
    RTF = "RTF"
    THREE_GPP = "3GPP"  # name differs from value: identifiers cannot begin with "3"
    TXT = "TXT"
    WAV = "WAV"
    WEBM = "WEBM"
    WEBP = "WEBP"
    WMV = "WMV"
    XLS = "XLS"
    XLSX = "XLSX"
# Known file extensions for each FileType.
#
# Extensions are stored lower-case and without a leading dot. The first entry
# of every list is the canonical extension: it is the one returned by
# get_file_extension_for_file_type() and get_file_extension_from_mime_type().
# Any further entries are aliases that get_file_type_from_extension() also
# recognises.
#
# The MappingProxyType wrapper makes the mapping itself read-only; the lists it
# holds are ordinary lists and are not protected against mutation.
FILE_EXTENSIONS: Mapping[FileType, List[str]] = MappingProxyType(
    {
        FileType.AAC: ["aac"],
        FileType.CSV: ["csv"],
        FileType.DOC: ["doc"],
        FileType.DOCX: ["docx"],
        FileType.FLAC: ["flac"],
        FileType.FLV: ["flv"],
        FileType.GIF: ["gif"],
        FileType.GOOGLE_DOC: ["gdoc"],
        FileType.GOOGLE_DRAWINGS: ["gdraw"],
        FileType.GOOGLE_SHEETS: ["gsheet"],
        FileType.GOOGLE_SLIDES: ["gslides"],
        FileType.HEIC: ["heic"],
        FileType.HEIF: ["heif"],
        FileType.HTML: ["html", "htm"],
        FileType.JPEG: ["jpeg", "jpg"],
        FileType.JSON: ["json"],
        FileType.M4A: ["m4a"],
        FileType.M4V: ["m4v"],
        FileType.MOV: ["mov"],
        FileType.MP3: ["mp3"],
        FileType.MP4: ["mp4"],
        FileType.MPEG: ["mpeg"],
        FileType.MPEGPS: ["mpegps"],
        FileType.MPG: ["mpg"],
        FileType.MPA: ["mpa"],
        FileType.MPGA: ["mpga"],
        FileType.OGG: ["ogg"],
        FileType.OPUS: ["opus"],
        FileType.PDF: ["pdf"],
        FileType.PCM: ["pcm"],
        FileType.PNG: ["png"],
        FileType.PPT: ["ppt"],
        FileType.PPTX: ["pptx"],
        FileType.RTF: ["rtf"],
        # "3gp" is the extension 3GPP files usually carry on disk; "3gpp" stays
        # first so the canonical extension is unchanged.
        FileType.THREE_GPP: ["3gpp", "3gp"],
        FileType.TXT: ["txt"],
        FileType.WAV: ["wav"],
        FileType.WEBM: ["webm"],
        FileType.WEBP: ["webp"],
        FileType.WMV: ["wmv"],
        FileType.XLS: ["xls"],
        FileType.XLSX: ["xlsx"],
    }
)
# MIME type reported for each FileType (exactly one per member).
#
# Every value below is distinct, so the reverse lookup performed by
# get_file_extension_from_mime_type() has at most one match.
# The MappingProxyType wrapper exposes the table as a read-only mapping.
FILE_MIME_TYPES: Mapping[FileType, str] = MappingProxyType(
    {
        FileType.AAC: "audio/aac",
        FileType.CSV: "text/csv",
        FileType.DOC: "application/msword",
        FileType.DOCX: "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
        FileType.FLAC: "audio/flac",
        FileType.FLV: "video/x-flv",
        FileType.GIF: "image/gif",
        FileType.GOOGLE_DOC: "application/vnd.google-apps.document",
        FileType.GOOGLE_DRAWINGS: "application/vnd.google-apps.drawing",
        FileType.GOOGLE_SHEETS: "application/vnd.google-apps.spreadsheet",
        FileType.GOOGLE_SLIDES: "application/vnd.google-apps.presentation",
        FileType.HEIC: "image/heic",
        FileType.HEIF: "image/heif",
        FileType.HTML: "text/html",
        FileType.JPEG: "image/jpeg",
        FileType.JSON: "application/json",
        FileType.M4A: "audio/x-m4a",
        FileType.M4V: "video/x-m4v",
        FileType.MOV: "video/quicktime",
        FileType.MP3: "audio/mpeg",
        FileType.MP4: "video/mp4",
        FileType.MPEG: "video/mpeg",
        FileType.MPEGPS: "video/mpegps",
        FileType.MPG: "video/mpg",
        # NOTE(review): MPA maps to "audio/m4a" while M4A maps to
        # "audio/x-m4a" - confirm this pairing is intentional.
        FileType.MPA: "audio/m4a",
        FileType.MPGA: "audio/mpga",
        FileType.OGG: "audio/ogg",
        FileType.OPUS: "audio/opus",
        FileType.PDF: "application/pdf",
        FileType.PCM: "audio/pcm",
        FileType.PNG: "image/png",
        FileType.PPT: "application/vnd.ms-powerpoint",
        FileType.PPTX: "application/vnd.openxmlformats-officedocument.presentationml.presentation",
        FileType.RTF: "application/rtf",
        FileType.THREE_GPP: "video/3gpp",
        FileType.TXT: "text/plain",
        FileType.WAV: "audio/wav",
        FileType.WEBM: "video/webm",
        FileType.WEBP: "image/webp",
        FileType.WMV: "video/wmv",
        FileType.XLS: "application/vnd.ms-excel",
        FileType.XLSX: "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
    }
)
""" | |
Util Functions | |
""" | |
def get_file_extension_from_mime_type(mime_type: str) -> str:
    """Return the canonical file extension for a MIME type.

    The comparison ignores case, surrounding whitespace and any media-type
    parameters, so ``"Text/HTML; charset=utf-8"`` resolves like
    ``"text/html"``.

    Args:
        mime_type: A MIME type string, optionally followed by
            ``;``-separated parameters.

    Returns:
        The first extension listed in ``FILE_EXTENSIONS`` for the matching
        file type (lower-case, no leading dot).

    Raises:
        ValueError: If no entry of ``FILE_MIME_TYPES`` matches ``mime_type``.
    """
    # Normalise once, outside the loop: keep only the "type/subtype" part that
    # precedes any ";" parameter, then trim and lower-case it.
    wanted = mime_type.split(";", 1)[0].strip().lower()
    for file_type, mime in FILE_MIME_TYPES.items():
        if mime.lower() == wanted:
            return FILE_EXTENSIONS[file_type][0]
    # Report the caller's original string, not the normalised form.
    raise ValueError(f"Unknown extension for mime type: {mime_type}")
def get_file_type_from_extension(extension: str) -> FileType:
    """Return the ``FileType`` that a file extension belongs to.

    The lookup ignores case, surrounding whitespace and leading dots, so
    ``"pdf"``, ``"PDF"`` and ``".pdf"`` all resolve to ``FileType.PDF``.

    Args:
        extension: A file extension, with or without a leading dot.

    Returns:
        The first ``FileType`` in ``FILE_EXTENSIONS`` whose extension list
        contains the normalised extension.

    Raises:
        ValueError: If the extension is not listed in ``FILE_EXTENSIONS``.
    """
    # FILE_EXTENSIONS stores bare lower-case extensions, while suffixes taken
    # from paths usually carry a leading dot - strip it before comparing.
    wanted = extension.strip().lstrip(".").lower()
    for file_type, extensions in FILE_EXTENSIONS.items():
        if wanted in extensions:
            return file_type
    # Report the caller's original string, not the normalised form.
    raise ValueError(f"Unknown file type for extension: {extension}")
def get_file_extension_for_file_type(file_type: FileType) -> str:
    """Return the canonical extension of ``file_type``.

    The canonical extension is the first entry of the file type's list in
    ``FILE_EXTENSIONS``.

    Raises:
        KeyError: If ``file_type`` is not a key of ``FILE_EXTENSIONS``.
    """
    known_extensions = FILE_EXTENSIONS[file_type]
    return known_extensions[0]
def get_file_mime_type_for_file_type(file_type: FileType) -> str:
    """Return the MIME type recorded for ``file_type`` in ``FILE_MIME_TYPES``.

    Raises:
        KeyError: If ``file_type`` is not a key of ``FILE_MIME_TYPES``.
    """
    mime_type = FILE_MIME_TYPES[file_type]
    return mime_type
def get_file_mime_type_from_extension(extension: str) -> str:
    """Return the MIME type associated with a file extension.

    The extension is first resolved to a ``FileType`` via
    ``get_file_type_from_extension``; that type's MIME type is then returned.

    Raises:
        ValueError: Propagated from ``get_file_type_from_extension`` when the
            extension is unknown.
    """
    return get_file_mime_type_for_file_type(
        get_file_type_from_extension(extension)
    )
""" | |
FileType Type Groupings (Videos, Images, etc) | |
""" | |
# Images: file types that is_image_file_type() reports as images.
IMAGE_FILE_TYPES: Set[FileType] = {
    FileType.PNG,
    FileType.JPEG,
    FileType.GIF,
    FileType.WEBP,
    FileType.HEIC,
    FileType.HEIF,
}
def is_image_file_type(file_type: FileType) -> bool:
    """Return True if ``file_type`` is a member of ``IMAGE_FILE_TYPES``."""
    return file_type in IMAGE_FILE_TYPES
# Videos: file types that is_video_file_type() reports as video.
# Every FileType whose FILE_MIME_TYPES entry starts with "video/" is listed.
VIDEO_FILE_TYPES: Set[FileType] = {
    FileType.MOV,
    FileType.MP4,
    FileType.MPEG,
    FileType.M4V,
    FileType.FLV,
    FileType.MPEGPS,
    FileType.MPG,
    FileType.WEBM,
    FileType.WMV,
    FileType.THREE_GPP,
}
def is_video_file_type(file_type: FileType) -> bool:
    """Return True if ``file_type`` is a member of ``VIDEO_FILE_TYPES``."""
    return file_type in VIDEO_FILE_TYPES
# Audio: file types that is_audio_file_type() reports as audio.
# Covers every FileType whose FILE_MIME_TYPES entry starts with "audio/",
# including M4A ("audio/x-m4a") and OGG ("audio/ogg").
AUDIO_FILE_TYPES: Set[FileType] = {
    FileType.AAC,
    FileType.FLAC,
    FileType.M4A,
    FileType.MP3,
    FileType.MPA,
    FileType.MPGA,
    FileType.OGG,
    FileType.OPUS,
    FileType.PCM,
    FileType.WAV,
}
def is_audio_file_type(file_type: FileType) -> bool:
    """Return True if ``file_type`` is a member of ``AUDIO_FILE_TYPES``."""
    return file_type in AUDIO_FILE_TYPES
# Text: file types that is_text_file_type() reports as text.
TEXT_FILE_TYPES: Set[FileType] = {
    FileType.CSV,
    FileType.HTML,
    FileType.RTF,
    FileType.TXT,
}
def is_text_file_type(file_type: FileType) -> bool:
    """Return True if ``file_type`` is a member of ``TEXT_FILE_TYPES``."""
    return file_type in TEXT_FILE_TYPES
""" | |
Other FileType Groupings | |
""" | |
# Accepted file types for GEMINI 1.5 through Vertex AI
# https://cloud.google.com/vertex-ai/generative-ai/docs/multimodal/send-multimodal-prompts#gemini-send-multimodal-samples-images-nodejs
GEMINI_1_5_ACCEPTED_FILE_TYPES: Set[FileType] = {
    # Image
    FileType.PNG,
    FileType.JPEG,
    FileType.WEBP,
    # Audio
    FileType.AAC,
    FileType.FLAC,
    FileType.MP3,
    FileType.MPA,
    FileType.MPGA,
    FileType.OPUS,
    FileType.PCM,
    FileType.WAV,
    # Audio and video: these apply to both groups, but a set stores each
    # member once, so they are listed a single time.
    FileType.MPEG,
    FileType.WEBM,
    # Video
    FileType.FLV,
    FileType.MOV,
    FileType.MPEGPS,
    FileType.MPG,
    FileType.MP4,
    FileType.WMV,
    FileType.THREE_GPP,
    # Documents
    FileType.PDF,
    FileType.TXT,
}
def is_gemini_1_5_accepted_file_type(file_type: FileType) -> bool:
    """Return True if ``file_type`` is in ``GEMINI_1_5_ACCEPTED_FILE_TYPES``."""
    accepted = GEMINI_1_5_ACCEPTED_FILE_TYPES
    return file_type in accepted