hydrus/hydrus/core/files/HydrusFileHandling.py

925 lines
31 KiB
Python

import collections
import hashlib
import os
import typing
from hydrus.core import HydrusConstants as HC
from hydrus.core import HydrusData
from hydrus.core import HydrusExceptions
from hydrus.core import HydrusPaths
from hydrus.core import HydrusSerialisable
from hydrus.core import HydrusTemp
from hydrus.core import HydrusText
from hydrus.core import HydrusTime
from hydrus.core.files import HydrusAnimationHandling
from hydrus.core.files import HydrusArchiveHandling
from hydrus.core.files import HydrusClipHandling
from hydrus.core.files import HydrusFlashHandling
from hydrus.core.files import HydrusKritaHandling
from hydrus.core.files import HydrusPDFHandling
from hydrus.core.files import HydrusProcreateHandling
from hydrus.core.files import HydrusPSDHandling
from hydrus.core.files import HydrusSVGHandling
from hydrus.core.files import HydrusUgoiraHandling
from hydrus.core.files import HydrusVideoHandling
from hydrus.core.files import HydrusOfficeOpenXMLHandling
from hydrus.core.files.images import HydrusImageHandling
from hydrus.core.networking import HydrusNetwork
try:
import speedcopy
speedcopy.patch_copyfile()
SPEEDCOPY_OK = True
except Exception as e:
if not isinstance( e, ImportError ):
HydrusData.Print( 'Failed to initialise speedcopy:' )
HydrusData.PrintException( e )
SPEEDCOPY_OK = False
mimes_to_default_thumbnail_paths = collections.defaultdict( lambda: os.path.join( HC.STATIC_DIR, 'hydrus.png' ) )
for mime in HC.AUDIO:
mimes_to_default_thumbnail_paths[ mime ] = os.path.join( os.path.join( HC.STATIC_DIR, 'audio.png' ) )
for mime in HC.VIDEO:
mimes_to_default_thumbnail_paths[ mime ] = os.path.join( os.path.join( HC.STATIC_DIR, 'video.png' ) )
for mime in HC.ANIMATIONS:
mimes_to_default_thumbnail_paths[ mime ] = os.path.join( os.path.join( HC.STATIC_DIR, 'video.png' ) )
for mime in HC.ARCHIVES:
mimes_to_default_thumbnail_paths[ mime ] = png_path = os.path.join( HC.STATIC_DIR, 'zip.png' )
for mime in HC.IMAGES:
mimes_to_default_thumbnail_paths[ mime ] = png_path = os.path.join( HC.STATIC_DIR, 'image.png' )
mimes_to_default_thumbnail_paths[ HC.APPLICATION_UNKNOWN ] = os.path.join( HC.STATIC_DIR, 'hydrus.png' )
mimes_to_default_thumbnail_paths[ HC.APPLICATION_PDF ] = os.path.join( HC.STATIC_DIR, 'pdf.png' )
mimes_to_default_thumbnail_paths[ HC.APPLICATION_DOCX ] = os.path.join( HC.STATIC_DIR, 'docx.png' )
mimes_to_default_thumbnail_paths[ HC.APPLICATION_XLSX ] = os.path.join( HC.STATIC_DIR, 'xlsx.png' )
mimes_to_default_thumbnail_paths[ HC.APPLICATION_PPTX ] = os.path.join( HC.STATIC_DIR, 'pptx.png' )
mimes_to_default_thumbnail_paths[ HC.APPLICATION_EPUB ] = os.path.join( HC.STATIC_DIR, 'epub.png' )
mimes_to_default_thumbnail_paths[ HC.APPLICATION_DJVU ] = os.path.join( HC.STATIC_DIR, 'djvu.png' )
mimes_to_default_thumbnail_paths[ HC.APPLICATION_PSD ] = os.path.join( HC.STATIC_DIR, 'psd.png' )
mimes_to_default_thumbnail_paths[ HC.APPLICATION_CLIP ] = os.path.join( HC.STATIC_DIR, 'clip.png' )
mimes_to_default_thumbnail_paths[ HC.APPLICATION_SAI2 ] = os.path.join( HC.STATIC_DIR, 'sai.png' )
mimes_to_default_thumbnail_paths[ HC.APPLICATION_KRITA ] = os.path.join( HC.STATIC_DIR, 'krita.png' )
mimes_to_default_thumbnail_paths[ HC.APPLICATION_FLASH ] = os.path.join( HC.STATIC_DIR, 'flash.png' )
mimes_to_default_thumbnail_paths[ HC.APPLICATION_XCF ] = os.path.join( HC.STATIC_DIR, 'xcf.png' )
mimes_to_default_thumbnail_paths[ HC.APPLICATION_PROCREATE ] = os.path.join( HC.STATIC_DIR, 'procreate.png' )
mimes_to_default_thumbnail_paths[ HC.APPLICATION_RTF ] = os.path.join( HC.STATIC_DIR, 'rtf.png' )
mimes_to_default_thumbnail_paths[ HC.IMAGE_SVG ] = os.path.join( HC.STATIC_DIR, 'svg.png' )
def GenerateDefaultThumbnail( mime: int, target_resolution: typing.Tuple[ int, int ] ):
thumb_path = mimes_to_default_thumbnail_paths[mime]
return HydrusImageHandling.GenerateDefaultThumbnailNumPyFromPath( thumb_path, target_resolution )
def GenerateThumbnailBytes( path, target_resolution, mime, duration, num_frames, percentage_in = 35 ):
thumbnail_numpy = GenerateThumbnailNumPy( path, target_resolution, mime, duration, num_frames, percentage_in = percentage_in )
return HydrusImageHandling.GenerateThumbnailBytesFromNumPy( thumbnail_numpy )
def PrintMoreThumbErrorInfo( e: Exception, message, extra_description: typing.Optional[ str ] = None ):
if not isinstance( e, HydrusExceptions.NoThumbnailFileException ):
HydrusData.Print( message )
if extra_description is not None:
HydrusData.Print( f'Extra info: {extra_description}' )
HydrusData.PrintException( e )
def GenerateThumbnailNumPy( path, target_resolution, mime, duration, num_frames, percentage_in = 35, extra_description = None ):
if mime == HC.APPLICATION_CBZ:
( os_file_handle, temp_path ) = HydrusTemp.GetTempPath()
try:
HydrusArchiveHandling.ExtractCoverPage( path, temp_path )
cover_mime = GetMime( temp_path )
thumbnail_numpy = HydrusImageHandling.GenerateThumbnailNumPyFromStaticImagePath( temp_path, target_resolution, cover_mime )
except Exception as e:
PrintMoreThumbErrorInfo( e, f'Problem generating thumbnail for "{path}".', extra_description = extra_description )
thumbnail_numpy = GenerateDefaultThumbnail(mime, target_resolution)
finally:
HydrusTemp.CleanUpTempPath( os_file_handle, temp_path )
elif mime == HC.APPLICATION_CLIP:
( os_file_handle, temp_path ) = HydrusTemp.GetTempPath()
try:
HydrusClipHandling.ExtractDBPNGToPath( path, temp_path )
thumbnail_numpy = HydrusImageHandling.GenerateThumbnailNumPyFromStaticImagePath( temp_path, target_resolution, HC.IMAGE_PNG )
except Exception as e:
PrintMoreThumbErrorInfo( e, f'Problem generating thumbnail for "{path}".', extra_description = extra_description )
thumbnail_numpy = GenerateDefaultThumbnail(mime, target_resolution)
finally:
HydrusTemp.CleanUpTempPath( os_file_handle, temp_path )
elif mime == HC.APPLICATION_KRITA:
try:
thumbnail_numpy = HydrusKritaHandling.GenerateThumbnailNumPyFromKraPath( path, target_resolution )
except Exception as e:
PrintMoreThumbErrorInfo( e, f'Problem generating thumbnail for "{path}".', extra_description = extra_description )
thumbnail_numpy = GenerateDefaultThumbnail(mime, target_resolution)
elif mime == HC.APPLICATION_PROCREATE:
( os_file_handle, temp_path ) = HydrusTemp.GetTempPath()
try:
HydrusProcreateHandling.ExtractZippedThumbnailToPath( path, temp_path )
thumbnail_numpy = HydrusImageHandling.GenerateThumbnailNumPyFromStaticImagePath( temp_path, target_resolution, HC.IMAGE_PNG )
except Exception as e:
PrintMoreThumbErrorInfo( e, f'Problem generating thumbnail for "{path}".', extra_description = extra_description )
thumbnail_numpy = GenerateDefaultThumbnail(mime, target_resolution)
finally:
HydrusTemp.CleanUpTempPath( os_file_handle, temp_path )
elif mime == HC.APPLICATION_PSD:
try:
thumbnail_numpy = HydrusPSDHandling.GenerateThumbnailNumPyFromPSDPath( path, target_resolution )
except Exception as e:
PrintMoreThumbErrorInfo( e, f'Problem generating thumbnail for "{path}".', extra_description = extra_description )
HydrusData.Print( 'Attempting ffmpeg PSD thumbnail fallback' )
( os_file_handle, temp_path ) = HydrusTemp.GetTempPath( suffix = '.png' )
try:
HydrusVideoHandling.RenderImageToImagePath( path, temp_path )
thumbnail_numpy = HydrusImageHandling.GenerateThumbnailNumPyFromStaticImagePath( temp_path, target_resolution, HC.IMAGE_PNG )
except Exception as e:
PrintMoreThumbErrorInfo( e, f'Secondary problem generating thumbnail for "{path}".', extra_description = extra_description )
thumbnail_numpy = GenerateDefaultThumbnail(mime, target_resolution)
finally:
HydrusTemp.CleanUpTempPath( os_file_handle, temp_path )
elif mime == HC.IMAGE_SVG:
try:
thumbnail_numpy = HydrusSVGHandling.GenerateThumbnailNumPyFromSVGPath( path, target_resolution )
except Exception as e:
PrintMoreThumbErrorInfo( e, f'Problem generating thumbnail for "{path}".', extra_description = extra_description )
thumbnail_numpy = GenerateDefaultThumbnail(mime, target_resolution)
elif mime == HC.APPLICATION_PDF:
try:
thumbnail_numpy = HydrusPDFHandling.GenerateThumbnailNumPyFromPDFPath( path, target_resolution )
except Exception as e:
PrintMoreThumbErrorInfo( e, f'Problem generating thumbnail for "{path}".', extra_description = extra_description )
thumbnail_numpy = GenerateDefaultThumbnail(mime, target_resolution)
elif mime == HC.APPLICATION_PPTX:
try:
thumbnail_numpy = HydrusOfficeOpenXMLHandling.GenerateThumbnailNumPyFromOfficePath( path, target_resolution )
except Exception as e:
PrintMoreThumbErrorInfo( e, f'Problem generating thumbnail for "{path}".', extra_description = extra_description )
thumbnail_numpy = GenerateDefaultThumbnail(mime, target_resolution)
elif mime == HC.APPLICATION_FLASH:
( os_file_handle, temp_path ) = HydrusTemp.GetTempPath()
try:
HydrusFlashHandling.RenderPageToFile( path, temp_path, 1 )
thumbnail_numpy = HydrusImageHandling.GenerateThumbnailNumPyFromStaticImagePath( temp_path, target_resolution, HC.IMAGE_PNG )
except Exception as e:
PrintMoreThumbErrorInfo( e, f'Problem generating thumbnail for "{path}".', extra_description = extra_description )
thumbnail_numpy = GenerateDefaultThumbnail(mime, target_resolution)
finally:
HydrusTemp.CleanUpTempPath( os_file_handle, temp_path )
elif mime in HC.IMAGES:
# TODO: it would be nice to have gif and apng generating their thumb x frames in, like with videos. maybe we should add animation thumb fetcher to hydrusanimationhandling
try:
thumbnail_numpy = HydrusImageHandling.GenerateThumbnailNumPyFromStaticImagePath( path, target_resolution, mime )
except Exception as e:
PrintMoreThumbErrorInfo( e, f'Problem generating thumbnail for "{path}".', extra_description = extra_description )
thumbnail_numpy = GenerateDefaultThumbnail(mime, target_resolution)
elif mime == HC.ANIMATION_UGOIRA:
( os_file_handle, temp_path ) = HydrusTemp.GetTempPath()
try:
desired_thumb_frame_index = int( ( percentage_in / 100.0 ) * ( num_frames - 1 ) )
HydrusUgoiraHandling.ExtractFrame( path, desired_thumb_frame_index, temp_path )
cover_mime = GetMime( temp_path )
thumbnail_numpy = HydrusImageHandling.GenerateThumbnailNumPyFromStaticImagePath( temp_path, target_resolution, cover_mime )
except Exception as e:
PrintMoreThumbErrorInfo( e, f'Problem generating thumbnail for "{path}".', extra_description = extra_description )
thumbnail_numpy = GenerateDefaultThumbnail(mime, target_resolution)
finally:
HydrusTemp.CleanUpTempPath( os_file_handle, temp_path )
else: # animations and video
renderer = None
desired_thumb_frame_index = int( ( percentage_in / 100.0 ) * ( num_frames - 1 ) )
try:
renderer = HydrusVideoHandling.VideoRendererFFMPEG( path, mime, duration, num_frames, target_resolution, start_pos = desired_thumb_frame_index )
numpy_image = renderer.read_frame()
except Exception as e:
message = 'Problem generating thumbnail for "{}" at frame {} ({})--FFMPEG could not render it.'.format( path, desired_thumb_frame_index, HydrusData.ConvertFloatToPercentage( percentage_in / 100.0 ) )
PrintMoreThumbErrorInfo( e, message, extra_description = extra_description )
numpy_image = None
if numpy_image is None and desired_thumb_frame_index != 0:
if renderer is not None:
renderer.Stop()
# try first frame instead
try:
renderer = HydrusVideoHandling.VideoRendererFFMPEG( path, mime, duration, num_frames, target_resolution )
numpy_image = renderer.read_frame()
except Exception as e:
message = 'Problem generating thumbnail for "{}" at first frame--FFMPEG could not render it.'.format( path )
PrintMoreThumbErrorInfo( e, message, extra_description = extra_description )
numpy_image = None
if numpy_image is None:
thumbnail_numpy = GenerateDefaultThumbnail(mime, target_resolution)
else:
thumbnail_numpy = HydrusImageHandling.ResizeNumPyImage( numpy_image, target_resolution ) # just in case ffmpeg doesn't deliver right
if renderer is not None:
renderer.Stop()
return thumbnail_numpy
def GetExtraHashesFromPath( path ):
h_md5 = hashlib.md5()
h_sha1 = hashlib.sha1()
h_sha512 = hashlib.sha512()
with open( path, 'rb' ) as f:
for block in HydrusPaths.ReadFileLikeAsBlocks( f ):
h_md5.update( block )
h_sha1.update( block )
h_sha512.update( block )
md5 = h_md5.digest()
sha1 = h_sha1.digest()
sha512 = h_sha512.digest()
return ( md5, sha1, sha512 )
def GetFileInfo( path, mime = None, ok_to_look_for_hydrus_updates = False ):
size = os.path.getsize( path )
if size == 0:
raise HydrusExceptions.ZeroSizeFileException( 'File is of zero length!' )
if mime is None:
mime = GetMime( path, ok_to_look_for_hydrus_updates = ok_to_look_for_hydrus_updates )
if mime not in HC.ALLOWED_MIMES:
if mime == HC.TEXT_HTML:
raise HydrusExceptions.UnsupportedFileException( 'Looks like HTML -- maybe the client needs to be taught how to parse this?' )
elif mime == HC.APPLICATION_UNKNOWN:
raise HydrusExceptions.UnsupportedFileException( 'Unknown filetype!' )
else:
raise HydrusExceptions.UnsupportedFileException( 'Filetype is not permitted!' )
if mime in HC.PIL_HEIF_MIMES and not HydrusImageHandling.HEIF_OK:
raise HydrusExceptions.UnsupportedFileException( 'Sorry, you need the pillow-heif library to support this filetype ({})! Please rebuild your venv.'.format( HC.mime_string_lookup[ mime ] ) )
width = None
height = None
duration = None
num_frames = None
num_words = None
if mime in HC.MIMES_THAT_DEFINITELY_HAVE_AUDIO:
has_audio = True
else:
has_audio = False
# keep this in the specific-first, general-last test order
if mime == HC.APPLICATION_CBZ:
( os_file_handle, temp_path ) = HydrusTemp.GetTempPath()
try:
HydrusArchiveHandling.ExtractCoverPage( path, temp_path )
cover_mime = GetMime( temp_path )
( width, height ) = HydrusImageHandling.GetImageResolution( temp_path, cover_mime )
except:
( width, height ) = ( None, None )
finally:
HydrusTemp.CleanUpTempPath( os_file_handle, temp_path )
elif mime == HC.APPLICATION_CLIP:
( ( width, height ), duration, num_frames ) = HydrusClipHandling.GetClipProperties( path )
elif mime == HC.APPLICATION_KRITA:
try:
( width, height ) = HydrusKritaHandling.GetKraProperties( path )
except HydrusExceptions.NoResolutionFileException:
pass
elif mime == HC.APPLICATION_PROCREATE:
try:
( width, height ) = HydrusProcreateHandling.GetProcreateResolution( path )
except:
pass
elif mime == HC.IMAGE_SVG:
try:
( width, height ) = HydrusSVGHandling.GetSVGResolution( path )
except HydrusExceptions.NoResolutionFileException:
pass
elif mime == HC.APPLICATION_PDF:
try:
( num_words, ( width, height ) ) = HydrusPDFHandling.GetPDFInfo( path )
except HydrusExceptions.LimitedSupportFileException:
pass
elif mime == HC.APPLICATION_PPTX:
try:
( num_words, ( width, height ) ) = HydrusOfficeOpenXMLHandling.GetPPTXInfo( path )
except HydrusExceptions.LimitedSupportFileException:
pass
elif mime == HC.APPLICATION_DOCX:
try:
( num_words ) = HydrusOfficeOpenXMLHandling.GetDOCXInfo( path )
except HydrusExceptions.LimitedSupportFileException:
pass
elif mime == HC.APPLICATION_FLASH:
( ( width, height ), duration, num_frames ) = HydrusFlashHandling.GetFlashProperties( path )
elif mime == HC.APPLICATION_PSD:
try:
( width, height ) = HydrusPSDHandling.GetPSDResolution( path )
except Exception as e:
HydrusData.Print( 'Problem calculating resolution for "{}":'.format( path ) )
HydrusData.PrintException( e )
HydrusData.Print( 'Attempting PSD resolution fallback' )
( width, height ) = HydrusPSDHandling.GetPSDResolutionFallback( path )
elif mime in HC.VIDEO or mime in HC.HEIF_TYPE_SEQUENCES:
( ( width, height ), duration, num_frames, has_audio ) = HydrusVideoHandling.GetFFMPEGVideoProperties( path )
elif mime in HC.VIEWABLE_ANIMATIONS:
( ( width, height ), duration, num_frames ) = HydrusAnimationHandling.GetAnimationProperties( path, mime )
elif mime == HC.ANIMATION_UGOIRA:
( ( width, height ), num_frames ) = HydrusUgoiraHandling.GetUgoiraProperties( path )
elif mime in HC.IMAGES:
( width, height ) = HydrusImageHandling.GetImageResolution( path, mime )
elif mime in HC.AUDIO:
ffmpeg_lines = HydrusVideoHandling.GetFFMPEGInfoLines( path )
( file_duration_in_s, stream_duration_in_s ) = HydrusVideoHandling.ParseFFMPEGDuration( ffmpeg_lines )
duration = int( file_duration_in_s * 1000 )
if width is not None and width < 0:
width *= -1
if height is not None and height < 0:
width *= -1
if duration is not None and duration < 0:
duration *= -1
if num_frames is not None and num_frames < 0:
num_frames *= -1
if num_words is not None and num_words < 0:
num_words *= -1
return ( size, mime, width, height, duration, num_frames, has_audio, num_words )
def GetFileModifiedTimestampMS( path ) -> int:
return HydrusTime.MillisecondiseS( os.path.getmtime( path ) )
def GetHashFromPath( path ):
h = hashlib.sha256()
with open( path, 'rb' ) as f:
for block in HydrusPaths.ReadFileLikeAsBlocks( f ):
h.update( block )
return h.digest()
# TODO: replace this with a FileTypeChecker class or something that tucks all this messy data away more neatly
# do this the next time you visit this place
headers_and_mime = [
( ( ( [0], [b'\xff\xd8'] ), ), HC.IMAGE_JPEG ),
( ( ( [0], [b'\x89PNG'] ), ), HC.UNDETERMINED_PNG ),
( ( ( [0], [b'GIF87a', b'GIF89a'] ), ), HC.UNDETERMINED_GIF ),
( ( ( [8], [b'WEBP'] ), ), HC.IMAGE_WEBP ),
( ( ( [0], [b'II*\x00', b'MM\x00*'] ), ), HC.IMAGE_TIFF ),
( ( ( [0], [b'BM'] ), ), HC.IMAGE_BMP ),
( ( ( [0], [b'\x00\x00\x01\x00', b'\x00\x00\x02\x00'] ), ), HC.IMAGE_ICON ),
( ( ( [0], [b'qoif'] ), ), HC.IMAGE_QOI ),
( ( ( [0], [b'CWS', b'FWS', b'ZWS'] ), ), HC.APPLICATION_FLASH ),
( ( ( [0], [b'FLV'] ), ), HC.VIDEO_FLV ),
( ( ( [0], [b'%PDF'] ), ), HC.APPLICATION_PDF ),
( ( ( [0], [b'8BPS\x00\x01', b'8BPS\x00\x02'] ), ), HC.APPLICATION_PSD ),
( ( ( [0], [b'CSFCHUNK'] ), ), HC.APPLICATION_CLIP ),
( ( ( [0], [b'SAI-CANVAS'] ), ), HC.APPLICATION_SAI2 ),
( ( ( [0], [b'gimp xcf '] ), ), HC.APPLICATION_XCF ),
( ( ( [38, 42, 58, 63],[ b'application/x-krita'] ), ), HC.APPLICATION_KRITA ), # important this comes before zip files because this is also a zip file
( ( ( [38, 43],[ b'application/epub+zip'] ), ), HC.APPLICATION_EPUB ),
( ( ( [4], [b'FORM'] ), ( [12], [b'DJVU', b'DJVM', b'PM44', b'BM44', b'SDJV'] ), ), HC.APPLICATION_DJVU ),
( ( ( [0], [b'{\\rtf'] ), ), HC.APPLICATION_RTF ),
( ( ( [0], [b'PK\x03\x04', b'PK\x05\x06', b'PK\x07\x08'] ), ), HC.APPLICATION_ZIP ),
( ( ( [0], [b'7z\xBC\xAF\x27\x1C'] ), ), HC.APPLICATION_7Z ),
( ( ( [0], [b'\x52\x61\x72\x21\x1A\x07\x00', b'\x52\x61\x72\x21\x1A\x07\x01\x00'] ), ), HC.APPLICATION_RAR ),
( ( ( [0], [b'\x1f\x8b'] ), ), HC.APPLICATION_GZIP ),
( ( ( [0], [b'hydrus encrypted zip'] ), ), HC.APPLICATION_HYDRUS_ENCRYPTED_ZIP ),
( ( ( [4], [b'ftypavif'] ), ), HC.IMAGE_AVIF ),
( ( ( [4], [b'ftypavis'] ), ), HC.IMAGE_AVIF_SEQUENCE ),
( ( ( [4], [b'ftypmif1'] ), ( [16, 20, 24], [b'avif'] ), ), HC.IMAGE_AVIF ),
( ( ( [4], [b'ftypheic', b'ftypheix', b'ftypheim', b'ftypheis'] ), ), HC.IMAGE_HEIC ),
( ( ( [4], [b'ftyphevc', b'ftyphevx', b'ftyphevm', b'ftyphevs'] ), ), HC.IMAGE_HEIC_SEQUENCE ),
( ( ( [4], [b'ftypmif1'] ), ), HC.IMAGE_HEIF ),
( ( ( [4], [b'ftypmsf1'] ), ), HC.IMAGE_HEIF_SEQUENCE ),
( ( ( [4], [b'ftypmp4', b'ftypisom', b'ftypM4V', b'ftypMSNV', b'ftypavc1', b'ftypavc1', b'ftypFACE', b'ftypdash'] ), ), HC.UNDETERMINED_MP4 ),
( ( ( [4], [b'ftypqt'] ), ), HC.VIDEO_MOV ),
( ( ( [0], [b'fLaC'] ), ), HC.AUDIO_FLAC ),
( ( ( [0], [b'RIFF'] ), ( [8], [ b'WAVE' ] ) ), HC.AUDIO_WAVE ),
( ( ( [0], [b'wvpk'] ), ), HC.AUDIO_WAVPACK ),
( ( ( [8], [b'AVI '] ), ), HC.VIDEO_AVI ),
( ( ( [0], [b'\x30\x26\xB2\x75\x8E\x66\xCF\x11\xA6\xD9\x00\xAA\x00\x62\xCE\x6C'] ), ), HC.UNDETERMINED_WM ),
( ( ( [0], [b'\x4D\x5A\x90\x00\x03'], ), ), HC.APPLICATION_WINDOWS_EXE )
]
def passes_offsets_and_headers_pair( offsets, headers, first_bytes_of_file ) -> bool:
# TODO: rewrite this garbage
for offset in offsets:
for header in headers:
if first_bytes_of_file[ offset : offset + len( header ) ] == header:
return True
return False
def passes_offsets_and_headers( offsets_and_headers, first_bytes_of_file ) -> bool:
# ok we need to match every pair here
for ( offsets, headers ) in offsets_and_headers:
if not passes_offsets_and_headers_pair( offsets, headers, first_bytes_of_file ):
return False
return True
def GetMime( path, ok_to_look_for_hydrus_updates = False ):
size = os.path.getsize( path )
if size == 0:
raise HydrusExceptions.ZeroSizeFileException( 'File is of zero length!' )
if ok_to_look_for_hydrus_updates and size < 64 * 1024 * 1024:
with open( path, 'rb' ) as f:
update_network_bytes = f.read()
try:
update = HydrusSerialisable.CreateFromNetworkBytes( update_network_bytes )
if isinstance( update, HydrusNetwork.ContentUpdate ):
return HC.APPLICATION_HYDRUS_UPDATE_CONTENT
elif isinstance( update, HydrusNetwork.DefinitionsUpdate ):
return HC.APPLICATION_HYDRUS_UPDATE_DEFINITIONS
except:
pass
with open( path, 'rb' ) as f:
first_bytes_of_file = f.read( 256 )
for ( offsets_and_headers, mime ) in headers_and_mime:
if passes_offsets_and_headers( offsets_and_headers, first_bytes_of_file ):
if mime == HC.APPLICATION_ZIP:
try:
if HydrusArchiveHandling.IsEncryptedZip( path ):
return HC.APPLICATION_ZIP
except HydrusExceptions.DamagedOrUnusualFileException:
return HC.APPLICATION_ZIP
opendoc_mime = HydrusArchiveHandling.MimeFromOpenDocument( path )
if opendoc_mime is not None:
return opendoc_mime
microsoft_mime = HydrusOfficeOpenXMLHandling.MimeFromMicrosoftOpenXMLDocument( path )
if microsoft_mime is not None:
return microsoft_mime
if HydrusProcreateHandling.ZipLooksLikeProcreate( path ):
return HC.APPLICATION_PROCREATE
if HydrusUgoiraHandling.ZipLooksLikeUgoira( path ):
return HC.ANIMATION_UGOIRA
if HydrusArchiveHandling.ZipLooksLikeCBZ( path ):
return HC.APPLICATION_CBZ
return HC.APPLICATION_ZIP
if mime in ( HC.UNDETERMINED_WM, HC.UNDETERMINED_MP4 ):
return HydrusVideoHandling.GetMime( path )
elif mime == HC.UNDETERMINED_PNG:
if HydrusAnimationHandling.IsPNGAnimated( first_bytes_of_file ):
return HC.ANIMATION_APNG
else:
return HC.IMAGE_PNG
elif mime == HC.UNDETERMINED_GIF:
if HydrusAnimationHandling.PILAnimationHasDuration( path ):
return HC.ANIMATION_GIF
else:
return HC.IMAGE_GIF
else:
return mime
# If the file starts with '{' it is probably JSON
# but we can't know for sure so we send it over to be checked
if first_bytes_of_file.startswith( b'{' ) or first_bytes_of_file.startswith( b'[' ):
with open( path, 'rb' ) as f:
potential_json_document_bytes = f.read()
if HydrusText.LooksLikeJSON( potential_json_document_bytes ):
return HC.APPLICATION_JSON
if HydrusText.LooksLikeHTML( first_bytes_of_file ):
return HC.TEXT_HTML
if HydrusText.LooksLikeSVG( first_bytes_of_file ):
return HC.IMAGE_SVG
# it is important this goes at the end, because ffmpeg has a billion false positives! and it takes CPU to true negative
# for instance, it once thought some hydrus update files were mpegs
# it also thinks txt files can be mpegs
likely_to_false_positive = True in ( path.endswith( ext ) for ext in ( '.txt', '.log', '.json' ) )
if not likely_to_false_positive:
try:
mime = HydrusVideoHandling.GetMime( path )
if mime != HC.APPLICATION_UNKNOWN:
return mime
except HydrusExceptions.UnsupportedFileException:
pass
except Exception as e:
HydrusData.Print( 'FFMPEG had trouble with: ' + path )
HydrusData.PrintException( e, do_wait = False )
return HC.APPLICATION_UNKNOWN
headers_and_mime_thumbnails = [ ( offsets_and_headers, mime ) for ( offsets_and_headers, mime ) in headers_and_mime if mime in ( HC.IMAGE_JPEG, HC.IMAGE_PNG ) ]
def GetThumbnailMime( path ):
with open( path, 'rb' ) as f:
bit_to_check = f.read( 256 )
for ( offsets_and_headers, mime ) in headers_and_mime_thumbnails:
if passes_offsets_and_headers( offsets_and_headers, bit_to_check ):
return mime
return GetMime( path )