hydrus/hydrus/core/HydrusFileHandling.py

723 lines
24 KiB
Python
Raw Normal View History

2013-08-07 22:25:18 +00:00
import hashlib
2020-07-29 20:52:44 +00:00
import os
2023-08-09 21:12:17 +00:00
from hydrus.core import HydrusAnimationHandling
2023-08-02 08:16:59 +00:00
from hydrus.core import HydrusPSDHandling
2021-11-03 20:49:56 +00:00
from hydrus.core import HydrusClipHandling
from hydrus.core import HydrusArchiveHandling
2020-04-22 21:00:35 +00:00
from hydrus.core import HydrusConstants as HC
from hydrus.core import HydrusData
from hydrus.core import HydrusDocumentHandling
from hydrus.core import HydrusExceptions
from hydrus.core import HydrusFlashHandling
2023-07-26 20:57:00 +00:00
from hydrus.core import HydrusKritaHandling
from hydrus.core import HydrusProcreateHandling
2020-04-22 21:00:35 +00:00
from hydrus.core import HydrusPaths
from hydrus.core import HydrusSerialisable
2023-07-26 20:57:00 +00:00
from hydrus.core import HydrusSVGHandling
from hydrus.core import HydrusPDFHandling
from hydrus.core import HydrusTemp
2020-04-22 21:00:35 +00:00
from hydrus.core import HydrusText
from hydrus.core import HydrusVideoHandling
2023-10-04 20:51:17 +00:00
from hydrus.core.images import HydrusImageHandling
2021-04-07 21:26:45 +00:00
from hydrus.core.networking import HydrusNetwork
2013-07-17 20:56:13 +00:00
2022-12-07 22:41:53 +00:00
# speedcopy is an optional accelerator: when it is importable we let it patch the copyfile routine,
# and SPEEDCOPY_OK records whether that happened.
try:
    
    import speedcopy
    
    speedcopy.patch_copyfile()
    
    SPEEDCOPY_OK = True
    
except ImportError:
    
    # not installed--this is fine and not worth a log line
    SPEEDCOPY_OK = False
    
except Exception as e:
    
    # it imported but would not initialise, which is worth reporting
    HydrusData.Print( 'Failed to initialise speedcopy:' )
    HydrusData.PrintException( e )
    
    SPEEDCOPY_OK = False
    
2015-03-25 22:04:19 +00:00
2022-02-02 22:14:01 +00:00
def GenerateThumbnailBytes( path, target_resolution, mime, duration, num_frames, clip_rect = None, percentage_in = 35 ):
    """
    Generate a thumbnail for the file at path and return it in encoded form.
    
    This is a thin wrapper: all arguments are forwarded unchanged to GenerateThumbnailNumPy, and the
    resulting numpy image is handed to HydrusImageHandling.GenerateThumbnailBytesFromNumPy.
    """
    
    numpy_image = GenerateThumbnailNumPy(
        path,
        target_resolution,
        mime,
        duration,
        num_frames,
        clip_rect,
        percentage_in
    )
    
    thumbnail_bytes = HydrusImageHandling.GenerateThumbnailBytesFromNumPy( numpy_image )
    
    return thumbnail_bytes
    
def _GenerateDefaultThumbnailNumPy( default_filename, target_resolution, clip_rect ):
    """Build a thumbnail from one of the stock PNG icons that live in HC.STATIC_DIR."""
    
    thumb_path = os.path.join( HC.STATIC_DIR, default_filename )
    
    return HydrusImageHandling.GenerateThumbnailNumPyFromStaticImagePath( thumb_path, target_resolution, HC.IMAGE_PNG, clip_rect = clip_rect )
    

def _GenerateThumbnailNumPyViaTempPath( write_png_to_path, default_filename, target_resolution, clip_rect, suffix = None ):
    """
    Thumbnail a PNG that write_png_to_path( temp_path ) produces, falling back to a stock icon.
    
    The temp path is reserved _before_ the try block so that the cleanup in 'finally' always has a
    real handle and path to work with.
    """
    
    if suffix is None:
        
        ( os_file_handle, temp_path ) = HydrusTemp.GetTempPath()
        
    else:
        
        ( os_file_handle, temp_path ) = HydrusTemp.GetTempPath( suffix = suffix )
        
    
    try:
        
        write_png_to_path( temp_path )
        
        return HydrusImageHandling.GenerateThumbnailNumPyFromStaticImagePath( temp_path, target_resolution, HC.IMAGE_PNG, clip_rect = clip_rect )
        
    except Exception:
        
        return _GenerateDefaultThumbnailNumPy( default_filename, target_resolution, clip_rect )
        
    finally:
        
        HydrusTemp.CleanUpTempPath( os_file_handle, temp_path )
        
    

def _GenerateThumbnailNumPyFromVideo( path, target_resolution, mime, duration, num_frames, clip_rect, percentage_in ):
    """
    Render one frame through ffmpeg, percentage_in percent of the way into the file.
    
    If that frame cannot be rendered, the first frame is tried instead, and if that also fails the
    stock hydrus icon is used. Whatever renderer is open at the end is always stopped.
    """
    
    renderer = None
    numpy_image = None
    
    desired_thumb_frame = int( ( percentage_in / 100.0 ) * num_frames )
    
    try:
        
        try:
            
            renderer = HydrusVideoHandling.VideoRendererFFMPEG( path, mime, duration, num_frames, target_resolution, clip_rect = clip_rect, start_pos = desired_thumb_frame )
            
            numpy_image = renderer.read_frame()
            
        except Exception as e:
            
            HydrusData.Print( 'Problem generating thumbnail for "{}" at frame {} ({})--FFMPEG could not render it.'.format( path, desired_thumb_frame, HydrusData.ConvertFloatToPercentage( percentage_in / 100.0 ) ) )
            HydrusData.PrintException( e )
            
            numpy_image = None
            
        
        if numpy_image is None and desired_thumb_frame != 0:
            
            if renderer is not None:
                
                renderer.Stop()
                
                # forget the stopped renderer so it cannot be stopped a second time below
                renderer = None
                
            
            # try first frame instead
            
            try:
                
                renderer = HydrusVideoHandling.VideoRendererFFMPEG( path, mime, duration, num_frames, target_resolution, clip_rect = clip_rect )
                
                numpy_image = renderer.read_frame()
                
            except Exception as e:
                
                HydrusData.Print( 'Problem generating thumbnail for "{}" at first frame--FFMPEG could not render it.'.format( path ) )
                HydrusData.PrintException( e )
                
                numpy_image = None
                
            
        
        if numpy_image is None:
            
            return _GenerateDefaultThumbnailNumPy( 'hydrus.png', target_resolution, clip_rect )
            
        
        return HydrusImageHandling.ResizeNumPyImage( numpy_image, target_resolution ) # just in case ffmpeg doesn't deliver right
        
    finally:
        
        # runs on every exit, including an exception from the resize
        if renderer is not None:
            
            renderer.Stop()
            
        
    

def GenerateThumbnailNumPy( path, target_resolution, mime, duration, num_frames, clip_rect = None, percentage_in = 35 ):
    """
    Generate a thumbnail for the file at path and return it as a numpy image.
    
    The strategy depends on mime: PSD, SVG, PDF and static images/gifs use their dedicated generators;
    clip, krita, procreate and flash files first have an image written to a temp path; anything else is
    treated as video and rendered through ffmpeg. Generator failures do not propagate--a stock icon
    from HC.STATIC_DIR is thumbnailed instead.
    
    :param path: path of the file to thumbnail
    :param target_resolution: ( width, height ) wanted; ( 0, 0 ) is treated as ( 128, 128 )
    :param mime: the file's mime, one of the HC constants
    :param duration: passed through to the ffmpeg renderer (video only)
    :param num_frames: frame count, used with percentage_in to choose the video frame
    :param clip_rect: optional clip rectangle passed through to the generators
    :param percentage_in: how far into a video, in percent, to take the thumbnail frame
    :return: the thumbnail as a numpy image
    """
    
    if target_resolution == ( 0, 0 ):
        
        target_resolution = ( 128, 128 )
        
    
    if mime == HC.APPLICATION_PSD:
        
        try:
            
            thumbnail_numpy = HydrusPSDHandling.GenerateThumbnailNumPyFromPSDPath( path, target_resolution, clip_rect = clip_rect )
            
        except Exception as e:
            
            HydrusData.Print( 'Problem generating thumbnail for "{}":'.format( path ) )
            HydrusData.PrintException( e )
            HydrusData.Print( 'Attempting ffmpeg PSD thumbnail fallback' )
            
            thumbnail_numpy = _GenerateThumbnailNumPyViaTempPath( lambda temp_path: HydrusVideoHandling.RenderImageToImagePath( path, temp_path ), 'psd.png', target_resolution, clip_rect, suffix = '.png' )
            
        
    elif mime == HC.APPLICATION_CLIP:
        
        thumbnail_numpy = _GenerateThumbnailNumPyViaTempPath( lambda temp_path: HydrusClipHandling.ExtractDBPNGToPath( path, temp_path ), 'clip.png', target_resolution, clip_rect )
        
    elif mime == HC.APPLICATION_KRITA:
        
        thumbnail_numpy = _GenerateThumbnailNumPyViaTempPath( lambda temp_path: HydrusKritaHandling.ExtractZippedImageToPath( path, temp_path ), 'krita.png', target_resolution, clip_rect )
        
    elif mime == HC.APPLICATION_PROCREATE:
        
        thumbnail_numpy = _GenerateThumbnailNumPyViaTempPath( lambda temp_path: HydrusProcreateHandling.ExtractZippedThumbnailToPath( path, temp_path ), 'procreate.png', target_resolution, clip_rect )
        
    elif mime == HC.IMAGE_SVG:
        
        try:
            
            thumbnail_numpy = HydrusSVGHandling.GenerateThumbnailNumPyFromSVGPath( path, target_resolution, clip_rect = clip_rect )
            
        except Exception as e:
            
            # NoThumbnailFileException is an expected outcome, so it is not logged
            if not isinstance( e, HydrusExceptions.NoThumbnailFileException ):
                
                HydrusData.Print( 'Problem generating thumbnail for "{}":'.format( path ) )
                HydrusData.PrintException( e )
                
            
            thumbnail_numpy = _GenerateDefaultThumbnailNumPy( 'svg.png', target_resolution, clip_rect )
            
        
    elif mime == HC.APPLICATION_PDF:
        
        try:
            
            thumbnail_numpy = HydrusPDFHandling.GenerateThumbnailNumPyFromPDFPath( path, target_resolution, clip_rect = clip_rect )
            
        except Exception as e:
            
            # NoThumbnailFileException is an expected outcome, so it is not logged
            if not isinstance( e, HydrusExceptions.NoThumbnailFileException ):
                
                HydrusData.Print( 'Problem generating thumbnail for "{}":'.format( path ) )
                HydrusData.PrintException( e )
                
            
            thumbnail_numpy = _GenerateDefaultThumbnailNumPy( 'pdf.png', target_resolution, clip_rect )
            
        
    elif mime == HC.APPLICATION_FLASH:
        
        thumbnail_numpy = _GenerateThumbnailNumPyViaTempPath( lambda temp_path: HydrusFlashHandling.RenderPageToFile( path, temp_path, 1 ), 'flash.png', target_resolution, clip_rect )
        
    elif mime in HC.IMAGES or mime == HC.ANIMATION_GIF: # not apng atm
        
        # TODO: it would be nice to have gif and apng generating their thumb x frames in, like with videos. maybe we should add animation thumb fetcher to hydrusanimationhandling
        
        try:
            
            thumbnail_numpy = HydrusImageHandling.GenerateThumbnailNumPyFromStaticImagePath( path, target_resolution, mime, clip_rect = clip_rect )
            
        except Exception as e:
            
            HydrusData.Print( 'Problem generating thumbnail for "{}":'.format( path ) )
            HydrusData.PrintException( e )
            
            thumbnail_numpy = _GenerateDefaultThumbnailNumPy( 'hydrus.png', target_resolution, clip_rect )
            
        
    else:
        
        thumbnail_numpy = _GenerateThumbnailNumPyFromVideo( path, target_resolution, mime, duration, num_frames, clip_rect, percentage_in )
        
    
    return thumbnail_numpy
    
2019-03-27 22:01:02 +00:00
2015-03-25 22:04:19 +00:00
def GetExtraHashesFromPath( path ):
    """
    Compute the md5, sha1 and sha512 digests of the file at path in a single pass.
    
    :return: ( md5, sha1, sha512 ), each as raw digest bytes
    """
    
    hashers = ( hashlib.md5(), hashlib.sha1(), hashlib.sha512() )
    
    with open( path, 'rb' ) as file_handle:
        
        for chunk in HydrusPaths.ReadFileLikeAsBlocks( file_handle ):
            
            # every block is fed to all three hashers so the file is only read once
            for hasher in hashers:
                
                hasher.update( chunk )
                
            
        
    
    return tuple( hasher.digest() for hasher in hashers )
    
2023-08-23 20:43:26 +00:00
2019-07-10 22:38:30 +00:00
def GetFileInfo( path, mime = None, ok_to_look_for_hydrus_updates = False ):
    """
    Inspect the file at path and return its basic metadata.
    
    :param path: path of the file to inspect
    :param mime: the file's mime if already known; when None it is determined with GetMime
    :param ok_to_look_for_hydrus_updates: forwarded to GetMime when the mime has to be determined
    :return: ( size, mime, width, height, duration, num_frames, has_audio, num_words ). Any value that
        does not apply to, or could not be read for, this filetype is None, except has_audio, which is
        always a bool. For audio files duration is in milliseconds.
    :raises HydrusExceptions.ZeroSizeFileException: if the file is empty
    :raises HydrusExceptions.UnsupportedFileException: if the mime is not in HC.ALLOWED_MIMES, or it is
        a HEIF-family type and pillow-heif is not available
    """
    
    size = os.path.getsize( path )
    
    if size == 0:
        
        raise HydrusExceptions.ZeroSizeFileException( 'File is of zero length!' )
        
    
    if mime is None:
        
        mime = GetMime( path, ok_to_look_for_hydrus_updates = ok_to_look_for_hydrus_updates )
        
    
    if mime not in HC.ALLOWED_MIMES:
        
        if mime == HC.TEXT_HTML:
            
            raise HydrusExceptions.UnsupportedFileException( 'Looks like HTML -- maybe the client needs to be taught how to parse this?' )
            
        elif mime == HC.APPLICATION_UNKNOWN:
            
            raise HydrusExceptions.UnsupportedFileException( 'Unknown filetype!' )
            
        else:
            
            raise HydrusExceptions.UnsupportedFileException( 'Filetype is not permitted!' )
            
        
    
    if mime in HC.PIL_HEIF_MIMES and not HydrusImageHandling.HEIF_OK:
        
        raise HydrusExceptions.UnsupportedFileException( 'Sorry, you need the pillow-heif library to support this filetype ({})! Please rebuild your venv.'.format( HC.mime_string_lookup[ mime ] ) )
        
    
    width = None
    height = None
    duration = None
    num_frames = None
    num_words = None
    
    # this is only the starting assumption--the video branch below overwrites it with what ffmpeg reports
    has_audio = mime in HC.MIMES_THAT_DEFINITELY_HAVE_AUDIO
    
    # keep this in the specific-first, general-last test order
    
    if mime == HC.APPLICATION_CLIP:
        
        ( ( width, height ), duration, num_frames ) = HydrusClipHandling.GetClipProperties( path )
        
    elif mime == HC.APPLICATION_KRITA:
        
        try:
            
            ( width, height ) = HydrusKritaHandling.GetKraProperties( path )
            
        except HydrusExceptions.NoResolutionFileException:
            
            pass
            
        
    elif mime == HC.APPLICATION_PROCREATE:
        
        try:
            
            ( width, height ) = HydrusProcreateHandling.GetProcreateResolution( path )
            
        except Exception:
            
            # resolution is best-effort for this filetype; leave width and height as None
            pass
            
        
    elif mime == HC.IMAGE_SVG:
        
        try:
            
            ( width, height ) = HydrusSVGHandling.GetSVGResolution( path )
            
        except HydrusExceptions.NoResolutionFileException:
            
            pass
            
        
    elif mime == HC.APPLICATION_PDF:
        
        try:
            
            ( num_words, ( width, height ) ) = HydrusPDFHandling.GetPDFInfo( path )
            
        except HydrusExceptions.LimitedSupportFileException:
            
            pass
            
        
    elif mime == HC.APPLICATION_FLASH:
        
        ( ( width, height ), duration, num_frames ) = HydrusFlashHandling.GetFlashProperties( path )
        
    elif mime == HC.APPLICATION_PSD:
        
        try:
            
            ( width, height ) = HydrusPSDHandling.GetPSDResolution( path )
            
        except Exception as e:
            
            HydrusData.Print( 'Problem calculating resolution for "{}":'.format( path ) )
            HydrusData.PrintException( e )
            HydrusData.Print( 'Attempting PSD resolution fallback' )
            
            ( width, height ) = HydrusPSDHandling.GetPSDResolutionFallback( path )
            
        
    elif mime in HC.VIDEO or mime in HC.HEIF_TYPE_SEQUENCES:
        
        ( ( width, height ), duration, num_frames, has_audio ) = HydrusVideoHandling.GetFFMPEGVideoProperties( path )
        
    elif mime in HC.ANIMATIONS:
        
        ( ( width, height ), duration, num_frames ) = HydrusAnimationHandling.GetAnimationProperties( path, mime )
        
    elif mime in HC.IMAGES:
        
        ( width, height ) = HydrusImageHandling.GetImageResolution( path, mime )
        
    elif mime in HC.AUDIO:
        
        ffmpeg_lines = HydrusVideoHandling.GetFFMPEGInfoLines( path )
        
        ( file_duration_in_s, stream_duration_in_s ) = HydrusVideoHandling.ParseFFMPEGDuration( ffmpeg_lines )
        
        duration = int( file_duration_in_s * 1000 )
        
    
    # guard against negative values coming back from the parsers above--only magnitudes are returned
    
    if width is not None and width < 0:
        
        width *= -1
        
    
    if height is not None and height < 0:
        
        height *= -1
        
    
    if duration is not None and duration < 0:
        
        duration *= -1
        
    
    if num_frames is not None and num_frames < 0:
        
        num_frames *= -1
        
    
    if num_words is not None and num_words < 0:
        
        num_words *= -1
        
    
    return ( size, mime, width, height, duration, num_frames, has_audio, num_words )
    
2013-07-17 20:56:13 +00:00
2023-08-09 21:12:17 +00:00
2022-01-26 21:57:04 +00:00
def GetFileModifiedTimestamp( path ) -> int:
    """Return the last-modified time of the file at path as whole seconds, with the fraction truncated."""
    
    modified_time = os.stat( path ).st_mtime
    
    return int( modified_time )
    
2019-09-25 21:34:18 +00:00
2023-08-09 21:12:17 +00:00
2013-08-07 22:25:18 +00:00
def GetHashFromPath( path ):
    """Return the sha256 digest, as raw bytes, of the file at path, reading it block by block."""
    
    sha256_hasher = hashlib.sha256()
    
    with open( path, 'rb' ) as file_handle:
        
        for chunk in HydrusPaths.ReadFileLikeAsBlocks( file_handle ):
            
            sha256_hasher.update( chunk )
            
        
    
    digest = sha256_hasher.digest()
    
    return digest
    
2023-08-09 21:12:17 +00:00
# File signature table used by GetMime. Rows are tested in order and the first match wins, so more
# specific signatures must come before more general ones.
# Each row is ( ( ( offsets, headers ), ... ), mime ):
#   - offsets is a list of byte positions and headers is a list of byte strings
#   - a ( offsets, headers ) pair passes if any of its headers is found at any of its offsets
#   - a row matches only when every one of its pairs passes
# Both halves of a pair MUST be lists--GetMime iterates over them.
headers_and_mime = [
    ( ( ( [0], [b'\xff\xd8'] ), ), HC.IMAGE_JPEG ),
    ( ( ( [0], [b'\x89PNG'] ), ), HC.UNDETERMINED_PNG ),
    ( ( ( [0], [b'GIF87a', b'GIF89a'] ), ), HC.UNDETERMINED_GIF ),
    ( ( ( [8], [b'WEBP'] ), ), HC.IMAGE_WEBP ),
    ( ( ( [0], [b'II*\x00', b'MM\x00*'] ), ), HC.IMAGE_TIFF ),
    ( ( ( [0], [b'BM'] ), ), HC.IMAGE_BMP ),
    ( ( ( [0], [b'\x00\x00\x01\x00', b'\x00\x00\x02\x00'] ), ), HC.IMAGE_ICON ),
    ( ( ( [0], [b'qoif'] ), ), HC.IMAGE_QOI ),
    ( ( ( [0], [b'CWS', b'FWS', b'ZWS'] ), ), HC.APPLICATION_FLASH ),
    ( ( ( [0], [b'FLV'] ), ), HC.VIDEO_FLV ),
    ( ( ( [0], [b'%PDF'] ), ), HC.APPLICATION_PDF ),
    ( ( ( [0], [b'8BPS\x00\x01', b'8BPS\x00\x02'] ), ), HC.APPLICATION_PSD ),
    ( ( ( [0], [b'CSFCHUNK'] ), ), HC.APPLICATION_CLIP ),
    ( ( ( [0], [b'SAI-CANVAS'] ), ), HC.APPLICATION_SAI2 ),
    ( ( ( [0], [b'gimp xcf '] ), ), HC.APPLICATION_XCF ),
    ( ( ( [38, 42, 58, 63],[ b'application/x-krita'] ), ), HC.APPLICATION_KRITA ), # important this comes before zip files because this is also a zip file
    ( ( ( [38, 43],[ b'application/epub+zip'] ), ), HC.APPLICATION_EPUB ),
    ( ( ( [4], [b'FORM'] ), ( [12], [b'DJVU', b'DJVM', b'PM44', b'BM44', b'SDJV'] ), ), HC.APPLICATION_DJVU ),
    ( ( ( [0], [b'PK\x03\x04', b'PK\x05\x06', b'PK\x07\x08'] ), ), HC.APPLICATION_ZIP ),
    ( ( ( [0], [b'7z\xBC\xAF\x27\x1C'] ), ), HC.APPLICATION_7Z ),
    ( ( ( [0], [b'\x52\x61\x72\x21\x1A\x07\x00', b'\x52\x61\x72\x21\x1A\x07\x01\x00'] ), ), HC.APPLICATION_RAR ),
    ( ( ( [0], [b'\x1f\x8b'] ), ), HC.APPLICATION_GZIP ),
    ( ( ( [0], [b'hydrus encrypted zip'] ), ), HC.APPLICATION_HYDRUS_ENCRYPTED_ZIP ),
    ( ( ( [4], [b'ftypavif'] ), ), HC.IMAGE_AVIF ),
    ( ( ( [4], [b'ftypavis'] ), ), HC.IMAGE_AVIF_SEQUENCE ),
    ( ( ( [4], [b'ftypmif1'] ), ( [16, 20, 24], [b'avif'] ), ), HC.IMAGE_AVIF ), # must come before the plain ftypmif1 row below, which would otherwise claim it
    ( ( ( [4], [b'ftypheic', b'ftypheix', b'ftypheim', b'ftypheis'] ), ), HC.IMAGE_HEIC ),
    ( ( ( [4], [b'ftyphevc', b'ftyphevx', b'ftyphevm', b'ftyphevs'] ), ), HC.IMAGE_HEIC_SEQUENCE ),
    ( ( ( [4], [b'ftypmif1'] ), ), HC.IMAGE_HEIF ),
    ( ( ( [4], [b'ftypmsf1'] ), ), HC.IMAGE_HEIF_SEQUENCE ),
    ( ( ( [4], [b'ftypmp4', b'ftypisom', b'ftypM4V', b'ftypMSNV', b'ftypavc1', b'ftypFACE', b'ftypdash'] ), ), HC.UNDETERMINED_MP4 ),
    ( ( ( [4], [b'ftypqt'] ), ), HC.VIDEO_MOV ),
    ( ( ( [0], [b'fLaC'] ), ), HC.AUDIO_FLAC ),
    ( ( ( [0], [b'RIFF'] ), ( [8], [b'WAVE'] ) ), HC.AUDIO_WAVE ),
    ( ( ( [0], [b'wvpk'] ), ), HC.AUDIO_WAVPACK ),
    ( ( ( [8], [b'AVI '] ), ), HC.VIDEO_AVI ),
    ( ( ( [0], [b'\x30\x26\xB2\x75\x8E\x66\xCF\x11\xA6\xD9\x00\xAA\x00\x62\xCE\x6C'] ), ), HC.UNDETERMINED_WM ),
    ( ( ( [0], [b'\x4D\x5A\x90\x00\x03'], ), ), HC.APPLICATION_WINDOWS_EXE )
]
2019-07-10 22:38:30 +00:00
def GetMime( path, ok_to_look_for_hydrus_updates = False ):
    """
    Work out the mime of the file at path from its content.
    
    The checks run in this order, returning on the first hit:
    
    1. optionally, whether the whole file parses as a hydrus content/definitions update
    2. the signature table in headers_and_mime, tested against the first 256 bytes
    3. JSON, HTML and SVG text sniffing
    4. ffmpeg, as a last resort
    
    :param path: path of the file to inspect
    :param ok_to_look_for_hydrus_updates: if True and the file is under 64MB, try to parse it as a hydrus update first
    :return: one of the HC mime constants; HC.APPLICATION_UNKNOWN if nothing recognised the file
    :raises HydrusExceptions.ZeroSizeFileException: if the file is empty
    """
    
    size = os.path.getsize( path )
    
    if size == 0:
        
        raise HydrusExceptions.ZeroSizeFileException( 'File is of zero length!' )
        
    
    if ok_to_look_for_hydrus_updates and size < 64 * 1024 * 1024:
        
        with open( path, 'rb' ) as f:
            
            update_network_bytes = f.read()
            
        
        try:
            
            update = HydrusSerialisable.CreateFromNetworkBytes( update_network_bytes )
            
            if isinstance( update, HydrusNetwork.ContentUpdate ):
                
                return HC.APPLICATION_HYDRUS_UPDATE_CONTENT
                
            elif isinstance( update, HydrusNetwork.DefinitionsUpdate ):
                
                return HC.APPLICATION_HYDRUS_UPDATE_DEFINITIONS
                
            
        except Exception:
            
            # most files are not updates, so failing to parse is the normal case and we just move on
            pass
            
        
    
    with open( path, 'rb' ) as f:
        
        first_bytes_of_file = f.read( 256 )
        
    
    for ( offsets_and_headers, mime ) in headers_and_mime:
        
        # a row matches when _every_ ( offsets, headers ) pair has _some_ header at _some_ offset
        it_passes = all(
            any( first_bytes_of_file[ offset: ].startswith( header ) for header in headers for offset in offsets )
            for ( offsets, headers ) in offsets_and_headers
        )
        
        if not it_passes:
            
            continue
            
        
        if mime == HC.APPLICATION_ZIP:
            
            # several richer formats are zips underneath, so look inside before settling for plain zip
            
            opendoc_mime = HydrusArchiveHandling.MimeFromOpenDocument( path )
            
            if opendoc_mime is not None:
                
                return opendoc_mime
                
            
            if HydrusProcreateHandling.ZipLooksLikeProcreate( path ):
                
                return HC.APPLICATION_PROCREATE
                
            
            return HC.APPLICATION_ZIP
            
        
        if mime in ( HC.UNDETERMINED_WM, HC.UNDETERMINED_MP4 ):
            
            return HydrusVideoHandling.GetMime( path )
            
        elif mime == HC.UNDETERMINED_PNG:
            
            if HydrusAnimationHandling.IsPNGAnimated( first_bytes_of_file ):
                
                return HC.ANIMATION_APNG
                
            else:
                
                return HC.IMAGE_PNG
                
            
        elif mime == HC.UNDETERMINED_GIF:
            
            if HydrusAnimationHandling.PILAnimationHasDuration( path ):
                
                return HC.ANIMATION_GIF
                
            else:
                
                return HC.IMAGE_GIF
                
            
        else:
            
            return mime
            
        
    
    # If the file starts with '{' or '[' it is probably JSON
    # but we can't know for sure so we send it over to be checked
    if first_bytes_of_file.startswith( ( b'{', b'[' ) ):
        
        with open( path, 'rb' ) as f:
            
            potential_json_document_bytes = f.read()
            
        
        if HydrusText.LooksLikeJSON( potential_json_document_bytes ):
            
            return HC.APPLICATION_JSON
            
        
    
    if HydrusText.LooksLikeHTML( first_bytes_of_file ):
        
        return HC.TEXT_HTML
        
    
    if HydrusText.LooksLikeSVG( first_bytes_of_file ):
        
        return HC.IMAGE_SVG
        
    
    # it is important this goes at the end, because ffmpeg has a billion false positives! and it takes CPU to true negative
    # for instance, it once thought some hydrus update files were mpegs
    # it also thinks txt files can be mpegs
    likely_to_false_positive = path.endswith( ( '.txt', '.log', '.json' ) )
    
    if not likely_to_false_positive:
        
        try:
            
            mime = HydrusVideoHandling.GetMime( path )
            
            if mime != HC.APPLICATION_UNKNOWN:
                
                return mime
                
            
        except HydrusExceptions.UnsupportedFileException:
            
            pass
            
        except Exception as e:
            
            HydrusData.Print( 'FFMPEG had trouble with: ' + path )
            HydrusData.PrintException( e, do_wait = False )
            
        
    
    return HC.APPLICATION_UNKNOWN
    
# Signature table used by GetThumbnailMime. Each row is ( ( ( offset, header ), ... ), mime ), and a
# row matches when every header is found at its offset. Unlike headers_and_mime, offset and header
# here are single values, not lists.
headers_and_mime_thumbnails = [
    (
        ( ( 0, b'\xff\xd8' ), ),
        HC.IMAGE_JPEG
    ),
    (
        ( ( 0, b'\x89PNG' ), ),
        HC.UNDETERMINED_PNG
    )
]
2022-04-06 20:40:17 +00:00
def GetThumbnailMime( path ):
    """
    Identify a thumbnail file by testing its first 256 bytes against headers_and_mime_thumbnails.
    
    :return: the mime of the first matching row, or HC.APPLICATION_OCTET_STREAM if no row matches
    """
    
    with open( path, 'rb' ) as file_handle:
        
        leading_bytes = file_handle.read( 256 )
        
    
    for ( signature_pairs, candidate_mime ) in headers_and_mime_thumbnails:
        
        every_header_found = all( leading_bytes[ offset: ].startswith( header ) for ( offset, header ) in signature_pairs )
        
        if every_header_found:
            
            return candidate_mime
            
        
    
    return HC.APPLICATION_OCTET_STREAM
    