2013-08-07 22:25:18 +00:00
import hashlib
2020-07-29 20:52:44 +00:00
import os
2023-08-09 21:12:17 +00:00
from hydrus . core import HydrusAnimationHandling
2023-08-02 08:16:59 +00:00
from hydrus . core import HydrusPSDHandling
2021-11-03 20:49:56 +00:00
from hydrus . core import HydrusClipHandling
2023-09-23 19:21:26 +00:00
from hydrus . core import HydrusArchiveHandling
2020-04-22 21:00:35 +00:00
from hydrus . core import HydrusConstants as HC
from hydrus . core import HydrusData
from hydrus . core import HydrusDocumentHandling
from hydrus . core import HydrusExceptions
from hydrus . core import HydrusFlashHandling
2023-07-26 20:57:00 +00:00
from hydrus . core import HydrusKritaHandling
2023-08-26 19:05:45 +00:00
from hydrus . core import HydrusProcreateHandling
2020-04-22 21:00:35 +00:00
from hydrus . core import HydrusPaths
from hydrus . core import HydrusSerialisable
2023-07-26 20:57:00 +00:00
from hydrus . core import HydrusSVGHandling
2023-09-02 19:36:17 +00:00
from hydrus . core import HydrusPDFHandling
2021-10-27 21:12:33 +00:00
from hydrus . core import HydrusTemp
2020-04-22 21:00:35 +00:00
from hydrus . core import HydrusText
from hydrus . core import HydrusVideoHandling
2023-10-04 20:51:17 +00:00
from hydrus . core . images import HydrusImageHandling
2021-04-07 21:26:45 +00:00
from hydrus . core . networking import HydrusNetwork
2013-07-17 20:56:13 +00:00
2022-12-07 22:41:53 +00:00
# speedcopy is an optional accelerator: when it imports, patch_copyfile() is
# called once at module load. SPEEDCOPY_OK records whether that happened.
try:
    import speedcopy

    speedcopy.patch_copyfile()
except ImportError:
    # Not installed: an expected situation, so stay quiet.
    SPEEDCOPY_OK = False
except Exception as e:
    # Installed but failed to initialise: log it and carry on without it.
    HydrusData.Print('Failed to initialise speedcopy:')
    HydrusData.PrintException(e)
    SPEEDCOPY_OK = False
else:
    SPEEDCOPY_OK = True
2015-03-25 22:04:19 +00:00
2022-02-02 22:14:01 +00:00
def GenerateThumbnailBytes(path, target_resolution, mime, duration, num_frames, clip_rect=None, percentage_in=35):
    """Generate a thumbnail for the file at ``path`` and return it serialised.

    Every argument is forwarded unchanged to GenerateThumbnailNumPy. The numpy
    image that comes back is passed to
    HydrusImageHandling.GenerateThumbnailBytesFromNumPy and that function's
    result is returned as-is.
    """
    return HydrusImageHandling.GenerateThumbnailBytesFromNumPy(
        GenerateThumbnailNumPy(
            path,
            target_resolution,
            mime,
            duration,
            num_frames,
            clip_rect=clip_rect,
            percentage_in=percentage_in,
        )
    )
2023-09-23 19:13:21 +00:00
def _GenerateDefaultThumbnailNumPy(filename, target_resolution, clip_rect=None):
    # Thumbnail one of the placeholder PNGs that live in HC.STATIC_DIR.
    thumb_path = os.path.join(HC.STATIC_DIR, filename)

    return HydrusImageHandling.GenerateThumbnailNumPyFromStaticImagePath(thumb_path, target_resolution, HC.IMAGE_PNG, clip_rect=clip_rect)


def _GenerateThumbnailNumPyViaTempImage(write_image_to_path, default_filename, target_resolution, clip_rect=None, temp_suffix=None):
    # Ask write_image_to_path(temp_path) to dump a preview image into a temp file,
    # thumbnail that file (always read as HC.IMAGE_PNG), and fall back to the
    # placeholder default_filename if either step fails.
    #
    # The temp path is acquired BEFORE the try so that the finally clause can
    # never run with os_file_handle/temp_path unbound.
    if temp_suffix is None:
        (os_file_handle, temp_path) = HydrusTemp.GetTempPath()
    else:
        (os_file_handle, temp_path) = HydrusTemp.GetTempPath(suffix=temp_suffix)

    try:
        write_image_to_path(temp_path)

        return HydrusImageHandling.GenerateThumbnailNumPyFromStaticImagePath(temp_path, target_resolution, HC.IMAGE_PNG, clip_rect=clip_rect)
    except Exception:
        return _GenerateDefaultThumbnailNumPy(default_filename, target_resolution, clip_rect=clip_rect)
    finally:
        HydrusTemp.CleanUpTempPath(os_file_handle, temp_path)


def _GenerateVideoThumbnailNumPy(path, target_resolution, mime, duration, num_frames, clip_rect=None, percentage_in=35):
    # Grab a frame percentage_in% of the way through via FFMPEG, retry on the
    # first frame if that fails, and use the hydrus placeholder as a last resort.
    renderer = None

    desired_thumb_frame = int((percentage_in / 100.0) * num_frames)

    try:
        try:
            renderer = HydrusVideoHandling.VideoRendererFFMPEG(path, mime, duration, num_frames, target_resolution, clip_rect=clip_rect, start_pos=desired_thumb_frame)

            numpy_image = renderer.read_frame()
        except Exception as e:
            HydrusData.Print('Problem generating thumbnail for "{}" at frame {} ({})--FFMPEG could not render it.'.format(path, desired_thumb_frame, HydrusData.ConvertFloatToPercentage(percentage_in / 100.0)))
            HydrusData.PrintException(e)

            numpy_image = None

        if numpy_image is None and desired_thumb_frame != 0:
            if renderer is not None:
                renderer.Stop()

                # forget the stopped renderer so it is not stopped a second time below
                renderer = None

            # try first frame instead
            try:
                renderer = HydrusVideoHandling.VideoRendererFFMPEG(path, mime, duration, num_frames, target_resolution, clip_rect=clip_rect)

                numpy_image = renderer.read_frame()
            except Exception as e:
                HydrusData.Print('Problem generating thumbnail for "{}" at first frame--FFMPEG could not render it.'.format(path))
                HydrusData.PrintException(e)

                numpy_image = None

        if numpy_image is None:
            return _GenerateDefaultThumbnailNumPy('hydrus.png', target_resolution, clip_rect=clip_rect)

        return HydrusImageHandling.ResizeNumPyImage(numpy_image, target_resolution)  # just in case ffmpeg doesn't deliver right
    finally:
        # make sure the renderer is released even if the resize/fallback above raises
        if renderer is not None:
            renderer.Stop()


def GenerateThumbnailNumPy(path, target_resolution, mime, duration, num_frames, clip_rect=None, percentage_in=35):
    """Produce a thumbnail image (numpy) for the file at ``path``.

    The strategy is chosen by ``mime``:

    * PSD: the dedicated PSD renderer, then an ffmpeg render to a temp PNG.
    * CLIP / Krita / Procreate / Flash: an embedded or rendered preview is
      written to a temp file and thumbnailed.
    * SVG / PDF: their dedicated renderers.
    * anything in HC.IMAGES, plus HC.ANIMATION_GIF: the static image path.
    * everything else: treated as video and rendered through FFMPEG.

    Whenever a strategy fails, a placeholder PNG from HC.STATIC_DIR is
    thumbnailed instead, so failures in the format handlers do not propagate.

    :param path: path of the file to thumbnail.
    :param target_resolution: (width, height) tuple; (0, 0) is replaced by (128, 128).
    :param mime: one of the HC mime constants.
    :param duration: forwarded to the FFMPEG renderer (video branch only).
    :param num_frames: frame count; used with percentage_in to pick the video frame.
    :param clip_rect: optional clip rectangle forwarded to the image helpers.
    :param percentage_in: how far into a video (as a percentage of num_frames) to sample.
    :return: the thumbnail as returned by the HydrusImageHandling helpers.
    """
    if target_resolution == (0, 0):
        target_resolution = (128, 128)

    if mime == HC.APPLICATION_PSD:
        try:
            thumbnail_numpy = HydrusPSDHandling.GenerateThumbnailNumPyFromPSDPath(path, target_resolution, clip_rect=clip_rect)
        except Exception as e:
            HydrusData.Print('Problem generating thumbnail for "{}":'.format(path))
            HydrusData.PrintException(e)
            HydrusData.Print('Attempting ffmpeg PSD thumbnail fallback')

            thumbnail_numpy = _GenerateThumbnailNumPyViaTempImage(
                lambda temp_path: HydrusVideoHandling.RenderImageToImagePath(path, temp_path),
                'psd.png',
                target_resolution,
                clip_rect=clip_rect,
                temp_suffix='.png',
            )

    elif mime == HC.APPLICATION_CLIP:
        thumbnail_numpy = _GenerateThumbnailNumPyViaTempImage(
            lambda temp_path: HydrusClipHandling.ExtractDBPNGToPath(path, temp_path),
            'clip.png',
            target_resolution,
            clip_rect=clip_rect,
        )

    elif mime == HC.APPLICATION_KRITA:
        thumbnail_numpy = _GenerateThumbnailNumPyViaTempImage(
            lambda temp_path: HydrusKritaHandling.ExtractZippedImageToPath(path, temp_path),
            'krita.png',
            target_resolution,
            clip_rect=clip_rect,
        )

    elif mime == HC.APPLICATION_PROCREATE:
        thumbnail_numpy = _GenerateThumbnailNumPyViaTempImage(
            lambda temp_path: HydrusProcreateHandling.ExtractZippedThumbnailToPath(path, temp_path),
            'procreate.png',
            target_resolution,
            clip_rect=clip_rect,
        )

    elif mime == HC.IMAGE_SVG:
        try:
            thumbnail_numpy = HydrusSVGHandling.GenerateThumbnailNumPyFromSVGPath(path, target_resolution, clip_rect=clip_rect)
        except Exception as e:
            # "no thumbnail available" is an expected outcome and is not logged
            if not isinstance(e, HydrusExceptions.NoThumbnailFileException):
                HydrusData.Print('Problem generating thumbnail for "{}":'.format(path))
                HydrusData.PrintException(e)

            thumbnail_numpy = _GenerateDefaultThumbnailNumPy('svg.png', target_resolution, clip_rect=clip_rect)

    elif mime == HC.APPLICATION_PDF:
        try:
            thumbnail_numpy = HydrusPDFHandling.GenerateThumbnailNumPyFromPDFPath(path, target_resolution, clip_rect=clip_rect)
        except Exception as e:
            # "no thumbnail available" is an expected outcome and is not logged
            if not isinstance(e, HydrusExceptions.NoThumbnailFileException):
                HydrusData.Print('Problem generating thumbnail for "{}":'.format(path))
                HydrusData.PrintException(e)

            thumbnail_numpy = _GenerateDefaultThumbnailNumPy('pdf.png', target_resolution, clip_rect=clip_rect)

    elif mime == HC.APPLICATION_FLASH:
        thumbnail_numpy = _GenerateThumbnailNumPyViaTempImage(
            lambda temp_path: HydrusFlashHandling.RenderPageToFile(path, temp_path, 1),
            'flash.png',
            target_resolution,
            clip_rect=clip_rect,
        )

    elif mime in HC.IMAGES or mime == HC.ANIMATION_GIF:  # not apng atm
        # TODO: it would be nice to have gif and apng generating their thumb x frames in, like with videos. maybe we should add animation thumb fetcher to hydrusanimationhandling
        try:
            thumbnail_numpy = HydrusImageHandling.GenerateThumbnailNumPyFromStaticImagePath(path, target_resolution, mime, clip_rect=clip_rect)
        except Exception as e:
            HydrusData.Print('Problem generating thumbnail for "{}":'.format(path))
            HydrusData.PrintException(e)

            thumbnail_numpy = _GenerateDefaultThumbnailNumPy('hydrus.png', target_resolution, clip_rect=clip_rect)

    else:
        thumbnail_numpy = _GenerateVideoThumbnailNumPy(path, target_resolution, mime, duration, num_frames, clip_rect=clip_rect, percentage_in=percentage_in)

    return thumbnail_numpy
2019-03-27 22:01:02 +00:00
2015-03-25 22:04:19 +00:00
def GetExtraHashesFromPath(path):
    """Return the raw ``(md5, sha1, sha512)`` digests of the file at ``path``.

    The file is opened in binary mode and fed block by block (as yielded by
    HydrusPaths.ReadFileLikeAsBlocks) to all three hashers in a single pass.
    """
    hashers = (hashlib.md5(), hashlib.sha1(), hashlib.sha512())

    with open(path, 'rb') as f:
        for block in HydrusPaths.ReadFileLikeAsBlocks(f):
            for hasher in hashers:
                hasher.update(block)

    (md5, sha1, sha512) = (hasher.digest() for hasher in hashers)

    return (md5, sha1, sha512)
2023-08-23 20:43:26 +00:00
2019-07-10 22:38:30 +00:00
def GetFileInfo(path, mime=None, ok_to_look_for_hydrus_updates=False):
    """Inspect the file at ``path`` and return its basic metadata.

    :param path: path of the file to inspect.
    :param mime: an HC mime constant if already known; when None it is
        determined with GetMime.
    :param ok_to_look_for_hydrus_updates: forwarded to GetMime when the mime
        has to be determined.
    :return: ``(size, mime, width, height, duration, num_frames, has_audio, num_words)``.
        Any property the format handler does not supply is None; has_audio is
        always a bool. Negative numeric values are flipped to positive.
    :raises HydrusExceptions.ZeroSizeFileException: the file is empty.
    :raises HydrusExceptions.UnsupportedFileException: the mime is not in
        HC.ALLOWED_MIMES, or it needs pillow-heif and that is unavailable.
    """
    size = os.path.getsize(path)

    if size == 0:
        raise HydrusExceptions.ZeroSizeFileException('File is of zero length!')

    if mime is None:
        mime = GetMime(path, ok_to_look_for_hydrus_updates=ok_to_look_for_hydrus_updates)

    if mime not in HC.ALLOWED_MIMES:
        if mime == HC.TEXT_HTML:
            raise HydrusExceptions.UnsupportedFileException('Looks like HTML -- maybe the client needs to be taught how to parse this?')
        elif mime == HC.APPLICATION_UNKNOWN:
            raise HydrusExceptions.UnsupportedFileException('Unknown filetype!')
        else:
            raise HydrusExceptions.UnsupportedFileException('Filetype is not permitted!')

    if mime in HC.PIL_HEIF_MIMES and not HydrusImageHandling.HEIF_OK:
        raise HydrusExceptions.UnsupportedFileException('Sorry, you need the pillow-heif library to support this filetype ({})! Please rebuild your venv.'.format(HC.mime_string_lookup[mime]))

    width = None
    height = None
    duration = None
    num_frames = None
    num_words = None

    has_audio = mime in HC.MIMES_THAT_DEFINITELY_HAVE_AUDIO

    # keep this in the specific-first, general-last test order
    if mime == HC.APPLICATION_CLIP:
        ((width, height), duration, num_frames) = HydrusClipHandling.GetClipProperties(path)

    elif mime == HC.APPLICATION_KRITA:
        try:
            (width, height) = HydrusKritaHandling.GetKraProperties(path)
        except HydrusExceptions.NoResolutionFileException:
            pass  # resolution simply stays unknown

    elif mime == HC.APPLICATION_PROCREATE:
        try:
            (width, height) = HydrusProcreateHandling.GetProcreateResolution(path)
        except Exception:
            # best effort: any parsing problem just leaves the resolution unknown.
            # (narrowed from a bare except so Ctrl+C / SystemExit still propagate)
            pass

    elif mime == HC.IMAGE_SVG:
        try:
            (width, height) = HydrusSVGHandling.GetSVGResolution(path)
        except HydrusExceptions.NoResolutionFileException:
            pass  # resolution simply stays unknown

    elif mime == HC.APPLICATION_PDF:
        # This is the only PDF branch: GetPDFInfo supplies both the word count
        # and the page size. A later duplicate PDF test could never be reached.
        try:
            (num_words, (width, height)) = HydrusPDFHandling.GetPDFInfo(path)
        except HydrusExceptions.LimitedSupportFileException:
            pass

    elif mime == HC.APPLICATION_FLASH:
        ((width, height), duration, num_frames) = HydrusFlashHandling.GetFlashProperties(path)

    elif mime == HC.APPLICATION_PSD:
        try:
            (width, height) = HydrusPSDHandling.GetPSDResolution(path)
        except Exception as e:
            HydrusData.Print('Problem calculating resolution for "{}":'.format(path))
            HydrusData.PrintException(e)
            HydrusData.Print('Attempting PSD resolution fallback')

            (width, height) = HydrusPSDHandling.GetPSDResolutionFallback(path)

    elif mime in HC.VIDEO or mime in HC.HEIF_TYPE_SEQUENCES:
        ((width, height), duration, num_frames, has_audio) = HydrusVideoHandling.GetFFMPEGVideoProperties(path)

    elif mime in HC.ANIMATIONS:
        ((width, height), duration, num_frames) = HydrusAnimationHandling.GetAnimationProperties(path, mime)

    elif mime in HC.IMAGES:
        (width, height) = HydrusImageHandling.GetImageResolution(path, mime)

    elif mime in HC.AUDIO:
        ffmpeg_lines = HydrusVideoHandling.GetFFMPEGInfoLines(path)

        (file_duration_in_s, stream_duration_in_s) = HydrusVideoHandling.ParseFFMPEGDuration(ffmpeg_lines)

        # seconds -> whole milliseconds
        duration = int(file_duration_in_s * 1000)

    # Some parsers hand back negative numbers; normalise each value on its own.
    if width is not None and width < 0:
        width *= -1

    if height is not None and height < 0:
        # fixed: this used to flip width, leaving a negative height in place
        height *= -1

    if duration is not None and duration < 0:
        duration *= -1

    if num_frames is not None and num_frames < 0:
        num_frames *= -1

    if num_words is not None and num_words < 0:
        num_words *= -1

    return (size, mime, width, height, duration, num_frames, has_audio, num_words)
2013-07-17 20:56:13 +00:00
2023-08-09 21:12:17 +00:00
2022-01-26 21:57:04 +00:00
def GetFileModifiedTimestamp(path) -> int:
    """Return the last-modified time of ``path`` as whole seconds.

    The float mtime reported by the OS is truncated with int(), not rounded.
    """
    modified_time = os.stat(path).st_mtime

    return int(modified_time)
2019-09-25 21:34:18 +00:00
2023-08-09 21:12:17 +00:00
2013-08-07 22:25:18 +00:00
def GetHashFromPath(path):
    """Return the raw SHA-256 digest of the file at ``path``.

    The file is read in binary mode, block by block, via
    HydrusPaths.ReadFileLikeAsBlocks so it is never loaded whole.
    """
    sha256_hasher = hashlib.sha256()

    with open(path, 'rb') as file_like:
        for chunk in HydrusPaths.ReadFileLikeAsBlocks(file_like):
            sha256_hasher.update(chunk)

    return sha256_hasher.digest()
2023-08-09 21:12:17 +00:00
2023-10-11 20:46:40 +00:00
# TODO: replace this with a FileTypeChecker class or something that tucks all this messy data away more neatly
2023-10-07 19:14:35 +00:00
# File-signature table consulted by GetMime.
#
# Each row is ( offsets_and_headers, mime ):
#   - offsets_and_headers is a tuple of ( offsets, headers ) pairs; a pair lists the
#     byte offsets to look at and the byte strings that may appear at any of them.
#   - mime is the HC constant reported for a match. Rows tagged UNDETERMINED_* are
#     refined further inside GetMime (e.g. PNG vs APNG, GIF still vs animated).
#
# GetMime walks the rows top to bottom and acts on the first row that passes, so
# ORDER MATTERS: more specific signatures must sit above the generic ones they
# overlap with (see the krita/zip note below).
#
# NOTE(review): rows with more than one pair (DJVU, mif1+avif, RIFF+WAVE) look like
# they are meant to require EVERY pair to match -- confirm that
# passes_offsets_and_headers enforces this, otherwise e.g. any RIFF file would pass
# the WAVE row before the AVI row is reached.
# NOTE(review): b'ftypavc1' is listed twice in the UNDETERMINED_MP4 row.
headers_and_mime = [
( ( ( [ 0 ] , [ b ' \xff \xd8 ' ] ) , ) , HC . IMAGE_JPEG ) ,
( ( ( [ 0 ] , [ b ' \x89 PNG ' ] ) , ) , HC . UNDETERMINED_PNG ) ,
( ( ( [ 0 ] , [ b ' GIF87a ' , b ' GIF89a ' ] ) , ) , HC . UNDETERMINED_GIF ) ,
( ( ( [ 8 ] , [ b ' WEBP ' ] ) , ) , HC . IMAGE_WEBP ) ,
( ( ( [ 0 ] , [ b ' II* \x00 ' , b ' MM \x00 * ' ] ) , ) , HC . IMAGE_TIFF ) ,
( ( ( [ 0 ] , [ b ' BM ' ] ) , ) , HC . IMAGE_BMP ) ,
( ( ( [ 0 ] , [ b ' \x00 \x00 \x01 \x00 ' , b ' \x00 \x00 \x02 \x00 ' ] ) , ) , HC . IMAGE_ICON ) ,
( ( ( [ 0 ] , [ b ' qoif ' ] ) , ) , HC . IMAGE_QOI ) ,
( ( ( [ 0 ] , [ b ' CWS ' , b ' FWS ' , b ' ZWS ' ] ) , ) , HC . APPLICATION_FLASH ) ,
( ( ( [ 0 ] , [ b ' FLV ' ] ) , ) , HC . VIDEO_FLV ) ,
( ( ( [ 0 ] , [ b ' % PDF ' ] ) , ) , HC . APPLICATION_PDF ) ,
( ( ( [ 0 ] , [ b ' 8BPS \x00 \x01 ' , b ' 8BPS \x00 \x02 ' ] ) , ) , HC . APPLICATION_PSD ) ,
( ( ( [ 0 ] , [ b ' CSFCHUNK ' ] ) , ) , HC . APPLICATION_CLIP ) ,
( ( ( [ 0 ] , [ b ' SAI-CANVAS ' ] ) , ) , HC . APPLICATION_SAI2 ) ,
( ( ( [ 0 ] , [ b ' gimp xcf ' ] ) , ) , HC . APPLICATION_XCF ) ,
( ( ( [ 38 , 42 , 58 , 63 ] , [ b ' application/x-krita ' ] ) , ) , HC . APPLICATION_KRITA ) , # important this comes before zip files because this is also a zip file
( ( ( [ 38 , 43 ] , [ b ' application/epub+zip ' ] ) , ) , HC . APPLICATION_EPUB ) ,
( ( ( [ 4 ] , [ b ' FORM ' ] ) , ( [ 12 ] , [ b ' DJVU ' , b ' DJVM ' , b ' PM44 ' , b ' BM44 ' , b ' SDJV ' ] ) , ) , HC . APPLICATION_DJVU ) ,
( ( ( [ 0 ] , [ b ' PK \x03 \x04 ' , b ' PK \x05 \x06 ' , b ' PK \x07 \x08 ' ] ) , ) , HC . APPLICATION_ZIP ) ,
( ( ( [ 0 ] , [ b ' 7z \xBC \xAF \x27 \x1C ' ] ) , ) , HC . APPLICATION_7Z ) ,
( ( ( [ 0 ] , [ b ' \x52 \x61 \x72 \x21 \x1A \x07 \x00 ' , b ' \x52 \x61 \x72 \x21 \x1A \x07 \x01 \x00 ' ] ) , ) , HC . APPLICATION_RAR ) ,
( ( ( [ 0 ] , [ b ' \x1f \x8b ' ] ) , ) , HC . APPLICATION_GZIP ) ,
( ( ( [ 0 ] , [ b ' hydrus encrypted zip ' ] ) , ) , HC . APPLICATION_HYDRUS_ENCRYPTED_ZIP ) ,
( ( ( [ 4 ] , [ b ' ftypavif ' ] ) , ) , HC . IMAGE_AVIF ) ,
( ( ( [ 4 ] , [ b ' ftypavis ' ] ) , ) , HC . IMAGE_AVIF_SEQUENCE ) ,
( ( ( [ 4 ] , [ b ' ftypmif1 ' ] ) , ( [ 16 , 20 , 24 ] , [ b ' avif ' ] ) , ) , HC . IMAGE_AVIF ) ,
( ( ( [ 4 ] , [ b ' ftypheic ' , b ' ftypheix ' , b ' ftypheim ' , b ' ftypheis ' ] ) , ) , HC . IMAGE_HEIC ) ,
( ( ( [ 4 ] , [ b ' ftyphevc ' , b ' ftyphevx ' , b ' ftyphevm ' , b ' ftyphevs ' ] ) , ) , HC . IMAGE_HEIC_SEQUENCE ) ,
( ( ( [ 4 ] , [ b ' ftypmif1 ' ] ) , ) , HC . IMAGE_HEIF ) ,
( ( ( [ 4 ] , [ b ' ftypmsf1 ' ] ) , ) , HC . IMAGE_HEIF_SEQUENCE ) ,
( ( ( [ 4 ] , [ b ' ftypmp4 ' , b ' ftypisom ' , b ' ftypM4V ' , b ' ftypMSNV ' , b ' ftypavc1 ' , b ' ftypavc1 ' , b ' ftypFACE ' , b ' ftypdash ' ] ) , ) , HC . UNDETERMINED_MP4 ) ,
( ( ( [ 4 ] , [ b ' ftypqt ' ] ) , ) , HC . VIDEO_MOV ) ,
( ( ( [ 0 ] , [ b ' fLaC ' ] ) , ) , HC . AUDIO_FLAC ) ,
2023-10-11 20:46:40 +00:00
( ( ( [ 0 ] , [ b ' RIFF ' ] ) , ( [ 8 ] , [ b ' WAVE ' ] ) ) , HC . AUDIO_WAVE ) ,
2023-10-07 19:14:35 +00:00
( ( ( [ 0 ] , [ b ' wvpk ' ] ) , ) , HC . AUDIO_WAVPACK ) ,
( ( ( [ 8 ] , [ b ' AVI ' ] ) , ) , HC . VIDEO_AVI ) ,
( ( ( [ 0 ] , [ b ' \x30 \x26 \xB2 \x75 \x8E \x66 \xCF \x11 \xA6 \xD9 \x00 \xAA \x00 \x62 \xCE \x6C ' ] ) , ) , HC . UNDETERMINED_WM ) ,
( ( ( [ 0 ] , [ b ' \x4D \x5A \x90 \x00 \x03 ' ] , ) , ) , HC . APPLICATION_WINDOWS_EXE )
]
2023-10-11 20:46:40 +00:00
def passes_offsets_and_headers(offsets_and_headers, first_bytes_of_file) -> bool:
    """Test the start of a file against one signature specification.

    :param offsets_and_headers: an iterable of ``(offsets, headers)`` pairs.
        A single pair is satisfied when any of its ``headers`` is found at any
        of its ``offsets``.
    :param first_bytes_of_file: the leading bytes of the file being tested.
    :return: True only when EVERY pair is satisfied; False otherwise, including
        for an empty specification.

    Requiring every pair is what makes multi-part signatures work: a WAVE file
    is ``RIFF`` at 0 *and* ``WAVE`` at 8. Accepting a match on any one pair
    would let every RIFF container (AVI, for instance) pass as WAVE.
    """
    # an empty spec must never match everything
    if not offsets_and_headers:
        return False

    for (offsets, headers) in offsets_and_headers:
        pair_is_satisfied = any(
            first_bytes_of_file[offset:offset + len(header)] == header
            for offset in offsets
            for header in headers
        )

        if not pair_is_satisfied:
            return False

    return True
2023-10-07 19:14:35 +00:00
2019-07-10 22:38:30 +00:00
def GetMime(path, ok_to_look_for_hydrus_updates=False):
    """Work out the HC mime constant for the file at ``path``.

    Detection proceeds from cheap/specific to expensive/general:

    1. optionally, try to deserialise the whole file as a hydrus update;
    2. match the first 256 bytes against the ``headers_and_mime`` signature
       table, refining zip, WM/MP4, PNG and GIF matches with extra checks;
    3. sniff for JSON, HTML and SVG text;
    4. finally ask FFMPEG, unless the extension suggests plain text.

    :param path: path of the file to inspect.
    :param ok_to_look_for_hydrus_updates: when True and the file is under
        64MB, the file is first tested for being a hydrus update.
    :return: an HC mime constant; HC.APPLICATION_UNKNOWN when nothing matches.
    :raises HydrusExceptions.ZeroSizeFileException: the file is empty.
    """
    size = os.path.getsize(path)

    if size == 0:
        raise HydrusExceptions.ZeroSizeFileException('File is of zero length!')

    if ok_to_look_for_hydrus_updates and size < 64 * 1024 * 1024:
        with open(path, 'rb') as f:
            update_network_bytes = f.read()

        try:
            update = HydrusSerialisable.CreateFromNetworkBytes(update_network_bytes)

            if isinstance(update, HydrusNetwork.ContentUpdate):
                return HC.APPLICATION_HYDRUS_UPDATE_CONTENT
            elif isinstance(update, HydrusNetwork.DefinitionsUpdate):
                return HC.APPLICATION_HYDRUS_UPDATE_DEFINITIONS
        except Exception:
            # Not an update file: deliberately ignored, carry on with normal
            # detection. Exception (not a bare except) so that KeyboardInterrupt
            # and SystemExit are not swallowed here.
            pass

    with open(path, 'rb') as f:
        first_bytes_of_file = f.read(256)

    for (offsets_and_headers, mime) in headers_and_mime:
        if not passes_offsets_and_headers(offsets_and_headers, first_bytes_of_file):
            continue

        if mime == HC.APPLICATION_ZIP:
            # several richer formats are zips underneath, so look inside first
            opendoc_mime = HydrusArchiveHandling.MimeFromOpenDocument(path)

            if opendoc_mime is not None:
                return opendoc_mime

            if HydrusProcreateHandling.ZipLooksLikeProcreate(path):
                return HC.APPLICATION_PROCREATE

            return HC.APPLICATION_ZIP

        if mime in (HC.UNDETERMINED_WM, HC.UNDETERMINED_MP4):
            return HydrusVideoHandling.GetMime(path)

        if mime == HC.UNDETERMINED_PNG:
            if HydrusAnimationHandling.IsPNGAnimated(first_bytes_of_file):
                return HC.ANIMATION_APNG

            return HC.IMAGE_PNG

        if mime == HC.UNDETERMINED_GIF:
            if HydrusAnimationHandling.PILAnimationHasDuration(path):
                return HC.ANIMATION_GIF

            return HC.IMAGE_GIF

        return mime

    # If the file starts with '{' (or '[') it is probably JSON
    # but we can't know for sure so we send it over to be checked
    if first_bytes_of_file.startswith((b'{', b'[')):
        with open(path, 'rb') as f:
            potential_json_document_bytes = f.read()

        if HydrusText.LooksLikeJSON(potential_json_document_bytes):
            return HC.APPLICATION_JSON

    if HydrusText.LooksLikeHTML(first_bytes_of_file):
        return HC.TEXT_HTML

    if HydrusText.LooksLikeSVG(first_bytes_of_file):
        return HC.IMAGE_SVG

    # it is important this goes at the end, because ffmpeg has a billion false positives! and it takes CPU to true negative
    # for instance, it once thought some hydrus update files were mpegs
    # it also thinks txt files can be mpegs
    likely_to_false_positive = path.endswith(('.txt', '.log', '.json'))

    if not likely_to_false_positive:
        try:
            mime = HydrusVideoHandling.GetMime(path)

            if mime != HC.APPLICATION_UNKNOWN:
                return mime
        except HydrusExceptions.UnsupportedFileException:
            pass  # ffmpeg does not recognise it either
        except Exception as e:
            HydrusData.Print('FFMPEG had trouble with: ' + path)
            HydrusData.PrintException(e, do_wait=False)

    return HC.APPLICATION_UNKNOWN
2023-10-11 20:46:40 +00:00
2023-10-07 19:14:35 +00:00
2023-10-11 20:46:40 +00:00
# The subset of signature rows used for the quick thumbnail check in GetThumbnailMime.
# NOTE(review): the PNG row in headers_and_mime is tagged HC.UNDETERMINED_PNG, not
# HC.IMAGE_PNG, so unless those two constants are equal this filter keeps only the
# JPEG row and PNG thumbnails always fall through to the full GetMime -- confirm
# whether that is intended.
headers_and_mime_thumbnails = [
    signature_row
    for signature_row in headers_and_mime
    if signature_row[1] in (HC.IMAGE_JPEG, HC.IMAGE_PNG)
]
2023-10-07 19:14:35 +00:00
2022-04-06 20:40:17 +00:00
def GetThumbnailMime(path):
    """Return the mime of a thumbnail file, trying a fast path first.

    The first 256 bytes are tested against the short
    ``headers_and_mime_thumbnails`` list; the mime of the first signature that
    passes is returned. If none passes, the full GetMime detection is used.
    """
    with open(path, 'rb') as thumbnail_file:
        leading_bytes = thumbnail_file.read(256)

    for (signature, thumbnail_mime) in headers_and_mime_thumbnails:
        if not passes_offsets_and_headers(signature, leading_bytes):
            continue

        return thumbnail_mime

    # nothing on the shortlist matched, so do it the thorough way
    return GetMime(path)