2024-01-31 21:20:50 +00:00
import collections
2013-08-07 22:25:18 +00:00
import hashlib
2020-07-29 20:52:44 +00:00
import os
2024-01-10 21:27:29 +00:00
import typing
2020-07-29 20:52:44 +00:00
2020-04-22 21:00:35 +00:00
from hydrus . core import HydrusConstants as HC
from hydrus . core import HydrusData
from hydrus . core import HydrusExceptions
from hydrus . core import HydrusPaths
from hydrus . core import HydrusSerialisable
2021-10-27 21:12:33 +00:00
from hydrus . core import HydrusTemp
2020-04-22 21:00:35 +00:00
from hydrus . core import HydrusText
2024-01-17 18:57:00 +00:00
from hydrus . core import HydrusTime
2024-01-03 21:21:53 +00:00
from hydrus . core . files import HydrusAnimationHandling
from hydrus . core . files import HydrusArchiveHandling
from hydrus . core . files import HydrusClipHandling
from hydrus . core . files import HydrusFlashHandling
from hydrus . core . files import HydrusKritaHandling
from hydrus . core . files import HydrusPDFHandling
from hydrus . core . files import HydrusProcreateHandling
from hydrus . core . files import HydrusPSDHandling
from hydrus . core . files import HydrusSVGHandling
from hydrus . core . files import HydrusUgoiraHandling
from hydrus . core . files import HydrusVideoHandling
2024-03-16 20:35:32 +00:00
from hydrus . core . files import HydrusOfficeOpenXMLHandling
2024-01-03 21:21:53 +00:00
from hydrus . core . files . images import HydrusImageHandling
2021-04-07 21:26:45 +00:00
from hydrus . core . networking import HydrusNetwork
2013-07-17 20:56:13 +00:00
2022-12-07 22:41:53 +00:00
# speedcopy is optional. SPEEDCOPY_OK records whether it imported and patched cleanly.
try:
    import speedcopy

    speedcopy.patch_copyfile()

except ImportError:
    # simply not installed: stay quiet about it
    SPEEDCOPY_OK = False

except Exception as e:
    # present but failed to initialise: report it and carry on without it
    HydrusData.Print(' Failed to initialise speedcopy: ')
    HydrusData.PrintException(e)

    SPEEDCOPY_OK = False

else:
    SPEEDCOPY_OK = True
2024-01-31 21:20:50 +00:00
def _DefaultThumbnailPath(filename):
    # resolves a stock icon filename against the static dir
    return os.path.join(HC.STATIC_DIR, filename)


# maps mime -> path of the stock icon used when no real thumbnail can be made.
# a mime with no entry falls back to the generic hydrus icon.
mimes_to_default_thumbnail_paths = collections.defaultdict(lambda: _DefaultThumbnailPath(' hydrus.png '))

# broad families first. order matters: a later assignment overwrites an earlier one for the same mime
for (mime_family, icon_filename) in (
    (HC.AUDIO, ' audio.png '),
    (HC.VIDEO, ' video.png '),
    (HC.ANIMATIONS, ' video.png '),
):
    for mime in mime_family:
        mimes_to_default_thumbnail_paths[mime] = _DefaultThumbnailPath(icon_filename)

# these two loops also leave a module-level 'png_path' behind, exactly as the original chained assignment did
for mime in HC.ARCHIVES:
    png_path = _DefaultThumbnailPath(' zip.png ')
    mimes_to_default_thumbnail_paths[mime] = png_path

for mime in HC.IMAGES:
    png_path = _DefaultThumbnailPath(' image.png ')
    mimes_to_default_thumbnail_paths[mime] = png_path

# individual filetypes last, so they win over any family icon set above
for (specific_mime, icon_filename) in (
    (HC.APPLICATION_UNKNOWN, ' hydrus.png '),
    (HC.APPLICATION_PDF, ' pdf.png '),
    (HC.APPLICATION_DOCX, ' docx.png '),
    (HC.APPLICATION_XLSX, ' xlsx.png '),
    (HC.APPLICATION_PPTX, ' pptx.png '),
    (HC.APPLICATION_EPUB, ' epub.png '),
    (HC.APPLICATION_DJVU, ' djvu.png '),
    (HC.APPLICATION_PSD, ' psd.png '),
    (HC.APPLICATION_CLIP, ' clip.png '),
    (HC.APPLICATION_SAI2, ' sai.png '),
    (HC.APPLICATION_KRITA, ' krita.png '),
    (HC.APPLICATION_FLASH, ' flash.png '),
    (HC.APPLICATION_XCF, ' xcf.png '),
    (HC.APPLICATION_PROCREATE, ' procreate.png '),
    (HC.APPLICATION_RTF, ' rtf.png '),
    (HC.IMAGE_SVG, ' svg.png '),
):
    mimes_to_default_thumbnail_paths[specific_mime] = _DefaultThumbnailPath(icon_filename)
2015-03-25 22:04:19 +00:00
2024-03-16 20:35:32 +00:00
def GenerateDefaultThumbnail(mime: int, target_resolution: typing.Tuple[int, int]):
    """Build the stock-icon thumbnail for this mime at the given resolution.

    The icon path comes from mimes_to_default_thumbnail_paths, which supplies
    the generic hydrus icon for any mime it has no explicit entry for.
    """
    return HydrusImageHandling.GenerateDefaultThumbnailNumPyFromPath(
        mimes_to_default_thumbnail_paths[mime],
        target_resolution
    )
2023-11-01 21:38:03 +00:00
def GenerateThumbnailBytes(path, target_resolution, mime, duration, num_frames, percentage_in = 35):
    """Generate a thumbnail for the file at path and return it as encoded bytes.

    Thin wrapper: the numpy thumbnail comes from GenerateThumbnailNumPy and is
    then handed to HydrusImageHandling.GenerateThumbnailBytesFromNumPy.
    """
    return HydrusImageHandling.GenerateThumbnailBytesFromNumPy(
        GenerateThumbnailNumPy(path, target_resolution, mime, duration, num_frames, percentage_in = percentage_in)
    )
2023-09-23 19:13:21 +00:00
2024-01-10 21:27:29 +00:00
def PrintMoreThumbErrorInfo(e: Exception, message, extra_description: typing.Optional[str] = None):
    """Log a thumbnail generation failure, unless it is a NoThumbnailFileException.

    Prints message, then the optional extra description, then the exception itself.
    """
    if isinstance(e, HydrusExceptions.NoThumbnailFileException):
        # this exception type is deliberately not logged
        return

    HydrusData.Print(message)

    if extra_description is not None:
        HydrusData.Print(f' Extra info: { extra_description } ')

    HydrusData.PrintException(e)
def _GenerateThumbnailNumPyOrDefault(generate, path, target_resolution, mime, extra_description):
    """Return generate(); if it raises, log the problem and return the default thumbnail for mime."""
    try:
        return generate()

    except Exception as e:
        PrintMoreThumbErrorInfo(e, f' Problem generating thumbnail for " { path } " . ', extra_description = extra_description)

        return GenerateDefaultThumbnail(mime, target_resolution)


def _GenerateThumbnailNumPyViaTempPath(write_image_to_temp_path, path, target_resolution, mime, extra_description, temp_mime = None, error_message = None, temp_suffix = None):
    """Write an intermediate image to a temp path, thumbnail that, and always clean the temp path up.

    write_image_to_temp_path -- callable taking the temp path; it must leave an image file there
    temp_mime -- mime of the intermediate image; when None it is sniffed with GetMime
    error_message -- what to log on failure; defaults to the standard 'Problem generating' line
    temp_suffix -- forwarded to HydrusTemp.GetTempPath as suffix when given

    Any exception after the temp path is obtained is logged and answered with the default thumbnail.
    """
    if error_message is None:
        error_message = f' Problem generating thumbnail for " { path } " . '

    # obtaining the temp path is deliberately outside the try, so a failure here propagates to the caller
    if temp_suffix is None:
        (os_file_handle, temp_path) = HydrusTemp.GetTempPath()
    else:
        (os_file_handle, temp_path) = HydrusTemp.GetTempPath(suffix = temp_suffix)

    try:
        write_image_to_temp_path(temp_path)

        image_mime = GetMime(temp_path) if temp_mime is None else temp_mime

        return HydrusImageHandling.GenerateThumbnailNumPyFromStaticImagePath(temp_path, target_resolution, image_mime)

    except Exception as e:
        PrintMoreThumbErrorInfo(e, error_message, extra_description = extra_description)

        return GenerateDefaultThumbnail(mime, target_resolution)

    finally:
        HydrusTemp.CleanUpTempPath(os_file_handle, temp_path)


def _GenerateThumbnailNumPyFromFFMPEG(path, target_resolution, mime, duration, num_frames, percentage_in, extra_description):
    """Render a thumbnail frame with ffmpeg, trying the desired frame and then the first frame.

    Falls back to the default thumbnail when neither frame renders. Whatever renderer is
    still live is stopped on every exit path, including when the resize itself raises.
    """
    renderer = None

    desired_thumb_frame_index = int((percentage_in / 100.0) * (num_frames - 1))

    try:
        try:
            renderer = HydrusVideoHandling.VideoRendererFFMPEG(path, mime, duration, num_frames, target_resolution, start_pos = desired_thumb_frame_index)

            numpy_image = renderer.read_frame()

        except Exception as e:
            message = ' Problem generating thumbnail for " {} " at frame {} ( {} )--FFMPEG could not render it. '.format(path, desired_thumb_frame_index, HydrusData.ConvertFloatToPercentage(percentage_in / 100.0))

            PrintMoreThumbErrorInfo(e, message, extra_description = extra_description)

            numpy_image = None

        if numpy_image is None and desired_thumb_frame_index != 0:
            if renderer is not None:
                renderer.Stop()

                # forget the stopped renderer so it is not stopped again if the retry cannot construct a new one
                renderer = None

            # try first frame instead
            try:
                renderer = HydrusVideoHandling.VideoRendererFFMPEG(path, mime, duration, num_frames, target_resolution)

                numpy_image = renderer.read_frame()

            except Exception as e:
                message = ' Problem generating thumbnail for " {} " at first frame--FFMPEG could not render it. '.format(path)

                PrintMoreThumbErrorInfo(e, message, extra_description = extra_description)

                numpy_image = None

        if numpy_image is None:
            return GenerateDefaultThumbnail(mime, target_resolution)

        return HydrusImageHandling.ResizeNumPyImage(numpy_image, target_resolution)  # just in case ffmpeg doesn't deliver right

    finally:
        if renderer is not None:
            renderer.Stop()


def GenerateThumbnailNumPy(path, target_resolution, mime, duration, num_frames, percentage_in = 35, extra_description = None):
    """Generate a numpy thumbnail for the file at path, choosing the strategy by mime.

    path -- file to thumbnail
    target_resolution -- resolution handed to the underlying thumbnail generators
    mime -- filetype constant that selects the branch below
    duration, num_frames -- passed to the ffmpeg renderer; num_frames also picks the ugoira/video frame
    percentage_in -- how far into an ugoira/video (0-100) the thumbnail frame is taken from
    extra_description -- optional extra text included in any error log

    Generation errors inside a branch are logged via PrintMoreThumbErrorInfo and answered with
    the default thumbnail for the mime, rather than raised.
    """
    if mime == HC.APPLICATION_CBZ:
        return _GenerateThumbnailNumPyViaTempPath(
            lambda temp_path: HydrusArchiveHandling.ExtractCoverPage(path, temp_path),
            path, target_resolution, mime, extra_description
        )

    elif mime == HC.APPLICATION_CLIP:
        return _GenerateThumbnailNumPyViaTempPath(
            lambda temp_path: HydrusClipHandling.ExtractDBPNGToPath(path, temp_path),
            path, target_resolution, mime, extra_description,
            temp_mime = HC.IMAGE_PNG
        )

    elif mime == HC.APPLICATION_KRITA:
        return _GenerateThumbnailNumPyOrDefault(
            lambda: HydrusKritaHandling.GenerateThumbnailNumPyFromKraPath(path, target_resolution),
            path, target_resolution, mime, extra_description
        )

    elif mime == HC.APPLICATION_PROCREATE:
        return _GenerateThumbnailNumPyViaTempPath(
            lambda temp_path: HydrusProcreateHandling.ExtractZippedThumbnailToPath(path, temp_path),
            path, target_resolution, mime, extra_description,
            temp_mime = HC.IMAGE_PNG
        )

    elif mime == HC.APPLICATION_PSD:
        try:
            return HydrusPSDHandling.GenerateThumbnailNumPyFromPSDPath(path, target_resolution)

        except Exception as e:
            PrintMoreThumbErrorInfo(e, f' Problem generating thumbnail for " { path } " . ', extra_description = extra_description)

            HydrusData.Print(' Attempting ffmpeg PSD thumbnail fallback ')

            # second attempt: have ffmpeg render the PSD to a png and thumbnail that
            return _GenerateThumbnailNumPyViaTempPath(
                lambda temp_path: HydrusVideoHandling.RenderImageToImagePath(path, temp_path),
                path, target_resolution, mime, extra_description,
                temp_mime = HC.IMAGE_PNG,
                error_message = f' Secondary problem generating thumbnail for " { path } " . ',
                temp_suffix = ' .png '
            )

    elif mime == HC.IMAGE_SVG:
        return _GenerateThumbnailNumPyOrDefault(
            lambda: HydrusSVGHandling.GenerateThumbnailNumPyFromSVGPath(path, target_resolution),
            path, target_resolution, mime, extra_description
        )

    elif mime == HC.APPLICATION_PDF:
        return _GenerateThumbnailNumPyOrDefault(
            lambda: HydrusPDFHandling.GenerateThumbnailNumPyFromPDFPath(path, target_resolution),
            path, target_resolution, mime, extra_description
        )

    elif mime == HC.APPLICATION_PPTX:
        return _GenerateThumbnailNumPyOrDefault(
            lambda: HydrusOfficeOpenXMLHandling.GenerateThumbnailNumPyFromOfficePath(path, target_resolution),
            path, target_resolution, mime, extra_description
        )

    elif mime == HC.APPLICATION_FLASH:
        return _GenerateThumbnailNumPyViaTempPath(
            lambda temp_path: HydrusFlashHandling.RenderPageToFile(path, temp_path, 1),
            path, target_resolution, mime, extra_description,
            temp_mime = HC.IMAGE_PNG
        )

    elif mime in HC.IMAGES:
        # TODO: it would be nice to have gif and apng generating their thumb x frames in, like with videos. maybe we should add animation thumb fetcher to hydrusanimationhandling
        return _GenerateThumbnailNumPyOrDefault(
            lambda: HydrusImageHandling.GenerateThumbnailNumPyFromStaticImagePath(path, target_resolution, mime),
            path, target_resolution, mime, extra_description
        )

    elif mime == HC.ANIMATION_UGOIRA:
        def extract_ugoira_frame(temp_path):
            # the index is computed here, inside the guarded section, so a bad num_frames is logged rather than raised
            desired_thumb_frame_index = int((percentage_in / 100.0) * (num_frames - 1))

            HydrusUgoiraHandling.ExtractFrame(path, desired_thumb_frame_index, temp_path)

        return _GenerateThumbnailNumPyViaTempPath(
            extract_ugoira_frame,
            path, target_resolution, mime, extra_description
        )

    else:  # animations and video
        return _GenerateThumbnailNumPyFromFFMPEG(path, target_resolution, mime, duration, num_frames, percentage_in, extra_description)
2019-03-27 22:01:02 +00:00
2015-03-25 22:04:19 +00:00
def GetExtraHashesFromPath(path):
    """Return the (md5, sha1, sha512) raw digests of the file at path.

    The file is read once, block by block, and every block is fed to all three hashers.
    """
    h_md5 = hashlib.md5()
    h_sha1 = hashlib.sha1()
    h_sha512 = hashlib.sha512()

    hashers = (h_md5, h_sha1, h_sha512)

    # binary read mode; the mode string must not carry padding whitespace or open() rejects it
    with open(path, 'rb') as f:
        for block in HydrusPaths.ReadFileLikeAsBlocks(f):
            for hasher in hashers:
                hasher.update(block)

    return (h_md5.digest(), h_sha1.digest(), h_sha512.digest())
2023-08-23 20:43:26 +00:00
2019-07-10 22:38:30 +00:00
def GetFileInfo(path, mime = None, ok_to_look_for_hydrus_updates = False):
    """Inspect the file at path and return its basic metadata.

    path -- file to inspect
    mime -- filetype constant if already known; when None it is determined with GetMime
    ok_to_look_for_hydrus_updates -- forwarded to GetMime when the mime has to be determined

    Returns (size, mime, width, height, duration, num_frames, has_audio, num_words).
    Any of width/height/duration/num_frames/num_words is None when the filetype branch
    does not provide it.

    Raises HydrusExceptions.ZeroSizeFileException for an empty file and
    HydrusExceptions.UnsupportedFileException for a disallowed or unsupported filetype.
    """
    size = os.path.getsize(path)

    if size == 0:
        raise HydrusExceptions.ZeroSizeFileException(' File is of zero length! ')

    if mime is None:
        mime = GetMime(path, ok_to_look_for_hydrus_updates = ok_to_look_for_hydrus_updates)

    if mime not in HC.ALLOWED_MIMES:
        if mime == HC.TEXT_HTML:
            raise HydrusExceptions.UnsupportedFileException(' Looks like HTML -- maybe the client needs to be taught how to parse this? ')

        elif mime == HC.APPLICATION_UNKNOWN:
            raise HydrusExceptions.UnsupportedFileException(' Unknown filetype! ')

        else:
            raise HydrusExceptions.UnsupportedFileException(' Filetype is not permitted! ')

    if mime in HC.PIL_HEIF_MIMES and not HydrusImageHandling.HEIF_OK:
        raise HydrusExceptions.UnsupportedFileException(' Sorry, you need the pillow-heif library to support this filetype ( {} )! Please rebuild your venv. '.format(HC.mime_string_lookup[mime]))

    width = None
    height = None
    duration = None
    num_frames = None
    num_words = None

    # starting assumption; the video branch below replaces it with what ffmpeg reports
    has_audio = mime in HC.MIMES_THAT_DEFINITELY_HAVE_AUDIO

    # keep this in the specific-first, general-last test order
    if mime == HC.APPLICATION_CBZ:
        (os_file_handle, temp_path) = HydrusTemp.GetTempPath()

        try:
            HydrusArchiveHandling.ExtractCoverPage(path, temp_path)

            cover_mime = GetMime(temp_path)

            (width, height) = HydrusImageHandling.GetImageResolution(temp_path, cover_mime)

        except Exception:
            # best effort: a cover we cannot read just means no resolution
            (width, height) = (None, None)

        finally:
            HydrusTemp.CleanUpTempPath(os_file_handle, temp_path)

    elif mime == HC.APPLICATION_CLIP:
        ((width, height), duration, num_frames) = HydrusClipHandling.GetClipProperties(path)

    elif mime == HC.APPLICATION_KRITA:
        try:
            (width, height) = HydrusKritaHandling.GetKraProperties(path)

        except HydrusExceptions.NoResolutionFileException:
            pass

    elif mime == HC.APPLICATION_PROCREATE:
        try:
            (width, height) = HydrusProcreateHandling.GetProcreateResolution(path)

        except Exception:
            # best effort: leave the resolution unknown
            pass

    elif mime == HC.IMAGE_SVG:
        try:
            (width, height) = HydrusSVGHandling.GetSVGResolution(path)

        except HydrusExceptions.NoResolutionFileException:
            pass

    elif mime == HC.APPLICATION_PDF:
        try:
            (num_words, (width, height)) = HydrusPDFHandling.GetPDFInfo(path)

        except HydrusExceptions.LimitedSupportFileException:
            pass

    elif mime == HC.APPLICATION_PPTX:
        try:
            (num_words, (width, height)) = HydrusOfficeOpenXMLHandling.GetPPTXInfo(path)

        except HydrusExceptions.LimitedSupportFileException:
            pass

    elif mime == HC.APPLICATION_DOCX:
        try:
            num_words = HydrusOfficeOpenXMLHandling.GetDOCXInfo(path)

        except HydrusExceptions.LimitedSupportFileException:
            pass

    elif mime == HC.APPLICATION_FLASH:
        ((width, height), duration, num_frames) = HydrusFlashHandling.GetFlashProperties(path)

    elif mime == HC.APPLICATION_PSD:
        try:
            (width, height) = HydrusPSDHandling.GetPSDResolution(path)

        except Exception as e:
            HydrusData.Print(' Problem calculating resolution for " {} " : '.format(path))
            HydrusData.PrintException(e)
            HydrusData.Print(' Attempting PSD resolution fallback ')

            (width, height) = HydrusPSDHandling.GetPSDResolutionFallback(path)

    elif mime in HC.VIDEO or mime in HC.HEIF_TYPE_SEQUENCES:
        ((width, height), duration, num_frames, has_audio) = HydrusVideoHandling.GetFFMPEGVideoProperties(path)

    elif mime in HC.VIEWABLE_ANIMATIONS:
        ((width, height), duration, num_frames) = HydrusAnimationHandling.GetAnimationProperties(path, mime)

    elif mime == HC.ANIMATION_UGOIRA:
        ((width, height), num_frames) = HydrusUgoiraHandling.GetUgoiraProperties(path)

    elif mime in HC.IMAGES:
        (width, height) = HydrusImageHandling.GetImageResolution(path, mime)

    elif mime in HC.AUDIO:
        ffmpeg_lines = HydrusVideoHandling.GetFFMPEGInfoLines(path)

        (file_duration_in_s, stream_duration_in_s) = HydrusVideoHandling.ParseFFMPEGDuration(ffmpeg_lines)

        # seconds -> whole milliseconds
        duration = int(file_duration_in_s * 1000)

    # negative values are flipped positive, each field on its own
    if width is not None and width < 0:
        width *= -1

    if height is not None and height < 0:
        # this used to flip width, leaving a negative height in the result
        height *= -1

    if duration is not None and duration < 0:
        duration *= -1

    if num_frames is not None and num_frames < 0:
        num_frames *= -1

    if num_words is not None and num_words < 0:
        num_words *= -1

    return (size, mime, width, height, duration, num_frames, has_audio, num_words)
2013-07-17 20:56:13 +00:00
2023-08-09 21:12:17 +00:00
2024-01-17 18:57:00 +00:00
def GetFileModifiedTimestampMS(path) -> int:
    """Return the file's modification time, converted from the seconds os.path.getmtime gives via HydrusTime.MillisecondiseS."""
    modified_timestamp_s = os.path.getmtime(path)

    return HydrusTime.MillisecondiseS(modified_timestamp_s)
2019-09-25 21:34:18 +00:00
2023-08-09 21:12:17 +00:00
2013-08-07 22:25:18 +00:00
def GetHashFromPath(path):
    """Return the raw sha256 digest of the file at path, reading it block by block."""
    h = hashlib.sha256()

    # binary read mode; the mode string must not carry padding whitespace or open() rejects it
    with open(path, 'rb') as f:
        for block in HydrusPaths.ReadFileLikeAsBlocks(f):
            h.update(block)

    return h.digest()
2023-08-09 21:12:17 +00:00
2023-10-11 20:46:40 +00:00
# TODO: replace this with a FileTypeChecker class or something that tucks all this messy data away more neatly
2023-10-11 21:20:32 +00:00
# do this the next time you visit this place
2023-10-07 19:14:35 +00:00
# Magic-number table used for filetype sniffing.
# Each entry is ( offsets_and_headers, mime ), where offsets_and_headers is a tuple of ( offsets, headers ) pairs.
# A pair passes when any of its headers sits at any of its offsets; an entry matches only when every pair passes.
# Entries are tested top to bottom and the first match wins, so the order is significant.
headers_and_mime = [
    ((([0], [b' \xff \xd8 ']),), HC.IMAGE_JPEG),
    ((([0], [b' \x89 PNG ']),), HC.UNDETERMINED_PNG),
    ((([0], [b' GIF87a ', b' GIF89a ']),), HC.UNDETERMINED_GIF),
    ((([8], [b' WEBP ']),), HC.IMAGE_WEBP),
    ((([0], [b' II* \x00 ', b' MM \x00 * ']),), HC.IMAGE_TIFF),
    ((([0], [b' BM ']),), HC.IMAGE_BMP),
    ((([0], [b' \x00 \x00 \x01 \x00 ', b' \x00 \x00 \x02 \x00 ']),), HC.IMAGE_ICON),
    ((([0], [b' qoif ']),), HC.IMAGE_QOI),
    ((([0], [b' CWS ', b' FWS ', b' ZWS ']),), HC.APPLICATION_FLASH),
    ((([0], [b' FLV ']),), HC.VIDEO_FLV),
    ((([0], [b' % PDF ']),), HC.APPLICATION_PDF),
    ((([0], [b' 8BPS \x00 \x01 ', b' 8BPS \x00 \x02 ']),), HC.APPLICATION_PSD),
    ((([0], [b' CSFCHUNK ']),), HC.APPLICATION_CLIP),
    ((([0], [b' SAI-CANVAS ']),), HC.APPLICATION_SAI2),
    ((([0], [b' gimp xcf ']),), HC.APPLICATION_XCF),
    ((([38, 42, 58, 63], [b' application/x-krita ']),), HC.APPLICATION_KRITA),  # important this comes before zip files because this is also a zip file
    ((([38, 43], [b' application/epub+zip ']),), HC.APPLICATION_EPUB),
    ((([4], [b' FORM ']), ([12], [b' DJVU ', b' DJVM ', b' PM44 ', b' BM44 ', b' SDJV ']),), HC.APPLICATION_DJVU),
    ((([0], [b' { \\ rtf ']),), HC.APPLICATION_RTF),
    ((([0], [b' PK \x03 \x04 ', b' PK \x05 \x06 ', b' PK \x07 \x08 ']),), HC.APPLICATION_ZIP),
    ((([0], [b' 7z \xBC \xAF \x27 \x1C ']),), HC.APPLICATION_7Z),
    ((([0], [b' \x52 \x61 \x72 \x21 \x1A \x07 \x00 ', b' \x52 \x61 \x72 \x21 \x1A \x07 \x01 \x00 ']),), HC.APPLICATION_RAR),
    ((([0], [b' \x1f \x8b ']),), HC.APPLICATION_GZIP),
    ((([0], [b' hydrus encrypted zip ']),), HC.APPLICATION_HYDRUS_ENCRYPTED_ZIP),
    ((([4], [b' ftypavif ']),), HC.IMAGE_AVIF),
    ((([4], [b' ftypavis ']),), HC.IMAGE_AVIF_SEQUENCE),
    ((([4], [b' ftypmif1 ']), ([16, 20, 24], [b' avif ']),), HC.IMAGE_AVIF),  # must stay above the plain ftypmif1 entry, which would otherwise claim these files as HEIF
    ((([4], [b' ftypheic ', b' ftypheix ', b' ftypheim ', b' ftypheis ']),), HC.IMAGE_HEIC),
    ((([4], [b' ftyphevc ', b' ftyphevx ', b' ftyphevm ', b' ftyphevs ']),), HC.IMAGE_HEIC_SEQUENCE),
    ((([4], [b' ftypmif1 ']),), HC.IMAGE_HEIF),
    ((([4], [b' ftypmsf1 ']),), HC.IMAGE_HEIF_SEQUENCE),
    # 'ftypavc1' was listed twice here; the duplicate could never change a match and has been dropped
    ((([4], [b' ftypmp4 ', b' ftypisom ', b' ftypM4V ', b' ftypMSNV ', b' ftypavc1 ', b' ftypFACE ', b' ftypdash ']),), HC.UNDETERMINED_MP4),
    ((([4], [b' ftypqt ']),), HC.VIDEO_MOV),
    ((([0], [b' fLaC ']),), HC.AUDIO_FLAC),
    ((([0], [b' RIFF ']), ([8], [b' WAVE '])), HC.AUDIO_WAVE),
    ((([0], [b' wvpk ']),), HC.AUDIO_WAVPACK),
    ((([8], [b' AVI ']),), HC.VIDEO_AVI),
    ((([0], [b' \x30 \x26 \xB2 \x75 \x8E \x66 \xCF \x11 \xA6 \xD9 \x00 \xAA \x00 \x62 \xCE \x6C ']),), HC.UNDETERMINED_WM),
    ((([0], [b' \x4D \x5A \x90 \x00 \x03 '],),), HC.APPLICATION_WINDOWS_EXE)
]
2023-10-11 21:26:01 +00:00
def passes_offsets_and_headers_pair(offsets, headers, first_bytes_of_file) -> bool:
    """Return True if any of the headers appears at any of the offsets in first_bytes_of_file.

    offsets -- candidate start positions
    headers -- candidate byte signatures
    first_bytes_of_file -- the leading bytes of the file under test
    """
    return any(
        first_bytes_of_file[offset : offset + len(header)] == header
        for offset in offsets
        for header in headers
    )
def passes_offsets_and_headers(offsets_and_headers, first_bytes_of_file) -> bool:
    """Return True only if every ( offsets, headers ) pair passes against first_bytes_of_file.

    An empty collection of pairs passes trivially.
    """
    # ok we need to match every pair here
    return all(
        passes_offsets_and_headers_pair(offsets, headers, first_bytes_of_file)
        for (offsets, headers) in offsets_and_headers
    )
2023-10-11 20:46:40 +00:00
2023-10-07 19:14:35 +00:00
2019-07-10 22:38:30 +00:00
def _GetMimeFromZip(path):
    """Refine a file with a zip header into the specific zip-based format it holds.

    Checks run in this order: encrypted/damaged (plain zip), OpenDocument, Microsoft
    Open XML, Procreate, Ugoira, CBZ. Anything left over is a plain zip.
    """
    try:
        if HydrusArchiveHandling.IsEncryptedZip(path):
            return HC.APPLICATION_ZIP

    except HydrusExceptions.DamagedOrUnusualFileException:
        return HC.APPLICATION_ZIP

    opendoc_mime = HydrusArchiveHandling.MimeFromOpenDocument(path)

    if opendoc_mime is not None:
        return opendoc_mime

    microsoft_mime = HydrusOfficeOpenXMLHandling.MimeFromMicrosoftOpenXMLDocument(path)

    if microsoft_mime is not None:
        return microsoft_mime

    if HydrusProcreateHandling.ZipLooksLikeProcreate(path):
        return HC.APPLICATION_PROCREATE

    if HydrusUgoiraHandling.ZipLooksLikeUgoira(path):
        return HC.ANIMATION_UGOIRA

    if HydrusArchiveHandling.ZipLooksLikeCBZ(path):
        return HC.APPLICATION_CBZ

    return HC.APPLICATION_ZIP


def GetMime(path, ok_to_look_for_hydrus_updates = False):
    """Determine the filetype constant for the file at path.

    path -- file to sniff
    ok_to_look_for_hydrus_updates -- when True, a file under 64MB is first tried as a
        serialised hydrus content/definitions update

    The first 256 bytes are then matched against headers_and_mime (first match wins),
    followed by JSON, HTML and SVG text checks, and finally ffmpeg. Returns
    HC.APPLICATION_UNKNOWN when nothing recognises the file.

    Raises HydrusExceptions.ZeroSizeFileException for an empty file.
    """
    size = os.path.getsize(path)

    if size == 0:
        raise HydrusExceptions.ZeroSizeFileException(' File is of zero length! ')

    if ok_to_look_for_hydrus_updates and size < 64 * 1024 * 1024:
        # binary read mode; the mode string must not carry padding whitespace or open() rejects it
        with open(path, 'rb') as f:
            update_network_bytes = f.read()

        try:
            update = HydrusSerialisable.CreateFromNetworkBytes(update_network_bytes)

            if isinstance(update, HydrusNetwork.ContentUpdate):
                return HC.APPLICATION_HYDRUS_UPDATE_CONTENT

            elif isinstance(update, HydrusNetwork.DefinitionsUpdate):
                return HC.APPLICATION_HYDRUS_UPDATE_DEFINITIONS

        except Exception:
            # not an update file; fall through to normal sniffing.
            # narrowed from a bare except so KeyboardInterrupt/SystemExit are no longer swallowed
            pass

    with open(path, 'rb') as f:
        first_bytes_of_file = f.read(256)

    for (offsets_and_headers, mime) in headers_and_mime:
        if not passes_offsets_and_headers(offsets_and_headers, first_bytes_of_file):
            continue

        if mime == HC.APPLICATION_ZIP:
            return _GetMimeFromZip(path)

        if mime in (HC.UNDETERMINED_WM, HC.UNDETERMINED_MP4):
            return HydrusVideoHandling.GetMime(path)

        if mime == HC.UNDETERMINED_PNG:
            if HydrusAnimationHandling.IsPNGAnimated(first_bytes_of_file):
                return HC.ANIMATION_APNG

            return HC.IMAGE_PNG

        if mime == HC.UNDETERMINED_GIF:
            if HydrusAnimationHandling.PILAnimationHasDuration(path):
                return HC.ANIMATION_GIF

            return HC.IMAGE_GIF

        return mime

    # If the file starts with '{' it is probably JSON
    # but we can't know for sure so we send it over to be checked
    if first_bytes_of_file.startswith((b' { ', b' [ ')):
        with open(path, 'rb') as f:
            potential_json_document_bytes = f.read()

        if HydrusText.LooksLikeJSON(potential_json_document_bytes):
            return HC.APPLICATION_JSON

    if HydrusText.LooksLikeHTML(first_bytes_of_file):
        return HC.TEXT_HTML

    if HydrusText.LooksLikeSVG(first_bytes_of_file):
        return HC.IMAGE_SVG

    # it is important this goes at the end, because ffmpeg has a billion false positives! and it takes CPU to true negative
    # for instance, it once thought some hydrus update files were mpegs
    # it also thinks txt files can be mpegs
    likely_to_false_positive = path.endswith((' .txt ', ' .log ', ' .json '))

    if not likely_to_false_positive:
        try:
            mime = HydrusVideoHandling.GetMime(path)

            if mime != HC.APPLICATION_UNKNOWN:
                return mime

        except HydrusExceptions.UnsupportedFileException:
            pass

        except Exception as e:
            HydrusData.Print(' FFMPEG had trouble with: ' + path)
            HydrusData.PrintException(e, do_wait = False)

    return HC.APPLICATION_UNKNOWN
2023-10-11 20:46:40 +00:00
2023-10-07 19:14:35 +00:00
2023-10-11 20:46:40 +00:00
# Reduced signature table for thumbnails, which are only expected to be jpeg or png.
# The main table registers the png signature under HC.UNDETERMINED_PNG, so filtering on HC.IMAGE_PNG alone
# never selected it and every png thumbnail fell through to the full GetMime. Here the png signature is
# selected and mapped straight to HC.IMAGE_PNG, with no animation check.
headers_and_mime_thumbnails = [
    (offsets_and_headers, HC.IMAGE_PNG if mime == HC.UNDETERMINED_PNG else mime)
    for (offsets_and_headers, mime) in headers_and_mime
    if mime in (HC.IMAGE_JPEG, HC.IMAGE_PNG, HC.UNDETERMINED_PNG)
]
2023-10-07 19:14:35 +00:00
2022-04-06 20:40:17 +00:00
def GetThumbnailMime(path):
    """Return the mime of a thumbnail file, checking the short thumbnail signature table first.

    Only the first 256 bytes are read for the quick check. If none of the entries in
    headers_and_mime_thumbnails match, the full GetMime sniff is used instead.
    """
    # binary read mode; the mode string must not carry padding whitespace or open() rejects it
    with open(path, 'rb') as f:
        bit_to_check = f.read(256)

    for (offsets_and_headers, mime) in headers_and_mime_thumbnails:
        if passes_offsets_and_headers(offsets_and_headers, bit_to_check):
            return mime

    return GetMime(path)