hydrus/hydrus/core/files/HydrusFileHandling.py

925 lines
31 KiB
Python
Raw Normal View History

2024-01-31 21:20:50 +00:00
import collections
2013-08-07 22:25:18 +00:00
import hashlib
2020-07-29 20:52:44 +00:00
import os
2024-01-10 21:27:29 +00:00
import typing
2020-07-29 20:52:44 +00:00
2020-04-22 21:00:35 +00:00
from hydrus.core import HydrusConstants as HC
from hydrus.core import HydrusData
from hydrus.core import HydrusExceptions
from hydrus.core import HydrusPaths
from hydrus.core import HydrusSerialisable
from hydrus.core import HydrusTemp
2020-04-22 21:00:35 +00:00
from hydrus.core import HydrusText
2024-01-17 18:57:00 +00:00
from hydrus.core import HydrusTime
2024-01-03 21:21:53 +00:00
from hydrus.core.files import HydrusAnimationHandling
from hydrus.core.files import HydrusArchiveHandling
from hydrus.core.files import HydrusClipHandling
from hydrus.core.files import HydrusFlashHandling
from hydrus.core.files import HydrusKritaHandling
from hydrus.core.files import HydrusPDFHandling
from hydrus.core.files import HydrusProcreateHandling
from hydrus.core.files import HydrusPSDHandling
from hydrus.core.files import HydrusSVGHandling
from hydrus.core.files import HydrusUgoiraHandling
from hydrus.core.files import HydrusVideoHandling
from hydrus.core.files import HydrusOfficeOpenXMLHandling
2024-01-03 21:21:53 +00:00
from hydrus.core.files.images import HydrusImageHandling
2021-04-07 21:26:45 +00:00
from hydrus.core.networking import HydrusNetwork
2013-07-17 20:56:13 +00:00
2022-12-07 22:41:53 +00:00
# speedcopy is an optional third-party module. If it can be imported, its
# patch_copyfile() hook is applied; SPEEDCOPY_OK records whether that worked.
try:
    
    import speedcopy
    
    speedcopy.patch_copyfile()
    
except ImportError:
    
    # simply not available (or a dependency of it is missing): stay quiet
    SPEEDCOPY_OK = False
    
except Exception as e:
    
    # it is present but blew up during initialisation, which is worth reporting
    HydrusData.Print( 'Failed to initialise speedcopy:' )
    HydrusData.PrintException( e )
    
    SPEEDCOPY_OK = False
    
else:
    
    SPEEDCOPY_OK = True
2024-01-31 21:20:50 +00:00
# Maps a mime to the path of the static icon used when no real thumbnail can be made.
# Any mime that is not listed falls back to the generic hydrus icon via the defaultdict factory.
mimes_to_default_thumbnail_paths = collections.defaultdict( lambda: os.path.join( HC.STATIC_DIR, 'hydrus.png' ) )

# Broad categories go in first; the per-mime entries further down are assigned
# afterwards, so they win wherever a mime also belongs to one of these groups.
for mime in HC.AUDIO:
    
    mimes_to_default_thumbnail_paths[ mime ] = os.path.join( HC.STATIC_DIR, 'audio.png' )
    

for mime in HC.VIDEO:
    
    mimes_to_default_thumbnail_paths[ mime ] = os.path.join( HC.STATIC_DIR, 'video.png' )
    

for mime in HC.ANIMATIONS:
    
    mimes_to_default_thumbnail_paths[ mime ] = os.path.join( HC.STATIC_DIR, 'video.png' )
    

for mime in HC.ARCHIVES:
    
    mimes_to_default_thumbnail_paths[ mime ] = os.path.join( HC.STATIC_DIR, 'zip.png' )
    

for mime in HC.IMAGES:
    
    mimes_to_default_thumbnail_paths[ mime ] = os.path.join( HC.STATIC_DIR, 'image.png' )
    

mimes_to_default_thumbnail_paths[ HC.APPLICATION_UNKNOWN ] = os.path.join( HC.STATIC_DIR, 'hydrus.png' )
mimes_to_default_thumbnail_paths[ HC.APPLICATION_PDF ] = os.path.join( HC.STATIC_DIR, 'pdf.png' )
mimes_to_default_thumbnail_paths[ HC.APPLICATION_DOCX ] = os.path.join( HC.STATIC_DIR, 'docx.png' )
mimes_to_default_thumbnail_paths[ HC.APPLICATION_XLSX ] = os.path.join( HC.STATIC_DIR, 'xlsx.png' )
mimes_to_default_thumbnail_paths[ HC.APPLICATION_PPTX ] = os.path.join( HC.STATIC_DIR, 'pptx.png' )
mimes_to_default_thumbnail_paths[ HC.APPLICATION_EPUB ] = os.path.join( HC.STATIC_DIR, 'epub.png' )
mimes_to_default_thumbnail_paths[ HC.APPLICATION_DJVU ] = os.path.join( HC.STATIC_DIR, 'djvu.png' )
mimes_to_default_thumbnail_paths[ HC.APPLICATION_PSD ] = os.path.join( HC.STATIC_DIR, 'psd.png' )
mimes_to_default_thumbnail_paths[ HC.APPLICATION_CLIP ] = os.path.join( HC.STATIC_DIR, 'clip.png' )
mimes_to_default_thumbnail_paths[ HC.APPLICATION_SAI2 ] = os.path.join( HC.STATIC_DIR, 'sai.png' )
mimes_to_default_thumbnail_paths[ HC.APPLICATION_KRITA ] = os.path.join( HC.STATIC_DIR, 'krita.png' )
mimes_to_default_thumbnail_paths[ HC.APPLICATION_FLASH ] = os.path.join( HC.STATIC_DIR, 'flash.png' )
mimes_to_default_thumbnail_paths[ HC.APPLICATION_XCF ] = os.path.join( HC.STATIC_DIR, 'xcf.png' )
mimes_to_default_thumbnail_paths[ HC.APPLICATION_PROCREATE ] = os.path.join( HC.STATIC_DIR, 'procreate.png' )
mimes_to_default_thumbnail_paths[ HC.APPLICATION_RTF ] = os.path.join( HC.STATIC_DIR, 'rtf.png' )
mimes_to_default_thumbnail_paths[ HC.IMAGE_SVG ] = os.path.join( HC.STATIC_DIR, 'svg.png' )
2015-03-25 22:04:19 +00:00
def GenerateDefaultThumbnail( mime: int, target_resolution: typing.Tuple[ int, int ] ):
    """
    Produce the stock placeholder thumbnail for the given mime.
    
    The icon path is looked up in mimes_to_default_thumbnail_paths (which supplies
    a generic icon for unlisted mimes) and handed, together with target_resolution,
    to HydrusImageHandling.GenerateDefaultThumbnailNumPyFromPath, whose result is returned.
    """
    
    default_icon_path = mimes_to_default_thumbnail_paths[ mime ]
    
    default_thumbnail = HydrusImageHandling.GenerateDefaultThumbnailNumPyFromPath( default_icon_path, target_resolution )
    
    return default_thumbnail
def GenerateThumbnailBytes( path, target_resolution, mime, duration, num_frames, percentage_in = 35 ):
    """
    Generate a thumbnail for the file at path and return it in serialised form.
    
    All arguments are forwarded to GenerateThumbnailNumPy; the image it returns is
    then converted by HydrusImageHandling.GenerateThumbnailBytesFromNumPy.
    """
    
    return HydrusImageHandling.GenerateThumbnailBytesFromNumPy(
        GenerateThumbnailNumPy( path, target_resolution, mime, duration, num_frames, percentage_in = percentage_in )
    )
2024-01-10 21:27:29 +00:00
def PrintMoreThumbErrorInfo( e: Exception, message, extra_description: typing.Optional[ str ] = None ):
    """
    Report a thumbnail generation failure through HydrusData.Print/PrintException.
    
    Nothing is printed when e is a HydrusExceptions.NoThumbnailFileException.
    
    :param e: the exception that was caught
    :param message: headline text printed before the exception
    :param extra_description: optional additional context, printed on its own line when given
    """
    
    if isinstance( e, HydrusExceptions.NoThumbnailFileException ):
        
        # this exception type is deliberately not reported
        return
        
    
    HydrusData.Print( message )
    
    if extra_description is not None:
        
        HydrusData.Print( f'Extra info: {extra_description}' )
        
    
    HydrusData.PrintException( e )
def GenerateThumbnailNumPy( path, target_resolution, mime, duration, num_frames, percentage_in = 35, extra_description = None ):
    """
    Generate a thumbnail image for the file at path, choosing the strategy by mime.
    
    Every strategy is best-effort: if the format-specific generation raises, the
    problem is reported via PrintMoreThumbErrorInfo and the stock icon from
    GenerateDefaultThumbnail is returned instead.
    
    :param path: path of the file to thumbnail
    :param target_resolution: desired thumbnail resolution, passed through to the image helpers
    :param mime: the file's mime (an HC constant); selects the branch below
    :param duration: passed to the ffmpeg renderer (animations and video only)
    :param num_frames: frame count; used to pick the thumbnail frame for ugoira, animations and video
    :param percentage_in: how far into the frames (0-100) the thumbnail frame is taken from
    :param extra_description: optional context that is printed alongside any error
    :return: the thumbnail as produced by the HydrusImageHandling helpers
    """
    
    problem_message = f'Problem generating thumbnail for "{path}".'
    
    if mime == HC.APPLICATION_CBZ:
        
        # pull the cover page out to a temp file and thumbnail that image
        ( os_file_handle, temp_path ) = HydrusTemp.GetTempPath()
        
        try:
            
            HydrusArchiveHandling.ExtractCoverPage( path, temp_path )
            
            cover_mime = GetMime( temp_path )
            
            thumbnail_numpy = HydrusImageHandling.GenerateThumbnailNumPyFromStaticImagePath( temp_path, target_resolution, cover_mime )
            
        except Exception as e:
            
            PrintMoreThumbErrorInfo( e, problem_message, extra_description = extra_description )
            
            thumbnail_numpy = GenerateDefaultThumbnail( mime, target_resolution )
            
        finally:
            
            HydrusTemp.CleanUpTempPath( os_file_handle, temp_path )
            
        
    elif mime == HC.APPLICATION_CLIP:
        
        ( os_file_handle, temp_path ) = HydrusTemp.GetTempPath()
        
        try:
            
            HydrusClipHandling.ExtractDBPNGToPath( path, temp_path )
            
            thumbnail_numpy = HydrusImageHandling.GenerateThumbnailNumPyFromStaticImagePath( temp_path, target_resolution, HC.IMAGE_PNG )
            
        except Exception as e:
            
            PrintMoreThumbErrorInfo( e, problem_message, extra_description = extra_description )
            
            thumbnail_numpy = GenerateDefaultThumbnail( mime, target_resolution )
            
        finally:
            
            HydrusTemp.CleanUpTempPath( os_file_handle, temp_path )
            
        
    elif mime == HC.APPLICATION_KRITA:
        
        try:
            
            thumbnail_numpy = HydrusKritaHandling.GenerateThumbnailNumPyFromKraPath( path, target_resolution )
            
        except Exception as e:
            
            PrintMoreThumbErrorInfo( e, problem_message, extra_description = extra_description )
            
            thumbnail_numpy = GenerateDefaultThumbnail( mime, target_resolution )
            
        
    elif mime == HC.APPLICATION_PROCREATE:
        
        ( os_file_handle, temp_path ) = HydrusTemp.GetTempPath()
        
        try:
            
            HydrusProcreateHandling.ExtractZippedThumbnailToPath( path, temp_path )
            
            thumbnail_numpy = HydrusImageHandling.GenerateThumbnailNumPyFromStaticImagePath( temp_path, target_resolution, HC.IMAGE_PNG )
            
        except Exception as e:
            
            PrintMoreThumbErrorInfo( e, problem_message, extra_description = extra_description )
            
            thumbnail_numpy = GenerateDefaultThumbnail( mime, target_resolution )
            
        finally:
            
            HydrusTemp.CleanUpTempPath( os_file_handle, temp_path )
            
        
    elif mime == HC.APPLICATION_PSD:
        
        try:
            
            thumbnail_numpy = HydrusPSDHandling.GenerateThumbnailNumPyFromPSDPath( path, target_resolution )
            
        except Exception as e:
            
            PrintMoreThumbErrorInfo( e, problem_message, extra_description = extra_description )
            
            # second attempt: have ffmpeg render the PSD to a temporary png and thumbnail that
            HydrusData.Print( 'Attempting ffmpeg PSD thumbnail fallback' )
            
            ( os_file_handle, temp_path ) = HydrusTemp.GetTempPath( suffix = '.png' )
            
            try:
                
                HydrusVideoHandling.RenderImageToImagePath( path, temp_path )
                
                thumbnail_numpy = HydrusImageHandling.GenerateThumbnailNumPyFromStaticImagePath( temp_path, target_resolution, HC.IMAGE_PNG )
                
            except Exception as fallback_e:
                
                PrintMoreThumbErrorInfo( fallback_e, f'Secondary problem generating thumbnail for "{path}".', extra_description = extra_description )
                
                thumbnail_numpy = GenerateDefaultThumbnail( mime, target_resolution )
                
            finally:
                
                HydrusTemp.CleanUpTempPath( os_file_handle, temp_path )
                
            
        
    elif mime == HC.IMAGE_SVG:
        
        try:
            
            thumbnail_numpy = HydrusSVGHandling.GenerateThumbnailNumPyFromSVGPath( path, target_resolution )
            
        except Exception as e:
            
            PrintMoreThumbErrorInfo( e, problem_message, extra_description = extra_description )
            
            thumbnail_numpy = GenerateDefaultThumbnail( mime, target_resolution )
            
        
    elif mime == HC.APPLICATION_PDF:
        
        try:
            
            thumbnail_numpy = HydrusPDFHandling.GenerateThumbnailNumPyFromPDFPath( path, target_resolution )
            
        except Exception as e:
            
            PrintMoreThumbErrorInfo( e, problem_message, extra_description = extra_description )
            
            thumbnail_numpy = GenerateDefaultThumbnail( mime, target_resolution )
            
        
    elif mime == HC.APPLICATION_PPTX:
        
        try:
            
            thumbnail_numpy = HydrusOfficeOpenXMLHandling.GenerateThumbnailNumPyFromOfficePath( path, target_resolution )
            
        except Exception as e:
            
            PrintMoreThumbErrorInfo( e, problem_message, extra_description = extra_description )
            
            thumbnail_numpy = GenerateDefaultThumbnail( mime, target_resolution )
            
        
    elif mime == HC.APPLICATION_FLASH:
        
        ( os_file_handle, temp_path ) = HydrusTemp.GetTempPath()
        
        try:
            
            HydrusFlashHandling.RenderPageToFile( path, temp_path, 1 )
            
            thumbnail_numpy = HydrusImageHandling.GenerateThumbnailNumPyFromStaticImagePath( temp_path, target_resolution, HC.IMAGE_PNG )
            
        except Exception as e:
            
            PrintMoreThumbErrorInfo( e, problem_message, extra_description = extra_description )
            
            thumbnail_numpy = GenerateDefaultThumbnail( mime, target_resolution )
            
        finally:
            
            HydrusTemp.CleanUpTempPath( os_file_handle, temp_path )
            
        
    elif mime in HC.IMAGES:
        
        # TODO: it would be nice to have gif and apng generating their thumb x frames in, like with videos. maybe we should add animation thumb fetcher to hydrusanimationhandling
        
        try:
            
            thumbnail_numpy = HydrusImageHandling.GenerateThumbnailNumPyFromStaticImagePath( path, target_resolution, mime )
            
        except Exception as e:
            
            PrintMoreThumbErrorInfo( e, problem_message, extra_description = extra_description )
            
            thumbnail_numpy = GenerateDefaultThumbnail( mime, target_resolution )
            
        
    elif mime == HC.ANIMATION_UGOIRA:
        
        ( os_file_handle, temp_path ) = HydrusTemp.GetTempPath()
        
        try:
            
            desired_thumb_frame_index = int( ( percentage_in / 100.0 ) * ( num_frames - 1 ) )
            
            HydrusUgoiraHandling.ExtractFrame( path, desired_thumb_frame_index, temp_path )
            
            cover_mime = GetMime( temp_path )
            
            thumbnail_numpy = HydrusImageHandling.GenerateThumbnailNumPyFromStaticImagePath( temp_path, target_resolution, cover_mime )
            
        except Exception as e:
            
            PrintMoreThumbErrorInfo( e, problem_message, extra_description = extra_description )
            
            thumbnail_numpy = GenerateDefaultThumbnail( mime, target_resolution )
            
        finally:
            
            HydrusTemp.CleanUpTempPath( os_file_handle, temp_path )
            
        
    else: # animations and video
        
        renderer = None
        
        desired_thumb_frame_index = int( ( percentage_in / 100.0 ) * ( num_frames - 1 ) )
        
        # the finally guarantees the current renderer is stopped even if the
        # resize or the default-thumbnail fallback below raises
        try:
            
            try:
                
                renderer = HydrusVideoHandling.VideoRendererFFMPEG( path, mime, duration, num_frames, target_resolution, start_pos = desired_thumb_frame_index )
                
                numpy_image = renderer.read_frame()
                
            except Exception as e:
                
                message = 'Problem generating thumbnail for "{}" at frame {} ({})--FFMPEG could not render it.'.format( path, desired_thumb_frame_index, HydrusData.ConvertFloatToPercentage( percentage_in / 100.0 ) )
                
                PrintMoreThumbErrorInfo( e, message, extra_description = extra_description )
                
                numpy_image = None
                
            
            if numpy_image is None and desired_thumb_frame_index != 0:
                
                if renderer is not None:
                    
                    renderer.Stop()
                    
                
                # try first frame instead
                
                try:
                    
                    renderer = HydrusVideoHandling.VideoRendererFFMPEG( path, mime, duration, num_frames, target_resolution )
                    
                    numpy_image = renderer.read_frame()
                    
                except Exception as e:
                    
                    message = 'Problem generating thumbnail for "{}" at first frame--FFMPEG could not render it.'.format( path )
                    
                    PrintMoreThumbErrorInfo( e, message, extra_description = extra_description )
                    
                    numpy_image = None
                    
                
            
            if numpy_image is None:
                
                thumbnail_numpy = GenerateDefaultThumbnail( mime, target_resolution )
                
            else:
                
                thumbnail_numpy = HydrusImageHandling.ResizeNumPyImage( numpy_image, target_resolution ) # just in case ffmpeg doesn't deliver right
                
            
        finally:
            
            if renderer is not None:
                
                renderer.Stop()
                
            
        
    
    return thumbnail_numpy
2019-03-27 22:01:02 +00:00
2015-03-25 22:04:19 +00:00
def GetExtraHashesFromPath( path ):
    """
    Compute the md5, sha1 and sha512 digests of the file at path in a single pass.
    
    The file is read in blocks via HydrusPaths.ReadFileLikeAsBlocks and every block
    is fed to all three hashers.
    
    :return: ( md5, sha1, sha512 ) as raw digest bytes
    """
    
    hashers = ( hashlib.md5(), hashlib.sha1(), hashlib.sha512() )
    
    with open( path, 'rb' ) as file_handle:
        
        for chunk in HydrusPaths.ReadFileLikeAsBlocks( file_handle ):
            
            for hasher in hashers:
                
                hasher.update( chunk )
                
            
        
    
    ( md5, sha1, sha512 ) = ( hasher.digest() for hasher in hashers )
    
    return ( md5, sha1, sha512 )
2023-08-23 20:43:26 +00:00
2019-07-10 22:38:30 +00:00
def GetFileInfo( path, mime = None, ok_to_look_for_hydrus_updates = False ):
    """
    Gather basic metadata for the file at path.
    
    :param path: path of the file to inspect
    :param mime: the file's mime if already known; when None it is determined with GetMime
    :param ok_to_look_for_hydrus_updates: forwarded to GetMime when the mime has to be determined
    :return: ( size, mime, width, height, duration, num_frames, has_audio, num_words ).
        Fields that do not apply to the filetype, or that could not be determined, are None
        (has_audio is always a bool). For audio files duration is in milliseconds.
    :raises HydrusExceptions.ZeroSizeFileException: the file is empty
    :raises HydrusExceptions.UnsupportedFileException: the mime is not in HC.ALLOWED_MIMES,
        or it needs HEIF support that is not available
    """
    
    size = os.path.getsize( path )
    
    if size == 0:
        
        raise HydrusExceptions.ZeroSizeFileException( 'File is of zero length!' )
        
    
    if mime is None:
        
        mime = GetMime( path, ok_to_look_for_hydrus_updates = ok_to_look_for_hydrus_updates )
        
    
    if mime not in HC.ALLOWED_MIMES:
        
        if mime == HC.TEXT_HTML:
            
            raise HydrusExceptions.UnsupportedFileException( 'Looks like HTML -- maybe the client needs to be taught how to parse this?' )
            
        elif mime == HC.APPLICATION_UNKNOWN:
            
            raise HydrusExceptions.UnsupportedFileException( 'Unknown filetype!' )
            
        else:
            
            raise HydrusExceptions.UnsupportedFileException( 'Filetype is not permitted!' )
            
        
    
    if mime in HC.PIL_HEIF_MIMES and not HydrusImageHandling.HEIF_OK:
        
        raise HydrusExceptions.UnsupportedFileException( 'Sorry, you need the pillow-heif library to support this filetype ({})! Please rebuild your venv.'.format( HC.mime_string_lookup[ mime ] ) )
        
    
    width = None
    height = None
    duration = None
    num_frames = None
    num_words = None
    
    # a starting value; the ffmpeg video branch below overwrites it with what it actually finds
    has_audio = mime in HC.MIMES_THAT_DEFINITELY_HAVE_AUDIO
    
    # keep this in the specific-first, general-last test order
    
    if mime == HC.APPLICATION_CBZ:
        
        # resolution of a cbz is the resolution of its cover page
        ( os_file_handle, temp_path ) = HydrusTemp.GetTempPath()
        
        try:
            
            HydrusArchiveHandling.ExtractCoverPage( path, temp_path )
            
            cover_mime = GetMime( temp_path )
            
            ( width, height ) = HydrusImageHandling.GetImageResolution( temp_path, cover_mime )
            
        except Exception:
            
            # best effort: an unreadable cover just means no resolution
            ( width, height ) = ( None, None )
            
        finally:
            
            HydrusTemp.CleanUpTempPath( os_file_handle, temp_path )
            
        
    elif mime == HC.APPLICATION_CLIP:
        
        ( ( width, height ), duration, num_frames ) = HydrusClipHandling.GetClipProperties( path )
        
    elif mime == HC.APPLICATION_KRITA:
        
        try:
            
            ( width, height ) = HydrusKritaHandling.GetKraProperties( path )
            
        except HydrusExceptions.NoResolutionFileException:
            
            pass
            
        
    elif mime == HC.APPLICATION_PROCREATE:
        
        try:
            
            ( width, height ) = HydrusProcreateHandling.GetProcreateResolution( path )
            
        except Exception:
            
            # best effort: leave the resolution unknown
            pass
            
        
    elif mime == HC.IMAGE_SVG:
        
        try:
            
            ( width, height ) = HydrusSVGHandling.GetSVGResolution( path )
            
        except HydrusExceptions.NoResolutionFileException:
            
            pass
            
        
    elif mime == HC.APPLICATION_PDF:
        
        try:
            
            ( num_words, ( width, height ) ) = HydrusPDFHandling.GetPDFInfo( path )
            
        except HydrusExceptions.LimitedSupportFileException:
            
            pass
            
        
    elif mime == HC.APPLICATION_PPTX:
        
        try:
            
            ( num_words, ( width, height ) ) = HydrusOfficeOpenXMLHandling.GetPPTXInfo( path )
            
        except HydrusExceptions.LimitedSupportFileException:
            
            pass
            
        
    elif mime == HC.APPLICATION_DOCX:
        
        try:
            
            num_words = HydrusOfficeOpenXMLHandling.GetDOCXInfo( path )
            
        except HydrusExceptions.LimitedSupportFileException:
            
            pass
            
        
    elif mime == HC.APPLICATION_FLASH:
        
        ( ( width, height ), duration, num_frames ) = HydrusFlashHandling.GetFlashProperties( path )
        
    elif mime == HC.APPLICATION_PSD:
        
        try:
            
            ( width, height ) = HydrusPSDHandling.GetPSDResolution( path )
            
        except Exception as e:
            
            HydrusData.Print( 'Problem calculating resolution for "{}":'.format( path ) )
            HydrusData.PrintException( e )
            HydrusData.Print( 'Attempting PSD resolution fallback' )
            
            ( width, height ) = HydrusPSDHandling.GetPSDResolutionFallback( path )
            
        
    elif mime in HC.VIDEO or mime in HC.HEIF_TYPE_SEQUENCES:
        
        ( ( width, height ), duration, num_frames, has_audio ) = HydrusVideoHandling.GetFFMPEGVideoProperties( path )
        
    elif mime in HC.VIEWABLE_ANIMATIONS:
        
        ( ( width, height ), duration, num_frames ) = HydrusAnimationHandling.GetAnimationProperties( path, mime )
        
    elif mime == HC.ANIMATION_UGOIRA:
        
        ( ( width, height ), num_frames ) = HydrusUgoiraHandling.GetUgoiraProperties( path )
        
    elif mime in HC.IMAGES:
        
        ( width, height ) = HydrusImageHandling.GetImageResolution( path, mime )
        
    elif mime in HC.AUDIO:
        
        ffmpeg_lines = HydrusVideoHandling.GetFFMPEGInfoLines( path )
        
        ( file_duration_in_s, stream_duration_in_s ) = HydrusVideoHandling.ParseFFMPEGDuration( ffmpeg_lines )
        
        duration = int( file_duration_in_s * 1000 )
        
    
    # some parsers can report negative values; normalise every numeric field to be non-negative
    
    if width is not None and width < 0:
        
        width *= -1
        
    
    if height is not None and height < 0:
        
        # this previously negated width by mistake, leaving height negative
        height *= -1
        
    
    if duration is not None and duration < 0:
        
        duration *= -1
        
    
    if num_frames is not None and num_frames < 0:
        
        num_frames *= -1
        
    
    if num_words is not None and num_words < 0:
        
        num_words *= -1
        
    
    return ( size, mime, width, height, duration, num_frames, has_audio, num_words )
2013-07-17 20:56:13 +00:00
2023-08-09 21:12:17 +00:00
2024-01-17 18:57:00 +00:00
def GetFileModifiedTimestampMS( path ) -> int:
    """
    Return the modification time of the file at path, converted from the seconds
    value given by os.path.getmtime using HydrusTime.MillisecondiseS.
    """
    
    modified_time_s = os.path.getmtime( path )
    
    return HydrusTime.MillisecondiseS( modified_time_s )
2019-09-25 21:34:18 +00:00
2023-08-09 21:12:17 +00:00
2013-08-07 22:25:18 +00:00
def GetHashFromPath( path ):
    """
    Return the sha256 digest (raw bytes) of the file at path.
    
    The file is consumed in blocks through HydrusPaths.ReadFileLikeAsBlocks rather
    than being read in one go.
    """
    
    sha256 = hashlib.sha256()
    
    with open( path, 'rb' ) as file_handle:
        
        for chunk in HydrusPaths.ReadFileLikeAsBlocks( file_handle ):
            
            sha256.update( chunk )
            
        
    
    return sha256.digest()
2023-08-09 21:12:17 +00:00
2023-10-11 20:46:40 +00:00
# TODO: replace this with a FileTypeChecker class or something that tucks all this messy data away more neatly
# do this the next time you visit this place

# Magic-number table used by GetMime.
# Each row is ( offsets_and_headers, mime ), where offsets_and_headers is a tuple of ( offsets, headers ) pairs.
# A pair matches when any of its headers is found at any of its offsets; a row matches when all of its pairs match.
# GetMime only looks at the first 256 bytes of the file and acts on the first row that matches, so row order matters.
headers_and_mime = [
    ( ( ( [0], [b'\xff\xd8'] ), ), HC.IMAGE_JPEG ),
    ( ( ( [0], [b'\x89PNG'] ), ), HC.UNDETERMINED_PNG ),
    ( ( ( [0], [b'GIF87a', b'GIF89a'] ), ), HC.UNDETERMINED_GIF ),
    ( ( ( [8], [b'WEBP'] ), ), HC.IMAGE_WEBP ),
    ( ( ( [0], [b'II*\x00', b'MM\x00*'] ), ), HC.IMAGE_TIFF ),
    ( ( ( [0], [b'BM'] ), ), HC.IMAGE_BMP ),
    ( ( ( [0], [b'\x00\x00\x01\x00', b'\x00\x00\x02\x00'] ), ), HC.IMAGE_ICON ),
    ( ( ( [0], [b'qoif'] ), ), HC.IMAGE_QOI ),
    ( ( ( [0], [b'CWS', b'FWS', b'ZWS'] ), ), HC.APPLICATION_FLASH ),
    ( ( ( [0], [b'FLV'] ), ), HC.VIDEO_FLV ),
    ( ( ( [0], [b'%PDF'] ), ), HC.APPLICATION_PDF ),
    ( ( ( [0], [b'8BPS\x00\x01', b'8BPS\x00\x02'] ), ), HC.APPLICATION_PSD ),
    ( ( ( [0], [b'CSFCHUNK'] ), ), HC.APPLICATION_CLIP ),
    ( ( ( [0], [b'SAI-CANVAS'] ), ), HC.APPLICATION_SAI2 ),
    ( ( ( [0], [b'gimp xcf '] ), ), HC.APPLICATION_XCF ),
    ( ( ( [38, 42, 58, 63],[ b'application/x-krita'] ), ), HC.APPLICATION_KRITA ), # important this comes before zip files because this is also a zip file
    ( ( ( [38, 43],[ b'application/epub+zip'] ), ), HC.APPLICATION_EPUB ),
    ( ( ( [4], [b'FORM'] ), ( [12], [b'DJVU', b'DJVM', b'PM44', b'BM44', b'SDJV'] ), ), HC.APPLICATION_DJVU ),
    ( ( ( [0], [b'{\\rtf'] ), ), HC.APPLICATION_RTF ),
    ( ( ( [0], [b'PK\x03\x04', b'PK\x05\x06', b'PK\x07\x08'] ), ), HC.APPLICATION_ZIP ),
    ( ( ( [0], [b'7z\xBC\xAF\x27\x1C'] ), ), HC.APPLICATION_7Z ),
    ( ( ( [0], [b'\x52\x61\x72\x21\x1A\x07\x00', b'\x52\x61\x72\x21\x1A\x07\x01\x00'] ), ), HC.APPLICATION_RAR ),
    ( ( ( [0], [b'\x1f\x8b'] ), ), HC.APPLICATION_GZIP ),
    ( ( ( [0], [b'hydrus encrypted zip'] ), ), HC.APPLICATION_HYDRUS_ENCRYPTED_ZIP ),
    ( ( ( [4], [b'ftypavif'] ), ), HC.IMAGE_AVIF ),
    ( ( ( [4], [b'ftypavis'] ), ), HC.IMAGE_AVIF_SEQUENCE ),
    ( ( ( [4], [b'ftypmif1'] ), ( [16, 20, 24], [b'avif'] ), ), HC.IMAGE_AVIF ), # must come before the plain ftypmif1 HEIF row below
    ( ( ( [4], [b'ftypheic', b'ftypheix', b'ftypheim', b'ftypheis'] ), ), HC.IMAGE_HEIC ),
    ( ( ( [4], [b'ftyphevc', b'ftyphevx', b'ftyphevm', b'ftyphevs'] ), ), HC.IMAGE_HEIC_SEQUENCE ),
    ( ( ( [4], [b'ftypmif1'] ), ), HC.IMAGE_HEIF ),
    ( ( ( [4], [b'ftypmsf1'] ), ), HC.IMAGE_HEIF_SEQUENCE ),
    ( ( ( [4], [b'ftypmp4', b'ftypisom', b'ftypM4V', b'ftypMSNV', b'ftypavc1', b'ftypFACE', b'ftypdash'] ), ), HC.UNDETERMINED_MP4 ),
    ( ( ( [4], [b'ftypqt'] ), ), HC.VIDEO_MOV ),
    ( ( ( [0], [b'fLaC'] ), ), HC.AUDIO_FLAC ),
    ( ( ( [0], [b'RIFF'] ), ( [8], [ b'WAVE' ] ) ), HC.AUDIO_WAVE ),
    ( ( ( [0], [b'wvpk'] ), ), HC.AUDIO_WAVPACK ),
    ( ( ( [8], [b'AVI '] ), ), HC.VIDEO_AVI ),
    ( ( ( [0], [b'\x30\x26\xB2\x75\x8E\x66\xCF\x11\xA6\xD9\x00\xAA\x00\x62\xCE\x6C'] ), ), HC.UNDETERMINED_WM ),
    ( ( ( [0], [b'\x4D\x5A\x90\x00\x03'], ), ), HC.APPLICATION_WINDOWS_EXE )
]
2023-10-11 21:26:01 +00:00
def passes_offsets_and_headers_pair( offsets, headers, first_bytes_of_file ) -> bool:
    """
    Report whether any of the given headers occurs at any of the given offsets.
    
    :param offsets: iterable of byte offsets to test
    :param headers: collection of byte strings to look for (iterated once per offset)
    :param first_bytes_of_file: the leading bytes of the file being sniffed
    :return: True on the first ( offset, header ) combination that matches, otherwise False.
        An offset beyond the available bytes simply fails to match.
    """
    
    return any(
        first_bytes_of_file[ offset : offset + len( header ) ] == header
        for offset in offsets
        for header in headers
    )
def passes_offsets_and_headers( offsets_and_headers, first_bytes_of_file ) -> bool:
    """
    Report whether every ( offsets, headers ) pair in offsets_and_headers matches
    the given leading bytes, as judged by passes_offsets_and_headers_pair.
    
    Checking stops at the first pair that fails.
    """
    
    # ok we need to match every pair here
    return all(
        passes_offsets_and_headers_pair( pair_offsets, pair_headers, first_bytes_of_file )
        for ( pair_offsets, pair_headers ) in offsets_and_headers
    )
2023-10-11 20:46:40 +00:00
2019-07-10 22:38:30 +00:00
def GetMime( path, ok_to_look_for_hydrus_updates = False ):
    """
    Work out the mime (an HC constant) of the file at path by inspecting its content.
    
    Order of checks:
    1. optionally, whether the whole file parses as a hydrus content/definitions update
    2. the magic-number rows in headers_and_mime against the first 256 bytes, with
       follow-up inspection for zip containers and the UNDETERMINED_* placeholders
    3. JSON, HTML and SVG text sniffing
    4. ffmpeg, as a last resort
    
    :param path: path of the file to inspect
    :param ok_to_look_for_hydrus_updates: if True and the file is under 64MiB, try step 1
    :return: the detected mime, or HC.APPLICATION_UNKNOWN if nothing matched
    :raises HydrusExceptions.ZeroSizeFileException: the file is empty
    """
    
    size = os.path.getsize( path )
    
    if size == 0:
        
        raise HydrusExceptions.ZeroSizeFileException( 'File is of zero length!' )
        
    
    if ok_to_look_for_hydrus_updates and size < 64 * 1024 * 1024:
        
        with open( path, 'rb' ) as f:
            
            update_network_bytes = f.read()
            
        
        try:
            
            update = HydrusSerialisable.CreateFromNetworkBytes( update_network_bytes )
            
            if isinstance( update, HydrusNetwork.ContentUpdate ):
                
                return HC.APPLICATION_HYDRUS_UPDATE_CONTENT
                
            elif isinstance( update, HydrusNetwork.DefinitionsUpdate ):
                
                return HC.APPLICATION_HYDRUS_UPDATE_DEFINITIONS
                
            
        except Exception:
            
            # not a hydrus update (or not parseable as one); carry on with normal detection
            pass
            
        
    
    with open( path, 'rb' ) as f:
        
        first_bytes_of_file = f.read( 256 )
        
    
    for ( offsets_and_headers, mime ) in headers_and_mime:
        
        if passes_offsets_and_headers( offsets_and_headers, first_bytes_of_file ):
            
            if mime == HC.APPLICATION_ZIP:
                
                # many formats are zip containers; look inside to see if it is one we know.
                # an encrypted or damaged/unusual zip cannot be inspected, so it stays a plain zip
                
                try:
                    
                    if HydrusArchiveHandling.IsEncryptedZip( path ):
                        
                        return HC.APPLICATION_ZIP
                        
                    
                except HydrusExceptions.DamagedOrUnusualFileException:
                    
                    return HC.APPLICATION_ZIP
                    
                
                opendoc_mime = HydrusArchiveHandling.MimeFromOpenDocument( path )
                
                if opendoc_mime is not None:
                    
                    return opendoc_mime
                    
                
                microsoft_mime = HydrusOfficeOpenXMLHandling.MimeFromMicrosoftOpenXMLDocument( path )
                
                if microsoft_mime is not None:
                    
                    return microsoft_mime
                    
                
                if HydrusProcreateHandling.ZipLooksLikeProcreate( path ):
                    
                    return HC.APPLICATION_PROCREATE
                    
                
                if HydrusUgoiraHandling.ZipLooksLikeUgoira( path ):
                    
                    return HC.ANIMATION_UGOIRA
                    
                
                if HydrusArchiveHandling.ZipLooksLikeCBZ( path ):
                    
                    return HC.APPLICATION_CBZ
                    
                
                return HC.APPLICATION_ZIP
                
            
            if mime in ( HC.UNDETERMINED_WM, HC.UNDETERMINED_MP4 ):
                
                return HydrusVideoHandling.GetMime( path )
                
            elif mime == HC.UNDETERMINED_PNG:
                
                if HydrusAnimationHandling.IsPNGAnimated( first_bytes_of_file ):
                    
                    return HC.ANIMATION_APNG
                    
                else:
                    
                    return HC.IMAGE_PNG
                    
                
            elif mime == HC.UNDETERMINED_GIF:
                
                if HydrusAnimationHandling.PILAnimationHasDuration( path ):
                    
                    return HC.ANIMATION_GIF
                    
                else:
                    
                    return HC.IMAGE_GIF
                    
                
            else:
                
                return mime
                
            
        
    
    # If the file starts with '{' (or '[') it is probably JSON
    # but we can't know for sure so we send it over to be checked
    if first_bytes_of_file.startswith( ( b'{', b'[' ) ):
        
        with open( path, 'rb' ) as f:
            
            potential_json_document_bytes = f.read()
            
        
        if HydrusText.LooksLikeJSON( potential_json_document_bytes ):
            
            return HC.APPLICATION_JSON
            
        
    
    if HydrusText.LooksLikeHTML( first_bytes_of_file ):
        
        return HC.TEXT_HTML
        
    
    if HydrusText.LooksLikeSVG( first_bytes_of_file ):
        
        return HC.IMAGE_SVG
        
    
    # it is important this goes at the end, because ffmpeg has a billion false positives! and it takes CPU to true negative
    # for instance, it once thought some hydrus update files were mpegs
    # it also thinks txt files can be mpegs
    likely_to_false_positive = path.endswith( ( '.txt', '.log', '.json' ) )
    
    if not likely_to_false_positive:
        
        try:
            
            mime = HydrusVideoHandling.GetMime( path )
            
            if mime != HC.APPLICATION_UNKNOWN:
                
                return mime
                
            
        except HydrusExceptions.UnsupportedFileException:
            
            pass
            
        except Exception as e:
            
            HydrusData.Print( 'FFMPEG had trouble with: ' + path )
            HydrusData.PrintException( e, do_wait = False )
            
        
    
    return HC.APPLICATION_UNKNOWN
2023-10-11 20:46:40 +00:00
2023-10-11 20:46:40 +00:00
# Reduced magic-number table for the jpeg/png fast path in GetThumbnailMime.
# The PNG signature row in headers_and_mime is tagged HC.UNDETERMINED_PNG (GetMime resolves that
# placeholder to IMAGE_PNG or ANIMATION_APNG), so filtering on HC.IMAGE_PNG alone never selected it
# and every PNG fell through to the full GetMime. Accept the placeholder and report it as HC.IMAGE_PNG.
# NOTE(review): this skips the animated-PNG check for this fast path -- confirm no caller expects ANIMATION_APNG here.
headers_and_mime_thumbnails = [
    ( offsets_and_headers, HC.IMAGE_PNG if mime == HC.UNDETERMINED_PNG else mime )
    for ( offsets_and_headers, mime ) in headers_and_mime
    if mime in ( HC.IMAGE_JPEG, HC.IMAGE_PNG, HC.UNDETERMINED_PNG )
]
2022-04-06 20:40:17 +00:00
def GetThumbnailMime( path ):
    """
    Determine the mime of a thumbnail file.
    
    The first 256 bytes are tested against the short headers_and_mime_thumbnails
    table; the mime of the first matching row is returned. If no row matches, the
    answer is delegated to the full GetMime.
    """
    
    with open( path, 'rb' ) as thumbnail_file:
        
        leading_bytes = thumbnail_file.read( 256 )
        
    
    for ( signature_pairs, candidate_mime ) in headers_and_mime_thumbnails:
        
        if not passes_offsets_and_headers( signature_pairs, leading_bytes ):
            
            continue
            
        
        return candidate_mime
        
    
    # nothing in the fast table matched
    return GetMime( path )