hydrus/hydrus/core/HydrusFileHandling.py

437 lines
14 KiB
Python
Raw Normal View History

2013-08-07 22:25:18 +00:00
import hashlib
2020-07-29 20:52:44 +00:00
import os
2021-10-06 20:59:30 +00:00
import struct
2020-07-29 20:52:44 +00:00
2020-04-22 21:00:35 +00:00
from hydrus.core import HydrusAudioHandling
2021-11-03 20:49:56 +00:00
from hydrus.core import HydrusClipHandling
2020-04-22 21:00:35 +00:00
from hydrus.core import HydrusConstants as HC
from hydrus.core import HydrusData
from hydrus.core import HydrusDocumentHandling
from hydrus.core import HydrusExceptions
from hydrus.core import HydrusFlashHandling
from hydrus.core import HydrusImageHandling
from hydrus.core import HydrusPaths
from hydrus.core import HydrusSerialisable
from hydrus.core import HydrusTemp
2020-04-22 21:00:35 +00:00
from hydrus.core import HydrusText
from hydrus.core import HydrusVideoHandling
2021-04-07 21:26:45 +00:00
from hydrus.core.networking import HydrusNetwork
2013-07-17 20:56:13 +00:00
# Mime
2021-07-28 21:12:00 +00:00
# Magic-number table used by GetMime.
# Each entry is ( ( ( offset, header_bytes ), ... ), mime ): a file matches an entry when its leading
# bytes contain every header_bytes at the paired offset. Entries are tried in order and the first
# match wins, so the order here matters.
headers_and_mime = [
    # images
    ( ( ( 0, b'\xff\xd8' ), ), HC.IMAGE_JPEG ),
    ( ( ( 0, b'GIF87a' ), ), HC.IMAGE_GIF ),
    ( ( ( 0, b'GIF89a' ), ), HC.IMAGE_GIF ),
    ( ( ( 0, b'\x89PNG' ), ), HC.UNDETERMINED_PNG ), # GetMime resolves this to png or apng
    ( ( ( 8, b'WEBP' ), ), HC.IMAGE_WEBP ),
    ( ( ( 0, b'II*\x00' ), ), HC.IMAGE_TIFF ),
    ( ( ( 0, b'MM\x00*' ), ), HC.IMAGE_TIFF ),
    ( ( ( 0, b'BM' ), ), HC.IMAGE_BMP ),
    ( ( ( 0, b'\x00\x00\x01\x00' ), ), HC.IMAGE_ICON ),
    ( ( ( 0, b'\x00\x00\x02\x00' ), ), HC.IMAGE_ICON ),
    # flash
    ( ( ( 0, b'CWS' ), ), HC.APPLICATION_FLASH ),
    ( ( ( 0, b'FWS' ), ), HC.APPLICATION_FLASH ),
    ( ( ( 0, b'ZWS' ), ), HC.APPLICATION_FLASH ),
    ( ( ( 0, b'FLV' ), ), HC.VIDEO_FLV ),
    # documents and application files
    ( ( ( 0, b'%PDF' ), ), HC.APPLICATION_PDF ),
    ( ( ( 0, b'8BPS\x00\x01' ), ), HC.APPLICATION_PSD ),
    ( ( ( 0, b'8BPS\x00\x02' ), ), HC.APPLICATION_PSD ), # PSB, which is basically PSD v2 and does giganto resolution
    ( ( ( 0, b'CSFCHUNK' ), ), HC.APPLICATION_CLIP ),
    # archives
    ( ( ( 0, b'PK\x03\x04' ), ), HC.APPLICATION_ZIP ),
    ( ( ( 0, b'PK\x05\x06' ), ), HC.APPLICATION_ZIP ),
    ( ( ( 0, b'PK\x07\x08' ), ), HC.APPLICATION_ZIP ),
    ( ( ( 0, b'7z\xBC\xAF\x27\x1C' ), ), HC.APPLICATION_7Z ),
    ( ( ( 0, b'\x52\x61\x72\x21\x1A\x07\x00' ), ), HC.APPLICATION_RAR ),
    ( ( ( 0, b'\x52\x61\x72\x21\x1A\x07\x01\x00' ), ), HC.APPLICATION_RAR ),
    ( ( ( 0, b'hydrus encrypted zip' ), ), HC.APPLICATION_HYDRUS_ENCRYPTED_ZIP ),
    # 'ftyp' brands, which sit at byte offset 4
    ( ( ( 4, b'ftypmp4' ), ), HC.VIDEO_MP4 ),
    ( ( ( 4, b'ftypisom' ), ), HC.VIDEO_MP4 ),
    ( ( ( 4, b'ftypM4V' ), ), HC.VIDEO_MP4 ),
    ( ( ( 4, b'ftypMSNV' ), ), HC.VIDEO_MP4 ),
    ( ( ( 4, b'ftypavc1' ), ), HC.VIDEO_MP4 ),
    ( ( ( 4, b'ftypFACE' ), ), HC.VIDEO_MP4 ),
    ( ( ( 4, b'ftypdash' ), ), HC.VIDEO_MP4 ),
    ( ( ( 4, b'ftypqt' ), ), HC.VIDEO_MOV ),
    # audio and remaining video containers
    ( ( ( 0, b'fLaC' ), ), HC.AUDIO_FLAC ),
    ( ( ( 0, b'RIFF' ), ( 8, b'WAVE' ) ), HC.AUDIO_WAVE ),
    ( ( ( 8, b'AVI ' ), ), HC.VIDEO_AVI ),
    # GetMime turns this into wmv when the file has a video stream
    ( ( ( 0, b'\x30\x26\xB2\x75\x8E\x66\xCF\x11\xA6\xD9\x00\xAA\x00\x62\xCE\x6C' ), ), HC.UNDETERMINED_WM )
]
2019-05-08 21:06:42 +00:00
def _GenerateThumbnailBytesViaTempFile( render_to_temp_path, fallback_filename, target_resolution, mime, **temp_path_kwargs ):
    """Render a file to a temporary image and thumbnail that, falling back to a stock icon on failure.
    
    render_to_temp_path is called with the temp path and should write an image there.
    fallback_filename is the file inside HC.STATIC_DIR that is thumbnailed if rendering fails.
    temp_path_kwargs are passed straight through to HydrusTemp.GetTempPath.
    
    The temp path is always cleaned up, whichever route produced the thumbnail.
    """
    
    ( os_file_handle, temp_path ) = HydrusTemp.GetTempPath( **temp_path_kwargs )
    
    try:
        
        render_to_temp_path( temp_path )
        
        return HydrusImageHandling.GenerateThumbnailBytesFromStaticImagePath( temp_path, target_resolution, mime )
        
    except Exception:
        
        # best-effort: any rendering problem means we show the stock icon for this filetype instead
        # (Exception rather than a bare except, so KeyboardInterrupt/SystemExit still propagate)
        thumb_path = os.path.join( HC.STATIC_DIR, fallback_filename )
        
        return HydrusImageHandling.GenerateThumbnailBytesFromStaticImagePath( thumb_path, target_resolution, mime )
        
    finally:
        
        HydrusTemp.CleanUpTempPath( os_file_handle, temp_path )
        
    

def GenerateThumbnailBytes( path, target_resolution, mime, duration, num_frames, percentage_in = 35 ):
    """Generate thumbnail bytes for the file at path.
    
    path: the file to thumbnail.
    target_resolution: ( width, height ) for the thumbnail; ( 0, 0 ) is replaced with ( 128, 128 ).
    mime: the file's mime, which selects the rendering route.
    duration, num_frames: passed to the ffmpeg renderer for anything that is not handled as a static image.
    percentage_in: how far through num_frames, as a percentage, the thumbnail frame is taken from.
    
    Returns the thumbnail bytes.
    
    Raises Exception if the ffmpeg renderer cannot deliver a frame.
    """
    
    if target_resolution == ( 0, 0 ):
        
        target_resolution = ( 128, 128 )
        
    
    if mime in ( HC.IMAGE_JPEG, HC.IMAGE_PNG, HC.IMAGE_GIF, HC.IMAGE_WEBP, HC.IMAGE_TIFF, HC.IMAGE_ICON ): # not apng atm
        
        thumbnail_bytes = HydrusImageHandling.GenerateThumbnailBytesFromStaticImagePath( path, target_resolution, mime )
        
    elif mime == HC.APPLICATION_PSD:
        
        thumbnail_bytes = _GenerateThumbnailBytesViaTempFile(
            lambda temp_path: HydrusVideoHandling.RenderImageToPNGPath( path, temp_path ),
            'psd.png',
            target_resolution,
            mime,
            suffix = '.png'
        )
        
    elif mime == HC.APPLICATION_CLIP:
        
        thumbnail_bytes = _GenerateThumbnailBytesViaTempFile(
            lambda temp_path: HydrusClipHandling.ExtractDBPNGToPath( path, temp_path ),
            'clip.png',
            target_resolution,
            mime
        )
        
    elif mime == HC.APPLICATION_FLASH:
        
        thumbnail_bytes = _GenerateThumbnailBytesViaTempFile(
            lambda temp_path: HydrusFlashHandling.RenderPageToFile( path, temp_path, 1 ),
            'flash.png',
            target_resolution,
            mime
        )
        
    else:
        
        renderer = HydrusVideoHandling.VideoRendererFFMPEG( path, mime, duration, num_frames, target_resolution )
        
        # the renderer must be stopped even if reading or resizing a frame fails
        try:
            
            renderer.read_frame() # this initialises the renderer and loads the first frame as a fallback
            
            desired_thumb_frame = int( ( percentage_in / 100.0 ) * num_frames )
            
            renderer.set_position( desired_thumb_frame )
            
            numpy_image = renderer.read_frame()
            
            if numpy_image is None:
                
                raise Exception( 'Could not create a thumbnail from that video!' )
                
            
            numpy_image = HydrusImageHandling.ResizeNumPyImage( numpy_image, target_resolution ) # just in case ffmpeg doesn't deliver right
            
            thumbnail_bytes = HydrusImageHandling.GenerateThumbnailBytesNumPy( numpy_image, mime )
            
        finally:
            
            renderer.Stop()
            
            del renderer
            
        
    
    return thumbnail_bytes
2015-03-25 22:04:19 +00:00
def GetExtraHashesFromPath( path ):
    """Return the ( md5, sha1, sha512 ) raw digests of the file at path.
    
    The file is read once, block by block, and every block is fed to all three hashers.
    """
    
    hashers = ( hashlib.md5(), hashlib.sha1(), hashlib.sha512() )
    
    with open( path, 'rb' ) as file_obj:
        
        for chunk in HydrusPaths.ReadFileLikeAsBlocks( file_obj ):
            
            for hasher in hashers:
                
                hasher.update( chunk )
                
            
        
    
    return tuple( hasher.digest() for hasher in hashers )
2019-07-10 22:38:30 +00:00
def GetFileInfo( path, mime = None, ok_to_look_for_hydrus_updates = False ):
    """Gather the basic metadata for the file at path.
    
    path: the file to inspect.
    mime: the file's mime if already known; when None it is worked out with GetMime.
    ok_to_look_for_hydrus_updates: passed on to GetMime when the mime has to be worked out.
    
    Returns ( size, mime, width, height, duration, num_frames, has_audio, num_words ). Any of width,
    height, duration, num_frames and num_words that does not apply to the filetype is None.
    duration for audio files is in milliseconds.
    
    Raises HydrusExceptions.FileSizeException if the file is empty, and
    HydrusExceptions.UnsupportedFileException if the mime is not in HC.ALLOWED_MIMES.
    """
    
    size = os.path.getsize( path )
    
    if size == 0:
        
        raise HydrusExceptions.FileSizeException( 'File is of zero length!' )
        
    
    if mime is None:
        
        mime = GetMime( path, ok_to_look_for_hydrus_updates = ok_to_look_for_hydrus_updates )
        
    
    if mime not in HC.ALLOWED_MIMES:
        
        if mime == HC.TEXT_HTML:
            
            raise HydrusExceptions.UnsupportedFileException( 'Looks like HTML -- maybe the client needs to be taught how to parse this?' )
            
        elif mime == HC.APPLICATION_UNKNOWN:
            
            raise HydrusExceptions.UnsupportedFileException( 'Unknown filetype!' )
            
        else:
            
            raise HydrusExceptions.UnsupportedFileException( 'Filetype is not permitted!' )
            
        
    
    width = None
    height = None
    duration = None
    num_frames = None
    num_words = None
    
    # the apng and video branches below overwrite this with what ffmpeg reports
    has_audio = mime in HC.MIMES_THAT_DEFINITELY_HAVE_AUDIO
    
    if mime in ( HC.IMAGE_JPEG, HC.IMAGE_PNG, HC.IMAGE_GIF, HC.IMAGE_WEBP, HC.IMAGE_TIFF, HC.IMAGE_ICON ):
        
        ( ( width, height ), duration, num_frames ) = HydrusImageHandling.GetImageProperties( path, mime )
        
    elif mime == HC.APPLICATION_CLIP:
        
        ( ( width, height ), duration, num_frames ) = HydrusClipHandling.GetClipProperties( path )
        
    elif mime == HC.APPLICATION_FLASH:
        
        ( ( width, height ), duration, num_frames ) = HydrusFlashHandling.GetFlashProperties( path )
        
    elif mime == HC.IMAGE_APNG:
        
        ( ( width, height ), duration, num_frames, has_audio ) = HydrusVideoHandling.GetFFMPEGAPNGProperties( path )
        
    elif mime in ( HC.VIDEO_AVI, HC.VIDEO_FLV, HC.VIDEO_WMV, HC.VIDEO_MOV, HC.VIDEO_MP4, HC.VIDEO_MKV, HC.VIDEO_REALMEDIA, HC.VIDEO_WEBM, HC.VIDEO_MPEG ):
        
        ( ( width, height ), duration, num_frames, has_audio ) = HydrusVideoHandling.GetFFMPEGVideoProperties( path )
        
    elif mime == HC.APPLICATION_PDF:
        
        num_words = HydrusDocumentHandling.GetPDFNumWords( path ) # this now gives None until a better solution can be found
        
    elif mime == HC.APPLICATION_PSD:
        
        ( width, height ) = HydrusImageHandling.GetPSDResolution( path )
        
    elif mime in HC.AUDIO:
        
        ffmpeg_lines = HydrusVideoHandling.GetFFMPEGInfoLines( path )
        
        ( file_duration_in_s, stream_duration_in_s ) = HydrusVideoHandling.ParseFFMPEGDuration( ffmpeg_lines )
        
        duration = int( file_duration_in_s * 1000 )
        
    
    # some parsers hand back negative values; store the magnitude
    
    if width is not None and width < 0:
        
        width *= -1
        
    
    if height is not None and height < 0:
        
        # this used to flip width, which left height negative and could make a good width negative
        height *= -1
        
    
    if duration is not None and duration < 0:
        
        duration *= -1
        
    
    if num_frames is not None and num_frames < 0:
        
        num_frames *= -1
        
    
    if num_words is not None and num_words < 0:
        
        num_words *= -1
        
    
    return ( size, mime, width, height, duration, num_frames, has_audio, num_words )
2013-07-17 20:56:13 +00:00
2019-09-25 21:34:18 +00:00
def GetFileModifiedTimestamp( path ):
    """Return the last-modified time of the file at path as an integer timestamp (fraction truncated)."""
    
    return int( os.path.getmtime( path ) )
2013-08-07 22:25:18 +00:00
def GetHashFromPath( path ):
    """Return the raw sha256 digest of the file at path, reading it block by block."""
    
    hasher = hashlib.sha256()
    
    with open( path, 'rb' ) as file_obj:
        
        for chunk in HydrusPaths.ReadFileLikeAsBlocks( file_obj ):
            
            hasher.update( chunk )
            
        
    
    return hasher.digest()
2019-07-10 22:38:30 +00:00
def GetMime( path, ok_to_look_for_hydrus_updates = False ):
    """Work out the mime of the file at path.
    
    The first 256 bytes are checked against headers_and_mime; if nothing there settles it, ffmpeg is
    asked, then (optionally) the file is tested as a hydrus update, then as HTML.
    
    path: the file to inspect.
    ok_to_look_for_hydrus_updates: if True, the whole file is read and tried as a serialised hydrus update.
    
    Returns an HC mime constant, HC.APPLICATION_UNKNOWN if nothing matched.
    
    Raises HydrusExceptions.FileSizeException if the file is empty.
    """
    
    size = os.path.getsize( path )
    
    if size == 0:
        
        raise HydrusExceptions.FileSizeException( 'File is of zero length!' )
        
    
    with open( path, 'rb' ) as f:
        
        bit_to_check = f.read( 256 )
        
    
    for ( offsets_and_headers, mime ) in headers_and_mime:
        
        # every ( offset, header ) pair of an entry has to match
        it_passes = all( bit_to_check[ offset: ].startswith( header ) for ( offset, header ) in offsets_and_headers )
        
        if not it_passes:
            
            continue
            
        
        if mime == HC.UNDETERMINED_WM:
            
            if HydrusVideoHandling.HasVideoStream( path ):
                
                return HC.VIDEO_WMV
                
            
            # we'll catch and verify wma later
            
        elif mime == HC.UNDETERMINED_PNG:
            
            if IsPNGAnimated( bit_to_check ):
                
                return HC.IMAGE_APNG
                
            else:
                
                return HC.IMAGE_PNG
                
            
        else:
            
            return mime
            
        
    
    try:
        
        mime = HydrusVideoHandling.GetMime( path )
        
        if mime != HC.APPLICATION_UNKNOWN:
            
            return mime
            
        
    except HydrusExceptions.UnsupportedFileException:
        
        pass
        
    except Exception as e:
        
        HydrusData.Print( 'FFMPEG had trouble with: ' + path )
        HydrusData.PrintException( e, do_wait = False )
        
    
    if ok_to_look_for_hydrus_updates:
        
        with open( path, 'rb' ) as f:
            
            update_network_bytes = f.read()
            
        
        try:
            
            update = HydrusSerialisable.CreateFromNetworkBytes( update_network_bytes )
            
            if isinstance( update, HydrusNetwork.ContentUpdate ):
                
                return HC.APPLICATION_HYDRUS_UPDATE_CONTENT
                
            elif isinstance( update, HydrusNetwork.DefinitionsUpdate ):
                
                return HC.APPLICATION_HYDRUS_UPDATE_DEFINITIONS
                
            
        except Exception:
            
            # best-effort: a file that does not deserialise is simply not a hydrus update
            # (Exception rather than a bare except, so KeyboardInterrupt/SystemExit still propagate)
            pass
            
        
    
    if HydrusText.LooksLikeHTML( bit_to_check ):
        
        return HC.TEXT_HTML
        
    
    return HC.APPLICATION_UNKNOWN
2016-12-07 22:12:52 +00:00
2021-10-06 20:59:30 +00:00
def IsPNGAnimated( file_header_bytes ):
    """Return True if the given leading PNG bytes contain an acTL chunk declaring more than one frame."""
    
    actl_chunk = HydrusVideoHandling.GetAPNGACTLChunk( file_header_bytes )
    
    if actl_chunk is None:
        
        # no acTL chunk found in these bytes, so treat it as a static png
        return False
        
    
    # this is an animated png
    # acTL chunk in an animated png is 4 bytes of num frames, then 4 bytes of num times to loop
    # https://wiki.mozilla.org/APNG_Specification#.60acTL.60:_The_Animation_Control_Chunk
    
    frame_count = HydrusVideoHandling.GetAPNGNumFrames( actl_chunk )
    
    return frame_count > 1