2023-08-12 13:36:21 +00:00
from hydrus . core import HydrusConstants as HC
from hydrus . core import HydrusPSDHandling
2021-06-30 21:27:35 +00:00
from hydrus . core import HydrusData
from hydrus . core import HydrusExceptions
from hydrus . core import HydrusFileHandling
from hydrus . core import HydrusGlobals as HG
2023-04-19 20:38:13 +00:00
from hydrus . core import HydrusTime
2023-10-04 20:51:17 +00:00
from hydrus . core . images import HydrusBlurhash
from hydrus . core . images import HydrusImageHandling
from hydrus . core . images import HydrusImageMetadata
from hydrus . core . images import HydrusImageOpening
2021-06-30 21:27:35 +00:00
from hydrus . client import ClientConstants as CC
2023-09-06 19:49:46 +00:00
from hydrus . client import ClientFiles
2021-06-30 21:27:35 +00:00
from hydrus . client import ClientImageHandling
from hydrus . client . importing . options import FileImportOptions
class FileImportStatus(object):
    """Status record for one file moving through the import pipeline.

    Bundles a status code (one of the ``CC.STATUS_*`` constants) with the
    file's hash, its mime and a free-text note.

    NOTE(review): every string literal in this block carries a leading and
    trailing space (the default ``note`` is ``' '`` rather than ``''``). That
    looks like an extraction artifact; the literals are reproduced verbatim
    here -- confirm against the upstream file before relying on them.
    """

    def __init__(self, status, hash, mime=None, note=' '):
        """Store the four fields as plain public attributes.

        :param status: a ``CC.STATUS_*`` value.
        :param hash: the file hash, or None when not yet known.
        :param mime: the file mime, or None when not yet known.
        :param note: free-text explanation shown alongside the status.
        """
        self.status = status
        self.hash = hash
        self.mime = mime
        self.note = note

    def __str__(self):
        return ' File Import Status: {} '.format(self.ToString())

    def AlreadyInDB(self):
        """Return True when the status is ``CC.STATUS_SUCCESSFUL_BUT_REDUNDANT``."""
        return self.status == CC.STATUS_SUCCESSFUL_BUT_REDUNDANT

    def Duplicate(self) -> "FileImportStatus":
        """Return a new, independent status object carrying the same four fields."""
        return FileImportStatus(self.status, self.hash, mime=self.mime, note=self.note)

    def ShouldImport(self, file_import_options: "FileImportOptions.FileImportOptions"):
        """Decide whether a file with this status should go through a full import.

        Unknown files are always imported. Deleted files are imported only
        when the given options do not exclude deleted files. Every other
        status yields False.
        """
        if self.status == CC.STATUS_UNKNOWN:
            return True

        if self.status == CC.STATUS_DELETED:
            return not file_import_options.ExcludesDeleted()

        return False

    def ToString(self) -> str:
        """Return the looked-up status text, followed by the note when one is set."""
        text = CC.status_string_lookup[self.status]

        # Truthiness rather than len(): an unset (None) note is skipped, not a TypeError.
        if self.note:
            text = ' {} , {} '.format(text, self.note)

        return text

    @staticmethod
    def STATICGetUnknownStatus() -> "FileImportStatus":
        """Return a fresh status with ``CC.STATUS_UNKNOWN`` and no hash."""
        return FileImportStatus(CC.STATUS_UNKNOWN, None)
2022-12-21 22:00:27 +00:00
def CheckFileImportStatus(file_import_status: "FileImportStatus") -> "FileImportStatus":
    """Double-check an 'already in db' status against the actual file storage.

    If the status says the file is already in the database and both its hash
    and mime are known, the client files manager is asked for the file's
    path. When that lookup raises ``FileMissingException`` a new status with
    ``CC.STATUS_UNKNOWN`` (same hash and mime, explanatory note) is returned
    so the import goes ahead. In every other case the status passed in is
    returned unchanged.

    NOTE(review): the note literal below has a leading and trailing space,
    which looks like an extraction artifact; it is reproduced verbatim --
    confirm against the upstream file.

    :param file_import_status: the status to verify.
    :return: the same status object, or a replacement 'unknown' status.
    """
    if not file_import_status.AlreadyInDB():
        return file_import_status

    file_hash = file_import_status.hash
    mime = file_import_status.mime

    # Without both hash and mime there is nothing to look up.
    if file_hash is None or mime is None:
        return file_import_status

    try:
        # Called only for its side effect of raising when the file is missing.
        HG.client_controller.client_files_manager.GetFilePath(file_hash, mime=mime)
    except HydrusExceptions.FileMissingException:
        note = ' The client believed this file was already in the db, but it was truly missing! Import will go ahead, in an attempt to fix the situation. '

        return FileImportStatus(CC.STATUS_UNKNOWN, file_hash, mime=mime, note=note)

    return file_import_status
class FileImportJob(object):
    """Imports one file, held at a temporary path, into the client.

    ``DoWork`` is the entry point: it hashes the file, looks up its existing
    status, and either runs a full import (metadata generation, validity
    check, copy into file storage, database write) or, for a file that is
    already known, makes sure it is present in all desired file services.
    The generated metadata is kept on the instance and exposed through the
    ``Get*`` / ``Has*`` accessors.

    NOTE(review): every string literal in this block carries a leading and
    trailing space (including option keys and database command names such as
    ``' import_file '``). That looks like an extraction artifact; the
    literals are reproduced verbatim here -- confirm against the upstream
    file before relying on them.
    """

    def __init__(self, temp_path: str, file_import_options: "FileImportOptions.FileImportOptions"):
        """Set up the job; no file work happens here.

        :param temp_path: path of the file to import.
        :param file_import_options: import rules; a 'default' options object
            is swapped for the real options for the loud import type.
        """
        if HG.file_import_report_mode:
            HydrusData.ShowText(' File import job created for path {} . '.format(temp_path))

        if file_import_options.IsDefault():
            file_import_options = FileImportOptions.GetRealFileImportOptions(
                file_import_options, FileImportOptions.IMPORT_TYPE_LOUD
            )

        self._temp_path = temp_path
        self._file_import_options = file_import_options

        self._pre_import_file_status = FileImportStatus.STATICGetUnknownStatus()
        self._post_import_file_status = FileImportStatus.STATICGetUnknownStatus()

        # Everything below is filled in by GenerateInfo.
        self._file_info = None
        self._thumbnail_bytes = None
        self._perceptual_hashes = None
        self._extra_hashes = None
        self._has_exif = None
        self._has_human_readable_embedded_metadata = None
        self._has_icc_profile = None
        self._pixel_hash = None
        self._file_modified_timestamp = None
        self._blurhash = None

    def CheckIsGoodToImport(self):
        """Run the file import options' validity check on the generated file info.

        Requires ``GenerateInfo`` to have populated the file info first.
        ``DoWork`` expects a rejection to surface from here as
        ``HydrusExceptions.FileImportRulesException``.
        """
        if HG.file_import_report_mode:
            HydrusData.ShowText(' File import job testing if good to import for file import options ')

        (size, mime, width, height, duration, num_frames, has_audio, num_words) = self._file_info

        self._file_import_options.CheckFileIsValid(size, mime, width, height)

    def DoWork(self, status_hook=None) -> "FileImportStatus":
        """Run the whole import and return the resulting status.

        :param status_hook: optional callable taking a progress string.
        :return: the post-import status, which is also stored on the job.
        """
        if HG.file_import_report_mode:
            HydrusData.ShowText(' File import job starting work. ')

        self.GeneratePreImportHashAndStatus(status_hook=status_hook)

        if self._pre_import_file_status.ShouldImport(self._file_import_options):
            self.GenerateInfo(status_hook=status_hook)

            # Stays None unless the import rules veto the file.
            vetoed_file_import_status = None

            try:
                self.CheckIsGoodToImport()
            except HydrusExceptions.FileImportRulesException as e:
                vetoed_file_import_status = self._pre_import_file_status.Duplicate()
                vetoed_file_import_status.status = CC.STATUS_VETOED
                vetoed_file_import_status.note = str(e)

            if vetoed_file_import_status is None:
                file_hash = self._pre_import_file_status.hash
                mime = self._pre_import_file_status.mime

                if status_hook is not None:
                    status_hook(' copying file into file storage ')

                HG.client_controller.client_files_manager.AddFile(
                    file_hash, mime, self._temp_path, thumbnail_bytes=self._thumbnail_bytes
                )

                if status_hook is not None:
                    status_hook(' importing to database ')

                self._file_import_options.CheckReadyToImport()

                self._post_import_file_status = HG.client_controller.WriteSynchronous(' import_file ', self)
            else:
                self._post_import_file_status = vetoed_file_import_status
        else:
            # if the file is already in the database but not in all the desired file services, let's push content updates to make it happen
            if self._pre_import_file_status.status == CC.STATUS_SUCCESSFUL_BUT_REDUNDANT:
                media_result = HG.client_controller.Read(' media_result ', self._pre_import_file_status.hash)

                destination_location_context = self._file_import_options.GetDestinationLocationContext()

                desired_file_service_keys = destination_location_context.current_service_keys
                current_file_service_keys = media_result.GetLocationsManager().GetCurrent()

                file_service_keys_to_add_to = set(desired_file_service_keys).difference(current_file_service_keys)

                if file_service_keys_to_add_to:
                    file_info_manager = media_result.GetFileInfoManager()
                    now = HydrusTime.GetNow()

                    # One single-item update list per missing service.
                    service_keys_to_content_updates = {
                        service_key: [
                            HydrusData.ContentUpdate(
                                HC.CONTENT_TYPE_FILES, HC.CONTENT_UPDATE_ADD, (file_info_manager, now)
                            )
                        ]
                        for service_key in file_service_keys_to_add_to
                    }

                    HG.client_controller.WriteSynchronous(' content_updates ', service_keys_to_content_updates)

            self._post_import_file_status = self._pre_import_file_status.Duplicate()

        if HG.file_import_report_mode:
            HydrusData.ShowText(' File import job is done, now publishing content updates ')

        self.PubsubContentUpdates()

        return self._post_import_file_status

    def GeneratePreImportHashAndStatus(self, status_hook=None):
        """Hash the file and look up what the database already knows about it.

        The looked-up status is given the freshly computed hash when it has
        none, then passed through ``CheckFileImportStatus`` before being
        stored as the pre-import status.

        :param status_hook: optional callable taking a progress string.
        """
        if status_hook is not None:
            status_hook(' calculating hash ')

        file_hash = HydrusFileHandling.GetHashFromPath(self._temp_path)

        if HG.file_import_report_mode:
            HydrusData.ShowText(' File import job hash: {} '.format(file_hash.hex()))

        if status_hook is not None:
            status_hook(' checking for file status ')

        self._pre_import_file_status = HG.client_controller.Read(
            ' hash_status ', ' sha256 ', file_hash, prefix=' file recognised '
        )

        if self._pre_import_file_status.hash is None:
            self._pre_import_file_status.hash = file_hash

        self._pre_import_file_status = CheckFileImportStatus(self._pre_import_file_status)

        if HG.file_import_report_mode:
            HydrusData.ShowText(
                ' File import job pre-import status: {} '.format(self._pre_import_file_status.ToString())
            )

    def GenerateInfo(self, status_hook=None):
        """Generate all per-file metadata the import needs and store it on the job.

        Determines the mime (if not already known), optionally rejects
        decompression bombs, then produces file info, thumbnail and blurhash,
        perceptual hashes, extra hashes, EXIF / embedded-metadata / ICC
        flags, pixel hash and the file's modified timestamp, each only for
        mimes listed in the matching ``HC`` collection.

        :param status_hook: optional callable taking a progress string.
        :raises HydrusExceptions.DecompressionBombException: when the file is
            detected as a decompression bomb and the options do not allow them.
        """
        if self._pre_import_file_status.mime is None:
            if status_hook is not None:
                status_hook(' generating filetype ')

            mime = HydrusFileHandling.GetMime(self._temp_path)

            self._pre_import_file_status.mime = mime
        else:
            mime = self._pre_import_file_status.mime

        if HG.file_import_report_mode:
            HydrusData.ShowText(' File import job mime: {} '.format(HC.mime_string_lookup[mime]))

        if mime in HC.DECOMPRESSION_BOMB_IMAGES and not self._file_import_options.AllowsDecompressionBombs():
            if HG.file_import_report_mode:
                HydrusData.ShowText(' File import job testing for decompression bomb ')

            if HydrusImageHandling.IsDecompressionBomb(self._temp_path):
                if HG.file_import_report_mode:
                    HydrusData.ShowText(' File import job: it was a decompression bomb ')

                raise HydrusExceptions.DecompressionBombException(' Image seems to be a Decompression Bomb! ')

        if status_hook is not None:
            status_hook(' generating file metadata ')

        self._file_info = HydrusFileHandling.GetFileInfo(self._temp_path, mime=mime)

        # mime is deliberately rebound here: the file info's mime is used from now on.
        (size, mime, width, height, duration, num_frames, has_audio, num_words) = self._file_info

        if HG.file_import_report_mode:
            HydrusData.ShowText(' File import job file info: {} '.format(self._file_info))

        if mime in HC.MIMES_WITH_THUMBNAILS:
            if status_hook is not None:
                status_hook(' generating thumbnail ')

            if HG.file_import_report_mode:
                HydrusData.ShowText(' File import job generating thumbnail ')

            new_options = HG.client_controller.new_options

            bounding_dimensions = HG.client_controller.options[' thumbnail_dimensions ']
            thumbnail_scale_type = new_options.GetInteger(' thumbnail_scale_type ')
            thumbnail_dpr_percent = new_options.GetInteger(' thumbnail_dpr_percent ')

            target_resolution = HydrusImageHandling.GetThumbnailResolution(
                (width, height), bounding_dimensions, thumbnail_scale_type, thumbnail_dpr_percent
            )

            percentage_in = new_options.GetInteger(' video_thumbnail_percentage_in ')

            thumbnail_numpy = HydrusFileHandling.GenerateThumbnailNumPy(
                self._temp_path, target_resolution, mime, duration, num_frames, percentage_in=percentage_in
            )

            # this guy handles almost all his own exceptions now, so no need for clever catching. if it fails, we are prob talking an I/O failure, which is not a 'thumbnail failed' error
            self._thumbnail_bytes = HydrusImageHandling.GenerateThumbnailBytesFromNumPy(thumbnail_numpy)

            try:
                self._blurhash = HydrusBlurhash.GetBlurhashFromNumPy(thumbnail_numpy)
            except Exception:
                # Best effort: a failed blurhash leaves it as None and must not abort the import.
                pass

        if mime in HC.FILES_THAT_HAVE_PERCEPTUAL_HASH:
            if status_hook is not None:
                status_hook(' generating similar files metadata ')

            if HG.file_import_report_mode:
                HydrusData.ShowText(' File import job generating perceptual_hashes ')

            self._perceptual_hashes = ClientImageHandling.GenerateShapePerceptualHashes(self._temp_path, mime)

            if HG.file_import_report_mode:
                HydrusData.ShowText(
                    ' File import job generated {} perceptual_hashes: {} '.format(
                        len(self._perceptual_hashes),
                        [perceptual_hash.hex() for perceptual_hash in self._perceptual_hashes],
                    )
                )

        if HG.file_import_report_mode:
            HydrusData.ShowText(' File import job generating other hashes ')

        if status_hook is not None:
            status_hook(' generating additional hashes ')

        self._extra_hashes = HydrusFileHandling.GetExtraHashesFromPath(self._temp_path)

        #

        has_exif = False

        # Opened lazily and shared between the EXIF and ICC checks below.
        raw_pil_image = None

        if mime in HC.FILES_THAT_CAN_HAVE_EXIF:
            try:
                if raw_pil_image is None:
                    raw_pil_image = HydrusImageOpening.RawOpenPILImage(self._temp_path)

                has_exif = HydrusImageMetadata.HasEXIF(raw_pil_image)
            except Exception:
                # Best effort: on any failure the file is recorded as having no EXIF.
                pass

        self._has_exif = has_exif

        self._has_human_readable_embedded_metadata = ClientFiles.HasHumanReadableEmbeddedMetadata(
            self._temp_path, mime
        )

        has_icc_profile = False

        if mime in HC.FILES_THAT_CAN_HAVE_ICC_PROFILE:
            try:
                if mime == HC.APPLICATION_PSD:
                    has_icc_profile = HydrusPSDHandling.PSDHasICCProfile(self._temp_path)
                else:
                    if raw_pil_image is None:
                        raw_pil_image = HydrusImageOpening.RawOpenPILImage(self._temp_path)

                    has_icc_profile = HydrusImageMetadata.HasICCProfile(raw_pil_image)
            except Exception:
                # Best effort: on any failure the file is recorded as having no ICC profile.
                pass

        self._has_icc_profile = has_icc_profile

        #

        if mime in HC.FILES_THAT_CAN_HAVE_PIXEL_HASH and duration is None:
            try:
                self._pixel_hash = HydrusImageHandling.GetImagePixelHash(self._temp_path, mime)
            except Exception:
                # Best effort: a failed pixel hash leaves it as None.
                pass

        self._file_modified_timestamp = HydrusFileHandling.GetFileModifiedTimestamp(self._temp_path)

    def GetExtraHashes(self):
        """Return the extra hashes produced by GenerateInfo (None before that)."""
        return self._extra_hashes

    def GetFileImportOptions(self):
        """Return the file import options this job is using."""
        return self._file_import_options

    def GetFileInfo(self):
        """Return the file info tuple produced by GenerateInfo (None before that)."""
        return self._file_info

    def GetFileModifiedTimestamp(self):
        """Return the file's modified timestamp read by GenerateInfo (None before that)."""
        return self._file_modified_timestamp

    def GetHash(self):
        """Return the hash held by the pre-import status."""
        return self._pre_import_file_status.hash

    def GetMime(self):
        """Return the mime held by the pre-import status."""
        return self._pre_import_file_status.mime

    def GetPerceptualHashes(self):
        """Return the perceptual hashes, or None when none were generated."""
        return self._perceptual_hashes

    def GetPixelHash(self):
        """Return the pixel hash, or None when none was generated."""
        return self._pixel_hash

    def HasEXIF(self) -> bool:
        """Return the EXIF flag set by GenerateInfo (None before that)."""
        return self._has_exif

    def HasHumanReadableEmbeddedMetadata(self) -> bool:
        """Return the embedded-metadata flag set by GenerateInfo (None before that)."""
        return self._has_human_readable_embedded_metadata

    def HasICCProfile(self) -> bool:
        """Return the ICC profile flag set by GenerateInfo (None before that)."""
        return self._has_icc_profile

    def GetBlurhash(self) -> str:
        """Return the blurhash, or None when none was generated."""
        return self._blurhash

    def PubsubContentUpdates(self):
        """Archive the file when it was already in the db and the options auto-archive.

        Sends a single archive content update for this job's hash to the
        combined local file service; does nothing otherwise.
        """
        if self._post_import_file_status.AlreadyInDB() and self._file_import_options.AutomaticallyArchives():
            hashes = {self.GetHash()}

            service_keys_to_content_updates = {
                CC.COMBINED_LOCAL_FILE_SERVICE_KEY: [
                    HydrusData.ContentUpdate(HC.CONTENT_TYPE_FILES, HC.CONTENT_UPDATE_ARCHIVE, hashes)
                ]
            }

            HG.client_controller.Write(' content_updates ', service_keys_to_content_updates)
2023-09-27 21:12:55 +00:00