hydrus/hydrus/client/importing/ClientImportFiles.py

550 lines
18 KiB
Python
Raw Normal View History

2023-08-12 13:36:21 +00:00
from hydrus.core import HydrusConstants as HC
from hydrus.core import HydrusPSDHandling
2021-06-30 21:27:35 +00:00
from hydrus.core import HydrusData
from hydrus.core import HydrusExceptions
from hydrus.core import HydrusFileHandling
from hydrus.core import HydrusGlobals as HG
2023-04-19 20:38:13 +00:00
from hydrus.core import HydrusTime
2023-10-04 20:51:17 +00:00
from hydrus.core.images import HydrusBlurhash
from hydrus.core.images import HydrusImageHandling
from hydrus.core.images import HydrusImageMetadata
from hydrus.core.images import HydrusImageOpening
2021-06-30 21:27:35 +00:00
from hydrus.client import ClientConstants as CC
2023-09-06 19:49:46 +00:00
from hydrus.client import ClientFiles
2021-06-30 21:27:35 +00:00
from hydrus.client import ClientImageHandling
from hydrus.client.importing.options import FileImportOptions
class FileImportStatus( object ):
    """
    Small record describing what the client knows about a file for import purposes.
    
    Carries a CC.STATUS_* value, the file's hash, an optional mime, and a
    free-text note.
    """
    
    def __init__( self, status, hash, mime = None, note = '' ):
        
        self.status = status
        self.hash = hash
        self.mime = mime
        self.note = note
        
    
    def __str__( self ):
        
        description = self.ToString()
        
        return 'File Import Status: {}'.format( description )
        
    
    def AlreadyInDB( self ):
        """Return True when the status is CC.STATUS_SUCCESSFUL_BUT_REDUNDANT."""
        
        return self.status == CC.STATUS_SUCCESSFUL_BUT_REDUNDANT
        
    
    def Duplicate( self ) -> "FileImportStatus":
        """Return a new FileImportStatus carrying the same four field values."""
        
        return FileImportStatus( self.status, self.hash, mime = self.mime, note = self.note )
        
    
    def ShouldImport( self, file_import_options: FileImportOptions.FileImportOptions ):
        """
        Decide whether an import should go ahead for a file with this status.
        
        Unknown files always import. Deleted files import only when the given
        options do not exclude deleted files. Every other status does not import.
        """
        
        if self.status == CC.STATUS_UNKNOWN:
            
            return True
            
        
        if self.status != CC.STATUS_DELETED:
            
            return False
            
        
        # deleted: only re-import if the options permit it
        if file_import_options.ExcludesDeleted():
            
            return False
            
        
        return True
        
    
    def ToString( self ) -> str:
        """Return the status's display string, followed by ', <note>' when a note is set."""
        
        status_text = CC.status_string_lookup[ self.status ]
        
        if len( self.note ) == 0:
            
            return status_text
            
        
        return '{}, {}'.format( status_text, self.note )
        
    
    @staticmethod
    def STATICGetUnknownStatus() -> "FileImportStatus":
        """Return a fresh status of CC.STATUS_UNKNOWN with no hash."""
        
        return FileImportStatus( CC.STATUS_UNKNOWN, None )
        
    
2022-12-21 22:00:27 +00:00
def CheckFileImportStatus( file_import_status: FileImportStatus ) -> FileImportStatus:
    """
    Sanity-check an 'already in db' status against the client files manager.
    
    If the status says the file is already in the db and both hash and mime are
    known, the file path is requested from the client files manager. Should that
    raise FileMissingException, a new CC.STATUS_UNKNOWN status (same hash and
    mime, with an explanatory note) is returned so the import goes ahead.
    In every other case the given status object is returned unchanged.
    """
    
    if not file_import_status.AlreadyInDB():
        
        return file_import_status
        
    
    known_hash = file_import_status.hash
    known_mime = file_import_status.mime
    
    # without both values we cannot ask for the file path, so there is nothing to verify
    if known_hash is None or known_mime is None:
        
        return file_import_status
        
    
    try:
        
        HG.client_controller.client_files_manager.GetFilePath( known_hash, mime = known_mime )
        
    except HydrusExceptions.FileMissingException:
        
        note = 'The client believed this file was already in the db, but it was truly missing! Import will go ahead, in an attempt to fix the situation.'
        
        return FileImportStatus( CC.STATUS_UNKNOWN, known_hash, mime = known_mime, note = note )
        
    
    return file_import_status
    
class FileImportJob( object ):
    """
    One attempt to import the file at a temporary path into the client.
    
    The job hashes the file, looks up its existing status, generates metadata
    (file info, thumbnail, blurhash, perceptual hashes, extra hashes, EXIF / ICC /
    embedded-metadata flags, pixel hash, modified timestamp) and then hands
    itself to the database via the 'import_file' write. DoWork is the entry point;
    the Get*/Has* accessors expose the generated values.
    """
    
    def __init__( self, temp_path: str, file_import_options: FileImportOptions.FileImportOptions ):
        """
        :param temp_path: path of the file to import.
        :param file_import_options: options governing the import. If they report
            IsDefault(), they are swapped for the result of GetRealFileImportOptions
            with IMPORT_TYPE_LOUD.
        """
        
        if HG.file_import_report_mode:
            
            HydrusData.ShowText( 'File import job created for path {}.'.format( temp_path ) )
            
        
        if file_import_options.IsDefault():
            
            file_import_options = FileImportOptions.GetRealFileImportOptions( file_import_options, FileImportOptions.IMPORT_TYPE_LOUD )
            
        
        self._temp_path = temp_path
        self._file_import_options = file_import_options
        
        self._pre_import_file_status = FileImportStatus.STATICGetUnknownStatus()
        self._post_import_file_status = FileImportStatus.STATICGetUnknownStatus()
        
        # everything below is populated by GenerateInfo and stays None until then
        self._file_info = None
        self._thumbnail_bytes = None
        self._perceptual_hashes = None
        self._extra_hashes = None
        self._has_exif = None
        self._has_human_readable_embedded_metadata = None
        self._has_icc_profile = None
        self._pixel_hash = None
        self._file_modified_timestamp = None
        self._blurhash = None
        
    
    def CheckIsGoodToImport( self ):
        """
        Ask the file import options to validate the generated file info.
        
        Requires GenerateInfo to have populated the file info first. DoWork treats
        a HydrusExceptions.FileImportRulesException raised from here as a veto.
        """
        
        if HG.file_import_report_mode:
            
            HydrusData.ShowText( 'File import job testing if good to import for file import options' )
            
        
        ( size, mime, width, height, duration, num_frames, has_audio, num_words ) = self._file_info
        
        self._file_import_options.CheckFileIsValid( size, mime, width, height )
        
    
    def DoWork( self, status_hook = None ) -> FileImportStatus:
        """
        Run the whole import and return the post-import status.
        
        :param status_hook: optional callable taking one progress string.
        :return: the FileImportStatus after the import attempt. This is the result
            of the 'import_file' write, a CC.STATUS_VETOED copy of the pre-import
            status if the options rejected the file, or a copy of the pre-import
            status if no import was needed.
        """
        
        if HG.file_import_report_mode:
            
            HydrusData.ShowText( 'File import job starting work.' )
            
        
        self.GeneratePreImportHashAndStatus( status_hook = status_hook )
        
        if self._pre_import_file_status.ShouldImport( self._file_import_options ):
            
            self.GenerateInfo( status_hook = status_hook )
            
            try:
                
                self.CheckIsGoodToImport()
                
            except HydrusExceptions.FileImportRulesException as e:
                
                # the options vetoed this file; report that, carrying the reason in the note
                vetoed_file_import_status = self._pre_import_file_status.Duplicate()
                
                vetoed_file_import_status.status = CC.STATUS_VETOED
                vetoed_file_import_status.note = str( e )
                
                self._post_import_file_status = vetoed_file_import_status
                
            else:
                
                # exceptions raised in here propagate to the caller; only the validity check above is guarded
                file_hash = self._pre_import_file_status.hash
                mime = self._pre_import_file_status.mime
                
                if status_hook is not None:
                    
                    status_hook( 'copying file into file storage' )
                    
                
                HG.client_controller.client_files_manager.AddFile( file_hash, mime, self._temp_path, thumbnail_bytes = self._thumbnail_bytes )
                
                if status_hook is not None:
                    
                    status_hook( 'importing to database' )
                    
                
                self._file_import_options.CheckReadyToImport()
                
                self._post_import_file_status = HG.client_controller.WriteSynchronous( 'import_file', self )
                
            
        else:
            
            # if the file is already in the database but not in all the desired file services, let's push content updates to make it happen
            if self._pre_import_file_status.status == CC.STATUS_SUCCESSFUL_BUT_REDUNDANT:
                
                media_result = HG.client_controller.Read( 'media_result', self._pre_import_file_status.hash )
                
                destination_location_context = self._file_import_options.GetDestinationLocationContext()
                
                desired_file_service_keys = destination_location_context.current_service_keys
                current_file_service_keys = media_result.GetLocationsManager().GetCurrent()
                
                file_service_keys_to_add_to = set( desired_file_service_keys ).difference( current_file_service_keys )
                
                if file_service_keys_to_add_to:
                    
                    file_info_manager = media_result.GetFileInfoManager()
                    now = HydrusTime.GetNow()
                    
                    service_keys_to_content_updates = {
                        service_key : [ HydrusData.ContentUpdate( HC.CONTENT_TYPE_FILES, HC.CONTENT_UPDATE_ADD, ( file_info_manager, now ) ) ]
                        for service_key in file_service_keys_to_add_to
                    }
                    
                    HG.client_controller.WriteSynchronous( 'content_updates', service_keys_to_content_updates )
                    
                
            
            self._post_import_file_status = self._pre_import_file_status.Duplicate()
            
        
        if HG.file_import_report_mode:
            
            HydrusData.ShowText( 'File import job is done, now publishing content updates' )
            
        
        self.PubsubContentUpdates()
        
        return self._post_import_file_status
        
    
    def GeneratePreImportHashAndStatus( self, status_hook = None ):
        """
        Hash the temp file and look up its current status in the database.
        
        The result is stored as the pre-import status. If the database answer has
        no hash, the freshly computed one is filled in. The status is then passed
        through CheckFileImportStatus.
        
        :param status_hook: optional callable taking one progress string.
        """
        
        if status_hook is not None:
            
            status_hook( 'calculating hash' )
            
        
        file_hash = HydrusFileHandling.GetHashFromPath( self._temp_path )
        
        if HG.file_import_report_mode:
            
            HydrusData.ShowText( 'File import job hash: {}'.format( file_hash.hex() ) )
            
        
        if status_hook is not None:
            
            status_hook( 'checking for file status' )
            
        
        self._pre_import_file_status = HG.client_controller.Read( 'hash_status', 'sha256', file_hash, prefix = 'file recognised' )
        
        if self._pre_import_file_status.hash is None:
            
            self._pre_import_file_status.hash = file_hash
            
        
        self._pre_import_file_status = CheckFileImportStatus( self._pre_import_file_status )
        
        if HG.file_import_report_mode:
            
            HydrusData.ShowText( 'File import job pre-import status: {}'.format( self._pre_import_file_status.ToString() ) )
            
        
    
    def GenerateInfo( self, status_hook = None ):
        """
        Generate all the metadata the database import needs from the temp file.
        
        Determines the mime if the pre-import status does not have one, runs the
        decompression bomb test when the options disallow bombs, and then populates
        file info, thumbnail bytes, blurhash, perceptual hashes, extra hashes, the
        EXIF / embedded metadata / ICC flags, pixel hash and modified timestamp.
        
        Blurhash, EXIF, ICC profile and pixel hash are best-effort: an ordinary
        exception while generating one of them leaves that value at its default
        rather than failing the import.
        
        :param status_hook: optional callable taking one progress string.
        :raises HydrusExceptions.DecompressionBombException: if the file is judged
            a decompression bomb and the options do not allow those.
        """
        
        if self._pre_import_file_status.mime is None:
            
            if status_hook is not None:
                
                status_hook( 'generating filetype' )
                
            
            mime = HydrusFileHandling.GetMime( self._temp_path )
            
            self._pre_import_file_status.mime = mime
            
        else:
            
            mime = self._pre_import_file_status.mime
            
        
        if HG.file_import_report_mode:
            
            HydrusData.ShowText( 'File import job mime: {}'.format( HC.mime_string_lookup[ mime ] ) )
            
        
        if mime in HC.DECOMPRESSION_BOMB_IMAGES and not self._file_import_options.AllowsDecompressionBombs():
            
            if HG.file_import_report_mode:
                
                HydrusData.ShowText( 'File import job testing for decompression bomb' )
                
            
            if HydrusImageHandling.IsDecompressionBomb( self._temp_path ):
                
                if HG.file_import_report_mode:
                    
                    HydrusData.ShowText( 'File import job: it was a decompression bomb' )
                    
                
                raise HydrusExceptions.DecompressionBombException( 'Image seems to be a Decompression Bomb!' )
                
            
        
        if status_hook is not None:
            
            status_hook( 'generating file metadata' )
            
        
        self._file_info = HydrusFileHandling.GetFileInfo( self._temp_path, mime = mime )
        
        # note that mime is rebound here to whatever GetFileInfo reports
        ( size, mime, width, height, duration, num_frames, has_audio, num_words ) = self._file_info
        
        if HG.file_import_report_mode:
            
            HydrusData.ShowText( 'File import job file info: {}'.format( self._file_info ) )
            
        
        if mime in HC.MIMES_WITH_THUMBNAILS:
            
            if status_hook is not None:
                
                status_hook( 'generating thumbnail' )
                
            
            if HG.file_import_report_mode:
                
                HydrusData.ShowText( 'File import job generating thumbnail' )
                
            
            bounding_dimensions = HG.client_controller.options[ 'thumbnail_dimensions' ]
            thumbnail_scale_type = HG.client_controller.new_options.GetInteger( 'thumbnail_scale_type' )
            thumbnail_dpr_percent = HG.client_controller.new_options.GetInteger( 'thumbnail_dpr_percent' )
            
            target_resolution = HydrusImageHandling.GetThumbnailResolution( ( width, height ), bounding_dimensions, thumbnail_scale_type, thumbnail_dpr_percent )
            
            percentage_in = HG.client_controller.new_options.GetInteger( 'video_thumbnail_percentage_in' )
            
            thumbnail_numpy = HydrusFileHandling.GenerateThumbnailNumPy( self._temp_path, target_resolution, mime, duration, num_frames, percentage_in = percentage_in )
            
            # this call handles almost all of its own exceptions, so no catching here. if it fails, it is
            # probably an I/O failure, which is not a 'thumbnail failed' error
            self._thumbnail_bytes = HydrusImageHandling.GenerateThumbnailBytesFromNumPy( thumbnail_numpy )
            
            try:
                
                self._blurhash = HydrusBlurhash.GetBlurhashFromNumPy( thumbnail_numpy )
                
            except Exception:
                
                # best-effort: leave the blurhash as None. Exception, not a bare except, so that
                # KeyboardInterrupt/SystemExit are not swallowed
                pass
                
            
        
        if mime in HC.FILES_THAT_HAVE_PERCEPTUAL_HASH:
            
            if status_hook is not None:
                
                status_hook( 'generating similar files metadata' )
                
            
            if HG.file_import_report_mode:
                
                HydrusData.ShowText( 'File import job generating perceptual_hashes' )
                
            
            self._perceptual_hashes = ClientImageHandling.GenerateShapePerceptualHashes( self._temp_path, mime )
            
            if HG.file_import_report_mode:
                
                HydrusData.ShowText( 'File import job generated {} perceptual_hashes: {}'.format( len( self._perceptual_hashes ), [ perceptual_hash.hex() for perceptual_hash in self._perceptual_hashes ] ) )
                
            
        
        if HG.file_import_report_mode:
            
            HydrusData.ShowText( 'File import job generating other hashes' )
            
        
        if status_hook is not None:
            
            status_hook( 'generating additional hashes' )
            
        
        self._extra_hashes = HydrusFileHandling.GetExtraHashesFromPath( self._temp_path )
        
        #
        
        has_exif = False
        
        # opened lazily and shared between the EXIF and ICC checks so the file is raw-opened at most once
        raw_pil_image = None
        
        if mime in HC.FILES_THAT_CAN_HAVE_EXIF:
            
            try:
                
                if raw_pil_image is None:
                    
                    raw_pil_image = HydrusImageOpening.RawOpenPILImage( self._temp_path )
                    
                
                has_exif = HydrusImageMetadata.HasEXIF( raw_pil_image )
                
            except Exception:
                
                # best-effort: an unreadable image just means 'no EXIF'
                pass
                
            
        
        self._has_exif = has_exif
        
        self._has_human_readable_embedded_metadata = ClientFiles.HasHumanReadableEmbeddedMetadata( self._temp_path, mime )
        
        has_icc_profile = False
        
        if mime in HC.FILES_THAT_CAN_HAVE_ICC_PROFILE:
            
            try:
                
                if mime == HC.APPLICATION_PSD:
                    
                    has_icc_profile = HydrusPSDHandling.PSDHasICCProfile( self._temp_path )
                    
                else:
                    
                    if raw_pil_image is None:
                        
                        raw_pil_image = HydrusImageOpening.RawOpenPILImage( self._temp_path )
                        
                    
                    has_icc_profile = HydrusImageMetadata.HasICCProfile( raw_pil_image )
                    
                
            except Exception:
                
                # best-effort: an unreadable file just means 'no ICC profile'
                pass
                
            
        
        self._has_icc_profile = has_icc_profile
        
        #
        
        # duration is None is the guard against animated/timed files here
        if mime in HC.FILES_THAT_CAN_HAVE_PIXEL_HASH and duration is None:
            
            try:
                
                self._pixel_hash = HydrusImageHandling.GetImagePixelHash( self._temp_path, mime )
                
            except Exception:
                
                # best-effort: leave the pixel hash as None
                pass
                
            
        
        self._file_modified_timestamp = HydrusFileHandling.GetFileModifiedTimestamp( self._temp_path )
        
    
    def GetExtraHashes( self ):
        """Return the extra hashes generated by GenerateInfo, or None if not yet generated."""
        
        return self._extra_hashes
        
    
    def GetFileImportOptions( self ):
        """Return the file import options this job is using."""
        
        return self._file_import_options
        
    
    def GetFileInfo( self ):
        """Return the file info tuple generated by GenerateInfo, or None if not yet generated."""
        
        return self._file_info
        
    
    def GetFileModifiedTimestamp( self ):
        """Return the file modified timestamp read by GenerateInfo, or None if not yet read."""
        
        return self._file_modified_timestamp
        
    
    def GetHash( self ):
        """Return the hash held by the pre-import status (None before hashing)."""
        
        return self._pre_import_file_status.hash
        
    
    def GetMime( self ):
        """Return the mime held by the pre-import status (None until known)."""
        
        return self._pre_import_file_status.mime
        
    
    def GetPerceptualHashes( self ):
        """Return the perceptual hashes, or None if none were generated."""
        
        return self._perceptual_hashes
        
    
    def GetPixelHash( self ):
        """Return the pixel hash, or None if it was not generated."""
        
        return self._pixel_hash
        
    
    def HasEXIF( self ) -> bool:
        """Return the EXIF flag. This is None until GenerateInfo has run."""
        
        return self._has_exif
        
    
    def HasHumanReadableEmbeddedMetadata( self ) -> bool:
        """Return the embedded metadata flag. This is None until GenerateInfo has run."""
        
        return self._has_human_readable_embedded_metadata
        
    
    def HasICCProfile( self ) -> bool:
        """Return the ICC profile flag. This is None until GenerateInfo has run."""
        
        return self._has_icc_profile
        
    
    def GetBlurhash( self ) -> str:
        """Return the blurhash. This is None if no blurhash was generated."""
        
        return self._blurhash
        
    
    def PubsubContentUpdates( self ):
        """
        Publish follow-up content updates after the import attempt.
        
        If the post-import status is 'already in db' and the options automatically
        archive, an archive content update for this file's hash is written to the
        combined local file service.
        """
        
        if self._post_import_file_status.AlreadyInDB() and self._file_import_options.AutomaticallyArchives():
            
            hashes = { self.GetHash() }
            
            service_keys_to_content_updates = { CC.COMBINED_LOCAL_FILE_SERVICE_KEY : [ HydrusData.ContentUpdate( HC.CONTENT_TYPE_FILES, HC.CONTENT_UPDATE_ARCHIVE, hashes ) ] }
            
            HG.client_controller.Write( 'content_updates', service_keys_to_content_updates )
            
        
    
2023-09-27 21:12:55 +00:00