527 lines
17 KiB
Python
527 lines
17 KiB
Python
import typing
|
|
|
|
from hydrus.core import HydrusConstants as HC
|
|
from hydrus.core import HydrusData
|
|
from hydrus.core import HydrusExceptions
|
|
from hydrus.core import HydrusFileHandling
|
|
from hydrus.core import HydrusImageHandling
|
|
from hydrus.core import HydrusGlobals as HG
|
|
|
|
from hydrus.client import ClientConstants as CC
|
|
from hydrus.client import ClientImageHandling
|
|
from hydrus.client.importing.options import FileImportOptions
|
|
|
|
class FileImportStatus( object ):
    """
    Mutable record describing where a file stands with respect to import.
    
    Holds a CC.STATUS_* value, the file's hash (None when not yet known), its mime
    (None when not yet known) and a free-text note that is appended to the
    human-readable summary whenever it is non-empty.
    """
    
    def __init__( self, status, hash, mime = None, note = '' ):
        """Store the four fields as plain public attributes."""
        
        # these are read and reassigned directly by the import job code in this module
        ( self.status, self.hash ) = ( status, hash )
        ( self.mime, self.note ) = ( mime, note )
        
    
    def __str__( self ):
        """Return the ToString() summary behind a fixed 'File Import Status: ' label."""
        
        summary = self.ToString()
        
        return 'File Import Status: {}'.format( summary )
        
    
    def AlreadyInDB( self ):
        """Return True when the status is CC.STATUS_SUCCESSFUL_BUT_REDUNDANT."""
        
        is_redundant = self.status == CC.STATUS_SUCCESSFUL_BUT_REDUNDANT
        
        return is_redundant
        
    
    def Duplicate( self ) -> "FileImportStatus":
        """Return a new FileImportStatus carrying the same four field values."""
        
        clone = FileImportStatus( self.status, self.hash, mime = self.mime, note = self.note )
        
        return clone
        
    
    def ShouldImport( self, file_import_options: FileImportOptions.FileImportOptions ):
        """
        Decide whether a file with this status should go through the import.
        
        An unknown status always imports. A deleted status imports only when the
        given options do not exclude deleted files. Every other status does not import.
        """
        
        if self.status == CC.STATUS_UNKNOWN:
            
            return True
            
        
        # the options are only consulted for deleted files
        return self.status == CC.STATUS_DELETED and not file_import_options.ExcludesDeleted()
        
    
    def ToString( self ) -> str:
        """Return the status label from CC.status_string_lookup, followed by ', <note>' when a note is set."""
        
        label = CC.status_string_lookup[ self.status ]
        
        has_note = len( self.note ) > 0
        
        return '{}, {}'.format( label, self.note ) if has_note else label
        
    
    @staticmethod
    def STATICGetUnknownStatus() -> "FileImportStatus":
        """Return a fresh status of CC.STATUS_UNKNOWN with no hash, no mime and an empty note."""
        
        return FileImportStatus( status = CC.STATUS_UNKNOWN, hash = None )
|
|
|
|
|
|
def CheckFileImportStatus( file_import_status: FileImportStatus ) -> FileImportStatus:
    """
    Double-check an 'already in db' status against the client's file storage.
    
    The given status is returned unchanged when it is not 'already in db', when its
    hash or mime is None, or when the client files manager can produce a path for
    the file. When the path lookup raises HydrusExceptions.FileMissingException, a
    new CC.STATUS_UNKNOWN status (same hash and mime, explanatory note) is returned
    instead so the import goes ahead.
    """
    
    if not file_import_status.AlreadyInDB():
        
        return file_import_status
        
    
    file_hash = file_import_status.hash
    file_mime = file_import_status.mime
    
    if file_hash is None or file_mime is None:
        
        # not enough information to look the file up, so take the status on trust
        return file_import_status
        
    
    try:
        
        # only the success or failure of the lookup matters; the returned path is discarded
        HG.client_controller.client_files_manager.GetFilePath( file_hash, mime = file_mime )
        
    except HydrusExceptions.FileMissingException:
        
        note = 'The client believed this file was already in the db, but it was truly missing! Import will go ahead, in an attempt to fix the situation.'
        
        return FileImportStatus( CC.STATUS_UNKNOWN, file_hash, mime = file_mime, note = note )
        
    
    return file_import_status
|
|
|
|
class FileImportJob( object ):
    """
    Imports a single file, found at a temporary path, according to a set of file import options.
    
    DoWork is the entry point. It hashes the file and looks up its current status,
    and then either runs the import proper (metadata generation, validation against
    the options, copy into file storage, database write) or, for a file that is
    already known, makes sure it is in all the desired file services.
    
    The Get*/Has* accessors expose what GenerateInfo produced; the job passes itself
    to the 'import_file' database write in DoWork.
    """
    
    def __init__( self, temp_path: str, file_import_options: FileImportOptions.FileImportOptions ):
        """
        Record the path and options and reset all generated metadata to None.
        
        :param temp_path: path of the file to import
        :param file_import_options: options consulted for validity checks, destinations and archiving
        """
        
        if HG.file_import_report_mode:
            
            HydrusData.ShowText( 'File import job created for path {}.'.format( temp_path ) )
            
        
        self._temp_path = temp_path
        self._file_import_options = file_import_options
        
        # both statuses start as 'unknown' and are replaced as DoWork progresses
        self._pre_import_file_status = FileImportStatus.STATICGetUnknownStatus()
        self._post_import_file_status = FileImportStatus.STATICGetUnknownStatus()
        
        # everything below is filled in by GenerateInfo
        self._file_info = None
        self._thumbnail_bytes = None
        self._perceptual_hashes = None
        self._extra_hashes = None
        self._has_exif = None
        self._has_human_readable_embedded_metadata = None
        self._has_icc_profile = None
        self._pixel_hash = None
        self._file_modified_timestamp = None
        
    
    @staticmethod
    def _CallStatusHook( status_hook, text ):
        """Pass text to status_hook, unless no hook was supplied."""
        
        if status_hook is not None:
            
            status_hook( text )
            
        
    
    def CheckIsGoodToImport( self ):
        """
        Validate the generated file info against the file import options.
        
        Requires GenerateInfo to have run, since it unpacks self._file_info. Any
        exception raised by CheckFileIsValid propagates; DoWork treats
        HydrusExceptions.FileImportRulesException as a veto.
        """
        
        if HG.file_import_report_mode:
            
            HydrusData.ShowText( 'File import job testing if good to import for file import options' )
            
        
        # the full unpack is kept even though only four values are used: it fails loudly if the info tuple changes shape
        ( size, mime, width, height, duration, num_frames, has_audio, num_words ) = self._file_info
        
        self._file_import_options.CheckFileIsValid( size, mime, width, height )
        
    
    def DoWork( self, status_hook = None ) -> FileImportStatus:
        """
        Run the whole import job and return the post-import status.
        
        :param status_hook: optional callable that receives short progress strings
        :return: the status the job ended with
        """
        
        if HG.file_import_report_mode:
            
            HydrusData.ShowText( 'File import job starting work.' )
            
        
        self.GeneratePreImportHashAndStatus( status_hook = status_hook )
        
        if self._pre_import_file_status.ShouldImport( self._file_import_options ):
            
            self.GenerateInfo( status_hook = status_hook )
            
            try:
                
                self.CheckIsGoodToImport()
                
            except HydrusExceptions.FileImportRulesException as e:
                
                # the options rejected this file: report a veto carrying the reason, and import nothing
                vetoed_file_import_status = self._pre_import_file_status.Duplicate()
                
                vetoed_file_import_status.status = CC.STATUS_VETOED
                vetoed_file_import_status.note = str( e )
                
                self._post_import_file_status = vetoed_file_import_status
                
            else:
                
                self._ImportNewFile( status_hook )
                
            
        else:
            
            # if the file is already in the database but not in all the desired file services, let's push content updates to make it happen
            if self._pre_import_file_status.status == CC.STATUS_SUCCESSFUL_BUT_REDUNDANT:
                
                self._AddToMissingFileServices()
                
            
            self._post_import_file_status = self._pre_import_file_status.Duplicate()
            
        
        if HG.file_import_report_mode:
            
            HydrusData.ShowText( 'File import job is done, now publishing content updates' )
            
        
        self.PubsubContentUpdates()
        
        return self._post_import_file_status
        
    
    def _ImportNewFile( self, status_hook ):
        """Copy the file into file storage, write it to the database and store the resulting post-import status."""
        
        file_hash = self._pre_import_file_status.hash
        mime = self._pre_import_file_status.mime
        
        self._CallStatusHook( status_hook, 'copying file into file storage' )
        
        HG.client_controller.client_files_manager.AddFile( file_hash, mime, self._temp_path, thumbnail_bytes = self._thumbnail_bytes )
        
        self._CallStatusHook( status_hook, 'importing to database' )
        
        self._file_import_options.CheckReadyToImport()
        
        self._post_import_file_status = HG.client_controller.WriteSynchronous( 'import_file', self )
        
    
    def _AddToMissingFileServices( self ):
        """Add an already-known file to any desired file service it is not currently in."""
        
        media_result = HG.client_controller.Read( 'media_result', self._pre_import_file_status.hash )
        
        destination_location_context = self._file_import_options.GetDestinationLocationContext()
        
        desired_file_service_keys = destination_location_context.current_service_keys
        current_file_service_keys = media_result.GetLocationsManager().GetCurrent()
        
        file_service_keys_to_add_to = set( desired_file_service_keys ).difference( current_file_service_keys )
        
        if not file_service_keys_to_add_to:
            
            return
            
        
        file_info_manager = media_result.GetFileInfoManager()
        now = HydrusData.GetNow()
        
        service_keys_to_content_updates = {
            service_key : [ HydrusData.ContentUpdate( HC.CONTENT_TYPE_FILES, HC.CONTENT_UPDATE_ADD, ( file_info_manager, now ) ) ]
            for service_key in file_service_keys_to_add_to
        }
        
        HG.client_controller.WriteSynchronous( 'content_updates', service_keys_to_content_updates )
        
    
    def GeneratePreImportHashAndStatus( self, status_hook = None ):
        """
        Hash the file and look up what the client already knows about that hash.
        
        The result is stored as the pre-import status, after CheckFileImportStatus
        has had the chance to downgrade an 'already in db' result whose file is missing.
        
        :param status_hook: optional callable that receives short progress strings
        """
        
        # NOTE(review): this runs before hashing, presumably because it can rewrite the file at temp_path - confirm
        HydrusImageHandling.ConvertToPNGIfBMP( self._temp_path )
        
        self._CallStatusHook( status_hook, 'calculating hash' )
        
        file_hash = HydrusFileHandling.GetHashFromPath( self._temp_path )
        
        if HG.file_import_report_mode:
            
            HydrusData.ShowText( 'File import job hash: {}'.format( file_hash.hex() ) )
            
        
        self._CallStatusHook( status_hook, 'checking for file status' )
        
        self._pre_import_file_status = HG.client_controller.Read( 'hash_status', 'sha256', file_hash, prefix = 'file recognised' )
        
        # the lookup may come back without a hash; the rest of the job needs one
        if self._pre_import_file_status.hash is None:
            
            self._pre_import_file_status.hash = file_hash
            
        
        self._pre_import_file_status = CheckFileImportStatus( self._pre_import_file_status )
        
        if HG.file_import_report_mode:
            
            HydrusData.ShowText( 'File import job pre-import status: {}'.format( self._pre_import_file_status.ToString() ) )
            
        
    
    def GenerateInfo( self, status_hook = None ):
        """
        Generate all the metadata the import needs and store it on the job.
        
        In order: mime (only if the pre-import status has none), decompression bomb
        test, file info, thumbnail, perceptual hashes, extra hashes, the EXIF /
        human-readable metadata / ICC profile flags, pixel hash, and the file
        modified timestamp.
        
        :param status_hook: optional callable that receives short progress strings
        :raises HydrusExceptions.DecompressionBombException: for a bomb, when the options do not allow them
        :raises HydrusExceptions.DamagedOrUnusualFileException: when the thumbnail cannot be rendered
        """
        
        if self._pre_import_file_status.mime is None:
            
            self._CallStatusHook( status_hook, 'generating filetype' )
            
            mime = HydrusFileHandling.GetMime( self._temp_path )
            
            self._pre_import_file_status.mime = mime
            
        else:
            
            mime = self._pre_import_file_status.mime
            
        
        if HG.file_import_report_mode:
            
            HydrusData.ShowText( 'File import job mime: {}'.format( HC.mime_string_lookup[ mime ] ) )
            
        
        if mime in HC.DECOMPRESSION_BOMB_IMAGES and not self._file_import_options.AllowsDecompressionBombs():
            
            if HG.file_import_report_mode:
                
                HydrusData.ShowText( 'File import job testing for decompression bomb' )
                
            
            if HydrusImageHandling.IsDecompressionBomb( self._temp_path ):
                
                if HG.file_import_report_mode:
                    
                    HydrusData.ShowText( 'File import job: it was a decompression bomb' )
                    
                
                raise HydrusExceptions.DecompressionBombException( 'Image seems to be a Decompression Bomb!' )
                
            
        
        self._CallStatusHook( status_hook, 'generating file metadata' )
        
        self._file_info = HydrusFileHandling.GetFileInfo( self._temp_path, mime = mime )
        
        # from here on, mime is whatever GetFileInfo reported back
        ( size, mime, width, height, duration, num_frames, has_audio, num_words ) = self._file_info
        
        if HG.file_import_report_mode:
            
            HydrusData.ShowText( 'File import job file info: {}'.format( self._file_info ) )
            
        
        if mime in HC.MIMES_WITH_THUMBNAILS:
            
            self._GenerateThumbnail( mime, width, height, duration, num_frames, status_hook )
            
        
        if mime in HC.FILES_THAT_HAVE_PERCEPTUAL_HASH:
            
            self._CallStatusHook( status_hook, 'generating similar files metadata' )
            
            if HG.file_import_report_mode:
                
                HydrusData.ShowText( 'File import job generating perceptual_hashes' )
                
            
            self._perceptual_hashes = ClientImageHandling.GenerateShapePerceptualHashes( self._temp_path, mime )
            
            if HG.file_import_report_mode:
                
                HydrusData.ShowText( 'File import job generated {} perceptual_hashes: {}'.format( len( self._perceptual_hashes ), [ perceptual_hash.hex() for perceptual_hash in self._perceptual_hashes ] ) )
                
            
        
        if HG.file_import_report_mode:
            
            HydrusData.ShowText( 'File import job generating other hashes' )
            
        
        self._CallStatusHook( status_hook, 'generating additional hashes' )
        
        self._extra_hashes = HydrusFileHandling.GetExtraHashesFromPath( self._temp_path )
        
        #
        
        self._GenerateEmbeddedMetadataFlags( mime )
        
        #
        
        if mime in HC.FILES_THAT_CAN_HAVE_PIXEL_HASH and duration is None:
            
            try:
                
                self._pixel_hash = HydrusImageHandling.GetImagePixelHash( self._temp_path, mime )
                
            except Exception:
                
                # best effort: on failure the pixel hash stays None and the import continues
                pass
                
            
        
        self._file_modified_timestamp = HydrusFileHandling.GetFileModifiedTimestamp( self._temp_path )
        
    
    def _GenerateThumbnail( self, mime, width, height, duration, num_frames, status_hook ):
        """Render thumbnail bytes for the file using the client's thumbnail options; any render failure becomes DamagedOrUnusualFileException."""
        
        self._CallStatusHook( status_hook, 'generating thumbnail' )
        
        if HG.file_import_report_mode:
            
            HydrusData.ShowText( 'File import job generating thumbnail' )
            
        
        bounding_dimensions = HG.client_controller.options[ 'thumbnail_dimensions' ]
        thumbnail_scale_type = HG.client_controller.new_options.GetInteger( 'thumbnail_scale_type' )
        thumbnail_dpr_percent = HG.client_controller.new_options.GetInteger( 'thumbnail_dpr_percent' )
        
        ( clip_rect, target_resolution ) = HydrusImageHandling.GetThumbnailResolutionAndClipRegion( ( width, height ), bounding_dimensions, thumbnail_scale_type, thumbnail_dpr_percent )
        
        percentage_in = HG.client_controller.new_options.GetInteger( 'video_thumbnail_percentage_in' )
        
        try:
            
            self._thumbnail_bytes = HydrusFileHandling.GenerateThumbnailBytes( self._temp_path, target_resolution, mime, duration, num_frames, clip_rect = clip_rect, percentage_in = percentage_in )
            
        except Exception as e:
            
            # chain explicitly so the original rendering error stays attached as the cause
            raise HydrusExceptions.DamagedOrUnusualFileException( 'Could not render a thumbnail: {}'.format( str( e ) ) ) from e
            
        
    
    def _GenerateEmbeddedMetadataFlags( self, mime ):
        """Set the EXIF, human-readable metadata and ICC profile flags; each probe is best effort and leaves its flag False on failure."""
        
        has_exif = False
        
        if mime in HC.FILES_THAT_CAN_HAVE_EXIF:
            
            try:
                
                has_exif = HydrusImageHandling.HasEXIF( self._temp_path )
                
            except Exception:
                
                # Exception rather than a bare except, so KeyboardInterrupt and SystemExit still propagate
                pass
                
            
        
        self._has_exif = has_exif
        
        has_human_readable_embedded_metadata = False
        
        if mime in HC.FILES_THAT_CAN_HAVE_HUMAN_READABLE_EMBEDDED_METADATA:
            
            try:
                
                has_human_readable_embedded_metadata = HydrusImageHandling.HasHumanReadableEmbeddedMetadata( self._temp_path )
                
            except Exception:
                
                pass
                
            
        
        self._has_human_readable_embedded_metadata = has_human_readable_embedded_metadata
        
        has_icc_profile = False
        
        if mime in HC.FILES_THAT_CAN_HAVE_ICC_PROFILE:
            
            try:
                
                # NOTE(review): this image is never explicitly closed here - confirm whether RawOpenPILImage leaves a file handle open
                pil_image = HydrusImageHandling.RawOpenPILImage( self._temp_path )
                
                has_icc_profile = HydrusImageHandling.HasICCProfile( pil_image )
                
            except Exception:
                
                pass
                
            
        
        self._has_icc_profile = has_icc_profile
        
    
    def GetExtraHashes( self ):
        """Return the extra hashes produced by GenerateInfo, or None if it has not run."""
        
        return self._extra_hashes
        
    
    def GetFileImportOptions( self ):
        """Return the file import options this job was created with."""
        
        return self._file_import_options
        
    
    def GetFileInfo( self ):
        """Return the file info tuple produced by GenerateInfo, or None if it has not run."""
        
        return self._file_info
        
    
    def GetFileModifiedTimestamp( self ):
        """Return the file modified timestamp read by GenerateInfo, or None if it has not run."""
        
        return self._file_modified_timestamp
        
    
    def GetHash( self ):
        """Return the hash held by the pre-import status."""
        
        return self._pre_import_file_status.hash
        
    
    def GetMime( self ):
        """Return the mime held by the pre-import status."""
        
        return self._pre_import_file_status.mime
        
    
    def GetPerceptualHashes( self ):
        """Return the perceptual hashes produced by GenerateInfo, or None if none were generated."""
        
        return self._perceptual_hashes
        
    
    def GetPixelHash( self ):
        """Return the pixel hash produced by GenerateInfo, or None if none was generated."""
        
        return self._pixel_hash
        
    
    def HasEXIF( self ) -> bool:
        """Return the EXIF flag set by GenerateInfo (None until it has run)."""
        
        return self._has_exif
        
    
    def HasHumanReadableEmbeddedMetadata( self ) -> bool:
        """Return the human-readable embedded metadata flag set by GenerateInfo (None until it has run)."""
        
        return self._has_human_readable_embedded_metadata
        
    
    def HasICCProfile( self ) -> bool:
        """Return the ICC profile flag set by GenerateInfo (None until it has run)."""
        
        return self._has_icc_profile
        
    
    def PubsubContentUpdates( self ):
        """Send an archive content update for the file when it ended up 'already in db' and the options automatically archive."""
        
        if self._post_import_file_status.AlreadyInDB() and self._file_import_options.AutomaticallyArchives():
            
            hashes = { self.GetHash() }
            
            service_keys_to_content_updates = { CC.COMBINED_LOCAL_FILE_SERVICE_KEY : [ HydrusData.ContentUpdate( HC.CONTENT_TYPE_FILES, HC.CONTENT_UPDATE_ARCHIVE, hashes ) ] }
            
            HG.client_controller.Write( 'content_updates', service_keys_to_content_updates )
|
|
|
|
|