hydrus/hydrus/client/ClientDuplicates.py

1252 lines
47 KiB
Python

import collections
import threading
import time
import typing
from hydrus.core import HydrusConstants as HC
from hydrus.core import HydrusData
from hydrus.core import HydrusExceptions
from hydrus.core import HydrusGlobals as HG
from hydrus.core import HydrusImageHandling
from hydrus.core import HydrusSerialisable
from hydrus.core import HydrusTags
from hydrus.client import ClientConstants as CC
from hydrus.client import ClientData
from hydrus.client import ClientThreading
from hydrus.client.importing.options import NoteImportOptions
from hydrus.client.media import ClientMedia
from hydrus.client.metadata import ClientTags
# Module-level caches keyed by file hash. Pixel hashes and jpeg quality
# estimates require opening the file from disk, so they are computed at most
# once per hash and reused across duplicate-filter comparisons.
# NOTE(review): these grow unbounded for the process lifetime — presumably
# acceptable for a filter session; confirm.
hashes_to_jpeg_quality = {}
hashes_to_pixel_hashes = {}
def GetDuplicateComparisonScore( shown_media, comparison_media ):
    """Return the net duplicate-comparison score for shown_media vs comparison_media.
    
    This is simply the sum of the per-category scores produced by
    GetDuplicateComparisonStatements. Positive favours the shown file,
    negative favours the comparison file.
    """
    
    scores = GetDuplicateComparisonStatements( shown_media, comparison_media ).values()
    
    return sum( score for ( statement, score ) in scores )
# TODO: ok, let's make an enum here at some point and a DuplicateComparisonSetting serialisable object
# Then we can attach 'show/hide' boolean and allow editable scores and whatnot in a nice class that will one day evolve the enum to an editable MetadataConditional/MetadataComparison object
# also have banding so we can have 'at this filesize difference, score 10, at this, score 15'
# show it in a listctrl or whatever in the options, ditch the hardcoding
# metadatacomparison needs to handle 'if one is a png and one is a jpeg', and then orient to A/B and give it a score
def GetDuplicateComparisonStatements( shown_media, comparison_media ):
    """Compare two media objects and return { category : ( statement, score ) }.
    
    Each entry is a human-readable statement for the duplicate filter UI plus an
    integer score: positive favours shown_media, negative favours
    comparison_media, zero is informational only. Filesize and import-time
    scores are zeroed when the images are pixel-for-pixel duplicates, since
    those differences carry no quality information in that case.
    """
    
    new_options = HG.client_controller.new_options
    
    # user-configurable weights for each comparison category
    duplicate_comparison_score_higher_jpeg_quality = new_options.GetInteger( 'duplicate_comparison_score_higher_jpeg_quality' )
    duplicate_comparison_score_much_higher_jpeg_quality = new_options.GetInteger( 'duplicate_comparison_score_much_higher_jpeg_quality' )
    duplicate_comparison_score_higher_filesize = new_options.GetInteger( 'duplicate_comparison_score_higher_filesize' )
    duplicate_comparison_score_much_higher_filesize = new_options.GetInteger( 'duplicate_comparison_score_much_higher_filesize' )
    duplicate_comparison_score_higher_resolution = new_options.GetInteger( 'duplicate_comparison_score_higher_resolution' )
    duplicate_comparison_score_much_higher_resolution = new_options.GetInteger( 'duplicate_comparison_score_much_higher_resolution' )
    duplicate_comparison_score_more_tags = new_options.GetInteger( 'duplicate_comparison_score_more_tags' )
    duplicate_comparison_score_older = new_options.GetInteger( 'duplicate_comparison_score_older' )
    duplicate_comparison_score_nicer_ratio = new_options.GetInteger( 'duplicate_comparison_score_nicer_ratio' )
    
    #
    
    statements_and_scores = {}
    
    s_hash = shown_media.GetHash()
    c_hash = comparison_media.GetHash()
    
    s_mime = shown_media.GetMime()
    c_mime = comparison_media.GetMime()
    
    # size
    
    s_size = shown_media.GetSize()
    c_size = comparison_media.GetSize()
    
    is_a_pixel_dupe = False
    
    # pixel-duplicate check only makes sense for static images at identical resolution
    if shown_media.IsStaticImage() and comparison_media.IsStaticImage() and shown_media.GetResolution() == comparison_media.GetResolution():
        
        global hashes_to_pixel_hashes
        
        # pixel hashes are expensive (full image decode), so cache per hash
        if s_hash not in hashes_to_pixel_hashes:
            
            path = HG.client_controller.client_files_manager.GetFilePath( s_hash, s_mime )
            
            hashes_to_pixel_hashes[ s_hash ] = HydrusImageHandling.GetImagePixelHash( path, s_mime )
            
        
        if c_hash not in hashes_to_pixel_hashes:
            
            path = HG.client_controller.client_files_manager.GetFilePath( c_hash, c_mime )
            
            hashes_to_pixel_hashes[ c_hash ] = HydrusImageHandling.GetImagePixelHash( path, c_mime )
            
        
        s_pixel_hash = hashes_to_pixel_hashes[ s_hash ]
        c_pixel_hash = hashes_to_pixel_hashes[ c_hash ]
        
        if s_pixel_hash == c_pixel_hash:
            
            is_a_pixel_dupe = True
            
            # a png that pixel-duplicates a non-png is almost certainly a bloated
            # re-save, so score it heavily against
            if s_mime == HC.IMAGE_PNG and c_mime != HC.IMAGE_PNG:
                
                statement = 'this is a pixel-for-pixel duplicate png!'
                
                score = -100
                
            elif s_mime != HC.IMAGE_PNG and c_mime == HC.IMAGE_PNG:
                
                statement = 'other file is a pixel-for-pixel duplicate png!'
                
                score = 100
                
            else:
                
                statement = 'images are pixel-for-pixel duplicates!'
                
                score = 0
                
            
            statements_and_scores[ 'pixel_duplicates' ] = ( statement, score )
            
        
    
    if s_size != c_size:
        
        absolute_size_ratio = max( s_size, c_size ) / min( s_size, c_size )
        
        if absolute_size_ratio > 2.0:
            
            if s_size > c_size:
                
                operator = '>>'
                
                score = duplicate_comparison_score_much_higher_filesize
                
            else:
                
                operator = '<<'
                
                score = -duplicate_comparison_score_much_higher_filesize
                
            
        elif absolute_size_ratio > 1.05:
            
            if s_size > c_size:
                
                operator = '>'
                
                score = duplicate_comparison_score_higher_filesize
                
            else:
                
                operator = '<'
                
                score = -duplicate_comparison_score_higher_filesize
                
            
        else:
            
            operator = CC.UNICODE_ALMOST_EQUAL_TO
            
            score = 0
            
        
        # signed relative difference; a negative value renders its own '-', so we
        # only need to prepend '+' when the shown file is larger
        percentage_difference = ( s_size / c_size ) - 1.0
        
        sign = '+' if s_size > c_size else ''
        
        percentage_different_string = ' ({}{})'.format( sign, HydrusData.ConvertFloatToPercentage( percentage_difference ) )
        
        if is_a_pixel_dupe:
            
            # same pixels--the size difference is pure encoding overhead, not quality
            score = 0
            
        
        statement = '{} {} {}{}'.format( HydrusData.ToHumanBytes( s_size ), operator, HydrusData.ToHumanBytes( c_size ), percentage_different_string )
        
        statements_and_scores[ 'filesize' ] = ( statement, score )
        
    
    # higher/same res
    
    s_resolution = shown_media.GetResolution()
    c_resolution = comparison_media.GetResolution()
    
    if s_resolution != c_resolution:
        
        ( s_w, s_h ) = s_resolution
        ( c_w, c_h ) = c_resolution
        
        # guard against None or non-positive dimensions before dividing
        all_measurements_are_good = all( d is not None and d > 0 for d in ( s_w, s_h, c_w, c_h ) )
        
        if all_measurements_are_good:
            
            resolution_ratio = ( s_w * s_h ) / ( c_w * c_h )
            
            if resolution_ratio == 1.0:
                
                # same pixel count but different shape (e.g. rotated)
                operator = '!='
                
                score = 0
                
            elif resolution_ratio > 2.0:
                
                operator = '>>'
                
                score = duplicate_comparison_score_much_higher_resolution
                
            elif resolution_ratio > 1.00:
                
                operator = '>'
                
                score = duplicate_comparison_score_higher_resolution
                
            elif resolution_ratio < 0.5:
                
                operator = '<<'
                
                score = -duplicate_comparison_score_much_higher_resolution
                
            else:
                
                operator = '<'
                
                score = -duplicate_comparison_score_higher_resolution
                
            
            if s_resolution in HC.NICE_RESOLUTIONS:
                
                s_string = HC.NICE_RESOLUTIONS[ s_resolution ]
                
            else:
                
                s_string = HydrusData.ConvertResolutionToPrettyString( s_resolution )
                
                # odd dimensions often indicate a sloppy crop/resize
                if s_w % 2 == 1 or s_h % 2 == 1:
                    
                    s_string += ' (unusual)'
                    
                
            
            if c_resolution in HC.NICE_RESOLUTIONS:
                
                c_string = HC.NICE_RESOLUTIONS[ c_resolution ]
                
            else:
                
                c_string = HydrusData.ConvertResolutionToPrettyString( c_resolution )
                
                if c_w % 2 == 1 or c_h % 2 == 1:
                    
                    c_string += ' (unusual)'
                    
                
            
            statement = '{} {} {}'.format( s_string, operator, c_string )
            
            statements_and_scores[ 'resolution' ] = ( statement, score )
            
            #
            
            s_ratio = s_w / s_h
            c_ratio = c_w / c_h
            
            s_nice = s_ratio in HC.NICE_RATIOS
            c_nice = c_ratio in HC.NICE_RATIOS
            
            if s_nice or c_nice:
                
                if s_nice:
                    
                    s_string = HC.NICE_RATIOS[ s_ratio ]
                    
                else:
                    
                    s_string = 'unusual'
                    
                
                if c_nice:
                    
                    c_string = HC.NICE_RATIOS[ c_ratio ]
                    
                else:
                    
                    c_string = 'unusual'
                    
                
                if s_nice and c_nice:
                    
                    operator = '-'
                    
                    score = 0
                    
                elif s_nice:
                    
                    operator = '>'
                    
                    score = duplicate_comparison_score_nicer_ratio
                    
                elif c_nice:
                    
                    operator = '<'
                    
                    score = -duplicate_comparison_score_nicer_ratio
                    
                
                if s_string == c_string:
                    
                    statement = 'both {}'.format( s_string )
                    
                else:
                    
                    statement = '{} {} {}'.format( s_string, operator, c_string )
                    
                
                statements_and_scores[ 'ratio' ] = ( statement, score )
                
            
        
    
    # same/diff mime
    
    if s_mime != c_mime:
        
        statement = '{} vs {}'.format( HC.mime_string_lookup[ s_mime ], HC.mime_string_lookup[ c_mime ] )
        
        score = 0
        
        statements_and_scores[ 'mime' ] = ( statement, score )
        
    
    # more tags
    
    s_num_tags = len( shown_media.GetTagsManager().GetCurrentAndPending( CC.COMBINED_TAG_SERVICE_KEY, ClientTags.TAG_DISPLAY_ACTUAL ) )
    c_num_tags = len( comparison_media.GetTagsManager().GetCurrentAndPending( CC.COMBINED_TAG_SERVICE_KEY, ClientTags.TAG_DISPLAY_ACTUAL ) )
    
    if s_num_tags != c_num_tags:
        
        # counts differ, so at least one side is > 0 and operator/score are
        # always assigned below
        if s_num_tags > 0 and c_num_tags > 0:
            
            if s_num_tags > c_num_tags:
                
                operator = '>'
                
                score = duplicate_comparison_score_more_tags
                
            else:
                
                operator = '<'
                
                score = -duplicate_comparison_score_more_tags
                
            
        elif s_num_tags > 0:
            
            operator = '>>'
            
            score = duplicate_comparison_score_more_tags
            
        elif c_num_tags > 0:
            
            operator = '<<'
            
            score = -duplicate_comparison_score_more_tags
            
        
        statement = '{} tags {} {} tags'.format( HydrusData.ToHumanInt( s_num_tags ), operator, HydrusData.ToHumanInt( c_num_tags ) )
        
        statements_and_scores[ 'num_tags' ] = ( statement, score )
        
    
    # older
    
    s_ts = shown_media.GetLocationsManager().GetCurrentTimestamp( CC.COMBINED_LOCAL_FILE_SERVICE_KEY )
    c_ts = comparison_media.GetLocationsManager().GetCurrentTimestamp( CC.COMBINED_LOCAL_FILE_SERVICE_KEY )
    
    one_month = 86400 * 30
    
    # only report import-time differences bigger than a month
    if s_ts is not None and c_ts is not None and abs( s_ts - c_ts ) > one_month:
        
        if s_ts < c_ts:
            
            operator = 'older than'
            
            score = duplicate_comparison_score_older
            
        else:
            
            operator = 'newer than'
            
            score = -duplicate_comparison_score_older
            
        
        if is_a_pixel_dupe:
            
            # identical pixels--age tells us nothing about quality
            score = 0
            
        
        statement = '{}, {} {}'.format( ClientData.TimestampToPrettyTimeDelta( s_ts, history_suffix = ' old' ), operator, ClientData.TimestampToPrettyTimeDelta( c_ts, history_suffix = ' old' ) )
        
        statements_and_scores[ 'time_imported' ] = ( statement, score )
        
    
    if s_mime == HC.IMAGE_JPEG and c_mime == HC.IMAGE_JPEG:
        
        global hashes_to_jpeg_quality
        
        # quality estimation opens the file, so cache per hash
        if s_hash not in hashes_to_jpeg_quality:
            
            path = HG.client_controller.client_files_manager.GetFilePath( s_hash, s_mime )
            
            hashes_to_jpeg_quality[ s_hash ] = HydrusImageHandling.GetJPEGQuantizationQualityEstimate( path )
            
        
        if c_hash not in hashes_to_jpeg_quality:
            
            path = HG.client_controller.client_files_manager.GetFilePath( c_hash, c_mime )
            
            hashes_to_jpeg_quality[ c_hash ] = HydrusImageHandling.GetJPEGQuantizationQualityEstimate( path )
            
        
        ( s_label, s_jpeg_quality ) = hashes_to_jpeg_quality[ s_hash ]
        ( c_label, c_jpeg_quality ) = hashes_to_jpeg_quality[ c_hash ]
        
        score = 0
        
        if s_label != c_label:
            
            if c_jpeg_quality is None or s_jpeg_quality is None:
                
                score = 0
                
            else:
                
                # other way around, low score is good here, hence comparison/shown
                quality_ratio = c_jpeg_quality / s_jpeg_quality
                
                if quality_ratio > 2.0:
                    
                    score = duplicate_comparison_score_much_higher_jpeg_quality
                    
                elif quality_ratio > 1.0:
                    
                    score = duplicate_comparison_score_higher_jpeg_quality
                    
                elif quality_ratio < 0.5:
                    
                    score = -duplicate_comparison_score_much_higher_jpeg_quality
                    
                else:
                    
                    score = -duplicate_comparison_score_higher_jpeg_quality
                    
                
            
            statement = '{} vs {} jpeg quality'.format( s_label, c_label )
            
            statements_and_scores[ 'jpeg_quality' ] = ( statement, score )
            
        
    
    def has_exif( m ):
        """Return True if the file looks like a jpeg/tiff with a non-empty EXIF dict.
        
        Best-effort: any failure (missing file, unreadable image) reads as False.
        """
        
        try:
            
            hash = m.GetHash()
            mime = m.GetMime()
            
            if mime not in ( HC.IMAGE_JPEG, HC.IMAGE_TIFF ):
                
                return False
                
            
            path = HG.client_controller.client_files_manager.GetFilePath( hash, mime )
            
            pil_image = HydrusImageHandling.RawOpenPILImage( path )
            
            exif_dict = HydrusImageHandling.GetEXIFDict( pil_image )
            
            if exif_dict is None:
                
                return False
                
            
            return len( exif_dict ) > 0
            
        except Exception:
            
            # was a bare except; narrowed so KeyboardInterrupt/SystemExit propagate
            return False
            
        
    
    s_has_exif = has_exif( shown_media )
    c_has_exif = has_exif( comparison_media )
    
    # only interesting when exactly one side has EXIF
    if s_has_exif ^ c_has_exif:
        
        if s_has_exif:
            
            exif_statement = 'has exif data, the other does not'
            
        else:
            
            exif_statement = 'the other has exif data, this does not'
            
        
        statements_and_scores[ 'exif_data' ] = ( exif_statement, 0 )
        
    
    s_has_human_readable_embedded_metadata = shown_media.GetMediaResult().GetFileInfoManager().has_human_readable_embedded_metadata
    c_has_human_readable_embedded_metadata = comparison_media.GetMediaResult().GetFileInfoManager().has_human_readable_embedded_metadata
    
    if s_has_human_readable_embedded_metadata ^ c_has_human_readable_embedded_metadata:
        
        if s_has_human_readable_embedded_metadata:
            
            embedded_metadata_statement = 'has embedded metadata, the other does not'
            
        else:
            
            embedded_metadata_statement = 'the other has embedded metadata, this does not'
            
        
        statements_and_scores[ 'embedded_metadata' ] = ( embedded_metadata_statement, 0 )
        
    
    s_has_icc = shown_media.GetMediaResult().GetFileInfoManager().has_icc_profile
    c_has_icc = comparison_media.GetMediaResult().GetFileInfoManager().has_icc_profile
    
    if s_has_icc ^ c_has_icc:
        
        if s_has_icc:
            
            icc_statement = 'has icc profile, the other does not'
            
        else:
            
            icc_statement = 'the other has icc profile, this does not'
            
        
        statements_and_scores[ 'icc_profile' ] = ( icc_statement, 0 )
        
    
    return statements_and_scores
class DuplicatesManager( object ):
    """Singleton coordinating similar-files maintenance numbers and the background
    potential-duplicates search.
    
    All mutable state is guarded by self._lock; long work runs on controller
    threads via CallToThread / CallToThreadLongRunning, and UI refreshes are
    signalled with pubsub messages.
    """
    
    # class-level singleton slot, populated by __init__
    my_instance = None
    
    def __init__( self ):
        
        DuplicatesManager.my_instance = self
        
        # last-read { search_distance : file_count } status, None until first read
        self._similar_files_maintenance_status = None
        # True while a THREADRefreshMaintenanceNumbers job is in flight
        self._currently_refreshing_maintenance_numbers = False
        # True when the cached numbers are stale and should be re-read
        self._refresh_maintenance_numbers = True
        
        self._currently_doing_potentials_search = False
        
        self._lock = threading.Lock()
        
    
    @staticmethod
    def instance() -> 'DuplicatesManager':
        """Return the singleton, lazily constructing it on first call."""
        
        if DuplicatesManager.my_instance is None:
            
            DuplicatesManager()
            
        
        return DuplicatesManager.my_instance
        
    
    def GetMaintenanceNumbers( self ):
        """Return ( status, currently_refreshing, currently_searching ), kicking off
        an async refresh first if the cached numbers are stale."""
        
        with self._lock:
            
            if self._refresh_maintenance_numbers and not self._currently_refreshing_maintenance_numbers:
                
                self._refresh_maintenance_numbers = False
                self._currently_refreshing_maintenance_numbers = True
                
                HG.client_controller.pub( 'new_similar_files_maintenance_numbers' )
                
                HG.client_controller.CallToThread( self.THREADRefreshMaintenanceNumbers )
                
            
            return ( self._similar_files_maintenance_status, self._currently_refreshing_maintenance_numbers, self._currently_doing_potentials_search )
            
        
    
    def RefreshMaintenanceNumbers( self ):
        """Mark the cached numbers stale and notify listeners."""
        
        with self._lock:
            
            self._refresh_maintenance_numbers = True
            
            HG.client_controller.pub( 'new_similar_files_maintenance_numbers' )
            
        
    
    def NotifyNewPotentialsSearchNumbers( self ):
        
        HG.client_controller.pub( 'new_similar_files_potentials_search_numbers' )
        
    
    def StartPotentialsSearch( self ):
        """Launch the background potentials search unless one is already running or
        we have no maintenance status yet."""
        
        with self._lock:
            
            if self._currently_doing_potentials_search or self._similar_files_maintenance_status is None:
                
                return
                
            
            self._currently_doing_potentials_search = True
            
            HG.client_controller.CallToThreadLongRunning( self.THREADSearchPotentials )
            
        
    
    def THREADRefreshMaintenanceNumbers( self ):
        """Worker: re-read the maintenance status from the db. If another refresh
        was requested while we worked, immediately schedule another pass."""
        
        try:
            
            similar_files_maintenance_status = HG.client_controller.Read( 'similar_files_maintenance_status' )
            
            with self._lock:
                
                self._similar_files_maintenance_status = similar_files_maintenance_status
                
                if self._refresh_maintenance_numbers:
                    
                    # a refresh request arrived during the read--go again
                    self._refresh_maintenance_numbers = False
                    
                    HG.client_controller.CallToThread( self.THREADRefreshMaintenanceNumbers )
                    
                else:
                    
                    self._currently_refreshing_maintenance_numbers = False
                    self._refresh_maintenance_numbers = False
                    
                
                HG.client_controller.pub( 'new_similar_files_maintenance_numbers' )
                
            
        except:
            
            # clear the in-flight flag so the UI does not hang, then re-raise
            self._currently_refreshing_maintenance_numbers = False
            
            HG.client_controller.pub( 'new_similar_files_maintenance_numbers' )
            
            raise
            
        
    
    def THREADSearchPotentials( self ):
        """Worker: chew through the potential-duplicates search in small db chunks,
        reporting progress via a cancellable job key."""
        
        try:
            
            search_distance = HG.client_controller.new_options.GetInteger( 'similar_files_duplicate_pairs_search_distance' )
            
            with self._lock:
                
                if self._similar_files_maintenance_status is None:
                    
                    return
                    
                
                searched_distances_to_count = self._similar_files_maintenance_status
                
                total_num_files = sum( searched_distances_to_count.values() )
                
                # files already searched at >= the current distance count as done
                num_searched = sum( ( count for ( value, count ) in searched_distances_to_count.items() if value is not None and value >= search_distance ) )
                
                all_files_searched = num_searched >= total_num_files
                
                if all_files_searched:
                    
                    return # no work to do
                    
                
                num_searched_estimate = num_searched
                
            
            HG.client_controller.pub( 'new_similar_files_maintenance_numbers' )
            
            job_key = ClientThreading.JobKey( cancellable = True )
            
            job_key.SetStatusTitle( 'searching for potential duplicates' )
            
            HG.client_controller.pub( 'message', job_key )
            
            still_work_to_do = True
            
            while still_work_to_do:
                
                # re-read each pass in case the user changed the distance mid-search
                search_distance = HG.client_controller.new_options.GetInteger( 'similar_files_duplicate_pairs_search_distance' )
                
                start_time = HydrusData.GetNowPrecise()
                
                ( still_work_to_do, num_done ) = HG.client_controller.WriteSynchronous( 'maintain_similar_files_search_for_potential_duplicates', search_distance, maintenance_mode = HC.MAINTENANCE_FORCED, job_key = job_key, work_time_float = 0.5 )
                
                time_it_took = HydrusData.GetNowPrecise() - start_time
                
                num_searched_estimate += num_done
                
                if num_searched_estimate > total_num_files:
                    
                    # our estimate overran the known total--refresh from the db
                    similar_files_maintenance_status = HG.client_controller.Read( 'similar_files_maintenance_status' )
                    
                    if similar_files_maintenance_status is None:
                        
                        break
                        
                    
                    with self._lock:
                        
                        self._similar_files_maintenance_status = similar_files_maintenance_status
                        
                        searched_distances_to_count = self._similar_files_maintenance_status
                        
                        total_num_files = max( num_searched_estimate, sum( searched_distances_to_count.values() ) )
                        
                    
                
                text = 'searching: {}'.format( HydrusData.ConvertValueRangeToPrettyString( num_searched_estimate, total_num_files ) )
                
                job_key.SetVariable( 'popup_text_1', text )
                job_key.SetVariable( 'popup_gauge_1', ( num_searched_estimate, total_num_files ) )
                
                if job_key.IsCancelled() or HG.model_shutdown:
                    
                    break
                    
                
                # throttle: rest roughly as long as the work chunk took, capped at 5s
                time.sleep( min( 5, time_it_took ) ) # ideally 0.5s, but potentially longer
                
            
            job_key.Delete()
            
        finally:
            
            with self._lock:
                
                self._currently_doing_potentials_search = False
                
            
            self.RefreshMaintenanceNumbers()
            self.NotifyNewPotentialsSearchNumbers()
            
        
    
# Archive-sync actions for DuplicateContentMergeOptions: do nothing, archive
# both files when exactly one is already archived, or archive both regardless.
SYNC_ARCHIVE_NONE = 0
SYNC_ARCHIVE_IF_ONE_DO_BOTH = 1
SYNC_ARCHIVE_DO_BOTH_REGARDLESS = 2
class DuplicateContentMergeOptions( HydrusSerialisable.SerialisableBase ):
    """Serialisable settings for how content (tags, ratings, notes, archive
    state, urls) is merged between the two files of a duplicate pair.
    
    ProcessPairIntoContentUpdates applies these settings to a concrete pair and
    produces the content updates to commit.
    """
    
    SERIALISABLE_TYPE = HydrusSerialisable.SERIALISABLE_TYPE_DUPLICATE_CONTENT_MERGE_OPTIONS
    SERIALISABLE_NAME = 'Duplicate Content Merge Options'
    SERIALISABLE_VERSION = 6
    
    def __init__( self ):
        
        HydrusSerialisable.SerialisableBase.__init__( self )
        
        # [ ( service_key, merge action, tag_filter ) ]
        self._tag_service_actions = []
        # [ ( service_key, merge action ) ]
        self._rating_service_actions = []
        self._sync_notes_action = HC.CONTENT_MERGE_ACTION_NONE
        self._sync_note_import_options = NoteImportOptions.NoteImportOptions()
        self._sync_archive_action = SYNC_ARCHIVE_NONE
        self._sync_urls_action = HC.CONTENT_MERGE_ACTION_NONE
        
    
    def _GetSerialisableInfo( self ):
        
        if HG.client_controller.IsBooted():
            
            # purge actions for services that no longer exist or are the wrong type
            # (see the version-1 update note below for why dupes can be present)
            services_manager = HG.client_controller.services_manager
            
            self._tag_service_actions = [ ( service_key, action, tag_filter ) for ( service_key, action, tag_filter ) in self._tag_service_actions if services_manager.ServiceExists( service_key ) and services_manager.GetServiceType( service_key ) in HC.REAL_TAG_SERVICES ]
            self._rating_service_actions = [ ( service_key, action ) for ( service_key, action ) in self._rating_service_actions if services_manager.ServiceExists( service_key ) and services_manager.GetServiceType( service_key ) in ( HC.LOCAL_RATING_LIKE, HC.LOCAL_RATING_NUMERICAL ) ]
            
        
        serialisable_tag_service_actions = [ ( service_key.hex(), action, tag_filter.GetSerialisableTuple() ) for ( service_key, action, tag_filter ) in self._tag_service_actions ]
        serialisable_rating_service_actions = [ ( service_key.hex(), action ) for ( service_key, action ) in self._rating_service_actions ]
        
        serialisable_sync_note_import_options = self._sync_note_import_options.GetSerialisableTuple()
        
        return ( serialisable_tag_service_actions, serialisable_rating_service_actions, self._sync_notes_action, serialisable_sync_note_import_options, self._sync_archive_action, self._sync_urls_action )
        
    
    def _InitialiseFromSerialisableInfo( self, serialisable_info ):
        
        ( serialisable_tag_service_actions, serialisable_rating_service_actions, self._sync_notes_action, serialisable_sync_note_import_options, self._sync_archive_action, self._sync_urls_action ) = serialisable_info
        
        self._tag_service_actions = [ ( bytes.fromhex( serialisable_service_key ), action, HydrusSerialisable.CreateFromSerialisableTuple( serialisable_tag_filter ) ) for ( serialisable_service_key, action, serialisable_tag_filter ) in serialisable_tag_service_actions ]
        self._rating_service_actions = [ ( bytes.fromhex( serialisable_service_key ), action ) for ( serialisable_service_key, action ) in serialisable_rating_service_actions ]
        self._sync_note_import_options = HydrusSerialisable.CreateFromSerialisableTuple( serialisable_sync_note_import_options )
        
    
    def _UpdateSerialisableInfo( self, version, old_serialisable_info ):
        """Step the serialised payload forward one version per branch until current."""
        
        if version == 1:
            
            ( serialisable_service_actions, delete_second_file ) = old_serialisable_info
            
            tag_service_actions = []
            rating_service_actions = []
            
            # As the client isn't booted when this is loaded in options, there isn't a good way to figure out tag from rating
            # So, let's just dupe and purge later on, in serialisation
            for ( service_key_encoded, action ) in serialisable_service_actions:
                
                service_key = bytes.fromhex( service_key_encoded )
                
                tag_filter = HydrusTags.TagFilter()
                
                tag_service_actions.append( ( service_key, action, tag_filter ) )
                rating_service_actions.append( ( service_key, action ) )
                
            
            serialisable_tag_service_actions = [ ( service_key.hex(), action, tag_filter.GetSerialisableTuple() ) for ( service_key, action, tag_filter ) in tag_service_actions ]
            serialisable_rating_service_actions = [ ( service_key.hex(), action ) for ( service_key, action ) in rating_service_actions ]
            
            sync_archive = delete_second_file
            delete_both_files = False
            
            new_serialisable_info = ( serialisable_tag_service_actions, serialisable_rating_service_actions, delete_second_file, sync_archive, delete_both_files )
            
            return ( 2, new_serialisable_info )
            
        
        if version == 2:
            
            ( serialisable_tag_service_actions, serialisable_rating_service_actions, delete_second_file, sync_archive, delete_both_files ) = old_serialisable_info
            
            sync_urls_action = None
            
            new_serialisable_info = ( serialisable_tag_service_actions, serialisable_rating_service_actions, delete_second_file, sync_archive, delete_both_files, sync_urls_action )
            
            return ( 3, new_serialisable_info )
            
        
        if version == 3:
            
            # the delete flags moved out of this object entirely
            ( serialisable_tag_service_actions, serialisable_rating_service_actions, delete_second_file, sync_archive, delete_both_files, sync_urls_action ) = old_serialisable_info
            
            new_serialisable_info = ( serialisable_tag_service_actions, serialisable_rating_service_actions, sync_archive, sync_urls_action )
            
            return ( 4, new_serialisable_info )
            
        
        if version == 4:
            
            # the boolean sync_archive became the SYNC_ARCHIVE_* enum
            ( serialisable_tag_service_actions, serialisable_rating_service_actions, sync_archive, sync_urls_action ) = old_serialisable_info
            
            if sync_archive:
                
                sync_archive_action = SYNC_ARCHIVE_IF_ONE_DO_BOTH
                
            else:
                
                sync_archive_action = SYNC_ARCHIVE_NONE
                
            
            new_serialisable_info = ( serialisable_tag_service_actions, serialisable_rating_service_actions, sync_archive_action, sync_urls_action )
            
            return ( 5, new_serialisable_info )
            
        
        if version == 5:
            
            # note syncing added
            ( serialisable_tag_service_actions, serialisable_rating_service_actions, sync_archive_action, sync_urls_action ) = old_serialisable_info
            
            if sync_urls_action is None:
                
                sync_urls_action = HC.CONTENT_MERGE_ACTION_NONE
                
            
            sync_notes_action = HC.CONTENT_MERGE_ACTION_NONE
            sync_note_import_options = NoteImportOptions.NoteImportOptions()
            
            serialisable_sync_note_import_options = sync_note_import_options.GetSerialisableTuple()
            
            new_serialisable_info = ( serialisable_tag_service_actions, serialisable_rating_service_actions, sync_notes_action, serialisable_sync_note_import_options, sync_archive_action, sync_urls_action )
            
            return ( 6, new_serialisable_info )
            
        
    
    # simple accessors
    
    def GetRatingServiceActions( self ) -> typing.Collection[ tuple ]:
        
        return self._rating_service_actions
        
    
    def GetTagServiceActions( self ) -> typing.Collection[ tuple ]:
        
        return self._tag_service_actions
        
    
    def GetSyncArchiveAction( self ) -> int:
        
        return self._sync_archive_action
        
    
    def GetSyncNotesAction( self ) -> int:
        
        return self._sync_notes_action
        
    
    def GetSyncNoteImportOptions( self ) -> NoteImportOptions.NoteImportOptions:
        
        return self._sync_note_import_options
        
    
    def GetSyncURLsAction( self ) -> int:
        
        return self._sync_urls_action
        
    
    def SetRatingServiceActions( self, rating_service_actions: typing.Collection[ tuple ] ):
        
        self._rating_service_actions = rating_service_actions
        
    
    def SetTagServiceActions( self, tag_service_actions: typing.Collection[ tuple ] ):
        
        self._tag_service_actions = tag_service_actions
        
    
    def SetSyncArchiveAction( self, sync_archive_action: int ):
        
        self._sync_archive_action = sync_archive_action
        
    
    def SetSyncNotesAction( self, sync_notes_action: int ):
        
        self._sync_notes_action = sync_notes_action
        
    
    def SetSyncNoteImportOptions( self, sync_note_import_options: NoteImportOptions.NoteImportOptions ):
        
        self._sync_note_import_options = sync_note_import_options
        
    
    def SetSyncURLsAction( self, sync_urls_action: int ):
        
        self._sync_urls_action = sync_urls_action
        
    
    def ProcessPairIntoContentUpdates( self, first_media: ClientMedia.MediaSingleton, second_media: ClientMedia.MediaSingleton, delete_first = False, delete_second = False, file_deletion_reason = None, do_not_do_deletes = False ):
        """Apply these merge options to a duplicate pair.
        
        first_media is the 'better' file. Returns a dict of
        { service_key : [ content_update, ... ] } covering tag, rating, note,
        archive, url merges and any requested file deletes.
        """
        
        if file_deletion_reason is None:
            
            file_deletion_reason = 'unknown reason'
            
        
        service_keys_to_content_updates = collections.defaultdict( list )
        
        first_hash = first_media.GetHash()
        second_hash = second_media.GetHash()
        
        first_hashes = { first_hash }
        second_hashes = { second_hash }
        
        first_media_result = first_media.GetMediaResult()
        second_media_result = second_media.GetMediaResult()
        
        #
        
        services_manager = HG.client_controller.services_manager
        
        for ( service_key, action, tag_filter ) in self._tag_service_actions:
            
            content_updates = []
            
            try:
                
                service = services_manager.GetService( service_key )
                
            except HydrusExceptions.DataMissing:
                
                # service was deleted since these options were saved--skip it
                continue
                
            
            service_type = service.GetServiceType()
            
            # local tags are added directly; repository tags are pended
            if service_type == HC.LOCAL_TAG:
                
                add_content_action = HC.CONTENT_UPDATE_ADD
                
            elif service_type == HC.TAG_REPOSITORY:
                
                add_content_action = HC.CONTENT_UPDATE_PEND
                
            else:
                
                continue
                
            
            first_tags = first_media.GetTagsManager().GetCurrentAndPending( service_key, ClientTags.TAG_DISPLAY_STORAGE )
            second_tags = second_media.GetTagsManager().GetCurrentAndPending( service_key, ClientTags.TAG_DISPLAY_STORAGE )
            
            first_tags = tag_filter.Filter( first_tags )
            second_tags = tag_filter.Filter( second_tags )
            
            if action == HC.CONTENT_MERGE_ACTION_TWO_WAY_MERGE:
                
                first_needs = second_tags.difference( first_tags )
                second_needs = first_tags.difference( second_tags )
                
                content_updates.extend( ( HydrusData.ContentUpdate( HC.CONTENT_TYPE_MAPPINGS, add_content_action, ( tag, first_hashes ) ) for tag in first_needs ) )
                content_updates.extend( ( HydrusData.ContentUpdate( HC.CONTENT_TYPE_MAPPINGS, add_content_action, ( tag, second_hashes ) ) for tag in second_needs ) )
                
            elif action == HC.CONTENT_MERGE_ACTION_COPY:
                
                first_needs = second_tags.difference( first_tags )
                
                content_updates.extend( ( HydrusData.ContentUpdate( HC.CONTENT_TYPE_MAPPINGS, add_content_action, ( tag, first_hashes ) ) for tag in first_needs ) )
                
            elif service_type == HC.LOCAL_TAG and action == HC.CONTENT_MERGE_ACTION_MOVE:
                
                # move only makes sense on a local service, where we can delete
                first_needs = second_tags.difference( first_tags )
                
                content_updates.extend( ( HydrusData.ContentUpdate( HC.CONTENT_TYPE_MAPPINGS, add_content_action, ( tag, first_hashes ) ) for tag in first_needs ) )
                content_updates.extend( ( HydrusData.ContentUpdate( HC.CONTENT_TYPE_MAPPINGS, HC.CONTENT_UPDATE_DELETE, ( tag, second_hashes ) ) for tag in second_tags ) )
                
            
            if len( content_updates ) > 0:
                
                service_keys_to_content_updates[ service_key ].extend( content_updates )
                
            
        
        def worth_updating_rating( source_rating, dest_rating ):
            # a rating is copied when the destination is unset or lower
            
            if source_rating is not None:
                
                if dest_rating is None or source_rating > dest_rating:
                    
                    return True
                    
                
            
            return False
            
        
        for ( service_key, action ) in self._rating_service_actions:
            
            content_updates = []
            
            try:
                
                services_manager.GetService( service_key )
                
            except HydrusExceptions.DataMissing:
                
                continue
                
            
            first_current_value = first_media.GetRatingsManager().GetRating( service_key )
            second_current_value = second_media.GetRatingsManager().GetRating( service_key )
            
            if action == HC.CONTENT_MERGE_ACTION_TWO_WAY_MERGE:
                
                if worth_updating_rating( first_current_value, second_current_value ):
                    
                    content_updates.append( HydrusData.ContentUpdate( HC.CONTENT_TYPE_RATINGS, HC.CONTENT_UPDATE_ADD, ( first_current_value, second_hashes ) ) )
                    
                elif worth_updating_rating( second_current_value, first_current_value ):
                    
                    content_updates.append( HydrusData.ContentUpdate( HC.CONTENT_TYPE_RATINGS, HC.CONTENT_UPDATE_ADD, ( second_current_value, first_hashes ) ) )
                    
                
            elif action == HC.CONTENT_MERGE_ACTION_COPY:
                
                if worth_updating_rating( second_current_value, first_current_value ):
                    
                    content_updates.append( HydrusData.ContentUpdate( HC.CONTENT_TYPE_RATINGS, HC.CONTENT_UPDATE_ADD, ( second_current_value, first_hashes ) ) )
                    
                
            elif action == HC.CONTENT_MERGE_ACTION_MOVE:
                
                if second_current_value is not None:
                    
                    if worth_updating_rating( second_current_value, first_current_value ):
                        
                        content_updates.append( HydrusData.ContentUpdate( HC.CONTENT_TYPE_RATINGS, HC.CONTENT_UPDATE_ADD, ( second_current_value, first_hashes ) ) )
                        
                    
                    # clear the rating on the worse file
                    content_updates.append( HydrusData.ContentUpdate( HC.CONTENT_TYPE_RATINGS, HC.CONTENT_UPDATE_ADD, ( None, second_hashes ) ) )
                    
                
            
            if len( content_updates ) > 0:
                
                service_keys_to_content_updates[ service_key ].extend( content_updates )
                
            
        
        #
        
        if self._sync_notes_action != HC.CONTENT_MERGE_ACTION_NONE:
            
            first_names_and_notes = list( first_media.GetNotesManager().GetNamesToNotes().items() )
            second_names_and_notes = list( second_media.GetNotesManager().GetNamesToNotes().items() )
            
            content_updates = []
            
            # TODO: rework this to UpdateeNamesToNotes
            
            if self._sync_notes_action == HC.CONTENT_MERGE_ACTION_TWO_WAY_MERGE:
                
                first_service_keys_to_content_updates = self._sync_note_import_options.GetServiceKeysToContentUpdates( first_media_result, second_names_and_notes )
                second_service_keys_to_content_updates = self._sync_note_import_options.GetServiceKeysToContentUpdates( second_media_result, first_names_and_notes )
                
                content_updates.extend( first_service_keys_to_content_updates[ CC.LOCAL_NOTES_SERVICE_KEY ] )
                content_updates.extend( second_service_keys_to_content_updates[ CC.LOCAL_NOTES_SERVICE_KEY ] )
                
            elif self._sync_notes_action == HC.CONTENT_MERGE_ACTION_COPY:
                
                first_service_keys_to_content_updates = self._sync_note_import_options.GetServiceKeysToContentUpdates( first_media_result, second_names_and_notes )
                
                content_updates.extend( first_service_keys_to_content_updates[ CC.LOCAL_NOTES_SERVICE_KEY ] )
                
            elif self._sync_notes_action == HC.CONTENT_MERGE_ACTION_MOVE:
                
                first_service_keys_to_content_updates = self._sync_note_import_options.GetServiceKeysToContentUpdates( first_media_result, second_names_and_notes )
                
                content_updates.extend( first_service_keys_to_content_updates[ CC.LOCAL_NOTES_SERVICE_KEY ] )
                
                # move = copy to first, then delete from second
                content_updates.extend(
                    [ HydrusData.ContentUpdate( HC.CONTENT_TYPE_NOTES, HC.CONTENT_UPDATE_DELETE, ( second_hash, name ) ) for ( name, note ) in second_names_and_notes ]
                )
                
            
            if len( content_updates ) > 0:
                
                service_keys_to_content_updates[ CC.LOCAL_NOTES_SERVICE_KEY ].extend( content_updates )
                
            
        
        #
        
        content_update_archive_first = HydrusData.ContentUpdate( HC.CONTENT_TYPE_FILES, HC.CONTENT_UPDATE_ARCHIVE, first_hashes )
        content_update_archive_second = HydrusData.ContentUpdate( HC.CONTENT_TYPE_FILES, HC.CONTENT_UPDATE_ARCHIVE, second_hashes )
        
        # and not delete_first gubbins here to help out the delete lock lmao. don't want to archive and then try to delete
        # TODO: this is obviously a bad solution, so better to refactor this function to return a list of service_keys_to_content_updates and stick the delete command right up top, tested for locks on current info
        
        if self._sync_archive_action == SYNC_ARCHIVE_IF_ONE_DO_BOTH:
            
            if first_media.HasInbox() and second_media.HasArchive() and not delete_first:
                
                service_keys_to_content_updates[ CC.COMBINED_LOCAL_FILE_SERVICE_KEY ].append( content_update_archive_first )
                
            elif first_media.HasArchive() and second_media.HasInbox() and not delete_second:
                
                service_keys_to_content_updates[ CC.COMBINED_LOCAL_FILE_SERVICE_KEY ].append( content_update_archive_second )
                
            
        elif self._sync_archive_action == SYNC_ARCHIVE_DO_BOTH_REGARDLESS:
            
            if first_media.HasInbox() and not delete_first:
                
                service_keys_to_content_updates[ CC.COMBINED_LOCAL_FILE_SERVICE_KEY ].append( content_update_archive_first )
                
            
            if second_media.HasInbox() and not delete_second:
                
                service_keys_to_content_updates[ CC.COMBINED_LOCAL_FILE_SERVICE_KEY ].append( content_update_archive_second )
                
            
        
        #
        
        if self._sync_urls_action != HC.CONTENT_MERGE_ACTION_NONE:
            
            first_urls = set( first_media.GetLocationsManager().GetURLs() )
            second_urls = set( second_media.GetLocationsManager().GetURLs() )
            
            content_updates = []
            
            if self._sync_urls_action == HC.CONTENT_MERGE_ACTION_TWO_WAY_MERGE:
                
                first_needs = second_urls.difference( first_urls )
                second_needs = first_urls.difference( second_urls )
                
                if len( first_needs ) > 0:
                    
                    content_updates.append( HydrusData.ContentUpdate( HC.CONTENT_TYPE_URLS, HC.CONTENT_UPDATE_ADD, ( first_needs, first_hashes ) ) )
                    
                
                if len( second_needs ) > 0:
                    
                    content_updates.append( HydrusData.ContentUpdate( HC.CONTENT_TYPE_URLS, HC.CONTENT_UPDATE_ADD, ( second_needs, second_hashes ) ) )
                    
                
            elif self._sync_urls_action == HC.CONTENT_MERGE_ACTION_COPY:
                
                first_needs = second_urls.difference( first_urls )
                
                if len( first_needs ) > 0:
                    
                    content_updates.append( HydrusData.ContentUpdate( HC.CONTENT_TYPE_URLS, HC.CONTENT_UPDATE_ADD, ( first_needs, first_hashes ) ) )
                    
                
            
            if len( content_updates ) > 0:
                
                service_keys_to_content_updates[ CC.COMBINED_LOCAL_FILE_SERVICE_KEY ].extend( content_updates )
                
            
        
        #
        
        deletee_media = []
        
        if delete_first:
            
            deletee_media.append( first_media )
            
        
        if delete_second:
            
            deletee_media.append( second_media )
            
        
        for media in deletee_media:
            
            if do_not_do_deletes:
                
                continue
                
            
            if media.HasDeleteLocked():
                
                # delete-locked files are reported to the user rather than deleted
                ClientMedia.ReportDeleteLockFailures( [ media ] )
                
                continue
                
            
            if media.GetLocationsManager().IsTrashed():
                
                # already in trash--delete from the umbrella domain to purge it
                deletee_service_keys = ( CC.COMBINED_LOCAL_FILE_SERVICE_KEY, )
                
            else:
                
                # otherwise delete from every local file domain it is currently in
                local_file_service_keys = HG.client_controller.services_manager.GetServiceKeys( ( HC.LOCAL_FILE_DOMAIN, ) )
                
                deletee_service_keys = media.GetLocationsManager().GetCurrent().intersection( local_file_service_keys )
                
            
            for deletee_service_key in deletee_service_keys:
                
                content_update = HydrusData.ContentUpdate( HC.CONTENT_TYPE_FILES, HC.CONTENT_UPDATE_DELETE, media.GetHashes(), reason = file_deletion_reason )
                
                service_keys_to_content_updates[ deletee_service_key ].append( content_update )
                
            
        
        #
        
        return service_keys_to_content_updates
        
    
HydrusSerialisable.SERIALISABLE_TYPES_TO_OBJECT_TYPES[ HydrusSerialisable.SERIALISABLE_TYPE_DUPLICATE_CONTENT_MERGE_OPTIONS ] = DuplicateContentMergeOptions