hydrus/hydrus/client/ClientDuplicates.py

1252 lines
47 KiB
Python
Raw Normal View History

2020-05-20 21:36:02 +00:00
import collections
2020-12-09 22:18:48 +00:00
import threading
import time
2022-12-14 22:22:11 +00:00
import typing
2020-05-20 21:36:02 +00:00
2020-04-22 21:00:35 +00:00
from hydrus.core import HydrusConstants as HC
from hydrus.core import HydrusData
from hydrus.core import HydrusExceptions
from hydrus.core import HydrusGlobals as HG
2023-01-04 22:22:08 +00:00
from hydrus.core import HydrusImageHandling
2020-04-22 21:00:35 +00:00
from hydrus.core import HydrusSerialisable
2021-04-07 21:26:45 +00:00
from hydrus.core import HydrusTags
2018-05-16 20:09:50 +00:00
2020-07-29 20:52:44 +00:00
from hydrus.client import ClientConstants as CC
2023-01-04 22:22:08 +00:00
from hydrus.client import ClientData
2020-12-09 22:18:48 +00:00
from hydrus.client import ClientThreading
2022-12-14 22:22:11 +00:00
from hydrus.client.importing.options import NoteImportOptions
2022-07-20 19:17:03 +00:00
from hydrus.client.media import ClientMedia
from hydrus.client.metadata import ClientTags
2020-07-29 20:52:44 +00:00
2023-01-04 22:22:08 +00:00
# Module-level caches keyed by file hash, filled lazily by GetDuplicateComparisonStatements so that
# each file is only read from disk once per session. Nothing in this part of the file evicts entries.
# hash -> result of HydrusImageHandling.GetJPEGQuantizationQualityEstimate, unpacked by the reader as ( label, quality )
hashes_to_jpeg_quality = {}
# hash -> result of HydrusImageHandling.GetImagePixelHash
hashes_to_pixel_hashes = {}
def GetDuplicateComparisonScore( shown_media, comparison_media ):
    """
    Return the sum of every individual comparison score for this pair of media.
    
    The per-aspect ( statement, score ) pairs come from GetDuplicateComparisonStatements; only the scores are used here.
    """
    
    comparison_rows = GetDuplicateComparisonStatements( shown_media, comparison_media ).values()
    
    running_total = 0
    
    for ( _statement, row_score ) in comparison_rows:
        
        running_total += row_score
        
    
    return running_total
# TODO: ok, let's make an enum here at some point and a DuplicateComparisonSetting serialisable object
# Then we can attach 'show/hide' boolean and allow editable scores and whatnot in a nice class that will one day evolve the enum to an editable MetadataConditional/MetadataComparison object
# also have banding so we can have 'at this filesize difference, score 10, at this, score 15'
# show it in a listctrl or whatever in the options, ditch the hardcoding
# metadatacomparison needs to handle 'if one is a png and one is a jpeg', and then orient to A/B and give it a score
def GetDuplicateComparisonStatements( shown_media, comparison_media ):
    """
    Compare two media objects and describe how they differ.
    
    Returns a dict of statement key -> ( statement text, score ). A positive score counts in favour of
    shown_media and a negative score in favour of comparison_media. Score magnitudes are read from the
    client options, except for the pixel-duplicate png case which is a fixed +/-100.
    
    Keys that may be present: 'pixel_duplicates', 'filesize', 'resolution', 'ratio', 'mime', 'num_tags',
    'time_imported', 'jpeg_quality', 'exif_data', 'embedded_metadata', 'icc_profile'. A key is only added
    when the two files actually differ in that aspect (or, for 'pixel_duplicates', when they match).
    
    Side effects: may read both files from disk, and fills the module-level hashes_to_pixel_hashes and
    hashes_to_jpeg_quality caches.
    """
    
    new_options = HG.client_controller.new_options
    
    duplicate_comparison_score_higher_jpeg_quality = new_options.GetInteger( 'duplicate_comparison_score_higher_jpeg_quality' )
    duplicate_comparison_score_much_higher_jpeg_quality = new_options.GetInteger( 'duplicate_comparison_score_much_higher_jpeg_quality' )
    duplicate_comparison_score_higher_filesize = new_options.GetInteger( 'duplicate_comparison_score_higher_filesize' )
    duplicate_comparison_score_much_higher_filesize = new_options.GetInteger( 'duplicate_comparison_score_much_higher_filesize' )
    duplicate_comparison_score_higher_resolution = new_options.GetInteger( 'duplicate_comparison_score_higher_resolution' )
    duplicate_comparison_score_much_higher_resolution = new_options.GetInteger( 'duplicate_comparison_score_much_higher_resolution' )
    duplicate_comparison_score_more_tags = new_options.GetInteger( 'duplicate_comparison_score_more_tags' )
    duplicate_comparison_score_older = new_options.GetInteger( 'duplicate_comparison_score_older' )
    duplicate_comparison_score_nicer_ratio = new_options.GetInteger( 'duplicate_comparison_score_nicer_ratio' )
    
    #
    
    statements_and_scores = {}
    
    s_hash = shown_media.GetHash()
    c_hash = comparison_media.GetHash()
    
    s_mime = shown_media.GetMime()
    c_mime = comparison_media.GetMime()
    
    # size
    
    s_size = shown_media.GetSize()
    c_size = comparison_media.GetSize()
    
    is_a_pixel_dupe = False
    
    # pixel hashes are only computed when both are static images of the same resolution
    if shown_media.IsStaticImage() and comparison_media.IsStaticImage() and shown_media.GetResolution() == comparison_media.GetResolution():
        
        global hashes_to_pixel_hashes
        
        if s_hash not in hashes_to_pixel_hashes:
            
            path = HG.client_controller.client_files_manager.GetFilePath( s_hash, s_mime )
            
            hashes_to_pixel_hashes[ s_hash ] = HydrusImageHandling.GetImagePixelHash( path, s_mime )
            
        
        if c_hash not in hashes_to_pixel_hashes:
            
            path = HG.client_controller.client_files_manager.GetFilePath( c_hash, c_mime )
            
            hashes_to_pixel_hashes[ c_hash ] = HydrusImageHandling.GetImagePixelHash( path, c_mime )
            
        
        s_pixel_hash = hashes_to_pixel_hashes[ s_hash ]
        c_pixel_hash = hashes_to_pixel_hashes[ c_hash ]
        
        if s_pixel_hash == c_pixel_hash:
            
            is_a_pixel_dupe = True
            
            # when exactly one of a pixel-identical pair is a png, the png side is penalised
            if s_mime == HC.IMAGE_PNG and c_mime != HC.IMAGE_PNG:
                
                statement = 'this is a pixel-for-pixel duplicate png!'
                
                score = -100
                
            elif s_mime != HC.IMAGE_PNG and c_mime == HC.IMAGE_PNG:
                
                statement = 'other file is a pixel-for-pixel duplicate png!'
                
                score = 100
                
            else:
                
                statement = 'images are pixel-for-pixel duplicates!'
                
                score = 0
                
            
            statements_and_scores[ 'pixel_duplicates' ] = ( statement, score )
            
        
    
    if s_size != c_size:
        
        smaller_size = min( s_size, c_size )
        
        if smaller_size > 0:
            
            absolute_size_ratio = max( s_size, c_size ) / smaller_size
            
        else:
            
            # a zero-byte file cannot be divided by; the non-empty file counts as 'much' larger
            absolute_size_ratio = float( 'inf' )
            
        
        if absolute_size_ratio > 2.0:
            
            if s_size > c_size:
                
                operator = '>>'
                score = duplicate_comparison_score_much_higher_filesize
                
            else:
                
                operator = '<<'
                score = -duplicate_comparison_score_much_higher_filesize
                
            
        elif absolute_size_ratio > 1.05:
            
            if s_size > c_size:
                
                operator = '>'
                score = duplicate_comparison_score_higher_filesize
                
            else:
                
                operator = '<'
                score = -duplicate_comparison_score_higher_filesize
                
            
        else:
            
            operator = CC.UNICODE_ALMOST_EQUAL_TO
            score = 0
            
        
        if c_size > 0:
            
            # a negative difference brings its own minus sign, so only the positive case needs one added
            sign = '+' if s_size > c_size else ''
            
            percentage_difference = ( s_size / c_size ) - 1.0
            
            percentage_different_string = ' ({}{})'.format( sign, HydrusData.ConvertFloatToPercentage( percentage_difference ) )
            
        else:
            
            # no meaningful percentage against a zero-byte comparison file
            percentage_different_string = ''
            
        
        if is_a_pixel_dupe:
            
            # the statement is still shown for pixel duplicates, but the size difference is not scored
            score = 0
            
        
        statement = '{} {} {}{}'.format( HydrusData.ToHumanBytes( s_size ), operator, HydrusData.ToHumanBytes( c_size ), percentage_different_string )
        
        statements_and_scores[ 'filesize' ] = ( statement, score )
        
    
    # higher/same res
    
    s_resolution = shown_media.GetResolution()
    c_resolution = comparison_media.GetResolution()
    
    if s_resolution != c_resolution:
        
        ( s_w, s_h ) = s_resolution
        ( c_w, c_h ) = c_resolution
        
        # every dimension must be a known, positive number before any division below
        all_measurements_are_good = None not in ( s_w, s_h, c_w, c_h ) and True not in ( d <= 0 for d in ( s_w, s_h, c_w, c_h ) )
        
        if all_measurements_are_good:
            
            resolution_ratio = ( s_w * s_h ) / ( c_w * c_h )
            
            if resolution_ratio == 1.0:
                
                # same pixel count, different shape
                operator = '!='
                score = 0
                
            elif resolution_ratio > 2.0:
                
                operator = '>>'
                score = duplicate_comparison_score_much_higher_resolution
                
            elif resolution_ratio > 1.00:
                
                operator = '>'
                score = duplicate_comparison_score_higher_resolution
                
            elif resolution_ratio < 0.5:
                
                operator = '<<'
                score = -duplicate_comparison_score_much_higher_resolution
                
            else:
                
                operator = '<'
                score = -duplicate_comparison_score_higher_resolution
                
            
            if s_resolution in HC.NICE_RESOLUTIONS:
                
                s_string = HC.NICE_RESOLUTIONS[ s_resolution ]
                
            else:
                
                s_string = HydrusData.ConvertResolutionToPrettyString( s_resolution )
                
                if s_w % 2 == 1 or s_h % 2 == 1:
                    
                    s_string += ' (unusual)'
                    
                
            
            if c_resolution in HC.NICE_RESOLUTIONS:
                
                c_string = HC.NICE_RESOLUTIONS[ c_resolution ]
                
            else:
                
                c_string = HydrusData.ConvertResolutionToPrettyString( c_resolution )
                
                if c_w % 2 == 1 or c_h % 2 == 1:
                    
                    c_string += ' (unusual)'
                    
                
            
            statement = '{} {} {}'.format( s_string, operator, c_string )
            
            statements_and_scores[ 'resolution' ] = ( statement, score )
            
            #
            
            # NOTE(review): kept under the measurement check so the divisions below cannot hit None or zero
            s_ratio = s_w / s_h
            c_ratio = c_w / c_h
            
            s_nice = s_ratio in HC.NICE_RATIOS
            c_nice = c_ratio in HC.NICE_RATIOS
            
            if s_nice or c_nice:
                
                if s_nice:
                    
                    s_string = HC.NICE_RATIOS[ s_ratio ]
                    
                else:
                    
                    s_string = 'unusual'
                    
                
                if c_nice:
                    
                    c_string = HC.NICE_RATIOS[ c_ratio ]
                    
                else:
                    
                    c_string = 'unusual'
                    
                
                if s_nice and c_nice:
                    
                    operator = '-'
                    score = 0
                    
                elif s_nice:
                    
                    operator = '>'
                    score = duplicate_comparison_score_nicer_ratio
                    
                elif c_nice:
                    
                    operator = '<'
                    score = -duplicate_comparison_score_nicer_ratio
                    
                
                if s_string == c_string:
                    
                    statement = 'both {}'.format( s_string )
                    
                else:
                    
                    statement = '{} {} {}'.format( s_string, operator, c_string )
                    
                
                statements_and_scores[ 'ratio' ] = ( statement, score )
                
            
        
    
    # same/diff mime
    
    if s_mime != c_mime:
        
        statement = '{} vs {}'.format( HC.mime_string_lookup[ s_mime ], HC.mime_string_lookup[ c_mime ] )
        score = 0
        
        statements_and_scores[ 'mime' ] = ( statement, score )
        
    
    # more tags
    
    s_num_tags = len( shown_media.GetTagsManager().GetCurrentAndPending( CC.COMBINED_TAG_SERVICE_KEY, ClientTags.TAG_DISPLAY_ACTUAL ) )
    c_num_tags = len( comparison_media.GetTagsManager().GetCurrentAndPending( CC.COMBINED_TAG_SERVICE_KEY, ClientTags.TAG_DISPLAY_ACTUAL ) )
    
    if s_num_tags != c_num_tags:
        
        # the counts differ, so at least one is non-zero and one of these branches always sets operator/score
        if s_num_tags > 0 and c_num_tags > 0:
            
            if s_num_tags > c_num_tags:
                
                operator = '>'
                score = duplicate_comparison_score_more_tags
                
            else:
                
                operator = '<'
                score = -duplicate_comparison_score_more_tags
                
            
        elif s_num_tags > 0:
            
            operator = '>>'
            score = duplicate_comparison_score_more_tags
            
        elif c_num_tags > 0:
            
            operator = '<<'
            score = -duplicate_comparison_score_more_tags
            
        
        statement = '{} tags {} {} tags'.format( HydrusData.ToHumanInt( s_num_tags ), operator, HydrusData.ToHumanInt( c_num_tags ) )
        
        statements_and_scores[ 'num_tags' ] = ( statement, score )
        
    
    # older
    
    s_ts = shown_media.GetLocationsManager().GetCurrentTimestamp( CC.COMBINED_LOCAL_FILE_SERVICE_KEY )
    c_ts = comparison_media.GetLocationsManager().GetCurrentTimestamp( CC.COMBINED_LOCAL_FILE_SERVICE_KEY )
    
    one_month = 86400 * 30
    
    # timestamps closer than a month apart are not reported at all
    if s_ts is not None and c_ts is not None and abs( s_ts - c_ts ) > one_month:
        
        if s_ts < c_ts:
            
            operator = 'older than'
            score = duplicate_comparison_score_older
            
        else:
            
            operator = 'newer than'
            score = -duplicate_comparison_score_older
            
        
        if is_a_pixel_dupe:
            
            score = 0
            
        
        statement = '{}, {} {}'.format( ClientData.TimestampToPrettyTimeDelta( s_ts, history_suffix = ' old' ), operator, ClientData.TimestampToPrettyTimeDelta( c_ts, history_suffix = ' old' ) )
        
        statements_and_scores[ 'time_imported' ] = ( statement, score )
        
    
    if s_mime == HC.IMAGE_JPEG and c_mime == HC.IMAGE_JPEG:
        
        global hashes_to_jpeg_quality
        
        if s_hash not in hashes_to_jpeg_quality:
            
            path = HG.client_controller.client_files_manager.GetFilePath( s_hash, s_mime )
            
            hashes_to_jpeg_quality[ s_hash ] = HydrusImageHandling.GetJPEGQuantizationQualityEstimate( path )
            
        
        if c_hash not in hashes_to_jpeg_quality:
            
            path = HG.client_controller.client_files_manager.GetFilePath( c_hash, c_mime )
            
            hashes_to_jpeg_quality[ c_hash ] = HydrusImageHandling.GetJPEGQuantizationQualityEstimate( path )
            
        
        ( s_label, s_jpeg_quality ) = hashes_to_jpeg_quality[ s_hash ]
        ( c_label, c_jpeg_quality ) = hashes_to_jpeg_quality[ c_hash ]
        
        score = 0
        
        # NOTE(review): nesting reconstructed from a flattened source; the statement is emitted only when the labels differ -- confirm
        if s_label != c_label:
            
            if c_jpeg_quality is None or s_jpeg_quality is None:
                
                score = 0
                
            else:
                
                # other way around, low score is good here
                quality_ratio = c_jpeg_quality / s_jpeg_quality
                
                if quality_ratio > 2.0:
                    
                    score = duplicate_comparison_score_much_higher_jpeg_quality
                    
                elif quality_ratio > 1.0:
                    
                    score = duplicate_comparison_score_higher_jpeg_quality
                    
                elif quality_ratio < 0.5:
                    
                    score = -duplicate_comparison_score_much_higher_jpeg_quality
                    
                else:
                    
                    score = -duplicate_comparison_score_higher_jpeg_quality
                    
                
            
            statement = '{} vs {} jpeg quality'.format( s_label, c_label )
            
            statements_and_scores[ 'jpeg_quality' ] = ( statement, score )
            
        
    
    def has_exif( m ):
        """Best-effort check for a non-empty EXIF dict on a jpeg/tiff; any failure counts as 'no exif'."""
        
        try:
            
            file_hash = m.GetHash()
            mime = m.GetMime()
            
            if mime not in ( HC.IMAGE_JPEG, HC.IMAGE_TIFF ):
                
                return False
                
            
            path = HG.client_controller.client_files_manager.GetFilePath( file_hash, mime )
            
            pil_image = HydrusImageHandling.RawOpenPILImage( path )
            
            exif_dict = HydrusImageHandling.GetEXIFDict( pil_image )
            
            if exif_dict is None:
                
                return False
                
            
            return len( exif_dict ) > 0
            
        except Exception:
            
            # deliberately lenient: a missing or unreadable file just means no exif statement.
            # Exception rather than a bare except, so KeyboardInterrupt/SystemExit still propagate
            return False
            
        
    
    s_has_exif = has_exif( shown_media )
    c_has_exif = has_exif( comparison_media )
    
    # the remaining statements are informational only (score 0), shown when exactly one side has the feature
    
    if s_has_exif ^ c_has_exif:
        
        if s_has_exif:
            
            exif_statement = 'has exif data, the other does not'
            
        else:
            
            exif_statement = 'the other has exif data, this does not'
            
        
        statements_and_scores[ 'exif_data' ] = ( exif_statement, 0 )
        
    
    s_has_human_readable_embedded_metadata = shown_media.GetMediaResult().GetFileInfoManager().has_human_readable_embedded_metadata
    c_has_human_readable_embedded_metadata = comparison_media.GetMediaResult().GetFileInfoManager().has_human_readable_embedded_metadata
    
    if s_has_human_readable_embedded_metadata ^ c_has_human_readable_embedded_metadata:
        
        if s_has_human_readable_embedded_metadata:
            
            embedded_metadata_statement = 'has embedded metadata, the other does not'
            
        else:
            
            embedded_metadata_statement = 'the other has embedded metadata, this does not'
            
        
        statements_and_scores[ 'embedded_metadata' ] = ( embedded_metadata_statement, 0 )
        
    
    s_has_icc = shown_media.GetMediaResult().GetFileInfoManager().has_icc_profile
    c_has_icc = comparison_media.GetMediaResult().GetFileInfoManager().has_icc_profile
    
    if s_has_icc ^ c_has_icc:
        
        if s_has_icc:
            
            icc_statement = 'has icc profile, the other does not'
            
        else:
            
            icc_statement = 'the other has icc profile, this does not'
            
        
        statements_and_scores[ 'icc_profile' ] = ( icc_statement, 0 )
        
    
    return statements_and_scores
2020-12-09 22:18:48 +00:00
class DuplicatesManager( object ):
    """
    Shared holder for the similar-files maintenance numbers and the background potential-duplicates search.
    
    The most recently constructed object is stored on the class as my_instance; instance() creates one on
    first use. All mutable state is guarded by self._lock. The THREAD* methods are handed to the
    controller's CallToThread/CallToThreadLongRunning by the other methods of this class.
    """
    
    my_instance = None
    
    def __init__( self ):
        """Create the manager and register it as the shared instance."""
        
        DuplicatesManager.my_instance = self
        
        # last result of the 'similar_files_maintenance_status' read; None until a refresh has completed
        self._similar_files_maintenance_status = None
        
        # True while a refresh is running or queued
        self._currently_refreshing_maintenance_numbers = False
        
        # True when the numbers are stale and should be re-read at the next opportunity
        self._refresh_maintenance_numbers = True
        
        self._currently_doing_potentials_search = False
        
        # a plain, non-reentrant lock: methods that take it must not call each other while holding it
        self._lock = threading.Lock()
        
    
    @staticmethod
    def instance() -> 'DuplicatesManager':
        """Return the shared manager, constructing it on first call."""
        
        if DuplicatesManager.my_instance is None:
            
            DuplicatesManager()
            
        
        return DuplicatesManager.my_instance
        
    
    def GetMaintenanceNumbers( self ):
        """
        Return ( maintenance status, currently refreshing, currently searching ).
        
        If the numbers are marked stale and no refresh is in flight, this also kicks one off, so the
        returned status may be out of date (or None) until the refresh thread finishes.
        """
        
        with self._lock:
            
            if self._refresh_maintenance_numbers and not self._currently_refreshing_maintenance_numbers:
                
                self._refresh_maintenance_numbers = False
                self._currently_refreshing_maintenance_numbers = True
                
                HG.client_controller.pub( 'new_similar_files_maintenance_numbers' )
                
                HG.client_controller.CallToThread( self.THREADRefreshMaintenanceNumbers )
                
            
            return ( self._similar_files_maintenance_status, self._currently_refreshing_maintenance_numbers, self._currently_doing_potentials_search )
            
        
    
    def RefreshMaintenanceNumbers( self ):
        """Mark the numbers stale and notify listeners; the actual re-read happens on the next GetMaintenanceNumbers."""
        
        with self._lock:
            
            self._refresh_maintenance_numbers = True
            
            HG.client_controller.pub( 'new_similar_files_maintenance_numbers' )
            
        
    
    def NotifyNewPotentialsSearchNumbers( self ):
        """Publish that the potentials search numbers have changed."""
        
        HG.client_controller.pub( 'new_similar_files_potentials_search_numbers' )
        
    
    def StartPotentialsSearch( self ):
        """Start the background potentials search unless one is already running or no status has been loaded yet."""
        
        with self._lock:
            
            if self._currently_doing_potentials_search or self._similar_files_maintenance_status is None:
                
                return
                
            
            self._currently_doing_potentials_search = True
            
            HG.client_controller.CallToThreadLongRunning( self.THREADSearchPotentials )
            
        
    
    def THREADRefreshMaintenanceNumbers( self ):
        """
        Worker: read the maintenance status and store it.
        
        If another refresh was requested while this one ran, it re-queues itself instead of clearing the
        'currently refreshing' flag. On any error the flag is cleared, listeners are notified, and the
        error is re-raised.
        """
        
        try:
            
            similar_files_maintenance_status = HG.client_controller.Read( 'similar_files_maintenance_status' )
            
            with self._lock:
                
                self._similar_files_maintenance_status = similar_files_maintenance_status
                
                if self._refresh_maintenance_numbers:
                    
                    # went stale again while we were reading, so go around once more
                    self._refresh_maintenance_numbers = False
                    
                    HG.client_controller.CallToThread( self.THREADRefreshMaintenanceNumbers )
                    
                else:
                    
                    self._currently_refreshing_maintenance_numbers = False
                    self._refresh_maintenance_numbers = False
                    
                
                HG.client_controller.pub( 'new_similar_files_maintenance_numbers' )
                
            
        except:
            
            # the 'with' above has already released the lock by the time we get here, so taking it
            # again is safe; every other write to this flag is done under the lock too
            with self._lock:
                
                self._currently_refreshing_maintenance_numbers = False
                
                HG.client_controller.pub( 'new_similar_files_maintenance_numbers' )
                
            
            raise
            
        
    
    def THREADSearchPotentials( self ):
        """
        Worker: search for potential duplicate pairs in short bursts until done, cancelled or shut down.
        
        Progress is shown through a cancellable job key popup. Whatever happens, the 'currently searching'
        flag is cleared and listeners are told to refresh when this method exits.
        """
        
        try:
            
            search_distance = HG.client_controller.new_options.GetInteger( 'similar_files_duplicate_pairs_search_distance' )
            
            with self._lock:
                
                if self._similar_files_maintenance_status is None:
                    
                    return
                    
                
                # maps searched distance (None allowed) -> number of files
                searched_distances_to_count = self._similar_files_maintenance_status
                
                total_num_files = sum( searched_distances_to_count.values() )
                
                num_searched = sum( ( count for ( value, count ) in searched_distances_to_count.items() if value is not None and value >= search_distance ) )
                
                all_files_searched = num_searched >= total_num_files
                
                if all_files_searched:
                    
                    return # no work to do
                    
                
            
            num_searched_estimate = num_searched
            
            HG.client_controller.pub( 'new_similar_files_maintenance_numbers' )
            
            job_key = ClientThreading.JobKey( cancellable = True )
            
            try:
                
                job_key.SetStatusTitle( 'searching for potential duplicates' )
                
                HG.client_controller.pub( 'message', job_key )
                
                still_work_to_do = True
                
                while still_work_to_do:
                    
                    # re-read every pass so a changed option is picked up mid-search
                    search_distance = HG.client_controller.new_options.GetInteger( 'similar_files_duplicate_pairs_search_distance' )
                    
                    start_time = HydrusData.GetNowPrecise()
                    
                    ( still_work_to_do, num_done ) = HG.client_controller.WriteSynchronous( 'maintain_similar_files_search_for_potential_duplicates', search_distance, maintenance_mode = HC.MAINTENANCE_FORCED, job_key = job_key, work_time_float = 0.5 )
                    
                    time_it_took = HydrusData.GetNowPrecise() - start_time
                    
                    num_searched_estimate += num_done
                    
                    if num_searched_estimate > total_num_files:
                        
                        # our estimate has overtaken the known total, so fetch fresh numbers
                        similar_files_maintenance_status = HG.client_controller.Read( 'similar_files_maintenance_status' )
                        
                        if similar_files_maintenance_status is None:
                            
                            break
                            
                        
                        with self._lock:
                            
                            self._similar_files_maintenance_status = similar_files_maintenance_status
                            
                            searched_distances_to_count = self._similar_files_maintenance_status
                            
                            total_num_files = max( num_searched_estimate, sum( searched_distances_to_count.values() ) )
                            
                        
                    
                    text = 'searching: {}'.format( HydrusData.ConvertValueRangeToPrettyString( num_searched_estimate, total_num_files ) )
                    
                    job_key.SetVariable( 'popup_text_1', text )
                    job_key.SetVariable( 'popup_gauge_1', ( num_searched_estimate, total_num_files ) )
                    
                    if job_key.IsCancelled() or HG.model_shutdown:
                        
                        break
                        
                    
                    time.sleep( min( 5, time_it_took ) ) # ideally 0.5s, but potentially longer
                    
                
            finally:
                
                # delete the job key on the error path as well, so a failed search does not leave its popup behind
                job_key.Delete()
                
            
        finally:
            
            with self._lock:
                
                self._currently_doing_potentials_search = False
                
            
            # these run outside the lock: RefreshMaintenanceNumbers takes it itself
            self.RefreshMaintenanceNumbers()
            self.NotifyNewPotentialsSearchNumbers()
            
        
    
2020-12-09 22:18:48 +00:00
2022-12-14 22:22:11 +00:00
2022-04-20 20:18:56 +00:00
# Values for the sync-archive action held by DuplicateContentMergeOptions (its default is SYNC_ARCHIVE_NONE).
# The old boolean 'sync_archive' setting is upgraded to SYNC_ARCHIVE_IF_ONE_DO_BOTH when true, SYNC_ARCHIVE_NONE otherwise.
# NOTE(review): presumably IF_ONE_DO_BOTH archives both files when either is archived and DO_BOTH_REGARDLESS always archives both -- confirm against the code that consumes these
SYNC_ARCHIVE_NONE = 0
SYNC_ARCHIVE_IF_ONE_DO_BOTH = 1
SYNC_ARCHIVE_DO_BOTH_REGARDLESS = 2
2022-12-14 22:22:11 +00:00
class DuplicateContentMergeOptions( HydrusSerialisable.SerialisableBase ):
2018-05-16 20:09:50 +00:00
2022-12-14 22:22:11 +00:00
SERIALISABLE_TYPE = HydrusSerialisable.SERIALISABLE_TYPE_DUPLICATE_CONTENT_MERGE_OPTIONS
SERIALISABLE_NAME = 'Duplicate Content Merge Options'
SERIALISABLE_VERSION = 6
2018-05-16 20:09:50 +00:00
2022-12-14 22:22:11 +00:00
def __init__( self ):
    """Start with no per-service actions and every sync action switched off."""
    
    HydrusSerialisable.SerialisableBase.__init__( self )
    
    # per-service rules: tag rows are ( service_key, action, tag_filter ), rating rows are ( service_key, action )
    self._rating_service_actions = []
    self._tag_service_actions = []
    
    # whole-file sync behaviours
    self._sync_archive_action = SYNC_ARCHIVE_NONE
    self._sync_urls_action = HC.CONTENT_MERGE_ACTION_NONE
    self._sync_notes_action = HC.CONTENT_MERGE_ACTION_NONE
    self._sync_note_import_options = NoteImportOptions.NoteImportOptions()
    
2018-05-16 20:09:50 +00:00
def _GetSerialisableInfo( self ):
    """
    Return the serialisable tuple for this object.
    
    When the client is booted, rows that refer to a service that no longer exists, or is of the wrong
    type, are first dropped from the stored action lists, so this call can modify the object.
    """
    
    if HG.client_controller.IsBooted():
        
        services_manager = HG.client_controller.services_manager
        
        surviving_tag_rows = []
        
        for ( service_key, action, tag_filter ) in self._tag_service_actions:
            
            if services_manager.ServiceExists( service_key ) and services_manager.GetServiceType( service_key ) in HC.REAL_TAG_SERVICES:
                
                surviving_tag_rows.append( ( service_key, action, tag_filter ) )
                
            
        
        self._tag_service_actions = surviving_tag_rows
        
        surviving_rating_rows = []
        
        for ( service_key, action ) in self._rating_service_actions:
            
            if services_manager.ServiceExists( service_key ) and services_manager.GetServiceType( service_key ) in ( HC.LOCAL_RATING_LIKE, HC.LOCAL_RATING_NUMERICAL ):
                
                surviving_rating_rows.append( ( service_key, action ) )
                
            
        
        self._rating_service_actions = surviving_rating_rows
        
    
    # service keys are bytes in memory and hex strings on disk
    tag_rows_out = []
    
    for ( service_key, action, tag_filter ) in self._tag_service_actions:
        
        tag_rows_out.append( ( service_key.hex(), action, tag_filter.GetSerialisableTuple() ) )
        
    
    rating_rows_out = []
    
    for ( service_key, action ) in self._rating_service_actions:
        
        rating_rows_out.append( ( service_key.hex(), action ) )
        
    
    note_import_options_out = self._sync_note_import_options.GetSerialisableTuple()
    
    return ( tag_rows_out, rating_rows_out, self._sync_notes_action, note_import_options_out, self._sync_archive_action, self._sync_urls_action )
    
2018-05-16 20:09:50 +00:00
def _InitialiseFromSerialisableInfo( self, serialisable_info ):
    """Load this object's state from a current-version serialisable tuple."""
    
    ( tag_rows_in, rating_rows_in, notes_action, note_import_options_in, archive_action, urls_action ) = serialisable_info
    
    # the plain values go straight in, before any of the conversions below can fail
    self._sync_notes_action = notes_action
    self._sync_archive_action = archive_action
    self._sync_urls_action = urls_action
    
    # hex strings back to bytes service keys, serialised filters back to objects
    loaded_tag_rows = []
    
    for ( hex_service_key, action, tag_filter_in ) in tag_rows_in:
        
        loaded_tag_rows.append( ( bytes.fromhex( hex_service_key ), action, HydrusSerialisable.CreateFromSerialisableTuple( tag_filter_in ) ) )
        
    
    self._tag_service_actions = loaded_tag_rows
    
    loaded_rating_rows = []
    
    for ( hex_service_key, action ) in rating_rows_in:
        
        loaded_rating_rows.append( ( bytes.fromhex( hex_service_key ), action ) )
        
    
    self._rating_service_actions = loaded_rating_rows
    
    self._sync_note_import_options = HydrusSerialisable.CreateFromSerialisableTuple( note_import_options_in )
    
2018-05-16 20:09:50 +00:00
def _UpdateSerialisableInfo( self, version, old_serialisable_info ):
if version == 1:
( serialisable_service_actions, delete_second_file ) = old_serialisable_info
tag_service_actions = []
rating_service_actions = []
# As the client isn't booted when this is loaded in options, there isn't a good way to figure out tag from rating
# So, let's just dupe and purge later on, in serialisation
for ( service_key_encoded, action ) in serialisable_service_actions:
2019-01-09 22:59:03 +00:00
service_key = bytes.fromhex( service_key_encoded )
2018-05-16 20:09:50 +00:00
2021-04-07 21:26:45 +00:00
tag_filter = HydrusTags.TagFilter()
2018-05-16 20:09:50 +00:00
tag_service_actions.append( ( service_key, action, tag_filter ) )
rating_service_actions.append( ( service_key, action ) )
2019-01-09 22:59:03 +00:00
serialisable_tag_service_actions = [ ( service_key.hex(), action, tag_filter.GetSerialisableTuple() ) for ( service_key, action, tag_filter ) in tag_service_actions ]
serialisable_rating_service_actions = [ ( service_key.hex(), action ) for ( service_key, action ) in rating_service_actions ]
2018-05-16 20:09:50 +00:00
sync_archive = delete_second_file
delete_both_files = False
new_serialisable_info = ( serialisable_tag_service_actions, serialisable_rating_service_actions, delete_second_file, sync_archive, delete_both_files )
return ( 2, new_serialisable_info )
if version == 2:
( serialisable_tag_service_actions, serialisable_rating_service_actions, delete_second_file, sync_archive, delete_both_files ) = old_serialisable_info
sync_urls_action = None
new_serialisable_info = ( serialisable_tag_service_actions, serialisable_rating_service_actions, delete_second_file, sync_archive, delete_both_files, sync_urls_action )
return ( 3, new_serialisable_info )
2019-05-22 22:35:06 +00:00
if version == 3:
2018-05-16 20:09:50 +00:00
2019-05-22 22:35:06 +00:00
( serialisable_tag_service_actions, serialisable_rating_service_actions, delete_second_file, sync_archive, delete_both_files, sync_urls_action ) = old_serialisable_info
2018-05-16 20:09:50 +00:00
2019-05-22 22:35:06 +00:00
new_serialisable_info = ( serialisable_tag_service_actions, serialisable_rating_service_actions, sync_archive, sync_urls_action )
2018-05-16 20:09:50 +00:00
2019-05-22 22:35:06 +00:00
return ( 4, new_serialisable_info )
2018-05-16 20:09:50 +00:00
2022-04-20 20:18:56 +00:00
if version == 4:
( serialisable_tag_service_actions, serialisable_rating_service_actions, sync_archive, sync_urls_action ) = old_serialisable_info
if sync_archive:
sync_archive_action = SYNC_ARCHIVE_IF_ONE_DO_BOTH
else:
sync_archive_action = SYNC_ARCHIVE_NONE
new_serialisable_info = ( serialisable_tag_service_actions, serialisable_rating_service_actions, sync_archive_action, sync_urls_action )
return ( 5, new_serialisable_info )
2022-12-14 22:22:11 +00:00
if version == 5:
( serialisable_tag_service_actions, serialisable_rating_service_actions, sync_archive_action, sync_urls_action ) = old_serialisable_info
if sync_urls_action is None:
sync_urls_action = HC.CONTENT_MERGE_ACTION_NONE
sync_notes_action = HC.CONTENT_MERGE_ACTION_NONE
sync_note_import_options = NoteImportOptions.NoteImportOptions()
serialisable_sync_note_import_options = sync_note_import_options.GetSerialisableTuple()
new_serialisable_info = ( serialisable_tag_service_actions, serialisable_rating_service_actions, sync_notes_action, serialisable_sync_note_import_options, sync_archive_action, sync_urls_action )
return ( 6, new_serialisable_info )
2018-05-16 20:09:50 +00:00
2022-12-14 22:22:11 +00:00
def GetRatingServiceActions( self ) -> typing.Collection[ tuple ]:
    """Return the stored rating rules, each a ( service_key, action ) tuple. This is the internal collection, not a copy."""
    
    return self._rating_service_actions
    
def GetTagServiceActions( self ) -> typing.Collection[ tuple ]:
    """Return the stored tag rules, each a ( service_key, action, tag_filter ) tuple. This is the internal collection, not a copy."""
    
    return self._tag_service_actions
    
def GetSyncArchiveAction( self ) -> int:
    """Return the sync-archive action (initialised to the module-level SYNC_ARCHIVE_NONE)."""
    
    return self._sync_archive_action
    
def GetSyncNotesAction( self ) -> int:
    """Return the notes merge action (initialised to HC.CONTENT_MERGE_ACTION_NONE)."""
    
    return self._sync_notes_action
    
def GetSyncNoteImportOptions( self ) -> NoteImportOptions.NoteImportOptions:
    """Return the stored note import options object itself, not a copy."""
    
    return self._sync_note_import_options
    
def GetSyncURLsAction( self ) -> int:
    """Return the URLs merge action (initialised to HC.CONTENT_MERGE_ACTION_NONE)."""
    
    return self._sync_urls_action
    
def SetRatingServiceActions( self, rating_service_actions: typing.Collection[ tuple ] ):
    """Replace the rating rules; rows are read elsewhere in this class as ( service_key, action ). The collection is stored as given, not copied."""
    
    self._rating_service_actions = rating_service_actions
    
2022-12-14 22:22:11 +00:00
def SetTagServiceActions( self, tag_service_actions: typing.Collection[ tuple ] ):
    """Replace the tag rules; rows are read elsewhere in this class as ( service_key, action, tag_filter ). The collection is stored as given, not copied."""
    
    self._tag_service_actions = tag_service_actions
    
def SetSyncArchiveAction( self, sync_archive_action: int ):
    """Set the sync-archive action. No validation is done here; presumably one of the module-level SYNC_ARCHIVE_* values."""
    
    self._sync_archive_action = sync_archive_action
    
2018-05-16 20:09:50 +00:00
2022-12-14 22:22:11 +00:00
def SetSyncNotesAction( self, sync_notes_action: int ):
    """Set the notes merge action. No validation is done here; presumably an HC.CONTENT_MERGE_ACTION_* value."""
    
    self._sync_notes_action = sync_notes_action
    
def SetSyncNoteImportOptions( self, sync_note_import_options: NoteImportOptions.NoteImportOptions ):
    """Replace the stored note import options. The object is stored as given, not copied."""
    
    self._sync_note_import_options = sync_note_import_options
    
2018-05-16 20:09:50 +00:00
2022-12-14 22:22:11 +00:00
def SetSyncURLsAction( self, sync_urls_action: int ):
    """Set the URLs merge action. No validation is done here; presumably an HC.CONTENT_MERGE_ACTION_* value."""
    
    self._sync_urls_action = sync_urls_action
    
2018-05-16 20:09:50 +00:00
2023-01-04 22:22:08 +00:00
def ProcessPairIntoContentUpdates( self, first_media: ClientMedia.MediaSingleton, second_media: ClientMedia.MediaSingleton, delete_first = False, delete_second = False, file_deletion_reason = None, do_not_do_deletes = False ):
2019-04-10 22:50:53 +00:00
if file_deletion_reason is None:
file_deletion_reason = 'unknown reason'
2018-05-16 20:09:50 +00:00
2018-11-07 23:09:40 +00:00
service_keys_to_content_updates = collections.defaultdict( list )
2018-05-16 20:09:50 +00:00
2023-01-04 22:22:08 +00:00
first_hash = first_media.GetHash()
second_hash = second_media.GetHash()
first_hashes = { first_hash }
second_hashes = { second_hash }
first_media_result = first_media.GetMediaResult()
second_media_result = second_media.GetMediaResult()
2018-05-16 20:09:50 +00:00
#
services_manager = HG.client_controller.services_manager
for ( service_key, action, tag_filter ) in self._tag_service_actions:
content_updates = []
try:
service = services_manager.GetService( service_key )
except HydrusExceptions.DataMissing:
continue
service_type = service.GetServiceType()
if service_type == HC.LOCAL_TAG:
add_content_action = HC.CONTENT_UPDATE_ADD
elif service_type == HC.TAG_REPOSITORY:
add_content_action = HC.CONTENT_UPDATE_PEND
2022-07-20 19:17:03 +00:00
else:
continue
2018-05-16 20:09:50 +00:00
2019-10-02 23:38:59 +00:00
first_tags = first_media.GetTagsManager().GetCurrentAndPending( service_key, ClientTags.TAG_DISPLAY_STORAGE )
second_tags = second_media.GetTagsManager().GetCurrentAndPending( service_key, ClientTags.TAG_DISPLAY_STORAGE )
2018-05-16 20:09:50 +00:00
2019-01-23 22:19:16 +00:00
first_tags = tag_filter.Filter( first_tags )
second_tags = tag_filter.Filter( second_tags )
2018-05-16 20:09:50 +00:00
if action == HC.CONTENT_MERGE_ACTION_TWO_WAY_MERGE:
2019-01-23 22:19:16 +00:00
first_needs = second_tags.difference( first_tags )
second_needs = first_tags.difference( second_tags )
2018-05-16 20:09:50 +00:00
content_updates.extend( ( HydrusData.ContentUpdate( HC.CONTENT_TYPE_MAPPINGS, add_content_action, ( tag, first_hashes ) ) for tag in first_needs ) )
content_updates.extend( ( HydrusData.ContentUpdate( HC.CONTENT_TYPE_MAPPINGS, add_content_action, ( tag, second_hashes ) ) for tag in second_needs ) )
elif action == HC.CONTENT_MERGE_ACTION_COPY:
2019-01-23 22:19:16 +00:00
first_needs = second_tags.difference( first_tags )
2018-05-16 20:09:50 +00:00
content_updates.extend( ( HydrusData.ContentUpdate( HC.CONTENT_TYPE_MAPPINGS, add_content_action, ( tag, first_hashes ) ) for tag in first_needs ) )
elif service_type == HC.LOCAL_TAG and action == HC.CONTENT_MERGE_ACTION_MOVE:
2019-01-23 22:19:16 +00:00
first_needs = second_tags.difference( first_tags )
2018-05-16 20:09:50 +00:00
content_updates.extend( ( HydrusData.ContentUpdate( HC.CONTENT_TYPE_MAPPINGS, add_content_action, ( tag, first_hashes ) ) for tag in first_needs ) )
2019-01-23 22:19:16 +00:00
content_updates.extend( ( HydrusData.ContentUpdate( HC.CONTENT_TYPE_MAPPINGS, HC.CONTENT_UPDATE_DELETE, ( tag, second_hashes ) ) for tag in second_tags ) )
2018-05-16 20:09:50 +00:00
if len( content_updates ) > 0:
2018-11-07 23:09:40 +00:00
service_keys_to_content_updates[ service_key ].extend( content_updates )
2018-05-16 20:09:50 +00:00
2019-02-27 23:03:30 +00:00
def worth_updating_rating( source_rating, dest_rating ):
    
    # a rating is worth copying across only when the source actually has one and
    # the destination is either unset or holds a strictly lower value
    
    if source_rating is None:
        
        return False
        
    
    dest_is_unset = dest_rating is None
    
    return bool( dest_is_unset or source_rating > dest_rating )
    
2018-05-16 20:09:50 +00:00
for ( service_key, action ) in self._rating_service_actions:
content_updates = []
try:
2022-07-20 19:17:03 +00:00
services_manager.GetService( service_key )
2018-05-16 20:09:50 +00:00
except HydrusExceptions.DataMissing:
continue
first_current_value = first_media.GetRatingsManager().GetRating( service_key )
second_current_value = second_media.GetRatingsManager().GetRating( service_key )
if action == HC.CONTENT_MERGE_ACTION_TWO_WAY_MERGE:
2019-02-27 23:03:30 +00:00
if worth_updating_rating( first_current_value, second_current_value ):
2018-05-16 20:09:50 +00:00
2019-02-27 23:03:30 +00:00
content_updates.append( HydrusData.ContentUpdate( HC.CONTENT_TYPE_RATINGS, HC.CONTENT_UPDATE_ADD, ( first_current_value, second_hashes ) ) )
2018-05-16 20:09:50 +00:00
2019-02-27 23:03:30 +00:00
elif worth_updating_rating( second_current_value, first_current_value ):
2018-05-16 20:09:50 +00:00
content_updates.append( HydrusData.ContentUpdate( HC.CONTENT_TYPE_RATINGS, HC.CONTENT_UPDATE_ADD, ( second_current_value, first_hashes ) ) )
elif action == HC.CONTENT_MERGE_ACTION_COPY:
2019-02-27 23:03:30 +00:00
if worth_updating_rating( second_current_value, first_current_value ):
2018-05-16 20:09:50 +00:00
content_updates.append( HydrusData.ContentUpdate( HC.CONTENT_TYPE_RATINGS, HC.CONTENT_UPDATE_ADD, ( second_current_value, first_hashes ) ) )
elif action == HC.CONTENT_MERGE_ACTION_MOVE:
if second_current_value is not None:
2019-02-27 23:03:30 +00:00
if worth_updating_rating( second_current_value, first_current_value ):
2018-05-16 20:09:50 +00:00
content_updates.append( HydrusData.ContentUpdate( HC.CONTENT_TYPE_RATINGS, HC.CONTENT_UPDATE_ADD, ( second_current_value, first_hashes ) ) )
content_updates.append( HydrusData.ContentUpdate( HC.CONTENT_TYPE_RATINGS, HC.CONTENT_UPDATE_ADD, ( None, second_hashes ) ) )
if len( content_updates ) > 0:
2018-11-07 23:09:40 +00:00
service_keys_to_content_updates[ service_key ].extend( content_updates )
2018-05-16 20:09:50 +00:00
2022-12-14 22:22:11 +00:00
#
if self._sync_notes_action != HC.CONTENT_MERGE_ACTION_NONE:
first_names_and_notes = list( first_media.GetNotesManager().GetNamesToNotes().items() )
second_names_and_notes = list( second_media.GetNotesManager().GetNamesToNotes().items() )
content_updates = []
# TODO: rework this to UpdateeNamesToNotes
if self._sync_notes_action == HC.CONTENT_MERGE_ACTION_TWO_WAY_MERGE:
2023-01-04 22:22:08 +00:00
first_service_keys_to_content_updates = self._sync_note_import_options.GetServiceKeysToContentUpdates( first_media_result, second_names_and_notes )
second_service_keys_to_content_updates = self._sync_note_import_options.GetServiceKeysToContentUpdates( second_media_result, first_names_and_notes )
2022-12-14 22:22:11 +00:00
content_updates.extend( first_service_keys_to_content_updates[ CC.LOCAL_NOTES_SERVICE_KEY ] )
content_updates.extend( second_service_keys_to_content_updates[ CC.LOCAL_NOTES_SERVICE_KEY ] )
elif self._sync_notes_action == HC.CONTENT_MERGE_ACTION_COPY:
2023-01-04 22:22:08 +00:00
first_service_keys_to_content_updates = self._sync_note_import_options.GetServiceKeysToContentUpdates( first_media_result, second_names_and_notes )
content_updates.extend( first_service_keys_to_content_updates[ CC.LOCAL_NOTES_SERVICE_KEY ] )
elif self._sync_notes_action == HC.CONTENT_MERGE_ACTION_MOVE:
first_service_keys_to_content_updates = self._sync_note_import_options.GetServiceKeysToContentUpdates( first_media_result, second_names_and_notes )
2022-12-14 22:22:11 +00:00
content_updates.extend( first_service_keys_to_content_updates[ CC.LOCAL_NOTES_SERVICE_KEY ] )
2023-01-04 22:22:08 +00:00
content_updates.extend(
[ HydrusData.ContentUpdate( HC.CONTENT_TYPE_NOTES, HC.CONTENT_UPDATE_DELETE, ( second_hash, name ) ) for ( name, note ) in second_names_and_notes ]
)
2022-12-14 22:22:11 +00:00
if len( content_updates ) > 0:
service_keys_to_content_updates[ CC.LOCAL_NOTES_SERVICE_KEY ].extend( content_updates )
2018-05-16 20:09:50 +00:00
#
2022-04-20 20:18:56 +00:00
content_update_archive_first = HydrusData.ContentUpdate( HC.CONTENT_TYPE_FILES, HC.CONTENT_UPDATE_ARCHIVE, first_hashes )
content_update_archive_second = HydrusData.ContentUpdate( HC.CONTENT_TYPE_FILES, HC.CONTENT_UPDATE_ARCHIVE, second_hashes )
2022-07-20 19:17:03 +00:00
# the 'and not delete_first' / 'and not delete_second' checks below are here to help out the delete lock: we don't want to archive a file and then try to delete it
# TODO: this is obviously a bad solution, so better to refactor this function to return a list of service_keys_to_content_updates and stick the delete command right up top, tested for locks on current info
2022-04-20 20:18:56 +00:00
if self._sync_archive_action == SYNC_ARCHIVE_IF_ONE_DO_BOTH:
2018-05-16 20:09:50 +00:00
2022-07-20 19:17:03 +00:00
if first_media.HasInbox() and second_media.HasArchive() and not delete_first:
2018-05-16 20:09:50 +00:00
2022-04-20 20:18:56 +00:00
service_keys_to_content_updates[ CC.COMBINED_LOCAL_FILE_SERVICE_KEY ].append( content_update_archive_first )
2018-05-16 20:09:50 +00:00
2022-07-20 19:17:03 +00:00
elif first_media.HasArchive() and second_media.HasInbox() and not delete_second:
2018-05-16 20:09:50 +00:00
2022-04-20 20:18:56 +00:00
service_keys_to_content_updates[ CC.COMBINED_LOCAL_FILE_SERVICE_KEY ].append( content_update_archive_second )
elif self._sync_archive_action == SYNC_ARCHIVE_DO_BOTH_REGARDLESS:
2022-07-20 19:17:03 +00:00
if first_media.HasInbox() and not delete_first:
2022-04-20 20:18:56 +00:00
service_keys_to_content_updates[ CC.COMBINED_LOCAL_FILE_SERVICE_KEY ].append( content_update_archive_first )
2022-07-20 19:17:03 +00:00
if second_media.HasInbox() and not delete_second:
2018-05-16 20:09:50 +00:00
2022-04-20 20:18:56 +00:00
service_keys_to_content_updates[ CC.COMBINED_LOCAL_FILE_SERVICE_KEY ].append( content_update_archive_second )
2018-05-16 20:09:50 +00:00
#
2022-12-14 22:22:11 +00:00
if self._sync_urls_action != HC.CONTENT_MERGE_ACTION_NONE:
2018-05-16 20:09:50 +00:00
first_urls = set( first_media.GetLocationsManager().GetURLs() )
second_urls = set( second_media.GetLocationsManager().GetURLs() )
content_updates = []
if self._sync_urls_action == HC.CONTENT_MERGE_ACTION_TWO_WAY_MERGE:
first_needs = second_urls.difference( first_urls )
second_needs = first_urls.difference( second_urls )
2018-10-17 21:00:09 +00:00
if len( first_needs ) > 0:
content_updates.append( HydrusData.ContentUpdate( HC.CONTENT_TYPE_URLS, HC.CONTENT_UPDATE_ADD, ( first_needs, first_hashes ) ) )
if len( second_needs ) > 0:
content_updates.append( HydrusData.ContentUpdate( HC.CONTENT_TYPE_URLS, HC.CONTENT_UPDATE_ADD, ( second_needs, second_hashes ) ) )
2018-05-16 20:09:50 +00:00
elif self._sync_urls_action == HC.CONTENT_MERGE_ACTION_COPY:
first_needs = second_urls.difference( first_urls )
2018-10-17 21:00:09 +00:00
if len( first_needs ) > 0:
content_updates.append( HydrusData.ContentUpdate( HC.CONTENT_TYPE_URLS, HC.CONTENT_UPDATE_ADD, ( first_needs, first_hashes ) ) )
2018-05-16 20:09:50 +00:00
if len( content_updates ) > 0:
2018-11-07 23:09:40 +00:00
service_keys_to_content_updates[ CC.COMBINED_LOCAL_FILE_SERVICE_KEY ].extend( content_updates )
2018-05-16 20:09:50 +00:00
#
deletee_media = []
2022-07-20 19:17:03 +00:00
if delete_first:
2018-05-16 20:09:50 +00:00
2022-07-20 19:17:03 +00:00
deletee_media.append( first_media )
2018-05-16 20:09:50 +00:00
2022-07-20 19:17:03 +00:00
if delete_second:
deletee_media.append( second_media )
2021-05-19 21:30:28 +00:00
2018-05-16 20:09:50 +00:00
for media in deletee_media:
2022-11-23 21:01:41 +00:00
if do_not_do_deletes:
continue
2022-07-20 19:17:03 +00:00
if media.HasDeleteLocked():
ClientMedia.ReportDeleteLockFailures( [ media ] )
2021-05-19 21:30:28 +00:00
continue
2021-04-28 21:43:16 +00:00
if media.GetLocationsManager().IsTrashed():
2018-05-16 20:09:50 +00:00
2021-04-28 21:43:16 +00:00
deletee_service_keys = ( CC.COMBINED_LOCAL_FILE_SERVICE_KEY, )
2018-05-16 20:09:50 +00:00
else:
2021-04-28 21:43:16 +00:00
local_file_service_keys = HG.client_controller.services_manager.GetServiceKeys( ( HC.LOCAL_FILE_DOMAIN, ) )
deletee_service_keys = media.GetLocationsManager().GetCurrent().intersection( local_file_service_keys )
2018-05-16 20:09:50 +00:00
2021-04-28 21:43:16 +00:00
for deletee_service_key in deletee_service_keys:
2018-05-16 20:09:50 +00:00
2019-04-10 22:50:53 +00:00
content_update = HydrusData.ContentUpdate( HC.CONTENT_TYPE_FILES, HC.CONTENT_UPDATE_DELETE, media.GetHashes(), reason = file_deletion_reason )
2018-05-16 20:09:50 +00:00
service_keys_to_content_updates[ deletee_service_key ].append( content_update )
#
2018-11-07 23:09:40 +00:00
return service_keys_to_content_updates
2018-05-16 20:09:50 +00:00
2022-12-14 22:22:11 +00:00
# Register DuplicateContentMergeOptions under its serialisable type id in HydrusSerialisable's type-to-class lookup.
# NOTE(review): presumably this is how the serialisation layer finds the class when loading a stored object -- confirm in HydrusSerialisable.
HydrusSerialisable.SERIALISABLE_TYPES_TO_OBJECT_TYPES[ HydrusSerialisable.SERIALISABLE_TYPE_DUPLICATE_CONTENT_MERGE_OPTIONS ] = DuplicateContentMergeOptions