import collections
import threading
import time

from hydrus.core import HydrusConstants as HC
from hydrus.core import HydrusData
from hydrus.core import HydrusExceptions
from hydrus.core import HydrusGlobals as HG
from hydrus.core import HydrusSerialisable

from hydrus.client import ClientConstants as CC
from hydrus.client import ClientThreading
from hydrus.client.metadata import ClientTags


class DuplicatesManager( object ):
    """Caches the similar-files maintenance numbers and runs the potential-duplicates search.
    
    All mutable state is guarded by self._lock, which is a plain (non-reentrant)
    threading.Lock: no method may call another lock-taking method while holding it.
    """
    
    # The most recently constructed manager; see instance().
    my_instance = None
    
    def __init__( self ):
        
        DuplicatesManager.my_instance = self
        
        # Last value read from the 'similar_files_maintenance_status' db read, or None
        # if no read has completed yet. THREADSearchPotentials treats it as a mapping
        # of searched distance -> file count.
        self._similar_files_maintenance_status = None
        
        # True while a refresh thread is in flight.
        self._currently_refreshing_maintenance_numbers = False
        
        # True when the cached numbers are stale and a refresh should be scheduled.
        self._refresh_maintenance_numbers = True
        
        self._currently_doing_potentials_search = False
        
        self._lock = threading.Lock()
        
    
    @staticmethod
    def instance() -> 'DuplicatesManager':
        """Return the shared manager, constructing it on first use."""
        
        if DuplicatesManager.my_instance is None:
            
            # __init__ stores the new object in my_instance
            DuplicatesManager()
            
        
        return DuplicatesManager.my_instance
        
    
    def GetMaintenanceNumbers( self ):
        """Return ( maintenance_status, currently_refreshing, currently_searching ).
        
        If the cached numbers are flagged stale and no refresh is in flight, a
        background refresh is scheduled first; the returned status is still the
        cached (possibly None) value.
        """
        
        with self._lock:
            
            if self._refresh_maintenance_numbers and not self._currently_refreshing_maintenance_numbers:
                
                self._refresh_maintenance_numbers = False
                self._currently_refreshing_maintenance_numbers = True
                
                HG.client_controller.pub( 'new_similar_files_maintenance_numbers' )
                
                HG.client_controller.CallToThread( self.THREADRefreshMaintenanceNumbers )
                
            
            return (
                self._similar_files_maintenance_status,
                self._currently_refreshing_maintenance_numbers,
                self._currently_doing_potentials_search
            )
            
        
    
    def RefreshMaintenanceNumbers( self ):
        """Flag the cached numbers as stale and publish the 'new numbers' notification."""
        
        with self._lock:
            
            self._refresh_maintenance_numbers = True
            
            HG.client_controller.pub( 'new_similar_files_maintenance_numbers' )
            
        
    
    def NotifyNewPotentialsSearchNumbers( self ):
        """Publish the 'new potentials search numbers' notification."""
        
        HG.client_controller.pub( 'new_similar_files_potentials_search_numbers' )
        
    
    def StartPotentialsSearch( self ):
        """Start the potentials search thread unless one is running or no numbers are loaded yet."""
        
        with self._lock:
            
            if self._currently_doing_potentials_search or self._similar_files_maintenance_status is None:
                
                return
                
            
            self._currently_doing_potentials_search = True
            
            HG.client_controller.CallToThreadLongRunning( self.THREADSearchPotentials )
            
        
    
    def THREADRefreshMaintenanceNumbers( self ):
        """Thread body: re-read the maintenance status from the db and cache it.
        
        If another refresh was requested while the read was running, this method
        re-queues itself and leaves the 'currently refreshing' flag set. On any
        error the flag is cleared, listeners are notified, and the error is re-raised.
        """
        
        try:
            
            similar_files_maintenance_status = HG.client_controller.Read( 'similar_files_maintenance_status' )
            
            with self._lock:
                
                self._similar_files_maintenance_status = similar_files_maintenance_status
                
                if self._refresh_maintenance_numbers:
                    
                    # the numbers went stale again during the read, so go around once more
                    self._refresh_maintenance_numbers = False
                    
                    HG.client_controller.CallToThread( self.THREADRefreshMaintenanceNumbers )
                    
                else:
                    
                    self._currently_refreshing_maintenance_numbers = False
                    
                
                HG.client_controller.pub( 'new_similar_files_maintenance_numbers' )
                
            
        except BaseException:
            
            # The lock is never held here (every acquisition uses 'with'), so it is
            # safe to take it to reset the flag consistently with the other writers.
            with self._lock:
                
                self._currently_refreshing_maintenance_numbers = False
                
            
            HG.client_controller.pub( 'new_similar_files_maintenance_numbers' )
            
            raise
            
        
    
    def THREADSearchPotentials( self ):
        """Thread body: search for potential duplicates in slices until done or cancelled.
        
        Progress is reported through a cancellable popup job key. Whatever happens,
        the 'currently searching' flag is cleared and both notifications are sent.
        """
        
        try:
            
            search_distance = HG.client_controller.new_options.GetInteger( 'similar_files_duplicate_pairs_search_distance' )
            
            with self._lock:
                
                if self._similar_files_maintenance_status is None:
                    
                    return
                    
                
                searched_distances_to_count = self._similar_files_maintenance_status
                
                total_num_files = sum( searched_distances_to_count.values() )
                
                # files already searched at this distance or further
                num_searched = sum(
                    count
                    for ( value, count ) in searched_distances_to_count.items()
                    if value is not None and value >= search_distance
                )
                
                if num_searched >= total_num_files:
                    
                    return # no work to do
                    
                
            
            num_searched_estimate = num_searched
            
            HG.client_controller.pub( 'new_similar_files_maintenance_numbers' )
            
            job_key = ClientThreading.JobKey( cancellable = True )
            
            job_key.SetVariable( 'popup_title', 'searching for potential duplicates' )
            
            HG.client_controller.pub( 'message', job_key )
            
            still_work_to_do = True
            
            while still_work_to_do:
                
                # re-read every pass so an options change takes effect mid-search
                search_distance = HG.client_controller.new_options.GetInteger( 'similar_files_duplicate_pairs_search_distance' )
                
                ( still_work_to_do, num_done ) = HG.client_controller.WriteSynchronous( 'maintain_similar_files_search_for_potential_duplicates', search_distance, maintenance_mode = HC.MAINTENANCE_FORCED, job_key = job_key, work_time_float = 0.5 )
                
                num_searched_estimate += num_done
                
                text = 'searching: {}'.format( HydrusData.ConvertValueRangeToPrettyString( num_searched_estimate, total_num_files ) )
                
                job_key.SetVariable( 'popup_text_1', text )
                job_key.SetVariable( 'popup_gauge_1', ( num_searched_estimate, total_num_files ) )
                
                if job_key.IsCancelled() or HG.model_shutdown:
                    
                    break
                    
                
                time.sleep( 0.5 )
                
            
            job_key.Delete()
            
        finally:
            
            with self._lock:
                
                self._currently_doing_potentials_search = False
                
            
            # These must run outside the lock: RefreshMaintenanceNumbers takes it itself
            # and the lock is not reentrant.
            self.RefreshMaintenanceNumbers()
            self.NotifyNewPotentialsSearchNumbers()
            
        
    

class DuplicateActionOptions( HydrusSerialisable.SerialisableBase ):
    """Serialisable description of what to merge when a pair of files is actioned as duplicates.
    
    Holds per-service tag actions as ( service_key, action, tag_filter ) tuples,
    per-service rating actions as ( service_key, action ) tuples, an archive-sync
    flag, and an optional URL merge action.
    """
    
    SERIALISABLE_TYPE = HydrusSerialisable.SERIALISABLE_TYPE_DUPLICATE_ACTION_OPTIONS
    SERIALISABLE_NAME = 'Duplicate Action Options'
    SERIALISABLE_VERSION = 4
    
    def __init__( self, tag_service_actions = None, rating_service_actions = None, sync_archive = False, sync_urls_action = None ):
        
        # None sentinels avoid sharing one mutable default list between instances
        if tag_service_actions is None:
            
            tag_service_actions = []
            
        
        if rating_service_actions is None:
            
            rating_service_actions = []
            
        
        HydrusSerialisable.SerialisableBase.__init__( self )
        
        self._tag_service_actions = tag_service_actions
        self._rating_service_actions = rating_service_actions
        self._sync_archive = sync_archive
        self._sync_urls_action = sync_urls_action
        
    
    def _GetSerialisableInfo( self ):
        """Return the version-4 serialisable tuple.
        
        Note this also mutates the object: once the client is booted, actions whose
        service no longer exists or has the wrong service type are purged before
        serialising (this is where the duplicated entries produced by the version 1
        update get cleaned up).
        """
        
        if HG.client_controller.IsBooted():
            
            services_manager = HG.client_controller.services_manager
            
            self._tag_service_actions = [
                ( service_key, action, tag_filter )
                for ( service_key, action, tag_filter ) in self._tag_service_actions
                if services_manager.ServiceExists( service_key ) and services_manager.GetServiceType( service_key ) in HC.REAL_TAG_SERVICES
            ]
            
            self._rating_service_actions = [
                ( service_key, action )
                for ( service_key, action ) in self._rating_service_actions
                if services_manager.ServiceExists( service_key ) and services_manager.GetServiceType( service_key ) in ( HC.LOCAL_RATING_LIKE, HC.LOCAL_RATING_NUMERICAL )
            ]
            
        
        serialisable_tag_service_actions = [
            ( service_key.hex(), action, tag_filter.GetSerialisableTuple() )
            for ( service_key, action, tag_filter ) in self._tag_service_actions
        ]
        
        serialisable_rating_service_actions = [
            ( service_key.hex(), action )
            for ( service_key, action ) in self._rating_service_actions
        ]
        
        return ( serialisable_tag_service_actions, serialisable_rating_service_actions, self._sync_archive, self._sync_urls_action )
        
    
    def _InitialiseFromSerialisableInfo( self, serialisable_info ):
        """Restore state from a version-4 serialisable tuple (hex service keys -> bytes)."""
        
        ( serialisable_tag_service_actions, serialisable_rating_service_actions, self._sync_archive, self._sync_urls_action ) = serialisable_info
        
        self._tag_service_actions = [
            ( bytes.fromhex( serialisable_service_key ), action, HydrusSerialisable.CreateFromSerialisableTuple( serialisable_tag_filter ) )
            for ( serialisable_service_key, action, serialisable_tag_filter ) in serialisable_tag_service_actions
        ]
        
        self._rating_service_actions = [
            ( bytes.fromhex( serialisable_service_key ), action )
            for ( serialisable_service_key, action ) in serialisable_rating_service_actions
        ]
        
    
    def _UpdateSerialisableInfo( self, version, old_serialisable_info ):
        """Upgrade old_serialisable_info by one version; returns ( new_version, new_info ).
        
        Handles versions 1, 2 and 3. Any other version falls through and returns None.
        """
        
        if version == 1:
            
            ( serialisable_service_actions, delete_second_file ) = old_serialisable_info
            
            tag_service_actions = []
            rating_service_actions = []
            
            # As the client isn't booted when this is loaded in options, there isn't a good way to figure out tag from rating
            # So, let's just dupe and purge later on, in serialisation
            for ( service_key_encoded, action ) in serialisable_service_actions:
                
                service_key = bytes.fromhex( service_key_encoded )
                
                tag_filter = ClientTags.TagFilter()
                
                tag_service_actions.append( ( service_key, action, tag_filter ) )
                
                rating_service_actions.append( ( service_key, action ) )
                
            
            serialisable_tag_service_actions = [
                ( service_key.hex(), action, tag_filter.GetSerialisableTuple() )
                for ( service_key, action, tag_filter ) in tag_service_actions
            ]
            
            serialisable_rating_service_actions = [
                ( service_key.hex(), action )
                for ( service_key, action ) in rating_service_actions
            ]
            
            sync_archive = delete_second_file
            delete_both_files = False
            
            new_serialisable_info = ( serialisable_tag_service_actions, serialisable_rating_service_actions, delete_second_file, sync_archive, delete_both_files )
            
            return ( 2, new_serialisable_info )
            
        
        if version == 2:
            
            ( serialisable_tag_service_actions, serialisable_rating_service_actions, delete_second_file, sync_archive, delete_both_files ) = old_serialisable_info
            
            # version 3 appends the URL sync action, initially unset
            sync_urls_action = None
            
            new_serialisable_info = ( serialisable_tag_service_actions, serialisable_rating_service_actions, delete_second_file, sync_archive, delete_both_files, sync_urls_action )
            
            return ( 3, new_serialisable_info )
            
        
        if version == 3:
            
            ( serialisable_tag_service_actions, serialisable_rating_service_actions, delete_second_file, sync_archive, delete_both_files, sync_urls_action ) = old_serialisable_info
            
            # version 4 drops the two file-deletion flags
            new_serialisable_info = ( serialisable_tag_service_actions, serialisable_rating_service_actions, sync_archive, sync_urls_action )
            
            return ( 4, new_serialisable_info )
            
        
    
    def SetTuple( self, tag_service_actions, rating_service_actions, sync_archive, sync_urls_action ):
        """Replace all four settings at once."""
        
        self._tag_service_actions = tag_service_actions
        self._rating_service_actions = rating_service_actions
        self._sync_archive = sync_archive
        self._sync_urls_action = sync_urls_action
        
    
    def ToTuple( self ):
        """Return ( tag_service_actions, rating_service_actions, sync_archive, sync_urls_action )."""
        
        return ( self._tag_service_actions, self._rating_service_actions, self._sync_archive, self._sync_urls_action )
        
    
    def ProcessPairIntoContentUpdates( self, first_media, second_media, delete_first = False, delete_second = False, delete_both = False, file_deletion_reason = None ):
        """Build the content updates that apply these options to a pair of media.
        
        For the COPY and MOVE actions the direction is always second -> first.
        
        :param first_media: media object for the first file of the pair
        :param second_media: media object for the second file of the pair
        :param delete_first: also emit a file delete for the first media
        :param delete_second: also emit a file delete for the second media
        :param delete_both: also emit file deletes for both media
        :param file_deletion_reason: reason attached to any file delete; defaults to 'unknown reason'
        :return: defaultdict( list ) of service_key -> list of HydrusData.ContentUpdate
        """
        
        if file_deletion_reason is None:
            
            file_deletion_reason = 'unknown reason'
            
        
        service_keys_to_content_updates = collections.defaultdict( list )
        
        first_hashes = first_media.GetHashes()
        second_hashes = second_media.GetHashes()
        
        services_manager = HG.client_controller.services_manager
        
        # tags
        
        for ( service_key, action, tag_filter ) in self._tag_service_actions:
            
            content_updates = []
            
            try:
                
                service = services_manager.GetService( service_key )
                
            except HydrusExceptions.DataMissing:
                
                # the service has gone away, nothing to do for it
                continue
                
            
            service_type = service.GetServiceType()
            
            if service_type == HC.LOCAL_TAG:
                
                add_content_action = HC.CONTENT_UPDATE_ADD
                
            elif service_type == HC.TAG_REPOSITORY:
                
                add_content_action = HC.CONTENT_UPDATE_PEND
                
            else:
                
                # Not a service type we know how to add tags to. Without this guard the
                # code below would hit an unbound add_content_action, or silently reuse
                # the one left over from the previous loop iteration.
                continue
                
            
            first_tags = first_media.GetTagsManager().GetCurrentAndPending( service_key, ClientTags.TAG_DISPLAY_STORAGE )
            second_tags = second_media.GetTagsManager().GetCurrentAndPending( service_key, ClientTags.TAG_DISPLAY_STORAGE )
            
            first_tags = tag_filter.Filter( first_tags )
            second_tags = tag_filter.Filter( second_tags )
            
            if action == HC.CONTENT_MERGE_ACTION_TWO_WAY_MERGE:
                
                first_needs = second_tags.difference( first_tags )
                second_needs = first_tags.difference( second_tags )
                
                content_updates.extend( ( HydrusData.ContentUpdate( HC.CONTENT_TYPE_MAPPINGS, add_content_action, ( tag, first_hashes ) ) for tag in first_needs ) )
                content_updates.extend( ( HydrusData.ContentUpdate( HC.CONTENT_TYPE_MAPPINGS, add_content_action, ( tag, second_hashes ) ) for tag in second_needs ) )
                
            elif action == HC.CONTENT_MERGE_ACTION_COPY:
                
                first_needs = second_tags.difference( first_tags )
                
                content_updates.extend( ( HydrusData.ContentUpdate( HC.CONTENT_TYPE_MAPPINGS, add_content_action, ( tag, first_hashes ) ) for tag in first_needs ) )
                
            elif service_type == HC.LOCAL_TAG and action == HC.CONTENT_MERGE_ACTION_MOVE:
                
                # move is only performed on local tag services: add to first, delete from second
                first_needs = second_tags.difference( first_tags )
                
                content_updates.extend( ( HydrusData.ContentUpdate( HC.CONTENT_TYPE_MAPPINGS, add_content_action, ( tag, first_hashes ) ) for tag in first_needs ) )
                content_updates.extend( ( HydrusData.ContentUpdate( HC.CONTENT_TYPE_MAPPINGS, HC.CONTENT_UPDATE_DELETE, ( tag, second_hashes ) ) for tag in second_tags ) )
                
            
            if content_updates:
                
                service_keys_to_content_updates[ service_key ].extend( content_updates )
                
            
        
        # ratings
        
        def worth_updating_rating( source_rating, dest_rating ):
            
            # only overwrite when the source has a rating and the dest has none or a lower one
            return source_rating is not None and ( dest_rating is None or source_rating > dest_rating )
            
        
        for ( service_key, action ) in self._rating_service_actions:
            
            content_updates = []
            
            try:
                
                # existence check only; the service object itself is not needed
                services_manager.GetService( service_key )
                
            except HydrusExceptions.DataMissing:
                
                continue
                
            
            first_current_value = first_media.GetRatingsManager().GetRating( service_key )
            second_current_value = second_media.GetRatingsManager().GetRating( service_key )
            
            if action == HC.CONTENT_MERGE_ACTION_TWO_WAY_MERGE:
                
                if worth_updating_rating( first_current_value, second_current_value ):
                    
                    content_updates.append( HydrusData.ContentUpdate( HC.CONTENT_TYPE_RATINGS, HC.CONTENT_UPDATE_ADD, ( first_current_value, second_hashes ) ) )
                    
                elif worth_updating_rating( second_current_value, first_current_value ):
                    
                    content_updates.append( HydrusData.ContentUpdate( HC.CONTENT_TYPE_RATINGS, HC.CONTENT_UPDATE_ADD, ( second_current_value, first_hashes ) ) )
                    
                
            elif action == HC.CONTENT_MERGE_ACTION_COPY:
                
                if worth_updating_rating( second_current_value, first_current_value ):
                    
                    content_updates.append( HydrusData.ContentUpdate( HC.CONTENT_TYPE_RATINGS, HC.CONTENT_UPDATE_ADD, ( second_current_value, first_hashes ) ) )
                    
                
            elif action == HC.CONTENT_MERGE_ACTION_MOVE:
                
                if second_current_value is not None:
                    
                    if worth_updating_rating( second_current_value, first_current_value ):
                        
                        content_updates.append( HydrusData.ContentUpdate( HC.CONTENT_TYPE_RATINGS, HC.CONTENT_UPDATE_ADD, ( second_current_value, first_hashes ) ) )
                        
                    
                    # the second file's rating is set to None whether or not the first took it
                    content_updates.append( HydrusData.ContentUpdate( HC.CONTENT_TYPE_RATINGS, HC.CONTENT_UPDATE_ADD, ( None, second_hashes ) ) )
                    
                
            
            if content_updates:
                
                service_keys_to_content_updates[ service_key ].extend( content_updates )
                
            
        
        # archive status: if exactly one side is archived, archive the inbox side too
        
        if self._sync_archive:
            
            if first_media.HasInbox() and second_media.HasArchive():
                
                content_update = HydrusData.ContentUpdate( HC.CONTENT_TYPE_FILES, HC.CONTENT_UPDATE_ARCHIVE, first_hashes )
                
                service_keys_to_content_updates[ CC.COMBINED_LOCAL_FILE_SERVICE_KEY ].append( content_update )
                
            elif first_media.HasArchive() and second_media.HasInbox():
                
                content_update = HydrusData.ContentUpdate( HC.CONTENT_TYPE_FILES, HC.CONTENT_UPDATE_ARCHIVE, second_hashes )
                
                service_keys_to_content_updates[ CC.COMBINED_LOCAL_FILE_SERVICE_KEY ].append( content_update )
                
            
        
        # urls
        
        if self._sync_urls_action is not None:
            
            first_urls = set( first_media.GetLocationsManager().GetURLs() )
            second_urls = set( second_media.GetLocationsManager().GetURLs() )
            
            content_updates = []
            
            if self._sync_urls_action == HC.CONTENT_MERGE_ACTION_TWO_WAY_MERGE:
                
                first_needs = second_urls.difference( first_urls )
                second_needs = first_urls.difference( second_urls )
                
                if first_needs:
                    
                    content_updates.append( HydrusData.ContentUpdate( HC.CONTENT_TYPE_URLS, HC.CONTENT_UPDATE_ADD, ( first_needs, first_hashes ) ) )
                    
                
                if second_needs:
                    
                    content_updates.append( HydrusData.ContentUpdate( HC.CONTENT_TYPE_URLS, HC.CONTENT_UPDATE_ADD, ( second_needs, second_hashes ) ) )
                    
                
            elif self._sync_urls_action == HC.CONTENT_MERGE_ACTION_COPY:
                
                first_needs = second_urls.difference( first_urls )
                
                if first_needs:
                    
                    content_updates.append( HydrusData.ContentUpdate( HC.CONTENT_TYPE_URLS, HC.CONTENT_UPDATE_ADD, ( first_needs, first_hashes ) ) )
                    
                
            
            if content_updates:
                
                service_keys_to_content_updates[ CC.COMBINED_LOCAL_FILE_SERVICE_KEY ].extend( content_updates )
                
            
        
        # file deletes
        
        deletee_media = []
        
        if delete_first or delete_both:
            
            deletee_media.append( first_media )
            
        
        if delete_second or delete_both:
            
            deletee_media.append( second_media )
            
        
        for media in deletee_media:
            
            current_locations = media.GetLocationsManager().GetCurrent()
            
            # prefer the local file service, then the trash; otherwise emit no delete
            if CC.LOCAL_FILE_SERVICE_KEY in current_locations:
                
                deletee_service_key = CC.LOCAL_FILE_SERVICE_KEY
                
            elif CC.TRASH_SERVICE_KEY in current_locations:
                
                deletee_service_key = CC.TRASH_SERVICE_KEY
                
            else:
                
                deletee_service_key = None
                
            
            if deletee_service_key is not None:
                
                content_update = HydrusData.ContentUpdate( HC.CONTENT_TYPE_FILES, HC.CONTENT_UPDATE_DELETE, media.GetHashes(), reason = file_deletion_reason )
                
                service_keys_to_content_updates[ deletee_service_key ].append( content_update )
                
            
        
        return service_keys_to_content_updates
        
    

HydrusSerialisable.SERIALISABLE_TYPES_TO_OBJECT_TYPES[ HydrusSerialisable.SERIALISABLE_TYPE_DUPLICATE_ACTION_OPTIONS ] = DuplicateActionOptions