import threading import time from hydrus.core import HydrusData from hydrus.core import HydrusExceptions from hydrus.core import HydrusGlobals as HG from hydrus.core import HydrusSerialisable from hydrus.client import ClientConstants as CC from hydrus.client import ClientData from hydrus.client.importing import ClientImporting from hydrus.client.importing import ClientImportFileSeeds from hydrus.client.importing import ClientImportGallerySeeds from hydrus.client.importing.options import ClientImportOptions from hydrus.client.importing.options import FileImportOptions from hydrus.client.importing.options import TagImportOptions from hydrus.client.metadata import ClientTags from hydrus.client.networking import ClientNetworkingJobs class MultipleWatcherImport( HydrusSerialisable.SerialisableBase ): SERIALISABLE_TYPE = HydrusSerialisable.SERIALISABLE_TYPE_MULTIPLE_WATCHER_IMPORT SERIALISABLE_NAME = 'Multiple Watcher' SERIALISABLE_VERSION = 2 ADDED_TIMESTAMP_DURATION = 15 def __init__( self, url = None ): HydrusSerialisable.SerialisableBase.__init__( self ) self._lock = threading.Lock() self._page_key = 'initialising page key' self._watchers = HydrusSerialisable.SerialisableList() self._highlighted_watcher_url = None self._checker_options = HG.client_controller.new_options.GetDefaultWatcherCheckerOptions() self._file_import_options = HG.client_controller.new_options.GetDefaultFileImportOptions( 'loud' ) self._tag_import_options = TagImportOptions.TagImportOptions( is_default = True ) self._watcher_keys_to_watchers = {} self._watcher_keys_to_added_timestamps = {} self._watcher_keys_to_already_in_timestamps = {} self._watchers_repeating_job = None self._status_dirty = True self._status_cache = ClientImportFileSeeds.FileSeedCacheStatus() # if url is not None: watcher = WatcherImport() watcher.SetURL( url ) self._AddWatcher( watcher ) self._have_started = False self._last_time_watchers_changed = HydrusData.GetNowPrecise() self._last_pubbed_value_range = ( 0, 0 ) self._next_pub_value_check_time = 0 def _AddWatcher( self, watcher ): watcher.PublishToPage( False ) watcher.Repage( self._page_key ) self._watchers.append( watcher ) self._last_time_watchers_changed = HydrusData.GetNowPrecise() watcher_key = watcher.GetWatcherKey() self._watcher_keys_to_watchers[ watcher_key ] = watcher self._watcher_keys_to_added_timestamps[ watcher_key ] = HydrusData.GetNow() def _CleanAddedTimestamps( self ): keys = list( self._watcher_keys_to_added_timestamps.keys() ) for key in keys: if HydrusData.TimeHasPassed( self._watcher_keys_to_added_timestamps[ key ] + self.ADDED_TIMESTAMP_DURATION ): del self._watcher_keys_to_added_timestamps[ key ] keys = list( self._watcher_keys_to_already_in_timestamps.keys() ) for key in keys: if HydrusData.TimeHasPassed( self._watcher_keys_to_already_in_timestamps[ key ] + self.ADDED_TIMESTAMP_DURATION ): del self._watcher_keys_to_already_in_timestamps[ key ] def _GetSerialisableInfo( self ): serialisable_watchers = self._watchers.GetSerialisableTuple() serialisable_checker_options = self._checker_options.GetSerialisableTuple() serialisable_file_import_options = self._file_import_options.GetSerialisableTuple() serialisable_tag_import_options = self._tag_import_options.GetSerialisableTuple() return ( serialisable_watchers, self._highlighted_watcher_url, serialisable_checker_options, serialisable_file_import_options, serialisable_tag_import_options ) def _InitialiseFromSerialisableInfo( self, serialisable_info ): ( serialisable_watchers, self._highlighted_watcher_url, serialisable_checker_options, serialisable_file_import_options, serialisable_tag_import_options ) = serialisable_info self._watchers = HydrusSerialisable.CreateFromSerialisableTuple( serialisable_watchers ) self._watcher_keys_to_watchers = { watcher.GetWatcherKey() : watcher for watcher in self._watchers } self._checker_options = HydrusSerialisable.CreateFromSerialisableTuple( serialisable_checker_options ) self._file_import_options = HydrusSerialisable.CreateFromSerialisableTuple( serialisable_file_import_options ) self._tag_import_options = HydrusSerialisable.CreateFromSerialisableTuple( serialisable_tag_import_options ) def _RegenerateStatus( self ): file_seed_caches = [ watcher.GetFileSeedCache() for watcher in self._watchers ] self._status_cache = ClientImportFileSeeds.GenerateFileSeedCachesStatus( file_seed_caches ) self._status_dirty = False def _RemoveWatcher( self, watcher_key ): if watcher_key not in self._watcher_keys_to_watchers: return watcher = self._watcher_keys_to_watchers[ watcher_key ] watcher.PublishToPage( False ) watcher.Repage( 'dead page key' ) self._watchers.remove( watcher ) self._last_time_watchers_changed = HydrusData.GetNowPrecise() del self._watcher_keys_to_watchers[ watcher_key ] def _SetDirty( self ): self._status_dirty = True def _UpdateSerialisableInfo( self, version, old_serialisable_info ): if version == 1: serialisable_watchers = old_serialisable_info try: checker_options = HG.client_controller.new_options.GetDefaultWatcherCheckerOptions() file_import_options = HG.client_controller.new_options.GetDefaultFileImportOptions( 'loud' ) tag_import_options = TagImportOptions.TagImportOptions( is_default = True ) except: checker_options = ClientImportOptions.CheckerOptions() file_import_options = FileImportOptions.FileImportOptions() tag_import_options = TagImportOptions.TagImportOptions() serialisable_checker_options = checker_options.GetSerialisableTuple() serialisable_file_import_options = file_import_options.GetSerialisableTuple() serialisable_tag_import_options = tag_import_options.GetSerialisableTuple() highlighted_watcher_key = None serialisable_highlighted_watcher_key = highlighted_watcher_key new_serialisable_info = ( serialisable_watchers, serialisable_highlighted_watcher_key, serialisable_checker_options, serialisable_file_import_options, serialisable_tag_import_options ) return ( 2, new_serialisable_info ) def AddURL( self, url, filterable_tags = None, additional_service_keys_to_tags = None ): if url == '': return None url = HG.client_controller.network_engine.domain_manager.NormaliseURL( url ) with self._lock: for watcher in self._watchers: if url == watcher.GetURL(): watcher_key = watcher.GetWatcherKey() self._watcher_keys_to_already_in_timestamps[ watcher_key ] = HydrusData.GetNow() return None watcher = WatcherImport() watcher.SetURL( url ) if filterable_tags is not None: watcher.SetExternalFilterableTags( filterable_tags ) if additional_service_keys_to_tags is not None: watcher.SetExternalAdditionalServiceKeysToTags( additional_service_keys_to_tags ) watcher.SetCheckerOptions( self._checker_options ) watcher.SetFileImportOptions( self._file_import_options ) watcher.SetTagImportOptions( self._tag_import_options ) publish_to_page = False if self._have_started: watcher.Start( self._page_key, publish_to_page ) self._AddWatcher( watcher ) return watcher def AddWatcher( self, watcher ): with self._lock: self._AddWatcher( watcher ) self._SetDirty() def GetAPIInfoDict( self, simple ): highlighted_watcher = self.GetHighlightedWatcher() with self._lock: d = {} d[ 'watcher_imports' ] = [ watcher_import.GetAPIInfoDict( simple ) for watcher_import in self._watchers ] if highlighted_watcher is None: d[ 'highlight' ] = None else: d[ 'highlight' ] = highlighted_watcher.GetWatcherKey().hex() return d def GetHighlightedWatcher( self ): with self._lock: if self._highlighted_watcher_url is not None: for watcher in self._watchers: if watcher.GetURL() == self._highlighted_watcher_url: return watcher self._highlighted_watcher_url = None return None def GetLastTimeWatchersChanged( self ): with self._lock: return self._last_time_watchers_changed def GetNumDead( self ): with self._lock: return len( [ watcher for watcher in self._watchers if watcher.IsDead() ] ) def GetNumSeeds( self ): with self._lock: return sum( ( watcher.GetNumSeeds() for watcher in self._watchers ) ) def GetNumWatchers( self ): with self._lock: return len( self._watchers ) def GetOptions( self ): with self._lock: return ( self._checker_options, self._file_import_options, self._tag_import_options ) def GetTotalStatus( self ) -> ClientImportFileSeeds.FileSeedCacheStatus: with self._lock: if self._status_dirty: self._RegenerateStatus() return self._status_cache def GetValueRange( self ): with self._lock: total_value = 0 total_range = 0 for watcher in self._watchers: ( value, range ) = watcher.GetValueRange() if value != range: total_value += value total_range += range return ( total_value, total_range ) def GetWatchers( self ): with self._lock: return list( self._watchers ) def GetWatcherSimpleStatus( self, watcher ): with self._lock: watcher_key = watcher.GetWatcherKey() if watcher_key in self._watcher_keys_to_added_timestamps: added_timestamp = self._watcher_keys_to_added_timestamps[ watcher_key ] if HydrusData.TimeHasPassed( added_timestamp + self.ADDED_TIMESTAMP_DURATION ): self._CleanAddedTimestamps() else: return ( ClientImporting.DOWNLOADER_SIMPLE_STATUS_WORKING, 'just added' ) if watcher_key in self._watcher_keys_to_already_in_timestamps: already_in_timestamp = self._watcher_keys_to_already_in_timestamps[ watcher_key ] if HydrusData.TimeHasPassed( already_in_timestamp + self.ADDED_TIMESTAMP_DURATION ): self._CleanAddedTimestamps() else: return ( ClientImporting.DOWNLOADER_SIMPLE_STATUS_WORKING, 'already watching' ) return watcher.GetSimpleStatus() def RemoveWatcher( self, watcher_key ): with self._lock: self._RemoveWatcher( watcher_key ) self._SetDirty() def SetHighlightedWatcher( self, highlighted_watcher ): with self._lock: if highlighted_watcher is None: self._highlighted_watcher_url = None else: self._highlighted_watcher_url = highlighted_watcher.GetURL() def SetOptions( self, checker_options, file_import_options, tag_import_options ): with self._lock: self._checker_options = checker_options self._file_import_options = file_import_options self._tag_import_options = tag_import_options def Start( self, page_key ): with self._lock: if self._have_started: return self._page_key = page_key # set a 2s period so the page value/range is breddy snappy self._watchers_repeating_job = HG.client_controller.CallRepeating( ClientImporting.GetRepeatingJobInitialDelay(), 2.0, self.REPEATINGWorkOnWatchers ) for watcher in self._watchers: publish_to_page = False if self._highlighted_watcher_url is not None and watcher.GetURL() == self._highlighted_watcher_url: publish_to_page = True watcher.Start( page_key, publish_to_page ) self._have_started = True def REPEATINGWorkOnWatchers( self ): with self._lock: if ClientImporting.PageImporterShouldStopWorking( self._page_key ): self._watchers_repeating_job.Cancel() return if not self._status_dirty: # if we think we are clean for watcher in self._watchers: file_seed_cache = watcher.GetFileSeedCache() if file_seed_cache.GetStatus().GetGenerationTime() > self._status_cache.GetGenerationTime(): # has there has been an update? self._SetDirty() break if HydrusData.TimeHasPassed( self._next_pub_value_check_time ): self._next_pub_value_check_time = HydrusData.GetNow() + 5 current_value_range = self.GetValueRange() if current_value_range != self._last_pubbed_value_range: self._last_pubbed_value_range = current_value_range HG.client_controller.pub( 'refresh_page_name', self._page_key ) HydrusSerialisable.SERIALISABLE_TYPES_TO_OBJECT_TYPES[ HydrusSerialisable.SERIALISABLE_TYPE_MULTIPLE_WATCHER_IMPORT ] = MultipleWatcherImport class WatcherImport( HydrusSerialisable.SerialisableBase ): SERIALISABLE_TYPE = HydrusSerialisable.SERIALISABLE_TYPE_WATCHER_IMPORT SERIALISABLE_NAME = 'Watcher' SERIALISABLE_VERSION = 8 MIN_CHECK_PERIOD = 30 def __init__( self ): HydrusSerialisable.SerialisableBase.__init__( self ) self._page_key = 'initialising page key' self._publish_to_page = False self._url = '' self._gallery_seed_log = ClientImportGallerySeeds.GallerySeedLog() self._file_seed_cache = ClientImportFileSeeds.FileSeedCache() self._external_filterable_tags = set() self._external_additional_service_keys_to_tags = ClientTags.ServiceKeysToTags() self._checker_options = HG.client_controller.new_options.GetDefaultWatcherCheckerOptions() self._file_import_options = HG.client_controller.new_options.GetDefaultFileImportOptions( 'loud' ) self._tag_import_options = TagImportOptions.TagImportOptions( is_default = True ) self._last_check_time = 0 self._checking_status = ClientImporting.CHECKER_STATUS_OK self._subject = 'unknown subject' self._next_check_time = None self._file_network_job = None self._checker_network_job = None self._check_now = False self._files_paused = False self._checking_paused = False self._no_work_until = 0 self._no_work_until_reason = '' self._creation_time = HydrusData.GetNow() self._file_velocity_status = '' self._file_status = '' self._watcher_status = '' self._watcher_key = HydrusData.GenerateKey() self._have_started = False self._lock = threading.Lock() self._last_pubbed_page_name = '' self._files_repeating_job = None self._checker_repeating_job = None HG.client_controller.sub( self, 'NotifyFileSeedsUpdated', 'file_seed_cache_file_seeds_updated' ) def _CheckerNetworkJobPresentationContextFactory( self, network_job ): def enter_call(): with self._lock: self._checker_network_job = network_job def exit_call(): with self._lock: self._checker_network_job = None return ClientImporting.NetworkJobPresentationContext( enter_call, exit_call ) def _CheckWatchableURL( self ): def file_seeds_callable( file_seeds ): return ClientImporting.UpdateFileSeedCacheWithFileSeeds( self._file_seed_cache, file_seeds ) def status_hook( text ): with self._lock: if len( text ) > 0: text = text.splitlines()[0] self._watcher_status = text def title_hook( text ): with self._lock: if len( text ) > 0: text = text.splitlines()[0] self._subject = text gallery_seed = ClientImportGallerySeeds.GallerySeed( self._url, can_generate_more_pages = False ) gallery_seed.SetExternalFilterableTags( self._external_filterable_tags ) gallery_seed.SetExternalAdditionalServiceKeysToTags( self._external_additional_service_keys_to_tags ) self._gallery_seed_log.AddGallerySeeds( ( gallery_seed, ) ) with self._lock: self._watcher_status = 'checking' try: ( num_urls_added, num_urls_already_in_file_seed_cache, num_urls_total, result_404, added_new_gallery_pages, stop_reason ) = gallery_seed.WorkOnURL( 'watcher', self._gallery_seed_log, file_seeds_callable, status_hook, title_hook, self._NetworkJobFactory, self._CheckerNetworkJobPresentationContextFactory, self._file_import_options ) if num_urls_added > 0: ClientImporting.WakeRepeatingJob( self._files_repeating_job ) if result_404: with self._lock: self._checking_paused = True self._checking_status = ClientImporting.CHECKER_STATUS_404 if gallery_seed.status == CC.STATUS_ERROR: # the [DEAD] stuff can override watcher status, so let's give a brief time for this to display the error with self._lock: self._checking_paused = True self._watcher_status = gallery_seed.note time.sleep( 5 ) except HydrusExceptions.NetworkException as e: delay = HG.client_controller.new_options.GetInteger( 'downloader_network_error_delay' ) self._DelayWork( delay, str( e ) ) gallery_seed.SetStatus( CC.STATUS_ERROR, str( e ) ) HydrusData.PrintException( e ) self._gallery_seed_log.NotifyGallerySeedsUpdated( ( gallery_seed, ) ) with self._lock: if self._check_now: self._check_now = False self._last_check_time = HydrusData.GetNow() self._UpdateFileVelocityStatus() self._UpdateNextCheckTime() self._Compact() self._watcher_status = '' def _Compact( self ): death_period = self._checker_options.GetDeathFileVelocityPeriod() compact_before_this_time = self._last_check_time - ( death_period * 2 ) self._gallery_seed_log.Compact( compact_before_this_time ) def _DelayWork( self, time_delta, reason ): self._no_work_until = HydrusData.GetNow() + time_delta self._no_work_until_reason = reason def _FileNetworkJobPresentationContextFactory( self, network_job ): def enter_call(): with self._lock: self._file_network_job = network_job def exit_call(): with self._lock: self._file_network_job = None return ClientImporting.NetworkJobPresentationContext( enter_call, exit_call ) def _NetworkJobFactory( self, *args, **kwargs ): network_job = ClientNetworkingJobs.NetworkJobWatcherPage( self._watcher_key, *args, **kwargs ) return network_job def _GetSerialisableInfo( self ): serialisable_gallery_seed_log = self._gallery_seed_log.GetSerialisableTuple() serialisable_file_seed_cache = self._file_seed_cache.GetSerialisableTuple() serialisable_external_filterable_tags = list( self._external_filterable_tags ) serialisable_external_additional_service_keys_to_tags = self._external_additional_service_keys_to_tags.GetSerialisableTuple() serialisable_checker_options = self._checker_options.GetSerialisableTuple() serialisable_file_import_options = self._file_import_options.GetSerialisableTuple() serialisable_tag_import_options = self._tag_import_options.GetSerialisableTuple() return ( self._url, serialisable_gallery_seed_log, serialisable_file_seed_cache, serialisable_external_filterable_tags, serialisable_external_additional_service_keys_to_tags, serialisable_checker_options, serialisable_file_import_options, serialisable_tag_import_options, self._last_check_time, self._files_paused, self._checking_paused, self._checking_status, self._subject, self._no_work_until, self._no_work_until_reason, self._creation_time ) def _HasURL( self ): return self._url != '' def _InitialiseFromSerialisableInfo( self, serialisable_info ): ( self._url, serialisable_gallery_seed_log, serialisable_file_seed_cache, serialisable_external_filterable_tags, serialisable_external_additional_service_keys_to_tags, serialisable_checker_options, serialisable_file_import_options, serialisable_tag_import_options, self._last_check_time, self._files_paused, self._checking_paused, self._checking_status, self._subject, self._no_work_until, self._no_work_until_reason, self._creation_time ) = serialisable_info self._external_filterable_tags = set( serialisable_external_filterable_tags ) self._external_additional_service_keys_to_tags = HydrusSerialisable.CreateFromSerialisableTuple( serialisable_external_additional_service_keys_to_tags ) self._gallery_seed_log = HydrusSerialisable.CreateFromSerialisableTuple( serialisable_gallery_seed_log ) self._file_seed_cache = HydrusSerialisable.CreateFromSerialisableTuple( serialisable_file_seed_cache ) self._checker_options = HydrusSerialisable.CreateFromSerialisableTuple( serialisable_checker_options ) self._file_import_options = HydrusSerialisable.CreateFromSerialisableTuple( serialisable_file_import_options ) self._tag_import_options = HydrusSerialisable.CreateFromSerialisableTuple( serialisable_tag_import_options ) def _UpdateFileVelocityStatus( self ): self._file_velocity_status = self._checker_options.GetPrettyCurrentVelocity( self._file_seed_cache, self._last_check_time ) def _UpdateNextCheckTime( self ): if self._check_now: self._next_check_time = self._last_check_time + self.MIN_CHECK_PERIOD else: if not HydrusData.TimeHasPassed( self._no_work_until ): self._next_check_time = self._no_work_until + 1 else: if self._checking_status == ClientImporting.CHECKER_STATUS_OK: if self._checker_options.IsDead( self._file_seed_cache, self._last_check_time ): self._checking_status = ClientImporting.CHECKER_STATUS_DEAD if self._checking_status != ClientImporting.CHECKER_STATUS_OK: self._checking_paused = True last_next_check_time = self._next_check_time self._next_check_time = self._checker_options.GetNextCheckTime( self._file_seed_cache, self._last_check_time, last_next_check_time ) def _UpdateSerialisableInfo( self, version, old_serialisable_info ): if version == 1: ( url, serialisable_file_seed_cache, urls_to_filenames, urls_to_md5_base64, serialisable_file_import_options, serialisable_tag_import_options, times_to_check, check_period, last_check_time, paused ) = old_serialisable_info checker_options = ClientImportOptions.CheckerOptions( intended_files_per_check = 8, never_faster_than = 300, never_slower_than = 86400, death_file_velocity = ( 1, 86400 ) ) serialisable_checker_options = checker_options.GetSerialisableTuple() files_paused = paused checking_paused = paused new_serialisable_info = ( url, serialisable_file_seed_cache, urls_to_filenames, urls_to_md5_base64, serialisable_checker_options, serialisable_file_import_options, serialisable_tag_import_options, last_check_time, files_paused, checking_paused ) return ( 2, new_serialisable_info ) if version == 2: ( url, serialisable_file_seed_cache, urls_to_filenames, urls_to_md5_base64, serialisable_checker_options, serialisable_file_import_options, serialisable_tag_import_options, last_check_time, files_paused, checking_paused ) = old_serialisable_info checking_status = ClientImporting.CHECKER_STATUS_OK subject = 'unknown subject' new_serialisable_info = ( url, serialisable_file_seed_cache, urls_to_filenames, urls_to_md5_base64, serialisable_checker_options, serialisable_file_import_options, serialisable_tag_import_options, last_check_time, files_paused, checking_paused, checking_status, subject ) return ( 3, new_serialisable_info ) if version == 3: ( url, serialisable_file_seed_cache, urls_to_filenames, urls_to_md5_base64, serialisable_checker_options, serialisable_file_import_options, serialisable_tag_import_options, last_check_time, files_paused, checking_paused, checking_status, subject ) = old_serialisable_info no_work_until = 0 no_work_until_reason = '' new_serialisable_info = ( url, serialisable_file_seed_cache, urls_to_filenames, urls_to_md5_base64, serialisable_checker_options, serialisable_file_import_options, serialisable_tag_import_options, last_check_time, files_paused, checking_paused, checking_status, subject, no_work_until, no_work_until_reason ) return ( 4, new_serialisable_info ) if version == 4: ( url, serialisable_file_seed_cache, urls_to_filenames, urls_to_md5_base64, serialisable_checker_options, serialisable_file_import_options, serialisable_tag_import_options, last_check_time, files_paused, checking_paused, checking_status, subject, no_work_until, no_work_until_reason ) = old_serialisable_info creation_time = HydrusData.GetNow() new_serialisable_info = ( url, serialisable_file_seed_cache, urls_to_filenames, urls_to_md5_base64, serialisable_checker_options, serialisable_file_import_options, serialisable_tag_import_options, last_check_time, files_paused, checking_paused, checking_status, subject, no_work_until, no_work_until_reason, creation_time ) return ( 5, new_serialisable_info ) if version == 5: ( url, serialisable_file_seed_cache, urls_to_filenames, urls_to_md5_base64, serialisable_checker_options, serialisable_file_import_options, serialisable_tag_import_options, last_check_time, files_paused, checking_paused, checking_status, subject, no_work_until, no_work_until_reason, creation_time ) = old_serialisable_info gallery_seed_log = ClientImportGallerySeeds.GallerySeedLog() serialisable_gallery_seed_log = gallery_seed_log.GetSerialisableTuple() new_serialisable_info = ( url, serialisable_gallery_seed_log, serialisable_file_seed_cache, urls_to_filenames, urls_to_md5_base64, serialisable_checker_options, serialisable_file_import_options, serialisable_tag_import_options, last_check_time, files_paused, checking_paused, checking_status, subject, no_work_until, no_work_until_reason, creation_time ) return ( 6, new_serialisable_info ) if version == 6: ( url, serialisable_gallery_seed_log, serialisable_file_seed_cache, urls_to_filenames, urls_to_md5_base64, serialisable_checker_options, serialisable_file_import_options, serialisable_tag_import_options, last_check_time, files_paused, checking_paused, checking_status, subject, no_work_until, no_work_until_reason, creation_time ) = old_serialisable_info external_additional_service_keys_to_tags = ClientTags.ServiceKeysToTags() serialisable_external_additional_service_keys_to_tags = external_additional_service_keys_to_tags.GetSerialisableTuple() new_serialisable_info = ( url, serialisable_gallery_seed_log, serialisable_file_seed_cache, serialisable_external_additional_service_keys_to_tags, serialisable_checker_options, serialisable_file_import_options, serialisable_tag_import_options, last_check_time, files_paused, checking_paused, checking_status, subject, no_work_until, no_work_until_reason, creation_time ) return ( 7, new_serialisable_info ) if version == 7: ( url, serialisable_gallery_seed_log, serialisable_file_seed_cache, serialisable_external_additional_service_keys_to_tags, serialisable_checker_options, serialisable_file_import_options, serialisable_tag_import_options, last_check_time, files_paused, checking_paused, checking_status, subject, no_work_until, no_work_until_reason, creation_time ) = old_serialisable_info filterable_tags = set() serialisable_external_filterable_tags = list( filterable_tags ) new_serialisable_info = ( url, serialisable_gallery_seed_log, serialisable_file_seed_cache, serialisable_external_filterable_tags, serialisable_external_additional_service_keys_to_tags, serialisable_checker_options, serialisable_file_import_options, serialisable_tag_import_options, last_check_time, files_paused, checking_paused, checking_status, subject, no_work_until, no_work_until_reason, creation_time ) return ( 8, new_serialisable_info ) def _WorkOnFiles( self ): file_seed = self._file_seed_cache.GetNextFileSeed( CC.STATUS_UNKNOWN ) if file_seed is None: return did_substantial_work = False def status_hook( text ): with self._lock: if len( text ) > 0: text = text.splitlines()[0] self._file_status = text did_substantial_work = file_seed.WorkOnURL( self._file_seed_cache, status_hook, self._NetworkJobFactory, self._FileNetworkJobPresentationContextFactory, self._file_import_options, self._tag_import_options ) with self._lock: should_present = self._publish_to_page and file_seed.ShouldPresent( self._file_import_options ) page_key = self._page_key if should_present: file_seed.PresentToPage( page_key ) did_substantial_work = True with self._lock: self._file_status = '' if did_substantial_work: time.sleep( ClientImporting.DID_SUBSTANTIAL_FILE_WORK_MINIMUM_SLEEP_TIME ) def GetAPIInfoDict( self, simple ): with self._lock: d = {} d[ 'url' ] = self._url d[ 'watcher_key' ] = self._watcher_key.hex() d[ 'created' ] = self._creation_time d[ 'last_check_time' ] = self._last_check_time d[ 'next_check_time' ] = self._next_check_time d[ 'files_paused' ] = self._files_paused d[ 'checking_paused' ] = self._checking_paused d[ 'checking_status' ] = self._checking_status d[ 'subject' ] = self._subject d[ 'imports' ] = self._file_seed_cache.GetAPIInfoDict( simple ) d[ 'gallery_log' ] = self._gallery_seed_log.GetAPIInfoDict( simple ) return d def CanRetryFailed( self ): with self._lock: return self._file_seed_cache.GetFileSeedCount( CC.STATUS_ERROR ) > 0 def CanRetryIgnored( self ): with self._lock: return self._file_seed_cache.GetFileSeedCount( CC.STATUS_VETOED ) > 0 def CheckingPaused( self ): with self._lock: return self._checking_paused def CheckNow( self ): with self._lock: self._check_now = True self._checking_paused = False self._no_work_until = 0 self._no_work_until_reason = '' self._checking_status = ClientImporting.CHECKER_STATUS_OK self._UpdateNextCheckTime() ClientImporting.WakeRepeatingJob( self._checker_repeating_job ) def CurrentlyAlive( self ): with self._lock: return self._checking_status == ClientImporting.CHECKER_STATUS_OK def CurrentlyWorking( self ): with self._lock: finished = not self._file_seed_cache.WorkToDo() return not finished and not self._files_paused def FilesPaused( self ): with self._lock: return self._files_paused def GetCheckerOptions( self ): with self._lock: return self._checker_options def GetCreationTime( self ): with self._lock: return self._creation_time def GetFileImportOptions( self ): with self._lock: return self._file_import_options def GetFileSeedCache( self ): with self._lock: return self._file_seed_cache def GetGallerySeedLog( self ): with self._lock: return self._gallery_seed_log def GetHashes( self ): with self._lock: fsc = self._file_seed_cache return fsc.GetHashes() def GetNetworkJobs( self ): with self._lock: return ( self._file_network_job, self._checker_network_job ) def GetNewHashes( self ): with self._lock: fsc = self._file_seed_cache file_import_options = FileImportOptions.FileImportOptions() file_import_options.SetPresentationOptions( True, False, False ) return fsc.GetPresentedHashes( file_import_options ) def GetNextCheckTime( self ): with self._lock: return self._next_check_time def GetNumSeeds( self ): with self._lock: return len( self._file_seed_cache ) + len( self._gallery_seed_log ) def GetOptions( self ): with self._lock: return ( self._url, self._file_import_options, self._tag_import_options ) def GetPresentedHashes( self ): with self._lock: fsc = self._file_seed_cache fio = self._file_import_options return fsc.GetPresentedHashes( fio ) def GetSimpleStatus( self ): with self._lock: gallery_work_to_do = self._gallery_seed_log.WorkToDo() files_work_to_do = self._file_seed_cache.WorkToDo() gallery_go = gallery_work_to_do and not self._checking_paused files_go = files_work_to_do and not self._files_paused if self._watcher_status != '' or self._file_status != '': return ( ClientImporting.DOWNLOADER_SIMPLE_STATUS_WORKING, 'working' ) elif gallery_go or files_go: return ( ClientImporting.DOWNLOADER_SIMPLE_STATUS_PENDING, 'pending' ) elif self._checking_status == ClientImporting.CHECKER_STATUS_404: return ( ClientImporting.DOWNLOADER_SIMPLE_STATUS_DONE, '404' ) elif self._checking_status == ClientImporting.CHECKER_STATUS_DEAD: return ( ClientImporting.DOWNLOADER_SIMPLE_STATUS_DONE, 'DEAD' ) elif not HydrusData.TimeHasPassed( self._no_work_until ): if self._next_check_time is None: text = '{} - working again {}'.format( self._no_work_until_reason, ClientData.TimestampToPrettyTimeDelta( self._no_work_until ) ) else: text = '{} - next check {}'.format( self._no_work_until_reason, ClientData.TimestampToPrettyTimeDelta( max( self._no_work_until, self._next_check_time ) ) ) return ( ClientImporting.DOWNLOADER_SIMPLE_STATUS_DEFERRED, text ) else: if self._checking_paused: return ( ClientImporting.DOWNLOADER_SIMPLE_STATUS_PAUSED, '' ) else: if self._next_check_time is None or HydrusData.TimeHasPassed( self._next_check_time ): return ( ClientImporting.DOWNLOADER_SIMPLE_STATUS_PENDING, 'pending' ) else: return ( ClientImporting.DOWNLOADER_SIMPLE_STATUS_DEFERRED, ClientData.TimestampToPrettyTimeDelta( self._next_check_time, no_prefix = True ) ) def GetStatus( self ): with self._lock: file_status = self._file_status if self._checking_status == ClientImporting.CHECKER_STATUS_404: watcher_status = 'URL 404' elif self._checking_status == ClientImporting.CHECKER_STATUS_DEAD: watcher_status = 'URL DEAD' elif not HydrusData.TimeHasPassed( self._no_work_until ): if self._next_check_time is None: no_work_text = '{} - working again {}'.format( self._no_work_until_reason, ClientData.TimestampToPrettyTimeDelta( self._no_work_until ) ) else: no_work_text = '{} - next check {}'.format( self._no_work_until_reason, ClientData.TimestampToPrettyTimeDelta( max( self._no_work_until, self._next_check_time ) ) ) file_status = no_work_text watcher_status = no_work_text else: watcher_status = self._watcher_status return ( file_status, self._files_paused, self._file_velocity_status, self._next_check_time, watcher_status, self._subject, self._checking_status, self._check_now, self._checking_paused ) def GetSubject( self ): with self._lock: if self._subject in ( None, '' ): return 'unknown subject' else: return self._subject def GetTagImportOptions( self ): with self._lock: return self._tag_import_options def GetWatcherKey( self ): with self._lock: return self._watcher_key def GetURL( self ): with self._lock: return self._url def GetValueRange( self ): with self._lock: return self._file_seed_cache.GetValueRange() def HasURL( self ): with self._lock: return self._HasURL() def _IsDead( self ): return self._checking_status in ( ClientImporting.CHECKER_STATUS_404, ClientImporting.CHECKER_STATUS_DEAD ) def IsDead( self ): with self._lock: return self._IsDead() def NotifyFileSeedsUpdated( self, file_seed_cache_key, file_seeds ): if file_seed_cache_key == self._file_seed_cache.GetFileSeedCacheKey(): ClientImporting.WakeRepeatingJob( self._files_repeating_job ) def PausePlayChecking( self ): with self._lock: if self._checking_paused and self._IsDead(): return # watcher is dead, so don't unpause until a checknow event else: self._checking_paused = not self._checking_paused ClientImporting.WakeRepeatingJob( self._checker_repeating_job ) def PausePlayFiles( self ): with self._lock: self._files_paused = not self._files_paused ClientImporting.WakeRepeatingJob( self._files_repeating_job ) def PublishToPage( self, publish_to_page ): with self._lock: self._publish_to_page = publish_to_page def Repage( self, page_key ): with self._lock: self._page_key = page_key def RetryFailed( self ): with self._lock: self._file_seed_cache.RetryFailed() def RetryIgnored( self, ignored_regex = None ): with self._lock: self._file_seed_cache.RetryIgnored( ignored_regex = ignored_regex ) def SetCheckerOptions( self, checker_options ): with self._lock: self._checker_options = checker_options self._UpdateNextCheckTime() self._UpdateFileVelocityStatus() ClientImporting.WakeRepeatingJob( self._checker_repeating_job ) def SetFileImportOptions( self, file_import_options ): with self._lock: self._file_import_options = file_import_options def SetExternalAdditionalServiceKeysToTags( self, service_keys_to_tags ): with self._lock: self._external_additional_service_keys_to_tags = ClientTags.ServiceKeysToTags( service_keys_to_tags ) def SetExternalFilterableTags( self, tags ): with self._lock: self._external_filterable_tags = set( tags ) def SetTagImportOptions( self, tag_import_options ): with self._lock: self._tag_import_options = tag_import_options def SetURL( self, url ): if url is None: url = '' if url != '': try: url = HG.client_controller.network_engine.domain_manager.NormaliseURL( url ) except HydrusExceptions.URLClassException: url = '' with self._lock: self._url = url ClientImporting.WakeRepeatingJob( self._checker_repeating_job ) def Start( self, page_key, publish_to_page ): with self._lock: if self._have_started: return self._page_key = page_key self._publish_to_page = publish_to_page self._UpdateNextCheckTime() self._UpdateFileVelocityStatus() self._files_repeating_job = HG.client_controller.CallRepeating( ClientImporting.GetRepeatingJobInitialDelay(), ClientImporting.REPEATING_JOB_TYPICAL_PERIOD, self.REPEATINGWorkOnFiles ) self._checker_repeating_job = HG.client_controller.CallRepeating( ClientImporting.GetRepeatingJobInitialDelay(), ClientImporting.REPEATING_JOB_TYPICAL_PERIOD, self.REPEATINGWorkOnChecker ) self._files_repeating_job.SetThreadSlotType( 'watcher_files' ) self._checker_repeating_job.SetThreadSlotType( 'watcher_check' ) self._have_started = True def CanDoFileWork( self ): with self._lock: if ClientImporting.PageImporterShouldStopWorking( self._page_key ): self._files_repeating_job.Cancel() return files_paused = self._files_paused or HG.client_controller.new_options.GetBoolean( 'pause_all_file_queues' ) if files_paused: return False work_to_do = self._file_seed_cache.WorkToDo() if not work_to_do: return False return self.CanDoNetworkWork() def CanDoNetworkWork( self ): with self._lock: no_delays = HydrusData.TimeHasPassed( self._no_work_until ) if not no_delays: return False page_shown = not HG.client_controller.PageClosedButNotDestroyed( self._page_key ) if not page_shown: return False network_engine_good = not HG.client_controller.network_engine.IsBusy() if not network_engine_good: return False return True def REPEATINGWorkOnFiles( self ): while self.CanDoFileWork(): try: self._WorkOnFiles() HG.client_controller.WaitUntilViewFree() except Exception as e: HydrusData.ShowException( e ) def CanDoCheckerWork( self ): with self._lock: if ClientImporting.PageImporterShouldStopWorking( self._page_key ): self._checker_repeating_job.Cancel() return while self._gallery_seed_log.WorkToDo(): # some old unworked gallery url is hanging around, let's clear it gallery_seed = self._gallery_seed_log.GetNextGallerySeed( CC.STATUS_UNKNOWN ) gallery_seed.SetStatus( CC.STATUS_VETOED, note = 'check never finished' ) self._gallery_seed_log.NotifyGallerySeedsUpdated( ( gallery_seed, ) ) checking_paused = self._checking_paused or HG.client_controller.new_options.GetBoolean( 'pause_all_watcher_checkers' ) if checking_paused: return False able_to_check = self._checking_status == ClientImporting.CHECKER_STATUS_OK and self._HasURL() if not able_to_check: return False check_due = HydrusData.TimeHasPassed( self._next_check_time ) if not check_due: return False return self.CanDoNetworkWork() def REPEATINGWorkOnChecker( self ): if self.CanDoCheckerWork(): try: self._CheckWatchableURL() except Exception as e: HydrusData.ShowException( e ) HydrusSerialisable.SERIALISABLE_TYPES_TO_OBJECT_TYPES[ HydrusSerialisable.SERIALISABLE_TYPE_WATCHER_IMPORT ] = WatcherImport