hydrus/hydrus/client/importing/ClientImportWatchers.py


import threading
import time
from hydrus.core import HydrusData
from hydrus.core import HydrusExceptions
from hydrus.core import HydrusGlobals as HG
from hydrus.core import HydrusSerialisable
from hydrus.client import ClientConstants as CC
from hydrus.client import ClientData
from hydrus.client.importing import ClientImportControl
from hydrus.client.importing import ClientImporting
from hydrus.client.importing import ClientImportFileSeeds
from hydrus.client.importing import ClientImportGallerySeeds
from hydrus.client.importing.options import ClientImportOptions
from hydrus.client.importing.options import FileImportOptions
from hydrus.client.importing.options import TagImportOptions
from hydrus.client.metadata import ClientTags
from hydrus.client.networking import ClientNetworkingJobs
class MultipleWatcherImport( HydrusSerialisable.SerialisableBase ):
SERIALISABLE_TYPE = HydrusSerialisable.SERIALISABLE_TYPE_MULTIPLE_WATCHER_IMPORT
SERIALISABLE_NAME = 'Multiple Watcher'
SERIALISABLE_VERSION = 2
ADDED_TIMESTAMP_DURATION = 15
def __init__( self, url = None ):
HydrusSerialisable.SerialisableBase.__init__( self )
self._lock = threading.Lock()
self._page_key = b'initialising page key'
self._watchers = HydrusSerialisable.SerialisableList()
self._highlighted_watcher_url = None
self._checker_options = HG.client_controller.new_options.GetDefaultWatcherCheckerOptions()
self._file_import_options = HG.client_controller.new_options.GetDefaultFileImportOptions( 'loud' )
self._tag_import_options = TagImportOptions.TagImportOptions( is_default = True )
self._watcher_keys_to_watchers = {}
self._watcher_keys_to_added_timestamps = {}
self._watcher_keys_to_already_in_timestamps = {}
self._watchers_repeating_job = None
self._status_dirty = True
self._status_cache = ClientImportFileSeeds.FileSeedCacheStatus()
#
if url is not None:
watcher = WatcherImport()
watcher.SetURL( url )
self._AddWatcher( watcher )
self._have_started = False
self._last_time_watchers_changed = HydrusData.GetNowPrecise()
self._last_serialisable_change_timestamp = 0
self._last_pubbed_value_range = ( 0, 0 )
self._next_pub_value_check_time = 0
def _AddWatcher( self, watcher ):
watcher.PublishToPage( False )
watcher.Repage( self._page_key )
self._watchers.append( watcher )
self._last_time_watchers_changed = HydrusData.GetNowPrecise()
watcher_key = watcher.GetWatcherKey()
self._watcher_keys_to_watchers[ watcher_key ] = watcher
self._watcher_keys_to_added_timestamps[ watcher_key ] = HydrusData.GetNow()
def _CleanAddedTimestamps( self ):
keys = list( self._watcher_keys_to_added_timestamps.keys() )
for key in keys:
if HydrusData.TimeHasPassed( self._watcher_keys_to_added_timestamps[ key ] + self.ADDED_TIMESTAMP_DURATION ):
del self._watcher_keys_to_added_timestamps[ key ]
keys = list( self._watcher_keys_to_already_in_timestamps.keys() )
for key in keys:
if HydrusData.TimeHasPassed( self._watcher_keys_to_already_in_timestamps[ key ] + self.ADDED_TIMESTAMP_DURATION ):
del self._watcher_keys_to_already_in_timestamps[ key ]
def _GetSerialisableInfo( self ):
serialisable_watchers = self._watchers.GetSerialisableTuple()
serialisable_checker_options = self._checker_options.GetSerialisableTuple()
serialisable_file_import_options = self._file_import_options.GetSerialisableTuple()
serialisable_tag_import_options = self._tag_import_options.GetSerialisableTuple()
return ( serialisable_watchers, self._highlighted_watcher_url, serialisable_checker_options, serialisable_file_import_options, serialisable_tag_import_options )
def _InitialiseFromSerialisableInfo( self, serialisable_info ):
( serialisable_watchers, self._highlighted_watcher_url, serialisable_checker_options, serialisable_file_import_options, serialisable_tag_import_options ) = serialisable_info
self._watchers = HydrusSerialisable.CreateFromSerialisableTuple( serialisable_watchers )
self._watcher_keys_to_watchers = { watcher.GetWatcherKey() : watcher for watcher in self._watchers }
self._checker_options = HydrusSerialisable.CreateFromSerialisableTuple( serialisable_checker_options )
self._file_import_options = HydrusSerialisable.CreateFromSerialisableTuple( serialisable_file_import_options )
self._tag_import_options = HydrusSerialisable.CreateFromSerialisableTuple( serialisable_tag_import_options )
def _RegenerateStatus( self ):
file_seed_caches = [ watcher.GetFileSeedCache() for watcher in self._watchers ]
self._status_cache = ClientImportFileSeeds.GenerateFileSeedCachesStatus( file_seed_caches )
self._status_dirty = False
def _RemoveWatcher( self, watcher_key ):
if watcher_key not in self._watcher_keys_to_watchers:
return
watcher = self._watcher_keys_to_watchers[ watcher_key ]
watcher.PublishToPage( False )
watcher.Repage( 'dead page key' )
self._watchers.remove( watcher )
self._last_time_watchers_changed = HydrusData.GetNowPrecise()
del self._watcher_keys_to_watchers[ watcher_key ]
def _SerialisableChangeMade( self ):
self._last_serialisable_change_timestamp = HydrusData.GetNow()
def _SetDirty( self ):
self._status_dirty = True
def _UpdateSerialisableInfo( self, version, old_serialisable_info ):
if version == 1:
serialisable_watchers = old_serialisable_info
try:
checker_options = HG.client_controller.new_options.GetDefaultWatcherCheckerOptions()
file_import_options = HG.client_controller.new_options.GetDefaultFileImportOptions( 'loud' )
tag_import_options = TagImportOptions.TagImportOptions( is_default = True )
except:
checker_options = ClientImportOptions.CheckerOptions()
file_import_options = FileImportOptions.FileImportOptions()
tag_import_options = TagImportOptions.TagImportOptions()
serialisable_checker_options = checker_options.GetSerialisableTuple()
serialisable_file_import_options = file_import_options.GetSerialisableTuple()
serialisable_tag_import_options = tag_import_options.GetSerialisableTuple()
highlighted_watcher_key = None
serialisable_highlighted_watcher_key = highlighted_watcher_key
new_serialisable_info = ( serialisable_watchers, serialisable_highlighted_watcher_key, serialisable_checker_options, serialisable_file_import_options, serialisable_tag_import_options )
return ( 2, new_serialisable_info )
def AddURL( self, url, filterable_tags = None, additional_service_keys_to_tags = None ):
if url == '':
return None
url = HG.client_controller.network_engine.domain_manager.NormaliseURL( url )
with self._lock:
for watcher in self._watchers:
if url == watcher.GetURL():
watcher_key = watcher.GetWatcherKey()
self._watcher_keys_to_already_in_timestamps[ watcher_key ] = HydrusData.GetNow()
return None
watcher = WatcherImport()
watcher.SetURL( url )
if filterable_tags is not None:
watcher.SetExternalFilterableTags( filterable_tags )
if additional_service_keys_to_tags is not None:
watcher.SetExternalAdditionalServiceKeysToTags( additional_service_keys_to_tags )
watcher.SetCheckerOptions( self._checker_options )
watcher.SetFileImportOptions( self._file_import_options )
watcher.SetTagImportOptions( self._tag_import_options )
publish_to_page = False
if self._have_started:
watcher.Start( self._page_key, publish_to_page )
self._AddWatcher( watcher )
return watcher
def AddWatcher( self, watcher ):
with self._lock:
self._AddWatcher( watcher )
self._SetDirty()
def ClearHighlightedWatcher( self ):
with self._lock:
if self._highlighted_watcher_url is not None:
self._highlighted_watcher_url = None
self._SerialisableChangeMade()
def GetAPIInfoDict( self, simple ):
highlighted_watcher = self.GetHighlightedWatcher()
with self._lock:
d = {}
d[ 'watcher_imports' ] = [ watcher_import.GetAPIInfoDict( simple ) for watcher_import in self._watchers ]
if highlighted_watcher is None:
d[ 'highlight' ] = None
else:
d[ 'highlight' ] = highlighted_watcher.GetWatcherKey().hex()
return d
def GetFileImportOptions( self ) -> FileImportOptions.FileImportOptions:
with self._lock:
return self._file_import_options
def GetHighlightedWatcher( self ):
with self._lock:
if self._highlighted_watcher_url is not None:
for watcher in self._watchers:
if watcher.GetURL() == self._highlighted_watcher_url:
return watcher
self._highlighted_watcher_url = None
return None
def GetLastTimeWatchersChanged( self ):
with self._lock:
return self._last_time_watchers_changed
def GetNumDead( self ):
with self._lock:
return len( [ watcher for watcher in self._watchers if watcher.IsDead() ] )
def GetNumSeeds( self ):
with self._lock:
return sum( ( watcher.GetNumSeeds() for watcher in self._watchers ) )
def GetNumWatchers( self ):
with self._lock:
return len( self._watchers )
def GetOptions( self ):
with self._lock:
return ( self._checker_options, self._file_import_options, self._tag_import_options )
def GetTotalStatus( self ) -> ClientImportFileSeeds.FileSeedCacheStatus:
with self._lock:
if self._status_dirty:
self._RegenerateStatus()
return self._status_cache
def GetValueRange( self ):
with self._lock:
total_value = 0
total_range = 0
for watcher in self._watchers:
( value, range ) = watcher.GetValueRange()
if value != range:
total_value += value
total_range += range
return ( total_value, total_range )
def GetWatchers( self ):
with self._lock:
return list( self._watchers )
def GetWatcherSimpleStatus( self, watcher ):
with self._lock:
watcher_key = watcher.GetWatcherKey()
if watcher_key in self._watcher_keys_to_added_timestamps:
added_timestamp = self._watcher_keys_to_added_timestamps[ watcher_key ]
if HydrusData.TimeHasPassed( added_timestamp + self.ADDED_TIMESTAMP_DURATION ):
self._CleanAddedTimestamps()
else:
return ( ClientImporting.DOWNLOADER_SIMPLE_STATUS_WORKING, 'just added' )
if watcher_key in self._watcher_keys_to_already_in_timestamps:
already_in_timestamp = self._watcher_keys_to_already_in_timestamps[ watcher_key ]
if HydrusData.TimeHasPassed( already_in_timestamp + self.ADDED_TIMESTAMP_DURATION ):
self._CleanAddedTimestamps()
else:
return ( ClientImporting.DOWNLOADER_SIMPLE_STATUS_WORKING, 'already watching' )
return watcher.GetSimpleStatus()
def HasSerialisableChangesSince( self, since_timestamp ):
with self._lock:
if self._last_serialisable_change_timestamp > since_timestamp:
return True
for watcher in self._watchers:
if watcher.HasSerialisableChangesSince( since_timestamp ):
return True
return False
def RemoveWatcher( self, watcher_key ):
with self._lock:
self._RemoveWatcher( watcher_key )
self._SetDirty()
self._SerialisableChangeMade()
def SetHighlightedWatcher( self, highlighted_watcher ):
with self._lock:
highlighted_watcher_url = highlighted_watcher.GetURL()
if highlighted_watcher_url != self._highlighted_watcher_url:
self._highlighted_watcher_url = highlighted_watcher_url
self._SerialisableChangeMade()
def SetOptions( self, checker_options, file_import_options, tag_import_options ):
with self._lock:
if checker_options.DumpToString() != self._checker_options.DumpToString():
self._checker_options = checker_options
self._SerialisableChangeMade()
if file_import_options.DumpToString() != self._file_import_options.DumpToString():
self._file_import_options = file_import_options
self._SerialisableChangeMade()
if tag_import_options.DumpToString() != self._tag_import_options.DumpToString():
self._tag_import_options = tag_import_options
self._SerialisableChangeMade()
def Start( self, page_key ):
with self._lock:
if self._have_started:
return
self._page_key = page_key
# set a 2s period so the page value/range is pretty snappy
self._watchers_repeating_job = HG.client_controller.CallRepeating( ClientImporting.GetRepeatingJobInitialDelay(), 2.0, self.REPEATINGWorkOnWatchers )
for watcher in self._watchers:
publish_to_page = False
if self._highlighted_watcher_url is not None and watcher.GetURL() == self._highlighted_watcher_url:
publish_to_page = True
watcher.Start( page_key, publish_to_page )
self._have_started = True
def REPEATINGWorkOnWatchers( self ):
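# This is the repeating job scheduled in Start with a 2s period: it cancels itself once the page
# importer should stop working, marks the cached total status dirty when any watcher's file seed
# cache has regenerated more recently than the cache, and publishes 'refresh_page_name' at most
# every five seconds when the combined value/range changes.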
with self._lock:
if ClientImportControl.PageImporterShouldStopWorking( self._page_key ):
self._watchers_repeating_job.Cancel()
return
if not self._status_dirty: # if we think we are clean
for watcher in self._watchers:
file_seed_cache = watcher.GetFileSeedCache()
if file_seed_cache.GetStatus().GetGenerationTime() > self._status_cache.GetGenerationTime(): # has there been an update?
self._SetDirty()
break
if HydrusData.TimeHasPassed( self._next_pub_value_check_time ):
self._next_pub_value_check_time = HydrusData.GetNow() + 5
current_value_range = self.GetValueRange()
if current_value_range != self._last_pubbed_value_range:
self._last_pubbed_value_range = current_value_range
HG.client_controller.pub( 'refresh_page_name', self._page_key )
HydrusSerialisable.SERIALISABLE_TYPES_TO_OBJECT_TYPES[ HydrusSerialisable.SERIALISABLE_TYPE_MULTIPLE_WATCHER_IMPORT ] = MultipleWatcherImport
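# Illustrative sketch only: roughly how a page object might drive MultipleWatcherImport. It assumes
# a running client, since defaults come from HG.client_controller and work is scheduled with
# CallRepeating; 'page_key' and 'thread_url' are placeholder names, not values from this file.
#
#     multiple_watcher = MultipleWatcherImport()
#     multiple_watcher.Start( page_key )                   # starts the 2s repeating status job
#     watcher = multiple_watcher.AddURL( thread_url )      # returns None for '' or an already-watched URL
#     ( value, range ) = multiple_watcher.GetValueRange()  # aggregate progress, used for the page name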
class WatcherImport( HydrusSerialisable.SerialisableBase ):
SERIALISABLE_TYPE = HydrusSerialisable.SERIALISABLE_TYPE_WATCHER_IMPORT
SERIALISABLE_NAME = 'Watcher'
SERIALISABLE_VERSION = 8
MIN_CHECK_PERIOD = 30
def __init__( self ):
HydrusSerialisable.SerialisableBase.__init__( self )
self._page_key = b'initialising page key'
self._publish_to_page = False
self._url = ''
self._gallery_seed_log = ClientImportGallerySeeds.GallerySeedLog()
self._file_seed_cache = ClientImportFileSeeds.FileSeedCache()
self._external_filterable_tags = set()
self._external_additional_service_keys_to_tags = ClientTags.ServiceKeysToTags()
self._checker_options = HG.client_controller.new_options.GetDefaultWatcherCheckerOptions()
self._file_import_options = HG.client_controller.new_options.GetDefaultFileImportOptions( 'loud' )
self._tag_import_options = TagImportOptions.TagImportOptions( is_default = True )
self._last_check_time = 0
self._checking_status = ClientImporting.CHECKER_STATUS_OK
self._subject = 'unknown subject'
self._next_check_time = None
self._file_network_job = None
self._checker_network_job = None
self._check_now = False
self._files_paused = False
self._checking_paused = False
self._no_work_until = 0
self._no_work_until_reason = ''
self._creation_time = HydrusData.GetNow()
self._file_velocity_status = ''
self._files_status = ''
self._watcher_status = ''
self._watcher_key = HydrusData.GenerateKey()
self._have_started = False
self._lock = threading.Lock()
self._files_working_lock = threading.Lock()
self._checker_working_lock = threading.Lock()
self._last_pubbed_page_name = ''
self._files_repeating_job = None
self._checker_repeating_job = None
self._last_serialisable_change_timestamp = 0
HG.client_controller.sub( self, 'NotifyFileSeedsUpdated', 'file_seed_cache_file_seeds_updated' )
def _CheckerNetworkJobPresentationContextFactory( self, network_job ):
def enter_call():
with self._lock:
self._checker_network_job = network_job
def exit_call():
with self._lock:
self._checker_network_job = None
return ClientImporting.NetworkJobPresentationContext( enter_call, exit_call )
def _CheckWatchableURL( self ):
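# A check wraps the watched URL in a single GallerySeed (no extra gallery pages are generated),
# works it so any newly parsed file URLs land in the file seed cache, pauses checking with a 404
# status if the thread is gone, and on a network error delays further work by the
# 'downloader_network_error_delay' option. Afterwards it records the last check time, refreshes the
# file velocity status and next check time, and compacts the gallery seed log.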
def file_seeds_callable( file_seeds ):
return ClientImporting.UpdateFileSeedCacheWithFileSeeds( self._file_seed_cache, file_seeds )
def status_hook( text ):
with self._lock:
self._watcher_status = ClientImportControl.NeatenStatusText( text )
def title_hook( text ):
with self._lock:
if len( text ) > 0:
text = text.splitlines()[0]
self._subject = text
gallery_seed = ClientImportGallerySeeds.GallerySeed( self._url, can_generate_more_pages = False )
gallery_seed.SetExternalFilterableTags( self._external_filterable_tags )
gallery_seed.SetExternalAdditionalServiceKeysToTags( self._external_additional_service_keys_to_tags )
self._gallery_seed_log.AddGallerySeeds( ( gallery_seed, ) )
with self._lock:
self._watcher_status = 'checking'
try:
( num_urls_added, num_urls_already_in_file_seed_cache, num_urls_total, result_404, added_new_gallery_pages, stop_reason ) = gallery_seed.WorkOnURL( 'watcher', self._gallery_seed_log, file_seeds_callable, status_hook, title_hook, self._NetworkJobFactory, self._CheckerNetworkJobPresentationContextFactory, self._file_import_options )
if num_urls_added > 0:
ClientImporting.WakeRepeatingJob( self._files_repeating_job )
if result_404:
with self._lock:
self._checking_paused = True
self._checking_status = ClientImporting.CHECKER_STATUS_404
if gallery_seed.status == CC.STATUS_ERROR:
# the [DEAD] stuff can override watcher status, so let's give a brief time for this to display the error
with self._lock:
self._checking_paused = True
self._watcher_status = gallery_seed.note
time.sleep( 5 )
except HydrusExceptions.NetworkException as e:
delay = HG.client_controller.new_options.GetInteger( 'downloader_network_error_delay' )
self._DelayWork( delay, str( e ) )
gallery_seed.SetStatus( CC.STATUS_ERROR, str( e ) )
HydrusData.PrintException( e )
finally:
self._gallery_seed_log.NotifyGallerySeedsUpdated( ( gallery_seed, ) )
with self._lock:
if self._check_now:
self._check_now = False
self._last_check_time = HydrusData.GetNow()
self._UpdateFileVelocityStatus()
self._UpdateNextCheckTime()
self._Compact()
self._watcher_status = ''
def _Compact( self ):
death_period = self._checker_options.GetDeathFileVelocityPeriod()
compact_before_this_time = self._last_check_time - ( death_period * 2 )
self._gallery_seed_log.Compact( compact_before_this_time )
def _DelayWork( self, time_delta, reason ):
if len( reason ) > 0:
reason = reason.splitlines()[0]
self._no_work_until = HydrusData.GetNow() + time_delta
self._no_work_until_reason = reason
def _FileNetworkJobPresentationContextFactory( self, network_job ):
def enter_call():
with self._lock:
self._file_network_job = network_job
def exit_call():
with self._lock:
self._file_network_job = None
return ClientImporting.NetworkJobPresentationContext( enter_call, exit_call )
def _NetworkJobFactory( self, *args, **kwargs ):
network_job = ClientNetworkingJobs.NetworkJobWatcherPage( self._watcher_key, *args, **kwargs )
return network_job
def _GetSerialisableInfo( self ):
serialisable_gallery_seed_log = self._gallery_seed_log.GetSerialisableTuple()
serialisable_file_seed_cache = self._file_seed_cache.GetSerialisableTuple()
serialisable_external_filterable_tags = list( self._external_filterable_tags )
serialisable_external_additional_service_keys_to_tags = self._external_additional_service_keys_to_tags.GetSerialisableTuple()
serialisable_checker_options = self._checker_options.GetSerialisableTuple()
serialisable_file_import_options = self._file_import_options.GetSerialisableTuple()
serialisable_tag_import_options = self._tag_import_options.GetSerialisableTuple()
return (
self._url,
serialisable_gallery_seed_log,
serialisable_file_seed_cache,
serialisable_external_filterable_tags,
serialisable_external_additional_service_keys_to_tags,
serialisable_checker_options,
serialisable_file_import_options,
serialisable_tag_import_options,
self._last_check_time,
self._files_paused,
self._checking_paused,
self._checking_status,
self._subject,
self._no_work_until,
self._no_work_until_reason,
self._creation_time
)
def _HasURL( self ):
return self._url != ''
def _InitialiseFromSerialisableInfo( self, serialisable_info ):
(
self._url,
serialisable_gallery_seed_log,
serialisable_file_seed_cache,
serialisable_external_filterable_tags,
serialisable_external_additional_service_keys_to_tags,
serialisable_checker_options,
serialisable_file_import_options,
serialisable_tag_import_options,
self._last_check_time,
self._files_paused,
self._checking_paused,
self._checking_status,
self._subject,
self._no_work_until,
self._no_work_until_reason,
self._creation_time
) = serialisable_info
self._external_filterable_tags = set( serialisable_external_filterable_tags )
self._external_additional_service_keys_to_tags = HydrusSerialisable.CreateFromSerialisableTuple( serialisable_external_additional_service_keys_to_tags )
self._gallery_seed_log = HydrusSerialisable.CreateFromSerialisableTuple( serialisable_gallery_seed_log )
self._file_seed_cache = HydrusSerialisable.CreateFromSerialisableTuple( serialisable_file_seed_cache )
self._checker_options = HydrusSerialisable.CreateFromSerialisableTuple( serialisable_checker_options )
self._file_import_options = HydrusSerialisable.CreateFromSerialisableTuple( serialisable_file_import_options )
self._tag_import_options = HydrusSerialisable.CreateFromSerialisableTuple( serialisable_tag_import_options )
def _SerialisableChangeMade( self ):
self._last_serialisable_change_timestamp = HydrusData.GetNow()
def _UpdateFileVelocityStatus( self ):
self._file_velocity_status = self._checker_options.GetPrettyCurrentVelocity( self._file_seed_cache, self._last_check_time )
def _UpdateNextCheckTime( self ):
if self._check_now:
self._next_check_time = self._last_check_time + self.MIN_CHECK_PERIOD
else:
if not HydrusData.TimeHasPassed( self._no_work_until ):
self._next_check_time = self._no_work_until + 1
else:
if self._checking_status == ClientImporting.CHECKER_STATUS_OK:
if self._checker_options.IsDead( self._file_seed_cache, self._last_check_time ):
self._checking_status = ClientImporting.CHECKER_STATUS_DEAD
if self._checking_status != ClientImporting.CHECKER_STATUS_OK:
self._checking_paused = True
last_next_check_time = self._next_check_time
self._next_check_time = self._checker_options.GetNextCheckTime( self._file_seed_cache, self._last_check_time, last_next_check_time )
def _UpdateSerialisableInfo( self, version, old_serialisable_info ):
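# Each 'if version == n:' block migrates the stored tuple forward by exactly one version, so a very
# old serialised watcher falls through the chain until it reaches the current SERIALISABLE_VERSION ( 8 ).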
if version == 1:
( url, serialisable_file_seed_cache, urls_to_filenames, urls_to_md5_base64, serialisable_file_import_options, serialisable_tag_import_options, times_to_check, check_period, last_check_time, paused ) = old_serialisable_info
checker_options = ClientImportOptions.CheckerOptions( intended_files_per_check = 8, never_faster_than = 300, never_slower_than = 86400, death_file_velocity = ( 1, 86400 ) )
serialisable_checker_options = checker_options.GetSerialisableTuple()
files_paused = paused
checking_paused = paused
new_serialisable_info = ( url, serialisable_file_seed_cache, urls_to_filenames, urls_to_md5_base64, serialisable_checker_options, serialisable_file_import_options, serialisable_tag_import_options, last_check_time, files_paused, checking_paused )
return ( 2, new_serialisable_info )
if version == 2:
( url, serialisable_file_seed_cache, urls_to_filenames, urls_to_md5_base64, serialisable_checker_options, serialisable_file_import_options, serialisable_tag_import_options, last_check_time, files_paused, checking_paused ) = old_serialisable_info
checking_status = ClientImporting.CHECKER_STATUS_OK
subject = 'unknown subject'
new_serialisable_info = ( url, serialisable_file_seed_cache, urls_to_filenames, urls_to_md5_base64, serialisable_checker_options, serialisable_file_import_options, serialisable_tag_import_options, last_check_time, files_paused, checking_paused, checking_status, subject )
return ( 3, new_serialisable_info )
if version == 3:
( url, serialisable_file_seed_cache, urls_to_filenames, urls_to_md5_base64, serialisable_checker_options, serialisable_file_import_options, serialisable_tag_import_options, last_check_time, files_paused, checking_paused, checking_status, subject ) = old_serialisable_info
no_work_until = 0
no_work_until_reason = ''
new_serialisable_info = ( url, serialisable_file_seed_cache, urls_to_filenames, urls_to_md5_base64, serialisable_checker_options, serialisable_file_import_options, serialisable_tag_import_options, last_check_time, files_paused, checking_paused, checking_status, subject, no_work_until, no_work_until_reason )
return ( 4, new_serialisable_info )
if version == 4:
( url, serialisable_file_seed_cache, urls_to_filenames, urls_to_md5_base64, serialisable_checker_options, serialisable_file_import_options, serialisable_tag_import_options, last_check_time, files_paused, checking_paused, checking_status, subject, no_work_until, no_work_until_reason ) = old_serialisable_info
creation_time = HydrusData.GetNow()
new_serialisable_info = ( url, serialisable_file_seed_cache, urls_to_filenames, urls_to_md5_base64, serialisable_checker_options, serialisable_file_import_options, serialisable_tag_import_options, last_check_time, files_paused, checking_paused, checking_status, subject, no_work_until, no_work_until_reason, creation_time )
return ( 5, new_serialisable_info )
if version == 5:
( url, serialisable_file_seed_cache, urls_to_filenames, urls_to_md5_base64, serialisable_checker_options, serialisable_file_import_options, serialisable_tag_import_options, last_check_time, files_paused, checking_paused, checking_status, subject, no_work_until, no_work_until_reason, creation_time ) = old_serialisable_info
gallery_seed_log = ClientImportGallerySeeds.GallerySeedLog()
serialisable_gallery_seed_log = gallery_seed_log.GetSerialisableTuple()
new_serialisable_info = ( url, serialisable_gallery_seed_log, serialisable_file_seed_cache, urls_to_filenames, urls_to_md5_base64, serialisable_checker_options, serialisable_file_import_options, serialisable_tag_import_options, last_check_time, files_paused, checking_paused, checking_status, subject, no_work_until, no_work_until_reason, creation_time )
return ( 6, new_serialisable_info )
if version == 6:
( url, serialisable_gallery_seed_log, serialisable_file_seed_cache, urls_to_filenames, urls_to_md5_base64, serialisable_checker_options, serialisable_file_import_options, serialisable_tag_import_options, last_check_time, files_paused, checking_paused, checking_status, subject, no_work_until, no_work_until_reason, creation_time ) = old_serialisable_info
external_additional_service_keys_to_tags = ClientTags.ServiceKeysToTags()
serialisable_external_additional_service_keys_to_tags = external_additional_service_keys_to_tags.GetSerialisableTuple()
new_serialisable_info = ( url, serialisable_gallery_seed_log, serialisable_file_seed_cache, serialisable_external_additional_service_keys_to_tags, serialisable_checker_options, serialisable_file_import_options, serialisable_tag_import_options, last_check_time, files_paused, checking_paused, checking_status, subject, no_work_until, no_work_until_reason, creation_time )
return ( 7, new_serialisable_info )
if version == 7:
( url, serialisable_gallery_seed_log, serialisable_file_seed_cache, serialisable_external_additional_service_keys_to_tags, serialisable_checker_options, serialisable_file_import_options, serialisable_tag_import_options, last_check_time, files_paused, checking_paused, checking_status, subject, no_work_until, no_work_until_reason, creation_time ) = old_serialisable_info
filterable_tags = set()
serialisable_external_filterable_tags = list( filterable_tags )
new_serialisable_info = ( url, serialisable_gallery_seed_log, serialisable_file_seed_cache, serialisable_external_filterable_tags, serialisable_external_additional_service_keys_to_tags, serialisable_checker_options, serialisable_file_import_options, serialisable_tag_import_options, last_check_time, files_paused, checking_paused, checking_status, subject, no_work_until, no_work_until_reason, creation_time )
return ( 8, new_serialisable_info )
def _WorkOnFiles( self ):
file_seed = self._file_seed_cache.GetNextFileSeed( CC.STATUS_UNKNOWN )
if file_seed is None:
return
def status_hook( text ):
with self._lock:
self._files_status = ClientImportControl.NeatenStatusText( text )
did_substantial_work = file_seed.WorkOnURL( self._file_seed_cache, status_hook, self._NetworkJobFactory, self._FileNetworkJobPresentationContextFactory, self._file_import_options, self._tag_import_options )
with self._lock:
should_present = self._publish_to_page and file_seed.ShouldPresent( self._file_import_options.GetPresentationImportOptions() )
page_key = self._page_key
if should_present:
file_seed.PresentToPage( page_key )
did_substantial_work = True
with self._lock:
self._files_status = ''
if did_substantial_work:
time.sleep( ClientImporting.DID_SUBSTANTIAL_FILE_WORK_MINIMUM_SLEEP_TIME )
def GetAPIInfoDict( self, simple ):
with self._lock:
d = {}
d[ 'url' ] = self._url
d[ 'watcher_key' ] = self._watcher_key.hex()
d[ 'created' ] = self._creation_time
d[ 'last_check_time' ] = self._last_check_time
d[ 'next_check_time' ] = self._next_check_time
d[ 'files_paused' ] = self._files_paused
d[ 'checking_paused' ] = self._checking_paused
d[ 'checking_status' ] = self._checking_status
d[ 'subject' ] = self._subject
d[ 'imports' ] = self._file_seed_cache.GetAPIInfoDict( simple )
d[ 'gallery_log' ] = self._gallery_seed_log.GetAPIInfoDict( simple )
return d
def CanRetryFailed( self ):
with self._lock:
return self._file_seed_cache.GetFileSeedCount( CC.STATUS_ERROR ) > 0
def CanRetryIgnored( self ):
with self._lock:
return self._file_seed_cache.GetFileSeedCount( CC.STATUS_VETOED ) > 0
def CheckingPaused( self ):
with self._lock:
return self._checking_paused
def CheckNow( self ):
with self._lock:
self._check_now = True
self._checking_paused = False
self._no_work_until = 0
self._no_work_until_reason = ''
self._checking_status = ClientImporting.CHECKER_STATUS_OK
self._UpdateNextCheckTime()
ClientImporting.WakeRepeatingJob( self._checker_repeating_job )
self._SerialisableChangeMade()
def CurrentlyAlive( self ):
with self._lock:
return self._checking_status == ClientImporting.CHECKER_STATUS_OK
def CurrentlyWorking( self ):
with self._lock:
finished = not self._file_seed_cache.WorkToDo()
return not finished and not self._files_paused
def FilesPaused( self ):
with self._lock:
return self._files_paused
def GetCheckerOptions( self ):
with self._lock:
return self._checker_options
def GetCheckingStatus( self ):
with self._lock:
return self._checking_status
def GetCreationTime( self ):
with self._lock:
return self._creation_time
def GetFileImportOptions( self ):
with self._lock:
return self._file_import_options
def GetFileSeedCache( self ):
with self._lock:
return self._file_seed_cache
def GetGallerySeedLog( self ):
with self._lock:
return self._gallery_seed_log
def GetHashes( self ):
with self._lock:
fsc = self._file_seed_cache
return fsc.GetHashes()
def GetNetworkJobs( self ):
with self._lock:
return ( self._file_network_job, self._checker_network_job )
def GetNextCheckTime( self ):
with self._lock:
return self._next_check_time
def GetNumSeeds( self ):
with self._lock:
return len( self._file_seed_cache ) + len( self._gallery_seed_log )
def GetOptions( self ):
with self._lock:
return ( self._url, self._file_import_options, self._tag_import_options )
def GetPresentedHashes( self, presentation_import_options = None ):
with self._lock:
fsc = self._file_seed_cache
if presentation_import_options is None:
presentation_import_options = self._file_import_options.GetPresentationImportOptions()
return fsc.GetPresentedHashes( presentation_import_options )
def GetSimpleStatus( self ):
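# Status priority here: delayed work ('deferred') first, then working/pending when the checker or
# file queue could run, then the terminal 404/DEAD states, then paused, and finally a countdown to
# the next check.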
with self._lock:
files_work_to_do = self._file_seed_cache.WorkToDo()
checker_go = HydrusData.TimeHasPassed( self._next_check_time ) and not self._checking_paused
files_go = files_work_to_do and not self._files_paused
if not HydrusData.TimeHasPassed( self._no_work_until ):
if self._next_check_time is None:
text = '{} - working again {}'.format( self._no_work_until_reason, ClientData.TimestampToPrettyTimeDelta( self._no_work_until ) )
else:
text = '{} - next check {}'.format( self._no_work_until_reason, ClientData.TimestampToPrettyTimeDelta( max( self._no_work_until, self._next_check_time ) ) )
return ( ClientImporting.DOWNLOADER_SIMPLE_STATUS_DEFERRED, text )
elif checker_go or files_go:
if self._checker_working_lock.locked() or self._files_working_lock.locked():
return ( ClientImporting.DOWNLOADER_SIMPLE_STATUS_WORKING, 'working' )
else:
return ( ClientImporting.DOWNLOADER_SIMPLE_STATUS_PENDING, 'pending' )
elif self._checking_status == ClientImporting.CHECKER_STATUS_404:
return ( ClientImporting.DOWNLOADER_SIMPLE_STATUS_DONE, '404' )
elif self._checking_status == ClientImporting.CHECKER_STATUS_DEAD:
return ( ClientImporting.DOWNLOADER_SIMPLE_STATUS_DONE, 'DEAD' )
else:
if self._checking_paused:
return ( ClientImporting.DOWNLOADER_SIMPLE_STATUS_PAUSED, '' )
else:
if self._next_check_time is None or HydrusData.TimeHasPassed( self._next_check_time ):
return ( ClientImporting.DOWNLOADER_SIMPLE_STATUS_PENDING, 'pending' )
else:
return ( ClientImporting.DOWNLOADER_SIMPLE_STATUS_DEFERRED, ClientData.TimestampToPrettyTimeDelta( self._next_check_time, no_prefix = True ) )
def GetStatus( self ):
with self._lock:
if not HydrusData.TimeHasPassed( self._no_work_until ):
if self._next_check_time is None:
no_work_text = '{} - working again {}'.format( self._no_work_until_reason, ClientData.TimestampToPrettyTimeDelta( self._no_work_until ) )
else:
no_work_text = '{} - next check {}'.format( self._no_work_until_reason, ClientData.TimestampToPrettyTimeDelta( max( self._no_work_until, self._next_check_time ) ) )
files_status = no_work_text
watcher_status = no_work_text
else:
files_work_to_do = self._file_seed_cache.WorkToDo()
checker_go = HydrusData.TimeHasPassed( self._next_check_time ) and not self._checking_paused
files_go = files_work_to_do and not self._files_paused
if checker_go and not self._checker_working_lock.locked():
self._watcher_status = 'waiting for a work slot'
if files_go and not self._files_working_lock.locked():
self._files_status = 'waiting for a work slot'
files_status = ClientImportControl.GenerateLiveStatusText( self._files_status, self._files_paused, self._no_work_until, self._no_work_until_reason )
watcher_status = ClientImportControl.GenerateLiveStatusText( self._watcher_status, self._checking_paused, self._no_work_until, self._no_work_until_reason )
return ( files_status, self._files_paused, self._file_velocity_status, self._next_check_time, watcher_status, self._subject, self._checking_status, self._check_now, self._checking_paused )
def GetSubject( self ):
with self._lock:
if self._subject in ( None, '' ):
return 'unknown subject'
else:
return self._subject
def GetTagImportOptions( self ):
with self._lock:
return self._tag_import_options
def GetWatcherKey( self ):
with self._lock:
return self._watcher_key
def GetURL( self ):
with self._lock:
return self._url
def GetValueRange( self ):
with self._lock:
return self._file_seed_cache.GetValueRange()
def HasSerialisableChangesSince( self, since_timestamp ):
return self._last_serialisable_change_timestamp > since_timestamp
def HasURL( self ):
with self._lock:
return self._HasURL()
def _IsDead( self ):
return self._checking_status in ( ClientImporting.CHECKER_STATUS_404, ClientImporting.CHECKER_STATUS_DEAD )
def IsDead( self ):
with self._lock:
return self._IsDead()
def NotifyFileSeedsUpdated( self, file_seed_cache_key, file_seeds ):
if file_seed_cache_key == self._file_seed_cache.GetFileSeedCacheKey():
ClientImporting.WakeRepeatingJob( self._files_repeating_job )
self._SerialisableChangeMade()
def PausePlayChecking( self ):
with self._lock:
if self._checking_paused and self._IsDead():
return # watcher is dead, so don't unpause until a checknow event
else:
self._checking_paused = not self._checking_paused
ClientImporting.WakeRepeatingJob( self._checker_repeating_job )
self._SerialisableChangeMade()
def PausePlayFiles( self ):
with self._lock:
self._files_paused = not self._files_paused
ClientImporting.WakeRepeatingJob( self._files_repeating_job )
self._SerialisableChangeMade()
def PublishToPage( self, publish_to_page ):
with self._lock:
self._publish_to_page = publish_to_page
def Repage( self, page_key ):
with self._lock:
self._page_key = page_key
def RetryFailed( self ):
with self._lock:
self._file_seed_cache.RetryFailed()
self._SerialisableChangeMade()
def RetryIgnored( self, ignored_regex = None ):
with self._lock:
self._file_seed_cache.RetryIgnored( ignored_regex = ignored_regex )
self._SerialisableChangeMade()
def SetCheckerOptions( self, checker_options: ClientImportOptions.CheckerOptions ):
with self._lock:
if checker_options.DumpToString() != self._checker_options.DumpToString():
self._checker_options = checker_options
self._UpdateNextCheckTime()
self._UpdateFileVelocityStatus()
ClientImporting.WakeRepeatingJob( self._checker_repeating_job )
self._SerialisableChangeMade()
def SetFileImportOptions( self, file_import_options: FileImportOptions.FileImportOptions ):
with self._lock:
if file_import_options.DumpToString() != self._file_import_options.DumpToString():
self._file_import_options = file_import_options
self._SerialisableChangeMade()
def SetExternalAdditionalServiceKeysToTags( self, service_keys_to_tags ):
with self._lock:
external_additional_service_keys_to_tags = ClientTags.ServiceKeysToTags( service_keys_to_tags )
if external_additional_service_keys_to_tags.DumpToString() != self._external_additional_service_keys_to_tags.DumpToString():
self._external_additional_service_keys_to_tags = external_additional_service_keys_to_tags
self._SerialisableChangeMade()
def SetExternalFilterableTags( self, tags ):
with self._lock:
tags_set = set( tags )
if tags_set != self._external_filterable_tags:
self._external_filterable_tags = tags_set
self._SerialisableChangeMade()
def SetTagImportOptions( self, tag_import_options: TagImportOptions.TagImportOptions ):
with self._lock:
if tag_import_options.DumpToString() != self._tag_import_options.DumpToString():
self._tag_import_options = tag_import_options
self._SerialisableChangeMade()
def SetURL( self, url ):
if url is None:
url = ''
if url != '':
try:
url = HG.client_controller.network_engine.domain_manager.NormaliseURL( url )
except HydrusExceptions.URLClassException:
url = ''
with self._lock:
self._url = url
ClientImporting.WakeRepeatingJob( self._checker_repeating_job )
self._SerialisableChangeMade()
def Start( self, page_key, publish_to_page ):
with self._lock:
if self._have_started:
return
self._page_key = page_key
self._publish_to_page = publish_to_page
self._UpdateNextCheckTime()
self._UpdateFileVelocityStatus()
self._files_repeating_job = HG.client_controller.CallRepeating( ClientImporting.GetRepeatingJobInitialDelay(), ClientImporting.REPEATING_JOB_TYPICAL_PERIOD, self.REPEATINGWorkOnFiles )
self._checker_repeating_job = HG.client_controller.CallRepeating( ClientImporting.GetRepeatingJobInitialDelay(), ClientImporting.REPEATING_JOB_TYPICAL_PERIOD, self.REPEATINGWorkOnChecker )
self._files_repeating_job.SetThreadSlotType( 'watcher_files' )
self._checker_repeating_job.SetThreadSlotType( 'watcher_check' )
self._have_started = True
def CheckCanDoFileWork( self ):
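# The CheckCanDo* methods signal 'cannot work right now' by raising HydrusExceptions.VetoException
# with a human-readable reason; the REPEATING* jobs below catch it and show that reason as the
# current status text.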
with self._lock:
try:
ClientImportControl.CheckImporterCanDoWorkBecauseStopped( self._page_key )
except HydrusExceptions.VetoException:
self._files_repeating_job.Cancel()
raise
ClientImportControl.CheckImporterCanDoFileWorkBecausePaused( self._files_paused, self._file_seed_cache, self._page_key )
try:
ClientImportControl.CheckImporterCanDoFileWorkBecausePausifyingProblem( self._file_import_options )
except HydrusExceptions.VetoException:
self._files_paused = True
raise
self.CheckCanDoNetworkWork()
def CheckCanDoNetworkWork( self ):
with self._lock:
ClientImportControl.CheckCanDoNetworkWork( self._no_work_until, self._no_work_until_reason )
return True
def REPEATINGWorkOnFiles( self ):
with self._files_working_lock:
while True:
try:
try:
self.CheckCanDoFileWork()
except HydrusExceptions.VetoException as e:
with self._lock:
self._files_status = str( e )
break
self._WorkOnFiles()
HG.client_controller.WaitUntilViewFree()
self._SerialisableChangeMade()
except Exception as e:
with self._lock:
self._files_status = 'stopping work: {}'.format( str( e ) )
HydrusData.ShowException( e )
return
def CheckCanDoCheckerWork( self ):
with self._lock:
try:
ClientImportControl.CheckImporterCanDoWorkBecauseStopped( self._page_key )
except HydrusExceptions.VetoException:
self._checker_repeating_job.Cancel()
raise
while self._gallery_seed_log.WorkToDo():
# some old unworked gallery url is hanging around, let's clear it
gallery_seed = self._gallery_seed_log.GetNextGallerySeed( CC.STATUS_UNKNOWN )
gallery_seed.SetStatus( CC.STATUS_VETOED, note = 'check never finished' )
self._gallery_seed_log.NotifyGallerySeedsUpdated( ( gallery_seed, ) )
if self._checking_paused:
raise HydrusExceptions.VetoException( 'paused' )
if HG.client_controller.new_options.GetBoolean( 'pause_all_watcher_checkers' ):
raise HydrusExceptions.VetoException( 'all checkers are paused!' )
if not self._HasURL():
raise HydrusExceptions.VetoException( 'no url set yet!' )
if self._checking_status == ClientImporting.CHECKER_STATUS_404:
raise HydrusExceptions.VetoException( 'URL 404' )
elif self._checking_status == ClientImporting.CHECKER_STATUS_DEAD:
raise HydrusExceptions.VetoException( 'URL DEAD' )
check_due = HydrusData.TimeHasPassed( self._next_check_time )
if not check_due:
raise HydrusExceptions.VetoException( '' )
return self.CheckCanDoNetworkWork()
def REPEATINGWorkOnChecker( self ):
with self._checker_working_lock:
try:
try:
self.CheckCanDoCheckerWork()
except HydrusExceptions.VetoException as e:
with self._lock:
self._watcher_status = str( e )
return
self._CheckWatchableURL()
self._SerialisableChangeMade()
except Exception as e:
with self._lock:
self._watcher_status = 'stopping work: {}'.format( str( e ) )
HydrusData.ShowException( e )
return
HydrusSerialisable.SERIALISABLE_TYPES_TO_OBJECT_TYPES[ HydrusSerialisable.SERIALISABLE_TYPE_WATCHER_IMPORT ] = WatcherImport
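# Illustrative sketch only: the rough lifecycle of a single WatcherImport. It assumes a running
# client controller and network engine, so treat it as an outline rather than runnable code;
# 'thread_url', 'page_key' and 'publish_to_page' are placeholder names, not values from this file.
#
#     watcher = WatcherImport()
#     watcher.SetURL( thread_url )                # normalised via the domain manager; bad URLs become ''
#     watcher.Start( page_key, publish_to_page )  # schedules the file and checker repeating jobs
#     watcher.CheckNow()                          # clears any 404/DEAD status and forces an early check
#     hashes = watcher.GetPresentedHashes()       # what the file import options would present to the page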