2020-05-20 21:36:02 +00:00
import threading
import time
2020-07-29 20:52:44 +00:00
from hydrus . core import HydrusData
from hydrus . core import HydrusExceptions
from hydrus . core import HydrusGlobals as HG
from hydrus . core import HydrusSerialisable
2020-04-22 21:00:35 +00:00
from hydrus . client import ClientConstants as CC
2020-06-24 21:25:24 +00:00
from hydrus . client import ClientData
2022-06-22 20:43:12 +00:00
from hydrus . client . importing import ClientImportControl
2020-04-22 21:00:35 +00:00
from hydrus . client . importing import ClientImporting
from hydrus . client . importing import ClientImportFileSeeds
from hydrus . client . importing import ClientImportGallerySeeds
2021-06-30 21:27:35 +00:00
from hydrus . client . importing . options import ClientImportOptions
from hydrus . client . importing . options import FileImportOptions
from hydrus . client . importing . options import TagImportOptions
2020-08-05 20:10:36 +00:00
from hydrus . client . metadata import ClientTags
2020-04-22 21:00:35 +00:00
from hydrus . client . networking import ClientNetworkingJobs
2018-05-23 21:05:06 +00:00
class MultipleWatcherImport ( HydrusSerialisable . SerialisableBase ) :
SERIALISABLE_TYPE = HydrusSerialisable . SERIALISABLE_TYPE_MULTIPLE_WATCHER_IMPORT
SERIALISABLE_NAME = ' Multiple Watcher '
2018-06-20 20:20:22 +00:00
SERIALISABLE_VERSION = 2
2018-08-22 21:10:59 +00:00
ADDED_TIMESTAMP_DURATION = 15
2018-05-23 21:05:06 +00:00
def __init__(self, url=None):
    """Create an empty multi-watcher, optionally seeded with one URL.

    Args:
        url: when not None, a WatcherImport is created for this URL and
            registered through _AddWatcher.
    """
    HydrusSerialisable.SerialisableBase.__init__(self)

    self._lock = threading.Lock()

    # placeholder value; Start() overwrites it with the real page key
    self._page_key = b' initialising page key '

    self._watchers = HydrusSerialisable.SerialisableList()
    self._highlighted_watcher_url = None

    # options are stored here and pushed onto watchers created by AddURL
    default_options = HG.client_controller.new_options
    self._checker_options = default_options.GetDefaultWatcherCheckerOptions()
    self._file_import_options = default_options.GetDefaultFileImportOptions(' loud ')
    self._tag_import_options = TagImportOptions.TagImportOptions(is_default=True)

    # lookup tables keyed by watcher key
    self._watcher_keys_to_watchers = {}
    self._watcher_keys_to_added_timestamps = {}
    self._watcher_keys_to_already_in_timestamps = {}

    self._watchers_repeating_job = None

    # cached aggregate status, rebuilt by _RegenerateStatus when dirty
    self._status_dirty = True
    self._status_cache = ClientImportFileSeeds.FileSeedCacheStatus()

    if url is not None:
        first_watcher = WatcherImport()
        first_watcher.SetURL(url)
        self._AddWatcher(first_watcher)

    self._have_started = False

    self._last_time_watchers_changed = HydrusData.GetNowPrecise()
    self._last_serialisable_change_timestamp = 0

    # bookkeeping read and written by REPEATINGWorkOnWatchers
    self._last_pubbed_value_range = (0, 0)
    self._next_pub_value_check_time = 0
def _AddWatcher(self, watcher):
    """Register a watcher in the list and in the key lookup tables.

    Takes no lock itself: AddURL and AddWatcher call it while holding
    self._lock, and __init__ calls it during construction.
    """
    # child watchers never publish to the page when first attached
    watcher.PublishToPage(False)
    watcher.Repage(self._page_key)

    self._watchers.append(watcher)
    self._last_time_watchers_changed = HydrusData.GetNowPrecise()

    key = watcher.GetWatcherKey()
    self._watcher_keys_to_watchers[key] = watcher
    # lets GetWatcherSimpleStatus report the 'just added' status for a while
    self._watcher_keys_to_added_timestamps[key] = HydrusData.GetNow()
2018-05-23 21:05:06 +00:00
2018-06-20 20:20:22 +00:00
def _CleanAddedTimestamps ( self ) :
2018-05-23 21:05:06 +00:00
2018-06-20 20:20:22 +00:00
keys = list ( self . _watcher_keys_to_added_timestamps . keys ( ) )
2018-05-23 21:05:06 +00:00
2018-06-20 20:20:22 +00:00
for key in keys :
2018-05-23 21:05:06 +00:00
2018-06-20 20:20:22 +00:00
if HydrusData . TimeHasPassed ( self . _watcher_keys_to_added_timestamps [ key ] + self . ADDED_TIMESTAMP_DURATION ) :
del self . _watcher_keys_to_added_timestamps [ key ]
2018-05-23 21:05:06 +00:00
2018-06-20 20:20:22 +00:00
keys = list ( self . _watcher_keys_to_already_in_timestamps . keys ( ) )
2018-05-23 21:05:06 +00:00
2018-06-20 20:20:22 +00:00
for key in keys :
if HydrusData . TimeHasPassed ( self . _watcher_keys_to_already_in_timestamps [ key ] + self . ADDED_TIMESTAMP_DURATION ) :
del self . _watcher_keys_to_already_in_timestamps [ key ]
2018-05-23 21:05:06 +00:00
def _GetSerialisableInfo ( self ) :
serialisable_watchers = self . _watchers . GetSerialisableTuple ( )
2018-06-20 20:20:22 +00:00
serialisable_checker_options = self . _checker_options . GetSerialisableTuple ( )
serialisable_file_import_options = self . _file_import_options . GetSerialisableTuple ( )
serialisable_tag_import_options = self . _tag_import_options . GetSerialisableTuple ( )
return ( serialisable_watchers , self . _highlighted_watcher_url , serialisable_checker_options , serialisable_file_import_options , serialisable_tag_import_options )
2018-05-23 21:05:06 +00:00
def _InitialiseFromSerialisableInfo(self, serialisable_info):
    """Restore state from the 5-tuple produced by _GetSerialisableInfo."""
    (
        watchers_info,
        self._highlighted_watcher_url,
        checker_options_info,
        file_import_options_info,
        tag_import_options_info,
    ) = serialisable_info

    rebuild = HydrusSerialisable.CreateFromSerialisableTuple

    self._watchers = rebuild(watchers_info)

    # the key lookup is not stored, so derive it from the restored watchers
    self._watcher_keys_to_watchers = {
        restored.GetWatcherKey(): restored for restored in self._watchers
    }

    self._checker_options = rebuild(checker_options_info)
    self._file_import_options = rebuild(file_import_options_info)
    self._tag_import_options = rebuild(tag_import_options_info)
def _RegenerateStatus(self):
    """Rebuild the cached aggregate status and clear the dirty flag."""
    caches = []
    for member in self._watchers:
        caches.append(member.GetFileSeedCache())

    self._status_cache = ClientImportFileSeeds.GenerateFileSeedCachesStatus(caches)
    self._status_dirty = False
2018-05-23 21:05:06 +00:00
def _RemoveWatcher ( self , watcher_key ) :
if watcher_key not in self . _watcher_keys_to_watchers :
return
watcher = self . _watcher_keys_to_watchers [ watcher_key ]
2018-08-01 20:44:57 +00:00
watcher . PublishToPage ( False )
watcher . Repage ( ' dead page key ' )
2018-05-23 21:05:06 +00:00
self . _watchers . remove ( watcher )
2018-08-08 20:29:54 +00:00
self . _last_time_watchers_changed = HydrusData . GetNowPrecise ( )
2018-05-23 21:05:06 +00:00
del self . _watcher_keys_to_watchers [ watcher_key ]
2021-09-22 21:12:34 +00:00
def _SerialisableChangeMade(self):
    """Record the current time as the last serialisable change.

    HasSerialisableChangesSince compares against this timestamp.
    """
    now = HydrusData.GetNow()
    self._last_serialisable_change_timestamp = now
2018-05-23 21:05:06 +00:00
def _SetDirty ( self ) :
self . _status_dirty = True
2018-06-20 20:20:22 +00:00
def _UpdateSerialisableInfo(self, version, old_serialisable_info):
    """Upgrade stored info from one serialisable version to the next.

    Version 1 stored only the serialised watcher list. Version 2 adds a
    highlighted-watcher slot (None) plus checker, file import and tag
    import options.

    Args:
        version: the version of old_serialisable_info.
        old_serialisable_info: the stored payload for that version.

    Returns:
        (2, new_serialisable_info) when version == 1. Other versions fall
        through and return None, as before.
    """
    if version == 1:
        serialisable_watchers = old_serialisable_info

        try:
            checker_options = HG.client_controller.new_options.GetDefaultWatcherCheckerOptions()
            file_import_options = HG.client_controller.new_options.GetDefaultFileImportOptions(' loud ')
            tag_import_options = TagImportOptions.TagImportOptions(is_default=True)
        except Exception:
            # Best-effort fallback when the configured defaults cannot be
            # fetched. Deliberately not a bare except, so KeyboardInterrupt
            # and SystemExit still propagate.
            checker_options = ClientImportOptions.CheckerOptions()
            file_import_options = FileImportOptions.FileImportOptions()
            tag_import_options = TagImportOptions.TagImportOptions()

        new_serialisable_info = (
            serialisable_watchers,
            None,  # no watcher was highlighted in version 1
            checker_options.GetSerialisableTuple(),
            file_import_options.GetSerialisableTuple(),
            tag_import_options.GetSerialisableTuple(),
        )

        return (2, new_serialisable_info)
2020-09-16 20:46:54 +00:00
def AddURL ( self , url , filterable_tags = None , additional_service_keys_to_tags = None ) :
2018-05-23 21:05:06 +00:00
if url == ' ' :
2018-08-08 20:29:54 +00:00
return None
2018-05-23 21:05:06 +00:00
2018-12-12 22:15:46 +00:00
url = HG . client_controller . network_engine . domain_manager . NormaliseURL ( url )
2018-05-23 21:05:06 +00:00
with self . _lock :
2018-06-20 20:20:22 +00:00
for watcher in self . _watchers :
2018-05-23 21:05:06 +00:00
2018-06-20 20:20:22 +00:00
if url == watcher . GetURL ( ) :
watcher_key = watcher . GetWatcherKey ( )
self . _watcher_keys_to_already_in_timestamps [ watcher_key ] = HydrusData . GetNow ( )
2018-08-08 20:29:54 +00:00
return None
2018-06-20 20:20:22 +00:00
2018-05-23 21:05:06 +00:00
watcher = WatcherImport ( )
watcher . SetURL ( url )
2020-09-16 20:46:54 +00:00
if filterable_tags is not None :
2019-02-27 23:03:30 +00:00
2020-09-16 20:46:54 +00:00
watcher . SetExternalFilterableTags ( filterable_tags )
if additional_service_keys_to_tags is not None :
watcher . SetExternalAdditionalServiceKeysToTags ( additional_service_keys_to_tags )
2019-02-27 23:03:30 +00:00
2018-06-20 20:20:22 +00:00
watcher . SetCheckerOptions ( self . _checker_options )
watcher . SetFileImportOptions ( self . _file_import_options )
watcher . SetTagImportOptions ( self . _tag_import_options )
2018-05-23 21:05:06 +00:00
publish_to_page = False
2021-08-11 21:14:12 +00:00
if self . _have_started :
watcher . Start ( self . _page_key , publish_to_page )
2018-05-23 21:05:06 +00:00
self . _AddWatcher ( watcher )
2018-08-08 20:29:54 +00:00
return watcher
2018-05-23 21:05:06 +00:00
def AddWatcher ( self , watcher ) :
with self . _lock :
self . _AddWatcher ( watcher )
self . _SetDirty ( )
2021-09-22 21:12:34 +00:00
def ClearHighlightedWatcher ( self ) :
with self . _lock :
if self . _highlighted_watcher_url is not None :
self . _highlighted_watcher_url = None
self . _SerialisableChangeMade ( )
2019-08-21 21:34:01 +00:00
def GetAPIInfoDict ( self , simple ) :
highlighted_watcher = self . GetHighlightedWatcher ( )
with self . _lock :
d = { }
d [ ' watcher_imports ' ] = [ watcher_import . GetAPIInfoDict ( simple ) for watcher_import in self . _watchers ]
if highlighted_watcher is None :
d [ ' highlight ' ] = None
else :
d [ ' highlight ' ] = highlighted_watcher . GetWatcherKey ( ) . hex ( )
return d
2022-01-19 21:28:59 +00:00
def GetFileImportOptions(self) -> FileImportOptions.FileImportOptions:
    """Return the file import options currently held by this object."""
    with self._lock:
        current = self._file_import_options

    return current
2018-06-20 20:20:22 +00:00
def GetHighlightedWatcher ( self ) :
with self . _lock :
if self . _highlighted_watcher_url is not None :
for watcher in self . _watchers :
if watcher . GetURL ( ) == self . _highlighted_watcher_url :
return watcher
2018-08-01 20:44:57 +00:00
self . _highlighted_watcher_url = None
2018-06-20 20:20:22 +00:00
return None
2018-08-08 20:29:54 +00:00
def GetLastTimeWatchersChanged ( self ) :
with self . _lock :
return self . _last_time_watchers_changed
2018-05-30 20:13:21 +00:00
def GetNumDead ( self ) :
2018-05-23 21:05:06 +00:00
with self . _lock :
2018-05-30 20:13:21 +00:00
return len ( [ watcher for watcher in self . _watchers if watcher . IsDead ( ) ] )
2018-05-23 21:05:06 +00:00
2019-10-16 20:47:55 +00:00
def GetNumSeeds ( self ) :
with self . _lock :
return sum ( ( watcher . GetNumSeeds ( ) for watcher in self . _watchers ) )
2018-08-08 20:29:54 +00:00
def GetNumWatchers ( self ) :
with self . _lock :
return len ( self . _watchers )
2018-06-20 20:20:22 +00:00
def GetOptions ( self ) :
with self . _lock :
return ( self . _checker_options , self . _file_import_options , self . _tag_import_options )
2020-06-11 12:01:08 +00:00
def GetTotalStatus(self) -> ClientImportFileSeeds.FileSeedCacheStatus:
    """Return the aggregate status, regenerating it first if flagged dirty."""
    with self._lock:
        needs_rebuild = self._status_dirty

        if needs_rebuild:
            self._RegenerateStatus()

        return self._status_cache
def GetValueRange ( self ) :
with self . _lock :
total_value = 0
total_range = 0
for watcher in self . _watchers :
( value , range ) = watcher . GetValueRange ( )
if value != range :
total_value + = value
total_range + = range
return ( total_value , total_range )
def GetWatchers ( self ) :
with self . _lock :
return list ( self . _watchers )
2018-06-20 20:20:22 +00:00
def GetWatcherSimpleStatus ( self , watcher ) :
with self . _lock :
watcher_key = watcher . GetWatcherKey ( )
if watcher_key in self . _watcher_keys_to_added_timestamps :
added_timestamp = self . _watcher_keys_to_added_timestamps [ watcher_key ]
if HydrusData . TimeHasPassed ( added_timestamp + self . ADDED_TIMESTAMP_DURATION ) :
self . _CleanAddedTimestamps ( )
else :
2021-02-11 01:59:52 +00:00
return ( ClientImporting . DOWNLOADER_SIMPLE_STATUS_WORKING , ' just added ' )
2018-06-20 20:20:22 +00:00
if watcher_key in self . _watcher_keys_to_already_in_timestamps :
already_in_timestamp = self . _watcher_keys_to_already_in_timestamps [ watcher_key ]
if HydrusData . TimeHasPassed ( already_in_timestamp + self . ADDED_TIMESTAMP_DURATION ) :
self . _CleanAddedTimestamps ( )
else :
2021-02-11 01:59:52 +00:00
return ( ClientImporting . DOWNLOADER_SIMPLE_STATUS_WORKING , ' already watching ' )
2018-06-20 20:20:22 +00:00
return watcher . GetSimpleStatus ( )
2021-09-22 21:12:34 +00:00
def HasSerialisableChangesSince ( self , since_timestamp ) :
with self . _lock :
if self . _last_serialisable_change_timestamp > since_timestamp :
return True
for watcher in self . _watchers :
if watcher . HasSerialisableChangesSince ( since_timestamp ) :
return True
return False
2018-05-23 21:05:06 +00:00
def RemoveWatcher ( self , watcher_key ) :
with self . _lock :
self . _RemoveWatcher ( watcher_key )
self . _SetDirty ( )
2021-09-22 21:12:34 +00:00
self . _SerialisableChangeMade ( )
2018-05-23 21:05:06 +00:00
2018-06-20 20:20:22 +00:00
def SetHighlightedWatcher ( self , highlighted_watcher ) :
with self . _lock :
2021-09-22 21:12:34 +00:00
highlighted_watcher_url = highlighted_watcher . GetURL ( )
if highlighted_watcher_url != self . _highlighted_watcher_url :
2018-06-20 20:20:22 +00:00
2021-09-22 21:12:34 +00:00
self . _highlighted_watcher_url = highlighted_watcher_url
2018-06-20 20:20:22 +00:00
2021-09-22 21:12:34 +00:00
self . _SerialisableChangeMade ( )
2018-06-20 20:20:22 +00:00
def SetOptions ( self , checker_options , file_import_options , tag_import_options ) :
with self . _lock :
2021-09-22 21:12:34 +00:00
if checker_options . DumpToString ( ) != self . _checker_options . DumpToString ( ) :
self . _checker_options = checker_options
self . _SerialisableChangeMade ( )
if file_import_options . DumpToString ( ) != self . _file_import_options . DumpToString ( ) :
self . _file_import_options = file_import_options
self . _SerialisableChangeMade ( )
if tag_import_options . DumpToString ( ) != self . _tag_import_options . DumpToString ( ) :
self . _tag_import_options = tag_import_options
self . _SerialisableChangeMade ( )
2018-06-20 20:20:22 +00:00
2018-05-23 21:05:06 +00:00
def Start ( self , page_key ) :
with self . _lock :
2021-07-07 20:48:57 +00:00
if self . _have_started :
return
2018-05-23 21:05:06 +00:00
2021-07-07 20:48:57 +00:00
self . _page_key = page_key
2018-05-23 21:05:06 +00:00
2021-07-07 20:48:57 +00:00
# set a 2s period so the page value/range is breddy snappy
self . _watchers_repeating_job = HG . client_controller . CallRepeating ( ClientImporting . GetRepeatingJobInitialDelay ( ) , 2.0 , self . REPEATINGWorkOnWatchers )
2018-08-01 20:44:57 +00:00
2021-07-07 20:48:57 +00:00
for watcher in self . _watchers :
publish_to_page = False
if self . _highlighted_watcher_url is not None and watcher . GetURL ( ) == self . _highlighted_watcher_url :
publish_to_page = True
2018-08-01 20:44:57 +00:00
2021-07-07 20:48:57 +00:00
watcher . Start ( page_key , publish_to_page )
2018-08-01 20:44:57 +00:00
2021-07-07 20:48:57 +00:00
self . _have_started = True
2018-05-23 21:05:06 +00:00
def REPEATINGWorkOnWatchers(self):
    """Periodic job: refresh the dirty flag and publish page-name updates.

    Cancels the repeating job when the page should stop working. Otherwise
    it flags the cached status dirty if any watcher's file seed cache
    status is newer than the cache. When the publish check time has
    passed, it schedules the next check 5 seconds ahead and publishes
    ' refresh_page_name ' if the overall value/range has changed.
    """
    with self._lock:
        if ClientImportControl.PageImporterShouldStopWorking(self._page_key):
            self._watchers_repeating_job.Cancel()
            return

        # only hunt for updates while we still believe the cache is clean
        if not self._status_dirty:
            updated = any(
                member.GetFileSeedCache().GetStatus().GetGenerationTime() > self._status_cache.GetGenerationTime()
                for member in self._watchers
            )

            if updated:
                self._SetDirty()

    # Runs without the lock held: GetValueRange acquires self._lock, which
    # is a plain threading.Lock in __init__ and cannot be re-acquired.
    if not HydrusData.TimeHasPassed(self._next_pub_value_check_time):
        return

    self._next_pub_value_check_time = HydrusData.GetNow() + 5

    latest_value_range = self.GetValueRange()

    if latest_value_range != self._last_pubbed_value_range:
        self._last_pubbed_value_range = latest_value_range
        HG.client_controller.pub(' refresh_page_name ', self._page_key)
HydrusSerialisable . SERIALISABLE_TYPES_TO_OBJECT_TYPES [ HydrusSerialisable . SERIALISABLE_TYPE_MULTIPLE_WATCHER_IMPORT ] = MultipleWatcherImport
class WatcherImport ( HydrusSerialisable . SerialisableBase ) :
SERIALISABLE_TYPE = HydrusSerialisable . SERIALISABLE_TYPE_WATCHER_IMPORT
SERIALISABLE_NAME = ' Watcher '
2020-09-16 20:46:54 +00:00
SERIALISABLE_VERSION = 8
2018-05-23 21:05:06 +00:00
MIN_CHECK_PERIOD = 30
def __init__(self):
    """Create a watcher with no URL, default options and idle state."""
    HydrusSerialisable.SerialisableBase.__init__(self)

    # page attachment
    self._page_key = b' initialising page key '
    self._publish_to_page = False

    # what is being watched, and what has been found so far
    self._url = ' '
    self._gallery_seed_log = ClientImportGallerySeeds.GallerySeedLog()
    self._file_seed_cache = ClientImportFileSeeds.FileSeedCache()

    # tags supplied from outside, handed to each gallery seed on a check
    self._external_filterable_tags = set()
    self._external_additional_service_keys_to_tags = ClientTags.ServiceKeysToTags()

    default_options = HG.client_controller.new_options
    self._checker_options = default_options.GetDefaultWatcherCheckerOptions()
    self._file_import_options = default_options.GetDefaultFileImportOptions(' loud ')
    self._tag_import_options = TagImportOptions.TagImportOptions(is_default=True)

    # checking state
    self._last_check_time = 0
    self._checking_status = ClientImporting.CHECKER_STATUS_OK
    self._subject = ' unknown subject '
    self._next_check_time = None

    # network jobs currently in flight, set by the presentation contexts
    self._file_network_job = None
    self._checker_network_job = None

    self._check_now = False
    self._files_paused = False
    self._checking_paused = False

    # set by _DelayWork
    self._no_work_until = 0
    self._no_work_until_reason = ' '

    self._creation_time = HydrusData.GetNow()

    # status strings
    self._file_velocity_status = ' '
    self._files_status = ' '
    self._watcher_status = ' '

    self._watcher_key = HydrusData.GenerateKey()

    self._have_started = False

    self._lock = threading.Lock()
    self._files_working_lock = threading.Lock()
    self._checker_working_lock = threading.Lock()

    self._last_pubbed_page_name = ' '

    self._files_repeating_job = None
    self._checker_repeating_job = None

    self._last_serialisable_change_timestamp = 0

    HG.client_controller.sub(self, ' NotifyFileSeedsUpdated ', ' file_seed_cache_file_seeds_updated ')
2018-05-23 21:05:06 +00:00
2018-08-01 20:44:57 +00:00
def _CheckerNetworkJobPresentationContextFactory(self, network_job):
    """Build a presentation context that tracks the checker network job.

    The context's enter callback stores network_job in
    self._checker_network_job; its exit callback resets that to None.
    Both take self._lock.
    """
    def remember_job():
        with self._lock:
            self._checker_network_job = network_job

    def forget_job():
        with self._lock:
            self._checker_network_job = None

    return ClientImporting.NetworkJobPresentationContext(remember_job, forget_job)
2018-05-23 21:05:06 +00:00
def _CheckWatchableURL(self):
    """Run one check of the watched URL and update the check bookkeeping.

    A gallery seed for self._url is logged and worked. New file URLs wake
    the files job, a 404 or an error status pauses checking, and a
    network exception delays further work. Afterwards the last check
    time, velocity status and next check time are refreshed and the
    gallery seed log is compacted.
    """
    def add_file_seeds(file_seeds):
        return ClientImporting.UpdateFileSeedCacheWithFileSeeds(self._file_seed_cache, file_seeds)

    def set_status(text):
        with self._lock:
            self._watcher_status = ClientImportControl.NeatenStatusText(text)

    def set_title(text):
        with self._lock:
            # only the first line of the title is kept
            if len(text) > 0:
                text = text.splitlines()[0]

            self._subject = text

    gallery_seed = ClientImportGallerySeeds.GallerySeed(self._url, can_generate_more_pages=False)

    gallery_seed.SetExternalFilterableTags(self._external_filterable_tags)
    gallery_seed.SetExternalAdditionalServiceKeysToTags(self._external_additional_service_keys_to_tags)

    self._gallery_seed_log.AddGallerySeeds((gallery_seed,))

    with self._lock:
        self._watcher_status = ' checking '

    try:
        (
            num_urls_added,
            _num_urls_already_in_file_seed_cache,
            _num_urls_total,
            result_404,
            _added_new_gallery_pages,
            _stop_reason,
        ) = gallery_seed.WorkOnURL(
            ' watcher ',
            self._gallery_seed_log,
            add_file_seeds,
            set_status,
            set_title,
            self._NetworkJobFactory,
            self._CheckerNetworkJobPresentationContextFactory,
            self._file_import_options,
        )

        if num_urls_added > 0:
            ClientImporting.WakeRepeatingJob(self._files_repeating_job)

        if result_404:
            with self._lock:
                self._checking_paused = True
                self._checking_status = ClientImporting.CHECKER_STATUS_404

        if gallery_seed.status == CC.STATUS_ERROR:
            # the [DEAD] stuff can override watcher status, so let's give a brief time for this to display the error
            with self._lock:
                self._checking_paused = True
                self._watcher_status = gallery_seed.note

            # NOTE(review): nesting was lost in this view; the sleep is
            # assumed to run after the lock is released - confirm
            time.sleep(5)

    except HydrusExceptions.NetworkException as e:
        delay = HG.client_controller.new_options.GetInteger(' downloader_network_error_delay ')

        self._DelayWork(delay, str(e))

        gallery_seed.SetStatus(CC.STATUS_ERROR, str(e))

        HydrusData.PrintException(e)

    finally:
        self._gallery_seed_log.NotifyGallerySeedsUpdated((gallery_seed,))

    with self._lock:
        if self._check_now:
            self._check_now = False

        self._last_check_time = HydrusData.GetNow()

        self._UpdateFileVelocityStatus()
        self._UpdateNextCheckTime()
        self._Compact()

        self._watcher_status = ' '
2018-07-18 21:07:15 +00:00
2018-05-23 21:05:06 +00:00
2018-08-22 21:10:59 +00:00
def _Compact ( self ) :
death_period = self . _checker_options . GetDeathFileVelocityPeriod ( )
compact_before_this_time = self . _last_check_time - ( death_period * 2 )
self . _gallery_seed_log . Compact ( compact_before_this_time )
2018-05-23 21:05:06 +00:00
def _DelayWork(self, time_delta, reason):
    """Postpone work and remember why.

    Args:
        time_delta: added to the current time to give the moment work may
            resume.
        reason: explanation text; only its first line is stored.
    """
    first_line = reason.splitlines()[0] if len(reason) > 0 else reason

    self._no_work_until = HydrusData.GetNow() + time_delta
    self._no_work_until_reason = first_line
def _FileNetworkJobPresentationContextFactory(self, network_job):
    """Build a presentation context that tracks the file network job.

    The context's enter callback stores network_job in
    self._file_network_job; its exit callback resets that to None.
    Both take self._lock.
    """
    def remember_job():
        with self._lock:
            self._file_network_job = network_job

    def forget_job():
        with self._lock:
            self._file_network_job = None

    return ClientImporting.NetworkJobPresentationContext(remember_job, forget_job)
2018-08-15 20:40:30 +00:00
def _NetworkJobFactory(self, *args, **kwargs):
    """Create a NetworkJobWatcherPage for this watcher.

    The watcher key is passed first; all other arguments are forwarded
    unchanged.
    """
    return ClientNetworkingJobs.NetworkJobWatcherPage(self._watcher_key, *args, **kwargs)
2018-05-23 21:05:06 +00:00
def _GetSerialisableInfo ( self ) :
2018-07-04 20:48:28 +00:00
serialisable_gallery_seed_log = self . _gallery_seed_log . GetSerialisableTuple ( )
2018-06-27 19:27:05 +00:00
serialisable_file_seed_cache = self . _file_seed_cache . GetSerialisableTuple ( )
2018-07-04 20:48:28 +00:00
2020-09-16 20:46:54 +00:00
serialisable_external_filterable_tags = list ( self . _external_filterable_tags )
serialisable_external_additional_service_keys_to_tags = self . _external_additional_service_keys_to_tags . GetSerialisableTuple ( )
2019-02-27 23:03:30 +00:00
2018-05-23 21:05:06 +00:00
serialisable_checker_options = self . _checker_options . GetSerialisableTuple ( )
2018-07-11 20:23:51 +00:00
serialisable_file_import_options = self . _file_import_options . GetSerialisableTuple ( )
serialisable_tag_import_options = self . _tag_import_options . GetSerialisableTuple ( )
2018-05-23 21:05:06 +00:00
2020-09-16 20:46:54 +00:00
return (
self . _url ,
serialisable_gallery_seed_log ,
serialisable_file_seed_cache ,
serialisable_external_filterable_tags ,
serialisable_external_additional_service_keys_to_tags ,
serialisable_checker_options ,
serialisable_file_import_options ,
serialisable_tag_import_options ,
self . _last_check_time ,
self . _files_paused ,
self . _checking_paused ,
self . _checking_status ,
self . _subject ,
self . _no_work_until ,
self . _no_work_until_reason ,
self . _creation_time
)
2018-05-23 21:05:06 +00:00
def _HasURL ( self ) :
return self . _url != ' '
def _InitialiseFromSerialisableInfo(self, serialisable_info):
    """Restore state from the 16-tuple produced by _GetSerialisableInfo."""
    (
        self._url,
        gallery_seed_log_info,
        file_seed_cache_info,
        filterable_tags_info,
        additional_tags_info,
        checker_options_info,
        file_import_options_info,
        tag_import_options_info,
        self._last_check_time,
        self._files_paused,
        self._checking_paused,
        self._checking_status,
        self._subject,
        self._no_work_until,
        self._no_work_until_reason,
        self._creation_time,
    ) = serialisable_info

    rebuild = HydrusSerialisable.CreateFromSerialisableTuple

    # stored as a list, held in memory as a set
    self._external_filterable_tags = set(filterable_tags_info)
    self._external_additional_service_keys_to_tags = rebuild(additional_tags_info)

    self._gallery_seed_log = rebuild(gallery_seed_log_info)
    self._file_seed_cache = rebuild(file_seed_cache_info)

    self._checker_options = rebuild(checker_options_info)
    self._file_import_options = rebuild(file_import_options_info)
    self._tag_import_options = rebuild(tag_import_options_info)
2018-05-23 21:05:06 +00:00
2021-09-22 21:12:34 +00:00
def _SerialisableChangeMade(self):
    """Record the current time as this watcher's last serialisable change."""
    now = HydrusData.GetNow()
    self._last_serialisable_change_timestamp = now
2018-05-23 21:05:06 +00:00
def _UpdateFileVelocityStatus ( self ) :
2018-06-27 19:27:05 +00:00
self . _file_velocity_status = self . _checker_options . GetPrettyCurrentVelocity ( self . _file_seed_cache , self . _last_check_time )
2018-05-23 21:05:06 +00:00
def _UpdateNextCheckTime ( self ) :
if self . _check_now :
self . _next_check_time = self . _last_check_time + self . MIN_CHECK_PERIOD
else :
if not HydrusData . TimeHasPassed ( self . _no_work_until ) :
self . _next_check_time = self . _no_work_until + 1
else :
2018-08-08 20:29:54 +00:00
if self . _checking_status == ClientImporting . CHECKER_STATUS_OK :
2018-05-23 21:05:06 +00:00
2018-06-27 19:27:05 +00:00
if self . _checker_options . IsDead ( self . _file_seed_cache , self . _last_check_time ) :
2018-05-23 21:05:06 +00:00
self . _checking_status = ClientImporting . CHECKER_STATUS_DEAD
2018-08-08 20:29:54 +00:00
if self . _checking_status != ClientImporting . CHECKER_STATUS_OK :
self . _checking_paused = True
2018-05-23 21:05:06 +00:00
2018-06-27 19:27:05 +00:00
last_next_check_time = self . _next_check_time
self . _next_check_time = self . _checker_options . GetNextCheckTime ( self . _file_seed_cache , self . _last_check_time , last_next_check_time )
2018-05-23 21:05:06 +00:00
def _UpdateSerialisableInfo ( self , version , old_serialisable_info ) :
# Migrates a stored serialisable tuple one version forward.
# Each branch unpacks the layout of `version`, fills in defaults for the
# fields introduced by the next version, and returns
# ( next_version, new_serialisable_info ).
# Tuple positions are significant: the order of names in each tuple is the
# on-disk layout, so they must not be reordered.
# Versions other than 1-7 fall through every branch and return None.
# v1 -> v2: times_to_check/check_period are dropped in favour of a default
# CheckerOptions, and the single `paused` flag becomes files_paused and
# checking_paused.
if version == 1 :
2018-07-11 20:23:51 +00:00
( url , serialisable_file_seed_cache , urls_to_filenames , urls_to_md5_base64 , serialisable_file_import_options , serialisable_tag_import_options , times_to_check , check_period , last_check_time , paused ) = old_serialisable_info
2018-05-23 21:05:06 +00:00
checker_options = ClientImportOptions . CheckerOptions ( intended_files_per_check = 8 , never_faster_than = 300 , never_slower_than = 86400 , death_file_velocity = ( 1 , 86400 ) )
serialisable_checker_options = checker_options . GetSerialisableTuple ( )
files_paused = paused
checking_paused = paused
2018-07-11 20:23:51 +00:00
new_serialisable_info = ( url , serialisable_file_seed_cache , urls_to_filenames , urls_to_md5_base64 , serialisable_checker_options , serialisable_file_import_options , serialisable_tag_import_options , last_check_time , files_paused , checking_paused )
2018-05-23 21:05:06 +00:00
return ( 2 , new_serialisable_info )
# v2 -> v3: appends checking_status (defaulting to OK) and a placeholder subject.
if version == 2 :
2018-07-11 20:23:51 +00:00
( url , serialisable_file_seed_cache , urls_to_filenames , urls_to_md5_base64 , serialisable_checker_options , serialisable_file_import_options , serialisable_tag_import_options , last_check_time , files_paused , checking_paused ) = old_serialisable_info
2018-05-23 21:05:06 +00:00
checking_status = ClientImporting . CHECKER_STATUS_OK
subject = ' unknown subject '
2018-07-11 20:23:51 +00:00
new_serialisable_info = ( url , serialisable_file_seed_cache , urls_to_filenames , urls_to_md5_base64 , serialisable_checker_options , serialisable_file_import_options , serialisable_tag_import_options , last_check_time , files_paused , checking_paused , checking_status , subject )
2018-05-23 21:05:06 +00:00
return ( 3 , new_serialisable_info )
# v3 -> v4: appends no_work_until (0) and its reason text.
if version == 3 :
2018-07-11 20:23:51 +00:00
( url , serialisable_file_seed_cache , urls_to_filenames , urls_to_md5_base64 , serialisable_checker_options , serialisable_file_import_options , serialisable_tag_import_options , last_check_time , files_paused , checking_paused , checking_status , subject ) = old_serialisable_info
2018-05-23 21:05:06 +00:00
no_work_until = 0
no_work_until_reason = ' '
2018-07-11 20:23:51 +00:00
new_serialisable_info = ( url , serialisable_file_seed_cache , urls_to_filenames , urls_to_md5_base64 , serialisable_checker_options , serialisable_file_import_options , serialisable_tag_import_options , last_check_time , files_paused , checking_paused , checking_status , subject , no_work_until , no_work_until_reason )
2018-05-23 21:05:06 +00:00
return ( 4 , new_serialisable_info )
2018-06-20 20:20:22 +00:00
# v4 -> v5: appends creation_time, set to the time of the migration.
if version == 4 :
2018-07-11 20:23:51 +00:00
( url , serialisable_file_seed_cache , urls_to_filenames , urls_to_md5_base64 , serialisable_checker_options , serialisable_file_import_options , serialisable_tag_import_options , last_check_time , files_paused , checking_paused , checking_status , subject , no_work_until , no_work_until_reason ) = old_serialisable_info
2018-06-20 20:20:22 +00:00
creation_time = HydrusData . GetNow ( )
2018-07-11 20:23:51 +00:00
new_serialisable_info = ( url , serialisable_file_seed_cache , urls_to_filenames , urls_to_md5_base64 , serialisable_checker_options , serialisable_file_import_options , serialisable_tag_import_options , last_check_time , files_paused , checking_paused , checking_status , subject , no_work_until , no_work_until_reason , creation_time )
2018-06-20 20:20:22 +00:00
return ( 5 , new_serialisable_info )
2018-07-04 20:48:28 +00:00
# v5 -> v6: inserts a freshly created gallery seed log immediately after url.
if version == 5 :
2018-07-11 20:23:51 +00:00
( url , serialisable_file_seed_cache , urls_to_filenames , urls_to_md5_base64 , serialisable_checker_options , serialisable_file_import_options , serialisable_tag_import_options , last_check_time , files_paused , checking_paused , checking_status , subject , no_work_until , no_work_until_reason , creation_time ) = old_serialisable_info
2018-07-04 20:48:28 +00:00
gallery_seed_log = ClientImportGallerySeeds . GallerySeedLog ( )
serialisable_gallery_seed_log = gallery_seed_log . GetSerialisableTuple ( )
2018-07-11 20:23:51 +00:00
new_serialisable_info = ( url , serialisable_gallery_seed_log , serialisable_file_seed_cache , urls_to_filenames , urls_to_md5_base64 , serialisable_checker_options , serialisable_file_import_options , serialisable_tag_import_options , last_check_time , files_paused , checking_paused , checking_status , subject , no_work_until , no_work_until_reason , creation_time )
2018-07-04 20:48:28 +00:00
return ( 6 , new_serialisable_info )
2019-02-27 23:03:30 +00:00
# v6 -> v7: urls_to_filenames and urls_to_md5_base64 are unpacked but not
# carried forward; a newly created ServiceKeysToTags takes their place.
if version == 6 :
( url , serialisable_gallery_seed_log , serialisable_file_seed_cache , urls_to_filenames , urls_to_md5_base64 , serialisable_checker_options , serialisable_file_import_options , serialisable_tag_import_options , last_check_time , files_paused , checking_paused , checking_status , subject , no_work_until , no_work_until_reason , creation_time ) = old_serialisable_info
2020-09-16 20:46:54 +00:00
external_additional_service_keys_to_tags = ClientTags . ServiceKeysToTags ( )
2019-02-27 23:03:30 +00:00
2020-09-16 20:46:54 +00:00
serialisable_external_additional_service_keys_to_tags = external_additional_service_keys_to_tags . GetSerialisableTuple ( )
2019-02-27 23:03:30 +00:00
2020-09-16 20:46:54 +00:00
new_serialisable_info = ( url , serialisable_gallery_seed_log , serialisable_file_seed_cache , serialisable_external_additional_service_keys_to_tags , serialisable_checker_options , serialisable_file_import_options , serialisable_tag_import_options , last_check_time , files_paused , checking_paused , checking_status , subject , no_work_until , no_work_until_reason , creation_time )
2019-02-27 23:03:30 +00:00
return ( 7 , new_serialisable_info )
2020-09-16 20:46:54 +00:00
# v7 -> v8: inserts an empty list of external filterable tags before the
# external additional service-keys-to-tags entry.
if version == 7 :
( url , serialisable_gallery_seed_log , serialisable_file_seed_cache , serialisable_external_additional_service_keys_to_tags , serialisable_checker_options , serialisable_file_import_options , serialisable_tag_import_options , last_check_time , files_paused , checking_paused , checking_status , subject , no_work_until , no_work_until_reason , creation_time ) = old_serialisable_info
filterable_tags = set ( )
serialisable_external_filterable_tags = list ( filterable_tags )
new_serialisable_info = ( url , serialisable_gallery_seed_log , serialisable_file_seed_cache , serialisable_external_filterable_tags , serialisable_external_additional_service_keys_to_tags , serialisable_checker_options , serialisable_file_import_options , serialisable_tag_import_options , last_check_time , files_paused , checking_paused , checking_status , subject , no_work_until , no_work_until_reason , creation_time )
return ( 8 , new_serialisable_info )
2018-05-23 21:05:06 +00:00
def _WorkOnFiles(self):
    """Work on the next file seed with unknown status, if there is one.

    The seed is asked to work on its URL, is presented to the current page
    when publishing is enabled and the seed qualifies, and the files status
    text is then cleared. If substantial work was done (or the file was
    presented), sleeps for the configured minimum time.
    """
    next_seed = self._file_seed_cache.GetNextFileSeed(CC.STATUS_UNKNOWN)

    if next_seed is None:
        return

    def report_status(text):
        # Passed to the seed as its status hook; it writes under the lock.
        with self._lock:
            self._files_status = ClientImportControl.NeatenStatusText(text)

    worked_substantially = next_seed.WorkOnURL(
        self._file_seed_cache,
        report_status,
        self._NetworkJobFactory,
        self._FileNetworkJobPresentationContextFactory,
        self._file_import_options,
        self._tag_import_options,
    )

    # Snapshot the presentation decision and page key under the lock, then
    # present without holding it.
    with self._lock:
        wants_presentation = self._publish_to_page and next_seed.ShouldPresent(self._file_import_options.GetPresentationImportOptions())
        target_page_key = self._page_key

    if wants_presentation:
        next_seed.PresentToPage(target_page_key)
        worked_substantially = True

    with self._lock:
        self._files_status = ' '

    if worked_substantially:
        time.sleep(ClientImporting.DID_SUBSTANTIAL_FILE_WORK_MINIMUM_SLEEP_TIME)
2019-08-21 21:34:01 +00:00
def GetAPIInfoDict(self, simple):
    """Return a dict describing this watcher.

    :param simple: forwarded unchanged to the file seed cache's and gallery
        seed log's own GetAPIInfoDict calls.
    :return: a new dict built while holding the lock; the watcher key is
        given as a hex string.
    """
    with self._lock:
        return {
            ' url ': self._url,
            ' watcher_key ': self._watcher_key.hex(),
            ' created ': self._creation_time,
            ' last_check_time ': self._last_check_time,
            ' next_check_time ': self._next_check_time,
            ' files_paused ': self._files_paused,
            ' checking_paused ': self._checking_paused,
            ' checking_status ': self._checking_status,
            ' subject ': self._subject,
            ' imports ': self._file_seed_cache.GetAPIInfoDict(simple),
            ' gallery_log ': self._gallery_seed_log.GetAPIInfoDict(simple),
        }
2018-10-03 21:00:15 +00:00
def CanRetryFailed(self):
    """Return True if the file seed cache holds at least one STATUS_ERROR seed."""
    with self._lock:
        num_failed = self._file_seed_cache.GetFileSeedCount(CC.STATUS_ERROR)

        return num_failed > 0
2020-01-02 03:05:35 +00:00
def CanRetryIgnored(self):
    """Return True if the file seed cache holds at least one STATUS_VETOED seed."""
    with self._lock:
        num_ignored = self._file_seed_cache.GetFileSeedCount(CC.STATUS_VETOED)

        return num_ignored > 0
2018-08-01 20:44:57 +00:00
def CheckingPaused(self):
    """Return the current checking-paused flag, read under the lock."""
    with self._lock:
        is_paused = self._checking_paused

        return is_paused
2018-05-23 21:05:06 +00:00
def CheckNow(self):
    """Force a check: clear pause, delay and non-OK status, then wake the checker.

    Sets the check-now flag, resets the no-work delay and its reason, puts
    the checking status back to OK, recomputes the next check time, wakes
    the checker repeating job and records a serialisable change.
    """
    with self._lock:
        # Request the check and remove everything that would block it.
        self._check_now = True
        self._checking_paused = False

        self._no_work_until = 0
        self._no_work_until_reason = ' '

        self._checking_status = ClientImporting.CHECKER_STATUS_OK

        self._UpdateNextCheckTime()

        ClientImporting.WakeRepeatingJob(self._checker_repeating_job)

        self._SerialisableChangeMade()
2018-05-23 21:05:06 +00:00
def CurrentlyAlive(self):
    """Return True while the checking status is CHECKER_STATUS_OK."""
    with self._lock:
        status = self._checking_status

        return status == ClientImporting.CHECKER_STATUS_OK
def CurrentlyWorking(self):
    """Return True when there is file work to do and files are not paused."""
    with self._lock:
        has_work = bool(self._file_seed_cache.WorkToDo())

        if not has_work:
            return False

        return not self._files_paused
2018-08-01 20:44:57 +00:00
def FilesPaused(self):
    """Return the current files-paused flag, read under the lock."""
    with self._lock:
        is_paused = self._files_paused

        return is_paused
2018-05-23 21:05:06 +00:00
def GetCheckerOptions(self):
    """Return the checker options object currently in use (not a copy)."""
    with self._lock:
        options = self._checker_options

        return options
2021-11-10 21:53:57 +00:00
def GetCheckingStatus(self):
    """Return the current checking status value, read under the lock."""
    with self._lock:
        status = self._checking_status

        return status
2018-06-20 20:20:22 +00:00
def GetCreationTime(self):
    """Return the stored creation time, read under the lock."""
    with self._lock:
        created = self._creation_time

        return created
2018-08-01 20:44:57 +00:00
def GetFileImportOptions(self):
    """Return the file import options object currently in use (not a copy)."""
    with self._lock:
        options = self._file_import_options

        return options
2018-05-23 21:05:06 +00:00
2018-08-01 20:44:57 +00:00
def GetFileSeedCache(self):
    """Return the watcher's file seed cache object (not a copy)."""
    with self._lock:
        seed_cache = self._file_seed_cache

        return seed_cache
2018-05-23 21:05:06 +00:00
2018-08-01 20:44:57 +00:00
def GetGallerySeedLog(self):
    """Return the watcher's gallery seed log object (not a copy)."""
    with self._lock:
        seed_log = self._gallery_seed_log

        return seed_log
2018-05-23 21:05:06 +00:00
2018-11-21 22:22:36 +00:00
def GetHashes(self):
    """Return whatever the file seed cache's GetHashes() yields.

    Only the reference to the cache is taken under the lock; the cache is
    queried after the lock has been released.
    """
    with self._lock:
        seed_cache = self._file_seed_cache

    hashes = seed_cache.GetHashes()

    return hashes
2018-11-21 22:22:36 +00:00
2018-08-01 20:44:57 +00:00
def GetNetworkJobs(self):
    """Return a ( file network job, checker network job ) pair."""
    with self._lock:
        file_job = self._file_network_job
        checker_job = self._checker_network_job

        return (file_job, checker_job)
2021-04-28 21:43:16 +00:00
def GetNextCheckTime(self):
    """Return the stored next check time, read under the lock."""
    with self._lock:
        next_check = self._next_check_time

        return next_check
2019-10-16 20:47:55 +00:00
def GetNumSeeds(self):
    """Return len(file seed cache) plus len(gallery seed log)."""
    with self._lock:
        num_file_seeds = len(self._file_seed_cache)
        num_gallery_seeds = len(self._gallery_seed_log)

        return num_file_seeds + num_gallery_seeds
2018-08-01 20:44:57 +00:00
def GetOptions(self):
    """Return a ( url, file import options, tag import options ) triple."""
    with self._lock:
        options = (self._url, self._file_import_options, self._tag_import_options)

        return options
2018-07-04 20:48:28 +00:00
2018-08-01 20:44:57 +00:00
2021-11-24 21:59:58 +00:00
def GetPresentedHashes(self, presentation_import_options=None):
    """Return the file seed cache's presented hashes.

    :param presentation_import_options: options handed to the seed cache;
        when None, the ones from this watcher's file import options are used.
    :return: whatever the seed cache's GetPresentedHashes returns.

    The cache reference and default options are resolved under the lock;
    the cache itself is queried after the lock has been released.
    """
    with self._lock:
        seed_cache = self._file_seed_cache

        if presentation_import_options is None:
            presentation_import_options = self._file_import_options.GetPresentationImportOptions()

    presented = seed_cache.GetPresentedHashes(presentation_import_options)

    return presented
2020-08-05 20:10:36 +00:00
2018-07-04 20:48:28 +00:00
2018-05-23 21:05:06 +00:00
def GetSimpleStatus(self):
    """Return a ( simple status constant, text ) pair for this watcher.

    Precedence, first match wins:
      1. an unexpired no-work delay -> DEFERRED with the delay reason;
      2. checker or file work ready -> WORKING if either working lock is
         held, otherwise PENDING;
      3. checking status 404 or DEAD -> DONE;
      4. checking paused -> PAUSED;
      5. no next check time, or it has passed -> PENDING;
      6. otherwise -> DEFERRED with the time until the next check.
    """
    with self._lock:
        has_file_work = self._file_seed_cache.WorkToDo()

        checker_ready = HydrusData.TimeHasPassed(self._next_check_time) and not self._checking_paused
        files_ready = has_file_work and not self._files_paused

        if not HydrusData.TimeHasPassed(self._no_work_until):
            if self._next_check_time is None:
                deferral_text = ' {} - working again {} '.format(self._no_work_until_reason, ClientData.TimestampToPrettyTimeDelta(self._no_work_until))
            else:
                # Report whichever comes later: the delay end or the next check.
                deferral_text = ' {} - next check {} '.format(self._no_work_until_reason, ClientData.TimestampToPrettyTimeDelta(max(self._no_work_until, self._next_check_time)))

            return (ClientImporting.DOWNLOADER_SIMPLE_STATUS_DEFERRED, deferral_text)

        if checker_ready or files_ready:
            if self._checker_working_lock.locked() or self._files_working_lock.locked():
                return (ClientImporting.DOWNLOADER_SIMPLE_STATUS_WORKING, ' working ')

            return (ClientImporting.DOWNLOADER_SIMPLE_STATUS_PENDING, ' pending ')

        if self._checking_status == ClientImporting.CHECKER_STATUS_404:
            return (ClientImporting.DOWNLOADER_SIMPLE_STATUS_DONE, ' 404 ')

        if self._checking_status == ClientImporting.CHECKER_STATUS_DEAD:
            return (ClientImporting.DOWNLOADER_SIMPLE_STATUS_DONE, ' DEAD ')

        if self._checking_paused:
            return (ClientImporting.DOWNLOADER_SIMPLE_STATUS_PAUSED, ' ')

        if self._next_check_time is None or HydrusData.TimeHasPassed(self._next_check_time):
            return (ClientImporting.DOWNLOADER_SIMPLE_STATUS_PENDING, ' pending ')

        return (ClientImporting.DOWNLOADER_SIMPLE_STATUS_DEFERRED, ClientData.TimestampToPrettyTimeDelta(self._next_check_time, no_prefix=True))
2021-02-11 01:59:52 +00:00
2018-05-23 21:05:06 +00:00
def GetStatus(self):
    """Return the full status tuple for this watcher.

    :return: ( files_status, files_paused, file_velocity_status,
        next_check_time, watcher_status, subject, checking_status,
        check_now, checking_paused ).

    While a no-work delay is active, both status texts are the delay
    message. Otherwise this call also has a side effect: a side that is
    ready to work but whose working lock is not held has its stored status
    overwritten with a 'waiting for a work slot' message before the live
    texts are generated.
    """
    with self._lock:
        if HydrusData.TimeHasPassed(self._no_work_until):
            has_file_work = self._file_seed_cache.WorkToDo()

            checker_ready = HydrusData.TimeHasPassed(self._next_check_time) and not self._checking_paused
            files_ready = has_file_work and not self._files_paused

            if checker_ready and not self._checker_working_lock.locked():
                self._watcher_status = ' waiting for a work slot '

            if files_ready and not self._files_working_lock.locked():
                self._files_status = ' waiting for a work slot '

            files_status = ClientImportControl.GenerateLiveStatusText(self._files_status, self._files_paused, self._no_work_until, self._no_work_until_reason)
            watcher_status = ClientImportControl.GenerateLiveStatusText(self._watcher_status, self._checking_paused, self._no_work_until, self._no_work_until_reason)
        else:
            if self._next_check_time is None:
                deferral_text = ' {} - working again {} '.format(self._no_work_until_reason, ClientData.TimestampToPrettyTimeDelta(self._no_work_until))
            else:
                # Report whichever comes later: the delay end or the next check.
                deferral_text = ' {} - next check {} '.format(self._no_work_until_reason, ClientData.TimestampToPrettyTimeDelta(max(self._no_work_until, self._next_check_time)))

            files_status = deferral_text
            watcher_status = deferral_text

        return (
            files_status,
            self._files_paused,
            self._file_velocity_status,
            self._next_check_time,
            watcher_status,
            self._subject,
            self._checking_status,
            self._check_now,
            self._checking_paused,
        )
2018-05-23 21:05:06 +00:00
def GetSubject(self):
    """Return the stored subject, or a placeholder when it is None or blank."""
    with self._lock:
        subject = self._subject

        if subject is None or subject == ' ':
            return ' unknown subject '

        return subject
2018-08-01 20:44:57 +00:00
def GetTagImportOptions(self):
    """Return the tag import options object currently in use (not a copy)."""
    with self._lock:
        options = self._tag_import_options

        return options
2018-05-23 21:05:06 +00:00
def GetWatcherKey(self):
    """Return this watcher's key, read under the lock."""
    with self._lock:
        key = self._watcher_key

        return key
def GetURL(self):
    """Return the watcher's stored url, read under the lock."""
    with self._lock:
        url = self._url

        return url
def GetValueRange(self):
    """Return the file seed cache's GetValueRange() result, fetched under the lock."""
    with self._lock:
        value_range = self._file_seed_cache.GetValueRange()

        return value_range
2018-05-23 21:05:06 +00:00
2021-09-22 21:12:34 +00:00
def HasSerialisableChangesSince(self, since_timestamp):
    """Return True if the last serialisable change is strictly after since_timestamp."""
    last_change = self._last_serialisable_change_timestamp

    return last_change > since_timestamp
2018-05-23 21:05:06 +00:00
def HasURL(self):
    """Locked public wrapper around the private _HasURL check."""
    with self._lock:
        has_url = self._HasURL()

        return has_url
2018-05-30 20:13:21 +00:00
def _IsDead(self):
    """Return True when the checking status is 404 or DEAD. Takes no lock itself."""
    terminal_statuses = (ClientImporting.CHECKER_STATUS_404, ClientImporting.CHECKER_STATUS_DEAD)

    return self._checking_status in terminal_statuses
2018-05-23 21:05:06 +00:00
def IsDead(self):
    """Locked public wrapper around the private _IsDead check."""
    with self._lock:
        is_dead = self._IsDead()

        return is_dead
2018-05-23 21:05:06 +00:00
2018-06-27 19:27:05 +00:00
def NotifyFileSeedsUpdated(self, file_seed_cache_key, file_seeds):
    """React to a file-seeds-updated notification.

    :param file_seed_cache_key: key of the cache that changed; anything other
        than this watcher's own cache key is ignored.
    :param file_seeds: the updated seeds (not used here).
    """
    own_cache_key = self._file_seed_cache.GetFileSeedCacheKey()

    if file_seed_cache_key != own_cache_key:
        return

    ClientImporting.WakeRepeatingJob(self._files_repeating_job)

    self._SerialisableChangeMade()
2018-05-23 21:05:06 +00:00
2018-08-01 20:44:57 +00:00
def PausePlayChecking(self):
    """Toggle the checking-paused flag, wake the checker and record the change.

    A watcher that is both paused and dead is left untouched.
    """
    with self._lock:
        if self._checking_paused and self._IsDead():
            # watcher is dead, so don't unpause until a checknow event
            return

        self._checking_paused = not self._checking_paused

        ClientImporting.WakeRepeatingJob(self._checker_repeating_job)

        self._SerialisableChangeMade()
2018-05-23 21:05:06 +00:00
def PausePlayFiles(self):
    """Toggle the files-paused flag, wake the files job and record the change."""
    with self._lock:
        now_paused = not self._files_paused
        self._files_paused = now_paused

        ClientImporting.WakeRepeatingJob(self._files_repeating_job)

        self._SerialisableChangeMade()
2018-05-23 21:05:06 +00:00
2018-08-01 20:44:57 +00:00
def PublishToPage(self, publish_to_page):
    """Store the publish-to-page flag under the lock.

    :param publish_to_page: the new flag value, stored as given.
    """
    with self._lock:
        self._publish_to_page = publish_to_page
2018-05-30 20:13:21 +00:00
2018-08-01 20:44:57 +00:00
def Repage(self, page_key):
    """Store a new page key under the lock.

    :param page_key: the new page key, stored as given.
    """
    with self._lock:
        self._page_key = page_key
2018-10-03 21:00:15 +00:00
def RetryFailed(self):
    """Ask the file seed cache to retry failed seeds, then record the change."""
    with self._lock:
        seed_cache = self._file_seed_cache
        seed_cache.RetryFailed()

        self._SerialisableChangeMade()
2018-10-03 21:00:15 +00:00
2021-07-14 20:42:19 +00:00
def RetryIgnored(self, ignored_regex=None):
    """Ask the file seed cache to retry ignored seeds, then record the change.

    :param ignored_regex: forwarded by keyword to the seed cache's
        RetryIgnored; its meaning is defined there.
    """
    with self._lock:
        seed_cache = self._file_seed_cache
        seed_cache.RetryIgnored(ignored_regex=ignored_regex)

        self._SerialisableChangeMade()
2020-01-02 03:05:35 +00:00
2021-09-22 21:12:34 +00:00
def SetCheckerOptions(self, checker_options: ClientImportOptions.CheckerOptions):
    """Replace the checker options if their dumped string differs from the current one.

    On a real change the next check time and the velocity text are
    recomputed, the checker job is woken and a serialisable change is
    recorded. Identical options are a no-op.
    """
    with self._lock:
        incoming_dump = checker_options.DumpToString()
        current_dump = self._checker_options.DumpToString()

        if incoming_dump == current_dump:
            return

        self._checker_options = checker_options

        self._UpdateNextCheckTime()
        self._UpdateFileVelocityStatus()

        ClientImporting.WakeRepeatingJob(self._checker_repeating_job)

        self._SerialisableChangeMade()
2018-06-20 20:20:22 +00:00
2021-09-22 21:12:34 +00:00
def SetFileImportOptions(self, file_import_options: FileImportOptions.FileImportOptions):
    """Replace the file import options if their dumped string differs from the current one.

    Identical options are a no-op; a real change records a serialisable change.
    """
    with self._lock:
        incoming_dump = file_import_options.DumpToString()
        current_dump = self._file_import_options.DumpToString()

        if incoming_dump == current_dump:
            return

        self._file_import_options = file_import_options

        self._SerialisableChangeMade()
2018-05-23 21:05:06 +00:00
2020-09-16 20:46:54 +00:00
def SetExternalAdditionalServiceKeysToTags(self, service_keys_to_tags):
    """Replace the external additional tags if they differ from the current ones.

    :param service_keys_to_tags: wrapped in a new ClientTags.ServiceKeysToTags
        before being compared (by dumped string) and stored.
    """
    with self._lock:
        incoming = ClientTags.ServiceKeysToTags(service_keys_to_tags)

        incoming_dump = incoming.DumpToString()
        current_dump = self._external_additional_service_keys_to_tags.DumpToString()

        if incoming_dump == current_dump:
            return

        self._external_additional_service_keys_to_tags = incoming

        self._SerialisableChangeMade()
2020-09-16 20:46:54 +00:00
def SetExternalFilterableTags(self, tags):
    """Replace the external filterable tags if the new set differs.

    :param tags: any iterable of tags; it is converted to a set before
        comparison and storage.
    """
    with self._lock:
        incoming = set(tags)

        if incoming == self._external_filterable_tags:
            return

        self._external_filterable_tags = incoming

        self._SerialisableChangeMade()
2019-02-27 23:03:30 +00:00
2021-09-22 21:12:34 +00:00
def SetTagImportOptions(self, tag_import_options: TagImportOptions.TagImportOptions):
    """Replace the tag import options if their dumped string differs from the current one.

    Identical options are a no-op; a real change records a serialisable change.
    """
    with self._lock:
        incoming_dump = tag_import_options.DumpToString()
        current_dump = self._tag_import_options.DumpToString()

        if incoming_dump == current_dump:
            return

        self._tag_import_options = tag_import_options

        self._SerialisableChangeMade()
2018-05-23 21:05:06 +00:00
def SetURL(self, url):
    """Normalise and store a new url, wake the checker and record the change.

    :param url: the new url; None is treated as the blank value. A non-blank
        url is normalised through the domain manager, and falls back to the
        blank value if normalisation raises URLClassException.
    """
    blank = ' '

    if url is None:
        url = blank

    if url != blank:
        # Normalisation happens before the lock is taken.
        try:
            url = HG.client_controller.network_engine.domain_manager.NormaliseURL(url)
        except HydrusExceptions.URLClassException:
            url = ' '

    with self._lock:
        self._url = url

        ClientImporting.WakeRepeatingJob(self._checker_repeating_job)

        self._SerialisableChangeMade()
2018-05-23 21:05:06 +00:00
def Start(self, page_key, publish_to_page):
    """Start the watcher's two repeating jobs; later calls are no-ops.

    :param page_key: stored as the watcher's page key.
    :param publish_to_page: stored as the publish-to-page flag.

    Recomputes the next check time and velocity text, registers the files
    and checker repeating jobs with the client controller, assigns their
    thread slot types and marks the watcher as started.
    """
    with self._lock:
        if self._have_started:
            return

        self._page_key = page_key
        self._publish_to_page = publish_to_page

        self._UpdateNextCheckTime()
        self._UpdateFileVelocityStatus()

        controller = HG.client_controller
        period = ClientImporting.REPEATING_JOB_TYPICAL_PERIOD

        # Each job asks for its own initial delay.
        self._files_repeating_job = controller.CallRepeating(ClientImporting.GetRepeatingJobInitialDelay(), period, self.REPEATINGWorkOnFiles)
        self._checker_repeating_job = controller.CallRepeating(ClientImporting.GetRepeatingJobInitialDelay(), period, self.REPEATINGWorkOnChecker)

        self._files_repeating_job.SetThreadSlotType(' watcher_files ')
        self._checker_repeating_job.SetThreadSlotType(' watcher_check ')

        self._have_started = True
2019-01-16 22:40:53 +00:00
2018-05-23 21:05:06 +00:00
2022-06-22 20:43:12 +00:00
def CheckCanDoFileWork(self):
    """Raise if file work should not proceed right now; return None otherwise.

    :raises HydrusExceptions.VetoException: re-raised from the 'stopped' and
        'pausifying problem' checks after their side effects (cancelling the
        files job, or pausing files). The 'paused' and network checks are
        called unguarded and presumably veto the same way - confirm in
        ClientImportControl.
    """
    with self._lock:
        try:
            ClientImportControl.CheckImporterCanDoWorkBecauseStopped(self._page_key)
        except HydrusExceptions.VetoException:
            # The importer is stopped, so the repeating files job is cancelled.
            self._files_repeating_job.Cancel()

            raise

        ClientImportControl.CheckImporterCanDoFileWorkBecausePaused(self._files_paused, self._file_seed_cache, self._page_key)

        try:
            ClientImportControl.CheckImporterCanDoFileWorkBecausePausifyingProblem(self._file_import_options)
        except HydrusExceptions.VetoException:
            # A problem that warrants pausing: pause files before propagating.
            self._files_paused = True

            raise

    # Called after the lock is released: CheckCanDoNetworkWork takes
    # self._lock itself.
    self.CheckCanDoNetworkWork()
2021-09-01 21:09:01 +00:00
2022-06-22 20:43:12 +00:00
def CheckCanDoNetworkWork(self):
    """Run the shared network-work check against this watcher's no-work delay.

    :return: True when the check does not raise.
    :raises: whatever ClientImportControl.CheckCanDoNetworkWork raises
        (presumably HydrusExceptions.VetoException - confirm there).
    """
    with self._lock:
        delay_until = self._no_work_until
        delay_reason = self._no_work_until_reason

        ClientImportControl.CheckCanDoNetworkWork(delay_until, delay_reason)

    return True
def REPEATINGWorkOnFiles(self):
    """Repeating-job body: keep working on files until vetoed or an error occurs.

    Holds the files working lock for the whole run. A veto from
    CheckCanDoFileWork stores its message as the files status and ends the
    run. Any other exception is stored as a 'stopping work' status, shown
    through HydrusData.ShowException, and also ends the run.
    """
    with self._files_working_lock:
        while True:
            try:
                try:
                    self.CheckCanDoFileWork()
                except HydrusExceptions.VetoException as veto:
                    with self._lock:
                        self._files_status = str(veto)

                    # Nothing follows the loop, so returning ends the run.
                    return

                self._WorkOnFiles()

                HG.client_controller.WaitUntilViewFree()

                self._SerialisableChangeMade()
            except Exception as error:
                with self._lock:
                    self._files_status = ' stopping work: {} '.format(str(error))

                HydrusData.ShowException(error)

                return
2018-05-23 21:05:06 +00:00
2022-06-22 20:43:12 +00:00
def CheckCanDoCheckerWork(self):
    """Raise if a check should not run right now; otherwise defer to the network check.

    :return: the result of CheckCanDoNetworkWork().
    :raises HydrusExceptions.VetoException: when the importer is stopped
        (the checker job is cancelled first), checking is paused here or
        globally, no url is set, the status is 404 or DEAD, or the next
        check is not yet due (vetoed with a blank message).
    """
    with self._lock:
        try:
            ClientImportControl.CheckImporterCanDoWorkBecauseStopped(self._page_key)
        except HydrusExceptions.VetoException:
            self._checker_repeating_job.Cancel()

            raise

        while self._gallery_seed_log.WorkToDo():
            # some old unworked gallery url is hanging around, let's clear it
            stale_seed = self._gallery_seed_log.GetNextGallerySeed(CC.STATUS_UNKNOWN)

            stale_seed.SetStatus(CC.STATUS_VETOED, note=' check never finished ')

            self._gallery_seed_log.NotifyGallerySeedsUpdated((stale_seed,))

        if self._checking_paused:
            raise HydrusExceptions.VetoException(' paused ')

        if HG.client_controller.new_options.GetBoolean(' pause_all_watcher_checkers '):
            raise HydrusExceptions.VetoException(' all checkers are paused! ')

        if not self._HasURL():
            raise HydrusExceptions.VetoException(' no url set yet! ')

        if self._checking_status == ClientImporting.CHECKER_STATUS_404:
            raise HydrusExceptions.VetoException(' URL 404 ')

        if self._checking_status == ClientImporting.CHECKER_STATUS_DEAD:
            raise HydrusExceptions.VetoException(' URL DEAD ')

        if not HydrusData.TimeHasPassed(self._next_check_time):
            raise HydrusExceptions.VetoException(' ')

    # Called after the lock is released: CheckCanDoNetworkWork takes
    # self._lock itself.
    return self.CheckCanDoNetworkWork()
2021-09-01 21:09:01 +00:00
def REPEATINGWorkOnChecker(self):
    """Repeating-job body: run one check of the watched url if permitted.

    Holds the checker working lock for the whole run. A veto from
    CheckCanDoCheckerWork stores its message as the watcher status and ends
    the run. Any other exception is stored as a 'stopping work' status and
    shown through HydrusData.ShowException.
    """
    with self._checker_working_lock:
        try:
            try:
                self.CheckCanDoCheckerWork()
            except HydrusExceptions.VetoException as veto:
                with self._lock:
                    self._watcher_status = str(veto)

                return
            else:
                # Permitted: run the check and record that state changed.
                self._CheckWatchableURL()

                self._SerialisableChangeMade()
        except Exception as error:
            with self._lock:
                self._watcher_status = ' stopping work: {} '.format(str(error))

            HydrusData.ShowException(error)

            return
2018-05-23 21:05:06 +00:00
2022-06-22 20:43:12 +00:00
2018-05-23 21:05:06 +00:00
# Map the watcher-import serialisable type id to the WatcherImport class in
# HydrusSerialisable's type-to-class table (presumably consulted when stored
# objects are recreated - confirm in HydrusSerialisable).
HydrusSerialisable . SERIALISABLE_TYPES_TO_OBJECT_TYPES [ HydrusSerialisable . SERIALISABLE_TYPE_WATCHER_IMPORT ] = WatcherImport