hydrus/include/ClientImportWatchers.py

1247 lines
38 KiB
Python
Raw Normal View History

2018-05-23 21:05:06 +00:00
import ClientConstants as CC
import ClientDownloading
import ClientImporting
import ClientImportOptions
2018-06-06 21:27:02 +00:00
import ClientImportSeeds
2018-05-23 21:05:06 +00:00
import ClientNetworkingJobs
import ClientParsing
import collections
import HydrusConstants as HC
import HydrusData
import HydrusExceptions
import HydrusGlobals as HG
import HydrusSerialisable
import threading
import time
import wx
class MultipleWatcherImport( HydrusSerialisable.SerialisableBase ):
SERIALISABLE_TYPE = HydrusSerialisable.SERIALISABLE_TYPE_MULTIPLE_WATCHER_IMPORT
SERIALISABLE_NAME = 'Multiple Watcher'
SERIALISABLE_VERSION = 1
def __init__( self, url = None ):
HydrusSerialisable.SerialisableBase.__init__( self )
self._lock = threading.Lock()
self._page_key = 'initialising page key'
self._watchers = HydrusSerialisable.SerialisableList()
self._watcher_keys_to_watchers = {}
self._watchers_repeating_job = None
self._status_dirty = True
self._status_cache = None
self._status_cache_generation_time = 0
#
if url is not None:
watcher = WatcherImport()
watcher.SetURL( url )
self._AddWatcher( watcher )
self._last_pubbed_value_range = ( 0, 0 )
self._next_pub_value_check_time = 0
def _AddWatcher( self, watcher ):
publish_to_page = False
watcher.Repage( self._page_key, publish_to_page )
self._watchers.append( watcher )
watcher_key = watcher.GetWatcherKey()
self._watcher_keys_to_watchers[ watcher_key ] = watcher
def _RegenerateStatus( self ):
statuses_to_counts = collections.Counter()
for watcher in self._watchers:
seed_cache = watcher.GetSeedCache()
statuses_to_counts.update( seed_cache.GetStatusesToCounts() )
2018-06-06 21:27:02 +00:00
self._status_cache = ClientImportSeeds.GenerateSeedCacheStatus( statuses_to_counts )
2018-05-23 21:05:06 +00:00
self._status_dirty = False
self._status_cache_generation_time = HydrusData.GetNow()
def _GetSerialisableInfo( self ):
serialisable_watchers = self._watchers.GetSerialisableTuple()
return serialisable_watchers
def _InitialiseFromSerialisableInfo( self, serialisable_info ):
serialisable_watchers = serialisable_info
self._watchers = HydrusSerialisable.CreateFromSerialisableTuple( serialisable_watchers )
self._watcher_keys_to_watchers = { watcher.GetWatcherKey() : watcher for watcher in self._watchers }
def _RemoveWatcher( self, watcher_key ):
if watcher_key not in self._watcher_keys_to_watchers:
return
watcher = self._watcher_keys_to_watchers[ watcher_key ]
publish_to_page = False
watcher.Repage( 'dead page key', publish_to_page )
self._watchers.remove( watcher )
del self._watcher_keys_to_watchers[ watcher_key ]
def _SetDirty( self ):
self._status_dirty = True
def AddURL( self, url ):
if url == '':
return
with self._lock:
if url in ( watcher.GetURL() for watcher in self._watchers ):
return
watcher = WatcherImport()
watcher.SetURL( url )
publish_to_page = False
watcher.Start( self._page_key, publish_to_page )
self._AddWatcher( watcher )
def AddWatcher( self, watcher ):
with self._lock:
self._AddWatcher( watcher )
self._SetDirty()
2018-05-30 20:13:21 +00:00
def GetNumDead( self ):
2018-05-23 21:05:06 +00:00
with self._lock:
2018-05-30 20:13:21 +00:00
return len( [ watcher for watcher in self._watchers if watcher.IsDead() ] )
2018-05-23 21:05:06 +00:00
def GetTotalStatus( self ):
with self._lock:
if self._status_dirty:
self._RegenerateStatus()
return self._status_cache
def GetValueRange( self ):
with self._lock:
total_value = 0
total_range = 0
for watcher in self._watchers:
( value, range ) = watcher.GetValueRange()
if value != range:
total_value += value
total_range += range
return ( total_value, total_range )
def GetWatchers( self ):
with self._lock:
return list( self._watchers )
2018-05-30 20:13:21 +00:00
def GetWatcherKeys( self ):
with self._lock:
return set( self._watcher_keys_to_watchers.keys() )
2018-05-23 21:05:06 +00:00
def RemoveWatcher( self, watcher_key ):
with self._lock:
self._RemoveWatcher( watcher_key )
self._SetDirty()
def Start( self, page_key ):
with self._lock:
self._page_key = page_key
2018-05-30 20:13:21 +00:00
# set a 2s period so the page value/range is breddy snappy
self._watchers_repeating_job = HG.client_controller.CallRepeating( ClientImporting.GetRepeatingJobInitialDelay(), 2.0, self.REPEATINGWorkOnWatchers )
2018-05-23 21:05:06 +00:00
publish_to_page = False
for watcher in self._watchers:
watcher.Start( page_key, publish_to_page )
def REPEATINGWorkOnWatchers( self ):
with self._lock:
if ClientImporting.PageImporterShouldStopWorking( self._page_key ):
self._watchers_repeating_job.Cancel()
return
if not self._status_dirty: # if we think we are clean
for watcher in self._watchers:
seed_cache = watcher.GetSeedCache()
if seed_cache.GetStatusGenerationTime() > self._status_cache_generation_time: # has there has been an update?
self._SetDirty()
break
if HydrusData.TimeHasPassed( self._next_pub_value_check_time ):
self._next_pub_value_check_time = HydrusData.GetNow() + 5
current_value_range = self.GetValueRange()
if current_value_range != self._last_pubbed_value_range:
self._last_pubbed_value_range = current_value_range
HG.client_controller.pub( 'refresh_page_name', self._page_key )
# something like:
# if any are dead, do some stuff with them based on some options here
# might want to have this work on a 30s period or something
HydrusSerialisable.SERIALISABLE_TYPES_TO_OBJECT_TYPES[ HydrusSerialisable.SERIALISABLE_TYPE_MULTIPLE_WATCHER_IMPORT ] = MultipleWatcherImport
class WatcherImport( HydrusSerialisable.SerialisableBase ):
SERIALISABLE_TYPE = HydrusSerialisable.SERIALISABLE_TYPE_WATCHER_IMPORT
SERIALISABLE_NAME = 'Watcher'
SERIALISABLE_VERSION = 4
MIN_CHECK_PERIOD = 30
def __init__( self ):
HydrusSerialisable.SerialisableBase.__init__( self )
file_import_options = HG.client_controller.new_options.GetDefaultFileImportOptions( 'loud' )
new_options = HG.client_controller.new_options
tag_import_options = new_options.GetDefaultTagImportOptions( ClientDownloading.GalleryIdentifier( HC.SITE_TYPE_WATCHER ) )
self._page_key = 'initialising page key'
self._publish_to_page = False
self._url = ''
2018-06-06 21:27:02 +00:00
self._seed_cache = ClientImportSeeds.SeedCache()
2018-05-23 21:05:06 +00:00
self._urls_to_filenames = {}
self._urls_to_md5_base64 = {}
self._checker_options = new_options.GetDefaultWatcherCheckerOptions()
self._file_import_options = file_import_options
self._tag_import_options = tag_import_options
self._last_check_time = 0
self._checking_status = ClientImporting.CHECKER_STATUS_OK
self._subject = 'unknown subject'
self._next_check_time = None
self._download_control_file_set = None
self._download_control_file_clear = None
self._download_control_checker_set = None
self._download_control_checker_clear = None
self._check_now = False
self._files_paused = False
self._checking_paused = False
self._no_work_until = 0
self._no_work_until_reason = ''
self._file_velocity_status = ''
self._current_action = ''
self._watcher_status = ''
self._watcher_key = HydrusData.GenerateKey()
self._lock = threading.Lock()
self._last_pubbed_page_name = ''
self._files_repeating_job = None
self._checker_repeating_job = None
HG.client_controller.sub( self, 'NotifySeedsUpdated', 'seed_cache_seeds_updated' )
def _CheckWatchableURL( self ):
error_occurred = False
watcher_status_should_stick = True
( url_type, match_name, can_parse ) = HG.client_controller.network_engine.domain_manager.GetURLParseCapability( self._url )
if url_type != HC.URL_TYPE_WATCHABLE:
error_occurred = True
watcher_status = 'Did not understand the given URL as watchable!'
elif not can_parse:
error_occurred = True
watcher_status = 'Could not parse the given URL!'
if not error_occurred:
2018-05-30 20:13:21 +00:00
try:
# convert to API url as appropriate
( url_to_check, parser ) = HG.client_controller.network_engine.domain_manager.GetURLToFetchAndParser( self._url )
except HydrusExceptions.URLMatchException:
2018-05-23 21:05:06 +00:00
error_occurred = True
watcher_status = 'Could not find a parser for the given URL!'
if error_occurred:
self._FinishCheck( watcher_status, error_occurred, watcher_status_should_stick )
return
#
with self._lock:
self._watcher_status = 'checking watchable url'
try:
network_job = ClientNetworkingJobs.NetworkJobWatcherPage( self._watcher_key, 'GET', url_to_check )
network_job.OverrideBandwidth()
HG.client_controller.network_engine.AddJob( network_job )
with self._CheckerNetworkJobPresentationContextFactory( network_job ):
network_job.WaitUntilDone()
data = network_job.GetContent()
parsing_context = {}
parsing_context[ 'watcher_url' ] = self._url
parsing_context[ 'url' ] = url_to_check
all_parse_results = parser.Parse( parsing_context, data )
subject = ClientParsing.GetTitleFromAllParseResults( all_parse_results )
if subject is None:
subject = ''
with self._lock:
self._subject = subject
( num_new, num_already_in ) = ClientImporting.UpdateSeedCacheWithAllParseResults( self._seed_cache, all_parse_results, source_url = self._url, tag_import_options = self._tag_import_options )
watcher_status = 'checked OK - ' + HydrusData.ConvertIntToPrettyString( num_new ) + ' new urls'
watcher_status_should_stick = False
if num_new > 0:
ClientImporting.WakeRepeatingJob( self._files_repeating_job )
except HydrusExceptions.ShutdownException:
return
except HydrusExceptions.ParseException as e:
error_occurred = True
watcher_status = 'Was unable to parse the returned data! Full error written to log!'
HydrusData.PrintException( e )
except HydrusExceptions.NotFoundException:
error_occurred = True
with self._lock:
self._checking_status = ClientImporting.CHECKER_STATUS_404
watcher_status = ''
except HydrusExceptions.NetworkException as e:
self._DelayWork( 4 * 3600, 'Network problem: ' + HydrusData.ToUnicode( e ) )
watcher_status = ''
HydrusData.PrintException( e )
except Exception as e:
error_occurred = True
watcher_status = HydrusData.ToUnicode( e )
HydrusData.PrintException( e )
self._FinishCheck( watcher_status, error_occurred, watcher_status_should_stick )
def _DelayWork( self, time_delta, reason ):
self._no_work_until = HydrusData.GetNow() + time_delta
self._no_work_until_reason = reason
def _FileNetworkJobPresentationContextFactory( self, network_job ):
def enter_call():
with self._lock:
if self._download_control_file_set is not None:
wx.CallAfter( self._download_control_file_set, network_job )
def exit_call():
with self._lock:
if self._download_control_file_clear is not None:
wx.CallAfter( self._download_control_file_clear )
return ClientImporting.NetworkJobPresentationContext( enter_call, exit_call )
def _FinishCheck( self, watcher_status, error_occurred, watcher_status_should_stick ):
if error_occurred:
# the [DEAD] stuff can override watcher status, so let's give a brief time for this to display the error
with self._lock:
self._checking_paused = True
self._watcher_status = watcher_status
time.sleep( 5 )
with self._lock:
if self._check_now:
self._check_now = False
self._watcher_status = watcher_status
self._last_check_time = HydrusData.GetNow()
self._UpdateFileVelocityStatus()
self._UpdateNextCheckTime()
self._PublishPageName()
if not watcher_status_should_stick:
time.sleep( 5 )
with self._lock:
self._watcher_status = ''
def _GetSerialisableInfo( self ):
serialisable_seed_cache = self._seed_cache.GetSerialisableTuple()
serialisable_checker_options = self._checker_options.GetSerialisableTuple()
serialisable_file_options = self._file_import_options.GetSerialisableTuple()
serialisable_tag_options = self._tag_import_options.GetSerialisableTuple()
return ( self._url, serialisable_seed_cache, self._urls_to_filenames, self._urls_to_md5_base64, serialisable_checker_options, serialisable_file_options, serialisable_tag_options, self._last_check_time, self._files_paused, self._checking_paused, self._checking_status, self._subject, self._no_work_until, self._no_work_until_reason )
def _HasURL( self ):
return self._url != ''
def _InitialiseFromSerialisableInfo( self, serialisable_info ):
( self._url, serialisable_seed_cache, self._urls_to_filenames, self._urls_to_md5_base64, serialisable_checker_options, serialisable_file_options, serialisable_tag_options, self._last_check_time, self._files_paused, self._checking_paused, self._checking_status, self._subject, self._no_work_until, self._no_work_until_reason ) = serialisable_info
self._seed_cache = HydrusSerialisable.CreateFromSerialisableTuple( serialisable_seed_cache )
self._checker_options = HydrusSerialisable.CreateFromSerialisableTuple( serialisable_checker_options )
self._file_import_options = HydrusSerialisable.CreateFromSerialisableTuple( serialisable_file_options )
self._tag_import_options = HydrusSerialisable.CreateFromSerialisableTuple( serialisable_tag_options )
def _PublishPageName( self ):
new_options = HG.client_controller.new_options
cannot_rename = not new_options.GetBoolean( 'permit_watchers_to_name_their_pages' )
if cannot_rename:
page_name = 'watcher'
elif self._subject in ( '', 'unknown subject' ):
page_name = 'watcher'
else:
page_name = self._subject
if self._checking_status == ClientImporting.CHECKER_STATUS_404:
thread_watcher_not_found_page_string = new_options.GetNoneableString( 'thread_watcher_not_found_page_string' )
if thread_watcher_not_found_page_string is not None:
page_name = thread_watcher_not_found_page_string + ' ' + page_name
elif self._checking_status == ClientImporting.CHECKER_STATUS_DEAD:
thread_watcher_dead_page_string = new_options.GetNoneableString( 'thread_watcher_dead_page_string' )
if thread_watcher_dead_page_string is not None:
page_name = thread_watcher_dead_page_string + ' ' + page_name
elif self._checking_paused:
thread_watcher_paused_page_string = new_options.GetNoneableString( 'thread_watcher_paused_page_string' )
if thread_watcher_paused_page_string is not None:
page_name = thread_watcher_paused_page_string + ' ' + page_name
if page_name != self._last_pubbed_page_name:
self._last_pubbed_page_name = page_name
if self._publish_to_page:
HG.client_controller.pub( 'rename_page', self._page_key, page_name )
def _CheckerNetworkJobPresentationContextFactory( self, network_job ):
def enter_call():
with self._lock:
if self._download_control_checker_set is not None:
wx.CallAfter( self._download_control_checker_set, network_job )
def exit_call():
with self._lock:
if self._download_control_checker_clear is not None:
wx.CallAfter( self._download_control_checker_clear )
return ClientImporting.NetworkJobPresentationContext( enter_call, exit_call )
def _UpdateFileVelocityStatus( self ):
self._file_velocity_status = self._checker_options.GetPrettyCurrentVelocity( self._seed_cache, self._last_check_time )
def _UpdateNextCheckTime( self ):
if self._check_now:
self._next_check_time = self._last_check_time + self.MIN_CHECK_PERIOD
else:
if not HydrusData.TimeHasPassed( self._no_work_until ):
self._next_check_time = self._no_work_until + 1
else:
if self._checking_status != ClientImporting.CHECKER_STATUS_404:
if self._checker_options.IsDead( self._seed_cache, self._last_check_time ):
self._checking_status = ClientImporting.CHECKER_STATUS_DEAD
self._checking_paused = True
self._next_check_time = self._checker_options.GetNextCheckTime( self._seed_cache, self._last_check_time )
def _UpdateSerialisableInfo( self, version, old_serialisable_info ):
if version == 1:
( url, serialisable_seed_cache, urls_to_filenames, urls_to_md5_base64, serialisable_file_options, serialisable_tag_options, times_to_check, check_period, last_check_time, paused ) = old_serialisable_info
checker_options = ClientImportOptions.CheckerOptions( intended_files_per_check = 8, never_faster_than = 300, never_slower_than = 86400, death_file_velocity = ( 1, 86400 ) )
serialisable_checker_options = checker_options.GetSerialisableTuple()
files_paused = paused
checking_paused = paused
new_serialisable_info = ( url, serialisable_seed_cache, urls_to_filenames, urls_to_md5_base64, serialisable_checker_options, serialisable_file_options, serialisable_tag_options, last_check_time, files_paused, checking_paused )
return ( 2, new_serialisable_info )
if version == 2:
( url, serialisable_seed_cache, urls_to_filenames, urls_to_md5_base64, serialisable_checker_options, serialisable_file_options, serialisable_tag_options, last_check_time, files_paused, checking_paused ) = old_serialisable_info
checking_status = ClientImporting.CHECKER_STATUS_OK
subject = 'unknown subject'
new_serialisable_info = ( url, serialisable_seed_cache, urls_to_filenames, urls_to_md5_base64, serialisable_checker_options, serialisable_file_options, serialisable_tag_options, last_check_time, files_paused, checking_paused, checking_status, subject )
return ( 3, new_serialisable_info )
if version == 3:
( url, serialisable_seed_cache, urls_to_filenames, urls_to_md5_base64, serialisable_checker_options, serialisable_file_options, serialisable_tag_options, last_check_time, files_paused, checking_paused, checking_status, subject ) = old_serialisable_info
no_work_until = 0
no_work_until_reason = ''
new_serialisable_info = ( url, serialisable_seed_cache, urls_to_filenames, urls_to_md5_base64, serialisable_checker_options, serialisable_file_options, serialisable_tag_options, last_check_time, files_paused, checking_paused, checking_status, subject, no_work_until, no_work_until_reason )
return ( 4, new_serialisable_info )
def _WorkOnFiles( self ):
seed = self._seed_cache.GetNextSeed( CC.STATUS_UNKNOWN )
if seed is None:
return
did_substantial_work = False
try:
def status_hook( text ):
with self._lock:
self._current_action = text
2018-06-06 21:27:02 +00:00
did_substantial_work = seed.WorkOnURL( self._seed_cache, status_hook, ClientImporting.GenerateWatcherNetworkJobFactory( self._watcher_key ), self._FileNetworkJobPresentationContextFactory, self._file_import_options, self._tag_import_options )
2018-05-23 21:05:06 +00:00
with self._lock:
should_present = self._publish_to_page and seed.ShouldPresent( self._file_import_options )
page_key = self._page_key
if should_present:
seed.PresentToPage( page_key )
did_substantial_work = True
except HydrusExceptions.ShutdownException:
return
except HydrusExceptions.VetoException as e:
status = CC.STATUS_VETOED
note = HydrusData.ToUnicode( e )
seed.SetStatus( status, note = note )
if isinstance( e, HydrusExceptions.CancelledException ):
time.sleep( 2 )
except HydrusExceptions.NotFoundException:
status = CC.STATUS_VETOED
note = '404'
seed.SetStatus( status, note = note )
except Exception as e:
status = CC.STATUS_ERROR
seed.SetStatus( status, exception = e )
time.sleep( 3 )
finally:
self._seed_cache.NotifySeedsUpdated( ( seed, ) )
with self._lock:
self._current_action = ''
if did_substantial_work:
time.sleep( ClientImporting.DID_SUBSTANTIAL_FILE_WORK_MINIMUM_SLEEP_TIME )
def CheckNow( self ):
with self._lock:
self._check_now = True
self._checking_paused = False
self._no_work_until = 0
self._no_work_until_reason = ''
self._checking_status = ClientImporting.CHECKER_STATUS_OK
self._UpdateNextCheckTime()
ClientImporting.WakeRepeatingJob( self._checker_repeating_job )
def CurrentlyAlive( self ):
with self._lock:
return self._checking_status == ClientImporting.CHECKER_STATUS_OK
def CurrentlyWorking( self ):
with self._lock:
finished = not self._seed_cache.WorkToDo()
return not finished and not self._files_paused
def GetCheckerOptions( self ):
with self._lock:
return self._checker_options
def GetOptions( self ):
with self._lock:
return ( self._url, self._file_import_options, self._tag_import_options )
def GetPresentedHashes( self ):
return self._seed_cache.GetPresentedHashes( self._file_import_options )
def GetSeedCache( self ):
return self._seed_cache
def GetSimpleStatus( self ):
with self._lock:
if self._checking_status == ClientImporting.CHECKER_STATUS_404:
return '404'
elif self._checking_status == ClientImporting.CHECKER_STATUS_DEAD:
return 'DEAD'
elif self._checking_paused or self._files_paused:
return 'paused'
else:
return ''
def GetStatus( self ):
with self._lock:
2018-05-30 20:13:21 +00:00
if self._checking_status == ClientImporting.CHECKER_STATUS_404:
2018-05-23 21:05:06 +00:00
watcher_status = 'URL 404'
elif self._checking_status == ClientImporting.CHECKER_STATUS_DEAD:
watcher_status = 'URL DEAD'
2018-05-30 20:13:21 +00:00
elif not HydrusData.TimeHasPassed( self._no_work_until ):
watcher_status = self._no_work_until_reason + ' - ' + 'next check ' + HydrusData.ConvertTimestampToPrettyPending( self._next_check_time )
2018-05-23 21:05:06 +00:00
else:
watcher_status = self._watcher_status
return ( self._current_action, self._files_paused, self._file_velocity_status, self._next_check_time, watcher_status, self._subject, self._checking_status, self._check_now, self._checking_paused )
def GetSubject( self ):
with self._lock:
if self._subject in ( None, '' ):
return 'unknown subject'
else:
return self._subject
def GetWatcherKey( self ):
with self._lock:
return self._watcher_key
def GetURL( self ):
with self._lock:
return self._url
def GetValueRange( self ):
with self._lock:
return self._seed_cache.GetValueRange()
def HasURL( self ):
with self._lock:
return self._HasURL()
2018-05-30 20:13:21 +00:00
def _IsDead( self ):
return self._checking_status in ( ClientImporting.CHECKER_STATUS_404, ClientImporting.CHECKER_STATUS_DEAD )
2018-05-23 21:05:06 +00:00
def IsDead( self ):
with self._lock:
2018-05-30 20:13:21 +00:00
return self._IsDead()
2018-05-23 21:05:06 +00:00
def NotifySeedsUpdated( self, seed_cache_key, seeds ):
if seed_cache_key == self._seed_cache.GetSeedCacheKey():
ClientImporting.WakeRepeatingJob( self._files_repeating_job )
def PausePlayChecker( self ):
with self._lock:
2018-05-30 20:13:21 +00:00
if self._checking_paused and self._IsDead():
2018-05-23 21:05:06 +00:00
return # watcher is dead, so don't unpause until a checknow event
else:
self._checking_paused = not self._checking_paused
ClientImporting.WakeRepeatingJob( self._checker_repeating_job )
def PausePlayFiles( self ):
with self._lock:
self._files_paused = not self._files_paused
ClientImporting.WakeRepeatingJob( self._files_repeating_job )
2018-05-30 20:13:21 +00:00
def PausePlay( self ):
with self._lock:
if self._checking_paused:
if self._IsDead(): # can't unpause checker until a checknow event
self._files_paused = not self._files_paused
else:
self._checking_paused = False
self._files_paused = False
ClientImporting.WakeRepeatingJob( self._checker_repeating_job )
ClientImporting.WakeRepeatingJob( self._files_repeating_job )
else:
self._checking_paused = True
self._files_paused = True
2018-05-23 21:05:06 +00:00
def Repage( self, page_key, publish_to_page ):
with self._lock:
self._page_key = page_key
self._publish_to_page = publish_to_page
def SetDownloadControlChecker( self, download_control ):
with self._lock:
self._download_control_checker_set = download_control.SetNetworkJob
self._download_control_checker_clear = download_control.ClearNetworkJob
def SetDownloadControlFile( self, download_control ):
with self._lock:
self._download_control_file_set = download_control.SetNetworkJob
self._download_control_file_clear = download_control.ClearNetworkJob
def SetFileImportOptions( self, file_import_options ):
with self._lock:
self._file_import_options = file_import_options
def SetTagImportOptions( self, tag_import_options ):
with self._lock:
self._tag_import_options = tag_import_options
def SetURL( self, url ):
if url is None:
url = ''
if url != '':
url = HG.client_controller.network_engine.domain_manager.NormaliseURL( url )
with self._lock:
self._url = url
ClientImporting.WakeRepeatingJob( self._checker_repeating_job )
def SetCheckerOptions( self, checker_options ):
with self._lock:
self._checker_options = checker_options
self._checking_paused = False
self._UpdateNextCheckTime()
self._UpdateFileVelocityStatus()
ClientImporting.WakeRepeatingJob( self._checker_repeating_job )
def Start( self, page_key, publish_to_page ):
self._page_key = page_key
self._publish_to_page = publish_to_page
self._UpdateNextCheckTime()
self._PublishPageName()
self._UpdateFileVelocityStatus()
self._files_repeating_job = HG.client_controller.CallRepeating( ClientImporting.GetRepeatingJobInitialDelay(), ClientImporting.REPEATING_JOB_TYPICAL_PERIOD, self.REPEATINGWorkOnFiles )
self._checker_repeating_job = HG.client_controller.CallRepeating( ClientImporting.GetRepeatingJobInitialDelay(), ClientImporting.REPEATING_JOB_TYPICAL_PERIOD, self.REPEATINGWorkOnChecker )
def REPEATINGWorkOnFiles( self ):
with self._lock:
if ClientImporting.PageImporterShouldStopWorking( self._page_key ):
self._files_repeating_job.Cancel()
return
work_to_do = self._seed_cache.WorkToDo() and not ( self._files_paused or HG.client_controller.PageClosedButNotDestroyed( self._page_key ) )
while work_to_do:
try:
self._WorkOnFiles()
HG.client_controller.WaitUntilViewFree()
except Exception as e:
HydrusData.ShowException( e )
with self._lock:
if ClientImporting.PageImporterShouldStopWorking( self._page_key ):
self._files_repeating_job.Cancel()
return
work_to_do = self._seed_cache.WorkToDo() and not ( self._files_paused or HG.client_controller.PageClosedButNotDestroyed( self._page_key ) )
def REPEATINGWorkOnChecker( self ):
with self._lock:
if ClientImporting.PageImporterShouldStopWorking( self._page_key ):
self._checker_repeating_job.Cancel()
return
able_to_check = self._HasURL() and not self._checking_paused
check_due = HydrusData.TimeHasPassed( self._next_check_time )
no_delays = HydrusData.TimeHasPassed( self._no_work_until )
page_shown = not HG.client_controller.PageClosedButNotDestroyed( self._page_key )
time_to_check = able_to_check and check_due and no_delays and page_shown
if time_to_check:
try:
self._CheckWatchableURL()
except Exception as e:
HydrusData.ShowException( e )
HydrusSerialisable.SERIALISABLE_TYPES_TO_OBJECT_TYPES[ HydrusSerialisable.SERIALISABLE_TYPE_WATCHER_IMPORT ] = WatcherImport