hydrus/include/ClientImportWatchers.py

from . import ClientConstants as CC
from . import ClientDownloading
from . import ClientImporting
from . import ClientImportOptions
from . import ClientImportFileSeeds
from . import ClientImportGallerySeeds
from . import ClientNetworkingJobs
from . import ClientParsing
import collections
from . import HydrusConstants as HC
from . import HydrusData
from . import HydrusExceptions
from . import HydrusGlobals as HG
from . import HydrusSerialisable
import threading
import time
import wx
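
# This module provides the watcher ('thread watcher') import pipeline:
# - MultipleWatcherImport holds a collection of WatcherImport objects for a single page,
#   tracking which watcher is highlighted and aggregating their file import status.
# - WatcherImport watches a single watchable URL, re-checking it on a schedule set by its
#   CheckerOptions and importing any file URLs it finds into its FileSeedCache.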
class MultipleWatcherImport( HydrusSerialisable.SerialisableBase ):
    
    SERIALISABLE_TYPE = HydrusSerialisable.SERIALISABLE_TYPE_MULTIPLE_WATCHER_IMPORT
    SERIALISABLE_NAME = 'Multiple Watcher'
    SERIALISABLE_VERSION = 2
    
    ADDED_TIMESTAMP_DURATION = 15
    
    def __init__( self, url = None ):
        
        HydrusSerialisable.SerialisableBase.__init__( self )
        
        self._lock = threading.Lock()
        
        self._page_key = 'initialising page key'
        
        self._watchers = HydrusSerialisable.SerialisableList()
        
        self._highlighted_watcher_url = None
        
        self._checker_options = HG.client_controller.new_options.GetDefaultWatcherCheckerOptions()
        self._file_import_options = HG.client_controller.new_options.GetDefaultFileImportOptions( 'loud' )
        self._tag_import_options = ClientImportOptions.TagImportOptions( is_default = True )
        
        self._watcher_keys_to_watchers = {}
        
        self._watcher_keys_to_added_timestamps = {}
        self._watcher_keys_to_already_in_timestamps = {}
        
        self._watchers_repeating_job = None
        
        self._status_dirty = True
        self._status_cache = None
        self._status_cache_generation_time = 0
        
        #
        
        if url is not None:
            
            watcher = WatcherImport()
            
            watcher.SetURL( url )
            
            self._AddWatcher( watcher )
            
        
        self._last_time_watchers_changed = HydrusData.GetNowPrecise()
        
        self._last_pubbed_value_range = ( 0, 0 )
        self._next_pub_value_check_time = 0
        
    
    def _AddWatcher( self, watcher ):
        
        watcher.PublishToPage( False )
        watcher.Repage( self._page_key )
        
        self._watchers.append( watcher )
        
        self._last_time_watchers_changed = HydrusData.GetNowPrecise()
        
        watcher_key = watcher.GetWatcherKey()
        
        self._watcher_keys_to_watchers[ watcher_key ] = watcher
        self._watcher_keys_to_added_timestamps[ watcher_key ] = HydrusData.GetNow()
        
    
    def _CleanAddedTimestamps( self ):
        
        keys = list( self._watcher_keys_to_added_timestamps.keys() )
        
        for key in keys:
            
            if HydrusData.TimeHasPassed( self._watcher_keys_to_added_timestamps[ key ] + self.ADDED_TIMESTAMP_DURATION ):
                
                del self._watcher_keys_to_added_timestamps[ key ]
                
            
        
        keys = list( self._watcher_keys_to_already_in_timestamps.keys() )
        
        for key in keys:
            
            if HydrusData.TimeHasPassed( self._watcher_keys_to_already_in_timestamps[ key ] + self.ADDED_TIMESTAMP_DURATION ):
                
                del self._watcher_keys_to_already_in_timestamps[ key ]
                
            
        
    
    def _GetSerialisableInfo( self ):
        
        serialisable_watchers = self._watchers.GetSerialisableTuple()
        
        serialisable_checker_options = self._checker_options.GetSerialisableTuple()
        serialisable_file_import_options = self._file_import_options.GetSerialisableTuple()
        serialisable_tag_import_options = self._tag_import_options.GetSerialisableTuple()
        
        return ( serialisable_watchers, self._highlighted_watcher_url, serialisable_checker_options, serialisable_file_import_options, serialisable_tag_import_options )
        
    
    def _InitialiseFromSerialisableInfo( self, serialisable_info ):
        
        ( serialisable_watchers, self._highlighted_watcher_url, serialisable_checker_options, serialisable_file_import_options, serialisable_tag_import_options ) = serialisable_info
        
        self._watchers = HydrusSerialisable.CreateFromSerialisableTuple( serialisable_watchers )
        
        self._watcher_keys_to_watchers = { watcher.GetWatcherKey() : watcher for watcher in self._watchers }
        
        self._checker_options = HydrusSerialisable.CreateFromSerialisableTuple( serialisable_checker_options )
        self._file_import_options = HydrusSerialisable.CreateFromSerialisableTuple( serialisable_file_import_options )
        self._tag_import_options = HydrusSerialisable.CreateFromSerialisableTuple( serialisable_tag_import_options )
        
    
    def _RegenerateStatus( self ):
        
        file_seed_caches = [ watcher.GetFileSeedCache() for watcher in self._watchers ]
        
        self._status_cache = ClientImportFileSeeds.GenerateFileSeedCachesStatus( file_seed_caches )
        
        self._status_dirty = False
        self._status_cache_generation_time = HydrusData.GetNow()
        
    
    def _RemoveWatcher( self, watcher_key ):
        
        if watcher_key not in self._watcher_keys_to_watchers:
            
            return
            
        
        watcher = self._watcher_keys_to_watchers[ watcher_key ]
        
        watcher.PublishToPage( False )
        watcher.Repage( 'dead page key' )
        
        self._watchers.remove( watcher )
        
        self._last_time_watchers_changed = HydrusData.GetNowPrecise()
        
        del self._watcher_keys_to_watchers[ watcher_key ]
        
    
    def _SetDirty( self ):
        
        self._status_dirty = True
        
    
    def _UpdateSerialisableInfo( self, version, old_serialisable_info ):
        
        if version == 1:
            
            serialisable_watchers = old_serialisable_info
            
            try:
                
                checker_options = HG.client_controller.new_options.GetDefaultWatcherCheckerOptions()
                file_import_options = HG.client_controller.new_options.GetDefaultFileImportOptions( 'loud' )
                tag_import_options = ClientImportOptions.TagImportOptions( is_default = True )
                
            except:
                
                checker_options = ClientImportOptions.CheckerOptions()
                file_import_options = ClientImportOptions.FileImportOptions()
                tag_import_options = ClientImportOptions.TagImportOptions()
                
            
            serialisable_checker_options = checker_options.GetSerialisableTuple()
            serialisable_file_import_options = file_import_options.GetSerialisableTuple()
            serialisable_tag_import_options = tag_import_options.GetSerialisableTuple()
            
            highlighted_watcher_key = None
            
            serialisable_highlighted_watcher_key = highlighted_watcher_key
            
            new_serialisable_info = ( serialisable_watchers, serialisable_highlighted_watcher_key, serialisable_checker_options, serialisable_file_import_options, serialisable_tag_import_options )
            
            return ( 2, new_serialisable_info )
            
        
    
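    # AddURL normalises the given URL, skips it if an existing watcher already has it
    # (recording an 'already watching' timestamp for the status display), and otherwise
    # creates and starts a new WatcherImport that inherits this page's current default options.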
    def AddURL( self, url, service_keys_to_tags ):
        
        if url == '':
            
            return None
            
        
        if service_keys_to_tags is None:
            
            service_keys_to_tags = {}
            
        
        url = HG.client_controller.network_engine.domain_manager.NormaliseURL( url )
        
        with self._lock:
            
            for watcher in self._watchers:
                
                if url == watcher.GetURL():
                    
                    watcher_key = watcher.GetWatcherKey()
                    
                    self._watcher_keys_to_already_in_timestamps[ watcher_key ] = HydrusData.GetNow()
                    
                    return None
                    
                
            
            watcher = WatcherImport()
            
            watcher.SetURL( url )
            
            watcher.SetCheckerOptions( self._checker_options )
            watcher.SetFileImportOptions( self._file_import_options )
            watcher.SetTagImportOptions( self._tag_import_options )
            
            publish_to_page = False
            
            watcher.Start( self._page_key, publish_to_page )
            
            self._AddWatcher( watcher )
            
        
        return watcher
        
    
    def AddWatcher( self, watcher ):
        
        with self._lock:
            
            self._AddWatcher( watcher )
            
            self._SetDirty()
            
        
    
    def GetHighlightedWatcher( self ):
        
        with self._lock:
            
            if self._highlighted_watcher_url is not None:
                
                for watcher in self._watchers:
                    
                    if watcher.GetURL() == self._highlighted_watcher_url:
                        
                        return watcher
                        
                    
                
                self._highlighted_watcher_url = None
                
            
            return None
            
        
    
    def GetLastTimeWatchersChanged( self ):
        
        with self._lock:
            
            return self._last_time_watchers_changed
            
        
    
    def GetNumDead( self ):
        
        with self._lock:
            
            return len( [ watcher for watcher in self._watchers if watcher.IsDead() ] )
            
        
    
    def GetNumWatchers( self ):
        
        with self._lock:
            
            return len( self._watchers )
            
        
    
    def GetOptions( self ):
        
        with self._lock:
            
            return ( self._checker_options, self._file_import_options, self._tag_import_options )
            
        
    
    def GetTotalStatus( self ):
        
        with self._lock:
            
            if self._status_dirty:
                
                self._RegenerateStatus()
                
            
            return self._status_cache
            
        
    
    def GetValueRange( self ):
        
        with self._lock:
            
            total_value = 0
            total_range = 0
            
            for watcher in self._watchers:
                
                ( value, range ) = watcher.GetValueRange()
                
                if value != range:
                    
                    total_value += value
                    total_range += range
                    
                
            
            return ( total_value, total_range )
            
        
    
    def GetWatchers( self ):
        
        with self._lock:
            
            return list( self._watchers )
            
        
    
    def GetWatcherSimpleStatus( self, watcher ):
        
        with self._lock:
            
            watcher_key = watcher.GetWatcherKey()
            
            if watcher_key in self._watcher_keys_to_added_timestamps:
                
                added_timestamp = self._watcher_keys_to_added_timestamps[ watcher_key ]
                
                if HydrusData.TimeHasPassed( added_timestamp + self.ADDED_TIMESTAMP_DURATION ):
                    
                    self._CleanAddedTimestamps()
                    
                else:
                    
                    return 'just added'
                    
                
            
            if watcher_key in self._watcher_keys_to_already_in_timestamps:
                
                already_in_timestamp = self._watcher_keys_to_already_in_timestamps[ watcher_key ]
                
                if HydrusData.TimeHasPassed( already_in_timestamp + self.ADDED_TIMESTAMP_DURATION ):
                    
                    self._CleanAddedTimestamps()
                    
                else:
                    
                    return 'already watching'
                    
                
            
            return watcher.GetSimpleStatus()
            
        
    
    def RemoveWatcher( self, watcher_key ):
        
        with self._lock:
            
            self._RemoveWatcher( watcher_key )
            
            self._SetDirty()
            
        
    
    def SetHighlightedWatcher( self, highlighted_watcher ):
        
        with self._lock:
            
            if highlighted_watcher is None:
                
                self._highlighted_watcher_url = None
                
            else:
                
                self._highlighted_watcher_url = highlighted_watcher.GetURL()
                
                highlighted_watcher.PublishToPage( True )
                
            
        
    
    def SetOptions( self, checker_options, file_import_options, tag_import_options ):
        
        with self._lock:
            
            self._checker_options = checker_options
            self._file_import_options = file_import_options
            self._tag_import_options = tag_import_options
            
        
    
    def Start( self, page_key ):
        
        with self._lock:
            
            self._page_key = page_key
            
        
        # set a 2s period so the page value/range is pretty snappy
        self._watchers_repeating_job = HG.client_controller.CallRepeating( ClientImporting.GetRepeatingJobInitialDelay(), 2.0, self.REPEATINGWorkOnWatchers )
        
        for watcher in self._watchers:
            
            publish_to_page = False
            
            if self._highlighted_watcher_url is not None and watcher.GetURL() == self._highlighted_watcher_url:
                
                publish_to_page = True
                
            
            watcher.Start( page_key, publish_to_page )
            
        
    
    def REPEATINGWorkOnWatchers( self ):
        
        with self._lock:
            
            if ClientImporting.PageImporterShouldStopWorking( self._page_key ):
                
                self._watchers_repeating_job.Cancel()
                
                return
                
            
            if not self._status_dirty: # if we think we are clean
                
                for watcher in self._watchers:
                    
                    file_seed_cache = watcher.GetFileSeedCache()
                    
                    if file_seed_cache.GetStatusGenerationTime() > self._status_cache_generation_time: # has there been an update?
                        
                        self._SetDirty()
                        
                        break
                        
                    
                
            
        
        if HydrusData.TimeHasPassed( self._next_pub_value_check_time ):
            
            self._next_pub_value_check_time = HydrusData.GetNow() + 5
            
            current_value_range = self.GetValueRange()
            
            if current_value_range != self._last_pubbed_value_range:
                
                self._last_pubbed_value_range = current_value_range
                
                HG.client_controller.pub( 'refresh_page_name', self._page_key )
                
            
        
        # something like:
        # if any are dead, do some stuff with them based on some options here
        # might want to have this work on a 30s period or something
        
    
HydrusSerialisable.SERIALISABLE_TYPES_TO_OBJECT_TYPES[ HydrusSerialisable.SERIALISABLE_TYPE_MULTIPLE_WATCHER_IMPORT ] = MultipleWatcherImport
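
# A WatcherImport watches a single watchable URL (typically a thread). Its checker job
# periodically re-fetches the URL via a GallerySeed, queues any discovered file URLs in a
# FileSeedCache, and its files job downloads and imports them according to the file and tag
# import options. CheckerOptions decides the next check time and when the watcher is DEAD.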
class WatcherImport( HydrusSerialisable.SerialisableBase ):
    
    SERIALISABLE_TYPE = HydrusSerialisable.SERIALISABLE_TYPE_WATCHER_IMPORT
    SERIALISABLE_NAME = 'Watcher'
    SERIALISABLE_VERSION = 6
    
    MIN_CHECK_PERIOD = 30
    
    def __init__( self ):
        
        HydrusSerialisable.SerialisableBase.__init__( self )
        
        self._page_key = 'initialising page key'
        self._publish_to_page = False
        
        self._url = ''
        
        self._gallery_seed_log = ClientImportGallerySeeds.GallerySeedLog()
        self._file_seed_cache = ClientImportFileSeeds.FileSeedCache()
        
        self._urls_to_filenames = {}
        self._urls_to_md5_base64 = {}
        
        self._checker_options = HG.client_controller.new_options.GetDefaultWatcherCheckerOptions()
        self._file_import_options = HG.client_controller.new_options.GetDefaultFileImportOptions( 'loud' )
        self._tag_import_options = ClientImportOptions.TagImportOptions( is_default = True )
        
        self._last_check_time = 0
        self._checking_status = ClientImporting.CHECKER_STATUS_OK
        self._subject = 'unknown subject'
        
        self._next_check_time = None
        
        self._file_network_job = None
        self._checker_network_job = None
        
        self._check_now = False
        self._files_paused = False
        self._checking_paused = False
        
        self._no_work_until = 0
        self._no_work_until_reason = ''
        
        self._creation_time = HydrusData.GetNow()
        
        self._file_velocity_status = ''
        self._current_action = ''
        self._watcher_status = ''
        
        self._watcher_key = HydrusData.GenerateKey()
        
        self._lock = threading.Lock()
        
        self._last_pubbed_page_name = ''
        
        self._files_repeating_job = None
        self._checker_repeating_job = None
        
        HG.client_controller.sub( self, 'NotifyFileSeedsUpdated', 'file_seed_cache_file_seeds_updated' )
        
    
    def _CheckerNetworkJobPresentationContextFactory( self, network_job ):
        
        def enter_call():
            
            with self._lock:
                
                self._checker_network_job = network_job
                
            
        
        def exit_call():
            
            with self._lock:
                
                self._checker_network_job = None
                
            
        
        return ClientImporting.NetworkJobPresentationContext( enter_call, exit_call )
        
    
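    # _CheckWatchableURL performs one check pass: it wraps the watched URL in a GallerySeed,
    # works that seed against the gallery seed log (pushing any discovered file URLs into the
    # file seed cache), flags 404/error results, and then refreshes the file velocity status
    # and the next check time.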
    def _CheckWatchableURL( self ):
        
        def file_seeds_callable( file_seeds ):
            
            return ClientImporting.UpdateFileSeedCacheWithFileSeeds( self._file_seed_cache, file_seeds )
            
        
        def status_hook( text ):
            
            with self._lock:
                
                self._watcher_status = text
                
            
        
        def title_hook( text ):
            
            with self._lock:
                
                self._subject = text
                
            
        
        gallery_seed = ClientImportGallerySeeds.GallerySeed( self._url, can_generate_more_pages = False )
        
        self._gallery_seed_log.AddGallerySeeds( ( gallery_seed, ) )
        
        with self._lock:
            
            self._watcher_status = 'checking'
            
        
        try:
            
            ( num_urls_added, num_urls_already_in_file_seed_cache, num_urls_total, result_404, added_new_gallery_pages, stop_reason ) = gallery_seed.WorkOnURL( 'watcher', self._gallery_seed_log, file_seeds_callable, status_hook, title_hook, self._NetworkJobFactory, self._CheckerNetworkJobPresentationContextFactory, self._file_import_options )
            
            if num_urls_added > 0:
                
                ClientImporting.WakeRepeatingJob( self._files_repeating_job )
                
            
            if result_404:
                
                with self._lock:
                    
                    self._checking_paused = True
                    self._checking_status = ClientImporting.CHECKER_STATUS_404
                    
                
            
            if gallery_seed.status == CC.STATUS_ERROR:
                
                # the [DEAD] stuff can override watcher status, so let's give a brief time for this to display the error
                
                with self._lock:
                    
                    self._checking_paused = True
                    self._watcher_status = gallery_seed.note
                    
                
                time.sleep( 5 )
                
            
        except HydrusExceptions.NetworkException as e:
            
            delay = HG.client_controller.new_options.GetInteger( 'downloader_network_error_delay' )
            
            self._DelayWork( delay, str( e ) )
            
            HydrusData.PrintException( e )
            
        
        watcher_status = gallery_seed.note
        watcher_status_should_stick = gallery_seed.status != CC.STATUS_SUCCESSFUL_AND_NEW
        
        with self._lock:
            
            if self._check_now:
                
                self._check_now = False
                
            
            self._watcher_status = watcher_status
            
            self._last_check_time = HydrusData.GetNow()
            
            self._UpdateFileVelocityStatus()
            self._UpdateNextCheckTime()
            
            self._Compact()
            
        
        if not watcher_status_should_stick:
            
            time.sleep( 5 )
            
            with self._lock:
                
                self._watcher_status = ''
                
            
        
    
    def _Compact( self ):
        
        death_period = self._checker_options.GetDeathFileVelocityPeriod()
        
        compact_before_this_time = self._last_check_time - ( death_period * 2 )
        
        self._gallery_seed_log.Compact( compact_before_this_time )
        
    
    def _DelayWork( self, time_delta, reason ):
        
        self._no_work_until = HydrusData.GetNow() + time_delta
        self._no_work_until_reason = reason
        
    
    def _FileNetworkJobPresentationContextFactory( self, network_job ):
        
        def enter_call():
            
            with self._lock:
                
                self._file_network_job = network_job
                
            
        
        def exit_call():
            
            with self._lock:
                
                self._file_network_job = None
                
            
        
        return ClientImporting.NetworkJobPresentationContext( enter_call, exit_call )
        
    
    def _NetworkJobFactory( self, *args, **kwargs ):
        
        network_job = ClientNetworkingJobs.NetworkJobWatcherPage( self._watcher_key, *args, **kwargs )
        
        return network_job
        
    
    def _GetSerialisableInfo( self ):
        
        serialisable_gallery_seed_log = self._gallery_seed_log.GetSerialisableTuple()
        serialisable_file_seed_cache = self._file_seed_cache.GetSerialisableTuple()
        serialisable_checker_options = self._checker_options.GetSerialisableTuple()
        serialisable_file_import_options = self._file_import_options.GetSerialisableTuple()
        serialisable_tag_import_options = self._tag_import_options.GetSerialisableTuple()
        
        return ( self._url, serialisable_gallery_seed_log, serialisable_file_seed_cache, self._urls_to_filenames, self._urls_to_md5_base64, serialisable_checker_options, serialisable_file_import_options, serialisable_tag_import_options, self._last_check_time, self._files_paused, self._checking_paused, self._checking_status, self._subject, self._no_work_until, self._no_work_until_reason, self._creation_time )
        
    
    def _HasURL( self ):
        
        return self._url != ''
        
    
    def _InitialiseFromSerialisableInfo( self, serialisable_info ):
        
        ( self._url, serialisable_gallery_seed_log, serialisable_file_seed_cache, self._urls_to_filenames, self._urls_to_md5_base64, serialisable_checker_options, serialisable_file_import_options, serialisable_tag_import_options, self._last_check_time, self._files_paused, self._checking_paused, self._checking_status, self._subject, self._no_work_until, self._no_work_until_reason, self._creation_time ) = serialisable_info
        
        self._gallery_seed_log = HydrusSerialisable.CreateFromSerialisableTuple( serialisable_gallery_seed_log )
        self._file_seed_cache = HydrusSerialisable.CreateFromSerialisableTuple( serialisable_file_seed_cache )
        self._checker_options = HydrusSerialisable.CreateFromSerialisableTuple( serialisable_checker_options )
        self._file_import_options = HydrusSerialisable.CreateFromSerialisableTuple( serialisable_file_import_options )
        self._tag_import_options = HydrusSerialisable.CreateFromSerialisableTuple( serialisable_tag_import_options )
        
    
    def _UpdateFileVelocityStatus( self ):
        
        self._file_velocity_status = self._checker_options.GetPrettyCurrentVelocity( self._file_seed_cache, self._last_check_time )
        
    
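    # _UpdateNextCheckTime asks the CheckerOptions whether the watcher should now be considered
    # DEAD (based on recent file velocity) and, if checking can continue, when the next check
    # should happen; a 'check now' request is clamped to MIN_CHECK_PERIOD after the last check.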
    def _UpdateNextCheckTime( self ):
        
        if self._check_now:
            
            self._next_check_time = self._last_check_time + self.MIN_CHECK_PERIOD
            
        else:
            
            if not HydrusData.TimeHasPassed( self._no_work_until ):
                
                self._next_check_time = self._no_work_until + 1
                
            else:
                
                if self._checking_status == ClientImporting.CHECKER_STATUS_OK:
                    
                    if self._checker_options.IsDead( self._file_seed_cache, self._last_check_time ):
                        
                        self._checking_status = ClientImporting.CHECKER_STATUS_DEAD
                        
                    
                
                if self._checking_status != ClientImporting.CHECKER_STATUS_OK:
                    
                    self._checking_paused = True
                    
                
                last_next_check_time = self._next_check_time
                
                self._next_check_time = self._checker_options.GetNextCheckTime( self._file_seed_cache, self._last_check_time, last_next_check_time )
                
            
        
    
    def _UpdateSerialisableInfo( self, version, old_serialisable_info ):
        
        if version == 1:
            
            ( url, serialisable_file_seed_cache, urls_to_filenames, urls_to_md5_base64, serialisable_file_import_options, serialisable_tag_import_options, times_to_check, check_period, last_check_time, paused ) = old_serialisable_info
            
            checker_options = ClientImportOptions.CheckerOptions( intended_files_per_check = 8, never_faster_than = 300, never_slower_than = 86400, death_file_velocity = ( 1, 86400 ) )
            
            serialisable_checker_options = checker_options.GetSerialisableTuple()
            
            files_paused = paused
            checking_paused = paused
            
            new_serialisable_info = ( url, serialisable_file_seed_cache, urls_to_filenames, urls_to_md5_base64, serialisable_checker_options, serialisable_file_import_options, serialisable_tag_import_options, last_check_time, files_paused, checking_paused )
            
            return ( 2, new_serialisable_info )
            
        
        if version == 2:
            
            ( url, serialisable_file_seed_cache, urls_to_filenames, urls_to_md5_base64, serialisable_checker_options, serialisable_file_import_options, serialisable_tag_import_options, last_check_time, files_paused, checking_paused ) = old_serialisable_info
            
            checking_status = ClientImporting.CHECKER_STATUS_OK
            subject = 'unknown subject'
            
            new_serialisable_info = ( url, serialisable_file_seed_cache, urls_to_filenames, urls_to_md5_base64, serialisable_checker_options, serialisable_file_import_options, serialisable_tag_import_options, last_check_time, files_paused, checking_paused, checking_status, subject )
            
            return ( 3, new_serialisable_info )
            
        
        if version == 3:
            
            ( url, serialisable_file_seed_cache, urls_to_filenames, urls_to_md5_base64, serialisable_checker_options, serialisable_file_import_options, serialisable_tag_import_options, last_check_time, files_paused, checking_paused, checking_status, subject ) = old_serialisable_info
            
            no_work_until = 0
            no_work_until_reason = ''
            
            new_serialisable_info = ( url, serialisable_file_seed_cache, urls_to_filenames, urls_to_md5_base64, serialisable_checker_options, serialisable_file_import_options, serialisable_tag_import_options, last_check_time, files_paused, checking_paused, checking_status, subject, no_work_until, no_work_until_reason )
            
            return ( 4, new_serialisable_info )
            
        
        if version == 4:
            
            ( url, serialisable_file_seed_cache, urls_to_filenames, urls_to_md5_base64, serialisable_checker_options, serialisable_file_import_options, serialisable_tag_import_options, last_check_time, files_paused, checking_paused, checking_status, subject, no_work_until, no_work_until_reason ) = old_serialisable_info
            
            creation_time = HydrusData.GetNow()
            
            new_serialisable_info = ( url, serialisable_file_seed_cache, urls_to_filenames, urls_to_md5_base64, serialisable_checker_options, serialisable_file_import_options, serialisable_tag_import_options, last_check_time, files_paused, checking_paused, checking_status, subject, no_work_until, no_work_until_reason, creation_time )
            
            return ( 5, new_serialisable_info )
            
        
        if version == 5:
            
            ( url, serialisable_file_seed_cache, urls_to_filenames, urls_to_md5_base64, serialisable_checker_options, serialisable_file_import_options, serialisable_tag_import_options, last_check_time, files_paused, checking_paused, checking_status, subject, no_work_until, no_work_until_reason, creation_time ) = old_serialisable_info
            
            gallery_seed_log = ClientImportGallerySeeds.GallerySeedLog()
            
            serialisable_gallery_seed_log = gallery_seed_log.GetSerialisableTuple()
            
            new_serialisable_info = ( url, serialisable_gallery_seed_log, serialisable_file_seed_cache, urls_to_filenames, urls_to_md5_base64, serialisable_checker_options, serialisable_file_import_options, serialisable_tag_import_options, last_check_time, files_paused, checking_paused, checking_status, subject, no_work_until, no_work_until_reason, creation_time )
            
            return ( 6, new_serialisable_info )
            
        
    
    def _WorkOnFiles( self ):
        
        file_seed = self._file_seed_cache.GetNextFileSeed( CC.STATUS_UNKNOWN )
        
        if file_seed is None:
            
            return
            
        
        did_substantial_work = False
        
        def status_hook( text ):
            
            with self._lock:
                
                self._current_action = text
                
            
        
        did_substantial_work = file_seed.WorkOnURL( self._file_seed_cache, status_hook, self._NetworkJobFactory, self._FileNetworkJobPresentationContextFactory, self._file_import_options, self._tag_import_options )
        
        with self._lock:
            
            should_present = self._publish_to_page and file_seed.ShouldPresent( self._file_import_options )
            
            page_key = self._page_key
            
        
        if should_present:
            
            file_seed.PresentToPage( page_key )
            
            did_substantial_work = True
            
        
        with self._lock:
            
            self._current_action = ''
            
        
        if did_substantial_work:
            
            time.sleep( ClientImporting.DID_SUBSTANTIAL_FILE_WORK_MINIMUM_SLEEP_TIME )
            
        
    
    def CanRetryFailed( self ):
        
        with self._lock:
            
            return self._file_seed_cache.GetFileSeedCount( CC.STATUS_ERROR ) > 0
            
        
    
    def CheckingPaused( self ):
        
        with self._lock:
            
            return self._checking_paused
            
        
    
    def CheckNow( self ):
        
        with self._lock:
            
            self._check_now = True
            
            self._checking_paused = False
            
            self._no_work_until = 0
            self._no_work_until_reason = ''
            
            self._checking_status = ClientImporting.CHECKER_STATUS_OK
            
            self._UpdateNextCheckTime()
            
            ClientImporting.WakeRepeatingJob( self._checker_repeating_job )
            
        
    
    def CurrentlyAlive( self ):
        
        with self._lock:
            
            return self._checking_status == ClientImporting.CHECKER_STATUS_OK
            
        
    
    def CurrentlyWorking( self ):
        
        with self._lock:
            
            finished = not self._file_seed_cache.WorkToDo()
            
            return not finished and not self._files_paused
            
        
    
    def FilesPaused( self ):
        
        with self._lock:
            
            return self._files_paused
            
        
    
    def GetCheckerOptions( self ):
        
        with self._lock:
            
            return self._checker_options
            
        
    
    def GetCreationTime( self ):
        
        with self._lock:
            
            return self._creation_time
            
        
    
    def GetFileImportOptions( self ):
        
        with self._lock:
            
            return self._file_import_options
            
        
    
    def GetFileSeedCache( self ):
        
        with self._lock:
            
            return self._file_seed_cache
            
        
    
    def GetGallerySeedLog( self ):
        
        with self._lock:
            
            return self._gallery_seed_log
            
        
    
    def GetHashes( self ):
        
        with self._lock:
            
            return self._file_seed_cache.GetHashes()
            
        
    
    def GetNetworkJobs( self ):
        
        with self._lock:
            
            return ( self._file_network_job, self._checker_network_job )
            
        
    
    def GetNewHashes( self ):
        
        with self._lock:
            
            file_import_options = ClientImportOptions.FileImportOptions()
            
            file_import_options.SetPresentationOptions( True, False, False )
            
            return self._file_seed_cache.GetPresentedHashes( file_import_options )
            
        
    
    def GetOptions( self ):
        
        with self._lock:
            
            return ( self._url, self._file_import_options, self._tag_import_options )
            
        
    
    def GetPresentedHashes( self ):
        
        with self._lock:
            
            return self._file_seed_cache.GetPresentedHashes( self._file_import_options )
            
        
    
    def GetSimpleStatus( self ):
        
        with self._lock:
            
            if self._checking_status == ClientImporting.CHECKER_STATUS_404:
                
                return '404'
                
            elif self._checking_status == ClientImporting.CHECKER_STATUS_DEAD:
                
                return 'DEAD'
                
            elif not HydrusData.TimeHasPassed( self._no_work_until ):
                
                return self._no_work_until_reason + ' - ' + 'next check ' + HydrusData.TimestampToPrettyTimeDelta( self._next_check_time )
                
            else:
                
                return ''
                
            
        
    
    def GetStatus( self ):
        
        with self._lock:
            
            current_action = self._current_action
            
            if self._checking_status == ClientImporting.CHECKER_STATUS_404:
                
                watcher_status = 'URL 404'
                
            elif self._checking_status == ClientImporting.CHECKER_STATUS_DEAD:
                
                watcher_status = 'URL DEAD'
                
            elif not HydrusData.TimeHasPassed( self._no_work_until ):
                
                no_work_text = self._no_work_until_reason + ' - ' + 'next check ' + HydrusData.TimestampToPrettyTimeDelta( self._next_check_time )
                
                current_action = no_work_text
                watcher_status = no_work_text
                
            else:
                
                watcher_status = self._watcher_status
                
            
            return ( current_action, self._files_paused, self._file_velocity_status, self._next_check_time, watcher_status, self._subject, self._checking_status, self._check_now, self._checking_paused )
            
        
    
    def GetSubject( self ):
        
        with self._lock:
            
            if self._subject in ( None, '' ):
                
                return 'unknown subject'
                
            else:
                
                return self._subject
                
            
        
    
    def GetTagImportOptions( self ):
        
        with self._lock:
            
            return self._tag_import_options
            
        
    
    def GetWatcherKey( self ):
        
        with self._lock:
            
            return self._watcher_key
            
        
    
    def GetURL( self ):
        
        with self._lock:
            
            return self._url
            
        
    
    def GetValueRange( self ):
        
        with self._lock:
            
            return self._file_seed_cache.GetValueRange()
            
        
    
    def HasURL( self ):
        
        with self._lock:
            
            return self._HasURL()
            
        
    
    def _IsDead( self ):
        
        return self._checking_status in ( ClientImporting.CHECKER_STATUS_404, ClientImporting.CHECKER_STATUS_DEAD )
        
    
    def IsDead( self ):
        
        with self._lock:
            
            return self._IsDead()
            
        
    
    def NotifyFileSeedsUpdated( self, file_seed_cache_key, file_seeds ):
        
        if file_seed_cache_key == self._file_seed_cache.GetFileSeedCacheKey():
            
            ClientImporting.WakeRepeatingJob( self._files_repeating_job )
            
        
    
    def PausePlayChecking( self ):
        
        with self._lock:
            
            if self._checking_paused and self._IsDead():
                
                return # watcher is dead, so don't unpause until a checknow event
                
            else:
                
                self._checking_paused = not self._checking_paused
                
                ClientImporting.WakeRepeatingJob( self._checker_repeating_job )
                
            
        
    
    def PausePlayFiles( self ):
        
        with self._lock:
            
            self._files_paused = not self._files_paused
            
            ClientImporting.WakeRepeatingJob( self._files_repeating_job )
            
        
    
    def PublishToPage( self, publish_to_page ):
        
        with self._lock:
            
            self._publish_to_page = publish_to_page
            
        
    
    def Repage( self, page_key ):
        
        with self._lock:
            
            self._page_key = page_key
            
        
    
    def RetryFailed( self ):
        
        with self._lock:
            
            self._file_seed_cache.RetryFailures()
            
        
    
    def SetCheckerOptions( self, checker_options ):
        
        with self._lock:
            
            self._checker_options = checker_options
            
            self._UpdateNextCheckTime()
            
            self._UpdateFileVelocityStatus()
            
            ClientImporting.WakeRepeatingJob( self._checker_repeating_job )
            
        
    
    def SetFileImportOptions( self, file_import_options ):
        
        with self._lock:
            
            self._file_import_options = file_import_options
            
        
    
    def SetTagImportOptions( self, tag_import_options ):
        
        with self._lock:
            
            self._tag_import_options = tag_import_options
            
        
    
    def SetURL( self, url ):
        
        if url is None:
            
            url = ''
            
        
        if url != '':
            
            url = HG.client_controller.network_engine.domain_manager.NormaliseURL( url )
            
        
        with self._lock:
            
            self._url = url
            
            ClientImporting.WakeRepeatingJob( self._checker_repeating_job )
            
        
    
    def Start( self, page_key, publish_to_page ):
        
        self._page_key = page_key
        self._publish_to_page = publish_to_page
        
        self._UpdateNextCheckTime()
        
        self._UpdateFileVelocityStatus()
        
        self._files_repeating_job = HG.client_controller.CallRepeating( ClientImporting.GetRepeatingJobInitialDelay(), ClientImporting.REPEATING_JOB_TYPICAL_PERIOD, self.REPEATINGWorkOnFiles )
        self._checker_repeating_job = HG.client_controller.CallRepeating( ClientImporting.GetRepeatingJobInitialDelay(), ClientImporting.REPEATING_JOB_TYPICAL_PERIOD, self.REPEATINGWorkOnChecker )
        
        self._files_repeating_job.SetThreadSlotType( 'watcher_files' )
        self._checker_repeating_job.SetThreadSlotType( 'watcher_check' )
        
    
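    # The two REPEATING methods below are the callbacks for the repeating jobs created in
    # Start(). On every pass they re-evaluate the pause flags, the global pause options, page
    # visibility and network engine load before doing any work, and they cancel themselves
    # once the owning page reports it should stop working.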
    def REPEATINGWorkOnFiles( self ):
        
        with self._lock:
            
            if ClientImporting.PageImporterShouldStopWorking( self._page_key ):
                
                self._files_repeating_job.Cancel()
                
                return
                
            
            files_paused = self._files_paused or HG.client_controller.new_options.GetBoolean( 'pause_all_file_queues' )
            
            work_pending = self._file_seed_cache.WorkToDo() and not files_paused
            no_delays = HydrusData.TimeHasPassed( self._no_work_until )
            page_shown = not HG.client_controller.PageClosedButNotDestroyed( self._page_key )
            network_engine_good = not HG.client_controller.network_engine.IsBusy()
            
            ok_to_work = work_pending and no_delays and page_shown and network_engine_good
            
        
        while ok_to_work:
            
            try:
                
                self._WorkOnFiles()
                
                HG.client_controller.WaitUntilViewFree()
                
            except Exception as e:
                
                HydrusData.ShowException( e )
                
            
            with self._lock:
                
                if ClientImporting.PageImporterShouldStopWorking( self._page_key ):
                    
                    self._files_repeating_job.Cancel()
                    
                    return
                    
                
                files_paused = self._files_paused or HG.client_controller.new_options.GetBoolean( 'pause_all_file_queues' )
                
                work_pending = self._file_seed_cache.WorkToDo() and not files_paused
                no_delays = HydrusData.TimeHasPassed( self._no_work_until )
                page_shown = not HG.client_controller.PageClosedButNotDestroyed( self._page_key )
                network_engine_good = not HG.client_controller.network_engine.IsBusy()
                
                ok_to_work = work_pending and no_delays and page_shown and network_engine_good
                
            
        
    
    def REPEATINGWorkOnChecker( self ):
        
        with self._lock:
            
            if ClientImporting.PageImporterShouldStopWorking( self._page_key ):
                
                self._checker_repeating_job.Cancel()
                
                return
                
            
            checking_paused = self._checking_paused or HG.client_controller.new_options.GetBoolean( 'pause_all_watcher_checkers' )
            
            able_to_check = self._checking_status == ClientImporting.CHECKER_STATUS_OK and self._HasURL() and not checking_paused
            check_due = HydrusData.TimeHasPassed( self._next_check_time )
            no_delays = HydrusData.TimeHasPassed( self._no_work_until )
            page_shown = not HG.client_controller.PageClosedButNotDestroyed( self._page_key )
            network_engine_good = not HG.client_controller.network_engine.IsBusy()
            
            time_to_check = able_to_check and check_due and no_delays and page_shown and network_engine_good
            
        
        if time_to_check:
            
            try:
                
                self._CheckWatchableURL()
                
            except Exception as e:
                
                HydrusData.ShowException( e )
                
            
        
    
HydrusSerialisable.SERIALISABLE_TYPES_TO_OBJECT_TYPES[ HydrusSerialisable.SERIALISABLE_TYPE_WATCHER_IMPORT ] = WatcherImport
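
# A minimal usage sketch (not part of the original file): in the real client these objects are
# created and driven by the page management code, and they assume a fully initialised
# HG.client_controller. The page_key below is a hypothetical placeholder.
#
#     page_key = HydrusData.GenerateKey()
#
#     multiple_watcher = MultipleWatcherImport()
#     multiple_watcher.Start( page_key )
#
#     # queue a thread URL to watch; returns the new WatcherImport, or None for an empty or duplicate URL
#     watcher = multiple_watcher.AddURL( 'https://example.com/thread/123', None )
#
#     if watcher is not None:
#         print( multiple_watcher.GetWatcherSimpleStatus( watcher ) )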