hydrus/hydrus/client/importing/ClientImportGallerySeeds.py

1149 lines
37 KiB
Python

import collections
import itertools
import os
import random
import threading
import time
import traceback
import typing
from hydrus.core import HydrusConstants as HC
from hydrus.core import HydrusData
from hydrus.core import HydrusExceptions
from hydrus.core import HydrusGlobals as HG
from hydrus.core import HydrusSerialisable
from hydrus.client import ClientConstants as CC
from hydrus.client import ClientParsing
from hydrus.client.importing import ClientImporting
from hydrus.client.metadata import ClientTags
def GenerateGallerySeedLogStatus( statuses_to_counts ):
num_successful = statuses_to_counts[ CC.STATUS_SUCCESSFUL_AND_NEW ]
num_ignored = statuses_to_counts[ CC.STATUS_VETOED ]
num_failed = statuses_to_counts[ CC.STATUS_ERROR ]
num_skipped = statuses_to_counts[ CC.STATUS_SKIPPED ]
num_unknown = statuses_to_counts[ CC.STATUS_UNKNOWN ]
# add some kind of '(512 files found (so far))', which may be asking too much here
# might be this is complicated and needs to be (partly) done in the object, which will know if it is paused or whatever.
status_strings = []
if num_successful > 0:
s = HydrusData.ToHumanInt( num_successful ) + ' successful'
status_strings.append( s )
if num_ignored > 0:
status_strings.append( HydrusData.ToHumanInt( num_ignored ) + ' ignored' )
if num_failed > 0:
status_strings.append( HydrusData.ToHumanInt( num_failed ) + ' failed' )
if num_skipped > 0:
status_strings.append( HydrusData.ToHumanInt( num_skipped ) + ' skipped' )
if num_unknown > 0:
status_strings.append( HydrusData.ToHumanInt( num_unknown ) + ' pending' )
status = ', '.join( status_strings )
total = sum( statuses_to_counts.values() )
total_processed = total - num_unknown
return ( status, ( total_processed, total ) )
class GallerySeed( HydrusSerialisable.SerialisableBase ):
SERIALISABLE_TYPE = HydrusSerialisable.SERIALISABLE_TYPE_GALLERY_SEED
SERIALISABLE_NAME = 'Gallery Log Entry'
SERIALISABLE_VERSION = 3
def __init__( self, url = None, can_generate_more_pages = True ):
if url is None:
url = 'https://nostrils-central.cx/index.php?post=s&tag=hyper_nostrils&page=3'
else:
try:
url = HG.client_controller.network_engine.domain_manager.NormaliseURL( url )
except HydrusExceptions.URLClassException:
pass
HydrusSerialisable.SerialisableBase.__init__( self )
self.url = url
self._can_generate_more_pages = can_generate_more_pages
self._external_filterable_tags = set()
self._external_additional_service_keys_to_tags = ClientTags.ServiceKeysToTags()
self.created = HydrusData.GetNow()
self.modified = self.created
self.status = CC.STATUS_UNKNOWN
self.note = ''
self._referral_url = None
self._force_next_page_url_generation = False
self._run_token = HydrusData.GenerateKey()
def __eq__( self, other ):
if isinstance( other, GallerySeed ):
return self.__hash__() == other.__hash__()
return NotImplemented
def __hash__( self ):
return ( self.url, self._run_token ).__hash__()
def __ne__( self, other ):
return self.__hash__() != other.__hash__()
def _GetSerialisableInfo( self ):
serialisable_external_filterable_tags = list( self._external_filterable_tags )
serialisable_external_additional_service_keys_to_tags = self._external_additional_service_keys_to_tags.GetSerialisableTuple()
return (
self.url,
self._can_generate_more_pages,
serialisable_external_filterable_tags,
serialisable_external_additional_service_keys_to_tags,
self.created,
self.modified,
self.status,
self.note,
self._referral_url
)
def _InitialiseFromSerialisableInfo( self, serialisable_info ):
(
self.url,
self._can_generate_more_pages,
serialisable_external_filterable_tags,
serialisable_external_additional_service_keys_to_tags,
self.created,
self.modified,
self.status,
self.note,
self._referral_url
) = serialisable_info
self._external_filterable_tags = set( serialisable_external_filterable_tags )
self._external_additional_service_keys_to_tags = HydrusSerialisable.CreateFromSerialisableTuple( serialisable_external_additional_service_keys_to_tags )
def _UpdateModified( self ):
self.modified = HydrusData.GetNow()
def _UpdateSerialisableInfo( self, version, old_serialisable_info ):
if version == 1:
( url, can_generate_more_pages, created, modified, status, note, referral_url ) = old_serialisable_info
external_additional_service_keys_to_tags = ClientTags.ServiceKeysToTags()
serialisable_external_additional_service_keys_to_tags = external_additional_service_keys_to_tags.GetSerialisableTuple()
new_serialisable_info = ( url, can_generate_more_pages, serialisable_external_additional_service_keys_to_tags, created, modified, status, note, referral_url )
return ( 2, new_serialisable_info )
if version == 2:
( url, can_generate_more_pages, serialisable_external_additional_service_keys_to_tags, created, modified, status, note, referral_url ) = old_serialisable_info
external_filterable_tags = set()
serialisable_external_filterable_tags = list( external_filterable_tags )
new_serialisable_info = ( url, can_generate_more_pages, serialisable_external_filterable_tags, serialisable_external_additional_service_keys_to_tags, created, modified, status, note, referral_url )
return ( 3, new_serialisable_info )
def ForceNextPageURLGeneration( self ):
self._force_next_page_url_generation = True
def GenerateRestartedDuplicate( self, can_generate_more_pages ):
gallery_seed = GallerySeed( url = self.url, can_generate_more_pages = can_generate_more_pages )
if can_generate_more_pages:
gallery_seed.ForceNextPageURLGeneration()
return gallery_seed
def GetAPIInfoDict( self, simple ):
d = {}
d[ 'url' ] = self.url
d[ 'created' ] = self.created
d[ 'modified' ] = self.modified
d[ 'status' ] = self.status
d[ 'note' ] = self.note
return d
def GetExampleNetworkJob( self, network_job_factory ):
try:
( url_to_check, parser ) = HG.client_controller.network_engine.domain_manager.GetURLToFetchAndParser( self.url )
except HydrusExceptions.URLClassException:
url_to_check = self.url
network_job = network_job_factory( 'GET', url_to_check )
return network_job
def SetExternalAdditionalServiceKeysToTags( self, service_keys_to_tags ):
self._external_additional_service_keys_to_tags = ClientTags.ServiceKeysToTags( service_keys_to_tags )
def SetExternalFilterableTags( self, tags ):
self._external_filterable_tags = set( tags )
def SetReferralURL( self, referral_url ):
self._referral_url = referral_url
def SetRunToken( self, run_token: bytes ):
self._run_token = run_token
def SetStatus( self, status, note = '', exception = None ):
if exception is not None:
first_line = str( exception ).split( os.linesep )[0]
note = first_line + '\u2026 (Copy note to see full error)'
note += os.linesep
note += traceback.format_exc()
HydrusData.Print( 'Error when processing ' + self.url + ' !' )
HydrusData.Print( traceback.format_exc() )
self.status = status
self.note = note
self._UpdateModified()
def WorksInNewSystem( self ):
( url_type, match_name, can_parse ) = HG.client_controller.network_engine.domain_manager.GetURLParseCapability( self.url )
if url_type == HC.URL_TYPE_GALLERY and can_parse:
return True
return False
def WorkOnURL( self, gallery_token_name, gallery_seed_log, file_seeds_callable, status_hook, title_hook, network_job_factory, network_job_presentation_context_factory, file_import_options, gallery_urls_seen_before = None ):
if gallery_urls_seen_before is None:
gallery_urls_seen_before = set()
gallery_urls_seen_before.add( self.url )
# maybe something like 'append urls' vs 'reverse-prepend' for subs or something
# should also take--and populate--a set of urls we have seen this 'run', so we can bomb out if next_gallery_url ends up in some loop
num_urls_added = 0
num_urls_already_in_file_seed_cache = 0
num_urls_total = 0
result_404 = False
added_new_gallery_pages = False
stop_reason = ''
try:
gallery_url = self.url
( url_type, match_name, can_parse ) = HG.client_controller.network_engine.domain_manager.GetURLParseCapability( gallery_url )
if url_type not in ( HC.URL_TYPE_GALLERY, HC.URL_TYPE_WATCHABLE ):
raise HydrusExceptions.VetoException( 'Did not recognise this as a gallery or watchable URL!' )
if not can_parse:
raise HydrusExceptions.VetoException( 'Did not have a parser for this URL!' )
( url_to_check, parser ) = HG.client_controller.network_engine.domain_manager.GetURLToFetchAndParser( gallery_url )
status_hook( 'downloading gallery page' )
if self._referral_url not in ( gallery_url, url_to_check ):
referral_url = self._referral_url
else:
referral_url = None
network_job = network_job_factory( 'GET', url_to_check, referral_url = referral_url )
network_job.SetGalleryToken( gallery_token_name )
network_job.OverrideBandwidth( 30 )
HG.client_controller.network_engine.AddJob( network_job )
with network_job_presentation_context_factory( network_job ) as njpc:
network_job.WaitUntilDone()
parsing_text = network_job.GetContentText()
actual_fetched_url = network_job.GetActualFetchedURL()
do_parse = True
if actual_fetched_url != url_to_check:
( url_type, match_name, can_parse ) = HG.client_controller.network_engine.domain_manager.GetURLParseCapability( actual_fetched_url )
if url_type == HC.URL_TYPE_GALLERY:
if can_parse:
gallery_url = actual_fetched_url
( url_to_check, parser ) = HG.client_controller.network_engine.domain_manager.GetURLToFetchAndParser( gallery_url )
else:
do_parse = False
from hydrus.client.importing import ClientImportFileSeeds
file_seed = ClientImportFileSeeds.FileSeed( ClientImportFileSeeds.FILE_SEED_TYPE_URL, actual_fetched_url )
file_seed.SetReferralURL( gallery_url )
file_seeds = [ file_seed ]
all_parse_results = []
if do_parse:
parsing_context = {}
parsing_context[ 'gallery_url' ] = gallery_url
parsing_context[ 'url' ] = url_to_check
parsing_context[ 'post_index' ] = '0'
all_parse_results = parser.Parse( parsing_context, parsing_text )
if len( all_parse_results ) == 0:
raise HydrusExceptions.VetoException( 'The parser found nothing in the document!' )
file_seeds = ClientImporting.ConvertAllParseResultsToFileSeeds( all_parse_results, gallery_url, file_import_options )
title = ClientParsing.GetTitleFromAllParseResults( all_parse_results )
if title is not None:
title_hook( title )
for file_seed in file_seeds:
file_seed.SetExternalFilterableTags( self._external_filterable_tags )
file_seed.SetExternalAdditionalServiceKeysToTags( self._external_additional_service_keys_to_tags )
num_urls_total = len( file_seeds )
( num_urls_added, num_urls_already_in_file_seed_cache, can_search_for_more_files, stop_reason ) = file_seeds_callable( file_seeds )
status = CC.STATUS_SUCCESSFUL_AND_NEW
if do_parse:
note = HydrusData.ToHumanInt( num_urls_added ) + ' new urls found'
else:
note = 'was redirected to a non-gallery url, which has been queued as a file import'
if num_urls_already_in_file_seed_cache > 0:
note += ' (' + HydrusData.ToHumanInt( num_urls_already_in_file_seed_cache ) + ' of page already in)'
if not can_search_for_more_files:
note += ' - ' + stop_reason
if parser.CanOnlyGenerateGalleryURLs() or self._force_next_page_url_generation:
can_add_more_gallery_urls = True
else:
# only keep searching if we found any files, otherwise this could be a blank results page with another stub page
can_add_more_gallery_urls = num_urls_added > 0 and can_search_for_more_files
flattened_results = list( itertools.chain.from_iterable( all_parse_results ) )
sub_gallery_urls = ClientParsing.GetURLsFromParseResults( flattened_results, ( HC.URL_TYPE_SUB_GALLERY, ), only_get_top_priority = True )
sub_gallery_urls = HydrusData.DedupeList( sub_gallery_urls )
new_sub_gallery_urls = [ sub_gallery_url for sub_gallery_url in sub_gallery_urls if sub_gallery_url not in gallery_urls_seen_before ]
num_new_sub_gallery_urls = len( new_sub_gallery_urls )
if num_new_sub_gallery_urls > 0:
sub_gallery_seeds = [ GallerySeed( sub_gallery_url ) for sub_gallery_url in new_sub_gallery_urls ]
for sub_gallery_seed in sub_gallery_seeds:
sub_gallery_seed.SetRunToken( self._run_token )
sub_gallery_seed.SetExternalFilterableTags( self._external_filterable_tags )
sub_gallery_seed.SetExternalAdditionalServiceKeysToTags( self._external_additional_service_keys_to_tags )
gallery_seed_log.AddGallerySeeds( sub_gallery_seeds )
added_new_gallery_pages = True
gallery_urls_seen_before.update( sub_gallery_urls )
note += ' - {} sub-gallery urls found'.format( HydrusData.ToHumanInt( num_new_sub_gallery_urls ) )
if self._can_generate_more_pages and can_add_more_gallery_urls:
next_page_urls = ClientParsing.GetURLsFromParseResults( flattened_results, ( HC.URL_TYPE_NEXT, ), only_get_top_priority = True )
if self.url in next_page_urls:
next_page_urls.remove( self.url )
if url_to_check in next_page_urls:
next_page_urls.remove( url_to_check )
if len( next_page_urls ) > 0:
next_page_generation_phrase = ' next gallery pages found'
else:
# we have failed to parse a next page url, but we would still like one, so let's see if the url match can provide one
url_class = HG.client_controller.network_engine.domain_manager.GetURLClass( url_to_check )
if url_class is not None and url_class.CanGenerateNextGalleryPage():
try:
next_page_url = url_class.GetNextGalleryPage( url_to_check )
next_page_urls = [ next_page_url ]
except Exception as e:
note += ' - Attempted to generate a next gallery page url, but failed!'
note += os.linesep
note += traceback.format_exc()
next_page_generation_phrase = ' next gallery pages extrapolated from url class'
if len( next_page_urls ) > 0:
next_page_urls = HydrusData.DedupeList( next_page_urls )
new_next_page_urls = [ next_page_url for next_page_url in next_page_urls if next_page_url not in gallery_urls_seen_before ]
duplicate_next_page_urls = gallery_urls_seen_before.intersection( new_next_page_urls )
num_new_next_page_urls = len( new_next_page_urls )
num_dupe_next_page_urls = len( duplicate_next_page_urls )
if num_new_next_page_urls > 0:
next_gallery_seeds = [ GallerySeed( next_page_url ) for next_page_url in new_next_page_urls ]
for next_gallery_seed in next_gallery_seeds:
next_gallery_seed.SetRunToken( self._run_token )
next_gallery_seed.SetExternalFilterableTags( self._external_filterable_tags )
next_gallery_seed.SetExternalAdditionalServiceKeysToTags( self._external_additional_service_keys_to_tags )
gallery_seed_log.AddGallerySeeds( next_gallery_seeds )
added_new_gallery_pages = True
gallery_urls_seen_before.update( new_next_page_urls )
if num_dupe_next_page_urls == 0:
note += ' - ' + HydrusData.ToHumanInt( num_new_next_page_urls ) + next_page_generation_phrase
else:
note += ' - ' + HydrusData.ToHumanInt( num_new_next_page_urls ) + next_page_generation_phrase + ', but ' + HydrusData.ToHumanInt( num_dupe_next_page_urls ) + ' had already been visited this run and were not added'
else:
note += ' - ' + HydrusData.ToHumanInt( num_dupe_next_page_urls ) + next_page_generation_phrase + ', but they had already been visited this run and were not added'
self.SetStatus( status, note = note )
except HydrusExceptions.ShutdownException:
pass
except HydrusExceptions.VetoException as e:
status = CC.STATUS_VETOED
note = str( e )
self.SetStatus( status, note = note )
if isinstance( e, HydrusExceptions.CancelledException ):
status_hook( 'cancelled!' )
time.sleep( 2 )
except HydrusExceptions.InsufficientCredentialsException:
status = CC.STATUS_VETOED
note = '403'
self.SetStatus( status, note = note )
status_hook( '403' )
time.sleep( 2 )
result_404 = True
except HydrusExceptions.NotFoundException:
status = CC.STATUS_VETOED
note = '404'
self.SetStatus( status, note = note )
status_hook( '404' )
time.sleep( 2 )
result_404 = True
except Exception as e:
status = CC.STATUS_ERROR
self.SetStatus( status, exception = e )
status_hook( 'error!' )
time.sleep( 3 )
if isinstance( e, HydrusExceptions.NetworkException ): # so the larger queue can set a delaywork or whatever
raise
gallery_seed_log.NotifyGallerySeedsUpdated( ( self, ) )
return ( num_urls_added, num_urls_already_in_file_seed_cache, num_urls_total, result_404, added_new_gallery_pages, stop_reason )
HydrusSerialisable.SERIALISABLE_TYPES_TO_OBJECT_TYPES[ HydrusSerialisable.SERIALISABLE_TYPE_GALLERY_SEED ] = GallerySeed
class GallerySeedLog( HydrusSerialisable.SerialisableBase ):
SERIALISABLE_TYPE = HydrusSerialisable.SERIALISABLE_TYPE_GALLERY_SEED_LOG
SERIALISABLE_NAME = 'Gallery Log'
SERIALISABLE_VERSION = 1
COMPACT_NUMBER = 100
def __init__( self ):
HydrusSerialisable.SerialisableBase.__init__( self )
self._gallery_seeds = HydrusSerialisable.SerialisableList()
self._gallery_seeds_to_indices = {}
self._gallery_seed_log_key = HydrusData.GenerateKey()
self._status_cache = None
self._status_dirty = True
self._lock = threading.Lock()
def __len__( self ):
return len( self._gallery_seeds )
def _GenerateStatus( self ):
statuses_to_counts = self._GetStatusesToCounts()
self._status_cache = GenerateGallerySeedLogStatus( statuses_to_counts )
self._status_dirty = False
def _GetNextGallerySeed( self, status: int ) -> typing.Optional[ GallerySeed ]:
for gallery_seed in self._gallery_seeds:
if gallery_seed.status == status:
return gallery_seed
return None
def _GetStatusesToCounts( self ):
statuses_to_counts = collections.Counter()
for gallery_seed in self._gallery_seeds:
statuses_to_counts[ gallery_seed.status ] += 1
return statuses_to_counts
def _GetGallerySeeds( self, status = None ):
if status is None:
return list( self._gallery_seeds )
else:
return [ gallery_seed for gallery_seed in self._gallery_seeds if gallery_seed.status == status ]
def _GetSerialisableInfo( self ):
return self._gallery_seeds.GetSerialisableTuple()
def _InitialiseFromSerialisableInfo( self, serialisable_info ):
with self._lock:
self._gallery_seeds = HydrusSerialisable.CreateFromSerialisableTuple( serialisable_info )
self._gallery_seeds_to_indices = { gallery_seed : index for ( index, gallery_seed ) in enumerate( self._gallery_seeds ) }
def _SetStatusDirty( self ):
self._status_dirty = True
def AddGallerySeeds( self, gallery_seeds ):
if len( gallery_seeds ) == 0:
return 0
seen_urls = set()
new_gallery_seeds = []
with self._lock:
for gallery_seed in gallery_seeds:
if gallery_seed.url in seen_urls:
continue
if gallery_seed in self._gallery_seeds_to_indices:
continue
new_gallery_seeds.append( gallery_seed )
self._gallery_seeds.append( gallery_seed )
self._gallery_seeds_to_indices[ gallery_seed ] = len( self._gallery_seeds ) - 1
seen_urls.add( gallery_seed.url )
self._SetStatusDirty()
self.NotifyGallerySeedsUpdated( new_gallery_seeds )
return len( new_gallery_seeds )
def AdvanceGallerySeed( self, gallery_seed ):
with self._lock:
if gallery_seed in self._gallery_seeds_to_indices:
index = self._gallery_seeds_to_indices[ gallery_seed ]
if index > 0:
self._gallery_seeds.remove( gallery_seed )
self._gallery_seeds.insert( index - 1, gallery_seed )
self._gallery_seeds_to_indices = { gallery_seed : index for ( index, gallery_seed ) in enumerate( self._gallery_seeds ) }
self.NotifyGallerySeedsUpdated( ( gallery_seed, ) )
def CanCompact( self, compact_before_this_source_time ):
with self._lock:
if len( self._gallery_seeds ) <= self.COMPACT_NUMBER:
return False
for gallery_seed in self._gallery_seeds[:-self.COMPACT_NUMBER]:
if gallery_seed.status == CC.STATUS_UNKNOWN:
continue
if gallery_seed.created < compact_before_this_source_time:
return True
return False
def CanRestartFailedSearch( self ):
with self._lock:
if len( self._gallery_seeds ) == 0:
return False
last_gallery_seed = self._gallery_seeds[-1]
if last_gallery_seed.status == CC.STATUS_ERROR:
return True
def Compact( self, compact_before_this_source_time ):
with self._lock:
if len( self._gallery_seeds ) <= self.COMPACT_NUMBER:
return
new_gallery_seeds = HydrusSerialisable.SerialisableList()
for gallery_seed in self._gallery_seeds[:-self.COMPACT_NUMBER]:
still_to_do = gallery_seed.status == CC.STATUS_UNKNOWN
still_relevant = gallery_seed.created > compact_before_this_source_time
if still_to_do or still_relevant:
new_gallery_seeds.append( gallery_seed )
new_gallery_seeds.extend( self._gallery_seeds[-self.COMPACT_NUMBER:] )
self._gallery_seeds = new_gallery_seeds
self._gallery_seeds_to_indices = { gallery_seed : index for ( index, gallery_seed ) in enumerate( self._gallery_seeds ) }
self._SetStatusDirty()
def DelayGallerySeed( self, gallery_seed ):
with self._lock:
if gallery_seed in self._gallery_seeds_to_indices:
index = self._gallery_seeds_to_indices[ gallery_seed ]
if index < len( self._gallery_seeds ) - 1:
self._gallery_seeds.remove( gallery_seed )
self._gallery_seeds.insert( index + 1, gallery_seed )
self._gallery_seeds_to_indices = { gallery_seed : index for ( index, gallery_seed ) in enumerate( self._gallery_seeds ) }
self.NotifyGallerySeedsUpdated( ( gallery_seed, ) )
def GetExampleGallerySeed( self ):
with self._lock:
if len( self._gallery_seeds ) == 0:
return None
else:
example_seed = self._GetNextGallerySeed( CC.STATUS_UNKNOWN )
if example_seed is None:
example_seed = random.choice( self._gallery_seeds[-10:] )
return example_seed
def GetAPIInfoDict( self, simple ):
with self._lock:
d = {}
if self._status_dirty:
self._GenerateStatus()
( status, ( total_processed, total ) ) = self._status_cache
d[ 'status' ] = status
d[ 'total_processed' ] = total_processed
d[ 'total_to_process' ] = total
if not simple:
d[ 'log_items' ] = [ gallery_seed.GetAPIInfoDict( simple ) for gallery_seed in self._gallery_seeds ]
return d
def GetGallerySeedLogKey( self ):
return self._gallery_seed_log_key
def GetGallerySeedCount( self, status = None ):
result = 0
with self._lock:
if status is None:
result = len( self._gallery_seeds )
else:
for gallery_seed in self._gallery_seeds:
if gallery_seed.status == status:
result += 1
return result
def GetGallerySeeds( self, status = None ):
with self._lock:
return self._GetGallerySeeds( status )
def GetGallerySeedIndex( self, gallery_seed ):
with self._lock:
return self._gallery_seeds_to_indices[ gallery_seed ]
def GetNextGallerySeed( self, status ):
with self._lock:
return self._GetNextGallerySeed( status )
def GetStatus( self ):
with self._lock:
if self._status_dirty:
self._GenerateStatus()
return self._status_cache
def GetStatusesToCounts( self ):
with self._lock:
return self._GetStatusesToCounts()
def HasGallerySeed( self, gallery_seed ):
with self._lock:
return gallery_seed in self._gallery_seeds_to_indices
def HasGalleryURL( self, url ):
search_gallery_seed = GallerySeed( url )
search_url = search_gallery_seed.url
return search_url in ( gallery_seed.url for gallery_seed in self._gallery_seeds )
def NotifyGallerySeedsUpdated( self, gallery_seeds ):
with self._lock:
self._SetStatusDirty()
HG.client_controller.pub( 'gallery_seed_log_gallery_seeds_updated', self._gallery_seed_log_key, gallery_seeds )
def RemoveGallerySeeds( self, gallery_seeds ):
with self._lock:
gallery_seeds_to_delete = set( gallery_seeds )
self._gallery_seeds = HydrusSerialisable.SerialisableList( [ gallery_seed for gallery_seed in self._gallery_seeds if gallery_seed not in gallery_seeds_to_delete ] )
self._gallery_seeds_to_indices = { gallery_seed : index for ( index, gallery_seed ) in enumerate( self._gallery_seeds ) }
self._SetStatusDirty()
self.NotifyGallerySeedsUpdated( gallery_seeds_to_delete )
def RemoveGallerySeedsByStatus( self, statuses_to_remove ):
with self._lock:
gallery_seeds_to_delete = [ gallery_seed for gallery_seed in self._gallery_seeds if gallery_seed.status in statuses_to_remove ]
self.RemoveGallerySeeds( gallery_seeds_to_delete )
def RemoveAllButUnknownGallerySeeds( self ):
with self._lock:
gallery_seeds_to_delete = [ gallery_seed for gallery_seed in self._gallery_seeds if gallery_seed.status != CC.STATUS_UNKNOWN ]
self.RemoveGallerySeeds( gallery_seeds_to_delete )
def RestartFailedSearch( self ):
with self._lock:
if len( self._gallery_seeds ) == 0:
return
last_gallery_seed = self._gallery_seeds[-1]
if last_gallery_seed.status != CC.STATUS_ERROR:
return
can_generate_more_pages = True
new_gallery_seeds = ( last_gallery_seed.GenerateRestartedDuplicate( can_generate_more_pages ), )
self.AddGallerySeeds( new_gallery_seeds )
self.NotifyGallerySeedsUpdated( new_gallery_seeds )
def RetryFailed( self ):
with self._lock:
failed_gallery_seeds = self._GetGallerySeeds( CC.STATUS_ERROR )
for gallery_seed in failed_gallery_seeds:
gallery_seed.SetStatus( CC.STATUS_UNKNOWN )
self.NotifyGallerySeedsUpdated( failed_gallery_seeds )
def WorkToDo( self ):
with self._lock:
if self._status_dirty:
self._GenerateStatus()
( status, ( total_processed, total ) ) = self._status_cache
return total_processed < total
HydrusSerialisable.SERIALISABLE_TYPES_TO_OBJECT_TYPES[ HydrusSerialisable.SERIALISABLE_TYPE_GALLERY_SEED_LOG ] = GallerySeedLog