# shared constants and helper functions for the client importing pipeline
import random
|
|
|
|
from hydrus.core import HydrusConstants as HC
|
|
from hydrus.core import HydrusData
|
|
from hydrus.core import HydrusGlobals as HG
|
|
|
|
from hydrus.client import ClientConstants as CC
|
|
from hydrus.client import ClientParsing
|
|
from hydrus.client import ClientThreading
|
|
from hydrus.client.importing import ClientImportFileSeeds
|
|
from hydrus.client.networking import ClientNetworkingJobs
|
|
|
|
# status of a periodic checker's source: still alive, judged dead, or gone (404)
CHECKER_STATUS_OK = 0
CHECKER_STATUS_DEAD = 1
CHECKER_STATUS_404 = 2

# simple summary states for a downloader
DOWNLOADER_SIMPLE_STATUS_DONE = 0
DOWNLOADER_SIMPLE_STATUS_WORKING = 1
DOWNLOADER_SIMPLE_STATUS_PENDING = 2
DOWNLOADER_SIMPLE_STATUS_PAUSED = 3
DOWNLOADER_SIMPLE_STATUS_DEFERRED = 4

# sort precedence for the simple statuses -- NOTE this deliberately differs from
# the raw enum values: DEFERRED sorts before PAUSED
downloader_enum_sort_lookup = {
    DOWNLOADER_SIMPLE_STATUS_DONE : 0,
    DOWNLOADER_SIMPLE_STATUS_WORKING : 1,
    DOWNLOADER_SIMPLE_STATUS_PENDING : 2,
    DOWNLOADER_SIMPLE_STATUS_DEFERRED : 3,
    DOWNLOADER_SIMPLE_STATUS_PAUSED : 4
}

# minimum sleep, in seconds, after a work cycle that did substantial file work
# (presumably to keep the client responsive -- confirm against callers)
DID_SUBSTANTIAL_FILE_WORK_MINIMUM_SLEEP_TIME = 0.1

# typical period, in seconds, for repeating background jobs
REPEATING_JOB_TYPICAL_PERIOD = 30.0
|
|
|
|
def ConvertAllParseResultsToFileSeeds( all_parse_results, source_url, file_import_options ):
    """Turn a batch of parse results into URL file seeds.
    
    Each desired URL becomes one FileSeed carrying the referral url and the
    parse results it came from. URLs are deduplicated across the whole batch:
    the first parse results to mention a url wins.
    """
    
    converted_seeds = []
    urls_already_claimed = set()
    
    for single_parse_results in all_parse_results:
        
        candidate_urls = ClientParsing.GetURLsFromParseResults( single_parse_results, ( HC.URL_TYPE_DESIRED, ), only_get_top_priority = True )
        
        candidate_urls = HydrusData.DedupeList( candidate_urls )
        
        fresh_urls = [ candidate for candidate in candidate_urls if candidate not in urls_already_claimed ]
        
        urls_already_claimed.update( fresh_urls )
        
        # seed creation stays inside this loop on purpose: these parse results
        # only apply to their own urls--hoisting it out would attach the wrong
        # tags to a seed
        for fresh_url in fresh_urls:
            
            new_seed = ClientImportFileSeeds.FileSeed( ClientImportFileSeeds.FILE_SEED_TYPE_URL, fresh_url )
            
            new_seed.SetReferralURL( source_url )
            new_seed.AddParseResults( single_parse_results, file_import_options )
            
            converted_seeds.append( new_seed )
            
        
    
    return converted_seeds
|
|
|
|
def GenerateMultiplePopupNetworkJobPresentationContextFactory( job_key ):
    """Return a factory that wraps network jobs in presentation contexts.
    
    Each produced context attaches the network job to job_key on entry and
    removes it again on exit.
    """
    
    def network_job_presentation_context_factory( network_job ):
        
        attach = lambda: job_key.SetNetworkJob( network_job )
        detach = lambda: job_key.DeleteNetworkJob()
        
        return NetworkJobPresentationContext( attach, detach )
        
    
    return network_job_presentation_context_factory
|
|
|
|
def GenerateSinglePopupNetworkJobPresentationContextFactory( job_key ):
    """Return a factory that wraps a network job in a presentation context.
    
    The produced context publishes the network job on the given job_key while
    the job runs and clears it afterwards.
    """
    
    def network_job_presentation_context_factory( network_job ):
        
        def on_enter():
            
            job_key.SetNetworkJob( network_job )
            
        
        def on_exit():
            
            job_key.DeleteNetworkJob()
            
        
        return NetworkJobPresentationContext( on_enter, on_exit )
        
    
    return network_job_presentation_context_factory
|
|
|
|
def GetRepeatingJobInitialDelay():
    """Return a randomised startup delay in seconds, uniform in [0.5, 1.0)."""
    
    jitter = random.random() * 0.5
    
    return 0.5 + jitter
|
|
|
|
def PublishPresentationHashes( publishing_label, hashes, publish_to_popup_button, publish_files_to_page ):
    """Publish imported file hashes to the ui.
    
    :param publishing_label: label shown with the published files
    :param hashes: iterable of file hashes to publish
    :param publish_to_popup_button: if True, publish a mergable popup-files message
    :param publish_files_to_page: if True, publish the files to an import page
    """
    
    # materialise once up front: the original called list( hashes ) in both
    # branches, which silently produced an empty second list when hashes was a
    # one-shot iterable (e.g. a generator) and both flags were set
    hashes = list( hashes )
    
    if publish_to_popup_button:
        
        files_job_key = ClientThreading.JobKey()
        
        files_job_key.SetVariable( 'popup_files_mergable', True )
        files_job_key.SetVariable( 'popup_files', ( hashes, publishing_label ) )
        
        HG.client_controller.pub( 'message', files_job_key )
        
    
    if publish_files_to_page:
        
        HG.client_controller.pub( 'imported_files_to_page', hashes, publishing_label )
        
    
|
|
|
|
|
|
def THREADDownloadURL( job_key, url, url_string ):
    """Worker-thread routine: download one raw file url and import it.
    
    Progress and the final result are reported through job_key popup
    variables; the job_key is always finished, even on error.
    """
    
    job_key.SetStatusTitle( url_string )
    job_key.SetVariable( 'popup_text_1', 'initialising' )
    
    #
    
    file_import_options = HG.client_controller.new_options.GetDefaultFileImportOptions( 'loud' )
    
    def network_job_factory( *args, **kwargs ):
        
        # a one-off user-initiated download, so give it a bandwidth override
        job = ClientNetworkingJobs.NetworkJob( *args, **kwargs )
        
        job.OverrideBandwidth( 30 )
        
        return job
        
    
    def status_hook( text ):
        
        # only surface the first line of a multi-line status
        first_line = text.splitlines()[0] if len( text ) > 0 else text
        
        job_key.SetVariable( 'popup_text_1', first_line )
        
    
    network_job_presentation_context_factory = GenerateSinglePopupNetworkJobPresentationContextFactory( job_key )
    
    file_seed = ClientImportFileSeeds.FileSeed( ClientImportFileSeeds.FILE_SEED_TYPE_URL, url )
    
    #
    
    try:
        
        file_seed.DownloadAndImportRawFile( url, file_import_options, network_job_factory, network_job_presentation_context_factory, status_hook )
        
        status = file_seed.status
        
        if status in CC.SUCCESSFUL_IMPORT_STATES:
            
            if status == CC.STATUS_SUCCESSFUL_AND_NEW:
                
                job_key.SetVariable( 'popup_text_1', 'successful!' )
                
            elif status == CC.STATUS_SUCCESSFUL_BUT_REDUNDANT:
                
                job_key.SetVariable( 'popup_text_1', 'was already in the database!' )
                
            
            if file_seed.HasHash():
                
                job_key.SetVariable( 'popup_files', ( [ file_seed.GetHash() ], 'download' ) )
                
            
        elif status == CC.STATUS_DELETED:
            
            job_key.SetVariable( 'popup_text_1', 'had already been deleted!' )
            
        
    finally:
        
        job_key.Finish()
        
    
|
|
|
|
|
|
def THREADDownloadURLs( job_key: ClientThreading.JobKey, urls, title ):
    """Worker-thread routine: download and import a batch of urls one by one.
    
    Progress is reported through job_key popup variables (text, gauge, files);
    per-url failures are counted and logged rather than aborting the batch.
    The caller can pause or cancel via the job_key.
    """
    
    job_key.SetStatusTitle( title )
    job_key.SetVariable( 'popup_text_1', 'initialising' )
    
    # per-outcome tallies for the final summary line
    num_successful = 0
    num_redundant = 0
    num_deleted = 0
    num_failed = 0
    
    # list preserves presentation order; the set gives O(1) duplicate checks
    presentation_hashes = []
    presentation_hashes_fast = set()
    
    file_import_options = HG.client_controller.new_options.GetDefaultFileImportOptions( 'loud' )
    
    def network_job_factory( *args, **kwargs ):
        
        network_job = ClientNetworkingJobs.NetworkJob( *args, **kwargs )
        
        network_job.OverrideBandwidth()
        
        return network_job
        
    
    def status_hook( text ):
        
        # only show the first line of any multi-line status text
        if len( text ) > 0:
            
            text = text.splitlines()[0]
            
        
        job_key.SetVariable( 'popup_text_2', text )
        
    
    network_job_presentation_context_factory = GenerateMultiplePopupNetworkJobPresentationContextFactory( job_key )
    
    for ( i, url ) in enumerate( urls ):
        
        # honour user pause/cancel between urls
        ( i_paused, should_quit ) = job_key.WaitIfNeeded()
        
        if should_quit:
            
            break
            
        
        job_key.SetVariable( 'popup_text_1', HydrusData.ConvertValueRangeToPrettyString( i + 1, len( urls ) ) )
        job_key.SetVariable( 'popup_gauge_1', ( i + 1, len( urls ) ) )
        
        file_seed = ClientImportFileSeeds.FileSeed( ClientImportFileSeeds.FILE_SEED_TYPE_URL, url )
        
        try:
            
            file_seed.DownloadAndImportRawFile( url, file_import_options, network_job_factory, network_job_presentation_context_factory, status_hook )
            
            status = file_seed.status
            
            if status in CC.SUCCESSFUL_IMPORT_STATES:
                
                if status == CC.STATUS_SUCCESSFUL_AND_NEW:
                    
                    num_successful += 1
                    
                elif status == CC.STATUS_SUCCESSFUL_BUT_REDUNDANT:
                    
                    num_redundant += 1
                    
                
                if file_seed.HasHash():
                    
                    hash = file_seed.GetHash()
                    
                    if hash not in presentation_hashes_fast:
                        
                        presentation_hashes.append( hash )
                        
                    
                    presentation_hashes_fast.add( hash )
                    
                
                # republish the growing file list so the popup updates live
                if len( presentation_hashes ) > 0:
                    
                    job_key.SetVariable( 'popup_files', ( presentation_hashes, 'downloads' ) )
                    
                
            elif status == CC.STATUS_DELETED:
                
                num_deleted += 1
                
            
        except Exception as e:
            
            # one bad url must not kill the batch; count it and log the details
            num_failed += 1
            
            HydrusData.Print( url + ' failed to import!' )
            HydrusData.PrintException( e )
            
        finally:
            
            # clear the per-url status line before the next iteration
            job_key.DeleteVariable( 'popup_text_2' )
            
        
    
    job_key.DeleteNetworkJob()
    
    # assemble the final human-readable summary from the non-zero tallies
    text_components = []
    
    if num_successful > 0:
        
        text_components.append( HydrusData.ToHumanInt( num_successful ) + ' successful' )
        
    
    if num_redundant > 0:
        
        text_components.append( HydrusData.ToHumanInt( num_redundant ) + ' already in db' )
        
    
    if num_deleted > 0:
        
        text_components.append( HydrusData.ToHumanInt( num_deleted ) + ' deleted' )
        
    
    if num_failed > 0:
        
        text_components.append( HydrusData.ToHumanInt( num_failed ) + ' failed (errors written to log)' )
        
    
    job_key.SetVariable( 'popup_text_1', ', '.join( text_components ) )
    
    if len( presentation_hashes ) > 0:
        
        job_key.SetVariable( 'popup_files', ( presentation_hashes, 'downloads' ) )
        
    
    job_key.DeleteVariable( 'popup_gauge_1' )
    
    job_key.Finish()
|
|
|
|
def UpdateFileSeedCacheWithFileSeeds( file_seed_cache, file_seeds, max_new_urls_allowed = None ):
    """Add any unseen file seeds to the cache, respecting an optional new-url limit.
    
    Returns ( num_urls_added, num_urls_already_in_file_seed_cache,
    can_search_for_more_files, stop_reason ).
    """
    
    seeds_to_add = []
    
    num_added = 0
    num_already_present = 0
    search_can_continue = True
    halt_reason = ''
    
    for candidate_seed in file_seeds:
        
        # the limit is checked before each seed, so exactly max_new_urls_allowed
        # new seeds can be accepted before we stop
        if max_new_urls_allowed is not None and num_added >= max_new_urls_allowed:
            
            search_can_continue = False
            halt_reason = 'hit file limit'
            
            break
            
        
        if file_seed_cache.HasFileSeed( candidate_seed ):
            
            num_already_present += 1
            
        else:
            
            num_added += 1
            
            seeds_to_add.append( candidate_seed )
            
        
    
    file_seed_cache.AddFileSeeds( seeds_to_add )
    
    return ( num_added, num_already_present, search_can_continue, halt_reason )
|
|
|
|
def WakeRepeatingJob( job ):
    """Wake the given repeating job; a None job is quietly ignored."""
    
    if job is None:
        
        return
        
    
    job.Wake()
|
|
|
|
|
|
class NetworkJobPresentationContext( object ):
    """Context manager pairing a setup callable with a teardown callable.
    
    Used to attach a network job to some presentation surface for the
    duration of a with-block and detach it afterwards, even on error.
    """
    
    def __init__( self, enter_call, exit_call ):
        
        # zero-argument callables run on entry and exit respectively
        self._enter_call = enter_call
        self._exit_call = exit_call
        
    
    def __enter__( self ):
        
        # note: deliberately returns None, matching existing callers
        self._enter_call()
        
    
    def __exit__( self, exc_type, exc_val, exc_tb ):
        
        # returns None, so any exception from the with-body propagates
        self._exit_call()
        
    