2019-01-09 22:59:03 +00:00
|
|
|
from . import ClientConstants as CC
|
|
|
|
from . import ClientData
|
|
|
|
from . import ClientDefaults
|
|
|
|
from . import ClientDownloading
|
|
|
|
from . import ClientFiles
|
|
|
|
from . import ClientImportOptions
|
|
|
|
from . import ClientImportFileSeeds
|
|
|
|
from . import ClientImportGallerySeeds
|
|
|
|
from . import ClientNetworkingContexts
|
|
|
|
from . import ClientNetworkingJobs
|
|
|
|
from . import ClientParsing
|
|
|
|
from . import ClientPaths
|
|
|
|
from . import ClientThreading
|
|
|
|
from . import HydrusConstants as HC
|
|
|
|
from . import HydrusData
|
|
|
|
from . import HydrusExceptions
|
|
|
|
from . import HydrusFileHandling
|
|
|
|
from . import HydrusGlobals as HG
|
|
|
|
from . import HydrusPaths
|
|
|
|
from . import HydrusSerialisable
|
|
|
|
from . import HydrusThreading
|
2015-07-22 19:40:39 +00:00
|
|
|
import os
|
2015-09-09 22:04:39 +00:00
|
|
|
import random
|
2015-06-03 21:05:13 +00:00
|
|
|
import threading
|
2015-07-22 19:40:39 +00:00
|
|
|
import time
|
2015-06-03 21:05:13 +00:00
|
|
|
import traceback
|
2015-07-22 19:40:39 +00:00
|
|
|
import wx
|
2015-06-03 21:05:13 +00:00
|
|
|
|
2017-11-08 22:07:12 +00:00
|
|
|
# Life-cycle states for a checker (watcher/subscription) — NOTE(review):
# exact semantics inferred from names; confirm against the checker code.
CHECKER_STATUS_OK = 0  # source still appears live; keep checking
CHECKER_STATUS_DEAD = 1  # source judged inactive; stop checking
CHECKER_STATUS_404 = 2  # source URL now returns 404; stop checking

# Minimum sleep (seconds) after an import iteration that did real file work,
# giving the UI and other threads a chance to run.
DID_SUBSTANTIAL_FILE_WORK_MINIMUM_SLEEP_TIME = 0.1

# Typical period (seconds) between runs of a repeating importer job.
REPEATING_JOB_TYPICAL_PERIOD = 30.0
|
2018-05-16 20:09:50 +00:00
|
|
|
|
2018-10-03 21:00:15 +00:00
|
|
|
def ConvertAllParseResultsToFileSeeds( all_parse_results, source_url, file_import_options ):
    """Turn parser output into a list of URL FileSeeds, one per new desired URL.
    
    all_parse_results: iterable of parse_results objects from ClientParsing.
    source_url: recorded on every seed as the referral URL.
    file_import_options: passed through to FileSeed.AddParseResults.
    
    Returns the new FileSeed objects in discovery order, with duplicate URLs
    (within and across parse_results) skipped.
    """
    
    seen = set()
    seeds = []
    
    for parse_results in all_parse_results:
        
        candidate_urls = ClientParsing.GetURLsFromParseResults( parse_results, ( HC.URL_TYPE_DESIRED, ), only_get_top_priority = True )
        
        fresh_urls = []
        
        for url in HydrusData.DedupeList( candidate_urls ):
            
            if url not in seen:
                
                fresh_urls.append( url )
                seen.add( url )
                
            
        
        # IMPORTANT: seeds must be built inside this loop. parse_results only
        # applies to the urls parsed from it--hoisting seed construction out
        # would attach the wrong tags to the wrong files.
        for url in fresh_urls:
            
            seed = ClientImportFileSeeds.FileSeed( ClientImportFileSeeds.FILE_SEED_TYPE_URL, url )
            
            seed.SetReferralURL( source_url )
            
            seed.AddParseResults( parse_results, file_import_options )
            
            seeds.append( seed )
            
        
    
    return seeds
    
|
|
|
|
|
2018-05-09 20:23:00 +00:00
|
|
|
def GenerateMultiplePopupNetworkJobPresentationContextFactory( job_key ):
    """Return a factory that binds network jobs to *job_key*'s popup.
    
    Intended for popups that show a sequence of network jobs: on exit the
    'popup_network_job' variable is set to None (not deleted), so the next
    job can replace it.
    """
    
    def factory( network_job ):
        
        def on_enter():
            
            job_key.SetVariable( 'popup_network_job', network_job )
            
        
        def on_exit():
            
            # blank rather than delete; see docstring
            job_key.SetVariable( 'popup_network_job', None )
            
        
        return NetworkJobPresentationContext( on_enter, on_exit )
        
    
    return factory
    
|
2018-05-02 20:45:20 +00:00
|
|
|
|
2018-05-09 20:23:00 +00:00
|
|
|
def GenerateSinglePopupNetworkJobPresentationContextFactory( job_key ):
    """Return a factory that binds network jobs to *job_key*'s popup.
    
    Intended for popups that track a single network job: on exit the
    'popup_network_job' variable is deleted outright (contrast the
    'Multiple' factory, which only blanks it).
    """
    
    def factory( network_job ):
        
        def on_enter():
            
            job_key.SetVariable( 'popup_network_job', network_job )
            
        
        def on_exit():
            
            job_key.DeleteVariable( 'popup_network_job' )
            
        
        return NetworkJobPresentationContext( on_enter, on_exit )
        
    
    return factory
    
|
|
|
|
|
2018-05-23 21:05:06 +00:00
|
|
|
def GetRepeatingJobInitialDelay():
    """Return a random initial delay in [0.5, 1.0) seconds.
    
    The jitter presumably staggers repeating jobs created in a batch so
    they do not all wake on the same tick.
    """
    
    jitter = random.random() * 0.5
    
    return 0.5 + jitter
    
|
|
|
|
|
2018-05-16 20:09:50 +00:00
|
|
|
def PageImporterShouldStopWorking( page_key ):
    """Return True when an importer attached to *page_key* should halt.
    
    True during client view shutdown or once the page no longer exists.
    """
    
    if HG.view_shutdown:
        
        return True
        
    
    return not HG.client_controller.PageAlive( page_key )
    
|
2018-05-16 20:09:50 +00:00
|
|
|
|
2018-10-17 21:00:09 +00:00
|
|
|
def PublishPresentationHashes( publishing_label, hashes, publish_to_popup_button, publish_files_to_page ):
    """Announce newly imported files under *publishing_label*.
    
    publish_to_popup_button: post a mergable files popup message.
    publish_files_to_page: publish the hashes to the destination page.
    Either, both, or neither may fire.
    """
    
    if publish_to_popup_button:
        
        popup_job_key = ClientThreading.JobKey()
        
        popup_job_key.SetVariable( 'popup_files_mergable', True )
        popup_job_key.SetVariable( 'popup_files', ( list( hashes ), publishing_label ) )
        
        HG.client_controller.pub( 'message', popup_job_key )
        
    
    if publish_files_to_page:
        
        HG.client_controller.pub( 'imported_files_to_page', list( hashes ), publishing_label )
        
    
|
2017-07-19 21:21:41 +00:00
|
|
|
|
2018-05-09 20:23:00 +00:00
|
|
|
|
|
|
|
def THREADDownloadURL( job_key, url, url_string ):
    """Download one raw file URL and import it, reporting via *job_key*.
    
    Runs on a worker thread. job_key is the popup message's JobKey; url is
    the URL to fetch; url_string is the human-readable popup title.
    
    Fix: the presentation hash local was named 'hash', shadowing the
    builtin; renamed to file_hash. No behavioral change.
    """
    
    job_key.SetVariable( 'popup_title', url_string )
    job_key.SetVariable( 'popup_text_1', 'downloading and importing' )
    
    #
    
    file_import_options = HG.client_controller.new_options.GetDefaultFileImportOptions( 'loud' )
    
    def network_job_factory( *args, **kwargs ):
        
        network_job = ClientNetworkingJobs.NetworkJob( *args, **kwargs )
        
        # presumably lets this manual download override bandwidth rules
        # after 30s of waiting — confirm against NetworkJob.OverrideBandwidth
        network_job.OverrideBandwidth( 30 )
        
        return network_job
        
    
    network_job_presentation_context_factory = GenerateSinglePopupNetworkJobPresentationContextFactory( job_key )
    
    file_seed = ClientImportFileSeeds.FileSeed( ClientImportFileSeeds.FILE_SEED_TYPE_URL, url )
    
    #
    
    try:
        
        file_seed.DownloadAndImportRawFile( url, file_import_options, network_job_factory, network_job_presentation_context_factory )
        
        status = file_seed.status
        
        if status in CC.SUCCESSFUL_IMPORT_STATES:
            
            if status == CC.STATUS_SUCCESSFUL_AND_NEW:
                
                job_key.SetVariable( 'popup_text_1', 'successful!' )
                
            elif status == CC.STATUS_SUCCESSFUL_BUT_REDUNDANT:
                
                job_key.SetVariable( 'popup_text_1', 'was already in the database!' )
                
            
            if file_seed.HasHash():
                
                # present the imported file in the popup
                file_hash = file_seed.GetHash()
                
                job_key.SetVariable( 'popup_files', ( [ file_hash ], 'download' ) )
                
            
        elif status == CC.STATUS_DELETED:
            
            job_key.SetVariable( 'popup_text_1', 'had already been deleted!' )
            
        
    finally:
        
        # always close out the popup, even if the download raised
        job_key.Finish()
        
    
|
2017-07-19 21:21:41 +00:00
|
|
|
|
|
|
|
|
|
|
|
def THREADDownloadURLs( job_key, urls, title ):
    """Download and import a batch of raw file URLs, reporting via *job_key*.
    
    Runs on a worker thread. Tallies successful/redundant/deleted/failed
    outcomes, keeps a gauge and status text updated on the popup, and
    finally presents all imported file hashes under a 'downloads' label.
    """
    
    job_key.SetVariable( 'popup_title', title )
    job_key.SetVariable( 'popup_text_1', 'initialising' )
    
    # per-outcome tallies for the final summary line
    num_successful = 0
    num_redundant = 0
    num_deleted = 0
    num_failed = 0
    
    # ordered list for presentation, plus a set for O(1) dupe checks
    presentation_hashes = []
    presentation_hashes_fast = set()
    
    file_import_options = HG.client_controller.new_options.GetDefaultFileImportOptions( 'loud' )
    
    def network_job_factory( *args, **kwargs ):
        
        network_job = ClientNetworkingJobs.NetworkJob( *args, **kwargs )
        
        # no-arg call here, unlike the single-URL variant's ( 30 ) —
        # presumably overrides bandwidth rules immediately; confirm against
        # NetworkJob.OverrideBandwidth
        network_job.OverrideBandwidth()
        
        return network_job
        
    
    network_job_presentation_context_factory = GenerateMultiplePopupNetworkJobPresentationContextFactory( job_key )
    
    for ( i, url ) in enumerate( urls ):
        
        # honours user pause; i_paused is unused here by convention
        ( i_paused, should_quit ) = job_key.WaitIfNeeded()
        
        if should_quit:
            
            break
            
        
        job_key.SetVariable( 'popup_text_1', HydrusData.ConvertValueRangeToPrettyString( i + 1, len( urls ) ) )
        job_key.SetVariable( 'popup_gauge_1', ( i + 1, len( urls ) ) )
        
        file_seed = ClientImportFileSeeds.FileSeed( ClientImportFileSeeds.FILE_SEED_TYPE_URL, url )
        
        try:
            
            file_seed.DownloadAndImportRawFile( url, file_import_options, network_job_factory, network_job_presentation_context_factory )
            
            status = file_seed.status
            
            if status in CC.SUCCESSFUL_IMPORT_STATES:
                
                if status == CC.STATUS_SUCCESSFUL_AND_NEW:
                    
                    num_successful += 1
                    
                elif status == CC.STATUS_SUCCESSFUL_BUT_REDUNDANT:
                    
                    num_redundant += 1
                    
                
                if file_seed.HasHash():
                    
                    # NOTE(review): 'hash' shadows the builtin; kept as-is here
                    hash = file_seed.GetHash()
                    
                    if hash not in presentation_hashes_fast:
                        
                        presentation_hashes.append( hash )
                        
                    
                    presentation_hashes_fast.add( hash )
                    
                
            elif status == CC.STATUS_DELETED:
                
                num_deleted += 1
                
            
        except Exception as e:
            
            # one bad URL must not kill the batch; log and move on
            num_failed += 1
            
            HydrusData.Print( url + ' failed to import!' )
            HydrusData.PrintException( e )
            
        
    
    job_key.DeleteVariable( 'popup_network_job' )
    
    # assemble the summary line, only mentioning non-zero tallies
    text_components = []
    
    if num_successful > 0:
        
        text_components.append( HydrusData.ToHumanInt( num_successful ) + ' successful' )
        
    
    if num_redundant > 0:
        
        text_components.append( HydrusData.ToHumanInt( num_redundant ) + ' already in db' )
        
    
    if num_deleted > 0:
        
        text_components.append( HydrusData.ToHumanInt( num_deleted ) + ' deleted' )
        
    
    if num_failed > 0:
        
        text_components.append( HydrusData.ToHumanInt( num_failed ) + ' failed (errors written to log)' )
        
    
    job_key.SetVariable( 'popup_text_1', ', '.join( text_components ) )
    
    if len( presentation_hashes ) > 0:
        
        job_key.SetVariable( 'popup_files', ( presentation_hashes, 'downloads' ) )
        
    
    job_key.DeleteVariable( 'popup_gauge_1' )
    
    job_key.Finish()
    
|
|
|
|
|
2018-08-22 21:10:59 +00:00
|
|
|
def UpdateFileSeedCacheWithFileSeeds( file_seed_cache, file_seeds, max_new_urls_allowed = None ):
    """Add not-yet-known file seeds to the cache, optionally capped.
    
    file_seed_cache: object exposing HasFileSeed and AddFileSeeds.
    file_seeds: candidate seeds, in order.
    max_new_urls_allowed: stop once this many new seeds have been accepted.
    
    Returns ( num_urls_added, num_urls_already_in_file_seed_cache,
    can_search_for_more_files, stop_reason ).
    """
    
    fresh = []
    
    num_already_known = 0
    
    can_search_for_more_files = True
    stop_reason = ''
    
    for candidate in file_seeds:
        
        hit_limit = max_new_urls_allowed is not None and len( fresh ) >= max_new_urls_allowed
        
        if hit_limit:
            
            can_search_for_more_files = False
            stop_reason = 'hit file limit'
            
            break
            
        
        if file_seed_cache.HasFileSeed( candidate ):
            
            num_already_known += 1
            
        else:
            
            fresh.append( candidate )
            
        
    
    file_seed_cache.AddFileSeeds( fresh )
    
    return ( len( fresh ), num_already_known, can_search_for_more_files, stop_reason )
    
|
2018-02-07 23:40:33 +00:00
|
|
|
|
2018-05-16 20:09:50 +00:00
|
|
|
def WakeRepeatingJob( job ):
    """Wake *job* immediately; a None job (never scheduled) is a no-op."""
    
    if job is None:
        
        return
        
    
    job.Wake()
    
|
|
|
|
|
|
|
|
|
2018-08-01 20:44:57 +00:00
|
|
|
class NetworkJobPresentationContext( object ):
    """Minimal context manager wrapping a pair of enter/exit callables.
    
    Used to attach a network job to (and detach it from) a popup for the
    duration of a `with` block. Exceptions are never suppressed: __exit__
    returns None, but the exit callable always runs.
    """
    
    def __init__( self, enter_call, exit_call ):
        
        self._on_enter = enter_call
        self._on_exit = exit_call
        
    
    def __enter__( self ):
        
        self._on_enter()
        
    
    def __exit__( self, exc_type, exc_val, exc_tb ):
        
        self._on_exit()
        
    
|
2018-02-14 21:47:18 +00:00
|
|
|
|
2018-05-16 20:09:50 +00:00
|
|
|
|