4317 lines
148 KiB
Python
4317 lines
148 KiB
Python
import bs4
|
|
import ClientConstants as CC
|
|
import ClientData
|
|
import ClientDefaults
|
|
import ClientDownloading
|
|
import ClientFiles
|
|
import ClientImageHandling
|
|
import ClientNetworking
|
|
import ClientThreading
|
|
import collections
|
|
import HydrusConstants as HC
|
|
import HydrusData
|
|
import HydrusExceptions
|
|
import HydrusFileHandling
|
|
import HydrusImageHandling
|
|
import HydrusGlobals as HG
|
|
import HydrusPaths
|
|
import HydrusSerialisable
|
|
import HydrusTags
|
|
import json
|
|
import os
|
|
import random
|
|
import shutil
|
|
import threading
|
|
import time
|
|
import traceback
|
|
import urlparse
|
|
import wx
|
|
import HydrusThreading
|
|
|
|
def THREADDownloadURL( job_key, url, url_string ):
|
|
|
|
job_key.SetVariable( 'popup_title', url_string )
|
|
job_key.SetVariable( 'popup_text_1', 'initialising' )
|
|
|
|
( os_file_handle, temp_path ) = HydrusPaths.GetTempPath()
|
|
|
|
try:
|
|
|
|
response = ClientNetworking.RequestsGet( url, stream = True )
|
|
|
|
with open( temp_path, 'wb' ) as f:
|
|
|
|
ClientNetworking.StreamResponseToFile( job_key, response, f )
|
|
|
|
|
|
job_key.SetVariable( 'popup_text_1', 'importing' )
|
|
|
|
file_import_job = FileImportJob( temp_path )
|
|
|
|
client_files_manager = HG.client_controller.client_files_manager
|
|
|
|
( result, hash ) = client_files_manager.ImportFile( file_import_job )
|
|
|
|
except HydrusExceptions.CancelledException:
|
|
|
|
return
|
|
|
|
except HydrusExceptions.NetworkException:
|
|
|
|
job_key.Cancel()
|
|
|
|
raise
|
|
|
|
finally:
|
|
|
|
HydrusPaths.CleanUpTempPath( os_file_handle, temp_path )
|
|
|
|
|
|
if result in ( CC.STATUS_SUCCESSFUL, CC.STATUS_REDUNDANT ):
|
|
|
|
if result == CC.STATUS_SUCCESSFUL:
|
|
|
|
job_key.SetVariable( 'popup_text_1', 'successful!' )
|
|
|
|
else:
|
|
|
|
job_key.SetVariable( 'popup_text_1', 'was already in the database!' )
|
|
|
|
|
|
job_key.SetVariable( 'popup_files', { hash } )
|
|
|
|
elif result == CC.STATUS_DELETED:
|
|
|
|
job_key.SetVariable( 'popup_text_1', 'had already been deleted!' )
|
|
|
|
|
|
job_key.Finish()
|
|
|
|
def THREADDownloadURLs( job_key, urls, title ):
|
|
|
|
job_key.SetVariable( 'popup_title', title )
|
|
job_key.SetVariable( 'popup_text_1', 'initialising' )
|
|
|
|
num_successful = 0
|
|
num_redundant = 0
|
|
num_deleted = 0
|
|
num_failed = 0
|
|
|
|
successful_hashes = set()
|
|
|
|
for ( i, url ) in enumerate( urls ):
|
|
|
|
( i_paused, should_quit ) = job_key.WaitIfNeeded()
|
|
|
|
if should_quit:
|
|
|
|
break
|
|
|
|
|
|
job_key.SetVariable( 'popup_text_1', HydrusData.ConvertValueRangeToPrettyString( i + 1, len( urls ) ) )
|
|
job_key.SetVariable( 'popup_gauge_1', ( i + 1, len( urls ) ) )
|
|
|
|
( os_file_handle, temp_path ) = HydrusPaths.GetTempPath()
|
|
|
|
try:
|
|
|
|
try:
|
|
|
|
response = ClientNetworking.RequestsGet( url, stream = True )
|
|
|
|
with open( temp_path, 'wb' ) as f:
|
|
|
|
ClientNetworking.StreamResponseToFile( job_key, response, f )
|
|
|
|
|
|
except HydrusExceptions.CancelledException:
|
|
|
|
return
|
|
|
|
except HydrusExceptions.NetworkException:
|
|
|
|
job_key.Cancel()
|
|
|
|
raise
|
|
|
|
|
|
try:
|
|
|
|
job_key.SetVariable( 'popup_text_2', 'importing' )
|
|
|
|
file_import_job = FileImportJob( temp_path )
|
|
|
|
client_files_manager = HG.client_controller.client_files_manager
|
|
|
|
( result, hash ) = client_files_manager.ImportFile( file_import_job )
|
|
|
|
except Exception as e:
|
|
|
|
job_key.DeleteVariable( 'popup_text_2' )
|
|
|
|
HydrusData.Print( url + ' failed to import!' )
|
|
HydrusData.PrintException( e )
|
|
|
|
num_failed += 1
|
|
|
|
continue
|
|
|
|
|
|
finally:
|
|
|
|
HydrusPaths.CleanUpTempPath( os_file_handle, temp_path )
|
|
|
|
|
|
if result in ( CC.STATUS_SUCCESSFUL, CC.STATUS_REDUNDANT ):
|
|
|
|
if result == CC.STATUS_SUCCESSFUL:
|
|
|
|
num_successful += 1
|
|
|
|
else:
|
|
|
|
num_redundant += 1
|
|
|
|
|
|
successful_hashes.add( hash )
|
|
|
|
elif result == CC.STATUS_DELETED:
|
|
|
|
num_deleted += 1
|
|
|
|
|
|
|
|
text_components = []
|
|
|
|
if num_successful > 0:
|
|
|
|
text_components.append( HydrusData.ConvertIntToPrettyString( num_successful ) + ' successful' )
|
|
|
|
|
|
if num_redundant > 0:
|
|
|
|
text_components.append( HydrusData.ConvertIntToPrettyString( num_redundant ) + ' already in db' )
|
|
|
|
|
|
if num_deleted > 0:
|
|
|
|
text_components.append( HydrusData.ConvertIntToPrettyString( num_deleted ) + ' deleted' )
|
|
|
|
|
|
if num_failed > 0:
|
|
|
|
text_components.append( HydrusData.ConvertIntToPrettyString( num_failed ) + ' failed (errors written to log)' )
|
|
|
|
|
|
job_key.SetVariable( 'popup_text_1', ', '.join( text_components ) )
|
|
|
|
if len( successful_hashes ) > 0:
|
|
|
|
job_key.SetVariable( 'popup_files', successful_hashes )
|
|
|
|
|
|
job_key.DeleteVariable( 'popup_gauge_1' )
|
|
job_key.DeleteVariable( 'popup_text_2' )
|
|
job_key.DeleteVariable( 'popup_gauge_2' )
|
|
|
|
job_key.Finish()
|
|
|
|
class FileImportJob( object ):
|
|
|
|
def __init__( self, temp_path, import_file_options = None ):
|
|
|
|
if import_file_options is None:
|
|
|
|
import_file_options = ClientDefaults.GetDefaultImportFileOptions()
|
|
|
|
|
|
self._temp_path = temp_path
|
|
self._import_file_options = import_file_options
|
|
|
|
self._hash = None
|
|
self._pre_import_status = None
|
|
|
|
self._file_info = None
|
|
self._thumbnail = None
|
|
self._phashes = None
|
|
self._extra_hashes = None
|
|
|
|
|
|
def GetExtraHashes( self ):
|
|
|
|
return self._extra_hashes
|
|
|
|
|
|
def GetImportFileOptions( self ):
|
|
|
|
return self._import_file_options
|
|
|
|
|
|
def GetFileInfo( self ):
|
|
|
|
return self._file_info
|
|
|
|
|
|
def GetHash( self ):
|
|
|
|
return self._hash
|
|
|
|
|
|
def GetMime( self ):
|
|
|
|
( size, mime, width, height, duration, num_frames, num_words ) = self._file_info
|
|
|
|
return mime
|
|
|
|
|
|
def GetPreImportStatus( self ):
|
|
|
|
return self._pre_import_status
|
|
|
|
|
|
def GetPHashes( self ):
|
|
|
|
return self._phashes
|
|
|
|
|
|
def GetTempPathAndThumbnail( self ):
|
|
|
|
return ( self._temp_path, self._thumbnail )
|
|
|
|
|
|
def PubsubContentUpdates( self ):
|
|
|
|
if self._pre_import_status == CC.STATUS_REDUNDANT:
|
|
|
|
( automatic_archive, exclude_deleted, min_size, min_resolution ) = self._import_file_options.ToTuple()
|
|
|
|
if automatic_archive:
|
|
|
|
service_keys_to_content_updates = { CC.COMBINED_LOCAL_FILE_SERVICE_KEY : [ HydrusData.ContentUpdate( HC.CONTENT_TYPE_FILES, HC.CONTENT_UPDATE_ARCHIVE, set( ( self._hash, ) ) ) ] }
|
|
|
|
HG.client_controller.Write( 'content_updates', service_keys_to_content_updates )
|
|
|
|
|
|
|
|
|
|
def IsGoodToImport( self ):
|
|
|
|
( automatic_archive, exclude_deleted, min_size, min_resolution ) = self._import_file_options.ToTuple()
|
|
|
|
( size, mime, width, height, duration, num_frames, num_words ) = self._file_info
|
|
|
|
if width is not None and height is not None:
|
|
|
|
if min_resolution is not None:
|
|
|
|
( min_x, min_y ) = min_resolution
|
|
|
|
if width < min_x or height < min_y:
|
|
|
|
return ( False, 'Resolution too small.' )
|
|
|
|
|
|
|
|
|
|
if min_size is not None:
|
|
|
|
if size < min_size:
|
|
|
|
return ( False, 'File too small.' )
|
|
|
|
|
|
|
|
return ( True, 'File looks good.' )
|
|
|
|
|
|
def IsNewToDB( self ):
|
|
|
|
if self._pre_import_status == CC.STATUS_NEW:
|
|
|
|
return True
|
|
|
|
|
|
if self._pre_import_status == CC.STATUS_DELETED:
|
|
|
|
( automatic_archive, exclude_deleted, min_size, min_resolution ) = self._import_file_options.ToTuple()
|
|
|
|
if not exclude_deleted:
|
|
|
|
return True
|
|
|
|
|
|
|
|
return False
|
|
|
|
|
|
def GenerateHashAndStatus( self ):
|
|
|
|
HydrusImageHandling.ConvertToPngIfBmp( self._temp_path )
|
|
|
|
self._hash = HydrusFileHandling.GetHashFromPath( self._temp_path )
|
|
|
|
self._pre_import_status = HG.client_controller.Read( 'hash_status', self._hash )
|
|
|
|
|
|
def GenerateInfo( self ):
|
|
|
|
self._file_info = HydrusFileHandling.GetFileInfo( self._temp_path )
|
|
|
|
( size, mime, width, height, duration, num_frames, num_words ) = self._file_info
|
|
|
|
if mime in HC.MIMES_WITH_THUMBNAILS:
|
|
|
|
self._thumbnail = HydrusFileHandling.GenerateThumbnail( self._temp_path, mime = mime )
|
|
|
|
|
|
if mime in HC.MIMES_WE_CAN_PHASH:
|
|
|
|
self._phashes = ClientImageHandling.GenerateShapePerceptualHashes( self._temp_path )
|
|
|
|
|
|
self._extra_hashes = HydrusFileHandling.GetExtraHashesFromPath( self._temp_path )
|
|
|
|
|
|
class GalleryImport( HydrusSerialisable.SerialisableBase ):
|
|
|
|
SERIALISABLE_TYPE = HydrusSerialisable.SERIALISABLE_TYPE_GALLERY_IMPORT
|
|
SERIALISABLE_VERSION = 1
|
|
|
|
def __init__( self, gallery_identifier = None ):
|
|
|
|
if gallery_identifier is None:
|
|
|
|
gallery_identifier = ClientDownloading.GalleryIdentifier( HC.SITE_TYPE_DEVIANT_ART )
|
|
|
|
|
|
HydrusSerialisable.SerialisableBase.__init__( self )
|
|
|
|
self._gallery_identifier = gallery_identifier
|
|
|
|
self._gallery_stream_identifiers = ClientDownloading.GetGalleryStreamIdentifiers( self._gallery_identifier )
|
|
|
|
self._current_query = None
|
|
self._current_query_num_urls = 0
|
|
|
|
self._current_gallery_stream_identifier = None
|
|
self._current_gallery_stream_identifier_page_index = 0
|
|
self._current_gallery_stream_identifier_found_urls = set()
|
|
|
|
self._pending_gallery_stream_identifiers = []
|
|
|
|
self._pending_queries = []
|
|
|
|
new_options = HG.client_controller.GetNewOptions()
|
|
|
|
self._get_tags_if_url_known_and_file_redundant = new_options.GetBoolean( 'get_tags_if_url_known_and_file_redundant' )
|
|
|
|
self._file_limit = HC.options[ 'gallery_file_limit' ]
|
|
self._gallery_paused = False
|
|
self._files_paused = False
|
|
|
|
self._import_file_options = ClientDefaults.GetDefaultImportFileOptions()
|
|
|
|
self._import_tag_options = new_options.GetDefaultImportTagOptions( self._gallery_identifier )
|
|
|
|
self._seed_cache = SeedCache()
|
|
|
|
self._lock = threading.Lock()
|
|
|
|
self._gallery = None
|
|
|
|
self._gallery_status = ''
|
|
self._current_action = ''
|
|
|
|
self._download_control_file_set = None
|
|
self._download_control_file_clear = None
|
|
|
|
self._download_control_gallery_set = None
|
|
self._download_control_gallery_clear = None
|
|
|
|
|
|
|
|
def _GetSerialisableInfo( self ):
|
|
|
|
serialisable_gallery_identifier = self._gallery_identifier.GetSerialisableTuple()
|
|
serialisable_gallery_stream_identifiers = [ gallery_stream_identifier.GetSerialisableTuple() for gallery_stream_identifier in self._gallery_stream_identifiers ]
|
|
|
|
if self._current_gallery_stream_identifier is None:
|
|
|
|
serialisable_current_gallery_stream_identifier = None
|
|
|
|
else:
|
|
|
|
serialisable_current_gallery_stream_identifier = self._current_gallery_stream_identifier.GetSerialisableTuple()
|
|
|
|
|
|
serialisable_current_gallery_stream_identifier_found_urls = list( self._current_gallery_stream_identifier_found_urls )
|
|
|
|
serialisable_pending_gallery_stream_identifiers = [ pending_gallery_stream_identifier.GetSerialisableTuple() for pending_gallery_stream_identifier in self._pending_gallery_stream_identifiers ]
|
|
|
|
serialisable_file_options = self._import_file_options.GetSerialisableTuple()
|
|
serialisable_tag_options = self._import_tag_options.GetSerialisableTuple()
|
|
serialisable_seed_cache = self._seed_cache.GetSerialisableTuple()
|
|
|
|
serialisable_current_query_stuff = ( self._current_query, self._current_query_num_urls, serialisable_current_gallery_stream_identifier, self._current_gallery_stream_identifier_page_index, serialisable_current_gallery_stream_identifier_found_urls, serialisable_pending_gallery_stream_identifiers )
|
|
|
|
return ( serialisable_gallery_identifier, serialisable_gallery_stream_identifiers, serialisable_current_query_stuff, self._pending_queries, self._get_tags_if_url_known_and_file_redundant, self._file_limit, self._gallery_paused, self._files_paused, serialisable_file_options, serialisable_tag_options, serialisable_seed_cache )
|
|
|
|
|
|
def _InitialiseFromSerialisableInfo( self, serialisable_info ):
|
|
|
|
( serialisable_gallery_identifier, serialisable_gallery_stream_identifiers, serialisable_current_query_stuff, self._pending_queries, self._get_tags_if_url_known_and_file_redundant, self._file_limit, self._gallery_paused, self._files_paused, serialisable_file_options, serialisable_tag_options, serialisable_seed_cache ) = serialisable_info
|
|
|
|
( self._current_query, self._current_query_num_urls, serialisable_current_gallery_stream_identifier, self._current_gallery_stream_identifier_page_index, serialisable_current_gallery_stream_identifier_found_urls, serialisable_pending_gallery_stream_identifier ) = serialisable_current_query_stuff
|
|
|
|
self._gallery_identifier = HydrusSerialisable.CreateFromSerialisableTuple( serialisable_gallery_identifier )
|
|
|
|
self._gallery_stream_identifiers = [ HydrusSerialisable.CreateFromSerialisableTuple( serialisable_gallery_stream_identifier ) for serialisable_gallery_stream_identifier in serialisable_gallery_stream_identifiers ]
|
|
|
|
if serialisable_current_gallery_stream_identifier is None:
|
|
|
|
self._current_gallery_stream_identifier = None
|
|
|
|
else:
|
|
|
|
self._current_gallery_stream_identifier = HydrusSerialisable.CreateFromSerialisableTuple( serialisable_current_gallery_stream_identifier )
|
|
|
|
|
|
self._current_gallery_stream_identifier_found_urls = set( serialisable_current_gallery_stream_identifier_found_urls )
|
|
|
|
self._pending_gallery_stream_identifiers = [ HydrusSerialisable.CreateFromSerialisableTuple( serialisable_pending_gallery_stream_identifier ) for serialisable_pending_gallery_stream_identifier in serialisable_pending_gallery_stream_identifier ]
|
|
self._import_file_options = HydrusSerialisable.CreateFromSerialisableTuple( serialisable_file_options )
|
|
self._import_tag_options = HydrusSerialisable.CreateFromSerialisableTuple( serialisable_tag_options )
|
|
self._seed_cache = HydrusSerialisable.CreateFromSerialisableTuple( serialisable_seed_cache )
|
|
|
|
|
|
def _WorkOnFiles( self, page_key ):
|
|
|
|
do_wait = True
|
|
|
|
url = self._seed_cache.GetNextSeed( CC.STATUS_UNKNOWN )
|
|
|
|
if url is None:
|
|
|
|
return
|
|
|
|
|
|
def network_job_factory( method, url, **kwargs ):
|
|
|
|
network_job = ClientNetworking.NetworkJobDownloaderQueryTemporary( page_key, method, url, **kwargs )
|
|
|
|
wx.CallAfter( self._download_control_file_set, network_job )
|
|
|
|
return network_job
|
|
|
|
|
|
gallery = ClientDownloading.GetGallery( self._gallery_identifier )
|
|
|
|
gallery.SetNetworkJobFactory( network_job_factory )
|
|
|
|
try:
|
|
|
|
with self._lock:
|
|
|
|
self._current_action = 'reviewing file'
|
|
|
|
|
|
( status, hash ) = HG.client_controller.Read( 'url_status', url )
|
|
|
|
if status == CC.STATUS_DELETED:
|
|
|
|
if not self._import_file_options.GetExcludeDeleted():
|
|
|
|
status = CC.STATUS_NEW
|
|
|
|
|
|
|
|
downloaded_tags = []
|
|
|
|
if status == CC.STATUS_REDUNDANT:
|
|
|
|
if self._get_tags_if_url_known_and_file_redundant and self._import_tag_options.InterestedInTags():
|
|
|
|
downloaded_tags = gallery.GetTags( url )
|
|
|
|
else:
|
|
|
|
do_wait = False
|
|
|
|
|
|
elif status == CC.STATUS_NEW:
|
|
|
|
( os_file_handle, temp_path ) = HydrusPaths.GetTempPath()
|
|
|
|
try:
|
|
|
|
with self._lock:
|
|
|
|
self._current_action = 'downloading file'
|
|
|
|
|
|
if self._import_tag_options.InterestedInTags():
|
|
|
|
downloaded_tags = gallery.GetFileAndTags( temp_path, url )
|
|
|
|
else:
|
|
|
|
gallery.GetFile( temp_path, url )
|
|
|
|
|
|
file_import_job = FileImportJob( temp_path, self._import_file_options )
|
|
|
|
client_files_manager = HG.client_controller.client_files_manager
|
|
|
|
with self._lock:
|
|
|
|
self._current_action = 'importing file'
|
|
|
|
|
|
( status, hash ) = client_files_manager.ImportFile( file_import_job )
|
|
|
|
finally:
|
|
|
|
HydrusPaths.CleanUpTempPath( os_file_handle, temp_path )
|
|
|
|
|
|
else:
|
|
|
|
do_wait = False
|
|
|
|
|
|
service_keys_to_content_updates = { CC.COMBINED_LOCAL_FILE_SERVICE_KEY : [ HydrusData.ContentUpdate( HC.CONTENT_TYPE_URLS, HC.CONTENT_UPDATE_ADD, ( hash, ( url, ) ) ) ] }
|
|
|
|
HG.client_controller.WriteSynchronous( 'content_updates', service_keys_to_content_updates )
|
|
|
|
self._seed_cache.UpdateSeedStatus( url, status )
|
|
|
|
if status in ( CC.STATUS_SUCCESSFUL, CC.STATUS_REDUNDANT ):
|
|
|
|
service_keys_to_content_updates = self._import_tag_options.GetServiceKeysToContentUpdates( hash, downloaded_tags )
|
|
|
|
if len( service_keys_to_content_updates ) > 0:
|
|
|
|
HG.client_controller.WriteSynchronous( 'content_updates', service_keys_to_content_updates )
|
|
|
|
|
|
( media_result, ) = HG.client_controller.Read( 'media_results', ( hash, ) )
|
|
|
|
HG.client_controller.pub( 'add_media_results', page_key, ( media_result, ) )
|
|
|
|
|
|
except HydrusExceptions.CancelledException:
|
|
|
|
status = CC.STATUS_SKIPPED
|
|
|
|
self._seed_cache.UpdateSeedStatus( url, status )
|
|
|
|
except HydrusExceptions.MimeException as e:
|
|
|
|
status = CC.STATUS_UNINTERESTING_MIME
|
|
|
|
self._seed_cache.UpdateSeedStatus( url, status )
|
|
|
|
except Exception as e:
|
|
|
|
status = CC.STATUS_FAILED
|
|
|
|
self._seed_cache.UpdateSeedStatus( url, status, exception = e )
|
|
|
|
finally:
|
|
|
|
wx.CallAfter( self._download_control_file_clear )
|
|
|
|
|
|
with self._lock:
|
|
|
|
self._current_action = ''
|
|
|
|
|
|
|
|
def _WorkOnGallery( self, page_key ):
|
|
|
|
with self._lock:
|
|
|
|
if self._current_query is None:
|
|
|
|
if len( self._pending_queries ) == 0:
|
|
|
|
self._gallery_status = ''
|
|
|
|
return
|
|
|
|
else:
|
|
|
|
self._current_query = self._pending_queries.pop( 0 )
|
|
self._current_query_num_urls = 0
|
|
|
|
self._current_gallery_stream_identifier = None
|
|
self._pending_gallery_stream_identifiers = list( self._gallery_stream_identifiers )
|
|
|
|
|
|
|
|
if self._current_gallery_stream_identifier is None:
|
|
|
|
if len( self._pending_gallery_stream_identifiers ) == 0:
|
|
|
|
self._gallery_status = self._current_query + ' produced ' + HydrusData.ConvertIntToPrettyString( self._current_query_num_urls ) + ' urls'
|
|
|
|
self._current_query = None
|
|
|
|
return
|
|
|
|
else:
|
|
|
|
self._current_gallery_stream_identifier = self._pending_gallery_stream_identifiers.pop( 0 )
|
|
self._current_gallery_stream_identifier_page_index = 0
|
|
self._current_gallery_stream_identifier_found_urls = set()
|
|
|
|
|
|
|
|
def network_job_factory( method, url, **kwargs ):
|
|
|
|
network_job = ClientNetworking.NetworkJobDownloaderQueryTemporary( page_key, method, url, **kwargs )
|
|
|
|
network_job.OverrideBandwidth()
|
|
|
|
wx.CallAfter( self._download_control_gallery_set, network_job )
|
|
|
|
return network_job
|
|
|
|
|
|
gallery = ClientDownloading.GetGallery( self._current_gallery_stream_identifier )
|
|
|
|
gallery.SetNetworkJobFactory( network_job_factory )
|
|
|
|
query = self._current_query
|
|
page_index = self._current_gallery_stream_identifier_page_index
|
|
|
|
self._gallery_status = HydrusData.ConvertIntToPrettyString( self._current_query_num_urls ) + ' urls found, now checking page ' + HydrusData.ConvertIntToPrettyString( self._current_gallery_stream_identifier_page_index + 1 )
|
|
|
|
|
|
error_occured = False
|
|
|
|
try:
|
|
|
|
( page_of_urls, definitely_no_more_pages ) = gallery.GetPage( query, page_index )
|
|
|
|
with self._lock:
|
|
|
|
no_urls_found = len( page_of_urls ) == 0
|
|
no_new_urls = len( self._current_gallery_stream_identifier_found_urls.intersection( page_of_urls ) ) == len( page_of_urls )
|
|
|
|
if definitely_no_more_pages or no_urls_found or no_new_urls:
|
|
|
|
self._current_gallery_stream_identifier = None
|
|
|
|
else:
|
|
|
|
self._current_gallery_stream_identifier_page_index += 1
|
|
self._current_gallery_stream_identifier_found_urls.update( page_of_urls )
|
|
|
|
|
|
|
|
new_urls = []
|
|
|
|
for url in page_of_urls:
|
|
|
|
if not self._seed_cache.HasSeed( url ):
|
|
|
|
with self._lock:
|
|
|
|
if self._file_limit is not None and self._current_query_num_urls + 1 > self._file_limit:
|
|
|
|
self._current_gallery_stream_identifier = None
|
|
|
|
self._pending_gallery_stream_identifiers = []
|
|
|
|
break
|
|
|
|
|
|
self._current_query_num_urls += 1
|
|
|
|
|
|
new_urls.append( url )
|
|
|
|
|
|
|
|
self._seed_cache.AddSeeds( new_urls )
|
|
|
|
except Exception as e:
|
|
|
|
if isinstance( e, HydrusExceptions.CancelledException ):
|
|
|
|
text = 'cancelled'
|
|
|
|
elif isinstance( e, HydrusExceptions.NotFoundException ):
|
|
|
|
text = 'Gallery 404'
|
|
|
|
else:
|
|
|
|
text = HydrusData.ToUnicode( e )
|
|
|
|
HydrusData.DebugPrint( traceback.format_exc() )
|
|
|
|
|
|
with self._lock:
|
|
|
|
self._current_gallery_stream_identifier = None
|
|
|
|
self._gallery_status = text
|
|
|
|
|
|
time.sleep( 5 )
|
|
|
|
finally:
|
|
|
|
wx.CallAfter( self._download_control_gallery_clear )
|
|
|
|
|
|
with self._lock:
|
|
|
|
self._gallery_status = HydrusData.ConvertIntToPrettyString( self._current_query_num_urls ) + ' urls found so far for ' + query
|
|
|
|
|
|
|
|
def _THREADWorkOnFiles( self, page_key ):
|
|
|
|
while not ( HG.view_shutdown or HG.client_controller.PageCompletelyDestroyed( page_key ) ):
|
|
|
|
if self._files_paused or HG.client_controller.PageClosedButNotDestroyed( page_key ):
|
|
|
|
time.sleep( 1 )
|
|
|
|
else:
|
|
|
|
try:
|
|
|
|
did_work = self._WorkOnFiles( page_key )
|
|
|
|
if did_work:
|
|
|
|
time.sleep( 0.1 )
|
|
|
|
else:
|
|
|
|
time.sleep( 1 )
|
|
|
|
|
|
HG.client_controller.WaitUntilPubSubsEmpty()
|
|
|
|
except Exception as e:
|
|
|
|
HydrusData.ShowException( e )
|
|
|
|
return
|
|
|
|
|
|
|
|
|
|
|
|
def _THREADWorkOnGallery( self, page_key ):
|
|
|
|
while not ( HG.view_shutdown or HG.client_controller.PageCompletelyDestroyed( page_key ) ):
|
|
|
|
if self._gallery_paused or HG.client_controller.PageClosedButNotDestroyed( page_key ):
|
|
|
|
time.sleep( 1 )
|
|
|
|
else:
|
|
|
|
try:
|
|
|
|
self._WorkOnGallery( page_key )
|
|
|
|
time.sleep( 5 )
|
|
|
|
HG.client_controller.WaitUntilPubSubsEmpty()
|
|
|
|
except Exception as e:
|
|
|
|
HydrusData.ShowException( e )
|
|
|
|
return
|
|
|
|
|
|
|
|
|
|
|
|
def AdvanceQuery( self, query ):
|
|
|
|
with self._lock:
|
|
|
|
if query in self._pending_queries:
|
|
|
|
index = self._pending_queries.index( query )
|
|
|
|
if index - 1 >= 0:
|
|
|
|
self._pending_queries.remove( query )
|
|
|
|
self._pending_queries.insert( index - 1, query )
|
|
|
|
|
|
|
|
|
|
|
|
def DelayQuery( self, query ):
|
|
|
|
with self._lock:
|
|
|
|
if query in self._pending_queries:
|
|
|
|
index = self._pending_queries.index( query )
|
|
|
|
if index + 1 < len( self._pending_queries ):
|
|
|
|
self._pending_queries.remove( query )
|
|
|
|
self._pending_queries.insert( index + 1, query )
|
|
|
|
|
|
|
|
|
|
|
|
def DeleteQuery( self, query ):
|
|
|
|
with self._lock:
|
|
|
|
if query in self._pending_queries:
|
|
|
|
self._pending_queries.remove( query )
|
|
|
|
|
|
|
|
|
|
def FinishCurrentQuery( self ):
|
|
|
|
with self._lock:
|
|
|
|
self._current_query = None
|
|
self._gallery_paused = False
|
|
|
|
|
|
|
|
def GetGalleryIdentifier( self ):
|
|
|
|
return self._gallery_identifier
|
|
|
|
|
|
def GetOptions( self ):
|
|
|
|
with self._lock:
|
|
|
|
return ( self._import_file_options, self._import_tag_options, self._file_limit )
|
|
|
|
|
|
|
|
def GetSeedCache( self ):
|
|
|
|
return self._seed_cache
|
|
|
|
|
|
def GetStatus( self ):
|
|
|
|
with self._lock:
|
|
|
|
cancellable = self._current_query is not None
|
|
|
|
return ( list( self._pending_queries ), self._gallery_status, self._current_action, self._files_paused, self._gallery_paused, cancellable )
|
|
|
|
|
|
|
|
def GetTagsIfURLKnownAndFileRedundant( self ):
|
|
|
|
with self._lock:
|
|
|
|
return self._get_tags_if_url_known_and_file_redundant
|
|
|
|
|
|
|
|
def InvertGetTagsIfURLKnownAndFileRedundant( self ):
|
|
|
|
with self._lock:
|
|
|
|
self._get_tags_if_url_known_and_file_redundant = not self._get_tags_if_url_known_and_file_redundant
|
|
|
|
|
|
|
|
def PausePlayFiles( self ):
|
|
|
|
with self._lock:
|
|
|
|
self._files_paused = not self._files_paused
|
|
|
|
|
|
|
|
def PausePlayGallery( self ):
|
|
|
|
with self._lock:
|
|
|
|
self._gallery_paused = not self._gallery_paused
|
|
|
|
|
|
|
|
def PendQuery( self, query ):
|
|
|
|
with self._lock:
|
|
|
|
if query not in self._pending_queries:
|
|
|
|
self._pending_queries.append( query )
|
|
|
|
|
|
|
|
|
|
def SetDownloadControls( self, file_download_control, gallery_download_control ):
|
|
|
|
with self._lock:
|
|
|
|
self._download_control_file_set = file_download_control.SetNetworkJob
|
|
self._download_control_file_clear = file_download_control.ClearNetworkJob
|
|
|
|
self._download_control_gallery_set = gallery_download_control.SetNetworkJob
|
|
self._download_control_gallery_clear = gallery_download_control.ClearNetworkJob
|
|
|
|
|
|
|
|
def SetFileLimit( self, file_limit ):
|
|
|
|
with self._lock:
|
|
|
|
self._file_limit = file_limit
|
|
|
|
|
|
|
|
def SetImportFileOptions( self, import_file_options ):
|
|
|
|
with self._lock:
|
|
|
|
self._import_file_options = import_file_options
|
|
|
|
|
|
|
|
def SetImportTagOptions( self, import_tag_options ):
|
|
|
|
with self._lock:
|
|
|
|
self._import_tag_options = import_tag_options
|
|
|
|
|
|
|
|
def Start( self, page_key ):
|
|
|
|
threading.Thread( target = self._THREADWorkOnGallery, args = ( page_key, ) ).start()
|
|
threading.Thread( target = self._THREADWorkOnFiles, args = ( page_key, ) ).start()
|
|
|
|
|
|
HydrusSerialisable.SERIALISABLE_TYPES_TO_OBJECT_TYPES[ HydrusSerialisable.SERIALISABLE_TYPE_GALLERY_IMPORT ] = GalleryImport
|
|
|
|
class HDDImport( HydrusSerialisable.SerialisableBase ):
|
|
|
|
SERIALISABLE_TYPE = HydrusSerialisable.SERIALISABLE_TYPE_HDD_IMPORT
|
|
SERIALISABLE_VERSION = 1
|
|
|
|
def __init__( self, paths = None, import_file_options = None, paths_to_tags = None, delete_after_success = None ):
|
|
|
|
HydrusSerialisable.SerialisableBase.__init__( self )
|
|
|
|
if paths is None:
|
|
|
|
self._paths_cache = None
|
|
|
|
else:
|
|
|
|
self._paths_cache = SeedCache()
|
|
|
|
self._paths_cache.AddSeeds( paths )
|
|
|
|
|
|
self._import_file_options = import_file_options
|
|
self._paths_to_tags = paths_to_tags
|
|
self._delete_after_success = delete_after_success
|
|
self._paused = False
|
|
|
|
self._seed_cache_status = ( 'initialising', ( 0, 1 ) )
|
|
|
|
self._lock = threading.Lock()
|
|
|
|
|
|
def _GetSerialisableInfo( self ):
|
|
|
|
serialisable_url_cache = self._paths_cache.GetSerialisableTuple()
|
|
serialisable_options = self._import_file_options.GetSerialisableTuple()
|
|
serialisable_paths_to_tags = { path : { service_key.encode( 'hex' ) : tags for ( service_key, tags ) in service_keys_to_tags.items() } for ( path, service_keys_to_tags ) in self._paths_to_tags.items() }
|
|
|
|
return ( serialisable_url_cache, serialisable_options, serialisable_paths_to_tags, self._delete_after_success, self._paused )
|
|
|
|
|
|
def _InitialiseFromSerialisableInfo( self, serialisable_info ):
|
|
|
|
( serialisable_url_cache, serialisable_options, serialisable_paths_to_tags, self._delete_after_success, self._paused ) = serialisable_info
|
|
|
|
self._paths_cache = HydrusSerialisable.CreateFromSerialisableTuple( serialisable_url_cache )
|
|
self._import_file_options = HydrusSerialisable.CreateFromSerialisableTuple( serialisable_options )
|
|
self._paths_to_tags = { path : { service_key.decode( 'hex' ) : tags for ( service_key, tags ) in service_keys_to_tags.items() } for ( path, service_keys_to_tags ) in serialisable_paths_to_tags.items() }
|
|
|
|
|
|
def _RegenerateSeedCacheStatus( self ):
|
|
|
|
new_seed_cache_status = self._paths_cache.GetStatus()
|
|
|
|
if self._seed_cache_status != new_seed_cache_status:
|
|
|
|
self._seed_cache_status = new_seed_cache_status
|
|
|
|
|
|
|
|
def _WorkOnFiles( self, page_key ):
|
|
|
|
path = self._paths_cache.GetNextSeed( CC.STATUS_UNKNOWN )
|
|
|
|
if path is None:
|
|
|
|
return False
|
|
|
|
|
|
with self._lock:
|
|
|
|
self._RegenerateSeedCacheStatus()
|
|
|
|
if path in self._paths_to_tags:
|
|
|
|
service_keys_to_tags = self._paths_to_tags[ path ]
|
|
|
|
else:
|
|
|
|
service_keys_to_tags = {}
|
|
|
|
|
|
|
|
try:
|
|
|
|
if not os.path.exists( path ):
|
|
|
|
raise Exception( 'Source file does not exist!' )
|
|
|
|
|
|
( os_file_handle, temp_path ) = HydrusPaths.GetTempPath()
|
|
|
|
try:
|
|
|
|
copied = HydrusPaths.MirrorFile( path, temp_path )
|
|
|
|
if not copied:
|
|
|
|
raise Exception( 'File failed to copy--see log for error.' )
|
|
|
|
|
|
file_import_job = FileImportJob( temp_path, self._import_file_options )
|
|
|
|
client_files_manager = HG.client_controller.client_files_manager
|
|
|
|
( status, hash ) = client_files_manager.ImportFile( file_import_job )
|
|
|
|
finally:
|
|
|
|
HydrusPaths.CleanUpTempPath( os_file_handle, temp_path )
|
|
|
|
|
|
self._paths_cache.UpdateSeedStatus( path, status )
|
|
|
|
if status in ( CC.STATUS_SUCCESSFUL, CC.STATUS_REDUNDANT ):
|
|
|
|
service_keys_to_content_updates = ClientData.ConvertServiceKeysToTagsToServiceKeysToContentUpdates( { hash }, service_keys_to_tags )
|
|
|
|
if len( service_keys_to_content_updates ) > 0:
|
|
|
|
HG.client_controller.WriteSynchronous( 'content_updates', service_keys_to_content_updates )
|
|
|
|
|
|
( media_result, ) = HG.client_controller.Read( 'media_results', ( hash, ) )
|
|
|
|
HG.client_controller.pub( 'add_media_results', page_key, ( media_result, ) )
|
|
|
|
if self._delete_after_success:
|
|
|
|
try:
|
|
|
|
ClientData.DeletePath( path )
|
|
|
|
except Exception as e:
|
|
|
|
HydrusData.ShowText( 'While attempting to delete ' + path + ', the following error occured:' )
|
|
HydrusData.ShowException( e )
|
|
|
|
|
|
txt_path = path + '.txt'
|
|
|
|
if os.path.exists( txt_path ):
|
|
|
|
try:
|
|
|
|
ClientData.DeletePath( txt_path )
|
|
|
|
except Exception as e:
|
|
|
|
HydrusData.ShowText( 'While attempting to delete ' + txt_path + ', the following error occured:' )
|
|
HydrusData.ShowException( e )
|
|
|
|
|
|
|
|
|
|
|
|
except HydrusExceptions.MimeException as e:
|
|
|
|
status = CC.STATUS_UNINTERESTING_MIME
|
|
|
|
self._paths_cache.UpdateSeedStatus( path, status )
|
|
|
|
except Exception as e:
|
|
|
|
status = CC.STATUS_FAILED
|
|
|
|
self._paths_cache.UpdateSeedStatus( path, status, exception = e )
|
|
|
|
|
|
with self._lock:
|
|
|
|
self._RegenerateSeedCacheStatus()
|
|
|
|
|
|
return True
|
|
|
|
|
|
def _THREADWork( self, page_key ):
|
|
|
|
with self._lock:
|
|
|
|
self._RegenerateSeedCacheStatus()
|
|
|
|
|
|
while not ( HG.view_shutdown or HG.client_controller.PageCompletelyDestroyed( page_key ) ):
|
|
|
|
if self._paused or HG.client_controller.PageClosedButNotDestroyed( page_key ):
|
|
|
|
time.sleep( 1 )
|
|
|
|
else:
|
|
|
|
try:
|
|
|
|
did_work = self._WorkOnFiles( page_key )
|
|
|
|
if not did_work:
|
|
|
|
time.sleep( 1 )
|
|
|
|
|
|
HG.client_controller.WaitUntilPubSubsEmpty()
|
|
|
|
except Exception as e:
|
|
|
|
HydrusData.ShowException( e )
|
|
|
|
return
|
|
|
|
|
|
|
|
|
|
|
|
def GetSeedCache( self ):
|
|
|
|
return self._paths_cache
|
|
|
|
|
|
def GetStatus( self ):
|
|
|
|
with self._lock:
|
|
|
|
return ( self._seed_cache_status, self._paused )
|
|
|
|
|
|
|
|
def PausePlay( self ):
|
|
|
|
with self._lock:
|
|
|
|
self._paused = not self._paused
|
|
|
|
|
|
|
|
def Start( self, page_key ):
|
|
|
|
threading.Thread( target = self._THREADWork, args = ( page_key, ) ).start()
|
|
|
|
|
|
HydrusSerialisable.SERIALISABLE_TYPES_TO_OBJECT_TYPES[ HydrusSerialisable.SERIALISABLE_TYPE_HDD_IMPORT ] = HDDImport
|
|
|
|
class ImportFolder( HydrusSerialisable.SerialisableBaseNamed ):
|
|
|
|
SERIALISABLE_TYPE = HydrusSerialisable.SERIALISABLE_TYPE_IMPORT_FOLDER
|
|
SERIALISABLE_VERSION = 3
|
|
|
|
def __init__( self, name, path = '', import_file_options = None, import_tag_options = None, txt_parse_tag_service_keys = None, mimes = None, actions = None, action_locations = None, period = 3600, open_popup = True ):
|
|
|
|
if mimes is None:
|
|
|
|
mimes = HC.ALLOWED_MIMES
|
|
|
|
|
|
if import_file_options is None:
|
|
|
|
import_file_options = ClientDefaults.GetDefaultImportFileOptions()
|
|
|
|
|
|
if import_tag_options is None:
|
|
|
|
new_options = HG.client_controller.GetNewOptions()
|
|
|
|
import_tag_options = new_options.GetDefaultImportTagOptions( ClientDownloading.GalleryIdentifier( HC.SITE_TYPE_DEFAULT ) )
|
|
|
|
|
|
if txt_parse_tag_service_keys is None:
|
|
|
|
txt_parse_tag_service_keys = []
|
|
|
|
|
|
if actions is None:
|
|
|
|
actions = {}
|
|
|
|
actions[ CC.STATUS_SUCCESSFUL ] = CC.IMPORT_FOLDER_DELETE
|
|
actions[ CC.STATUS_REDUNDANT ] = CC.IMPORT_FOLDER_DELETE
|
|
actions[ CC.STATUS_DELETED ] = CC.IMPORT_FOLDER_DELETE
|
|
actions[ CC.STATUS_FAILED ] = CC.IMPORT_FOLDER_IGNORE
|
|
|
|
|
|
if action_locations is None:
|
|
|
|
action_locations = {}
|
|
|
|
|
|
HydrusSerialisable.SerialisableBaseNamed.__init__( self, name )
|
|
|
|
self._path = path
|
|
self._mimes = mimes
|
|
self._import_file_options = import_file_options
|
|
self._import_tag_options = import_tag_options
|
|
self._txt_parse_tag_service_keys = txt_parse_tag_service_keys
|
|
self._actions = actions
|
|
self._action_locations = action_locations
|
|
self._period = period
|
|
self._open_popup = open_popup
|
|
|
|
self._path_cache = SeedCache()
|
|
self._last_checked = 0
|
|
self._paused = False
|
|
|
|
|
|
def _ActionPaths( self ):
|
|
|
|
for status in ( CC.STATUS_SUCCESSFUL, CC.STATUS_REDUNDANT, CC.STATUS_DELETED, CC.STATUS_FAILED ):
|
|
|
|
action = self._actions[ status ]
|
|
|
|
if action == CC.IMPORT_FOLDER_DELETE:
|
|
|
|
while True:
|
|
|
|
path = self._path_cache.GetNextSeed( status )
|
|
|
|
if path is None or HG.view_shutdown:
|
|
|
|
break
|
|
|
|
|
|
try:
|
|
|
|
if os.path.exists( path ):
|
|
|
|
ClientData.DeletePath( path )
|
|
|
|
|
|
txt_path = path + '.txt'
|
|
|
|
if os.path.exists( txt_path ):
|
|
|
|
ClientData.DeletePath( txt_path )
|
|
|
|
|
|
self._path_cache.RemoveSeeds( ( path, ) )
|
|
|
|
except Exception as e:
|
|
|
|
HydrusData.ShowText( 'Import folder tried to delete ' + path + ', but could not:' )
|
|
|
|
HydrusData.ShowException( e )
|
|
|
|
HydrusData.ShowText( 'Import folder has been paused.' )
|
|
|
|
self._paused = True
|
|
|
|
return
|
|
|
|
|
|
|
|
elif action == CC.IMPORT_FOLDER_MOVE:
|
|
|
|
while True:
|
|
|
|
path = self._path_cache.GetNextSeed( status )
|
|
|
|
if path is None or HG.view_shutdown:
|
|
|
|
break
|
|
|
|
|
|
try:
|
|
|
|
if os.path.exists( path ):
|
|
|
|
dest_dir = self._action_locations[ status ]
|
|
|
|
filename = os.path.basename( path )
|
|
|
|
dest_path = os.path.join( dest_dir, filename )
|
|
|
|
dest_path = HydrusPaths.AppendPathUntilNoConflicts( dest_path )
|
|
|
|
HydrusPaths.MergeFile( path, dest_path )
|
|
|
|
|
|
txt_path = path + '.txt'
|
|
|
|
if os.path.exists( txt_path ):
|
|
|
|
dest_dir = self._action_locations[ status ]
|
|
|
|
txt_filename = os.path.basename( txt_path )
|
|
|
|
txt_dest_path = os.path.join( dest_dir, txt_filename )
|
|
|
|
txt_dest_path = HydrusPaths.AppendPathUntilNoConflicts( txt_dest_path )
|
|
|
|
HydrusPaths.MergeFile( txt_path, txt_dest_path )
|
|
|
|
|
|
self._path_cache.RemoveSeeds( ( path, ) )
|
|
|
|
except Exception as e:
|
|
|
|
HydrusData.ShowText( 'Import folder tried to move ' + path + ', but could not:' )
|
|
|
|
HydrusData.ShowException( e )
|
|
|
|
HydrusData.ShowText( 'Import folder has been paused.' )
|
|
|
|
self._paused = True
|
|
|
|
return
|
|
|
|
|
|
|
|
elif status == CC.IMPORT_FOLDER_IGNORE:
|
|
|
|
pass
|
|
|
|
|
|
|
|
|
|
def _GetSerialisableInfo( self ):
|
|
|
|
serialisable_import_file_options = self._import_file_options.GetSerialisableTuple()
|
|
serialisable_import_tag_options = self._import_tag_options.GetSerialisableTuple()
|
|
serialisable_txt_parse_tag_service_keys = [ service_key.encode( 'hex' ) for service_key in self._txt_parse_tag_service_keys ]
|
|
serialisable_path_cache = self._path_cache.GetSerialisableTuple()
|
|
|
|
# json turns int dict keys to strings
|
|
action_pairs = self._actions.items()
|
|
action_location_pairs = self._action_locations.items()
|
|
|
|
return ( self._path, self._mimes, serialisable_import_file_options, serialisable_import_tag_options, serialisable_txt_parse_tag_service_keys, action_pairs, action_location_pairs, self._period, self._open_popup, serialisable_path_cache, self._last_checked, self._paused )
|
|
|
|
|
|
def _InitialiseFromSerialisableInfo( self, serialisable_info ):
|
|
|
|
( self._path, self._mimes, serialisable_import_file_options, serialisable_import_tag_options, serialisable_txt_parse_service_keys, action_pairs, action_location_pairs, self._period, self._open_popup, serialisable_path_cache, self._last_checked, self._paused ) = serialisable_info
|
|
|
|
self._actions = dict( action_pairs )
|
|
self._action_locations = dict( action_location_pairs )
|
|
|
|
self._import_file_options = HydrusSerialisable.CreateFromSerialisableTuple( serialisable_import_file_options )
|
|
self._import_tag_options = HydrusSerialisable.CreateFromSerialisableTuple( serialisable_import_tag_options )
|
|
self._txt_parse_tag_service_keys = [ service_key.decode( 'hex' ) for service_key in serialisable_txt_parse_service_keys ]
|
|
self._path_cache = HydrusSerialisable.CreateFromSerialisableTuple( serialisable_path_cache )
|
|
|
|
|
|
def _UpdateSerialisableInfo( self, version, old_serialisable_info ):
|
|
|
|
if version == 1:
|
|
|
|
( path, mimes, serialisable_import_file_options, action_pairs, action_location_pairs, period, open_popup, tag, serialisable_path_cache, last_checked, paused ) = old_serialisable_info
|
|
|
|
service_keys_to_explicit_tags = {}
|
|
|
|
if tag is not None:
|
|
|
|
service_keys_to_explicit_tags[ CC.LOCAL_TAG_SERVICE_KEY ] = { tag }
|
|
|
|
|
|
import_tag_options = ClientData.ImportTagOptions( service_keys_to_explicit_tags = service_keys_to_explicit_tags )
|
|
|
|
serialisable_import_tag_options = import_tag_options.GetSerialisableTuple()
|
|
|
|
new_serialisable_info = ( path, mimes, serialisable_import_file_options, serialisable_import_tag_options, action_pairs, action_location_pairs, period, open_popup, serialisable_path_cache, last_checked, paused )
|
|
|
|
return ( 2, new_serialisable_info )
|
|
|
|
|
|
if version == 2:
|
|
|
|
( path, mimes, serialisable_import_file_options, serialisable_import_tag_options, action_pairs, action_location_pairs, period, open_popup, serialisable_path_cache, last_checked, paused ) = old_serialisable_info
|
|
|
|
serialisable_txt_parse_tag_service_keys = []
|
|
|
|
new_serialisable_info = ( path, mimes, serialisable_import_file_options, serialisable_import_tag_options, serialisable_txt_parse_tag_service_keys, action_pairs, action_location_pairs, period, open_popup, serialisable_path_cache, last_checked, paused )
|
|
|
|
return ( 3, new_serialisable_info )
|
|
|
|
|
|
|
|
def DoWork( self ):
|
|
|
|
if HG.view_shutdown:
|
|
|
|
return
|
|
|
|
|
|
if not self._paused and HydrusData.TimeHasPassed( self._last_checked + self._period ):
|
|
|
|
if os.path.exists( self._path ) and os.path.isdir( self._path ):
|
|
|
|
filenames = os.listdir( HydrusData.ToUnicode( self._path ) )
|
|
|
|
raw_paths = [ os.path.join( self._path, filename ) for filename in filenames ]
|
|
|
|
all_paths = ClientFiles.GetAllPaths( raw_paths )
|
|
|
|
all_paths = HydrusPaths.FilterFreePaths( all_paths )
|
|
|
|
new_paths = []
|
|
|
|
for path in all_paths:
|
|
|
|
if path.endswith( '.txt' ):
|
|
|
|
continue
|
|
|
|
|
|
if not self._path_cache.HasSeed( path ):
|
|
|
|
new_paths.append( path )
|
|
|
|
|
|
|
|
self._path_cache.AddSeeds( new_paths )
|
|
|
|
successful_hashes = set()
|
|
|
|
i = 0
|
|
|
|
while True:
|
|
|
|
path = self._path_cache.GetNextSeed( CC.STATUS_UNKNOWN )
|
|
|
|
if path is None or HG.view_shutdown:
|
|
|
|
break
|
|
|
|
|
|
try:
|
|
|
|
mime = HydrusFileHandling.GetMime( path )
|
|
|
|
if mime in self._mimes:
|
|
|
|
( os_file_handle, temp_path ) = HydrusPaths.GetTempPath()
|
|
|
|
try:
|
|
|
|
copied = HydrusPaths.MirrorFile( path, temp_path )
|
|
|
|
if not copied:
|
|
|
|
raise Exception( 'File failed to copy--see log for error.' )
|
|
|
|
|
|
file_import_job = FileImportJob( temp_path, self._import_file_options )
|
|
|
|
client_files_manager = HG.client_controller.client_files_manager
|
|
|
|
( status, hash ) = client_files_manager.ImportFile( file_import_job )
|
|
|
|
finally:
|
|
|
|
HydrusPaths.CleanUpTempPath( os_file_handle, temp_path )
|
|
|
|
|
|
self._path_cache.UpdateSeedStatus( path, status )
|
|
|
|
if status in ( CC.STATUS_SUCCESSFUL, CC.STATUS_REDUNDANT ):
|
|
|
|
downloaded_tags = []
|
|
|
|
service_keys_to_content_updates = self._import_tag_options.GetServiceKeysToContentUpdates( hash, downloaded_tags ) # explicit tags
|
|
|
|
if len( service_keys_to_content_updates ) > 0:
|
|
|
|
HG.client_controller.WriteSynchronous( 'content_updates', service_keys_to_content_updates )
|
|
|
|
|
|
txt_path = path + '.txt'
|
|
|
|
if len( self._txt_parse_tag_service_keys ) > 0 and os.path.exists( txt_path ):
|
|
|
|
try:
|
|
|
|
with open( txt_path, 'rb' ) as f:
|
|
|
|
txt_tags_string = f.read()
|
|
|
|
|
|
txt_tags = [ HydrusData.ToUnicode( tag ) for tag in HydrusData.SplitByLinesep( txt_tags_string ) ]
|
|
|
|
if True in ( len( txt_tag ) > 1024 for txt_tag in txt_tags ):
|
|
|
|
HydrusData.ShowText( 'Tags were too long--I think this was not a regular text file!' )
|
|
|
|
raise Exception()
|
|
|
|
|
|
txt_tags = HydrusTags.CleanTags( txt_tags )
|
|
|
|
siblings_manager = HG.client_controller.GetManager( 'tag_siblings' )
|
|
|
|
service_keys_to_tags = { service_key : siblings_manager.CollapseTags( service_key, txt_tags ) for service_key in self._txt_parse_tag_service_keys }
|
|
|
|
service_keys_to_content_updates = ClientData.ConvertServiceKeysToTagsToServiceKeysToContentUpdates( { hash }, service_keys_to_tags )
|
|
|
|
if len( service_keys_to_content_updates ) > 0:
|
|
|
|
HG.client_controller.WriteSynchronous( 'content_updates', service_keys_to_content_updates )
|
|
|
|
|
|
except Exception as e:
|
|
|
|
HydrusData.ShowText( 'Trying to load tags from the .txt file "' + txt_path + '" in the import folder "' + self._name + '" threw an error!' )
|
|
|
|
HydrusData.ShowException( e )
|
|
|
|
|
|
|
|
|
|
if status == CC.STATUS_SUCCESSFUL:
|
|
|
|
successful_hashes.add( hash )
|
|
|
|
|
|
else:
|
|
|
|
self._path_cache.UpdateSeedStatus( path, CC.STATUS_UNINTERESTING_MIME )
|
|
|
|
|
|
except Exception as e:
|
|
|
|
error_text = traceback.format_exc()
|
|
|
|
HydrusData.Print( 'A file failed to import from import folder ' + self._name + ':' )
|
|
|
|
self._path_cache.UpdateSeedStatus( path, CC.STATUS_FAILED, exception = e )
|
|
|
|
|
|
i += 1
|
|
|
|
if i % 100 == 0:
|
|
|
|
self._ActionPaths()
|
|
|
|
|
|
|
|
if len( successful_hashes ) > 0:
|
|
|
|
HydrusData.Print( 'Import folder ' + self._name + ' imported ' + HydrusData.ConvertIntToPrettyString( len( successful_hashes ) ) + ' files.' )
|
|
|
|
if self._open_popup:
|
|
|
|
job_key = ClientThreading.JobKey()
|
|
|
|
job_key.SetVariable( 'popup_title', 'import folder - ' + self._name )
|
|
job_key.SetVariable( 'popup_files', successful_hashes )
|
|
|
|
HG.client_controller.pub( 'message', job_key )
|
|
|
|
|
|
|
|
self._ActionPaths()
|
|
|
|
|
|
self._last_checked = HydrusData.GetNow()
|
|
|
|
HG.client_controller.WriteSynchronous( 'serialisable', self )
|
|
|
|
|
|
|
|
def GetSeedCache( self ):
|
|
|
|
return self._path_cache
|
|
|
|
|
|
def ToListBoxTuple( self ):
|
|
|
|
return ( self._name, self._path, self._period )
|
|
|
|
|
|
def ToTuple( self ):
|
|
|
|
return ( self._name, self._path, self._mimes, self._import_file_options, self._import_tag_options, self._txt_parse_tag_service_keys, self._actions, self._action_locations, self._period, self._open_popup, self._paused )
|
|
|
|
|
|
def SetSeedCache( self, seed_cache ):
|
|
|
|
self._path_cache = seed_cache
|
|
|
|
|
|
def SetTuple( self, name, path, mimes, import_file_options, import_tag_options, txt_parse_tag_service_keys, actions, action_locations, period, open_popup, paused ):
|
|
|
|
if path != self._path:
|
|
|
|
self._path_cache = SeedCache()
|
|
|
|
|
|
if set( mimes ) != set( self._mimes ):
|
|
|
|
self._path_cache.RemoveSeedsByStatus( CC.STATUS_UNINTERESTING_MIME )
|
|
|
|
|
|
self._name = name
|
|
self._path = path
|
|
self._mimes = mimes
|
|
self._import_file_options = import_file_options
|
|
self._import_tag_options = import_tag_options
|
|
self._txt_parse_tag_service_keys = txt_parse_tag_service_keys
|
|
self._actions = actions
|
|
self._action_locations = action_locations
|
|
self._period = period
|
|
self._open_popup = open_popup
|
|
self._paused = paused
|
|
|
|
|
|
HydrusSerialisable.SERIALISABLE_TYPES_TO_OBJECT_TYPES[ HydrusSerialisable.SERIALISABLE_TYPE_IMPORT_FOLDER ] = ImportFolder
|
|
|
|
class PageOfImagesImport( HydrusSerialisable.SerialisableBase ):
|
|
|
|
SERIALISABLE_TYPE = HydrusSerialisable.SERIALISABLE_TYPE_PAGE_OF_IMAGES_IMPORT
|
|
SERIALISABLE_VERSION = 1
|
|
|
|
def __init__( self ):
|
|
|
|
HydrusSerialisable.SerialisableBase.__init__( self )
|
|
|
|
import_file_options = ClientDefaults.GetDefaultImportFileOptions()
|
|
|
|
self._pending_page_urls = []
|
|
self._urls_cache = SeedCache()
|
|
self._import_file_options = import_file_options
|
|
self._download_image_links = True
|
|
self._download_unlinked_images = False
|
|
self._paused = False
|
|
|
|
self._parser_status = ''
|
|
self._current_action = ''
|
|
|
|
self._download_control_file_set = None
|
|
self._download_control_file_clear = None
|
|
self._download_control_page_set = None
|
|
self._download_control_page_clear = None
|
|
|
|
self._lock = threading.Lock()
|
|
|
|
|
|
def _GetSerialisableInfo( self ):
|
|
|
|
serialisable_url_cache = self._urls_cache.GetSerialisableTuple()
|
|
serialisable_file_options = self._import_file_options.GetSerialisableTuple()
|
|
|
|
return ( self._pending_page_urls, serialisable_url_cache, serialisable_file_options, self._download_image_links, self._download_unlinked_images, self._paused )
|
|
|
|
|
|
def _InitialiseFromSerialisableInfo( self, serialisable_info ):
|
|
|
|
( self._pending_page_urls, serialisable_url_cache, serialisable_file_options, self._download_image_links, self._download_unlinked_images, self._paused ) = serialisable_info
|
|
|
|
self._urls_cache = HydrusSerialisable.CreateFromSerialisableTuple( serialisable_url_cache )
|
|
self._import_file_options = HydrusSerialisable.CreateFromSerialisableTuple( serialisable_file_options )
|
|
|
|
|
|
def _WorkOnFiles( self, page_key ):
|
|
|
|
file_url = self._urls_cache.GetNextSeed( CC.STATUS_UNKNOWN )
|
|
|
|
if file_url is None:
|
|
|
|
return False
|
|
|
|
|
|
try:
|
|
|
|
with self._lock:
|
|
|
|
self._current_action = 'reviewing file'
|
|
|
|
|
|
( status, hash ) = HG.client_controller.Read( 'url_status', file_url )
|
|
|
|
url_not_known_beforehand = status == CC.STATUS_NEW
|
|
|
|
if status == CC.STATUS_DELETED:
|
|
|
|
if not self._import_file_options.GetExcludeDeleted():
|
|
|
|
status = CC.STATUS_NEW
|
|
|
|
|
|
|
|
if status == CC.STATUS_NEW:
|
|
|
|
( os_file_handle, temp_path ) = HydrusPaths.GetTempPath()
|
|
|
|
try:
|
|
|
|
with self._lock:
|
|
|
|
self._current_action = 'downloading file'
|
|
|
|
|
|
network_job = ClientNetworking.NetworkJob( 'GET', file_url, temp_path = temp_path )
|
|
|
|
try:
|
|
|
|
HG.client_controller.network_engine.AddJob( network_job )
|
|
|
|
with self._lock:
|
|
|
|
if self._download_control_file_set is not None:
|
|
|
|
wx.CallAfter( self._download_control_file_set, network_job )
|
|
|
|
|
|
|
|
while not network_job.IsDone():
|
|
|
|
time.sleep( 0.1 )
|
|
|
|
|
|
finally:
|
|
|
|
if self._download_control_file_clear is not None:
|
|
|
|
wx.CallAfter( self._download_control_file_clear )
|
|
|
|
|
|
|
|
if HG.view_shutdown:
|
|
|
|
return
|
|
|
|
elif network_job.HasError():
|
|
|
|
status = CC.STATUS_FAILED
|
|
|
|
self._urls_cache.UpdateSeedStatus( file_url, status, note = network_job.GetErrorText() )
|
|
|
|
time.sleep( 2 )
|
|
|
|
elif network_job.IsCancelled():
|
|
|
|
status = CC.STATUS_SKIPPED
|
|
|
|
self._urls_cache.UpdateSeedStatus( file_url, status, note = 'cancelled during download!' )
|
|
|
|
else:
|
|
|
|
with self._lock:
|
|
|
|
self._current_action = 'importing file'
|
|
|
|
|
|
file_import_job = FileImportJob( temp_path, self._import_file_options )
|
|
|
|
( status, hash ) = HG.client_controller.client_files_manager.ImportFile( file_import_job )
|
|
|
|
self._urls_cache.UpdateSeedStatus( file_url, status )
|
|
|
|
if url_not_known_beforehand and hash is not None:
|
|
|
|
service_keys_to_content_updates = { CC.COMBINED_LOCAL_FILE_SERVICE_KEY : [ HydrusData.ContentUpdate( HC.CONTENT_TYPE_URLS, HC.CONTENT_UPDATE_ADD, ( hash, ( file_url, ) ) ) ] }
|
|
|
|
HG.client_controller.WriteSynchronous( 'content_updates', service_keys_to_content_updates )
|
|
|
|
|
|
|
|
finally:
|
|
|
|
HydrusPaths.CleanUpTempPath( os_file_handle, temp_path )
|
|
|
|
|
|
else:
|
|
|
|
self._urls_cache.UpdateSeedStatus( file_url, status )
|
|
|
|
|
|
if status in ( CC.STATUS_SUCCESSFUL, CC.STATUS_REDUNDANT ):
|
|
|
|
( media_result, ) = HG.client_controller.Read( 'media_results', ( hash, ) )
|
|
|
|
HG.client_controller.pub( 'add_media_results', page_key, ( media_result, ) )
|
|
|
|
|
|
except HydrusExceptions.MimeException as e:
|
|
|
|
status = CC.STATUS_UNINTERESTING_MIME
|
|
|
|
self._urls_cache.UpdateSeedStatus( file_url, status )
|
|
|
|
except Exception as e:
|
|
|
|
status = CC.STATUS_FAILED
|
|
|
|
self._urls_cache.UpdateSeedStatus( file_url, status, exception = e )
|
|
|
|
|
|
with self._lock:
|
|
|
|
self._current_action = ''
|
|
|
|
|
|
return True
|
|
|
|
|
|
def _WorkOnQueue( self, page_key ):
|
|
|
|
if len( self._pending_page_urls ) > 0:
|
|
|
|
with self._lock:
|
|
|
|
page_url = self._pending_page_urls.pop( 0 )
|
|
|
|
self._parser_status = 'checking ' + page_url
|
|
|
|
|
|
error_occurred = False
|
|
|
|
try:
|
|
|
|
network_job = ClientNetworking.NetworkJob( 'GET', page_url )
|
|
|
|
network_job.OverrideBandwidth()
|
|
|
|
try:
|
|
|
|
HG.client_controller.network_engine.AddJob( network_job )
|
|
|
|
with self._lock:
|
|
|
|
if self._download_control_page_set is not None:
|
|
|
|
wx.CallAfter( self._download_control_page_set, network_job )
|
|
|
|
|
|
|
|
while not network_job.IsDone():
|
|
|
|
time.sleep( 0.1 )
|
|
|
|
|
|
finally:
|
|
|
|
if self._download_control_page_clear is not None:
|
|
|
|
wx.CallAfter( self._download_control_page_clear )
|
|
|
|
|
|
|
|
if HG.view_shutdown:
|
|
|
|
raise HydrusExceptions.ShutdownException()
|
|
|
|
elif network_job.HasError():
|
|
|
|
e = network_job.GetErrorException()
|
|
|
|
raise e
|
|
|
|
elif network_job.IsCancelled():
|
|
|
|
raise Exception( 'Page download cancelled!' )
|
|
|
|
else:
|
|
|
|
html = network_job.GetContent()
|
|
|
|
soup = ClientDownloading.GetSoup( html )
|
|
|
|
#
|
|
|
|
all_links = soup.find_all( 'a' )
|
|
|
|
links_with_images = [ link for link in all_links if len( link.find_all( 'img' ) ) > 0 ]
|
|
|
|
all_linked_images = []
|
|
|
|
for link in all_links:
|
|
|
|
images = link.find_all( 'img' )
|
|
|
|
all_linked_images.extend( images )
|
|
|
|
|
|
all_images = soup.find_all( 'img' )
|
|
|
|
unlinked_images = [ image for image in all_images if image not in all_linked_images ]
|
|
|
|
#
|
|
|
|
file_urls = []
|
|
|
|
if self._download_image_links:
|
|
|
|
file_urls.extend( [ urlparse.urljoin( page_url, link[ 'href' ] ) for link in links_with_images if link.has_attr( 'href' ) ] )
|
|
|
|
|
|
if self._download_unlinked_images:
|
|
|
|
file_urls.extend( [ urlparse.urljoin( page_url, image[ 'src' ] ) for image in unlinked_images if image.has_attr( 'src' ) ] )
|
|
|
|
|
|
new_urls = [ file_url for file_url in file_urls if not self._urls_cache.HasSeed( file_url ) ]
|
|
|
|
num_new = len( new_urls )
|
|
|
|
self._urls_cache.AddSeeds( new_urls )
|
|
|
|
parser_status = 'page checked OK - ' + HydrusData.ConvertIntToPrettyString( num_new ) + ' new urls'
|
|
|
|
|
|
except HydrusExceptions.NotFoundException:
|
|
|
|
error_occurred = True
|
|
|
|
parser_status = 'page 404'
|
|
|
|
except Exception as e:
|
|
|
|
error_occurred = True
|
|
|
|
parser_status = HydrusData.ToUnicode( e )
|
|
|
|
|
|
with self._lock:
|
|
|
|
self._parser_status = parser_status
|
|
|
|
|
|
if error_occurred:
|
|
|
|
time.sleep( 5 )
|
|
|
|
|
|
with self._lock:
|
|
|
|
if len( self._pending_page_urls ) == 0:
|
|
|
|
self._parser_status = ''
|
|
|
|
|
|
|
|
|
|
|
|
def _THREADWorkOnFiles( self, page_key ):
|
|
|
|
while not ( HG.view_shutdown or HG.client_controller.PageCompletelyDestroyed( page_key ) ):
|
|
|
|
if self._paused or HG.client_controller.PageClosedButNotDestroyed( page_key ):
|
|
|
|
time.sleep( 1 )
|
|
|
|
else:
|
|
|
|
try:
|
|
|
|
did_work = self._WorkOnFiles( page_key )
|
|
|
|
if did_work:
|
|
|
|
time.sleep( 0.1 )
|
|
|
|
else:
|
|
|
|
time.sleep( 1 )
|
|
|
|
|
|
HG.client_controller.WaitUntilPubSubsEmpty()
|
|
|
|
except Exception as e:
|
|
|
|
HydrusData.ShowException( e )
|
|
|
|
return
|
|
|
|
|
|
|
|
|
|
|
|
def _THREADWorkOnQueue( self, page_key ):
|
|
|
|
while not ( HG.view_shutdown or HG.client_controller.PageCompletelyDestroyed( page_key ) ):
|
|
|
|
if self._paused or HG.client_controller.PageClosedButNotDestroyed( page_key ):
|
|
|
|
time.sleep( 1 )
|
|
|
|
else:
|
|
|
|
try:
|
|
|
|
self._WorkOnQueue( page_key )
|
|
|
|
time.sleep( 5 )
|
|
|
|
HG.client_controller.WaitUntilPubSubsEmpty()
|
|
|
|
except Exception as e:
|
|
|
|
HydrusData.ShowException( e )
|
|
|
|
return
|
|
|
|
|
|
|
|
|
|
|
|
def AdvancePageURL( self, page_url ):
|
|
|
|
with self._lock:
|
|
|
|
if page_url in self._pending_page_urls:
|
|
|
|
index = self._pending_page_urls.index( page_url )
|
|
|
|
if index - 1 >= 0:
|
|
|
|
self._pending_page_urls.remove( page_url )
|
|
|
|
self._pending_page_urls.insert( index - 1, page_url )
|
|
|
|
|
|
|
|
|
|
|
|
def CurrentlyWorking( self ):
|
|
|
|
with self._lock:
|
|
|
|
finished = not self._urls_cache.WorkToDo() or len( self._pending_page_urls ) > 0
|
|
|
|
return not finished and not self._paused
|
|
|
|
|
|
|
|
def DelayPageURL( self, page_url ):
|
|
|
|
with self._lock:
|
|
|
|
if page_url in self._pending_page_urls:
|
|
|
|
index = self._pending_page_urls.index( page_url )
|
|
|
|
if index + 1 < len( self._pending_page_urls ):
|
|
|
|
self._pending_page_urls.remove( page_url )
|
|
|
|
self._pending_page_urls.insert( index + 1, page_url )
|
|
|
|
|
|
|
|
|
|
|
|
def DeletePageURL( self, page_url ):
|
|
|
|
with self._lock:
|
|
|
|
if page_url in self._pending_page_urls:
|
|
|
|
self._pending_page_urls.remove( page_url )
|
|
|
|
|
|
|
|
|
|
def GetSeedCache( self ):
|
|
|
|
return self._urls_cache
|
|
|
|
|
|
def GetOptions( self ):
|
|
|
|
with self._lock:
|
|
|
|
return ( self._import_file_options, self._download_image_links, self._download_unlinked_images )
|
|
|
|
|
|
|
|
def GetStatus( self ):
|
|
|
|
with self._lock:
|
|
|
|
return ( list( self._pending_page_urls ), self._parser_status, self._current_action, self._paused )
|
|
|
|
|
|
|
|
def PausePlay( self ):
|
|
|
|
with self._lock:
|
|
|
|
self._paused = not self._paused
|
|
|
|
|
|
|
|
def PendPageURL( self, page_url ):
|
|
|
|
with self._lock:
|
|
|
|
if page_url not in self._pending_page_urls:
|
|
|
|
self._pending_page_urls.append( page_url )
|
|
|
|
|
|
|
|
|
|
def SetDownloadControlFile( self, download_control ):
|
|
|
|
with self._lock:
|
|
|
|
self._download_control_file_set = download_control.SetNetworkJob
|
|
self._download_control_file_clear = download_control.ClearNetworkJob
|
|
|
|
|
|
|
|
def SetDownloadControlPage( self, download_control ):
|
|
|
|
with self._lock:
|
|
|
|
self._download_control_page_set = download_control.SetNetworkJob
|
|
self._download_control_page_clear = download_control.ClearNetworkJob
|
|
|
|
|
|
|
|
def SetDownloadImageLinks( self, value ):
|
|
|
|
with self._lock:
|
|
|
|
self._download_image_links = value
|
|
|
|
|
|
|
|
def SetDownloadUnlinkedImages( self, value ):
|
|
|
|
with self._lock:
|
|
|
|
self._download_unlinked_images = value
|
|
|
|
|
|
|
|
def SetImportFileOptions( self, import_file_options ):
|
|
|
|
with self._lock:
|
|
|
|
self._import_file_options = import_file_options
|
|
|
|
|
|
|
|
def Start( self, page_key ):
|
|
|
|
threading.Thread( target = self._THREADWorkOnQueue, args = ( page_key, ) ).start()
|
|
threading.Thread( target = self._THREADWorkOnFiles, args = ( page_key, ) ).start()
|
|
|
|
|
|
HydrusSerialisable.SERIALISABLE_TYPES_TO_OBJECT_TYPES[ HydrusSerialisable.SERIALISABLE_TYPE_PAGE_OF_IMAGES_IMPORT ] = PageOfImagesImport
|
|
|
|
class SeedCache( HydrusSerialisable.SerialisableBase ):
|
|
|
|
SERIALISABLE_TYPE = HydrusSerialisable.SERIALISABLE_TYPE_SEED_CACHE
|
|
SERIALISABLE_VERSION = 5
|
|
|
|
def __init__( self ):
|
|
|
|
HydrusSerialisable.SerialisableBase.__init__( self )
|
|
|
|
self._seeds_ordered = []
|
|
self._seeds_to_info = {}
|
|
|
|
self._seed_cache_key = HydrusData.GenerateKey()
|
|
|
|
self._status_cache = None
|
|
|
|
self._dirty = True
|
|
|
|
self._lock = threading.Lock()
|
|
|
|
|
|
def __len__( self ):
|
|
|
|
return len( self._seeds_to_info )
|
|
|
|
|
|
def _GenerateStatus( self ):
|
|
|
|
statuses_to_counts = collections.Counter()
|
|
|
|
for seed_info in self._seeds_to_info.values():
|
|
|
|
statuses_to_counts[ seed_info[ 'status' ] ] += 1
|
|
|
|
|
|
num_successful = statuses_to_counts[ CC.STATUS_SUCCESSFUL ]
|
|
num_failed = statuses_to_counts[ CC.STATUS_FAILED ]
|
|
num_deleted = statuses_to_counts[ CC.STATUS_DELETED ]
|
|
num_redundant = statuses_to_counts[ CC.STATUS_REDUNDANT ]
|
|
num_unknown = statuses_to_counts[ CC.STATUS_UNKNOWN ]
|
|
|
|
status_strings = []
|
|
|
|
if num_successful > 0: status_strings.append( str( num_successful ) + ' successful' )
|
|
if num_failed > 0: status_strings.append( str( num_failed ) + ' failed' )
|
|
if num_deleted > 0: status_strings.append( str( num_deleted ) + ' previously deleted' )
|
|
if num_redundant > 0: status_strings.append( str( num_redundant ) + ' already in db' )
|
|
|
|
status = ', '.join( status_strings )
|
|
|
|
total_processed = len( self._seeds_ordered ) - num_unknown
|
|
total = len( self._seeds_ordered )
|
|
|
|
self._status_cache = ( status, ( total_processed, total ) )
|
|
|
|
self._dirty = False
|
|
|
|
|
|
def _GetSeedTuple( self, seed ):
|
|
|
|
seed_info = self._seeds_to_info[ seed ]
|
|
|
|
status = seed_info[ 'status' ]
|
|
added_timestamp = seed_info[ 'added_timestamp' ]
|
|
last_modified_timestamp = seed_info[ 'last_modified_timestamp' ]
|
|
note = seed_info[ 'note' ]
|
|
|
|
return ( seed, status, added_timestamp, last_modified_timestamp, note )
|
|
|
|
|
|
def _GetSerialisableInfo( self ):
|
|
|
|
with self._lock:
|
|
|
|
serialisable_info = []
|
|
|
|
for seed in self._seeds_ordered:
|
|
|
|
seed_info = self._seeds_to_info[ seed ]
|
|
|
|
serialisable_info.append( ( seed, seed_info ) )
|
|
|
|
|
|
return serialisable_info
|
|
|
|
|
|
|
|
def _InitialiseFromSerialisableInfo( self, serialisable_info ):
|
|
|
|
with self._lock:
|
|
|
|
for ( seed, seed_info ) in serialisable_info:
|
|
|
|
self._seeds_ordered.append( seed )
|
|
|
|
self._seeds_to_info[ seed ] = seed_info
|
|
|
|
|
|
|
|
|
|
def _SetDirty( self ):
|
|
|
|
self._dirty = True
|
|
|
|
|
|
def _UpdateSerialisableInfo( self, version, old_serialisable_info ):
|
|
|
|
if version == 1:
|
|
|
|
new_serialisable_info = []
|
|
|
|
for ( seed, seed_info ) in old_serialisable_info:
|
|
|
|
if 'note' in seed_info:
|
|
|
|
seed_info[ 'note' ] = HydrusData.ToUnicode( seed_info[ 'note' ] )
|
|
|
|
|
|
new_serialisable_info.append( ( seed, seed_info ) )
|
|
|
|
|
|
return ( 2, new_serialisable_info )
|
|
|
|
|
|
if version in ( 2, 3 ):
|
|
|
|
# gelbooru replaced their thumbnail links with this redirect spam
|
|
# 'https://gelbooru.com/redirect.php?s=Ly9nZWxib29ydS5jb20vaW5kZXgucGhwP3BhZ2U9cG9zdCZzPXZpZXcmaWQ9MzY4ODA1OA=='
|
|
|
|
# I missed some http ones here, so I've broadened the test and rescheduled it
|
|
|
|
new_serialisable_info = []
|
|
|
|
for ( seed, seed_info ) in old_serialisable_info:
|
|
|
|
if 'gelbooru.com/redirect.php' in seed:
|
|
|
|
continue
|
|
|
|
|
|
new_serialisable_info.append( ( seed, seed_info ) )
|
|
|
|
|
|
return ( 4, new_serialisable_info )
|
|
|
|
|
|
if version == 4:
|
|
|
|
def ConvertRegularToRawURL( regular_url ):
|
|
|
|
# convert this:
|
|
# http://68.media.tumblr.com/5af0d991f26ef9fdad5a0c743fb1eca2/tumblr_opl012ZBOu1tiyj7vo1_500.jpg
|
|
# to this:
|
|
# http://68.media.tumblr.com/5af0d991f26ef9fdad5a0c743fb1eca2/tumblr_opl012ZBOu1tiyj7vo1_raw.jpg
|
|
# the 500 part can be a bunch of stuff, including letters
|
|
|
|
url_components = regular_url.split( '_' )
|
|
|
|
last_component = url_components[ -1 ]
|
|
|
|
( number_gubbins, file_ext ) = last_component.split( '.' )
|
|
|
|
raw_last_component = 'raw.' + file_ext
|
|
|
|
url_components[ -1 ] = raw_last_component
|
|
|
|
raw_url = '_'.join( url_components )
|
|
|
|
return raw_url
|
|
|
|
|
|
def Remove68Subdomain( long_url ):
|
|
|
|
# sometimes the 68 subdomain gives a 404 on the raw url, so:
|
|
|
|
# convert this:
|
|
# http://68.media.tumblr.com/5af0d991f26ef9fdad5a0c743fb1eca2/tumblr_opl012ZBOu1tiyj7vo1_raw.jpg
|
|
# to this:
|
|
# http://media.tumblr.com/5af0d991f26ef9fdad5a0c743fb1eca2/tumblr_opl012ZBOu1tiyj7vo1_raw.jpg
|
|
|
|
# I am not sure if it is always 68, but let's not assume
|
|
|
|
( scheme, rest ) = long_url.split( '://', 1 )
|
|
|
|
if rest.startswith( 'media.tumblr.com' ):
|
|
|
|
return long_url
|
|
|
|
|
|
( gumpf, shorter_rest ) = rest.split( '.', 1 )
|
|
|
|
shorter_url = scheme + '://' + shorter_rest
|
|
|
|
return shorter_url
|
|
|
|
|
|
new_serialisable_info = []
|
|
|
|
good_seeds = set()
|
|
|
|
for ( seed, seed_info ) in old_serialisable_info:
|
|
|
|
try:
|
|
|
|
parse = urlparse.urlparse( seed )
|
|
|
|
if 'media.tumblr.com' in parse.netloc:
|
|
|
|
seed = Remove68Subdomain( seed )
|
|
|
|
seed = ConvertRegularToRawURL( seed )
|
|
|
|
seed = ClientData.ConvertHTTPToHTTPS( seed )
|
|
|
|
|
|
if 'pixiv.net' in parse.netloc:
|
|
|
|
seed = ClientData.ConvertHTTPToHTTPS( seed )
|
|
|
|
|
|
if seed in good_seeds: # we hit a dupe, so skip it
|
|
|
|
continue
|
|
|
|
|
|
except:
|
|
|
|
pass
|
|
|
|
|
|
good_seeds.add( seed )
|
|
|
|
new_serialisable_info.append( ( seed, seed_info ) )
|
|
|
|
|
|
return ( 5, new_serialisable_info )
|
|
|
|
|
|
|
|
def AddSeeds( self, seeds ):
|
|
|
|
if len( seeds ) == 0:
|
|
|
|
return
|
|
|
|
|
|
with self._lock:
|
|
|
|
for seed in seeds:
|
|
|
|
if seed.startswith( 'http' ):
|
|
|
|
search_seeds = ClientData.GetSearchURLs( seed )
|
|
|
|
else:
|
|
|
|
search_seeds = [ seed ]
|
|
|
|
|
|
for search_seed in search_seeds:
|
|
|
|
if search_seed in self._seeds_to_info:
|
|
|
|
continue
|
|
|
|
|
|
|
|
self._seeds_ordered.append( seed )
|
|
|
|
now = HydrusData.GetNow()
|
|
|
|
seed_info = {}
|
|
|
|
seed_info[ 'status' ] = CC.STATUS_UNKNOWN
|
|
seed_info[ 'added_timestamp' ] = now
|
|
seed_info[ 'last_modified_timestamp' ] = now
|
|
seed_info[ 'note' ] = ''
|
|
|
|
self._seeds_to_info[ seed ] = seed_info
|
|
|
|
|
|
self._SetDirty()
|
|
|
|
|
|
HG.client_controller.pub( 'seed_cache_seeds_updated', self._seed_cache_key, seeds )
|
|
|
|
|
|
def AdvanceSeed( self, seed ):
|
|
|
|
with self._lock:
|
|
|
|
if seed in self._seeds_to_info:
|
|
|
|
index = self._seeds_ordered.index( seed )
|
|
|
|
if index > 0:
|
|
|
|
self._seeds_ordered.remove( seed )
|
|
|
|
self._seeds_ordered.insert( index - 1, seed )
|
|
|
|
|
|
|
|
|
|
HG.client_controller.pub( 'seed_cache_seeds_updated', self._seed_cache_key, ( seed, ) )
|
|
|
|
|
|
def DelaySeed( self, seed ):
|
|
|
|
with self._lock:
|
|
|
|
if seed in self._seeds_to_info:
|
|
|
|
index = self._seeds_ordered.index( seed )
|
|
|
|
if index < len( self._seeds_ordered ) - 1:
|
|
|
|
self._seeds_ordered.remove( seed )
|
|
|
|
self._seeds_ordered.insert( index + 1, seed )
|
|
|
|
|
|
|
|
|
|
HG.client_controller.pub( 'seed_cache_seeds_updated', self._seed_cache_key, ( seed, ) )
|
|
|
|
|
|
def GetNextSeed( self, status ):
|
|
|
|
with self._lock:
|
|
|
|
for seed in self._seeds_ordered:
|
|
|
|
seed_info = self._seeds_to_info[ seed ]
|
|
|
|
if seed_info[ 'status' ] == status:
|
|
|
|
return seed
|
|
|
|
|
|
|
|
|
|
return None
|
|
|
|
|
|
def GetSeedCacheKey( self ):
|
|
|
|
return self._seed_cache_key
|
|
|
|
|
|
def GetSeedCount( self, status = None ):
|
|
|
|
result = 0
|
|
|
|
with self._lock:
|
|
|
|
if status is None:
|
|
|
|
result = len( self._seeds_ordered )
|
|
|
|
else:
|
|
|
|
for seed in self._seeds_ordered:
|
|
|
|
seed_info = self._seeds_to_info[ seed ]
|
|
|
|
if seed_info[ 'status' ] == status:
|
|
|
|
result += 1
|
|
|
|
|
|
|
|
|
|
|
|
return result
|
|
|
|
|
|
def GetSeeds( self, status = None ):
|
|
|
|
with self._lock:
|
|
|
|
if status is None:
|
|
|
|
return list( self._seeds_ordered )
|
|
|
|
else:
|
|
|
|
seeds = []
|
|
|
|
for seed in self._seeds_ordered:
|
|
|
|
seed_info = self._seeds_to_info[ seed ]
|
|
|
|
if seed_info[ 'status' ] == status:
|
|
|
|
seeds.append( seed )
|
|
|
|
|
|
|
|
return seeds
|
|
|
|
|
|
|
|
|
|
def GetSeedsWithInfo( self ):
|
|
|
|
with self._lock:
|
|
|
|
all_info = []
|
|
|
|
for seed in self._seeds_ordered:
|
|
|
|
seed_tuple = self._GetSeedTuple( seed )
|
|
|
|
all_info.append( seed_tuple )
|
|
|
|
|
|
return all_info
|
|
|
|
|
|
|
|
def GetSeedInfo( self, seed ):
|
|
|
|
with self._lock:
|
|
|
|
return self._GetSeedTuple( seed )
|
|
|
|
|
|
|
|
def GetStatus( self ):
|
|
|
|
with self._lock:
|
|
|
|
if self._dirty:
|
|
|
|
self._GenerateStatus()
|
|
|
|
|
|
return self._status_cache
|
|
|
|
|
|
|
|
def HasSeed( self, seed ):
|
|
|
|
with self._lock:
|
|
|
|
if seed.startswith( 'http' ):
|
|
|
|
search_seeds = ClientData.GetSearchURLs( seed )
|
|
|
|
else:
|
|
|
|
search_seeds = [ seed ]
|
|
|
|
|
|
for search_seed in search_seeds:
|
|
|
|
if search_seed in self._seeds_to_info:
|
|
|
|
return True
|
|
|
|
|
|
|
|
return False
|
|
|
|
|
|
|
|
def RemoveSeeds( self, seeds ):
|
|
|
|
with self._lock:
|
|
|
|
for seed in seeds:
|
|
|
|
if seed in self._seeds_to_info:
|
|
|
|
del self._seeds_to_info[ seed ]
|
|
|
|
self._seeds_ordered.remove( seed )
|
|
|
|
|
|
|
|
self._SetDirty()
|
|
|
|
|
|
HG.client_controller.pub( 'seed_cache_seeds_updated', self._seed_cache_key, seeds )
|
|
|
|
|
|
def RemoveSeedsByStatus( self, status ):
|
|
|
|
with self._lock:
|
|
|
|
seeds_to_delete = set()
|
|
|
|
for ( seed, seed_info ) in self._seeds_to_info.items():
|
|
|
|
if seed_info[ 'status' ] == status:
|
|
|
|
seeds_to_delete.add( seed )
|
|
|
|
|
|
|
|
for seed in seeds_to_delete:
|
|
|
|
del self._seeds_to_info[ seed ]
|
|
|
|
self._seeds_ordered.remove( seed )
|
|
|
|
|
|
self._SetDirty()
|
|
|
|
|
|
HG.client_controller.pub( 'seed_cache_seeds_updated', self._seed_cache_key, seeds_to_delete )
|
|
|
|
|
|
def UpdateSeedStatus( self, seed, status, note = '', exception = None ):
|
|
|
|
with self._lock:
|
|
|
|
if exception is not None:
|
|
|
|
first_line = HydrusData.ToUnicode( exception ).split( os.linesep )[0]
|
|
|
|
note = first_line + u'\u2026 (Copy note to see full error)'
|
|
note += os.linesep
|
|
note += HydrusData.ToUnicode( traceback.format_exc() )
|
|
|
|
HydrusData.Print( 'Error when processing ' + seed + '!' )
|
|
HydrusData.Print( traceback.format_exc() )
|
|
|
|
|
|
note = HydrusData.ToUnicode( note )
|
|
|
|
seed_info = self._seeds_to_info[ seed ]
|
|
|
|
seed_info[ 'status' ] = status
|
|
seed_info[ 'last_modified_timestamp' ] = HydrusData.GetNow()
|
|
seed_info[ 'note' ] = note
|
|
|
|
self._SetDirty()
|
|
|
|
|
|
HG.client_controller.pub( 'seed_cache_seeds_updated', self._seed_cache_key, ( seed, ) )
|
|
|
|
|
|
def UpdateSeedsStatus( self, seeds, status ):
|
|
|
|
with self._lock:
|
|
|
|
for seed in seeds:
|
|
|
|
seed_info = self._seeds_to_info[ seed ]
|
|
|
|
seed_info[ 'status' ] = status
|
|
seed_info[ 'last_modified_timestamp' ] = HydrusData.GetNow()
|
|
seed_info[ 'note' ] = ''
|
|
|
|
|
|
self._SetDirty()
|
|
|
|
|
|
HG.client_controller.pub( 'seed_cache_seeds_updated', self._seed_cache_key, seeds )
|
|
|
|
|
|
def WorkToDo( self ):
|
|
|
|
with self._lock:
|
|
|
|
if self._dirty:
|
|
|
|
self._GenerateStatus()
|
|
|
|
|
|
( status, ( total_processed, total ) ) = self._status_cache
|
|
|
|
return total_processed < total
|
|
|
|
|
|
|
|
HydrusSerialisable.SERIALISABLE_TYPES_TO_OBJECT_TYPES[ HydrusSerialisable.SERIALISABLE_TYPE_SEED_CACHE ] = SeedCache
|
|
|
|
class Subscription( HydrusSerialisable.SerialisableBaseNamed ):
|
|
|
|
SERIALISABLE_TYPE = HydrusSerialisable.SERIALISABLE_TYPE_SUBSCRIPTION
|
|
SERIALISABLE_VERSION = 2
|
|
|
|
def __init__( self, name ):
|
|
|
|
HydrusSerialisable.SerialisableBaseNamed.__init__( self, name )
|
|
|
|
self._gallery_identifier = ClientDownloading.GalleryIdentifier( HC.SITE_TYPE_DEVIANT_ART )
|
|
|
|
self._gallery_stream_identifiers = ClientDownloading.GetGalleryStreamIdentifiers( self._gallery_identifier )
|
|
|
|
( namespaces, search_value ) = ClientDefaults.GetDefaultNamespacesAndSearchValue( self._gallery_identifier )
|
|
|
|
new_options = HG.client_controller.GetNewOptions()
|
|
|
|
self._query = search_value
|
|
self._period = 86400 * 7
|
|
self._get_tags_if_url_known_and_file_redundant = new_options.GetBoolean( 'get_tags_if_url_known_and_file_redundant' )
|
|
|
|
if HC.options[ 'gallery_file_limit' ] is None:
|
|
|
|
self._initial_file_limit = 200
|
|
|
|
else:
|
|
|
|
self._initial_file_limit = min( 200, HC.options[ 'gallery_file_limit' ] )
|
|
|
|
|
|
self._periodic_file_limit = 50
|
|
self._paused = False
|
|
|
|
self._import_file_options = ClientDefaults.GetDefaultImportFileOptions()
|
|
|
|
new_options = HG.client_controller.GetNewOptions()
|
|
|
|
self._import_tag_options = new_options.GetDefaultImportTagOptions( self._gallery_identifier )
|
|
|
|
self._last_checked = 0
|
|
self._last_error = 0
|
|
self._check_now = False
|
|
self._seed_cache = SeedCache()
|
|
|
|
|
|
def _GetErrorProhibitionTime( self ):
|
|
|
|
return self._last_error + HC.UPDATE_DURATION
|
|
|
|
|
|
def _GetNextPeriodicSyncTime( self ):
|
|
|
|
return self._last_checked + self._period
|
|
|
|
|
|
def _GetSerialisableInfo( self ):
|
|
|
|
serialisable_gallery_identifier = self._gallery_identifier.GetSerialisableTuple()
|
|
serialisable_gallery_stream_identifiers = [ gallery_stream_identifier.GetSerialisableTuple() for gallery_stream_identifier in self._gallery_stream_identifiers ]
|
|
serialisable_file_options = self._import_file_options.GetSerialisableTuple()
|
|
serialisable_tag_options = self._import_tag_options.GetSerialisableTuple()
|
|
serialisable_seed_cache = self._seed_cache.GetSerialisableTuple()
|
|
|
|
return ( serialisable_gallery_identifier, serialisable_gallery_stream_identifiers, self._query, self._period, self._get_tags_if_url_known_and_file_redundant, self._initial_file_limit, self._periodic_file_limit, self._paused, serialisable_file_options, serialisable_tag_options, self._last_checked, self._check_now, self._last_error, serialisable_seed_cache )
|
|
|
|
|
|
def _InitialiseFromSerialisableInfo( self, serialisable_info ):
|
|
|
|
( serialisable_gallery_identifier, serialisable_gallery_stream_identifiers, self._query, self._period, self._get_tags_if_url_known_and_file_redundant, self._initial_file_limit, self._periodic_file_limit, self._paused, serialisable_file_options, serialisable_tag_options, self._last_checked, self._check_now, self._last_error, serialisable_seed_cache ) = serialisable_info
|
|
|
|
self._gallery_identifier = HydrusSerialisable.CreateFromSerialisableTuple( serialisable_gallery_identifier )
|
|
self._gallery_stream_identifiers = [ HydrusSerialisable.CreateFromSerialisableTuple( serialisable_gallery_stream_identifier ) for serialisable_gallery_stream_identifier in serialisable_gallery_stream_identifiers ]
|
|
self._import_file_options = HydrusSerialisable.CreateFromSerialisableTuple( serialisable_file_options )
|
|
self._import_tag_options = HydrusSerialisable.CreateFromSerialisableTuple( serialisable_tag_options )
|
|
self._seed_cache = HydrusSerialisable.CreateFromSerialisableTuple( serialisable_seed_cache )
|
|
|
|
|
|
def _NoRecentErrors( self ):
|
|
|
|
return HydrusData.TimeHasPassed( self._GetErrorProhibitionTime() )
|
|
|
|
|
|
def _UpdateSerialisableInfo( self, version, old_serialisable_info ):
|
|
|
|
if version == 1:
|
|
|
|
( serialisable_gallery_identifier, serialisable_gallery_stream_identifiers, query, period, get_tags_if_url_known_and_file_redundant, initial_file_limit, periodic_file_limit, paused, serialisable_file_options, serialisable_tag_options, last_checked, last_error, serialisable_seed_cache ) = old_serialisable_info
|
|
|
|
check_now = False
|
|
|
|
new_serialisable_info = ( serialisable_gallery_identifier, serialisable_gallery_stream_identifiers, query, period, get_tags_if_url_known_and_file_redundant, initial_file_limit, periodic_file_limit, paused, serialisable_file_options, serialisable_tag_options, last_checked, check_now, last_error, serialisable_seed_cache )
|
|
|
|
return ( 2, new_serialisable_info )
|
|
|
|
|
|
|
|
def _WorkOnFiles( self, job_key ):
|
|
|
|
error_count = 0
|
|
|
|
num_urls = self._seed_cache.GetSeedCount()
|
|
|
|
successful_hashes = set()
|
|
|
|
def network_job_factory( method, url, **kwargs ):
|
|
|
|
network_job = ClientNetworking.NetworkJobSubscriptionTemporary( self._name, method, url, **kwargs )
|
|
|
|
# this is prob actually a call to the job_key
|
|
#wx.CallAfter( self._download_control_set, network_job )
|
|
|
|
return network_job
|
|
|
|
|
|
gallery = ClientDownloading.GetGallery( self._gallery_identifier )
|
|
|
|
gallery.SetNetworkJobFactory( network_job_factory )
|
|
|
|
while True:
|
|
|
|
num_unknown = self._seed_cache.GetSeedCount( CC.STATUS_UNKNOWN )
|
|
num_done = num_urls - num_unknown
|
|
|
|
do_wait = True
|
|
|
|
url = self._seed_cache.GetNextSeed( CC.STATUS_UNKNOWN )
|
|
|
|
if url is None:
|
|
|
|
break
|
|
|
|
|
|
p1 = HC.options[ 'pause_subs_sync' ]
|
|
p2 = job_key.IsCancelled()
|
|
p3 = HG.view_shutdown
|
|
|
|
example_nj = network_job_factory( 'GET', url )
|
|
|
|
p4 = not HG.client_controller.network_engine.bandwidth_manager.CanDoWork( example_nj.GetNetworkContexts() )
|
|
|
|
if p1 or p2 or p3 or p4:
|
|
|
|
break
|
|
|
|
|
|
try:
|
|
|
|
x_out_of_y = 'file ' + HydrusData.ConvertValueRangeToPrettyString( num_done, num_urls ) + ': '
|
|
|
|
job_key.SetVariable( 'popup_text_1', x_out_of_y + 'checking url status' )
|
|
job_key.SetVariable( 'popup_gauge_1', ( num_done, num_urls ) )
|
|
|
|
( status, hash ) = HG.client_controller.Read( 'url_status', url )
|
|
|
|
if status == CC.STATUS_DELETED:
|
|
|
|
if not self._import_file_options.GetExcludeDeleted():
|
|
|
|
status = CC.STATUS_NEW
|
|
|
|
|
|
|
|
downloaded_tags = []
|
|
|
|
if status == CC.STATUS_REDUNDANT:
|
|
|
|
if self._get_tags_if_url_known_and_file_redundant and self._import_tag_options.InterestedInTags():
|
|
|
|
job_key.SetVariable( 'popup_text_1', x_out_of_y + 'found file in db, fetching tags' )
|
|
|
|
downloaded_tags = gallery.GetTags( url )
|
|
|
|
else:
|
|
|
|
do_wait = False
|
|
|
|
|
|
elif status == CC.STATUS_NEW:
|
|
|
|
( os_file_handle, temp_path ) = HydrusPaths.GetTempPath()
|
|
|
|
try:
|
|
|
|
job_key.SetVariable( 'popup_text_1', x_out_of_y + 'downloading file' )
|
|
|
|
if self._import_tag_options.InterestedInTags():
|
|
|
|
downloaded_tags = gallery.GetFileAndTags( temp_path, url )
|
|
|
|
else:
|
|
|
|
gallery.GetFile( temp_path, url )
|
|
|
|
|
|
job_key.SetVariable( 'popup_text_1', x_out_of_y + 'importing file' )
|
|
|
|
file_import_job = FileImportJob( temp_path, self._import_file_options )
|
|
|
|
( status, hash ) = HG.client_controller.client_files_manager.ImportFile( file_import_job )
|
|
|
|
service_keys_to_content_updates = { CC.COMBINED_LOCAL_FILE_SERVICE_KEY : [ HydrusData.ContentUpdate( HC.CONTENT_TYPE_URLS, HC.CONTENT_UPDATE_ADD, ( hash, ( url, ) ) ) ] }
|
|
|
|
HG.client_controller.WriteSynchronous( 'content_updates', service_keys_to_content_updates )
|
|
|
|
if status == CC.STATUS_SUCCESSFUL:
|
|
|
|
job_key.SetVariable( 'popup_text_1', x_out_of_y + 'import successful' )
|
|
|
|
successful_hashes.add( hash )
|
|
|
|
elif status == CC.STATUS_DELETED:
|
|
|
|
job_key.SetVariable( 'popup_text_1', x_out_of_y + 'previously deleted' )
|
|
|
|
elif status == CC.STATUS_REDUNDANT:
|
|
|
|
job_key.SetVariable( 'popup_text_1', x_out_of_y + 'already in db' )
|
|
|
|
|
|
finally:
|
|
|
|
HydrusPaths.CleanUpTempPath( os_file_handle, temp_path )
|
|
|
|
|
|
else:
|
|
|
|
do_wait = False
|
|
|
|
|
|
self._seed_cache.UpdateSeedStatus( url, status )
|
|
|
|
if hash is not None:
|
|
|
|
service_keys_to_content_updates = self._import_tag_options.GetServiceKeysToContentUpdates( hash, downloaded_tags )
|
|
|
|
if len( service_keys_to_content_updates ) > 0:
|
|
|
|
HG.client_controller.WriteSynchronous( 'content_updates', service_keys_to_content_updates )
|
|
|
|
|
|
|
|
except HydrusExceptions.CancelledException:
|
|
|
|
break
|
|
|
|
except HydrusExceptions.MimeException as e:
|
|
|
|
status = CC.STATUS_UNINTERESTING_MIME
|
|
|
|
self._seed_cache.UpdateSeedStatus( url, status )
|
|
|
|
except Exception as e:
|
|
|
|
status = CC.STATUS_FAILED
|
|
|
|
job_key.SetVariable( 'popup_text_1', x_out_of_y + 'file failed' )
|
|
|
|
self._seed_cache.UpdateSeedStatus( url, status, exception = e )
|
|
|
|
# DataMissing is a quick thing to avoid subscription abandons when lots of deleted files in e621 (or any other booru)
|
|
# this should be richer in any case in the new system
|
|
if not isinstance( e, HydrusExceptions.DataMissing ):
|
|
|
|
error_count += 1
|
|
|
|
time.sleep( 10 )
|
|
|
|
|
|
if error_count > 4:
|
|
|
|
raise Exception( 'The subscription ' + self._name + ' encountered several errors when downloading files, so it abandoned its sync.' )
|
|
|
|
|
|
|
|
if len( successful_hashes ) > 0:
|
|
|
|
job_key.SetVariable( 'popup_files', set( successful_hashes ) )
|
|
|
|
|
|
if do_wait:
|
|
|
|
ClientData.WaitPolitely()
|
|
|
|
|
|
|
|
job_key.DeleteVariable( 'popup_text_1' )
|
|
job_key.DeleteVariable( 'popup_gauge_1' )
|
|
job_key.DeleteVariable( 'popup_gauge_2' )
|
|
|
|
|
|
def _WorkOnFilesCanDoWork( self ):
|
|
|
|
def network_job_factory( method, url, **kwargs ):
|
|
|
|
network_job = ClientNetworking.NetworkJobSubscriptionTemporary( self._name, method, url, **kwargs )
|
|
|
|
# this is prob actually a call to the job_key
|
|
#wx.CallAfter( self._download_control_set, network_job )
|
|
|
|
return network_job
|
|
|
|
|
|
url = self._seed_cache.GetNextSeed( CC.STATUS_UNKNOWN )
|
|
|
|
if url is None:
|
|
|
|
return False
|
|
|
|
|
|
example_nj = network_job_factory( 'GET', url )
|
|
|
|
if HG.client_controller.network_engine.bandwidth_manager.CanDoWork( example_nj.GetNetworkContexts() ):
|
|
|
|
return True
|
|
|
|
else:
|
|
|
|
return False
|
|
|
|
|
|
|
|
def _SyncQuery( self, job_key ):
|
|
|
|
if self._SyncQueryCanDoWork():
|
|
|
|
this_is_initial_sync = self._last_checked == 0
|
|
total_new_urls = 0
|
|
|
|
urls_to_add = set()
|
|
urls_to_add_ordered = []
|
|
|
|
prefix = 'synchronising gallery query'
|
|
|
|
job_key.SetVariable( 'popup_text_1', prefix )
|
|
|
|
for gallery_stream_identifier in self._gallery_stream_identifiers:
|
|
|
|
if this_is_initial_sync:
|
|
|
|
if self._initial_file_limit is not None and total_new_urls + 1 > self._initial_file_limit:
|
|
|
|
continue
|
|
|
|
|
|
else:
|
|
|
|
if self._periodic_file_limit is not None and total_new_urls + 1 > self._periodic_file_limit:
|
|
|
|
continue
|
|
|
|
|
|
|
|
p1 = HC.options[ 'pause_subs_sync' ]
|
|
p2 = job_key.IsCancelled()
|
|
p3 = HG.view_shutdown
|
|
|
|
if p1 or p2 or p3:
|
|
|
|
return
|
|
|
|
|
|
def network_job_factory( method, url, **kwargs ):
|
|
|
|
network_job = ClientNetworking.NetworkJobSubscriptionTemporary( self._name, method, url, **kwargs )
|
|
|
|
# this is prob actually a call to the job_key
|
|
#wx.CallAfter( self._download_control_set, network_job )
|
|
|
|
network_job.OverrideBandwidth()
|
|
|
|
return network_job
|
|
|
|
|
|
gallery = ClientDownloading.GetGallery( gallery_stream_identifier )
|
|
|
|
gallery.SetNetworkJobFactory( network_job_factory )
|
|
|
|
page_index = 0
|
|
keep_checking = True
|
|
|
|
while keep_checking:
|
|
|
|
new_urls_this_page = 0
|
|
|
|
try:
|
|
|
|
( page_of_urls, definitely_no_more_pages ) = gallery.GetPage( self._query, page_index )
|
|
|
|
page_index += 1
|
|
|
|
if definitely_no_more_pages:
|
|
|
|
keep_checking = False
|
|
|
|
|
|
for url in page_of_urls:
|
|
|
|
if this_is_initial_sync:
|
|
|
|
if self._initial_file_limit is not None and total_new_urls + 1 > self._initial_file_limit:
|
|
|
|
keep_checking = False
|
|
|
|
break
|
|
|
|
|
|
else:
|
|
|
|
if self._periodic_file_limit is not None and total_new_urls + 1 > self._periodic_file_limit:
|
|
|
|
keep_checking = False
|
|
|
|
break
|
|
|
|
|
|
|
|
if url in urls_to_add:
|
|
|
|
# this catches the occasional overflow when a new file is uploaded while gallery parsing is going on
|
|
|
|
continue
|
|
|
|
|
|
if self._seed_cache.HasSeed( url ):
|
|
|
|
keep_checking = False
|
|
|
|
break
|
|
|
|
else:
|
|
|
|
urls_to_add.add( url )
|
|
urls_to_add_ordered.append( url )
|
|
|
|
new_urls_this_page += 1
|
|
total_new_urls += 1
|
|
|
|
|
|
|
|
except HydrusExceptions.CancelledException:
|
|
|
|
break
|
|
|
|
except HydrusExceptions.NotFoundException:
|
|
|
|
# paheal now 404s when no results, so just move on and naturally break
|
|
|
|
pass
|
|
|
|
|
|
if new_urls_this_page == 0:
|
|
|
|
keep_checking = False
|
|
|
|
|
|
job_key.SetVariable( 'popup_text_1', prefix + ': found ' + HydrusData.ConvertIntToPrettyString( total_new_urls ) + ' new urls' )
|
|
|
|
time.sleep( 5 )
|
|
|
|
|
|
|
|
self._last_checked = HydrusData.GetNow()
|
|
|
|
urls_to_add_ordered.reverse()
|
|
|
|
# 'first' urls are now at the end, so the seed_cache should stay roughly in oldest->newest order
|
|
|
|
new_urls = [ url for url in urls_to_add_ordered if not self._seed_cache.HasSeed( url ) ]
|
|
|
|
self._seed_cache.AddSeeds( new_urls )
|
|
|
|
|
|
|
|
def _SyncQueryCanDoWork( self ):
|
|
|
|
return HydrusData.TimeHasPassed( self._GetNextPeriodicSyncTime() ) or self._check_now
|
|
|
|
|
|
def CheckNow( self ):
|
|
|
|
self._check_now = True
|
|
|
|
|
|
def GetGalleryIdentifier( self ):
|
|
|
|
return self._gallery_identifier
|
|
|
|
|
|
def GetQuery( self ):
|
|
|
|
return self._query
|
|
|
|
|
|
def GetImportTagOptions( self ):
|
|
|
|
return self._import_tag_options
|
|
|
|
|
|
def GetSeedCache( self ):
|
|
|
|
return self._seed_cache
|
|
|
|
|
|
def PauseResume( self ):
|
|
|
|
self._paused = not self._paused
|
|
|
|
|
|
def Reset( self ):
|
|
|
|
self._last_checked = 0
|
|
self._last_error = 0
|
|
self._seed_cache = SeedCache()
|
|
|
|
|
|
def SetTuple( self, gallery_identifier, gallery_stream_identifiers, query, period, get_tags_if_url_known_and_file_redundant, initial_file_limit, periodic_file_limit, paused, import_file_options, import_tag_options, last_checked, last_error, check_now, seed_cache ):
|
|
|
|
self._gallery_identifier = gallery_identifier
|
|
self._gallery_stream_identifiers = gallery_stream_identifiers
|
|
self._query = query
|
|
self._period = period
|
|
self._get_tags_if_url_known_and_file_redundant = get_tags_if_url_known_and_file_redundant
|
|
self._initial_file_limit = initial_file_limit
|
|
self._periodic_file_limit = periodic_file_limit
|
|
self._paused = paused
|
|
|
|
self._import_file_options = import_file_options
|
|
self._import_tag_options = import_tag_options
|
|
|
|
self._last_checked = last_checked
|
|
self._last_error = last_error
|
|
self._check_now = check_now
|
|
self._seed_cache = seed_cache
|
|
|
|
|
|
def Sync( self ):
|
|
|
|
p1 = not self._paused
|
|
p2 = not HG.view_shutdown
|
|
p3 = self._check_now
|
|
p4 = self._NoRecentErrors()
|
|
p5 = self._SyncQueryCanDoWork()
|
|
p6 = self._WorkOnFilesCanDoWork()
|
|
|
|
if p1 and p2 and ( p3 or p4 ) and ( p5 or p6 ):
|
|
|
|
job_key = ClientThreading.JobKey( pausable = False, cancellable = True )
|
|
|
|
try:
|
|
|
|
job_key.SetVariable( 'popup_title', 'subscriptions - ' + self._name )
|
|
|
|
HG.client_controller.pub( 'message', job_key )
|
|
|
|
self._SyncQuery( job_key )
|
|
|
|
self._WorkOnFiles( job_key )
|
|
|
|
self._last_error = 0
|
|
|
|
except HydrusExceptions.NetworkException as e:
|
|
|
|
HydrusData.Print( 'The subscription ' + self._name + ' encountered an exception when trying to sync:' )
|
|
HydrusData.PrintException( e )
|
|
|
|
job_key.SetVariable( 'popup_text_1', 'Encountered a network error, will retry again later' )
|
|
|
|
self._last_error = HydrusData.GetNow()
|
|
|
|
time.sleep( 5 )
|
|
|
|
except Exception as e:
|
|
|
|
HydrusData.ShowText( 'The subscription ' + self._name + ' encountered an exception when trying to sync:' )
|
|
HydrusData.ShowException( e )
|
|
|
|
self._last_error = HydrusData.GetNow()
|
|
|
|
finally:
|
|
|
|
self._check_now = False
|
|
|
|
|
|
HG.client_controller.WriteSynchronous( 'serialisable', self )
|
|
|
|
if job_key.HasVariable( 'popup_files' ):
|
|
|
|
job_key.Finish()
|
|
|
|
else:
|
|
|
|
job_key.Delete()
|
|
|
|
|
|
|
|
|
|
def ToTuple( self ):
|
|
|
|
return ( self._name, self._gallery_identifier, self._gallery_stream_identifiers, self._query, self._period, self._get_tags_if_url_known_and_file_redundant, self._initial_file_limit, self._periodic_file_limit, self._paused, self._import_file_options, self._import_tag_options, self._last_checked, self._last_error, self._check_now, self._seed_cache )
|
|
|
|
|
|
HydrusSerialisable.SERIALISABLE_TYPES_TO_OBJECT_TYPES[ HydrusSerialisable.SERIALISABLE_TYPE_SUBSCRIPTION ] = Subscription
|
|
|
|
class ThreadWatcherImport( HydrusSerialisable.SerialisableBase ):
|
|
|
|
SERIALISABLE_TYPE = HydrusSerialisable.SERIALISABLE_TYPE_THREAD_WATCHER_IMPORT
|
|
SERIALISABLE_VERSION = 1
|
|
|
|
MIN_CHECK_PERIOD = 30
|
|
|
|
def __init__( self ):
|
|
|
|
HydrusSerialisable.SerialisableBase.__init__( self )
|
|
|
|
import_file_options = ClientDefaults.GetDefaultImportFileOptions()
|
|
|
|
( times_to_check, check_period ) = HC.options[ 'thread_checker_timings' ]
|
|
|
|
self._thread_url = ''
|
|
self._urls_cache = SeedCache()
|
|
self._urls_to_filenames = {}
|
|
self._urls_to_md5_base64 = {}
|
|
self._import_file_options = import_file_options
|
|
self._import_tag_options = ClientData.ImportTagOptions()
|
|
self._times_to_check = times_to_check
|
|
self._check_period = check_period
|
|
self._last_time_checked = 0
|
|
|
|
self._download_control_file_set = None
|
|
self._download_control_file_clear = None
|
|
self._download_control_thread_set = None
|
|
self._download_control_thread_clear = None
|
|
|
|
self._check_now = False
|
|
self._paused = False
|
|
|
|
self._watcher_status = ''
|
|
self._current_action = ''
|
|
|
|
self._thread_key = HydrusData.GenerateKey()
|
|
|
|
self._lock = threading.Lock()
|
|
|
|
|
|
def _GetSerialisableInfo( self ):
|
|
|
|
serialisable_url_cache = self._urls_cache.GetSerialisableTuple()
|
|
serialisable_file_options = self._import_file_options.GetSerialisableTuple()
|
|
serialisable_tag_options = self._import_tag_options.GetSerialisableTuple()
|
|
|
|
return ( self._thread_url, serialisable_url_cache, self._urls_to_filenames, self._urls_to_md5_base64, serialisable_file_options, serialisable_tag_options, self._times_to_check, self._check_period, self._last_time_checked, self._paused )
|
|
|
|
|
|
def _InitialiseFromSerialisableInfo( self, serialisable_info ):
|
|
|
|
( self._thread_url, serialisable_url_cache, self._urls_to_filenames, self._urls_to_md5_base64, serialisable_file_options, serialisable_tag_options, self._times_to_check, self._check_period, self._last_time_checked, self._paused ) = serialisable_info
|
|
|
|
self._urls_cache = HydrusSerialisable.CreateFromSerialisableTuple( serialisable_url_cache )
|
|
self._import_file_options = HydrusSerialisable.CreateFromSerialisableTuple( serialisable_file_options )
|
|
self._import_tag_options = HydrusSerialisable.CreateFromSerialisableTuple( serialisable_tag_options )
|
|
|
|
|
|
def _WorkOnFiles( self, page_key ):
|
|
|
|
file_url = self._urls_cache.GetNextSeed( CC.STATUS_UNKNOWN )
|
|
|
|
if file_url is None:
|
|
|
|
return False
|
|
|
|
|
|
try:
|
|
|
|
with self._lock:
|
|
|
|
self._current_action = 'reviewing file'
|
|
|
|
|
|
file_original_filename = self._urls_to_filenames[ file_url ]
|
|
|
|
downloaded_tags = [ 'filename:' + file_original_filename ]
|
|
|
|
# we now do both url and md5 tests here because cloudflare was sometimes giving optimised versions of images, meaning the api's md5 was unreliable
|
|
# if someone set up a thread watcher of a thread they had previously watched, any optimised images would be redownloaded
|
|
|
|
( status, hash ) = HG.client_controller.Read( 'url_status', file_url )
|
|
|
|
url_not_known_beforehand = status == CC.STATUS_NEW
|
|
|
|
if status == CC.STATUS_NEW:
|
|
|
|
if file_url in self._urls_to_md5_base64:
|
|
|
|
file_md5_base64 = self._urls_to_md5_base64[ file_url ]
|
|
|
|
file_md5 = file_md5_base64.decode( 'base64' )
|
|
|
|
( status, hash ) = HG.client_controller.Read( 'md5_status', file_md5 )
|
|
|
|
|
|
|
|
if status == CC.STATUS_DELETED:
|
|
|
|
if not self._import_file_options.GetExcludeDeleted():
|
|
|
|
status = CC.STATUS_NEW
|
|
|
|
|
|
|
|
if status == CC.STATUS_NEW:
|
|
|
|
with self._lock:
|
|
|
|
self._current_action = 'downloading file'
|
|
|
|
|
|
( os_file_handle, temp_path ) = HydrusPaths.GetTempPath()
|
|
|
|
try:
|
|
|
|
network_job = ClientNetworking.NetworkJobThreadWatcher( self._thread_key, 'GET', file_url, temp_path = temp_path )
|
|
|
|
try:
|
|
|
|
HG.client_controller.network_engine.AddJob( network_job )
|
|
|
|
with self._lock:
|
|
|
|
if self._download_control_file_set is not None:
|
|
|
|
wx.CallAfter( self._download_control_file_set, network_job )
|
|
|
|
|
|
|
|
while not network_job.IsDone():
|
|
|
|
time.sleep( 0.1 )
|
|
|
|
|
|
finally:
|
|
|
|
if self._download_control_file_clear is not None:
|
|
|
|
wx.CallAfter( self._download_control_file_clear )
|
|
|
|
|
|
|
|
if HG.view_shutdown:
|
|
|
|
return
|
|
|
|
elif network_job.HasError():
|
|
|
|
status = CC.STATUS_FAILED
|
|
|
|
self._urls_cache.UpdateSeedStatus( file_url, status, note = network_job.GetErrorText() )
|
|
|
|
time.sleep( 2 )
|
|
|
|
elif network_job.IsCancelled():
|
|
|
|
status = CC.STATUS_SKIPPED
|
|
|
|
self._urls_cache.UpdateSeedStatus( file_url, status, note = 'cancelled during download!' )
|
|
|
|
else:
|
|
|
|
with self._lock:
|
|
|
|
self._current_action = 'importing file'
|
|
|
|
|
|
file_import_job = FileImportJob( temp_path, self._import_file_options )
|
|
|
|
( status, hash ) = HG.client_controller.client_files_manager.ImportFile( file_import_job )
|
|
|
|
self._urls_cache.UpdateSeedStatus( file_url, status )
|
|
|
|
if url_not_known_beforehand and hash is not None:
|
|
|
|
service_keys_to_content_updates = { CC.COMBINED_LOCAL_FILE_SERVICE_KEY : [ HydrusData.ContentUpdate( HC.CONTENT_TYPE_URLS, HC.CONTENT_UPDATE_ADD, ( hash, ( file_url, ) ) ) ] }
|
|
|
|
HG.client_controller.WriteSynchronous( 'content_updates', service_keys_to_content_updates )
|
|
|
|
|
|
|
|
finally:
|
|
|
|
HydrusPaths.CleanUpTempPath( os_file_handle, temp_path )
|
|
|
|
|
|
else:
|
|
|
|
self._urls_cache.UpdateSeedStatus( file_url, status )
|
|
|
|
|
|
if status in ( CC.STATUS_SUCCESSFUL, CC.STATUS_REDUNDANT ):
|
|
|
|
with self._lock:
|
|
|
|
service_keys_to_content_updates = self._import_tag_options.GetServiceKeysToContentUpdates( hash, downloaded_tags )
|
|
|
|
|
|
if len( service_keys_to_content_updates ) > 0:
|
|
|
|
HG.client_controller.WriteSynchronous( 'content_updates', service_keys_to_content_updates )
|
|
|
|
|
|
( media_result, ) = HG.client_controller.Read( 'media_results', ( hash, ) )
|
|
|
|
HG.client_controller.pub( 'add_media_results', page_key, ( media_result, ) )
|
|
|
|
|
|
except HydrusExceptions.MimeException as e:
|
|
|
|
status = CC.STATUS_UNINTERESTING_MIME
|
|
|
|
self._urls_cache.UpdateSeedStatus( file_url, status )
|
|
|
|
except Exception as e:
|
|
|
|
status = CC.STATUS_FAILED
|
|
|
|
self._urls_cache.UpdateSeedStatus( file_url, status, exception = e )
|
|
|
|
|
|
with self._lock:
|
|
|
|
self._current_action = ''
|
|
|
|
|
|
return True
|
|
|
|
|
|
def _WorkOnThread( self, page_key ):
|
|
|
|
error_occurred = False
|
|
|
|
with self._lock:
|
|
|
|
p1 = self._check_now and HydrusData.TimeHasPassed( self._last_time_checked + self.MIN_CHECK_PERIOD )
|
|
p2 = self._times_to_check > 0 and HydrusData.TimeHasPassed( self._last_time_checked + self._check_period )
|
|
|
|
|
|
if p1 or p2:
|
|
|
|
with self._lock:
|
|
|
|
self._watcher_status = 'checking thread'
|
|
|
|
|
|
try:
|
|
|
|
json_url = ClientDownloading.GetImageboardThreadJSONURL( self._thread_url )
|
|
|
|
network_job = ClientNetworking.NetworkJobThreadWatcher( self._thread_key, 'GET', json_url )
|
|
|
|
network_job.OverrideBandwidth()
|
|
|
|
try:
|
|
|
|
HG.client_controller.network_engine.AddJob( network_job )
|
|
|
|
with self._lock:
|
|
|
|
if self._download_control_thread_set is not None:
|
|
|
|
wx.CallAfter( self._download_control_thread_set, network_job )
|
|
|
|
|
|
|
|
while not network_job.IsDone():
|
|
|
|
time.sleep( 0.1 )
|
|
|
|
|
|
finally:
|
|
|
|
if self._download_control_thread_clear is not None:
|
|
|
|
wx.CallAfter( self._download_control_thread_clear )
|
|
|
|
|
|
|
|
if HG.view_shutdown:
|
|
|
|
raise HydrusExceptions.ShutdownException()
|
|
|
|
elif network_job.HasError():
|
|
|
|
e = network_job.GetErrorException()
|
|
|
|
raise e
|
|
|
|
elif network_job.IsCancelled():
|
|
|
|
raise Exception( 'Page download cancelled!' )
|
|
|
|
else:
|
|
|
|
raw_json = network_job.GetContent()
|
|
|
|
file_infos = ClientDownloading.ParseImageboardFileURLsFromJSON( self._thread_url, raw_json )
|
|
|
|
new_urls = []
|
|
|
|
for ( file_url, file_md5_base64, file_original_filename ) in file_infos:
|
|
|
|
if not self._urls_cache.HasSeed( file_url ):
|
|
|
|
new_urls.append( file_url )
|
|
|
|
self._urls_to_filenames[ file_url ] = file_original_filename
|
|
|
|
if file_md5_base64 is not None:
|
|
|
|
self._urls_to_md5_base64[ file_url ] = file_md5_base64
|
|
|
|
|
|
|
|
|
|
self._urls_cache.AddSeeds( new_urls )
|
|
|
|
num_new = len( new_urls )
|
|
|
|
watcher_status = 'thread checked OK - ' + HydrusData.ConvertIntToPrettyString( num_new ) + ' new urls'
|
|
|
|
|
|
except HydrusExceptions.NotFoundException:
|
|
|
|
error_occurred = True
|
|
|
|
watcher_status = 'thread 404'
|
|
|
|
with self._lock:
|
|
|
|
for i in range( self._times_to_check ):
|
|
|
|
HG.client_controller.pub( 'decrement_times_to_check', page_key )
|
|
|
|
|
|
self._times_to_check = 0
|
|
|
|
|
|
except Exception as e:
|
|
|
|
error_occurred = True
|
|
|
|
watcher_status = HydrusData.ToUnicode( e )
|
|
|
|
HydrusData.PrintException( e )
|
|
|
|
|
|
with self._lock:
|
|
|
|
if self._check_now:
|
|
|
|
self._check_now = False
|
|
|
|
else:
|
|
|
|
self._times_to_check -= 1
|
|
|
|
HG.client_controller.pub( 'decrement_times_to_check', page_key )
|
|
|
|
|
|
self._last_time_checked = HydrusData.GetNow()
|
|
|
|
|
|
else:
|
|
|
|
with self._lock:
|
|
|
|
if self._check_now or self._times_to_check > 0:
|
|
|
|
if self._check_now:
|
|
|
|
delay = self.MIN_CHECK_PERIOD
|
|
|
|
else:
|
|
|
|
delay = self._check_period
|
|
|
|
|
|
watcher_status = 'checking again ' + HydrusData.ConvertTimestampToPrettyPending( self._last_time_checked + delay )
|
|
|
|
else:
|
|
|
|
watcher_status = 'checking finished'
|
|
|
|
|
|
|
|
|
|
with self._lock:
|
|
|
|
self._watcher_status = watcher_status
|
|
|
|
|
|
if error_occurred:
|
|
|
|
time.sleep( 5 )
|
|
|
|
|
|
|
|
def _THREADWorkOnFiles( self, page_key ):
|
|
|
|
while not ( HG.view_shutdown or HG.client_controller.PageCompletelyDestroyed( page_key ) ):
|
|
|
|
if self._paused or HG.client_controller.PageClosedButNotDestroyed( page_key ):
|
|
|
|
time.sleep( 1 )
|
|
|
|
else:
|
|
|
|
try:
|
|
|
|
if self._thread_url == '':
|
|
|
|
time.sleep( 1 )
|
|
|
|
else:
|
|
|
|
did_work = self._WorkOnFiles( page_key )
|
|
|
|
if did_work:
|
|
|
|
time.sleep( 0.1 )
|
|
|
|
else:
|
|
|
|
time.sleep( 1 )
|
|
|
|
|
|
HG.client_controller.WaitUntilPubSubsEmpty()
|
|
|
|
|
|
except Exception as e:
|
|
|
|
HydrusData.ShowException( e )
|
|
|
|
return
|
|
|
|
|
|
|
|
|
|
|
|
def _THREADWorkOnThread( self, page_key ):
|
|
|
|
while not ( HG.view_shutdown or HG.client_controller.PageCompletelyDestroyed( page_key ) ):
|
|
|
|
if self._paused or HG.client_controller.PageClosedButNotDestroyed( page_key ):
|
|
|
|
time.sleep( 1 )
|
|
|
|
else:
|
|
|
|
try:
|
|
|
|
if self._thread_url != '':
|
|
|
|
self._WorkOnThread( page_key )
|
|
|
|
|
|
time.sleep( 1 )
|
|
|
|
HG.client_controller.WaitUntilPubSubsEmpty()
|
|
|
|
except Exception as e:
|
|
|
|
HydrusData.ShowException( e )
|
|
|
|
return
|
|
|
|
|
|
|
|
|
|
|
|
def CheckNow( self ):
|
|
|
|
with self._lock:
|
|
|
|
self._check_now = True
|
|
|
|
|
|
|
|
def CurrentlyWorking( self ):
|
|
|
|
with self._lock:
|
|
|
|
finished = not self._urls_cache.WorkToDo()
|
|
|
|
return not finished and not self._paused
|
|
|
|
|
|
|
|
def GetSeedCache( self ):
|
|
|
|
return self._urls_cache
|
|
|
|
|
|
def GetOptions( self ):
|
|
|
|
with self._lock:
|
|
|
|
return ( self._thread_url, self._import_file_options, self._import_tag_options, self._times_to_check, self._check_period )
|
|
|
|
|
|
|
|
def GetStatus( self ):
|
|
|
|
with self._lock:
|
|
|
|
finished = not self._urls_cache.WorkToDo()
|
|
|
|
return ( self._current_action, self._watcher_status, self._check_now, self._paused )
|
|
|
|
|
|
|
|
def HasThread( self ):
|
|
|
|
with self._lock:
|
|
|
|
return self._thread_url != ''
|
|
|
|
|
|
|
|
def PausePlay( self ):
|
|
|
|
with self._lock:
|
|
|
|
self._paused = not self._paused
|
|
|
|
|
|
|
|
def SetCheckPeriod( self, check_period ):
|
|
|
|
with self._lock:
|
|
|
|
self._check_period = max( self.MIN_CHECK_PERIOD, check_period )
|
|
|
|
|
|
|
|
def SetDownloadControlFile( self, download_control ):
|
|
|
|
with self._lock:
|
|
|
|
self._download_control_file_set = download_control.SetNetworkJob
|
|
self._download_control_file_clear = download_control.ClearNetworkJob
|
|
|
|
|
|
|
|
def SetDownloadControlThread( self, download_control ):
|
|
|
|
with self._lock:
|
|
|
|
self._download_control_thread_set = download_control.SetNetworkJob
|
|
self._download_control_thread_clear = download_control.ClearNetworkJob
|
|
|
|
|
|
|
|
def SetImportFileOptions( self, import_file_options ):
|
|
|
|
with self._lock:
|
|
|
|
self._import_file_options = import_file_options
|
|
|
|
|
|
|
|
def SetImportTagOptions( self, import_tag_options ):
|
|
|
|
with self._lock:
|
|
|
|
self._import_tag_options = import_tag_options
|
|
|
|
|
|
|
|
def SetThreadURL( self, thread_url ):
|
|
|
|
with self._lock:
|
|
|
|
self._thread_url = thread_url
|
|
|
|
|
|
|
|
def SetTimesToCheck( self, times_to_check ):
|
|
|
|
with self._lock:
|
|
|
|
self._times_to_check = times_to_check
|
|
|
|
|
|
|
|
def Start( self, page_key ):
|
|
|
|
threading.Thread( target = self._THREADWorkOnThread, args = ( page_key, ) ).start()
|
|
threading.Thread( target = self._THREADWorkOnFiles, args = ( page_key, ) ).start()
|
|
|
|
|
|
HydrusSerialisable.SERIALISABLE_TYPES_TO_OBJECT_TYPES[ HydrusSerialisable.SERIALISABLE_TYPE_THREAD_WATCHER_IMPORT ] = ThreadWatcherImport
|
|
|
|
class URLsImport( HydrusSerialisable.SerialisableBase ):
|
|
|
|
SERIALISABLE_TYPE = HydrusSerialisable.SERIALISABLE_TYPE_URLS_IMPORT
|
|
SERIALISABLE_VERSION = 1
|
|
|
|
def __init__( self ):
|
|
|
|
HydrusSerialisable.SerialisableBase.__init__( self )
|
|
|
|
import_file_options = ClientDefaults.GetDefaultImportFileOptions()
|
|
|
|
self._urls_cache = SeedCache()
|
|
self._import_file_options = import_file_options
|
|
self._paused = False
|
|
|
|
self._seed_cache_status = ( 'initialising', ( 0, 1 ) )
|
|
self._download_control_file_set = None
|
|
self._download_control_file_clear = None
|
|
|
|
self._lock = threading.Lock()
|
|
|
|
|
|
def _GetSerialisableInfo( self ):
|
|
|
|
serialisable_url_cache = self._urls_cache.GetSerialisableTuple()
|
|
serialisable_file_options = self._import_file_options.GetSerialisableTuple()
|
|
|
|
return ( serialisable_url_cache, serialisable_file_options, self._paused )
|
|
|
|
|
|
def _InitialiseFromSerialisableInfo( self, serialisable_info ):
|
|
|
|
( serialisable_url_cache, serialisable_file_options, self._paused ) = serialisable_info
|
|
|
|
self._urls_cache = HydrusSerialisable.CreateFromSerialisableTuple( serialisable_url_cache )
|
|
self._import_file_options = HydrusSerialisable.CreateFromSerialisableTuple( serialisable_file_options )
|
|
|
|
|
|
def _RegenerateSeedCacheStatus( self ):
|
|
|
|
new_seed_cache_status = self._urls_cache.GetStatus()
|
|
|
|
if self._seed_cache_status != new_seed_cache_status:
|
|
|
|
self._seed_cache_status = new_seed_cache_status
|
|
|
|
|
|
|
|
def _WorkOnFiles( self, page_key ):
|
|
|
|
file_url = self._urls_cache.GetNextSeed( CC.STATUS_UNKNOWN )
|
|
|
|
if file_url is None:
|
|
|
|
return False
|
|
|
|
|
|
try:
|
|
|
|
with self._lock:
|
|
|
|
self._RegenerateSeedCacheStatus()
|
|
|
|
|
|
( status, hash ) = HG.client_controller.Read( 'url_status', file_url )
|
|
|
|
url_not_known_beforehand = status == CC.STATUS_NEW
|
|
|
|
if status == CC.STATUS_DELETED:
|
|
|
|
if not self._import_file_options.GetExcludeDeleted():
|
|
|
|
status = CC.STATUS_NEW
|
|
|
|
|
|
|
|
if status == CC.STATUS_NEW:
|
|
|
|
( os_file_handle, temp_path ) = HydrusPaths.GetTempPath()
|
|
|
|
try:
|
|
|
|
network_job = ClientNetworking.NetworkJob( 'GET', file_url, temp_path = temp_path )
|
|
|
|
try:
|
|
|
|
HG.client_controller.network_engine.AddJob( network_job )
|
|
|
|
with self._lock:
|
|
|
|
if self._download_control_file_set is not None:
|
|
|
|
wx.CallAfter( self._download_control_file_set, network_job )
|
|
|
|
|
|
|
|
while not network_job.IsDone():
|
|
|
|
time.sleep( 0.1 )
|
|
|
|
|
|
finally:
|
|
|
|
if self._download_control_file_clear is not None:
|
|
|
|
wx.CallAfter( self._download_control_file_clear )
|
|
|
|
|
|
|
|
if HG.view_shutdown:
|
|
|
|
raise HydrusExceptions.ShutdownException()
|
|
|
|
elif network_job.HasError():
|
|
|
|
status = CC.STATUS_FAILED
|
|
|
|
self._urls_cache.UpdateSeedStatus( file_url, status, note = network_job.GetErrorText() )
|
|
|
|
time.sleep( 2 )
|
|
|
|
elif network_job.IsCancelled():
|
|
|
|
status = CC.STATUS_SKIPPED
|
|
|
|
self._urls_cache.UpdateSeedStatus( file_url, status, note = 'cancelled during download!' )
|
|
|
|
else:
|
|
|
|
file_import_job = FileImportJob( temp_path, self._import_file_options )
|
|
|
|
( status, hash ) = HG.client_controller.client_files_manager.ImportFile( file_import_job )
|
|
|
|
self._urls_cache.UpdateSeedStatus( file_url, status )
|
|
|
|
if url_not_known_beforehand and hash is not None:
|
|
|
|
service_keys_to_content_updates = { CC.COMBINED_LOCAL_FILE_SERVICE_KEY : [ HydrusData.ContentUpdate( HC.CONTENT_TYPE_URLS, HC.CONTENT_UPDATE_ADD, ( hash, ( file_url, ) ) ) ] }
|
|
|
|
HG.client_controller.WriteSynchronous( 'content_updates', service_keys_to_content_updates )
|
|
|
|
|
|
|
|
finally:
|
|
|
|
HydrusPaths.CleanUpTempPath( os_file_handle, temp_path )
|
|
|
|
|
|
else:
|
|
|
|
self._urls_cache.UpdateSeedStatus( file_url, status )
|
|
|
|
|
|
if status in ( CC.STATUS_SUCCESSFUL, CC.STATUS_REDUNDANT ):
|
|
|
|
( media_result, ) = HG.client_controller.Read( 'media_results', ( hash, ) )
|
|
|
|
HG.client_controller.pub( 'add_media_results', page_key, ( media_result, ) )
|
|
|
|
|
|
except HydrusExceptions.MimeException as e:
|
|
|
|
status = CC.STATUS_UNINTERESTING_MIME
|
|
|
|
self._urls_cache.UpdateSeedStatus( file_url, status )
|
|
|
|
except Exception as e:
|
|
|
|
status = CC.STATUS_FAILED
|
|
|
|
self._urls_cache.UpdateSeedStatus( file_url, status, exception = e )
|
|
|
|
|
|
with self._lock:
|
|
|
|
self._RegenerateSeedCacheStatus()
|
|
|
|
|
|
return True
|
|
|
|
|
|
def _THREADWork( self, page_key ):
|
|
|
|
with self._lock:
|
|
|
|
self._RegenerateSeedCacheStatus()
|
|
|
|
|
|
while not ( HG.view_shutdown or HG.client_controller.PageCompletelyDestroyed( page_key ) ):
|
|
|
|
if self._paused or HG.client_controller.PageClosedButNotDestroyed( page_key ):
|
|
|
|
time.sleep( 1 )
|
|
|
|
else:
|
|
|
|
try:
|
|
|
|
did_work = self._WorkOnFiles( page_key )
|
|
|
|
if did_work:
|
|
|
|
time.sleep( 0.1 )
|
|
|
|
else:
|
|
|
|
time.sleep( 1 )
|
|
|
|
|
|
HG.client_controller.WaitUntilPubSubsEmpty()
|
|
|
|
except HydrusExceptions.ShutdownException:
|
|
|
|
return
|
|
|
|
except Exception as e:
|
|
|
|
HydrusData.ShowException( e )
|
|
|
|
return
|
|
|
|
|
|
|
|
|
|
|
|
def GetSeedCache( self ):
|
|
|
|
return self._urls_cache
|
|
|
|
|
|
def GetOptions( self ):
|
|
|
|
with self._lock:
|
|
|
|
return self._import_file_options
|
|
|
|
|
|
|
|
def GetStatus( self ):
|
|
|
|
with self._lock:
|
|
|
|
return ( self._seed_cache_status, self._paused )
|
|
|
|
|
|
|
|
def PausePlay( self ):
|
|
|
|
with self._lock:
|
|
|
|
self._paused = not self._paused
|
|
|
|
|
|
|
|
def PendURLs( self, urls ):
|
|
|
|
with self._lock:
|
|
|
|
new_urls = [ url for url in urls if not self._urls_cache.HasSeed( url ) ]
|
|
|
|
self._urls_cache.AddSeeds( new_urls )
|
|
|
|
|
|
|
|
def SetDownloadControlFile( self, download_control ):
|
|
|
|
with self._lock:
|
|
|
|
self._download_control_file_set = download_control.SetNetworkJob
|
|
self._download_control_file_clear = download_control.ClearNetworkJob
|
|
|
|
|
|
|
|
def SetImportFileOptions( self, import_file_options ):
|
|
|
|
with self._lock:
|
|
|
|
self._import_file_options = import_file_options
|
|
|
|
|
|
|
|
def Start( self, page_key ):
|
|
|
|
threading.Thread( target = self._THREADWork, args = ( page_key, ) ).start()
|
|
|
|
|
|
HydrusSerialisable.SERIALISABLE_TYPES_TO_OBJECT_TYPES[ HydrusSerialisable.SERIALISABLE_TYPE_URLS_IMPORT ] = URLsImport
|