1787 lines
57 KiB
Python
1787 lines
57 KiB
Python
import bs4
|
|
import ClientConstants as CC
|
|
import ClientData
|
|
import ClientDefaults
|
|
import ClientDownloading
|
|
import ClientFiles
|
|
import collections
|
|
import HydrusConstants as HC
|
|
import HydrusData
|
|
import HydrusExceptions
|
|
import HydrusFileHandling
|
|
import HydrusGlobals
|
|
import HydrusSerialisable
|
|
import json
|
|
import os
|
|
import random
|
|
import shutil
|
|
import threading
|
|
import time
|
|
import traceback
|
|
import urlparse
|
|
import wx
|
|
import HydrusThreading
|
|
|
|
class HDDImport( HydrusSerialisable.SerialisableBase ):
    # Background importer for a fixed list of local file paths. Progress is
    # tracked in a SeedCache and reported to the owning gui page via pubsub.
    
    SERIALISABLE_TYPE = HydrusSerialisable.SERIALISABLE_TYPE_HDD_IMPORT
    SERIALISABLE_VERSION = 1
    
    def __init__( self, paths = None, import_file_options = None, paths_to_tags = None, delete_after_success = None ):
        
        # paths: iterable of filesystem paths to import, or None when this
        #   object is about to be populated from serialised data
        # import_file_options: options object passed through to 'import_file'
        # paths_to_tags: dict of path -> { service_key : tags }
        # delete_after_success: if True, source files are deleted after a
        #   successful or redundant import
        
        HydrusSerialisable.SerialisableBase.__init__( self )
        
        if paths is None:
            
            self._paths_cache = None
            
        else:
            
            self._paths_cache = SeedCache()
            
            for path in paths:
                
                self._paths_cache.AddSeed( path )
                
            
        
        self._import_file_options = import_file_options
        self._paths_to_tags = paths_to_tags
        self._delete_after_success = delete_after_success
        self._paused = False
        
        self._seed_cache_status = ( 'initialising', ( 0, 1 ) )
        
        self._lock = threading.Lock()
        
    
    def _GetSerialisableInfo( self ):
        
        serialisable_url_cache = HydrusSerialisable.GetSerialisableTuple( self._paths_cache )
        serialisable_options = HydrusSerialisable.GetSerialisableTuple( self._import_file_options )
        
        # service keys are raw byte strings, so hex-encode them for json
        serialisable_paths_to_tags = { path : { service_key.encode( 'hex' ) : tags for ( service_key, tags ) in service_keys_to_tags.items() } for ( path, service_keys_to_tags ) in self._paths_to_tags.items() }
        
        return ( serialisable_url_cache, serialisable_options, serialisable_paths_to_tags, self._delete_after_success, self._paused )
        
    
    def _InitialiseFromSerialisableInfo( self, serialisable_info ):
        
        ( serialisable_url_cache, serialisable_options, serialisable_paths_to_tags, self._delete_after_success, self._paused ) = serialisable_info
        
        self._paths_cache = HydrusSerialisable.CreateFromSerialisableTuple( serialisable_url_cache )
        self._import_file_options = HydrusSerialisable.CreateFromSerialisableTuple( serialisable_options )
        
        # reverse the hex-encoding applied in _GetSerialisableInfo
        self._paths_to_tags = { path : { service_key.decode( 'hex' ) : tags for ( service_key, tags ) in service_keys_to_tags.items() } for ( path, service_keys_to_tags ) in serialisable_paths_to_tags.items() }
        
    
    def _RegenerateSeedCacheStatus( self ):
        
        # callers must hold self._lock
        self._seed_cache_status = self._paths_cache.GetStatus()
        
    
    def _WorkOnFiles( self, page_key ):
        
        # Import the next unprocessed path, if any; otherwise sleep briefly so
        # the worker loop does not spin.
        
        path = self._paths_cache.GetNextSeed( CC.STATUS_UNKNOWN )
        
        with self._lock:
            
            if path is not None:
                
                if path in self._paths_to_tags:
                    
                    service_keys_to_tags = self._paths_to_tags[ path ]
                    
                else:
                    
                    service_keys_to_tags = {}
                    
                
            
        
        if path is not None:
            
            try:
                
                ( status, media_result ) = HydrusGlobals.client_controller.WriteSynchronous( 'import_file', path, import_file_options = self._import_file_options, service_keys_to_tags = service_keys_to_tags, generate_media_result = True )
                
                self._paths_cache.UpdateSeedStatus( path, status )
                
                if status in ( CC.STATUS_SUCCESSFUL, CC.STATUS_REDUNDANT ):
                    
                    HydrusGlobals.client_controller.pub( 'add_media_results', page_key, ( media_result, ) )
                    
                    if self._delete_after_success:
                        
                        try:
                            
                            ClientData.DeletePath( path )
                            
                        except Exception as e:
                            
                            # a failed delete is reported but does not fail
                            # the import itself
                            HydrusData.ShowText( 'While attempting to delete ' + path + ', the following error occured:' )
                            HydrusData.ShowException( e )
                            
                        
                    
                
            except Exception as e:
                
                traceback.print_exc()
                
                status = CC.STATUS_FAILED
                note = HydrusData.ToString( e )
                
                self._paths_cache.UpdateSeedStatus( path, status, note = note )
                
            
            with self._lock:
                
                self._RegenerateSeedCacheStatus()
                
            
            HydrusGlobals.client_controller.pub( 'update_status', page_key )
            
        else:
            
            time.sleep( 1 )
            
        
    
    def _THREADWork( self, page_key ):
        
        # Main loop for the background import thread; runs until the client
        # shuts down or the owning page is deleted.
        
        with self._lock:
            
            self._RegenerateSeedCacheStatus()
            
        
        HydrusGlobals.client_controller.pub( 'update_status', page_key )
        
        while not ( HydrusGlobals.view_shutdown or HydrusGlobals.client_controller.PageDeleted( page_key ) ):
            
            if self._paused:
                
                time.sleep( 0.1 )
                
            else:
                
                try:
                    
                    self._WorkOnFiles( page_key )
                    
                    # let the gui catch up before importing the next file
                    HydrusGlobals.client_controller.WaitUntilPubSubsEmpty()
                    
                except Exception as e:
                    
                    HydrusData.ShowException( e )
                    
                    return
                    
                
            
        
    
    def GetSeedCache( self ):
        
        return self._paths_cache
        
    
    def GetStatus( self ):
        
        with self._lock:
            
            return ( self._seed_cache_status, self._paused )
            
        
    
    def PausePlay( self ):
        
        # toggle the paused state
        with self._lock:
            
            self._paused = not self._paused
            
        
    
    def Pause( self ):
        
        with self._lock:
            
            self._paused = True
            
        
    
    def Resume( self ):
        
        with self._lock:
            
            self._paused = False
            
        
    
    def Start( self, page_key ):
        
        # spawn the worker thread for the given gui page
        threading.Thread( target = self._THREADWork, args = ( page_key, ) ).start()
        
    

HydrusSerialisable.SERIALISABLE_TYPES_TO_OBJECT_TYPES[ HydrusSerialisable.SERIALISABLE_TYPE_HDD_IMPORT ] = HDDImport
|
|
|
|
class GalleryQuery( HydrusSerialisable.SerialisableBase ):
    # A single gallery search: site, query type/text, file limit and per-name
    # sub-options, plus a SeedCache of discovered urls.
    
    SERIALISABLE_TYPE = HydrusSerialisable.SERIALISABLE_TYPE_GALLERY_QUERY
    SERIALISABLE_VERSION = 1
    
    def __init__( self, name ):
        
        # name is currently unused (this is not a SerialisableBaseNamed) but
        # is kept in the signature for caller compatibility
        
        HydrusSerialisable.SerialisableBase.__init__( self )
        
        self._site_type = None
        self._query_type = None
        self._query = None
        self._get_tags_if_redundant = False
        self._file_limit = HC.options[ 'gallery_file_limit' ]
        self._paused = False
        self._page_index = 0
        self._url_cache = None
        self._options = {}
        
    
    def _GetSerialisableInfo( self ):
        
        serialisable_url_cache = HydrusSerialisable.GetSerialisableTuple( self._url_cache )
        
        serialisable_options = { name : HydrusSerialisable.GetSerialisableTuple( options ) for ( name, options ) in self._options.items() }
        
        return ( self._site_type, self._query_type, self._query, self._get_tags_if_redundant, self._file_limit, serialisable_url_cache, serialisable_options )
        
    
    def _InitialiseFromSerialisableInfo( self, serialisable_info ):
        
        # This must mirror the seven-item tuple emitted by
        # _GetSerialisableInfo. Previously self._file_limit was dropped from
        # the unpack, so every load raised a tuple-unpacking ValueError.
        ( self._site_type, self._query_type, self._query, self._get_tags_if_redundant, self._file_limit, serialisable_url_cache_tuple, serialisable_options_tuple ) = serialisable_info
        
        self._url_cache = HydrusSerialisable.CreateFromSerialisableTuple( serialisable_url_cache_tuple )
        
        self._options = { name : HydrusSerialisable.CreateFromSerialisableTuple( serialisable_suboptions_tuple ) for ( name, serialisable_suboptions_tuple ) in serialisable_options_tuple.items() }
        
    
    def GetQuery( self ):
        
        return self._query
        
    
    def SetTuple( self, site_type, query_type, query, get_tags_if_redundant, file_limit, options ):
        
        # resets the url cache, since changed search parameters invalidate any
        # previously discovered urls
        
        self._site_type = site_type
        self._query_type = query_type
        self._query = query
        self._get_tags_if_redundant = get_tags_if_redundant
        self._file_limit = file_limit
        self._url_cache = SeedCache()
        self._options = options
        
    

HydrusSerialisable.SERIALISABLE_TYPES_TO_OBJECT_TYPES[ HydrusSerialisable.SERIALISABLE_TYPE_GALLERY_QUERY ] = GalleryQuery
|
|
|
|
class ImportFolder( HydrusSerialisable.SerialisableBaseNamed ):
    # Watches a filesystem directory and periodically imports any new files
    # found in it, then deletes/moves/ignores the originals according to
    # per-import-status actions.
    
    SERIALISABLE_TYPE = HydrusSerialisable.SERIALISABLE_TYPE_IMPORT_FOLDER
    SERIALISABLE_VERSION = 1
    
    def __init__( self, name, path = '', import_file_options = None, mimes = None, actions = None, action_locations = None, period = 3600, open_popup = True, tag = None ):
        
        # mimes: which mimetypes to import; others are marked uninteresting
        # actions: import status -> what to do with the source file afterwards
        # action_locations: import status -> destination dir for the move action
        # period: seconds between folder checks
        # tag: optional local tag applied to every imported file
        
        if mimes is None:
            
            mimes = HC.ALLOWED_MIMES
            
        
        if import_file_options is None:
            
            import_file_options = ClientDefaults.GetDefaultImportFileOptionsObject()
            
        
        if actions is None:
            
            actions = {}
            
            actions[ CC.STATUS_SUCCESSFUL ] = CC.IMPORT_FOLDER_DELETE
            actions[ CC.STATUS_REDUNDANT ] = CC.IMPORT_FOLDER_DELETE
            actions[ CC.STATUS_DELETED ] = CC.IMPORT_FOLDER_DELETE
            actions[ CC.STATUS_FAILED ] = CC.IMPORT_FOLDER_IGNORE
            
        
        if action_locations is None:
            
            action_locations = {}
            
        
        HydrusSerialisable.SerialisableBaseNamed.__init__( self, name )
        
        self._path = path
        self._mimes = mimes
        self._import_file_options = import_file_options
        self._actions = actions
        self._action_locations = action_locations
        self._period = period
        self._open_popup = open_popup
        self._tag = tag
        
        self._path_cache = SeedCache()
        self._last_checked = 0
        self._paused = False
        
    
    def _ActionPaths( self ):
        
        # Apply the configured post-import action (delete/move/ignore) to all
        # processed paths of each status. Any filesystem failure pauses the
        # folder so we do not repeat the error on every pass.
        
        for status in ( CC.STATUS_SUCCESSFUL, CC.STATUS_REDUNDANT, CC.STATUS_DELETED, CC.STATUS_FAILED ):
            
            action = self._actions[ status ]
            
            if action == CC.IMPORT_FOLDER_DELETE:
                
                while True:
                    
                    path = self._path_cache.GetNextSeed( status )
                    
                    if path is None or HydrusGlobals.view_shutdown:
                        
                        break
                        
                    
                    try:
                        
                        if os.path.exists( path ):
                            
                            ClientData.DeletePath( path )
                            
                        
                        self._path_cache.RemoveSeed( path )
                        
                    except Exception as e:
                        
                        HydrusData.ShowText( 'Import folder tried to delete ' + path + ', but could not:' )
                        
                        HydrusData.ShowException( e )
                        
                        HydrusData.ShowText( 'Import folder has been paused.' )
                        
                        self._paused = True
                        
                        return
                        
                    
                
            elif action == CC.IMPORT_FOLDER_MOVE:
                
                while True:
                    
                    path = self._path_cache.GetNextSeed( status )
                    
                    if path is None or HydrusGlobals.view_shutdown:
                        
                        break
                        
                    
                    try:
                        
                        if os.path.exists( path ):
                            
                            dest_dir = self._action_locations[ status ]
                            
                            filename = os.path.basename( path )
                            
                            dest_path = dest_dir + os.path.sep + filename
                            
                            # uniquify the destination name by appending random
                            # digits until it is free
                            while os.path.exists( dest_path ):
                                
                                dest_path += str( random.choice( range( 10 ) ) )
                                
                            
                            shutil.move( path, dest_path )
                            
                        
                        self._path_cache.RemoveSeed( path )
                        
                    except Exception as e:
                        
                        HydrusData.ShowText( 'Import folder tried to move ' + path + ', but could not:' )
                        
                        HydrusData.ShowException( e )
                        
                        HydrusData.ShowText( 'Import folder has been paused.' )
                        
                        self._paused = True
                        
                        return
                        
                    
                
            elif action == CC.IMPORT_FOLDER_IGNORE:
                
                # bugfix: this branch previously tested
                # 'status == CC.IMPORT_FOLDER_IGNORE', comparing an import
                # status against an action constant, so it could never match
                pass
                
            
        
    
    def _GetSerialisableInfo( self ):
        
        serialisable_import_file_options = HydrusSerialisable.GetSerialisableTuple( self._import_file_options )
        serialisable_path_cache = HydrusSerialisable.GetSerialisableTuple( self._path_cache )
        
        # json turns int dict keys to strings, so store these as pair lists
        action_pairs = self._actions.items()
        action_location_pairs = self._action_locations.items()
        
        return ( self._path, self._mimes, serialisable_import_file_options, action_pairs, action_location_pairs, self._period, self._open_popup, self._tag, serialisable_path_cache, self._last_checked, self._paused )
        
    
    def _InitialiseFromSerialisableInfo( self, serialisable_info ):
        
        ( self._path, self._mimes, serialisable_import_file_options, action_pairs, action_location_pairs, self._period, self._open_popup, self._tag, serialisable_path_cache, self._last_checked, self._paused ) = serialisable_info
        
        self._actions = dict( action_pairs )
        self._action_locations = dict( action_location_pairs )
        
        self._import_file_options = HydrusSerialisable.CreateFromSerialisableTuple( serialisable_import_file_options )
        self._path_cache = HydrusSerialisable.CreateFromSerialisableTuple( serialisable_path_cache )
        
    
    def DoWork( self ):
        
        # One full pass: discover new paths, import them, fire a popup for
        # successes, action the processed files, and save ourselves back to
        # the db. No-op if paused or the period has not yet elapsed.
        
        if HydrusGlobals.view_shutdown:
            
            return
            
        
        if not self._paused and HydrusData.TimeHasPassed( self._last_checked + self._period ):
            
            if os.path.exists( self._path ) and os.path.isdir( self._path ):
                
                filenames = os.listdir( self._path )
                
                raw_paths = [ self._path + os.path.sep + filename for filename in filenames ]
                
                all_paths = ClientFiles.GetAllPaths( raw_paths )
                
                for path in all_paths:
                    
                    if not self._path_cache.HasSeed( path ):
                        
                        self._path_cache.AddSeed( path )
                        
                    
                
                successful_hashes = set()
                
                while True:
                    
                    path = self._path_cache.GetNextSeed( CC.STATUS_UNKNOWN )
                    
                    if path is None or HydrusGlobals.view_shutdown:
                        
                        break
                        
                    
                    try:
                        
                        mime = HydrusFileHandling.GetMime( path )
                        
                        if mime in self._mimes:
                            
                            service_keys_to_tags = {}
                            
                            if self._tag is not None:
                                
                                service_keys_to_tags[ CC.LOCAL_TAG_SERVICE_KEY ] = [ self._tag ]
                                
                            
                            ( status, hash ) = HydrusGlobals.client_controller.WriteSynchronous( 'import_file', path, import_file_options = self._import_file_options, service_keys_to_tags = service_keys_to_tags )
                            
                            self._path_cache.UpdateSeedStatus( path, status )
                            
                            if status == CC.STATUS_SUCCESSFUL:
                                
                                successful_hashes.add( hash )
                                
                            
                        else:
                            
                            self._path_cache.UpdateSeedStatus( path, CC.STATUS_UNINTERESTING_MIME )
                            
                        
                    except Exception as e:
                        
                        print( 'A file failed to import from import folder ' + self._name + ':' )
                        traceback.print_exc()
                        
                        self._path_cache.UpdateSeedStatus( path, CC.STATUS_FAILED, note = str( e ) )
                        
                    
                
                if self._open_popup and len( successful_hashes ) > 0:
                    
                    job_key = HydrusThreading.JobKey()
                    
                    job_key.SetVariable( 'popup_title', 'import folder - ' + self._name )
                    job_key.SetVariable( 'popup_files', successful_hashes )
                    
                    HydrusGlobals.client_controller.pub( 'message', job_key )
                    
                
                self._ActionPaths()
                
            
            self._last_checked = HydrusData.GetNow()
            
            HydrusGlobals.client_controller.WriteSynchronous( 'import_folder', self )
            
        
    
    def GetSeedCache( self ):
        
        return self._path_cache
        
    
    def ToListBoxTuple( self ):
        
        # compact summary for gui list display
        return ( self._name, self._path, self._period, self._tag )
        
    
    def ToTuple( self ):
        
        return ( self._name, self._path, self._mimes, self._import_file_options, self._actions, self._action_locations, self._period, self._open_popup, self._tag, self._paused )
        
    
    def SetTuple( self, name, path, mimes, import_file_options, actions, action_locations, period, open_popup, tag, paused ):
        
        # a new path invalidates all progress so far
        if path != self._path:
            
            self._path_cache = SeedCache()
            
        
        # a changed mime filter means previously skipped files deserve
        # another look
        if set( mimes ) != set( self._mimes ):
            
            self._path_cache.RemoveSeeds( CC.STATUS_UNINTERESTING_MIME )
            
        
        self._name = name
        self._path = path
        self._mimes = mimes
        self._import_file_options = import_file_options
        self._actions = actions
        self._action_locations = action_locations
        self._period = period
        self._open_popup = open_popup
        self._tag = tag
        self._paused = paused
        
    

HydrusSerialisable.SERIALISABLE_TYPES_TO_OBJECT_TYPES[ HydrusSerialisable.SERIALISABLE_TYPE_IMPORT_FOLDER ] = ImportFolder
|
|
|
|
class PageOfImagesImport( HydrusSerialisable.SerialisableBase ):
    # Downloads images from arbitrary web pages: parses each pending page url
    # for image links and/or inline images, queues the file urls in a
    # SeedCache, and downloads/imports them on a background thread.
    
    SERIALISABLE_TYPE = HydrusSerialisable.SERIALISABLE_TYPE_PAGE_OF_IMAGES_IMPORT
    SERIALISABLE_VERSION = 1
    
    def __init__( self ):
        
        HydrusSerialisable.SerialisableBase.__init__( self )
        
        import_file_options = ClientDefaults.GetDefaultImportFileOptionsObject()
        
        self._pending_page_urls = []
        self._urls_cache = SeedCache()
        self._import_file_options = import_file_options
        self._download_image_links = True
        self._download_unlinked_images = False
        
        # optional per-download progress callback; not serialised
        self._file_download_hook = None
        self._paused = False
        
        self._parser_status = ''
        self._seed_cache_status = ( 'initialising', ( 0, 1 ) )
        
        self._lock = threading.Lock()
        
    
    def _GetSerialisableInfo( self ):
        
        serialisable_url_cache = HydrusSerialisable.GetSerialisableTuple( self._urls_cache )
        serialisable_file_options = HydrusSerialisable.GetSerialisableTuple( self._import_file_options )
        
        return ( self._pending_page_urls, serialisable_url_cache, serialisable_file_options, self._download_image_links, self._download_unlinked_images, self._paused )
        
    
    def _InitialiseFromSerialisableInfo( self, serialisable_info ):
        
        ( self._pending_page_urls, serialisable_url_cache, serialisable_file_options, self._download_image_links, self._download_unlinked_images, self._paused ) = serialisable_info
        
        self._urls_cache = HydrusSerialisable.CreateFromSerialisableTuple( serialisable_url_cache )
        self._import_file_options = HydrusSerialisable.CreateFromSerialisableTuple( serialisable_file_options )
        
    
    def _RegenerateSeedCacheStatus( self ):
        
        # callers must hold self._lock
        self._seed_cache_status = self._urls_cache.GetStatus()
        
    
    def _WorkOnFiles( self, page_key ):
        
        # Download/import the next unprocessed file url, if any. After an
        # actual network hit we sleep the polite-wait period.
        
        do_wait = False
        
        file_url = self._urls_cache.GetNextSeed( CC.STATUS_UNKNOWN )
        
        if file_url is None:
            
            return
            
        
        try:
            
            # ask the db whether we already know this url
            ( status, hash ) = HydrusGlobals.client_controller.Read( 'url_status', file_url )
            
            if status == CC.STATUS_REDUNDANT:
                
                ( media_result, ) = HydrusGlobals.client_controller.Read( 'media_results', CC.LOCAL_FILE_SERVICE_KEY, ( hash, ) )
                
            elif status == CC.STATUS_NEW:
                
                # download to a temp file, import, then clean up
                ( os_file_handle, temp_path ) = HydrusFileHandling.GetTempPath()
                
                try:
                    
                    report_hooks = []
                    
                    with self._lock:
                        
                        if self._file_download_hook is not None:
                            
                            report_hooks.append( self._file_download_hook )
                            
                        
                    
                    do_wait = True
                    
                    HydrusGlobals.client_controller.DoHTTP( HC.GET, file_url, report_hooks = report_hooks, temp_path = temp_path )
                    
                    ( status, media_result ) = HydrusGlobals.client_controller.WriteSynchronous( 'import_file', temp_path, import_file_options = self._import_file_options, generate_media_result = True, url = file_url )
                    
                finally:
                    
                    HydrusFileHandling.CleanUpTempPath( os_file_handle, temp_path )
                    
                
            
            self._urls_cache.UpdateSeedStatus( file_url, status )
            
            if status in ( CC.STATUS_SUCCESSFUL, CC.STATUS_REDUNDANT ):
                
                HydrusGlobals.client_controller.pub( 'add_media_results', page_key, ( media_result, ) )
                
            
        except Exception as e:
            
            traceback.print_exc()
            
            status = CC.STATUS_FAILED
            
            note = HydrusData.ToString( e )
            
            self._urls_cache.UpdateSeedStatus( file_url, status, note = note )
            
        
        with self._lock:
            
            self._RegenerateSeedCacheStatus()
            
        
        HydrusGlobals.client_controller.pub( 'update_status', page_key )
        
        if do_wait:
            
            # be polite to the remote server between downloads
            time.sleep( HC.options[ 'website_download_polite_wait' ] )
            
        
    
    def _WorkOnQueue( self, page_key ):
        
        # Parse the next pending page url for new file urls -- but only once
        # the current file queue has been fully worked through.
        
        do_wait = False
        
        file_url = self._urls_cache.GetNextSeed( CC.STATUS_UNKNOWN )
        
        if file_url is not None:
            
            # still file work to do; do not fetch another page yet
            return
            
        
        if len( self._pending_page_urls ) > 0:
            
            with self._lock:
                
                page_url = self._pending_page_urls.pop( 0 )
                
                self._parser_status = 'checking ' + page_url
                
            
            HydrusGlobals.client_controller.pub( 'update_status', page_key )
            
            error_occurred = False
            
            try:
                
                do_wait = True
                
                html = HydrusGlobals.client_controller.DoHTTP( HC.GET, page_url )
                
                soup = bs4.BeautifulSoup( html )
                
                #
                
                all_links = soup.find_all( 'a' )
                
                # anchors that wrap at least one <img> are treated as image links
                links_with_images = [ link for link in all_links if len( link.find_all( 'img' ) ) > 0 ]
                
                all_linked_images = []
                
                for link in all_links:
                    
                    images = link.find_all( 'img' )
                    
                    all_linked_images.extend( images )
                    
                
                all_images = soup.find_all( 'img' )
                
                # images that are not wrapped in any anchor
                unlinked_images = [ image for image in all_images if image not in all_linked_images ]
                
                #
                
                file_urls = []
                
                if self._download_image_links:
                    
                    # resolve relative hrefs against the page url
                    file_urls.extend( [ urlparse.urljoin( page_url, link[ 'href' ] ) for link in links_with_images ] )
                    
                
                if self._download_unlinked_images:
                    
                    file_urls.extend( [ urlparse.urljoin( page_url, image[ 'src' ] ) for image in unlinked_images ] )
                    
                
                num_new = 0
                
                for file_url in file_urls:
                    
                    if not self._urls_cache.HasSeed( file_url ):
                        
                        num_new += 1
                        
                        self._urls_cache.AddSeed( file_url )
                        
                    
                
                parser_status = 'page checked OK - ' + HydrusData.ConvertIntToPrettyString( num_new ) + ' new files'
                
            except HydrusExceptions.NotFoundException:
                
                error_occurred = True
                
                parser_status = 'page 404'
                
            except Exception as e:
                
                error_occurred = True
                
                parser_status = HydrusData.ToString( e )
                
            
            with self._lock:
                
                self._parser_status = parser_status
                self._RegenerateSeedCacheStatus()
                
            
            if error_occurred:
                
                HydrusGlobals.client_controller.pub( 'update_status', page_key )
                
                time.sleep( 5 )
                
            
        else:
            
            with self._lock:
                
                self._parser_status = ''
                
            
        
        HydrusGlobals.client_controller.pub( 'update_status', page_key )
        
        if do_wait:
            
            # be polite to the remote server between page fetches
            time.sleep( HC.options[ 'website_download_polite_wait' ] )
            
        
    
    def _THREADWork( self, page_key ):
        
        # Main loop for the background thread; alternates page parsing and
        # file downloading until shutdown or page deletion.
        
        with self._lock:
            
            self._RegenerateSeedCacheStatus()
            
        
        HydrusGlobals.client_controller.pub( 'update_status', page_key )
        
        while not ( HydrusGlobals.view_shutdown or HydrusGlobals.client_controller.PageDeleted( page_key ) ):
            
            if self._paused:
                
                time.sleep( 0.1 )
                
            else:
                
                try:
                    
                    self._WorkOnQueue( page_key )
                    
                    self._WorkOnFiles( page_key )
                    
                    time.sleep( 1 )
                    
                    HydrusGlobals.client_controller.WaitUntilPubSubsEmpty()
                    
                except Exception as e:
                    
                    HydrusData.ShowException( e )
                    
                    return
                    
                
            
        
    
    def AdvancePageURL( self, page_url ):
        
        # move the given pending page url one place earlier in the queue
        with self._lock:
            
            if page_url in self._pending_page_urls:
                
                index = self._pending_page_urls.index( page_url )
                
                if index - 1 >= 0:
                    
                    self._pending_page_urls.remove( page_url )
                    
                    self._pending_page_urls.insert( index - 1, page_url )
                    
                
            
        
    
    def DelayPageURL( self, page_url ):
        
        # move the given pending page url one place later in the queue
        with self._lock:
            
            if page_url in self._pending_page_urls:
                
                index = self._pending_page_urls.index( page_url )
                
                if index + 1 < len( self._pending_page_urls ):
                    
                    self._pending_page_urls.remove( page_url )
                    
                    self._pending_page_urls.insert( index + 1, page_url )
                    
                
            
        
    
    def DeletePageURL( self, page_url ):
        
        with self._lock:
            
            if page_url in self._pending_page_urls:
                
                self._pending_page_urls.remove( page_url )
                
            
        
    
    def GetSeedCache( self ):
        
        return self._urls_cache
        
    
    def GetOptions( self ):
        
        with self._lock:
            
            return ( self._import_file_options, self._download_image_links, self._download_unlinked_images )
            
        
    
    def GetStatus( self ):
        
        with self._lock:
            
            # copy the pending list so callers cannot mutate our state
            return ( list( self._pending_page_urls ), self._parser_status, self._seed_cache_status, self._paused )
            
        
    
    def PausePlay( self ):
        
        # toggle the paused state
        with self._lock:
            
            self._paused = not self._paused
            
        
    
    def Pause( self ):
        
        with self._lock:
            
            self._paused = True
            
        
    
    def PendPageURL( self, page_url ):
        
        # queue a new page url for parsing, ignoring duplicates
        with self._lock:
            
            if page_url not in self._pending_page_urls:
                
                self._pending_page_urls.append( page_url )
                
            
        
    
    def Resume( self ):
        
        with self._lock:
            
            self._paused = False
            
        
    
    def SetDownloadHook( self, hook ):
        
        with self._lock:
            
            self._file_download_hook = hook
            
        
    
    def SetDownloadImageLinks( self, value ):
        
        with self._lock:
            
            self._download_image_links = value
            
        
    
    def SetDownloadUnlinkedImages( self, value ):
        
        with self._lock:
            
            self._download_unlinked_images = value
            
        
    
    def SetImportFileOptions( self, import_file_options ):
        
        with self._lock:
            
            self._import_file_options = import_file_options
            
        
    
    def Start( self, page_key ):
        
        # spawn the worker thread for the given gui page
        threading.Thread( target = self._THREADWork, args = ( page_key, ) ).start()
        
    

HydrusSerialisable.SERIALISABLE_TYPES_TO_OBJECT_TYPES[ HydrusSerialisable.SERIALISABLE_TYPE_PAGE_OF_IMAGES_IMPORT ] = PageOfImagesImport
|
|
|
|
class Subscription( HydrusSerialisable.SerialisableBaseNamed ):
    # A named, periodically-run gallery query.
    #
    # NOTE(review): this class looks mid-refactor. __init__ sets individual
    # query fields (site type, query, file limit, ...) but never
    # self._gallery_query, while the serialisation methods read/write only
    # self._gallery_query and self._periodic. Serialising a freshly
    # constructed instance would therefore raise AttributeError -- confirm
    # the intended shape before relying on round-trips.
    
    SERIALISABLE_TYPE = HydrusSerialisable.SERIALISABLE_TYPE_SUBSCRIPTION
    SERIALISABLE_VERSION = 1
    
    def __init__( self, name ):
        
        HydrusSerialisable.SerialisableBaseNamed.__init__( self, name )
        
        self._site_type = None
        self._query_type = None
        self._query = None
        self._get_tags_if_redundant = False
        self._file_limit = HC.options[ 'gallery_file_limit' ]
        self._periodic = None
        self._page_index = 0
        self._url_cache = None
        self._options = {}
        
    
    def _GetSerialisableInfo( self ):
        
        # NOTE(review): reads self._gallery_query, which __init__ never sets
        return ( HydrusSerialisable.GetSerialisableTuple( self._gallery_query ), HydrusSerialisable.GetSerialisableTuple( self._periodic ) )
        
    
    def _InitialiseFromSerialisableInfo( self, serialisable_info ):
        
        ( serialised_gallery_query_tuple, serialised_periodic_tuple ) = serialisable_info
        
        self._gallery_query = HydrusSerialisable.CreateFromSerialisableTuple( serialised_gallery_query_tuple )
        
        self._periodic = HydrusSerialisable.CreateFromSerialisableTuple( serialised_periodic_tuple )
        
    

HydrusSerialisable.SERIALISABLE_TYPES_TO_OBJECT_TYPES[ HydrusSerialisable.SERIALISABLE_TYPE_SUBSCRIPTION ] = Subscription
|
|
|
|
class SeedCache( HydrusSerialisable.SerialisableBase ):
    # Thread-safe ordered collection of 'seeds' (paths or urls to import),
    # each carrying a status, timestamps and a note. Updates are broadcast
    # via the 'seed_cache_seed_updated' pubsub.
    
    SERIALISABLE_TYPE = HydrusSerialisable.SERIALISABLE_TYPE_SEED_CACHE
    SERIALISABLE_VERSION = 1
    
    def __init__( self ):
        
        HydrusSerialisable.SerialisableBase.__init__( self )
        
        # _seeds_ordered preserves queue order; _seeds_to_info holds the
        # per-seed status dicts
        self._seeds_ordered = []
        self._seeds_to_info = {}
        
        self._lock = threading.Lock()
        
    
    def _GetSeedTuple( self, seed ):
        
        # callers must hold self._lock; raises KeyError for an unknown seed
        
        seed_info = self._seeds_to_info[ seed ]
        
        status = seed_info[ 'status' ]
        added_timestamp = seed_info[ 'added_timestamp' ]
        last_modified_timestamp = seed_info[ 'last_modified_timestamp' ]
        note = seed_info[ 'note' ]
        
        return ( seed, status, added_timestamp, last_modified_timestamp, note )
        
    
    def _GetSerialisableInfo( self ):
        
        with self._lock:
            
            serialisable_info = []
            
            for seed in self._seeds_ordered:
                
                seed_info = self._seeds_to_info[ seed ]
                
                serialisable_info.append( ( seed, seed_info ) )
                
            
            return serialisable_info
            
        
    
    def _InitialiseFromSerialisableInfo( self, serialisable_info ):
        
        with self._lock:
            
            for ( seed, seed_info ) in serialisable_info:
                
                self._seeds_ordered.append( seed )
                
                self._seeds_to_info[ seed ] = seed_info
                
            
        
    
    def AddSeed( self, seed ):
        
        # Add a seed to the back of the queue with a fresh 'unknown' status.
        # Re-adding an existing seed moves it to the back and resets its info.
        
        with self._lock:
            
            if seed in self._seeds_to_info:
                
                self._seeds_ordered.remove( seed )
                
            
            self._seeds_ordered.append( seed )
            
            now = HydrusData.GetNow()
            
            seed_info = {}
            
            seed_info[ 'status' ] = CC.STATUS_UNKNOWN
            seed_info[ 'added_timestamp' ] = now
            seed_info[ 'last_modified_timestamp' ] = now
            seed_info[ 'note' ] = ''
            
            self._seeds_to_info[ seed ] = seed_info
            
        
        HydrusGlobals.client_controller.pub( 'seed_cache_seed_updated', seed )
        
    
    def AdvanceSeed( self, seed ):
        
        # move the seed one place earlier in the queue
        
        with self._lock:
            
            if seed in self._seeds_to_info:
                
                index = self._seeds_ordered.index( seed )
                
                if index > 0:
                    
                    self._seeds_ordered.remove( seed )
                    
                    self._seeds_ordered.insert( index - 1, seed )
                    
                
            
        
        HydrusGlobals.client_controller.pub( 'seed_cache_seed_updated', seed )
        
    
    def DelaySeed( self, seed ):
        
        # move the seed one place later in the queue
        
        with self._lock:
            
            if seed in self._seeds_to_info:
                
                index = self._seeds_ordered.index( seed )
                
                if index < len( self._seeds_ordered ) - 1:
                    
                    self._seeds_ordered.remove( seed )
                    
                    self._seeds_ordered.insert( index + 1, seed )
                    
                
            
        
        HydrusGlobals.client_controller.pub( 'seed_cache_seed_updated', seed )
        
    
    def GetNextSeed( self, status ):
        
        # return the first seed with the given status, or None
        
        with self._lock:
            
            for seed in self._seeds_ordered:
                
                seed_info = self._seeds_to_info[ seed ]
                
                if seed_info[ 'status' ] == status:
                    
                    return seed
                    
                
            
            return None
            
        
    
    def GetSeeds( self ):
        
        with self._lock:
            
            # return a copy so callers cannot mutate our state
            return list( self._seeds_ordered )
            
        
    
    def GetSeedsWithInfo( self ):
        
        # return ( seed, status, added, modified, note ) tuples in queue order
        
        with self._lock:
            
            all_info = []
            
            for seed in self._seeds_ordered:
                
                seed_tuple = self._GetSeedTuple( seed )
                
                all_info.append( seed_tuple )
                
            
            return all_info
            
        
    
    def GetSeedInfo( self, seed ):
        
        with self._lock:
            
            return self._GetSeedTuple( seed )
            
        
    
    def GetStatus( self ):
        
        # Build a human-readable summary string and ( processed, total )
        # progress pair.
        
        with self._lock:
            
            statuses_to_counts = collections.Counter()
            
            for seed_info in self._seeds_to_info.values():
                
                statuses_to_counts[ seed_info[ 'status' ] ] += 1
                
            
            num_successful = statuses_to_counts[ CC.STATUS_SUCCESSFUL ]
            num_failed = statuses_to_counts[ CC.STATUS_FAILED ]
            num_deleted = statuses_to_counts[ CC.STATUS_DELETED ]
            num_redundant = statuses_to_counts[ CC.STATUS_REDUNDANT ]
            num_unknown = statuses_to_counts[ CC.STATUS_UNKNOWN ]
            
            status_strings = []
            
            if num_successful > 0: status_strings.append( HydrusData.ToString( num_successful ) + ' successful' )
            if num_failed > 0: status_strings.append( HydrusData.ToString( num_failed ) + ' failed' )
            if num_deleted > 0: status_strings.append( HydrusData.ToString( num_deleted ) + ' already deleted' )
            if num_redundant > 0: status_strings.append( HydrusData.ToString( num_redundant ) + ' already in db' )
            
            status = ', '.join( status_strings )
            
            total_processed = len( self._seeds_ordered ) - num_unknown
            total = len( self._seeds_ordered )
            
            return ( status, ( total_processed, total ) )
            
        
    
    def HasSeed( self, seed ):
        
        with self._lock:
            
            return seed in self._seeds_to_info
            
        
    
    def RemoveSeed( self, seed ):
        
        with self._lock:
            
            if seed in self._seeds_to_info:
                
                del self._seeds_to_info[ seed ]
                
                self._seeds_ordered.remove( seed )
                
            
        
        HydrusGlobals.client_controller.pub( 'seed_cache_seed_updated', seed )
        
    
    def RemoveSeeds( self, status ):
        
        # remove every seed currently carrying the given status
        
        with self._lock:
            
            seeds_to_delete = set()
            
            for ( seed, seed_info ) in self._seeds_to_info.items():
                
                if seed_info[ 'status' ] == status:
                    
                    seeds_to_delete.add( seed )
                    
                
            
            for seed in seeds_to_delete:
                
                del self._seeds_to_info[ seed ]
                
            
            # rebuild the ordered list once rather than calling list.remove
            # per deleted seed, which would be quadratic for large removals
            self._seeds_ordered = [ seed for seed in self._seeds_ordered if seed in self._seeds_to_info ]
            
        
        for seed in seeds_to_delete:
            
            HydrusGlobals.client_controller.pub( 'seed_cache_seed_updated', seed )
            
        
    
    def UpdateSeedStatus( self, seed, status, note = '' ):
        
        # set a new status/note and touch the modified timestamp; raises
        # KeyError for an unknown seed
        
        with self._lock:
            
            seed_info = self._seeds_to_info[ seed ]
            
            seed_info[ 'status' ] = status
            seed_info[ 'last_modified_timestamp' ] = HydrusData.GetNow()
            seed_info[ 'note' ] = note
            
        
        HydrusGlobals.client_controller.pub( 'seed_cache_seed_updated', seed )
        
    

HydrusSerialisable.SERIALISABLE_TYPES_TO_OBJECT_TYPES[ HydrusSerialisable.SERIALISABLE_TYPE_SEED_CACHE ] = SeedCache
|
|
|
|
class ThreadWatcherImport( HydrusSerialisable.SerialisableBase ):
    '''
    A serialisable 'thread watcher' import manager.
    
    Given an imageboard thread url, it periodically downloads the thread's
    JSON, queues any new file urls into a SeedCache, then downloads and
    imports those files one at a time on a worker thread started by Start().
    Progress is reported to the owning gui page via client_controller.pub.
    '''
    
    SERIALISABLE_TYPE = HydrusSerialisable.SERIALISABLE_TYPE_THREAD_WATCHER_IMPORT
    SERIALISABLE_VERSION = 1
    
    # minimum number of seconds between two thread checks, even when the user
    # forces a 'check now'
    MIN_CHECK_PERIOD = 30
    
    def __init__( self ):
        '''Initialise an idle watcher with default options and no thread url.'''
        
        HydrusSerialisable.SerialisableBase.__init__( self )
        
        import_file_options = ClientDefaults.GetDefaultImportFileOptionsObject()
        
        # user-configured ( number of checks remaining, seconds between checks )
        ( times_to_check, check_period ) = HC.options[ 'thread_checker_timings' ]
        
        self._thread_url = ''
        self._urls_cache = SeedCache() # file urls queued for download, with per-url import status
        self._urls_to_filenames = {} # file url -> original filename, used for a filename: tag
        self._urls_to_md5_base64 = {} # file url -> md5 (base64), as reported by the thread json
        self._import_file_options = import_file_options
        self._import_tag_options = ClientData.ImportTagOptions()
        self._times_to_check = times_to_check
        self._check_period = check_period
        self._last_time_checked = 0
        
        self._file_download_hook = None # optional progress callback passed to DoHTTP
        self._check_now = False # set by CheckNow(); still rate-limited by MIN_CHECK_PERIOD
        self._paused = False
        
        self._watcher_status = 'ready to start'
        self._seed_cache_status = ( 'initialising', ( 0, 1 ) )
        
        # guards the mutable state above; pubsub calls happen outside it where possible
        self._lock = threading.Lock()
        
    
    def _GetSerialisableInfo( self ):
        '''Return this watcher's state as a tuple of serialisable primitives.'''
        
        serialisable_url_cache = HydrusSerialisable.GetSerialisableTuple( self._urls_cache )
        serialisable_file_options = HydrusSerialisable.GetSerialisableTuple( self._import_file_options )
        serialisable_tag_options = HydrusSerialisable.GetSerialisableTuple( self._import_tag_options )
        
        return ( self._thread_url, serialisable_url_cache, self._urls_to_filenames, self._urls_to_md5_base64, serialisable_file_options, serialisable_tag_options, self._times_to_check, self._check_period, self._last_time_checked, self._paused )
        
    
    def _InitialiseFromSerialisableInfo( self, serialisable_info ):
        '''Restore watcher state from the tuple produced by _GetSerialisableInfo.'''
        
        ( self._thread_url, serialisable_url_cache, self._urls_to_filenames, self._urls_to_md5_base64, serialisable_file_options, serialisable_tag_options, self._times_to_check, self._check_period, self._last_time_checked, self._paused ) = serialisable_info
        
        self._urls_cache = HydrusSerialisable.CreateFromSerialisableTuple( serialisable_url_cache )
        self._import_file_options = HydrusSerialisable.CreateFromSerialisableTuple( serialisable_file_options )
        self._import_tag_options = HydrusSerialisable.CreateFromSerialisableTuple( serialisable_tag_options )
        
    
    def _RegenerateSeedCacheStatus( self ):
        '''Refresh the cached seed-cache summary. Callers hold self._lock.'''
        
        self._seed_cache_status = self._urls_cache.GetStatus()
        
    
    def _WorkOnFiles( self, page_key ):
        '''
        Download and import the next STATUS_UNKNOWN file url, if any.
        
        The file's md5 (from the thread json) is checked against the db first
        so already-known files only get their tags added. On success or
        redundancy the media result is published to the owning page. Sleeps
        politely after any actual network hit.
        '''
        
        do_wait = False
        
        file_url = self._urls_cache.GetNextSeed( CC.STATUS_UNKNOWN )
        
        if file_url is None:
            
            # nothing queued right now
            return
            
        
        try:
            
            file_original_filename = self._urls_to_filenames[ file_url ]
            
            tags = [ 'filename:' + file_original_filename ]
            
            with self._lock:
                
                service_keys_to_tags = self._import_tag_options.GetServiceKeysToTags( tags )
                
            
            file_md5_base64 = self._urls_to_md5_base64[ file_url ]
            
            file_md5 = file_md5_base64.decode( 'base64' ) # python 2 str-to-str base64 decode
            
            # ask the db whether this md5 is new, already present, or deleted
            ( status, hash ) = HydrusGlobals.client_controller.Read( 'md5_status', file_md5 )
            
            if status == CC.STATUS_REDUNDANT:
                
                # already in the db: skip the download, just add tags and fetch the media result
                if len( service_keys_to_tags ) > 0:
                    
                    service_keys_to_content_updates = ClientDownloading.ConvertServiceKeysToTagsToServiceKeysToContentUpdates( hash, service_keys_to_tags )
                    
                    HydrusGlobals.client_controller.Write( 'content_updates', service_keys_to_content_updates )
                    
                
                ( media_result, ) = HydrusGlobals.client_controller.Read( 'media_results', CC.LOCAL_FILE_SERVICE_KEY, ( hash, ) )
                
            elif status == CC.STATUS_NEW:
                
                # genuinely new: download to a temp file and import it
                ( os_file_handle, temp_path ) = HydrusFileHandling.GetTempPath()
                
                try:
                    
                    report_hooks = []
                    
                    with self._lock:
                        
                        if self._file_download_hook is not None:
                            
                            report_hooks.append( self._file_download_hook )
                            
                        
                    
                    do_wait = True
                    
                    HydrusGlobals.client_controller.DoHTTP( HC.GET, file_url, report_hooks = report_hooks, temp_path = temp_path )
                    
                    ( status, media_result ) = HydrusGlobals.client_controller.WriteSynchronous( 'import_file', temp_path, import_file_options = self._import_file_options, service_keys_to_tags = service_keys_to_tags, generate_media_result = True, url = file_url )
                    
                finally:
                    
                    # always release the temp file, even if the download/import raised
                    HydrusFileHandling.CleanUpTempPath( os_file_handle, temp_path )
                    
                
            
            self._urls_cache.UpdateSeedStatus( file_url, status )
            
            if status in ( CC.STATUS_SUCCESSFUL, CC.STATUS_REDUNDANT ):
                
                HydrusGlobals.client_controller.pub( 'add_media_results', page_key, ( media_result, ) )
                
            
        except Exception as e:
            
            traceback.print_exc()
            
            # record the failure on the seed but keep the watcher itself alive
            status = CC.STATUS_FAILED
            
            note = HydrusData.ToString( e )
            
            self._urls_cache.UpdateSeedStatus( file_url, status, note = note )
            
        
        with self._lock:
            
            self._RegenerateSeedCacheStatus()
            
        
        HydrusGlobals.client_controller.pub( 'update_status', page_key )
        
        if do_wait:
            
            time.sleep( HC.options[ 'website_download_polite_wait' ] )
            
        
    
    def _WorkOnThread( self, page_key ):
        '''
        Check the watched thread for new files if a check is due.
        
        A check is due when the user requested one (and MIN_CHECK_PERIOD has
        passed) or when checks remain and the regular check period has elapsed.
        New file urls found in the thread json are queued into the url cache
        for _WorkOnFiles; otherwise the status line is refreshed with the
        next-check countdown.
        '''
        
        do_wait = False
        
        with self._lock:
            
            p1 = self._check_now and HydrusData.TimeHasPassed( self._last_time_checked + self.MIN_CHECK_PERIOD )
            p2 = self._times_to_check > 0 and HydrusData.TimeHasPassed( self._last_time_checked + self._check_period )
            
        
        if p1 or p2:
            
            with self._lock:
                
                self._watcher_status = 'checking thread'
                
            
            HydrusGlobals.client_controller.pub( 'update_status', page_key )
            
            error_occurred = False
            
            try:
                
                ( json_url, file_base ) = ClientDownloading.GetImageboardThreadURLs( self._thread_url )
                
                do_wait = True
                
                raw_json = HydrusGlobals.client_controller.DoHTTP( HC.GET, json_url )
                
                json_dict = json.loads( raw_json )
                
                posts_list = json_dict[ 'posts' ]
                
                file_infos = []
                
                # imageboard-api style posts: 'tim' + 'ext' name the file on the
                # file server, 'md5' is base64, 'filename' is the uploader's name
                for post in posts_list:
                    
                    if 'md5' not in post:
                        
                        # post has no file attached
                        continue
                        
                    
                    file_url = file_base + HydrusData.ToString( post[ 'tim' ] ) + post[ 'ext' ]
                    file_md5_base64 = post[ 'md5' ]
                    file_original_filename = post[ 'filename' ] + post[ 'ext' ]
                    
                    file_infos.append( ( file_url, file_md5_base64, file_original_filename ) )
                    
                    # some boards allow multiple files per post
                    if 'extra_files' in post:
                        
                        for extra_file in post[ 'extra_files' ]:
                            
                            if 'md5' not in extra_file:
                                
                                continue
                                
                            
                            file_url = file_base + HydrusData.ToString( extra_file[ 'tim' ] ) + extra_file[ 'ext' ]
                            file_md5_base64 = extra_file[ 'md5' ]
                            file_original_filename = extra_file[ 'filename' ] + extra_file[ 'ext' ]
                            
                            file_infos.append( ( file_url, file_md5_base64, file_original_filename ) )
                            
                        
                    
                
                num_new = 0
                
                # queue only urls we have not seen on a previous check
                for ( file_url, file_md5_base64, file_original_filename ) in file_infos:
                    
                    if not self._urls_cache.HasSeed( file_url ):
                        
                        num_new += 1
                        
                        self._urls_cache.AddSeed( file_url )
                        
                        self._urls_to_filenames[ file_url ] = file_original_filename
                        self._urls_to_md5_base64[ file_url ] = file_md5_base64
                        
                    
                
                watcher_status = 'thread checked OK - ' + HydrusData.ConvertIntToPrettyString( num_new ) + ' new files'
                
            except HydrusExceptions.NotFoundException:
                
                error_occurred = True
                
                watcher_status = 'thread 404'
                
                # thread is gone: cancel all remaining checks and tell the page
                with self._lock:
                    
                    for i in range( self._times_to_check ):
                        
                        HydrusGlobals.client_controller.pub( 'decrement_times_to_check', page_key )
                        
                    
                    self._times_to_check = 0
                    
                
            except Exception as e:
                
                error_occurred = True
                
                watcher_status = HydrusData.ToString( e )
                
            
            with self._lock:
                
                if self._check_now:
                    
                    self._check_now = False
                    
                else:
                    
                    # NOTE(review): after a 404 above, _times_to_check is already 0,
                    # so this takes it to -1 and publishes one extra decrement;
                    # looks harmless since checks require > 0, but worth confirming
                    self._times_to_check -= 1
                    
                    HydrusGlobals.client_controller.pub( 'decrement_times_to_check', page_key )
                    
                
                self._last_time_checked = HydrusData.GetNow()
                self._watcher_status = watcher_status
                self._RegenerateSeedCacheStatus()
                
            
            if error_occurred:
                
                HydrusGlobals.client_controller.pub( 'update_status', page_key )
                
                time.sleep( 5 )
                
            
        else:
            
            # no check due: just refresh the countdown / finished message
            with self._lock:
                
                if self._check_now or self._times_to_check > 0:
                    
                    if self._check_now:
                        
                        delay = self.MIN_CHECK_PERIOD
                        
                    else:
                        
                        delay = self._check_period
                        
                    
                    self._watcher_status = 'checking again in ' + HydrusData.ConvertTimestampToPrettyPending( self._last_time_checked + delay ) + ' seconds'
                    
                else:
                    
                    self._watcher_status = 'checking finished'
                    
                
            
        
        HydrusGlobals.client_controller.pub( 'update_status', page_key )
        
        if do_wait:
            
            time.sleep( HC.options[ 'website_download_polite_wait' ] )
            
        
    
    def _THREADWork( self, page_key ):
        '''
        Worker loop run on the thread spawned by Start().
        
        Repeatedly checks the watched thread and works on queued files until
        the client shuts down or the owning page is deleted. While paused it
        just idles. Any unexpected exception is shown to the user and kills
        the loop.
        '''
        
        with self._lock:
            
            self._RegenerateSeedCacheStatus()
            
        
        HydrusGlobals.client_controller.pub( 'update_status', page_key )
        
        while not ( HydrusGlobals.view_shutdown or HydrusGlobals.client_controller.PageDeleted( page_key ) ):
            
            if self._paused:
                
                time.sleep( 0.1 )
                
            else:
                
                try:
                    
                    # nothing to do until the user has set a thread url
                    if self._thread_url != '':
                        
                        self._WorkOnThread( page_key )
                        
                        self._WorkOnFiles( page_key )
                        
                    
                    time.sleep( 1 )
                    
                    HydrusGlobals.client_controller.WaitUntilPubSubsEmpty()
                    
                except Exception as e:
                    
                    HydrusData.ShowException( e )
                    
                    return
                    
                
            
        
    
    def CheckNow( self ):
        '''Request an immediate thread check (still rate-limited by MIN_CHECK_PERIOD).'''
        
        with self._lock:
            
            self._check_now = True
            
        
    
    def GetSeedCache( self ):
        '''Return the SeedCache of queued file urls.'''
        
        return self._urls_cache
        
    
    def GetOptions( self ):
        '''Return ( thread_url, import_file_options, import_tag_options, times_to_check, check_period ).'''
        
        with self._lock:
            
            return ( self._thread_url, self._import_file_options, self._import_tag_options, self._times_to_check, self._check_period )
            
        
    
    def GetStatus( self ):
        '''Return ( watcher_status, seed_cache_status, check_now, paused ) for the gui.'''
        
        with self._lock:
            
            return ( self._watcher_status, self._seed_cache_status, self._check_now, self._paused )
            
        
    
    def HasThread( self ):
        '''Return True once a thread url has been set.'''
        
        with self._lock:
            
            return self._thread_url != ''
            
        
    
    def PausePlay( self ):
        '''Toggle the paused state.'''
        
        with self._lock:
            
            self._paused = not self._paused
            
        
    
    def Pause( self ):
        '''Pause the worker loop.'''
        
        with self._lock:
            
            self._paused = True
            
        
    
    def Resume( self ):
        '''Resume the worker loop.'''
        
        with self._lock:
            
            self._paused = False
            
        
    
    def SetCheckPeriod( self, check_period ):
        '''Set seconds between checks, clamped to at least MIN_CHECK_PERIOD.'''
        
        with self._lock:
            
            self._check_period = max( self.MIN_CHECK_PERIOD, check_period )
            
        
    
    def SetDownloadHook( self, hook ):
        '''Set (or clear, with None) the progress callback used for file downloads.'''
        
        with self._lock:
            
            self._file_download_hook = hook
            
        
    
    def SetImportFileOptions( self, import_file_options ):
        '''Replace the file import options.'''
        
        with self._lock:
            
            self._import_file_options = import_file_options
            
        
    
    def SetImportTagOptions( self, import_tag_options ):
        '''Replace the tag import options.'''
        
        with self._lock:
            
            self._import_tag_options = import_tag_options
            
        
    
    def SetThreadURL( self, thread_url ):
        '''Set the imageboard thread url to watch.'''
        
        with self._lock:
            
            self._thread_url = thread_url
            
        
    
    def SetTimesToCheck( self, times_to_check ):
        '''Set how many more times the thread should be checked.'''
        
        with self._lock:
            
            self._times_to_check = times_to_check
            
        
    
    def Start( self, page_key ):
        '''Spawn the worker thread that runs _THREADWork for the given page.'''
        
        threading.Thread( target = self._THREADWork, args = ( page_key, ) ).start()
        
    
|
|
|
|
|
|
# register ThreadWatcherImport with the serialisable system so CreateFromSerialisableTuple can rebuild it
HydrusSerialisable.SERIALISABLE_TYPES_TO_OBJECT_TYPES[ HydrusSerialisable.SERIALISABLE_TYPE_THREAD_WATCHER_IMPORT ] = ThreadWatcherImport
|