hydrus/hydrus/client/importing/ClientImportLocal.py

1189 lines
48 KiB
Python
Raw Normal View History

2022-10-26 20:43:00 +00:00
import collections
2020-05-20 21:36:02 +00:00
import os
import threading
import time
2022-10-26 20:43:00 +00:00
import typing
2020-05-20 21:36:02 +00:00
2020-07-29 20:52:44 +00:00
from hydrus.core import HydrusConstants as HC
from hydrus.core import HydrusData
2022-06-22 20:43:12 +00:00
from hydrus.core import HydrusExceptions
2020-07-29 20:52:44 +00:00
from hydrus.core import HydrusFileHandling
from hydrus.core import HydrusGlobals as HG
from hydrus.core import HydrusPaths
from hydrus.core import HydrusSerialisable
from hydrus.core import HydrusThreading
2020-04-22 21:00:35 +00:00
from hydrus.client import ClientConstants as CC
from hydrus.client import ClientData
from hydrus.client import ClientFiles
from hydrus.client import ClientPaths
2022-08-03 20:59:51 +00:00
from hydrus.client import ClientSearch
2020-04-22 21:00:35 +00:00
from hydrus.client import ClientThreading
2022-06-22 20:43:12 +00:00
from hydrus.client.importing import ClientImportControl
2020-05-20 21:36:02 +00:00
from hydrus.client.importing import ClientImporting
from hydrus.client.importing import ClientImportFileSeeds
2021-09-22 21:12:34 +00:00
from hydrus.client.importing.options import FileImportOptions
2021-06-30 21:27:35 +00:00
from hydrus.client.importing.options import TagImportOptions
2022-10-26 20:43:00 +00:00
from hydrus.client.metadata import ClientMetadataMigration
from hydrus.client.metadata import ClientMetadataMigrationExporters
from hydrus.client.metadata import ClientMetadataMigrationImporters
from hydrus.client.metadata import ClientTags
2018-08-01 20:44:57 +00:00
class HDDImport( HydrusSerialisable.SerialisableBase ):
    """
    A serialisable import job that works through a fixed list of local file paths.
    
    Every path given to the constructor becomes a FILE_SEED_TYPE_HDD file seed in an
    internal FileSeedCache. Once Start() has been called, a repeating job imports the
    seeds that still have CC.STATUS_UNKNOWN, runs the metadata routers on successful
    imports, presents files to the owning page and, if requested, deletes the source
    file plus any existing sidecar paths the routers report.
    """
    
    SERIALISABLE_TYPE = HydrusSerialisable.SERIALISABLE_TYPE_HDD_IMPORT
    SERIALISABLE_NAME = 'Local File Import'
    SERIALISABLE_VERSION = 3
    
    def __init__( self, paths = None, file_import_options = None, metadata_routers = None, paths_to_additional_service_keys_to_tags = None, delete_after_success = None ):
        """
        :param paths: iterable of file paths to import. When None, no file seed cache is
            created here (presumably the deserialisation route, which fills it in
            _InitialiseFromSerialisableInfo -- confirm against callers).
        :param file_import_options: options object handed to every FileSeed.ImportPath call.
        :param metadata_routers: routers run on each successful import; defaults to none.
        :param paths_to_additional_service_keys_to_tags: optional mapping of path to the
            extra service-keys-to-tags attached to that path's file seed.
        :param delete_after_success: delete source files after a successful import;
            None is treated as False.
        """
        
        HydrusSerialisable.SerialisableBase.__init__( self )
        
        if metadata_routers is None:
            
            metadata_routers = []
            
        
        if paths_to_additional_service_keys_to_tags is None:
            
            paths_to_additional_service_keys_to_tags = collections.defaultdict( ClientTags.ServiceKeysToTags )
            
        
        if delete_after_success is None:
            
            delete_after_success = False
            
        
        if paths is None:
            
            self._file_seed_cache = None
            
        else:
            
            self._file_seed_cache = ClientImportFileSeeds.FileSeedCache()
            
            file_seeds = []
            
            for path in paths:
                
                file_seed = ClientImportFileSeeds.FileSeed( ClientImportFileSeeds.FILE_SEED_TYPE_HDD, path )
                
                try:
                    
                    file_seed.source_time = HydrusFileHandling.GetFileModifiedTimestamp( path )
                    
                except Exception:
                    
                    # best effort: without a readable modified time the seed keeps its default source time
                    # (narrowed from a bare except so KeyboardInterrupt/SystemExit are not swallowed)
                    pass
                    
                
                # membership test rather than indexing, so a defaultdict argument is not grown as a side effect
                if path in paths_to_additional_service_keys_to_tags:
                    
                    file_seed.SetExternalAdditionalServiceKeysToTags( paths_to_additional_service_keys_to_tags[ path ] )
                    
                
                file_seeds.append( file_seed )
                
            
            self._file_seed_cache.AddFileSeeds( file_seeds )
            
        
        self._metadata_routers = HydrusSerialisable.SerialisableList( metadata_routers )
        
        self._file_import_options = file_import_options
        self._delete_after_success = delete_after_success
        
        # placeholder until Start() supplies the real page key
        self._page_key = b'initialising page key'
        
        self._files_status = ''
        self._paused = False
        
        self._lock = threading.Lock()
        
        self._files_repeating_job = None
        
        self._last_serialisable_change_timestamp = 0
        
        HG.client_controller.sub( self, 'NotifyFileSeedsUpdated', 'file_seed_cache_file_seeds_updated' )
        
    
    def _GetSerialisableInfo( self ):
        """Return the version-3 serialisable tuple: ( file seed cache, file import options, metadata routers, delete_after_success, paused )."""
        
        serialisable_file_seed_cache = self._file_seed_cache.GetSerialisableTuple()
        serialisable_options = self._file_import_options.GetSerialisableTuple()
        serialisable_metadata_routers = self._metadata_routers.GetSerialisableTuple()
        
        return ( serialisable_file_seed_cache, serialisable_options, serialisable_metadata_routers, self._delete_after_success, self._paused )
        
    
    def _InitialiseFromSerialisableInfo( self, serialisable_info ):
        """Restore state from a tuple in the layout produced by _GetSerialisableInfo."""
        
        ( serialisable_file_seed_cache, serialisable_options, serialisable_metadata_routers, self._delete_after_success, self._paused ) = serialisable_info
        
        self._file_seed_cache = HydrusSerialisable.CreateFromSerialisableTuple( serialisable_file_seed_cache )
        self._file_import_options = HydrusSerialisable.CreateFromSerialisableTuple( serialisable_options )
        self._metadata_routers = HydrusSerialisable.CreateFromSerialisableTuple( serialisable_metadata_routers )
        
    
    def _SerialisableChangeMade( self ):
        """Stamp the current time as the moment of the last serialisable change."""
        
        self._last_serialisable_change_timestamp = HydrusData.GetNow()
        
    
    def _UpdateSerialisableInfo( self, version, old_serialisable_info ):
        """
        Convert serialisable info from `version` to the next version.
        
        :return: ( new_version, new_serialisable_info ) for versions 1 and 2.
        """
        
        if version == 1:
            
            ( serialisable_file_seed_cache, serialisable_options, serialisable_paths_to_tags, delete_after_success, paused ) = old_serialisable_info
            
            file_seed_cache = HydrusSerialisable.CreateFromSerialisableTuple( serialisable_file_seed_cache )
            
            # v1 kept per-path tags beside the cache, with hex-encoded service keys; v2 stores them on the seeds
            paths_to_additional_service_keys_to_tags = { path : { bytes.fromhex( service_key ) : tags for ( service_key, tags ) in service_keys_to_tags.items() } for ( path, service_keys_to_tags ) in serialisable_paths_to_tags.items() }
            
            for file_seed in file_seed_cache.GetFileSeeds():
                
                path = file_seed.file_seed_data
                
                if path in paths_to_additional_service_keys_to_tags:
                    
                    file_seed.SetExternalAdditionalServiceKeysToTags( paths_to_additional_service_keys_to_tags[ path ] )
                    
                
            
            serialisable_file_seed_cache = file_seed_cache.GetSerialisableTuple()
            
            new_serialisable_info = ( serialisable_file_seed_cache, serialisable_options, delete_after_success, paused )
            
            return ( 2, new_serialisable_info )
            
        
        if version == 2:
            
            ( serialisable_file_seed_cache, serialisable_options, delete_after_success, paused ) = old_serialisable_info
            
            # v3 adds metadata routers; older objects get an empty list
            metadata_routers = HydrusSerialisable.SerialisableList()
            
            serialisable_metadata_routers = metadata_routers.GetSerialisableTuple()
            
            new_serialisable_info = ( serialisable_file_seed_cache, serialisable_options, serialisable_metadata_routers, delete_after_success, paused )
            
            return ( 3, new_serialisable_info )
            
        
    
    @staticmethod
    def _TryToDeletePath( path ):
        """Delete `path` via ClientPaths.DeletePath, reporting (not raising) any failure."""
        
        try:
            
            ClientPaths.DeletePath( path )
            
        except Exception as e:
            
            HydrusData.ShowText( 'While attempting to delete {}, the following error occurred:'.format( path ) )
            HydrusData.ShowException( e )
            
        
    
    def _WorkOnFiles( self ):
        """
        Import the next CC.STATUS_UNKNOWN file seed, if there is one.
        
        On a successful import this runs the metadata routers, presents the file to the
        page when the presentation options allow it, and optionally deletes the source
        file and any existing router sidecar paths. Router and deletion errors are shown
        to the user rather than raised.
        """
        
        file_seed = self._file_seed_cache.GetNextFileSeed( CC.STATUS_UNKNOWN )
        
        if file_seed is None:
            
            return
            
        
        path = file_seed.file_seed_data
        
        with self._lock:
            
            self._files_status = 'importing'
            
        
        def status_hook( text ):
            
            with self._lock:
                
                self._files_status = ClientImportControl.NeatenStatusText( text )
                
            
        
        file_seed.ImportPath( self._file_seed_cache, self._file_import_options, FileImportOptions.IMPORT_TYPE_LOUD, status_hook = status_hook )
        
        if file_seed.status in CC.SUCCESSFUL_IMPORT_STATES:
            
            if len( self._metadata_routers ) > 0:
                
                # named file_hash so the builtin hash() is not shadowed
                file_hash = file_seed.GetHash()
                
                media_result = HG.client_controller.Read( 'media_result', file_hash )
                
                for router in self._metadata_routers:
                    
                    try:
                        
                        router.Work( media_result, file_seed.file_seed_data )
                        
                    except Exception as e:
                        
                        HydrusData.ShowText( 'Trying to run metadata routing on the file "{}" threw an error!'.format( file_seed.file_seed_data ) )
                        HydrusData.ShowException( e )
                        
                    
                
            
            real_presentation_import_options = FileImportOptions.GetRealPresentationImportOptions( self._file_import_options, FileImportOptions.IMPORT_TYPE_LOUD )
            
            if file_seed.ShouldPresent( real_presentation_import_options ):
                
                file_seed.PresentToPage( self._page_key )
                
            
            if self._delete_after_success:
                
                self._TryToDeletePath( path )
                
                possible_sidecar_paths = set()
                
                for router in self._metadata_routers:
                    
                    possible_sidecar_paths.update( router.GetPossibleImporterSidecarPaths( path ) )
                    
                
                for possible_sidecar_path in possible_sidecar_paths:
                    
                    if os.path.exists( possible_sidecar_path ):
                        
                        self._TryToDeletePath( possible_sidecar_path )
                        
                    
                
            
        
        with self._lock:
            
            self._files_status = ''
            
        
        time.sleep( ClientImporting.DID_SUBSTANTIAL_FILE_WORK_MINIMUM_SLEEP_TIME )
        
    
    def CurrentlyWorking( self ):
        """Return True when the file seed cache reports work to do and the import is not paused."""
        
        with self._lock:
            
            work_to_do = self._file_seed_cache.WorkToDo()
            
            return work_to_do and not self._paused
            
        
    
    def GetAPIInfoDict( self, simple ):
        """Return a dict with the file seed cache's API info under 'imports' and the pause flag under 'files_paused'."""
        
        with self._lock:
            
            d = {}
            
            d[ 'imports' ] = self._file_seed_cache.GetAPIInfoDict( simple )
            d[ 'files_paused' ] = self._paused
            
            return d
            
        
    
    def GetFileImportOptions( self ):
        """Return the current file import options object."""
        
        with self._lock:
            
            return self._file_import_options
            
        
    
    def GetFileSeedCache( self ):
        """Return the file seed cache (None if the object was built without paths and not yet deserialised)."""
        
        return self._file_seed_cache
        
    
    def GetNumSeeds( self ):
        """Return len() of the file seed cache."""
        
        with self._lock:
            
            return len( self._file_seed_cache )
            
        
    
    def GetStatus( self ):
        """Return ( status_text, paused ), where the text comes from ClientImportControl.GenerateLiveStatusText."""
        
        with self._lock:
            
            text = ClientImportControl.GenerateLiveStatusText( self._files_status, self._paused, 0, '' )
            
            return ( text, self._paused )
            
        
    
    def GetValueRange( self ):
        """Return the file seed cache's value/range (as reported by its GetValueRange)."""
        
        with self._lock:
            
            return self._file_seed_cache.GetValueRange()
            
        
    
    def HasSerialisableChangesSince( self, since_timestamp ):
        """Return True if a serialisable change was recorded strictly after `since_timestamp`."""
        
        with self._lock:
            
            return self._last_serialisable_change_timestamp > since_timestamp
            
        
    
    def NotifyFileSeedsUpdated( self, file_seed_cache_key, file_seeds ):
        """Pubsub callback: if the update is for our cache, wake the repeating job and record a serialisable change."""
        
        if file_seed_cache_key == self._file_seed_cache.GetFileSeedCacheKey():
            
            ClientImporting.WakeRepeatingJob( self._files_repeating_job )
            
            self._SerialisableChangeMade()
            
        
    
    def PausePlay( self ):
        """Toggle the paused flag, wake the repeating job and record a serialisable change."""
        
        with self._lock:
            
            self._paused = not self._paused
            
            ClientImporting.WakeRepeatingJob( self._files_repeating_job )
            
            self._SerialisableChangeMade()
            
        
    
    def SetFileImportOptions( self, file_import_options: FileImportOptions.FileImportOptions ):
        """Replace the file import options, but only if their DumpToString() differs from the current ones."""
        
        with self._lock:
            
            if file_import_options.DumpToString() != self._file_import_options.DumpToString():
                
                self._file_import_options = file_import_options
                
                self._SerialisableChangeMade()
                
            
        
    
    def Start( self, page_key ):
        """Remember the owning page key and schedule REPEATINGWorkOnFiles as a repeating job in the 'misc' thread slot."""
        
        self._page_key = page_key
        
        self._files_repeating_job = HG.client_controller.CallRepeating( ClientImporting.GetRepeatingJobInitialDelay(), ClientImporting.REPEATING_JOB_TYPICAL_PERIOD, self.REPEATINGWorkOnFiles )
        
        self._files_repeating_job.SetThreadSlotType( 'misc' )
        
    
    def CheckCanDoFileWork( self ):
        """
        Return True if file work may proceed.
        
        :raises HydrusExceptions.VetoException: re-raised from the 'stopped' check after the
            repeating job has been cancelled. The 'paused' check presumably vetoes the same
            way; REPEATINGWorkOnFiles catches VetoException from this method.
        """
        
        with self._lock:
            
            try:
                
                ClientImportControl.CheckImporterCanDoWorkBecauseStopped( self._page_key )
                
            except HydrusExceptions.VetoException:
                
                # the importer has been stopped, so the repeating job is cancelled for good before the veto propagates
                self._files_repeating_job.Cancel()
                
                raise
                
            
            ClientImportControl.CheckImporterCanDoFileWorkBecausePaused( self._paused, self._file_seed_cache, self._page_key )
            
        
        return True
        
    
    def REPEATINGWorkOnFiles( self ):
        """
        Repeating-job body: import files one at a time until work is vetoed or an error occurs.
        
        A veto stores its text as the status and ends the loop; any other exception is
        stored as a 'stopping work' status, shown to the user, and ends the call.
        """
        
        while True:
            
            try:
                
                try:
                    
                    self.CheckCanDoFileWork()
                    
                except HydrusExceptions.VetoException as e:
                    
                    with self._lock:
                        
                        self._files_status = str( e )
                        
                    
                    break
                    
                
                self._WorkOnFiles()
                
                HG.client_controller.WaitUntilViewFree()
                
                self._SerialisableChangeMade()
                
            except Exception as e:
                
                with self._lock:
                    
                    self._files_status = 'stopping work: {}'.format( str( e ) )
                    
                
                HydrusData.ShowException( e )
                
                return
                
            
        
    
# Register HDDImport in the serialisable type -> class lookup under its SERIALISABLE_TYPE_HDD_IMPORT id.
HydrusSerialisable.SERIALISABLE_TYPES_TO_OBJECT_TYPES[ HydrusSerialisable.SERIALISABLE_TYPE_HDD_IMPORT ] = HDDImport
class ImportFolder( HydrusSerialisable.SerialisableBaseNamed ):
SERIALISABLE_TYPE = HydrusSerialisable.SERIALISABLE_TYPE_IMPORT_FOLDER
SERIALISABLE_NAME = 'Import Folder'
2022-10-26 20:43:00 +00:00
SERIALISABLE_VERSION = 8
2018-08-01 20:44:57 +00:00
2022-10-26 20:43:00 +00:00
def __init__(
    self,
    name,
    path = '',
    file_import_options = None,
    tag_import_options = None,
    metadata_routers: typing.Optional[ typing.Collection[ ClientMetadataMigration.SingleFileMetadataRouter ] ] = None,
    tag_service_keys_to_filename_tagging_options = None,
    actions = None,
    action_locations = None,
    period = 3600,
    check_regularly = True,
    show_working_popup = True,
    publish_files_to_popup_button = True,
    publish_files_to_page = False
    ):
    """
    Build an import folder called `name` that watches the directory `path`.
    
    Every None argument is replaced with a fresh default: file import options flagged
    as default, empty tag import options, no metadata routers, no filename tagging
    options, an 'ignore' action for each of the four handled import statuses, and no
    action locations.
    """
    
    if file_import_options is None:
        
        file_import_options = FileImportOptions.FileImportOptions()
        file_import_options.SetIsDefault( True )
        
    
    if tag_import_options is None:
        
        tag_import_options = TagImportOptions.TagImportOptions()
        
    
    # always wrap the routers, so the attribute is a SerialisableList whatever collection was passed
    router_list = HydrusSerialisable.SerialisableList( [] if metadata_routers is None else metadata_routers )
    
    if tag_service_keys_to_filename_tagging_options is None:
        
        tag_service_keys_to_filename_tagging_options = {}
        
    
    if actions is None:
        
        handled_statuses = ( CC.STATUS_SUCCESSFUL_AND_NEW, CC.STATUS_SUCCESSFUL_BUT_REDUNDANT, CC.STATUS_DELETED, CC.STATUS_ERROR )
        
        actions = { status : CC.IMPORT_FOLDER_IGNORE for status in handled_statuses }
        
    
    if action_locations is None:
        
        action_locations = {}
        
    
    HydrusSerialisable.SerialisableBaseNamed.__init__( self, name )
    
    # what to import, and how
    self._path = path
    self._file_import_options = file_import_options
    self._tag_import_options = tag_import_options
    self._metadata_routers = router_list
    self._tag_service_keys_to_filename_tagging_options = tag_service_keys_to_filename_tagging_options
    
    # what to do with source files afterwards
    self._actions = actions
    self._action_locations = action_locations
    
    # when to check
    self._period = period
    self._check_regularly = check_regularly
    
    # runtime state
    self._file_seed_cache = ClientImportFileSeeds.FileSeedCache()
    self._last_checked = 0
    self._paused = False
    self._check_now = False
    
    # reporting
    self._show_working_popup = show_working_popup
    self._publish_files_to_popup_button = publish_files_to_popup_button
    self._publish_files_to_page = publish_files_to_page
    
def _ActionPaths( self ):
    """
    Apply the configured post-import action to processed file seeds.
    
    For each of the four handled statuses, the action in self._actions decides what
    happens to the seeds that have that status:
    
    - IMPORT_FOLDER_DELETE: delete the source file and any existing router sidecar
      paths, then drop the seed from the cache.
    - IMPORT_FOLDER_MOVE: move the source file (and a neighbouring '<path>.txt', if
      present) to the status's action location, then drop the seed. Any failure is
      shown to the user, pauses the import folder and ends the call.
    - IMPORT_FOLDER_IGNORE: keep the seed unless its source path no longer exists.
    
    :raises Exception: if a delete or an existence check fails; the original error is
        chained as the cause.
    """
    
    for status in ( CC.STATUS_SUCCESSFUL_AND_NEW, CC.STATUS_SUCCESSFUL_BUT_REDUNDANT, CC.STATUS_DELETED, CC.STATUS_ERROR ):
        
        action = self._actions[ status ]
        
        if action == CC.IMPORT_FOLDER_DELETE:
            
            while True:
                
                file_seed = self._file_seed_cache.GetNextFileSeed( status )
                
                if file_seed is None or HG.started_shutdown:
                    
                    break
                    
                
                path = file_seed.file_seed_data
                
                try:
                    
                    if os.path.exists( path ) and not os.path.isdir( path ):
                        
                        ClientPaths.DeletePath( path )
                        
                    
                    possible_sidecar_paths = set()
                    
                    for router in self._metadata_routers:
                        
                        possible_sidecar_paths.update( router.GetPossibleImporterSidecarPaths( path ) )
                        
                    
                    for possible_sidecar_path in possible_sidecar_paths:
                        
                        if os.path.exists( possible_sidecar_path ):
                            
                            ClientPaths.DeletePath( possible_sidecar_path )
                            
                        
                    
                    self._file_seed_cache.RemoveFileSeeds( ( file_seed, ) )
                    
                except Exception as e:
                    
                    # chain the cause so the real failure is not lost from the traceback
                    raise Exception( 'Tried to delete "{}", but could not.'.format( path ) ) from e
                    
                
            
        elif action == CC.IMPORT_FOLDER_MOVE:
            
            while True:
                
                file_seed = self._file_seed_cache.GetNextFileSeed( status )
                
                if file_seed is None or HG.started_shutdown:
                    
                    break
                    
                
                path = file_seed.file_seed_data
                
                try:
                    
                    dest_dir = self._action_locations[ status ]
                    
                    if not os.path.exists( dest_dir ):
                        
                        raise Exception( 'Tried to move "{}" to "{}", but the destination directory did not exist.'.format( path, dest_dir ) )
                        
                    
                    if os.path.exists( path ) and not os.path.isdir( path ):
                        
                        filename = os.path.basename( path )
                        
                        dest_path = os.path.join( dest_dir, filename )
                        
                        dest_path = HydrusPaths.AppendPathUntilNoConflicts( dest_path )
                        
                        HydrusPaths.MergeFile( path, dest_path )
                        
                    
                    txt_path = path + '.txt'
                    
                    if os.path.exists( txt_path ):
                        
                        txt_filename = os.path.basename( txt_path )
                        
                        txt_dest_path = os.path.join( dest_dir, txt_filename )
                        
                        txt_dest_path = HydrusPaths.AppendPathUntilNoConflicts( txt_dest_path )
                        
                        HydrusPaths.MergeFile( txt_path, txt_dest_path )
                        
                    
                    self._file_seed_cache.RemoveFileSeeds( ( file_seed, ) )
                    
                except Exception as e:
                    
                    HydrusData.ShowText( 'Import folder tried to move ' + path + ', but could not:' )
                    
                    HydrusData.ShowException( e )
                    
                    HydrusData.ShowText( 'Import folder has been paused.' )
                    
                    self._paused = True
                    
                    return
                    
                
            
        elif action == CC.IMPORT_FOLDER_IGNORE:
            
            # this branch used to test `status` against the action constant, so missing files were
            # only pruned for whichever status happened to share IMPORT_FOLDER_IGNORE's value
            
            file_seeds = self._file_seed_cache.GetFileSeeds( status )
            
            for file_seed in file_seeds:
                
                path = file_seed.file_seed_data
                
                try:
                    
                    if not os.path.exists( path ):
                        
                        self._file_seed_cache.RemoveFileSeeds( ( file_seed, ) )
                        
                    
                except Exception as e:
                    
                    raise Exception( 'Tried to check existence of "{}", but could not.'.format( path ) ) from e
                    
                
            
        
    
2018-08-01 20:44:57 +00:00
def _CheckFolder( self, job_key ):
    """
    Scan the import folder and queue file seeds for paths the cache does not hold yet.
    
    Progress is written to `job_key` as 'popup_text_1'; the scan stops early if the job
    is cancelled. Afterwards the last-checked time is set to now and any pending
    'check now' request is cleared.
    """
    
    # the sidecar count from GetAllFilePaths is not needed here
    ( found_paths, _ ) = ClientFiles.GetAllFilePaths( [ self._path ] )
    
    new_file_seeds = []
    
    for candidate_path in HydrusPaths.FilterFreePaths( found_paths ):
        
        if job_key.IsCancelled():
            
            break
            
        
        candidate_seed = ClientImportFileSeeds.FileSeed( ClientImportFileSeeds.FILE_SEED_TYPE_HDD, candidate_path )
        
        already_known = self._file_seed_cache.HasFileSeed( candidate_seed )
        
        if not already_known:
            
            new_file_seeds.append( candidate_seed )
            
        
        job_key.SetVariable( 'popup_text_1', 'checking: found ' + HydrusData.ToHumanInt( len( new_file_seeds ) ) + ' new files' )
        
    
    self._file_seed_cache.AddFileSeeds( new_file_seeds )
    
    self._last_checked = HydrusData.GetNow()
    self._check_now = False
    
def _GetSerialisableInfo( self ):
    """
    Return this import folder's state as the version-8 serialisable tuple.
    
    Service keys are hex-encoded, and the two action dicts are stored as lists of
    pairs because json turns int dict keys into strings.
    """
    
    s_file_import_options = self._file_import_options.GetSerialisableTuple()
    s_tag_import_options = self._tag_import_options.GetSerialisableTuple()
    s_metadata_routers = self._metadata_routers.GetSerialisableTuple()
    
    s_filename_tagging_options = [
        ( service_key.hex(), filename_tagging_options.GetSerialisableTuple() )
        for ( service_key, filename_tagging_options ) in self._tag_service_keys_to_filename_tagging_options.items()
    ]
    
    s_file_seed_cache = self._file_seed_cache.GetSerialisableTuple()
    
    return (
        self._path,
        s_file_import_options,
        s_tag_import_options,
        s_metadata_routers,
        s_filename_tagging_options,
        list( self._actions.items() ),
        list( self._action_locations.items() ),
        self._period,
        self._check_regularly,
        s_file_seed_cache,
        self._last_checked,
        self._paused,
        self._check_now,
        self._show_working_popup,
        self._publish_files_to_popup_button,
        self._publish_files_to_page
    )
    
2018-08-01 20:44:57 +00:00
def _ImportFiles( self, job_key ):
    """
    Import every CC.STATUS_UNKNOWN file seed until none are left or work must stop.
    
    Work stops when import folders are paused (globally or for this folder), the
    thread is shutting down, or `job_key` is cancelled. For each successful import
    that has a hash, additional tags, metadata routers and filename tagging options
    are applied. The object is re-saved whenever ten minutes have passed since the
    last save, and _ActionPaths runs after every tenth processed file and once at
    the end.
    
    :param job_key: job whose popup text/gauge is updated and whose cancel state is polled.
    :return: True if at least one file seed was processed.
    """
    
    did_work = False
    
    time_to_save = HydrusData.GetNow() + 600
    
    num_files_imported = 0
    presentation_hashes = []
    presentation_hashes_fast = set()
    
    num_processed = 0
    
    # don't want to start at 23/100 because of carrying over failed results or whatever
    # num_to_do is num currently unknown
    num_total = self._file_seed_cache.GetFileSeedCount( CC.STATUS_UNKNOWN )
    
    while True:
        
        file_seed = self._file_seed_cache.GetNextFileSeed( CC.STATUS_UNKNOWN )
        
        p1 = HG.client_controller.new_options.GetBoolean( 'pause_import_folders_sync' ) or self._paused
        p2 = HydrusThreading.IsThreadShuttingDown()
        p3 = job_key.IsCancelled()
        
        if file_seed is None or p1 or p2 or p3:
            
            break
            
        
        did_work = True
        
        if HydrusData.TimeHasPassed( time_to_save ):
            
            HG.client_controller.WriteSynchronous( 'serialisable', self )
            
            time_to_save = HydrusData.GetNow() + 600
            
        
        gauge_num_done = num_files_imported + 1
        
        job_key.SetVariable( 'popup_text_1', 'importing file ' + HydrusData.ConvertValueRangeToPrettyString( gauge_num_done, num_total ) )
        job_key.SetVariable( 'popup_gauge_1', ( gauge_num_done, num_total ) )
        
        path = file_seed.file_seed_data
        
        file_seed.ImportPath( self._file_seed_cache, self._file_import_options, FileImportOptions.IMPORT_TYPE_QUIET )
        
        if file_seed.status in CC.SUCCESSFUL_IMPORT_STATES:
            
            # named file_hash so the builtin hash() is not shadowed
            file_hash = None
            
            if file_seed.HasHash():
                
                file_hash = file_seed.GetHash()
                
                if self._tag_import_options.HasAdditionalTags() or len( self._metadata_routers ) > 0:
                    
                    media_result = HG.client_controller.Read( 'media_result', file_hash )
                    
                    if self._tag_import_options.HasAdditionalTags():
                        
                        downloaded_tags = []
                        
                        service_keys_to_content_updates = self._tag_import_options.GetServiceKeysToContentUpdates( file_seed.status, media_result, downloaded_tags ) # additional tags
                        
                        if len( service_keys_to_content_updates ) > 0:
                            
                            HG.client_controller.WriteSynchronous( 'content_updates', service_keys_to_content_updates )
                            
                        
                    
                    for metadata_router in self._metadata_routers:
                        
                        try:
                            
                            metadata_router.Work( media_result, path )
                            
                        except Exception as e:
                            
                            HydrusData.ShowText( 'Trying to run metadata routing in the import folder "' + self._name + '" threw an error!' )
                            
                            HydrusData.ShowException( e )
                            
                        
                    
                
                service_keys_to_tags = ClientTags.ServiceKeysToTags()
                
                for ( tag_service_key, filename_tagging_options ) in self._tag_service_keys_to_filename_tagging_options.items():
                    
                    if not HG.client_controller.services_manager.ServiceExists( tag_service_key ):
                        
                        continue
                        
                    
                    try:
                        
                        tags = filename_tagging_options.GetTags( tag_service_key, path )
                        
                        if len( tags ) > 0:
                            
                            service_keys_to_tags[ tag_service_key ] = tags
                            
                        
                    except Exception as e:
                        
                        HydrusData.ShowText( 'Trying to parse filename tags in the import folder "' + self._name + '" threw an error!' )
                        
                        HydrusData.ShowException( e )
                        
                    
                
                if len( service_keys_to_tags ) > 0:
                    
                    service_keys_to_content_updates = ClientData.ConvertServiceKeysToTagsToServiceKeysToContentUpdates( { file_hash }, service_keys_to_tags )
                    
                    HG.client_controller.WriteSynchronous( 'content_updates', service_keys_to_content_updates )
                    
                
            
            num_files_imported += 1
            
            # a seed without a hash has nothing to present; without this guard None would be published as a hash
            if file_hash is not None and file_hash not in presentation_hashes_fast:
                
                real_presentation_import_options = FileImportOptions.GetRealPresentationImportOptions( self._file_import_options, FileImportOptions.IMPORT_TYPE_LOUD )
                
                if file_seed.ShouldPresent( real_presentation_import_options ):
                    
                    presentation_hashes.append( file_hash )
                    
                    presentation_hashes_fast.add( file_hash )
                    
                
            
        elif file_seed.status == CC.STATUS_ERROR:
            
            HydrusData.Print( 'A file failed to import from import folder ' + self._name + ':' + path )
            
        
        num_processed += 1
        
        # action source files in batches, so a long run does not leave everything until the very end
        if num_processed % 10 == 0:
            
            self._ActionPaths()
            
        
    
    if num_files_imported > 0:
        
        HydrusData.Print( 'Import folder ' + self._name + ' imported ' + HydrusData.ToHumanInt( num_files_imported ) + ' files.' )
        
        if len( presentation_hashes ) > 0:
            
            ClientImporting.PublishPresentationHashes( self._name, presentation_hashes, self._publish_files_to_popup_button, self._publish_files_to_page )
            
        
    
    self._ActionPaths()
    
    return did_work
    
def _InitialiseFromSerialisableInfo( self, serialisable_info ):
    """Restore all state from a version-8 tuple in the layout produced by _GetSerialisableInfo."""
    
    (
        self._path,
        s_file_import_options,
        s_tag_import_options,
        s_metadata_routers,
        s_filename_tagging_options,
        action_pairs,
        action_location_pairs,
        self._period,
        self._check_regularly,
        s_file_seed_cache,
        self._last_checked,
        self._paused,
        self._check_now,
        self._show_working_popup,
        self._publish_files_to_popup_button,
        self._publish_files_to_page
    ) = serialisable_info
    
    # stored as pair lists; rebuild the dicts
    self._actions = dict( action_pairs )
    self._action_locations = dict( action_location_pairs )
    
    revive = HydrusSerialisable.CreateFromSerialisableTuple
    
    self._file_import_options = revive( s_file_import_options )
    self._tag_import_options = revive( s_tag_import_options )
    self._metadata_routers = revive( s_metadata_routers )
    
    # service keys were hex-encoded for storage
    self._tag_service_keys_to_filename_tagging_options = {
        bytes.fromhex( encoded_service_key ) : revive( s_options )
        for ( encoded_service_key, s_options ) in s_filename_tagging_options
    }
    
    self._file_seed_cache = revive( s_file_seed_cache )
    
def _UpdateSerialisableInfo( self, version, old_serialisable_info ):
    """
    Convert serialisable info from `version` to the next version.
    
    :return: ( version + 1, new_serialisable_info ) for versions 1 through 7.
    """
    
    if version == 1:
        
        # v1 -> v2: the single 'tag' field is dropped and default tag import options take its place
        ( path, mimes, s_fio, action_pairs, action_location_pairs, period, open_popup, tag, s_fsc, last_checked, paused ) = old_serialisable_info
        
        # edited out tag carry-over to tio due to bit rot
        
        s_tio = TagImportOptions.TagImportOptions().GetSerialisableTuple()
        
        return ( 2, ( path, mimes, s_fio, s_tio, action_pairs, action_location_pairs, period, open_popup, s_fsc, last_checked, paused ) )
        
    elif version == 2:
        
        # v2 -> v3: add an empty list of txt-parsing tag service keys
        ( path, mimes, s_fio, s_tio, action_pairs, action_location_pairs, period, open_popup, s_fsc, last_checked, paused ) = old_serialisable_info
        
        s_txt_parse_keys = []
        
        return ( 3, ( path, mimes, s_fio, s_tio, s_txt_parse_keys, action_pairs, action_location_pairs, period, open_popup, s_fsc, last_checked, paused ) )
        
    elif version == 3:
        
        # v3 -> v4: add the check_now flag, initially off
        ( path, mimes, s_fio, s_tio, s_txt_parse_keys, action_pairs, action_location_pairs, period, open_popup, s_fsc, last_checked, paused ) = old_serialisable_info
        
        check_now = False
        
        return ( 4, ( path, mimes, s_fio, s_tio, s_txt_parse_keys, action_pairs, action_location_pairs, period, open_popup, s_fsc, last_checked, paused, check_now ) )
        
    elif version == 4:
        
        # v4 -> v5: each txt-parsing service key becomes filename tagging options that load neighbouring .txt files
        ( path, mimes, s_fio, s_tio, s_txt_parse_keys, action_pairs, action_location_pairs, period, open_popup, s_fsc, last_checked, paused, check_now ) = old_serialisable_info
        
        keys_to_fto = {}
        
        for hex_service_key in s_txt_parse_keys:
            
            fto = TagImportOptions.FilenameTaggingOptions()
            
            fto._load_from_neighbouring_txt_files = True
            
            keys_to_fto[ bytes.fromhex( hex_service_key ) ] = fto
            
        
        s_keys_to_fto = [ ( service_key.hex(), fto.GetSerialisableTuple() ) for ( service_key, fto ) in keys_to_fto.items() ]
        
        return ( 5, ( path, mimes, s_fio, s_tio, s_keys_to_fto, action_pairs, action_location_pairs, period, open_popup, s_fsc, last_checked, paused, check_now ) )
        
    elif version == 5:
        
        # v5 -> v6: open_popup is replaced by check_regularly plus three popup/publish flags
        ( path, mimes, s_fio, s_tio, s_keys_to_fto, action_pairs, action_location_pairs, period, open_popup, s_fsc, last_checked, paused, check_now ) = old_serialisable_info
        
        check_regularly = not paused
        show_working_popup = True
        publish_files_to_popup_button = open_popup
        publish_files_to_page = False
        
        return ( 6, ( path, mimes, s_fio, s_tio, s_keys_to_fto, action_pairs, action_location_pairs, period, check_regularly, s_fsc, last_checked, paused, check_now, show_working_popup, publish_files_to_popup_button, publish_files_to_page ) )
        
    elif version == 6:
        
        # v6 -> v7: the mimes field is folded into the file import options
        ( path, mimes, s_fio, s_tio, s_keys_to_fto, action_pairs, action_location_pairs, period, check_regularly, s_fsc, last_checked, paused, check_now, show_working_popup, publish_files_to_popup_button, publish_files_to_page ) = old_serialisable_info
        
        fio = HydrusSerialisable.CreateFromSerialisableTuple( s_fio )
        
        fio.SetAllowedSpecificFiletypes( mimes )
        
        s_fio = fio.GetSerialisableTuple()
        
        return ( 7, ( path, s_fio, s_tio, s_keys_to_fto, action_pairs, action_location_pairs, period, check_regularly, s_fsc, last_checked, paused, check_now, show_working_popup, publish_files_to_popup_button, publish_files_to_page ) )
        
    elif version == 7:
        
        # v7 -> v8: add metadata routers, creating a TXT-importer router for every
        # filename tagging options object that loaded neighbouring .txt files
        ( path, s_fio, s_tio, s_keys_to_fto, action_pairs, action_location_pairs, period, check_regularly, s_fsc, last_checked, paused, check_now, show_working_popup, publish_files_to_popup_button, publish_files_to_page ) = old_serialisable_info
        
        keys_to_fto = { bytes.fromhex( hex_service_key ) : HydrusSerialisable.CreateFromSerialisableTuple( s_fto ) for ( hex_service_key, s_fto ) in s_keys_to_fto }
        
        metadata_routers = HydrusSerialisable.SerialisableList()
        
        try:
            
            for ( service_key, fto ) in keys_to_fto.items():
                
                # beardy access here, but this is once off
                if getattr( fto, '_load_from_neighbouring_txt_files', False ):
                    
                    importers = [ ClientMetadataMigrationImporters.SingleFileMetadataImporterTXT() ]
                    exporter = ClientMetadataMigrationExporters.SingleFileMetadataExporterMediaTags( service_key = service_key )
                    
                    metadata_routers.append( ClientMetadataMigration.SingleFileMetadataRouter( importers = importers, exporter = exporter ) )
                    
                
            
        except Exception as e:
            
            # a failed conversion is logged; the update continues with whatever routers were built
            HydrusData.Print( 'Failed to update import folder with new metadata routers.' )
            
            HydrusData.PrintException( e )
            
        
        s_metadata_routers = metadata_routers.GetSerialisableTuple()
        
        return ( 8, ( path, s_fio, s_tio, s_metadata_routers, s_keys_to_fto, action_pairs, action_location_pairs, period, check_regularly, s_fsc, last_checked, paused, check_now, show_working_popup, publish_files_to_popup_button, publish_files_to_page ) )
        
    
2018-08-01 20:44:57 +00:00
def CheckNow( self ):
    """Request a folder check on the next work pass, clearing any pause first."""
    
    # DoWork returns early while the folder is paused, so the pause is lifted along with setting the flag
    ( self._paused, self._check_now ) = ( False, True )
def DoWork( self ):
    """Run one pass of this import folder: check the folder if a check is due, then import pending files.
    
    Does nothing if the client has started shutdown, if the 'pause_import_folders_sync'
    option is set, or if this folder is paused.
    
    Exceptions raised while checking or importing are not propagated: the folder is paused
    and the error is shown via HydrusData.ShowText/ShowException. The folder is saved with
    WriteSynchronous only when a check ran, import work was reported, or an error occurred.
    The job key is always deleted before returning, even if reporting or saving fails.
    """
    
    if HG.started_shutdown:
        
        return
        
    
    if HG.client_controller.new_options.GetBoolean( 'pause_import_folders_sync' ) or self._paused:
        
        return
        
    
    checked_folder = False
    did_import_file_work = False
    error_occurred = False
    
    # the job key gets a stop time 3600 past 'now' (presumably seconds, i.e. one hour)
    stop_time = HydrusData.GetNow() + 3600
    
    job_key = ClientThreading.JobKey( pausable = False, cancellable = True, stop_time = stop_time )
    
    try:
        
        try:
            
            real_file_import_options = FileImportOptions.GetRealFileImportOptions( self._file_import_options, FileImportOptions.IMPORT_TYPE_QUIET )
            
            real_file_import_options.CheckReadyToImport()
            
            # isdir is already False for a path that does not exist, so one test covers both failure cases
            if not os.path.isdir( self._path ):
                
                raise Exception( 'Path "' + self._path + '" does not seem to exist, or is not a directory.' )
                
            
            pubbed_job_key = False
            
            job_key.SetStatusTitle( 'import folder - ' + self._name )
            
            due_by_check_now = self._check_now
            due_by_period = self._check_regularly and HydrusData.TimeHasPassed( self._last_checked + self._period )
            
            if due_by_check_now or due_by_period:
                
                # the popup is published at most once per pass, and only if the user asked for it
                if not pubbed_job_key and self._show_working_popup:
                    
                    HG.client_controller.pub( 'message', job_key )
                    
                    pubbed_job_key = True
                    
                
                self._CheckFolder( job_key )
                
                checked_folder = True
                
            
            file_seed = self._file_seed_cache.GetNextFileSeed( CC.STATUS_UNKNOWN )
            
            if file_seed is not None:
                
                if not pubbed_job_key and self._show_working_popup:
                    
                    HG.client_controller.pub( 'message', job_key )
                    
                    pubbed_job_key = True
                    
                
                did_import_file_work = self._ImportFiles( job_key )
                
            
        except Exception as e:
            
            error_occurred = True
            
            # pause so a broken folder is not retried until the user intervenes
            self._paused = True
            
            HydrusData.ShowText( 'The import folder "' + self._name + '" encountered an exception! It has been paused!' )
            HydrusData.ShowException( e )
            
        
        if checked_folder or did_import_file_work or error_occurred:
            
            HG.client_controller.WriteSynchronous( 'serialisable', self )
            
        
    finally:
        
        # always release the job key, even when reporting the error or saving raised
        job_key.Delete()
        
    
def GetFileSeedCache( self ):
    """Return the file seed cache object this import folder currently holds (not a copy)."""
    
    file_seed_cache = self._file_seed_cache
    
    return file_seed_cache
2022-10-26 20:43:00 +00:00
def GetMetadataRouters( self ):
    """Return the stored metadata routers as a new plain list, leaving the stored collection untouched."""
    
    routers = [ *self._metadata_routers ]
    
    return routers
2018-08-01 20:44:57 +00:00
def Paused( self ):
    """Return the current value of this import folder's paused flag."""
    
    is_paused = self._paused
    
    return is_paused
def PausePlay( self ):
    """Toggle the paused flag: a paused folder becomes unpaused and vice versa."""
    
    self._paused = False if self._paused else True
def ToListBoxTuple( self ):
    """Return the summary tuple ( name, path, paused, check_regularly, period ) for this import folder."""
    
    name = self._name
    path = self._path
    paused = self._paused
    check_regularly = self._check_regularly
    period = self._period
    
    return ( name, path, paused, check_regularly, period )
def ToTuple( self ):
    """Return this import folder's editable settings as a 14-tuple.
    
    The element order is the same as the parameter order of SetTuple.
    """
    
    identity = ( self._name, self._path )
    
    import_options = ( self._file_import_options, self._tag_import_options, self._tag_service_keys_to_filename_tagging_options )
    
    post_import_actions = ( self._actions, self._action_locations )
    
    schedule = ( self._period, self._check_regularly, self._paused, self._check_now )
    
    presentation = ( self._show_working_popup, self._publish_files_to_popup_button, self._publish_files_to_page )
    
    return identity + import_options + post_import_actions + schedule + presentation
2018-08-01 20:44:57 +00:00
def SetFileSeedCache( self, file_seed_cache ):
    """Replace the file seed cache held by this import folder with the given object."""
    
    new_cache = file_seed_cache
    
    self._file_seed_cache = new_cache
2022-10-26 20:43:00 +00:00
def SetMetadataRouters( self, metadata_routers: typing.Collection[ ClientMetadataMigration.SingleFileMetadataRouter ] ):
    """Store the given routers, wrapped in a new HydrusSerialisable.SerialisableList."""
    
    serialisable_routers = HydrusSerialisable.SerialisableList( metadata_routers )
    
    self._metadata_routers = serialisable_routers
2022-08-03 20:59:51 +00:00
def SetTuple( self, name, path, file_import_options, tag_import_options, tag_service_keys_to_filename_tagging_options, actions, action_locations, period, check_regularly, paused, check_now, show_working_popup, publish_files_to_popup_button, publish_files_to_page ):
    """Overwrite this import folder's editable settings; parameters are in the same order ToTuple returns them.
    
    Before the new values are stored, the file seed cache is adjusted:
    
    - if the path differs from the current one, the cache is replaced with a fresh FileSeedCache
    - if neither the new nor the current file import options are default and their allowed
      specific filetypes differ, seeds with STATUS_VETOED are removed from the cache
    """
    
    if path != self._path:
        
        # the folder being watched changed, so start again from an empty cache
        self._file_seed_cache = ClientImportFileSeeds.FileSeedCache()
        
    
    if not ( file_import_options.IsDefault() or self._file_import_options.IsDefault() ):
        
        new_mimes = set( file_import_options.GetAllowedSpecificFiletypes() )
        current_mimes = set( self._file_import_options.GetAllowedSpecificFiletypes() )
        
        if new_mimes != current_mimes:
            
            # the allowed filetypes changed; presumably dropping vetoed seeds lets those files be reconsidered
            self._file_seed_cache.RemoveFileSeedsByStatus( ( CC.STATUS_VETOED, ) )
            
        
    
    # identity
    self._name = name
    self._path = path
    
    # import options
    self._file_import_options = file_import_options
    self._tag_import_options = tag_import_options
    self._tag_service_keys_to_filename_tagging_options = tag_service_keys_to_filename_tagging_options
    
    # post-import actions
    self._actions = actions
    self._action_locations = action_locations
    
    # schedule
    self._period = period
    self._check_regularly = check_regularly
    self._paused = paused
    self._check_now = check_now
    
    # presentation
    self._show_working_popup = show_working_popup
    self._publish_files_to_popup_button = publish_files_to_popup_button
    self._publish_files_to_page = publish_files_to_page
# register ImportFolder in the serialisable type lookup under SERIALISABLE_TYPE_IMPORT_FOLDER
HydrusSerialisable.SERIALISABLE_TYPES_TO_OBJECT_TYPES[ HydrusSerialisable.SERIALISABLE_TYPE_IMPORT_FOLDER ] = ImportFolder