import collections
import os
import re
import typing

from hydrus.core import HydrusConstants as HC
from hydrus.core import HydrusData
from hydrus.core import HydrusExceptions
from hydrus.core import HydrusGlobals as HG
from hydrus.core import HydrusPaths
from hydrus.core import HydrusSerialisable
from hydrus.core import HydrusTags
from hydrus.core import HydrusThreading

from hydrus.client import ClientConstants as CC
from hydrus.client import ClientPaths
from hydrus.client import ClientSearch
from hydrus.client.metadata import ClientMetadataMigration
from hydrus.client.metadata import ClientTags

MAX_PATH_LENGTH = 240 # bit of padding from 255 for .txt neighbouring and other surprises

def GenerateExportFilename( destination_directory, media, terms, file_index, do_not_use_filenames = None ):
    """
    Build the export filename (including extension) for one media object.

    destination_directory: directory the file will be written into; only used for length checks and sanitisation.
    media: object providing GetTagsManager(), GetHash(), GetHashId() and GetMime().
    terms: list of ( term_type, term ) pairs as produced by ParseExportPhrase.
    file_index: number substituted for the '{#}' predicate.
    do_not_use_filenames: optional container of filenames already taken; when given, ' (n)' is
        inserted before the extension until the result is not in the container.

    Raises Exception if destination_directory is longer than MAX_PATH_LENGTH - 10 characters.
    """
    
    def clean_tag_text( t ):
        
        # tag text must not inject the platform's path separator into the filename
        
        if HC.PLATFORM_WINDOWS:
            
            t = re.sub( r'\\', '_', t )
            
        else:
            
            t = re.sub( '/', '_', t )
            
        
        return t
        
    
    if len( destination_directory ) > ( MAX_PATH_LENGTH - 10 ):
        
        raise Exception( 'The destination directory is too long!' )
        
    
    filename = ''
    
    # loop-invariant, so fetched once rather than once per term
    tags_manager = media.GetTagsManager()
    
    for ( term_type, term ) in terms:
        
        if term_type == 'string':
            
            filename += term
            
        elif term_type == 'namespace':
            
            tags = tags_manager.GetNamespaceSlice( CC.COMBINED_TAG_SERVICE_KEY, ( term, ), ClientTags.TAG_DISPLAY_ACTUAL )
            
            subtags = sorted( ( HydrusTags.SplitTag( tag )[1] for tag in tags ) )
            
            filename += clean_tag_text( ', '.join( subtags ) )
            
        elif term_type == 'predicate':
            
            if term in ( 'tags', 'nn tags' ):
                
                current = tags_manager.GetCurrent( CC.COMBINED_TAG_SERVICE_KEY, ClientTags.TAG_DISPLAY_ACTUAL )
                pending = tags_manager.GetPending( CC.COMBINED_TAG_SERVICE_KEY, ClientTags.TAG_DISPLAY_ACTUAL )
                
                tags = sorted( current.union( pending ) )
                
                if term == 'nn tags':
                    
                    # 'nn' keeps only tags that contain no ':' at all
                    tags = [ tag for tag in tags if ':' not in tag ]
                    
                else:
                    
                    tags = [ HydrusTags.SplitTag( tag )[1] for tag in tags ]
                    
                
                filename += clean_tag_text( ', '.join( tags ) )
                
            elif term == 'hash':
                
                filename += media.GetHash().hex()
                
            elif term == 'file_id':
                
                filename += str( media.GetHashId() )
                
            elif term == '#':
                
                filename += str( file_index )
                
            
        elif term_type == 'tag':
            
            # NOTE(review): only the subtag is tested and written; the namespace part of the term is discarded -- confirm this is intended
            ( namespace, subtag ) = HydrusTags.SplitTag( term )
            
            if tags_manager.HasTag( subtag, ClientTags.TAG_DISPLAY_ACTUAL ):
                
                filename += clean_tag_text( subtag )
                
            
        
    
    # a leading separator would make the later os.path.join discard the destination directory
    while filename.startswith( os.path.sep ):
        
        filename = filename[1:]
        
    
    # replace many consecutive (back)slash with single
    
    if HC.PLATFORM_WINDOWS:
        
        filename = re.sub( r'\\+', r'\\', filename )
        
    else:
        
        filename = re.sub( '/+', '/', filename )
        
    
    filename = HydrusPaths.SanitizePathForExport( destination_directory, filename )
    
    #
    
    mime = media.GetMime()
    
    ext = HC.mime_ext_lookup[ mime ]
    
    # strip an extension the phrase already produced so it is not doubled below
    if filename.endswith( ext ):
        
        filename = filename[ : - len( ext ) ]
        
    
    example_dest_path = os.path.join( destination_directory, filename + ext )
    
    excess_chars = len( example_dest_path ) - MAX_PATH_LENGTH
    
    if excess_chars > 0:
        
        # trim the stem, not the extension, to get the full path under the limit
        filename = filename[ : - excess_chars ]
        
    
    if do_not_use_filenames is not None:
        
        i = 1
        
        possible_filename = '{}{}'.format( filename, ext )
        
        while possible_filename in do_not_use_filenames:
            
            possible_filename = '{} ({}){}'.format( filename, i, ext )
            
            i += 1
            
        
        filename = possible_filename
        
    else:
        
        filename += ext
        
    
    return filename
    

def GetExportPath():
    """
    Return the absolute export directory.

    If the 'export_path' option is None, falls back to '~/hydrus_export' (created if needed),
    or returns None when the home directory cannot be expanded. Otherwise the stored portable
    path is converted to an absolute path.
    """
    
    portable_path = HG.client_controller.options[ 'export_path' ]
    
    if portable_path is None:
        
        desired_path = os.path.join( '~', 'hydrus_export' )
        
        path = os.path.expanduser( desired_path )
        
        if path == desired_path:
            
            # could not figure it out, probably crazy user setup atm
            
            return None
            
        
        HydrusPaths.MakeSureDirectoryExists( path )
        
    else:
        
        path = HydrusPaths.ConvertPortablePathToAbsPath( portable_path )
        
    
    return path
    

def ParseExportPhrase( phrase ):
    """
    Tokenise an export phrase into a list of ( term_type, term ) pairs.

    '[x]' becomes ( 'namespace', 'x' ), '{x}' becomes ( 'predicate', 'x' ), '(x)' becomes
    ( 'tag', 'x' ), and everything else becomes ( 'string', text ). Empty string terms are kept,
    e.g. 'a [b] {hash}' gives
    [ ( 'string', 'a ' ), ( 'namespace', 'b' ), ( 'string', ' ' ), ( 'predicate', 'hash' ), ( 'string', '' ) ].

    Raises Exception ('Could not parse that phrase: ...') on any parsing failure, such as an
    opening bracket with no matching closer.
    """
    
    # processed in this order: later passes only look inside the remaining 'string' terms
    delimiter_passes = (
        ( '[', ']', 'namespace' ),
        ( '{', '}', 'predicate' ),
        ( '(', ')', 'tag' )
    )
    
    try:
        
        terms = [ ( 'string', phrase ) ]
        
        for ( open_char, close_char, parsed_term_type ) in delimiter_passes:
            
            new_terms = []
            
            for ( term_type, term ) in terms:
                
                if term_type == 'string':
                    
                    while open_char in term:
                        
                        ( pre, term ) = term.split( open_char, 1 )
                        
                        # unpacking fails here when the closer is missing; the outer except reports it
                        ( parsed_term, term ) = term.split( close_char, 1 )
                        
                        new_terms.append( ( 'string', pre ) )
                        new_terms.append( ( parsed_term_type, parsed_term ) )
                        
                    
                
                new_terms.append( ( term_type, term ) )
                
            
            terms = new_terms
            
        
    except Exception as e:
        
        raise Exception( 'Could not parse that phrase: ' + str( e ) ) from e
        
    
    return terms
    

class ExportFolder( HydrusSerialisable.SerialisableBaseNamed ):
    """
    A named, serialisable job that exports the results of a file search to a directory.

    DoWork() decides whether a run is due and calls _DoExport(), which mirrors the files, runs
    the metadata routers, optionally removes stale files (synchronise type) and optionally
    deletes the exported files from the client.
    """
    
    SERIALISABLE_TYPE = HydrusSerialisable.SERIALISABLE_TYPE_EXPORT_FOLDER
    SERIALISABLE_NAME = 'Export Folder'
    SERIALISABLE_VERSION = 6
    
    def __init__( self, name, path = '', export_type = HC.EXPORT_FOLDER_TYPE_REGULAR, delete_from_client_after_export = False, file_search_context = None, metadata_routers = None, run_regularly = True, period = 3600, phrase = None, last_checked = 0, paused = False, run_now = False, last_error = '' ):
        """
        Create an export folder.

        Defaults resolved here: file_search_context falls back to the default local location
        context, metadata_routers to an empty list, and phrase to the 'export_phrase' option.
        A synchronise-type folder never deletes from the client, and run_now overrides paused.
        """
        
        HydrusSerialisable.SerialisableBaseNamed.__init__( self, name )
        
        if export_type == HC.EXPORT_FOLDER_TYPE_SYNCHRONISE:
            
            delete_from_client_after_export = False
            
        
        if file_search_context is None:
            
            default_location_context = HG.client_controller.new_options.GetDefaultLocalLocationContext()
            
            file_search_context = ClientSearch.FileSearchContext( location_context = default_location_context )
            
        
        if metadata_routers is None:
            
            metadata_routers = []
            
        
        if phrase is None:
            
            phrase = HG.client_controller.new_options.GetString( 'export_phrase' )
            
        
        self._path = path
        self._export_type = export_type
        self._delete_from_client_after_export = delete_from_client_after_export
        self._file_search_context = file_search_context
        self._metadata_routers = HydrusSerialisable.SerialisableList( metadata_routers )
        self._run_regularly = run_regularly
        self._period = period
        self._phrase = phrase
        self._last_checked = last_checked
        self._paused = paused and not run_now
        self._run_now = run_now
        self._last_error = last_error
        
    
    def _GetSerialisableInfo( self ):
        """Return the version 6 serialisable tuple for this folder."""
        
        serialisable_file_search_context = self._file_search_context.GetSerialisableTuple()
        serialisable_metadata_routers = self._metadata_routers.GetSerialisableTuple()
        
        return ( self._path, self._export_type, self._delete_from_client_after_export, serialisable_file_search_context, serialisable_metadata_routers, self._run_regularly, self._period, self._phrase, self._last_checked, self._paused, self._run_now, self._last_error )
        
    
    def _InitialiseFromSerialisableInfo( self, serialisable_info ):
        """Restore state from a version 6 serialisable tuple."""
        
        ( self._path, self._export_type, self._delete_from_client_after_export, serialisable_file_search_context, serialisable_metadata_routers, self._run_regularly, self._period, self._phrase, self._last_checked, self._paused, self._run_now, self._last_error ) = serialisable_info
        
        # same invariant as __init__: synchronise folders never delete from the client
        if self._export_type == HC.EXPORT_FOLDER_TYPE_SYNCHRONISE:
            
            self._delete_from_client_after_export = False
            
        
        self._file_search_context = HydrusSerialisable.CreateFromSerialisableTuple( serialisable_file_search_context )
        self._metadata_routers = HydrusSerialisable.CreateFromSerialisableTuple( serialisable_metadata_routers )
        
    
    def _UpdateSerialisableInfo( self, version, old_serialisable_info ):
        """
        Upgrade serialised data by one version step.

        Returns ( new_version, new_serialisable_info ) for versions 1 through 5.
        """
        
        if version == 1:
            
            ( export_type, serialisable_file_search_context, period, phrase, last_checked ) = old_serialisable_info
            
            # version 1 had no separate path; the folder's name was used as the path
            path = self._name
            
            new_serialisable_info = ( path, export_type, serialisable_file_search_context, period, phrase, last_checked )
            
            return ( 2, new_serialisable_info )
            
        
        if version == 2:
            
            ( path, export_type, serialisable_file_search_context, period, phrase, last_checked ) = old_serialisable_info
            
            delete_from_client_after_export = False
            
            new_serialisable_info = ( path, export_type, delete_from_client_after_export, serialisable_file_search_context, period, phrase, last_checked )
            
            return ( 3, new_serialisable_info )
            
        
        if version == 3:
            
            ( path, export_type, delete_from_client_after_export, serialisable_file_search_context, period, phrase, last_checked ) = old_serialisable_info
            
            run_regularly = True
            paused = False
            run_now = False
            
            new_serialisable_info = ( path, export_type, delete_from_client_after_export, serialisable_file_search_context, run_regularly, period, phrase, last_checked, paused, run_now )
            
            return ( 4, new_serialisable_info )
            
        
        if version == 4:
            
            ( path, export_type, delete_from_client_after_export, serialisable_file_search_context, run_regularly, period, phrase, last_checked, paused, run_now ) = old_serialisable_info
            
            last_error = ''
            
            new_serialisable_info = ( path, export_type, delete_from_client_after_export, serialisable_file_search_context, run_regularly, period, phrase, last_checked, paused, run_now, last_error )
            
            return ( 5, new_serialisable_info )
            
        
        if version == 5:
            
            ( path, export_type, delete_from_client_after_export, serialisable_file_search_context, run_regularly, period, phrase, last_checked, paused, run_now, last_error ) = old_serialisable_info
            
            metadata_routers = HydrusSerialisable.SerialisableList()
            
            serialisable_metadata_routers = metadata_routers.GetSerialisableTuple()
            
            new_serialisable_info = ( path, export_type, delete_from_client_after_export, serialisable_file_search_context, serialisable_metadata_routers, run_regularly, period, phrase, last_checked, paused, run_now, last_error )
            
            return ( 6, new_serialisable_info )
            
        
    
    def _DoExport( self ):
        """
        Run one export pass.

        Runs the file search, mirrors every result into self._path under a name generated from
        the export phrase, and runs each metadata router on it. A synchronise-type folder then
        deletes files that were present before the run but not exported by it, plus directories
        left empty; if delete-after-export is set, the exported files are deleted from the
        client's local file services. Returns early, without those later steps, if export folder
        sync is paused in the options or the thread is shutting down.

        Raises Exception if a source file is missing or a destination falls outside self._path.
        """
        
        query_hash_ids = HG.client_controller.Read( 'file_query_ids', self._file_search_context, apply_implicit_limit = False )
        
        media_results = []
        
        # fetch in chunks so a pause or shutdown can interrupt a large query
        chunk_size = 256
        
        for chunk_start in range( 0, len( query_hash_ids ), chunk_size ):
            
            if HG.client_controller.new_options.GetBoolean( 'pause_export_folders_sync' ) or HydrusThreading.IsThreadShuttingDown():
                
                return
                
            
            sub_query_hash_ids = query_hash_ids[ chunk_start : chunk_start + chunk_size ]
            
            more_media_results = HG.client_controller.Read( 'media_results_from_ids', sub_query_hash_ids )
            
            media_results.extend( more_media_results )
            
        
        # the position in this ordering feeds the '{#}' predicate via file_index below
        media_results.sort( key = lambda mr: mr.GetHashId() )
        
        #
        
        terms = ParseExportPhrase( self._phrase )
        
        # everything on disk before the run; used to find stale files when synchronising
        previous_paths = set()
        
        for ( root, dirnames, filenames ) in os.walk( self._path ):
            
            previous_paths.update( ( os.path.join( root, filename ) for filename in filenames ) )
            
        
        sync_paths = set()
        
        client_files_manager = HG.client_controller.client_files_manager
        
        num_copied = 0
        
        for ( i, media_result ) in enumerate( media_results ):
            
            if HG.client_controller.new_options.GetBoolean( 'pause_export_folders_sync' ) or HydrusThreading.IsThreadShuttingDown():
                
                return
                
            
            file_hash = media_result.GetHash()
            mime = media_result.GetMime()
            
            try:
                
                source_path = client_files_manager.GetFilePath( file_hash, mime )
                
            except HydrusExceptions.FileMissingException:
                
                raise Exception( 'A file to be exported, hash "{}", was missing! You should run file maintenance (under database->maintenance->files) to check the files for the export folder\'s search, and possibly all your files.'.format( file_hash.hex() ) )
                
            
            filename = GenerateExportFilename( self._path, media_result, terms, i + 1 )
            
            dest_path = os.path.normpath( os.path.join( self._path, filename ) )
            
            # NOTE(review): plain prefix test against the un-normalised self._path -- confirm this is strict enough
            if not dest_path.startswith( self._path ):
                
                raise Exception( 'It seems a destination path for export folder "{}" was above the main export directory! The file was "{}" and its destination path was "{}".'.format( self._path, file_hash.hex(), dest_path ) )
                
            
            dest_path_dir = os.path.dirname( dest_path )
            
            HydrusPaths.MakeSureDirectoryExists( dest_path_dir )
            
            # two files can resolve to the same filename; only the first is mirrored
            if dest_path not in sync_paths:
                
                copied = HydrusPaths.MirrorFile( source_path, dest_path )
                
                if copied:
                    
                    num_copied += 1
                    
                    HydrusPaths.TryToGiveFileNicePermissionBits( dest_path )
                    
                
            
            for metadata_router in self._metadata_routers:
                
                metadata_router.Work( media_result, dest_path )
                
            
            sync_paths.add( dest_path )
            
        
        if num_copied > 0:
            
            HydrusData.Print( 'Export folder ' + self._name + ' exported ' + HydrusData.ToHumanInt( num_copied ) + ' files.' )
            
        
        if self._export_type == HC.EXPORT_FOLDER_TYPE_SYNCHRONISE:
            
            deletee_paths = previous_paths.difference( sync_paths )
            
            for deletee_path in deletee_paths:
                
                ClientPaths.DeletePath( deletee_path )
                
            
            deletee_dirs = set()
            
            # bottom-up, so a parent is marked once all of its children have been marked
            for ( root, dirnames, filenames ) in os.walk( self._path, topdown = False ):
                
                if root == self._path:
                    
                    continue
                    
                
                no_files = len( filenames ) == 0
                
                useful_dirnames = [ dirname for dirname in dirnames if os.path.join( root, dirname ) not in deletee_dirs ]
                
                no_useful_dirs = len( useful_dirnames ) == 0
                
                if no_useful_dirs and no_files:
                    
                    deletee_dirs.add( root )
                    
                
            
            for deletee_dir in deletee_dirs:
                
                # may already be gone if a marked parent directory was deleted first
                if os.path.exists( deletee_dir ):
                    
                    HydrusPaths.DeletePath( deletee_dir )
                    
                
            
            if len( deletee_paths ) > 0:
                
                HydrusData.Print( 'Export folder {} deleted {} files and {} folders.'.format( self._name, HydrusData.ToHumanInt( len( deletee_paths ) ), HydrusData.ToHumanInt( len( deletee_dirs ) ) ) )
                
            
        
        if self._delete_from_client_after_export:
            
            local_file_service_keys = HG.client_controller.services_manager.GetServiceKeys( ( HC.LOCAL_FILE_DOMAIN, ) )
            
            service_keys_to_deletee_hashes = collections.defaultdict( list )
            
            for media_result in media_results:
                
                if media_result.IsDeleteLocked():
                    
                    continue
                    
                
                file_hash = media_result.GetHash()
                
                deletee_service_keys = media_result.GetLocationsManager().GetCurrent().intersection( local_file_service_keys )
                
                for deletee_service_key in deletee_service_keys:
                    
                    service_keys_to_deletee_hashes[ deletee_service_key ].append( file_hash )
                    
                
            
            reason = 'Deleted after export to Export Folder "{}".'.format( self._path )
            
            for ( service_key, deletee_hashes ) in service_keys_to_deletee_hashes.items():
                
                chunks_of_hashes = HydrusData.SplitListIntoChunks( deletee_hashes, 64 )
                
                for chunk_of_hashes in chunks_of_hashes:
                    
                    content_update = HydrusData.ContentUpdate( HC.CONTENT_TYPE_FILES, HC.CONTENT_UPDATE_DELETE, chunk_of_hashes, reason = reason )
                    
                    HG.client_controller.WriteSynchronous( 'content_updates', { service_key : [ content_update ] } )
                    
                
            
        
    
    def DoWork( self ):
        """
        Run the export if it is due (period elapsed, or run-now requested) and not paused.

        Any exception pauses the folder, is shown to the user, and is stored as the last error.
        Whenever a run was attempted, the last-checked time is updated, run-now is cleared, and
        the folder is saved.
        """
        
        regular_run_due = self._run_regularly and HydrusData.TimeHasPassed( self._last_checked + self._period )
        
        good_to_go = ( regular_run_due or self._run_now ) and not self._paused
        
        if not good_to_go:
            
            return
            
        
        try:
            
            if self._path == '':
                
                raise Exception( 'No path set for the folder!' )
                
            
            if not os.path.exists( self._path ):
                
                raise Exception( 'The path, "{}", does not exist!'.format( self._path ) )
                
            
            if not os.path.isdir( self._path ):
                
                raise Exception( 'The path, "{}", is not a directory!'.format( self._path ) )
                
            
            self._DoExport()
            
            self._last_error = ''
            
        except Exception as e:
            
            # pause so a broken folder does not raise the same error on every cycle
            self._paused = True
            
            HydrusData.ShowText( 'The export folder "' + self._name + '" encountered an error! It has now been paused. Please check the folder\'s settings and maybe report to hydrus dev if the error is complicated! The error follows:' )
            
            HydrusData.ShowException( e )
            
            self._last_error = str( e )
            
        finally:
            
            self._last_checked = HydrusData.GetNow()
            self._run_now = False
            
            HG.client_controller.WriteSynchronous( 'serialisable', self )
            
        
    
    def GetLastError( self ) -> str:
        """Return the text of the last run's error, or '' if the last run succeeded."""
        
        return self._last_error
        
    
    def GetMetadataRouters( self ) -> typing.Collection[ ClientMetadataMigration.SingleFileMetadataRouter ]:
        """Return the metadata routers run against each exported file."""
        
        return self._metadata_routers
        
    
    def RunNow( self ):
        """Unpause the folder and flag it to run on the next DoWork call."""
        
        self._paused = False
        self._run_now = True
        
    
    def ToTuple( self ):
        """Return the folder's settings as a tuple (metadata routers and last error are not included)."""
        
        return ( self._name, self._path, self._export_type, self._delete_from_client_after_export, self._file_search_context, self._run_regularly, self._period, self._phrase, self._last_checked, self._paused, self._run_now )
        
    

HydrusSerialisable.SERIALISABLE_TYPES_TO_OBJECT_TYPES[ HydrusSerialisable.SERIALISABLE_TYPE_EXPORT_FOLDER ] = ExportFolder