hydrus/hydrus/client/exporting/ClientExportingFiles.py

681 lines
23 KiB
Python

import collections
import os
import re
import typing
from hydrus.core import HydrusConstants as HC
from hydrus.core import HydrusData
from hydrus.core import HydrusExceptions
from hydrus.core import HydrusGlobals as HG
from hydrus.core import HydrusPaths
from hydrus.core import HydrusSerialisable
from hydrus.core import HydrusTags
from hydrus.core import HydrusThreading
from hydrus.client import ClientConstants as CC
from hydrus.client import ClientPaths
from hydrus.client import ClientSearch
from hydrus.client.metadata import ClientMetadataMigration
from hydrus.client.metadata import ClientTags
# Keep generated paths a bit under the common 255-char filesystem limit, leaving
# room for a neighbouring .txt sidecar file and other surprises.
MAX_PATH_LENGTH = 240
def GenerateExportFilename( destination_directory, media, terms, file_index, do_not_use_filenames = None ):
    """Build the export filename (with extension) for *media* from parsed phrase *terms*.
    
    destination_directory: the absolute export directory, used for length budgeting.
    media: a media object providing tags, hash, mime etc. for term substitution.
    terms: list of ( term_type, term ) pairs as produced by ParseExportPhrase.
    file_index: 1-based index substituted for the '#' predicate.
    do_not_use_filenames: optional set of already-taken filenames; when given, a
        ' (n)' suffix is appended until the name is unique.
    
    Raises Exception if the destination directory itself is already too long.
    """
    
    def _flatten_separators( text ):
        # path separators inside tag text would otherwise create surprise subdirectories
        if HC.PLATFORM_WINDOWS:
            return text.replace( '\\', '_' )
        else:
            return text.replace( '/', '_' )
    
    if len( destination_directory ) > ( MAX_PATH_LENGTH - 10 ):
        raise Exception( 'The destination directory is too long!' )
    
    pieces = []
    
    for ( term_type, term ) in terms:
        
        tags_manager = media.GetTagsManager()
        
        if term_type == 'string':
            pieces.append( term )
        elif term_type == 'namespace':
            namespace_tags = tags_manager.GetNamespaceSlice( CC.COMBINED_TAG_SERVICE_KEY, ( term, ), ClientTags.TAG_DISPLAY_ACTUAL )
            subtags = sorted( ( HydrusTags.SplitTag( tag )[1] for tag in namespace_tags ) )
            pieces.append( _flatten_separators( ', '.join( subtags ) ) )
        elif term_type == 'predicate':
            if term in ( 'tags', 'nn tags' ):
                current = tags_manager.GetCurrent( CC.COMBINED_TAG_SERVICE_KEY, ClientTags.TAG_DISPLAY_ACTUAL )
                pending = tags_manager.GetPending( CC.COMBINED_TAG_SERVICE_KEY, ClientTags.TAG_DISPLAY_ACTUAL )
                all_tags = sorted( current.union( pending ) )
                if term == 'nn tags':
                    # 'nn' = no namespace: keep only unnamespaced tags
                    all_tags = [ tag for tag in all_tags if ':' not in tag ]
                else:
                    all_tags = [ HydrusTags.SplitTag( tag )[1] for tag in all_tags ]
                pieces.append( _flatten_separators( ', '.join( all_tags ) ) )
            elif term == 'hash':
                pieces.append( media.GetHash().hex() )
            elif term == 'file_id':
                pieces.append( str( media.GetHashId() ) )
            elif term == '#':
                pieces.append( str( file_index ) )
        elif term_type == 'tag':
            ( namespace, subtag ) = HydrusTags.SplitTag( term )
            if tags_manager.HasTag( subtag, ClientTags.TAG_DISPLAY_ACTUAL ):
                pieces.append( _flatten_separators( subtag ) )
    
    filename = ''.join( pieces )
    
    # strip any leading separators, then collapse runs of (back)slashes to one
    filename = filename.lstrip( os.path.sep )
    
    if HC.PLATFORM_WINDOWS:
        filename = re.sub( r'\\+', r'\\', filename )
    else:
        filename = re.sub( '/+', '/', filename )
    
    filename = HydrusPaths.SanitizePathForExport( destination_directory, filename )
    
    # trim a duplicated extension, then budget the full path against MAX_PATH_LENGTH
    ext = HC.mime_ext_lookup[ media.GetMime() ]
    
    if filename.endswith( ext ):
        filename = filename[ : - len( ext ) ]
    
    example_dest_path = os.path.join( destination_directory, filename + ext )
    
    excess_chars = len( example_dest_path ) - MAX_PATH_LENGTH
    
    if excess_chars > 0:
        filename = filename[ : - excess_chars ]
    
    if do_not_use_filenames is not None:
        
        suffix_index = 1
        
        possible_filename = '{}{}'.format( filename, ext )
        
        while possible_filename in do_not_use_filenames:
            
            possible_filename = '{} ({}){}'.format( filename, suffix_index, ext )
            
            suffix_index += 1
        
        filename = possible_filename
        
    else:
        
        filename += ext
    
    return filename
def GetExportPath():
    """Return the client's absolute export directory, or None if it cannot be determined."""
    
    portable_path = HG.client_controller.options[ 'export_path' ]
    
    if portable_path is not None:
        
        return HydrusPaths.ConvertPortablePathToAbsPath( portable_path )
    
    # nothing configured--fall back to ~/hydrus_export
    desired_path = os.path.join( '~', 'hydrus_export' )
    
    expanded_path = os.path.expanduser( desired_path )
    
    if expanded_path == desired_path:
        
        # expansion did nothing, probably a crazy user setup atm
        return None
    
    HydrusPaths.MakeSureDirectoryExists( expanded_path )
    
    return expanded_path
def _ExpandDelimitedTerms( terms, open_char, close_char, inner_term_type ):
    """Split every 'string' term on open_char/close_char pairs, emitting the enclosed
    text as an ( inner_term_type, text ) term. Non-'string' terms pass through unchanged.
    An unbalanced pair raises ValueError from the tuple unpack, caught by the caller."""
    
    new_terms = []
    
    for ( term_type, term ) in terms:
        
        if term_type == 'string':
            
            while open_char in term:
                
                ( pre, term ) = term.split( open_char, 1 )
                ( inner, term ) = term.split( close_char, 1 )
                
                new_terms.append( ( 'string', pre ) )
                new_terms.append( ( inner_term_type, inner ) )
        
        new_terms.append( ( term_type, term ) )
    
    return new_terms

def ParseExportPhrase( phrase ):
    """Parse an export filename phrase into ( term_type, term ) pairs.
    
    '[...]' becomes a 'namespace' term, '{...}' a 'predicate' term, '(...)' a 'tag'
    term; everything else stays as 'string' terms. Raises Exception (with the cause
    chained) if the phrase has unbalanced delimiters.
    """
    
    try:
        
        terms = [ ( 'string', phrase ) ]
        
        terms = _ExpandDelimitedTerms( terms, '[', ']', 'namespace' )
        terms = _ExpandDelimitedTerms( terms, '{', '}', 'predicate' )
        terms = _ExpandDelimitedTerms( terms, '(', ')', 'tag' )
        
    except Exception as e:
        
        raise Exception( 'Could not parse that phrase: ' + str( e ) ) from e
    
    return terms
class ExportFolder( HydrusSerialisable.SerialisableBaseNamed ):
    """A serialisable export folder: periodically runs a file search and exports the
    results to a directory on disk, optionally synchronising (deleting files no longer
    in the search) and/or deleting the exported files from the client."""
    
    SERIALISABLE_TYPE = HydrusSerialisable.SERIALISABLE_TYPE_EXPORT_FOLDER
    SERIALISABLE_NAME = 'Export Folder'
    SERIALISABLE_VERSION = 6
    
    def __init__(
        self,
        name,
        path = '',
        export_type = HC.EXPORT_FOLDER_TYPE_REGULAR,
        delete_from_client_after_export = False,
        file_search_context = None,
        metadata_routers = None,
        run_regularly = True,
        period = 3600,
        phrase = None,
        last_checked = 0,
        paused = False,
        run_now = False,
        last_error = ''
    ):
        
        HydrusSerialisable.SerialisableBaseNamed.__init__( self, name )
        
        # a synchronising folder mirrors the search, so delete-after-export makes no sense for it
        if export_type == HC.EXPORT_FOLDER_TYPE_SYNCHRONISE:
            
            delete_from_client_after_export = False
        
        if file_search_context is None:
            
            default_location_context = HG.client_controller.new_options.GetDefaultLocalLocationContext()
            
            file_search_context = ClientSearch.FileSearchContext( location_context = default_location_context )
        
        if metadata_routers is None:
            
            metadata_routers = []
        
        if phrase is None:
            
            phrase = HG.client_controller.new_options.GetString( 'export_phrase' )
        
        self._path = path
        self._export_type = export_type
        self._delete_from_client_after_export = delete_from_client_after_export
        self._file_search_context = file_search_context
        self._metadata_routers = HydrusSerialisable.SerialisableList( metadata_routers )
        self._run_regularly = run_regularly
        self._period = period
        self._phrase = phrase
        self._last_checked = last_checked
        self._paused = paused and not run_now # an explicit 'run now' overrides a pause
        self._run_now = run_now
        self._last_error = last_error
    
    def _GetSerialisableInfo( self ):
        
        serialisable_file_search_context = self._file_search_context.GetSerialisableTuple()
        serialisable_metadata_routers = self._metadata_routers.GetSerialisableTuple()
        
        return ( self._path, self._export_type, self._delete_from_client_after_export, serialisable_file_search_context, serialisable_metadata_routers, self._run_regularly, self._period, self._phrase, self._last_checked, self._paused, self._run_now, self._last_error )
    
    def _InitialiseFromSerialisableInfo( self, serialisable_info ):
        
        ( self._path, self._export_type, self._delete_from_client_after_export, serialisable_file_search_context, serialisable_metadata_routers, self._run_regularly, self._period, self._phrase, self._last_checked, self._paused, self._run_now, self._last_error ) = serialisable_info
        
        # re-apply the constructor invariant on load
        if self._export_type == HC.EXPORT_FOLDER_TYPE_SYNCHRONISE:
            
            self._delete_from_client_after_export = False
        
        self._file_search_context = HydrusSerialisable.CreateFromSerialisableTuple( serialisable_file_search_context )
        self._metadata_routers = HydrusSerialisable.CreateFromSerialisableTuple( serialisable_metadata_routers )
    
    def _UpdateSerialisableInfo( self, version, old_serialisable_info ):
        
        # each version step adds the fields introduced at that version with safe defaults
        
        if version == 1:
            
            ( export_type, serialisable_file_search_context, period, phrase, last_checked ) = old_serialisable_info
            
            path = self._name
            
            new_serialisable_info = ( path, export_type, serialisable_file_search_context, period, phrase, last_checked )
            
            return ( 2, new_serialisable_info )
        
        if version == 2:
            
            ( path, export_type, serialisable_file_search_context, period, phrase, last_checked ) = old_serialisable_info
            
            delete_from_client_after_export = False
            
            new_serialisable_info = ( path, export_type, delete_from_client_after_export, serialisable_file_search_context, period, phrase, last_checked )
            
            return ( 3, new_serialisable_info )
        
        if version == 3:
            
            ( path, export_type, delete_from_client_after_export, serialisable_file_search_context, period, phrase, last_checked ) = old_serialisable_info
            
            run_regularly = True
            paused = False
            run_now = False
            
            new_serialisable_info = ( path, export_type, delete_from_client_after_export, serialisable_file_search_context, run_regularly, period, phrase, last_checked, paused, run_now )
            
            return ( 4, new_serialisable_info )
        
        if version == 4:
            
            ( path, export_type, delete_from_client_after_export, serialisable_file_search_context, run_regularly, period, phrase, last_checked, paused, run_now ) = old_serialisable_info
            
            last_error = ''
            
            new_serialisable_info = ( path, export_type, delete_from_client_after_export, serialisable_file_search_context, run_regularly, period, phrase, last_checked, paused, run_now, last_error )
            
            return ( 5, new_serialisable_info )
        
        if version == 5:
            
            ( path, export_type, delete_from_client_after_export, serialisable_file_search_context, run_regularly, period, phrase, last_checked, paused, run_now, last_error ) = old_serialisable_info
            
            metadata_routers = HydrusSerialisable.SerialisableList()
            
            serialisable_metadata_routers = metadata_routers.GetSerialisableTuple()
            
            new_serialisable_info = ( path, export_type, delete_from_client_after_export, serialisable_file_search_context, serialisable_metadata_routers, run_regularly, period, phrase, last_checked, paused, run_now, last_error )
            
            return ( 6, new_serialisable_info )
    
    def _DoExport( self ):
        """Run the search, export all matching files under the export phrase, then apply
        synchronise-deletion and/or delete-from-client as configured. Returns early (without
        error) if export folders are paused or the thread is shutting down."""
        
        query_hash_ids = HG.client_controller.Read( 'file_query_ids', self._file_search_context, apply_implicit_limit = False )
        
        media_results = []
        
        i = 0
        
        base = 256 # fetch media results in chunks so pause/shutdown can interrupt promptly
        
        while i < len( query_hash_ids ):
            
            if HG.client_controller.new_options.GetBoolean( 'pause_export_folders_sync' ) or HydrusThreading.IsThreadShuttingDown():
                
                return
            
            if i == 0:
                
                ( last_i, i ) = ( 0, base )
                
            else:
                
                ( last_i, i ) = ( i, i + base )
            
            sub_query_hash_ids = query_hash_ids[ last_i : i ]
            
            more_media_results = HG.client_controller.Read( 'media_results_from_ids', sub_query_hash_ids )
            
            media_results.extend( more_media_results )
        
        # stable order so the '#' file_index term is deterministic between runs
        media_results.sort( key = lambda mr: mr.GetHashId() )
        
        #
        
        terms = ParseExportPhrase( self._phrase )
        
        previous_paths = set()
        
        for ( root, dirnames, filenames ) in os.walk( self._path ):
            
            previous_paths.update( ( os.path.join( root, filename ) for filename in filenames ) )
        
        sync_paths = set()
        
        client_files_manager = HG.client_controller.client_files_manager
        
        num_copied = 0
        
        for ( i, media_result ) in enumerate( media_results ):
            
            if HG.client_controller.new_options.GetBoolean( 'pause_export_folders_sync' ) or HydrusThreading.IsThreadShuttingDown():
                
                return
            
            hash = media_result.GetHash()
            mime = media_result.GetMime()
            
            try:
                
                source_path = client_files_manager.GetFilePath( hash, mime )
                
            except HydrusExceptions.FileMissingException:
                
                # bug fix: the '{}' placeholder was previously never filled in, so users saw a literal '{}'
                raise Exception( 'A file to be exported, hash "{}", was missing! You should run file maintenance (under database->maintenance->files) to check the files for the export folder\'s search, and possibly all your files.'.format( hash.hex() ) )
            
            filename = GenerateExportFilename( self._path, media_result, terms, i + 1 )
            
            dest_path = os.path.normpath( os.path.join( self._path, filename ) )
            
            # NOTE(review): best-effort traversal guard; a sibling dir sharing this prefix could slip through
            if not dest_path.startswith( self._path ):
                
                raise Exception( 'It seems a destination path for export folder "{}" was above the main export directory! The file was "{}" and its destination path was "{}".'.format( self._path, hash.hex(), dest_path ) )
            
            dest_path_dir = os.path.dirname( dest_path )
            
            HydrusPaths.MakeSureDirectoryExists( dest_path_dir )
            
            if dest_path not in sync_paths:
                
                copied = HydrusPaths.MirrorFile( source_path, dest_path )
                
                if copied:
                    
                    num_copied += 1
                    
                    HydrusPaths.TryToGiveFileNicePermissionBits( dest_path )
            
            # sidecar/metadata export runs for every media result, even if the file itself was already there
            for metadata_router in self._metadata_routers:
                
                metadata_router.Work( media_result, dest_path )
            
            sync_paths.add( dest_path )
        
        if num_copied > 0:
            
            HydrusData.Print( 'Export folder ' + self._name + ' exported ' + HydrusData.ToHumanInt( num_copied ) + ' files.' )
        
        if self._export_type == HC.EXPORT_FOLDER_TYPE_SYNCHRONISE:
            
            # delete anything on disk that the search no longer covers
            deletee_paths = previous_paths.difference( sync_paths )
            
            for deletee_path in deletee_paths:
                
                ClientPaths.DeletePath( deletee_path )
            
            deletee_dirs = set()
            
            # walk bottom-up so an empty leaf dir can mark its parent as empty too
            for ( root, dirnames, filenames ) in os.walk( self._path, topdown = False ):
                
                if root == self._path:
                    
                    continue
                
                no_files = len( filenames ) == 0
                
                useful_dirnames = [ dirname for dirname in dirnames if os.path.join( root, dirname ) not in deletee_dirs ]
                
                no_useful_dirs = len( useful_dirnames ) == 0
                
                if no_useful_dirs and no_files:
                    
                    deletee_dirs.add( root )
            
            for deletee_dir in deletee_dirs:
                
                if os.path.exists( deletee_dir ):
                    
                    HydrusPaths.DeletePath( deletee_dir )
            
            if len( deletee_paths ) > 0:
                
                HydrusData.Print( 'Export folder {} deleted {} files and {} folders.'.format( self._name, HydrusData.ToHumanInt( len( deletee_paths ) ), HydrusData.ToHumanInt( len( deletee_dirs ) ) ) )
        
        if self._delete_from_client_after_export:
            
            local_file_service_keys = HG.client_controller.services_manager.GetServiceKeys( ( HC.LOCAL_FILE_DOMAIN, ) )
            
            service_keys_to_deletee_hashes = collections.defaultdict( list )
            
            for media_result in media_results:
                
                if media_result.IsDeleteLocked():
                    
                    continue
                
                hash = media_result.GetHash()
                
                deletee_service_keys = media_result.GetLocationsManager().GetCurrent().intersection( local_file_service_keys )
                
                for deletee_service_key in deletee_service_keys:
                    
                    service_keys_to_deletee_hashes[ deletee_service_key ].append( hash )
            
            reason = 'Deleted after export to Export Folder "{}".'.format( self._path )
            
            for ( service_key, deletee_hashes ) in service_keys_to_deletee_hashes.items():
                
                # chunk the deletes so no single write transaction gets too large
                chunks_of_hashes = HydrusData.SplitListIntoChunks( deletee_hashes, 64 )
                
                for chunk_of_hashes in chunks_of_hashes:
                    
                    content_update = HydrusData.ContentUpdate( HC.CONTENT_TYPE_FILES, HC.CONTENT_UPDATE_DELETE, chunk_of_hashes, reason = reason )
                    
                    HG.client_controller.WriteSynchronous( 'content_updates', { service_key : [ content_update ] } )
    
    def DoWork( self ):
        """Run the export if due ('run now' or the regular period has elapsed and not paused).
        Any error pauses the folder and is surfaced to the user; the folder object is
        persisted afterwards in all cases."""
        
        regular_run_due = self._run_regularly and HydrusData.TimeHasPassed( self._last_checked + self._period )
        
        good_to_go = ( regular_run_due or self._run_now ) and not self._paused
        
        if not good_to_go:
            
            return
        
        try:
            
            if self._path == '':
                
                raise Exception( 'No path set for the folder!' )
            
            if not os.path.exists( self._path ):
                
                raise Exception( 'The path, "{}", does not exist!'.format( self._path ) )
            
            if not os.path.isdir( self._path ):
                
                raise Exception( 'The path, "{}", is not a directory!'.format( self._path ) )
            
            self._DoExport()
            
            self._last_error = ''
            
        except Exception as e:
            
            self._paused = True
            
            HydrusData.ShowText( 'The export folder "' + self._name + '" encountered an error! It has now been paused. Please check the folder\'s settings and maybe report to hydrus dev if the error is complicated! The error follows:' )
            
            HydrusData.ShowException( e )
            
            self._last_error = str( e )
            
        finally:
            
            self._last_checked = HydrusData.GetNow()
            self._run_now = False
            
            HG.client_controller.WriteSynchronous( 'serialisable', self )
    
    def GetLastError( self ) -> str:
        
        return self._last_error
    
    def GetMetadataRouters( self ) -> typing.Collection[ ClientMetadataMigration.SingleFileMetadataRouter ]:
        
        return self._metadata_routers
    
    def RunNow( self ):
        
        # unpause and flag an immediate run for the next DoWork call
        self._paused = False
        self._run_now = True
    
    def ToTuple( self ):
        
        return ( self._name, self._path, self._export_type, self._delete_from_client_after_export, self._file_search_context, self._run_regularly, self._period, self._phrase, self._last_checked, self._paused, self._run_now )
# Register with the serialisation system so saved export folders deserialise back into ExportFolder objects.
HydrusSerialisable.SERIALISABLE_TYPES_TO_OBJECT_TYPES[ HydrusSerialisable.SERIALISABLE_TYPE_EXPORT_FOLDER ] = ExportFolder