hydrus/include/ClientExporting.py

552 lines
18 KiB
Python
Raw Normal View History

2019-01-09 22:59:03 +00:00
from . import ClientConstants as CC
from . import ClientPaths
from . import ClientSearch
2019-10-02 23:38:59 +00:00
from . import ClientTags
2019-01-09 22:59:03 +00:00
from . import HydrusConstants as HC
from . import HydrusData
from . import HydrusGlobals as HG
from . import HydrusPaths
from . import HydrusSerialisable
from . import HydrusTags
from . import HydrusThreading
2017-01-04 22:48:23 +00:00
import os
import re
import stat
2019-09-05 00:05:32 +00:00
MAX_PATH_LENGTH = 240 # bit of padding from 255 for .txt neighbouring and other surprises
2017-08-23 21:34:25 +00:00
2019-09-05 00:05:32 +00:00
def GenerateExportFilename( destination_directory, media, terms, append_number = None ):
    """
    Render an export filename for the given media from a parsed export phrase.
    
    terms is a list of ( term_type, term ) pairs as produced by
    ParseExportPhrase. The returned filename includes the file extension and
    is truncated so that the full destination path (including any
    append_number suffix) stays within MAX_PATH_LENGTH. If append_number is
    not None, ' (n)' is appended before the extension — callers use this to
    de-clash duplicate filenames.
    
    Raises Exception if destination_directory alone is already too long.
    """
    
    def clean_tag_text( t ):
        
        # path-separator characters inside tag text would otherwise create
        # surprise subdirectories under the export folder
        if HC.PLATFORM_WINDOWS:
            
            t = re.sub( r'\\', '_', t )
            
        else:
            
            t = re.sub( '/', '_', t )
            
        
        return t
        
    
    if len( destination_directory ) > ( MAX_PATH_LENGTH - 10 ):
        
        raise Exception( 'The destination directory is too long!' )
        
    
    filename = ''
    
    # hoisted out of the loop: the tags manager is invariant across terms
    tags_manager = media.GetTagsManager()
    
    for ( term_type, term ) in terms:
        
        if term_type == 'string':
            
            filename += term
            
        elif term_type == 'namespace':
            
            tags = tags_manager.GetNamespaceSlice( ( term, ), ClientTags.TAG_DISPLAY_SIBLINGS_AND_PARENTS )
            
            subtags = [ HydrusTags.SplitTag( tag )[1] for tag in tags ]
            
            subtags.sort()
            
            filename += clean_tag_text( ', '.join( subtags ) )
            
        elif term_type == 'predicate':
            
            if term in ( 'tags', 'nn tags' ):
                
                current = tags_manager.GetCurrent( CC.COMBINED_TAG_SERVICE_KEY, ClientTags.TAG_DISPLAY_SIBLINGS_AND_PARENTS )
                pending = tags_manager.GetPending( CC.COMBINED_TAG_SERVICE_KEY, ClientTags.TAG_DISPLAY_SIBLINGS_AND_PARENTS )
                
                tags = list( current.union( pending ) )
                
                if term == 'nn tags':
                    
                    # 'nn' = non-namespaced: drop namespaced tags entirely
                    tags = [ tag for tag in tags if ':' not in tag ]
                    
                else:
                    
                    tags = [ HydrusTags.SplitTag( tag )[1] for tag in tags ]
                    
                
                tags.sort()
                
                filename += clean_tag_text( ', '.join( tags ) )
                
            elif term == 'hash':
                
                hash = media.GetHash()
                
                filename += hash.hex()
                
            elif term == 'file_id':
                
                hash_id = media.GetHashId()
                
                filename += str( hash_id )
                
            
        elif term_type == 'tag':
            
            tag = term
            
            ( namespace, subtag ) = HydrusTags.SplitTag( tag )
            
            if tags_manager.HasTag( subtag, ClientTags.TAG_DISPLAY_SIBLINGS_AND_PARENTS ):
                
                filename += clean_tag_text( subtag )
                
            
        
    
    if HC.PLATFORM_WINDOWS:
        
        # collapse many consecutive backslashes into a single backslash
        # (fixed comment: this is about backslashes, not backspaces)
        filename = re.sub( r'\\+', r'\\', filename )
        
        # strip the other characters Windows forbids in filenames:
        # /, :, *, ?, ", <, >, |
        filename = re.sub( r'/|:|\*|\?|"|<|>|\|', '_', filename )
        
    else:
        
        filename = re.sub( '/', '_', filename )
        
    
    #
    
    mime = media.GetMime()
    
    ext = HC.mime_ext_lookup[ mime ]
    
    if filename.endswith( ext ):
        
        filename = filename[ : - len( ext ) ]
        
    
    if append_number is not None:
        
        append_string = ' ({})'.format( append_number )
        
    else:
        
        append_string = ''
        
    
    # bugfix: include the append-number suffix in the length calculation.
    # previously the truncation ignored it, so a near-limit filename plus
    # ' (n)' could push the final path over MAX_PATH_LENGTH
    example_dest_path = os.path.join( destination_directory, filename + append_string + ext )
    
    excess_chars = len( example_dest_path ) - MAX_PATH_LENGTH
    
    if excess_chars > 0:
        
        filename = filename[ : - excess_chars ]
        
    
    filename += append_string + ext
    
    return filename
def GetExportPath():
    """
    Return the client's configured export directory as an absolute path.
    
    When no export path is set in the options, fall back to ~/hydrus_export,
    creating that directory if it does not already exist.
    """
    
    portable_path = HG.client_controller.options[ 'export_path' ]
    
    if portable_path is not None:
        
        return HydrusPaths.ConvertPortablePathToAbsPath( portable_path )
        
    
    default_path = os.path.join( os.path.expanduser( '~' ), 'hydrus_export' )
    
    HydrusPaths.MakeSureDirectoryExists( default_path )
    
    return default_path
def ParseExportPhrase( phrase ):
    """
    Parse an export filename phrase into a list of ( term_type, term ) pairs.
    
    Bracket syntax marks the dynamic terms:
    
        [namespace] -> ( 'namespace', ... )
        {predicate} -> ( 'predicate', ... ), e.g. {hash}, {tags}, {file_id}
        (tag)       -> ( 'tag', ... )
    
    All remaining text is emitted as literal ( 'string', ... ) terms,
    including empty strings between adjacent brackets.
    
    Raises Exception (with the original cause chained) if a bracket pair is
    malformed, e.g. an opening bracket with no matching close.
    """
    
    def expand( terms, open_char, close_char, term_type ):
        
        # split every literal term on open/close pairs, emitting the enclosed
        # text as a term of the given type; non-string terms pass through
        new_terms = []
        
        for ( current_type, term ) in terms:
            
            if current_type == 'string':
                
                while open_char in term:
                    
                    ( pre, term ) = term.split( open_char, 1 )
                    
                    # raises ValueError if close_char is missing, caught below
                    ( extracted, term ) = term.split( close_char, 1 )
                    
                    new_terms.append( ( 'string', pre ) )
                    new_terms.append( ( term_type, extracted ) )
                    
                
            
            new_terms.append( ( current_type, term ) )
            
        
        return new_terms
        
    
    try:
        
        terms = [ ( 'string', phrase ) ]
        
        # de-duplicated: the original repeated this split loop three times
        terms = expand( terms, '[', ']', 'namespace' )
        terms = expand( terms, '{', '}', 'predicate' )
        terms = expand( terms, '(', ')', 'tag' )
        
    except Exception as e:
        
        # chain the cause so the underlying parse error is not lost
        raise Exception( 'Could not parse that phrase: ' + str( e ) ) from e
        
    
    return terms
class ExportFolder( HydrusSerialisable.SerialisableBaseNamed ):
    """
    A serialisable folder that periodically exports the results of a file
    search to a directory on disk.
    
    In synchronise mode the destination is mirrored: files no longer matching
    the search are deleted from the destination, and emptied subdirectories
    are removed. Optionally, exported files can be deleted from the client.
    """
    
    SERIALISABLE_TYPE = HydrusSerialisable.SERIALISABLE_TYPE_EXPORT_FOLDER
    SERIALISABLE_NAME = 'Export Folder'
    SERIALISABLE_VERSION = 4
    
    def __init__( self, name, path = '', export_type = HC.EXPORT_FOLDER_TYPE_REGULAR, delete_from_client_after_export = False, file_search_context = None, run_regularly = True, period = 3600, phrase = None, last_checked = 0, paused = False, run_now = False ):
        
        HydrusSerialisable.SerialisableBaseNamed.__init__( self, name )
        
        # a synchronise folder mirrors the search results; deleting from the
        # client as well would make the destination empty itself on the next run
        if export_type == HC.EXPORT_FOLDER_TYPE_SYNCHRONISE:
            
            delete_from_client_after_export = False
            
        
        if file_search_context is None:
            
            file_search_context = ClientSearch.FileSearchContext( file_service_key = CC.LOCAL_FILE_SERVICE_KEY )
            
        
        if phrase is None:
            
            phrase = HG.client_controller.new_options.GetString( 'export_phrase' )
            
        
        self._path = path
        self._export_type = export_type
        self._delete_from_client_after_export = delete_from_client_after_export
        self._file_search_context = file_search_context
        self._run_regularly = run_regularly
        self._period = period # seconds between regular runs
        self._phrase = phrase # filename phrase, parsed by ParseExportPhrase
        self._last_checked = last_checked
        self._paused = paused and not run_now # an explicit run-now request overrides pause
        self._run_now = run_now
        
    
    def _GetSerialisableInfo( self ):
        
        serialisable_file_search_context = self._file_search_context.GetSerialisableTuple()
        
        return ( self._path, self._export_type, self._delete_from_client_after_export, serialisable_file_search_context, self._run_regularly, self._period, self._phrase, self._last_checked, self._paused, self._run_now )
        
    
    def _InitialiseFromSerialisableInfo( self, serialisable_info ):
        
        ( self._path, self._export_type, self._delete_from_client_after_export, serialisable_file_search_context, self._run_regularly, self._period, self._phrase, self._last_checked, self._paused, self._run_now ) = serialisable_info
        
        # re-apply the same invariant __init__ enforces, in case saved data disagrees
        if self._export_type == HC.EXPORT_FOLDER_TYPE_SYNCHRONISE:
            
            self._delete_from_client_after_export = False
            
        
        self._file_search_context = HydrusSerialisable.CreateFromSerialisableTuple( serialisable_file_search_context )
        
    
    def _UpdateSerialisableInfo( self, version, old_serialisable_info ):
        
        # stepwise migration of older serialised tuples up to the current version
        if version == 1:
            
            ( export_type, serialisable_file_search_context, period, phrase, last_checked ) = old_serialisable_info
            
            # in v1, the folder's name doubled as its path
            path = self._name
            
            new_serialisable_info = ( path, export_type, serialisable_file_search_context, period, phrase, last_checked )
            
            return ( 2, new_serialisable_info )
            
        
        if version == 2:
            
            ( path, export_type, serialisable_file_search_context, period, phrase, last_checked ) = old_serialisable_info
            
            # v3 added the delete-from-client option, defaulting to off
            delete_from_client_after_export = False
            
            new_serialisable_info = ( path, export_type, delete_from_client_after_export, serialisable_file_search_context, period, phrase, last_checked )
            
            return ( 3, new_serialisable_info )
            
        
        if version == 3:
            
            ( path, export_type, delete_from_client_after_export, serialisable_file_search_context, period, phrase, last_checked ) = old_serialisable_info
            
            # v4 added run_regularly/paused/run_now scheduling flags
            run_regularly = True
            paused = False
            run_now = False
            
            new_serialisable_info = ( path, export_type, delete_from_client_after_export, serialisable_file_search_context, run_regularly, period, phrase, last_checked, paused, run_now )
            
            return ( 4, new_serialisable_info )
            
        
    
    def _DoExport( self ):
        
        # fetch matching hash ids, then hydrate media results in chunks so a
        # huge search does not block the db in one giant read
        query_hash_ids = HG.client_controller.Read( 'file_query_ids', self._file_search_context )
        
        media_results = []
        
        i = 0
        
        base = 256 # chunk size per db read
        
        while i < len( query_hash_ids ):
            
            # bail early if the user paused export folders or the client is shutting down
            if HC.options[ 'pause_export_folders_sync' ] or HydrusThreading.IsThreadShuttingDown():
                
                return
                
            
            if i == 0: ( last_i, i ) = ( 0, base )
            else: ( last_i, i ) = ( i, i + base )
            
            sub_query_hash_ids = query_hash_ids[ last_i : i ]
            
            more_media_results = HG.client_controller.Read( 'media_results_from_ids', sub_query_hash_ids )
            
            media_results.extend( more_media_results )
            
        
        # stable, deterministic export order
        media_results.sort( key = lambda mr: mr.GetHashId() )
        
        #
        
        terms = ParseExportPhrase( self._phrase )
        
        # record everything currently in the destination so synchronise mode
        # can later delete what this run did not produce
        previous_paths = set()
        
        for ( root, dirnames, filenames ) in os.walk( self._path ):
            
            previous_paths.update( ( os.path.join( root, filename ) for filename in filenames ) )
            
        
        sync_paths = set()
        
        client_files_manager = HG.client_controller.client_files_manager
        
        num_copied = 0
        
        for media_result in media_results:
            
            if HC.options[ 'pause_export_folders_sync' ] or HydrusThreading.IsThreadShuttingDown():
                
                return
                
            
            hash = media_result.GetHash()
            mime = media_result.GetMime()
            size = media_result.GetSize()
            
            source_path = client_files_manager.GetFilePath( hash, mime )
            
            filename = GenerateExportFilename( self._path, media_result, terms )
            
            dest_path = os.path.normpath( os.path.join( self._path, filename ) )
            
            # defend against a phrase that path-traverses out of the export dir
            if not dest_path.startswith( self._path ):
                
                raise Exception( 'It seems a destination path for export folder "{}" was above the main export directory! The file was "{}" and its destination path was "{}".'.format( self._path, hash.hex(), dest_path ) )
                
            
            dest_path_dir = os.path.dirname( dest_path )
            
            HydrusPaths.MakeSureDirectoryExists( dest_path_dir )
            
            # two media can render to the same filename; only the first wins
            if dest_path not in sync_paths:
                
                copied = HydrusPaths.MirrorFile( source_path, dest_path )
                
                if copied:
                    
                    num_copied += 1
                    
                    # client-managed files are read-only; exports should be editable
                    HydrusPaths.MakeFileWritable( dest_path )
                    
                
            
            sync_paths.add( dest_path )
            
        
        if num_copied > 0:
            
            HydrusData.Print( 'Export folder ' + self._name + ' exported ' + HydrusData.ToHumanInt( num_copied ) + ' files.' )
            
        
        if self._export_type == HC.EXPORT_FOLDER_TYPE_SYNCHRONISE:
            
            # mirror mode: anything that was here before but was not produced
            # by this run gets deleted
            deletee_paths = previous_paths.difference( sync_paths )
            
            for deletee_path in deletee_paths:
                
                ClientPaths.DeletePath( deletee_path )
                
            
            # then sweep up directories left empty; walking bottom-up lets a
            # parent count as empty when all its children are marked deletee
            deletee_dirs = set()
            
            for ( root, dirnames, filenames ) in os.walk( self._path, topdown = False ):
                
                if root == self._path:
                    
                    continue
                    
                
                no_files = len( filenames ) == 0
                
                useful_dirnames = [ dirname for dirname in dirnames if os.path.join( root, dirname ) not in deletee_dirs ]
                
                no_useful_dirs = len( useful_dirnames ) == 0
                
                if no_useful_dirs and no_files:
                    
                    deletee_dirs.add( root )
                    
                
            
            for deletee_dir in deletee_dirs:
                
                if os.path.exists( deletee_dir ):
                    
                    HydrusPaths.DeletePath( deletee_dir )
                    
                
            
            if len( deletee_paths ) > 0:
                
                HydrusData.Print( 'Export folder {} deleted {} files and {} folders.'.format( self._name, HydrusData.ToHumanInt( len( deletee_paths ) ), HydrusData.ToHumanInt( len( deletee_dirs ) ) ) )
                
            
        
        if self._delete_from_client_after_export:
            
            deletee_hashes = { media_result.GetHash() for media_result in media_results }
            
            # chunked so each db write transaction stays small
            chunks_of_hashes = HydrusData.SplitListIntoChunks( deletee_hashes, 64 )
            
            reason = 'Deleted after export to Export Folder "{}".'.format( self._path )
            
            content_updates = [ HydrusData.ContentUpdate( HC.CONTENT_TYPE_FILES, HC.CONTENT_UPDATE_DELETE, chunk_of_hashes, reason = reason ) for chunk_of_hashes in chunks_of_hashes ]
            
            for content_update in content_updates:
                
                HG.client_controller.WriteSynchronous( 'content_updates', { CC.LOCAL_FILE_SERVICE_KEY : [ content_update ] } )
                
            
        
    
    def DoWork( self ):
        
        # run if either the regular period has elapsed or a run-now was
        # requested, and the folder is not paused
        regular_run_due = self._run_regularly and HydrusData.TimeHasPassed( self._last_checked + self._period )
        
        good_to_go = ( regular_run_due or self._run_now ) and not self._paused
        
        if not good_to_go:
            
            return
            
        
        try:
            
            if self._path == '':
                
                raise Exception( 'No path set for the folder!' )
                
            
            if not os.path.exists( self._path ):
                
                raise Exception( 'The path, "{}", does not exist!'.format( self._path ) )
                
            
            if not os.path.isdir( self._path ):
                
                raise Exception( 'The path, "{}", is not a directory!'.format( self._path ) )
                
            
            self._DoExport()
            
        except Exception as e:
            
            # pause on error so a broken folder does not spam failures every period
            self._paused = True
            
            HydrusData.ShowText( 'The export folder "' + self._name + '" encountered an error! The error will follow! It has now been paused. Please check the folder\'s settings and maybe report to hydrus dev if the error is complicated!' )
            
            HydrusData.ShowException( e )
            
        finally:
            
            # always record the attempt and persist state, even on failure
            self._last_checked = HydrusData.GetNow()
            self._run_now = False
            
            HG.client_controller.WriteSynchronous( 'serialisable', self )
            
        
    
    def RunNow( self ):
        
        # queue an immediate run on the next DoWork, clearing any pause
        self._paused = False
        self._run_now = True
        
    
    def ToTuple( self ):
        
        # flat tuple of settings for the edit dialog
        return ( self._name, self._path, self._export_type, self._delete_from_client_after_export, self._file_search_context, self._run_regularly, self._period, self._phrase, self._last_checked, self._paused, self._run_now )
        
    
2017-01-04 22:48:23 +00:00
HydrusSerialisable.SERIALISABLE_TYPES_TO_OBJECT_TYPES[ HydrusSerialisable.SERIALISABLE_TYPE_EXPORT_FOLDER ] = ExportFolder