hydrus/include/ClientExporting.py

from . import ClientConstants as CC
from . import ClientPaths
from . import ClientSearch
from . import HydrusConstants as HC
from . import HydrusData
from . import HydrusGlobals as HG
from . import HydrusPaths
from . import HydrusSerialisable
from . import HydrusTags
from . import HydrusThreading
import os
import re
import stat

MAX_PATH_LENGTH = 245 # a bit of padding under 255, for neighbouring .txt files and other surprises

def GenerateExportFilename( destination_directory, media, terms ):
    
    def clean_tag_text( t ):
        
        if HC.PLATFORM_WINDOWS:
            
            t = re.sub( r'\\', '_', t )
            
        else:
            
            t = re.sub( '/', '_', t )
            
        
        return t
        
    
    if len( destination_directory ) > ( MAX_PATH_LENGTH - 10 ):
        
        raise Exception( 'The destination directory is too long!' )
        
    
    filename = ''
    
    for ( term_type, term ) in terms:
        
        tags_manager = media.GetTagsManager()
        
        if term_type == 'string':
            
            filename += term
            
        elif term_type == 'namespace':
            
            tags = tags_manager.GetNamespaceSlice( ( term, ) )
            
            subtags = [ HydrusTags.SplitTag( tag )[1] for tag in tags ]
            
            subtags.sort()
            
            filename += clean_tag_text( ', '.join( subtags ) )
            
        elif term_type == 'predicate':
            
            if term in ( 'tags', 'nn tags' ):
                
                current = tags_manager.GetCurrent()
                pending = tags_manager.GetPending()
                
                tags = list( current.union( pending ) )
                
                if term == 'nn tags':
                    
                    tags = [ tag for tag in tags if ':' not in tag ]
                    
                else:
                    
                    tags = [ HydrusTags.SplitTag( tag )[1] for tag in tags ]
                    
                
                tags.sort()
                
                filename += clean_tag_text( ', '.join( tags ) )
                
            elif term == 'hash':
                
                hash = media.GetHash()
                
                filename += hash.hex()
                
            
        elif term_type == 'tag':
            
            tag = term
            
            ( namespace, subtag ) = HydrusTags.SplitTag( tag )
            
            if tags_manager.HasTag( subtag ):
                
                filename += clean_tag_text( subtag )
                
            
        
    
    if HC.PLATFORM_WINDOWS:
        
        # replace many consecutive backslashes with a single backslash
        
        filename = re.sub( r'\\+', r'\\', filename )
        
        # replace the characters windows forbids in filenames: /, :, *, ?, ", <, >, |
        
        filename = re.sub( r'/|:|\*|\?|"|<|>|\|', '_', filename )
        
    else:
        
        filename = re.sub( '/', '_', filename )
        
    
    #
    
    mime = media.GetMime()
    
    ext = HC.mime_ext_lookup[ mime ]
    
    if filename.endswith( ext ):
        
        filename = filename[ : - len( ext ) ]
        
    
    # clip the filename so the whole destination path fits in MAX_PATH_LENGTH
    
    example_dest_path = os.path.join( destination_directory, filename + ext )
    
    excess_chars = len( example_dest_path ) - MAX_PATH_LENGTH
    
    if excess_chars > 0:
        
        filename = filename[ : - excess_chars ]
        
    
    filename = filename + ext
    
    return filename
    
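# An illustrative call (a sketch only; 'media' and 'export_dir' are hypothetical
# stand-ins--the media object must expose the GetTagsManager, GetHash and GetMime
# methods used above):
#
#   terms = ParseExportPhrase( '[creator] - {hash}' )
#   filename = GenerateExportFilename( export_dir, media, terms )
#
# For a jpeg carrying the tag 'creator:somebody', this comes out as something
# like 'somebody - <64 hex characters>.jpg'.
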
def GetExportPath():
    
    portable_path = HG.client_controller.options[ 'export_path' ]
    
    if portable_path is None:
        
        path = os.path.join( os.path.expanduser( '~' ), 'hydrus_export' )
        
        HydrusPaths.MakeSureDirectoryExists( path )
        
    else:
        
        path = HydrusPaths.ConvertPortablePathToAbsPath( portable_path )
        
    
    return path
    
def ParseExportPhrase( phrase ):
    
    try:
        
        terms = [ ( 'string', phrase ) ]
        
        new_terms = []
        
        for ( term_type, term ) in terms:
            
            if term_type == 'string':
                
                while '[' in term:
                    
                    ( pre, term ) = term.split( '[', 1 )
                    
                    ( namespace, term ) = term.split( ']', 1 )
                    
                    new_terms.append( ( 'string', pre ) )
                    new_terms.append( ( 'namespace', namespace ) )
                    
                
            
            new_terms.append( ( term_type, term ) )
            
        
        terms = new_terms
        
        new_terms = []
        
        for ( term_type, term ) in terms:
            
            if term_type == 'string':
                
                while '{' in term:
                    
                    ( pre, term ) = term.split( '{', 1 )
                    
                    ( predicate, term ) = term.split( '}', 1 )
                    
                    new_terms.append( ( 'string', pre ) )
                    new_terms.append( ( 'predicate', predicate ) )
                    
                
            
            new_terms.append( ( term_type, term ) )
            
        
        terms = new_terms
        
        new_terms = []
        
        for ( term_type, term ) in terms:
            
            if term_type == 'string':
                
                while '(' in term:
                    
                    ( pre, term ) = term.split( '(', 1 )
                    
                    ( tag, term ) = term.split( ')', 1 )
                    
                    new_terms.append( ( 'string', pre ) )
                    new_terms.append( ( 'tag', tag ) )
                    
                
            
            new_terms.append( ( term_type, term ) )
            
        
        terms = new_terms
        
    except Exception as e:
        
        raise Exception( 'Could not parse that phrase: ' + str( e ) )
        
    
    return terms
    
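# For example, the phrase '[creator] - {hash}' parses to
#
#   [ ( 'string', '' ), ( 'namespace', 'creator' ), ( 'string', ' - ' ), ( 'predicate', 'hash' ), ( 'string', '' ) ]
#
# which GenerateExportFilename above then renders term by term into a filename.
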
class ExportFolder( HydrusSerialisable.SerialisableBaseNamed ):
    
    SERIALISABLE_TYPE = HydrusSerialisable.SERIALISABLE_TYPE_EXPORT_FOLDER
    SERIALISABLE_NAME = 'Export Folder'
    SERIALISABLE_VERSION = 3
    
    def __init__( self, name, path = '', export_type = HC.EXPORT_FOLDER_TYPE_REGULAR, delete_from_client_after_export = False, file_search_context = None, period = 3600, phrase = None ):
        
        HydrusSerialisable.SerialisableBaseNamed.__init__( self, name )
        
        if export_type == HC.EXPORT_FOLDER_TYPE_SYNCHRONISE:
            
            delete_from_client_after_export = False
            
        
        if file_search_context is None:
            
            file_search_context = ClientSearch.FileSearchContext( file_service_key = CC.LOCAL_FILE_SERVICE_KEY )
            
        
        if phrase is None:
            
            phrase = HG.client_controller.new_options.GetString( 'export_phrase' )
            
        
        self._path = path
        self._export_type = export_type
        self._delete_from_client_after_export = delete_from_client_after_export
        self._file_search_context = file_search_context
        self._period = period
        self._phrase = phrase
        self._last_checked = 0
        
    
    def _GetSerialisableInfo( self ):
        
        serialisable_file_search_context = self._file_search_context.GetSerialisableTuple()
        
        return ( self._path, self._export_type, self._delete_from_client_after_export, serialisable_file_search_context, self._period, self._phrase, self._last_checked )
        
    
    def _InitialiseFromSerialisableInfo( self, serialisable_info ):
        
        ( self._path, self._export_type, self._delete_from_client_after_export, serialisable_file_search_context, self._period, self._phrase, self._last_checked ) = serialisable_info
        
        if self._export_type == HC.EXPORT_FOLDER_TYPE_SYNCHRONISE:
            
            self._delete_from_client_after_export = False
            
        
        self._file_search_context = HydrusSerialisable.CreateFromSerialisableTuple( serialisable_file_search_context )
        
    
    def _UpdateSerialisableInfo( self, version, old_serialisable_info ):
        
        if version == 1:
            
            ( export_type, serialisable_file_search_context, period, phrase, last_checked ) = old_serialisable_info
            
            # in version 1, the folder's name doubled as its path
            path = self._name
            
            new_serialisable_info = ( path, export_type, serialisable_file_search_context, period, phrase, last_checked )
            
            return ( 2, new_serialisable_info )
            
        
        if version == 2:
            
            ( path, export_type, serialisable_file_search_context, period, phrase, last_checked ) = old_serialisable_info
            
            delete_from_client_after_export = False
            
            new_serialisable_info = ( path, export_type, delete_from_client_after_export, serialisable_file_search_context, period, phrase, last_checked )
            
            return ( 3, new_serialisable_info )
            
        
    
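    # An illustrative upgrade path (hypothetical values): a version 1 tuple
    #
    #   ( HC.EXPORT_FOLDER_TYPE_REGULAR, serialisable_fsc, 3600, '{hash}', 0 )
    #
    # gains the folder's name as its leading 'path' entry at version 2, then a
    # False delete_from_client_after_export flag in the third slot at version 3.
    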
    def DoWork( self ):
        
        try:
            
            if HG.daemon_report_mode:
                
                HydrusData.ShowText( 'Export folder start check: {} {} {} {}'.format( HydrusData.GetNow(), self._last_checked, self._period, HydrusData.TimeHasPassed( self._last_checked + self._period ) ) )
                
            
            if HydrusData.TimeHasPassed( self._last_checked + self._period ):
                
                if self._path != '' and os.path.exists( self._path ) and os.path.isdir( self._path ):
                    
                    query_hash_ids = HG.client_controller.Read( 'file_query_ids', self._file_search_context )
                    
                    media_results = []
                    
                    i = 0
                    
                    base = 256
                    
                    # fetch the media results in chunks of 256, checking for pause/shutdown between chunks
                    while i < len( query_hash_ids ):
                        
                        if HC.options[ 'pause_export_folders_sync' ] or HydrusThreading.IsThreadShuttingDown():
                            
                            return
                            
                        
                        if i == 0: ( last_i, i ) = ( 0, base )
                        else: ( last_i, i ) = ( i, i + base )
                        
                        sub_query_hash_ids = query_hash_ids[ last_i : i ]
                        
                        more_media_results = HG.client_controller.Read( 'media_results_from_ids', sub_query_hash_ids )
                        
                        media_results.extend( more_media_results )
                        
                    
                    #
                    
                    terms = ParseExportPhrase( self._phrase )
                    
                    previous_filenames = set( os.listdir( self._path ) )
                    
                    sync_filenames = set()
                    
                    client_files_manager = HG.client_controller.client_files_manager
                    
                    num_copied = 0
                    
                    for media_result in media_results:
                        
                        if HC.options[ 'pause_export_folders_sync' ] or HydrusThreading.IsThreadShuttingDown():
                            
                            return
                            
                        
                        hash = media_result.GetHash()
                        mime = media_result.GetMime()
                        size = media_result.GetSize()
                        
                        source_path = client_files_manager.GetFilePath( hash, mime )
                        
                        filename = GenerateExportFilename( self._path, media_result, terms )
                        
                        dest_path = os.path.join( self._path, filename )
                        
                        dest_path_dir = os.path.dirname( dest_path )
                        
                        HydrusPaths.MakeSureDirectoryExists( dest_path_dir )
                        
                        if filename not in sync_filenames:
                            
                            copied = HydrusPaths.MirrorFile( source_path, dest_path )
                            
                            if copied:
                                
                                num_copied += 1
                                
                                try: os.chmod( dest_path, stat.S_IWRITE | stat.S_IREAD )
                                except: pass
                                
                            
                        
                        sync_filenames.add( filename )
                        
                    
                    if num_copied > 0:
                        
                        HydrusData.Print( 'Export folder ' + self._name + ' exported ' + HydrusData.ToHumanInt( num_copied ) + ' files.' )
                        
                    
                    # in synchronise mode, anything already in the folder that was not just exported gets deleted
                    if self._export_type == HC.EXPORT_FOLDER_TYPE_SYNCHRONISE:
                        
                        deletee_filenames = previous_filenames.difference( sync_filenames )
                        
                        for deletee_filename in deletee_filenames:
                            
                            deletee_path = os.path.join( self._path, deletee_filename )
                            
                            ClientPaths.DeletePath( deletee_path )
                            
                        
                        if len( deletee_filenames ) > 0:
                            
                            HydrusData.Print( 'Export folder ' + self._name + ' deleted ' + HydrusData.ToHumanInt( len( deletee_filenames ) ) + ' files.' )
                            
                        
                    
                    if self._delete_from_client_after_export:
                        
                        deletee_hashes = { media_result.GetHash() for media_result in media_results }
                        
                        chunks_of_hashes = HydrusData.SplitListIntoChunks( deletee_hashes, 64 )
                        
                        content_updates = [ HydrusData.ContentUpdate( HC.CONTENT_TYPE_FILES, HC.CONTENT_UPDATE_DELETE, chunk_of_hashes ) for chunk_of_hashes in chunks_of_hashes ]
                        
                        for content_update in content_updates:
                            
                            HG.client_controller.WriteSynchronous( 'content_updates', { CC.LOCAL_FILE_SERVICE_KEY : [ content_update ] } )
                            
                        
                    
                
            
        except Exception as e:
            
            HG.client_controller.options[ 'pause_export_folders_sync' ] = True
            
            HydrusData.ShowText( 'The export folder "' + self._name + '" encountered an error! The error will follow! All export folders have now been paused. Please check the folder\'s settings and maybe report to hydrus dev if the error is complicated!' )
            
            HydrusData.ShowException( e )
            
        
        self._last_checked = HydrusData.GetNow()
        
        HG.client_controller.WriteSynchronous( 'serialisable', self )
        
    
    def ToTuple( self ):
        
        return ( self._name, self._path, self._export_type, self._delete_from_client_after_export, self._file_search_context, self._period, self._phrase )
        
    

HydrusSerialisable.SERIALISABLE_TYPES_TO_OBJECT_TYPES[ HydrusSerialisable.SERIALISABLE_TYPE_EXPORT_FOLDER ] = ExportFolder
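
# A minimal construction sketch (hypothetical name, path and phrase; this assumes
# a booted client, since __init__ and DoWork both reach for HG.client_controller):
#
#   export_folder = ExportFolder( 'my export', path = os.path.join( os.path.expanduser( '~' ), 'hydrus_export' ), period = 3600, phrase = '[creator] - {hash}' )
#   export_folder.DoWork()
#
# In the client itself, the export folders daemon does this calling, not user code.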