hydrus/include/ClientFiles.py

import ClientConstants as CC
import ClientData
import gc
import HydrusConstants as HC
import HydrusData
import HydrusExceptions
import HydrusFileHandling
import HydrusGlobals
import HydrusPaths
import HydrusSerialisable
import itertools
import os
import random
import re
import shutil
import stat
import wx

def GenerateExportFilename( media, terms ):
    
    mime = media.GetMime()
    
    filename = ''
    
    for ( term_type, term ) in terms:
        
        tags_manager = media.GetTagsManager()
        
        if term_type == 'string': filename += term
        elif term_type == 'namespace':
            
            tags = tags_manager.GetNamespaceSlice( ( term, ), collapse_siblings = True )
            
            # split on the first colon only, so subtags that contain colons survive intact
            filename += ', '.join( [ tag.split( ':', 1 )[1] for tag in tags ] )
            
        elif term_type == 'predicate':
            
            if term in ( 'tags', 'nn tags' ):
                
                current = tags_manager.GetCurrent()
                pending = tags_manager.GetPending()
                
                tags = list( current.union( pending ) )
                
                if term == 'nn tags': tags = [ tag for tag in tags if ':' not in tag ]
                else: tags = [ tag if ':' not in tag else tag.split( ':', 1 )[1] for tag in tags ]
                
                tags.sort()
                
                filename += ', '.join( tags )
                
            elif term == 'hash':
                
                hash = media.GetHash()
                
                filename += hash.encode( 'hex' )
                
            
        elif term_type == 'tag':
            
            if ':' in term: term = term.split( ':', 1 )[1]
            
            if tags_manager.HasTag( term ): filename += term
            
        
    
    if HC.PLATFORM_WINDOWS:
        
        # replace characters that are illegal in Windows filenames
        filename = re.sub( r'\\|/|:|\*|\?|"|<|>|\|', '_', filename, flags = re.UNICODE )
        
    else:
        
        filename = re.sub( '/', '_', filename, flags = re.UNICODE )
        
    
    return filename + HC.mime_ext_lookup[ mime ]
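
# A quick sketch of term rendering (hypothetical media, for illustration
# only): for a media object tagged 'series:example' whose mime is image/jpeg,
# the terms
#
#   [ ( 'namespace', 'series' ), ( 'string', ' - ' ), ( 'predicate', 'hash' ) ]
#
# would render as 'example - <64 hex chars>.jpg'. Characters that are illegal
# in filenames on the current platform are replaced with underscores first.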

def GetAllPaths( raw_paths ):
    
    file_paths = []
    
    paths_to_process = raw_paths
    
    while len( paths_to_process ) > 0:
        
        next_paths_to_process = []
        
        for path in paths_to_process:
            
            if os.path.isdir( path ):
                
                subpaths = [ os.path.join( path, filename ) for filename in os.listdir( path ) ]
                
                next_paths_to_process.extend( subpaths )
                
            else:
                
                file_paths.append( path )
                
            
        
        paths_to_process = next_paths_to_process
        
    
    gc.collect()
    
    return file_paths

def GetAllThumbnailHashes():
    
    thumbnail_hashes = set()
    
    for hash in IterateAllThumbnailHashes():
        
        thumbnail_hashes.add( hash )
        
    
    return thumbnail_hashes

def GetExpectedFilePath( hash, mime ):
    
    hash_encoded = hash.encode( 'hex' )
    
    first_two_chars = hash_encoded[:2]
    
    return os.path.join( HC.CLIENT_FILES_DIR, first_two_chars, hash_encoded + HC.mime_ext_lookup[ mime ] )
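
# A note on the sharded layout (hypothetical hash, for illustration only): a
# file whose hash hex starts with 'ab' lands in the 'ab' subdirectory, e.g.
#
#   <HC.CLIENT_FILES_DIR>/ab/ab01...23ef.jpg
#
# The two leading hex characters give 256 buckets, which keeps any single
# directory from growing too large.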

def GetExportPath():
    
    options = HydrusGlobals.client_controller.GetOptions()
    
    path = options[ 'export_path' ]
    
    if path is None:
        
        path = os.path.join( os.path.expanduser( '~' ), 'hydrus_export' )
        
        if not os.path.exists( path ):
            
            os.mkdir( path )
            
        
    
    path = os.path.normpath( path ) # converts slashes to backslashes for windows
    
    path = HydrusPaths.ConvertPortablePathToAbsPath( path )
    
    return path

def GetFilePath( hash, mime = None ):
    
    if mime is None:
        
        path = None
        
        for potential_mime in HC.ALLOWED_MIMES:
            
            potential_path = GetExpectedFilePath( hash, potential_mime )
            
            if os.path.exists( potential_path ):
                
                path = potential_path
                
                break
                
            
        
    else:
        
        path = GetExpectedFilePath( hash, mime )
        
    
    if path is None or not os.path.exists( path ):
        
        raise HydrusExceptions.NotFoundException( 'File not found!' )
        
    
    return path

def GetExpectedThumbnailPath( hash, full_size = True ):
    
    hash_encoded = hash.encode( 'hex' )
    
    first_two_chars = hash_encoded[:2]
    
    path = os.path.join( HC.CLIENT_THUMBNAILS_DIR, first_two_chars, hash_encoded )
    
    if not full_size:
        
        path += '_resized'
        
    
    return path
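
# GetThumbnailPath below generates resized thumbnails lazily: if the
# '_resized' copy is missing, it is rebuilt from the full-size thumbnail on
# first request; when the configured dimensions match
# HC.UNSCALED_THUMBNAIL_DIMENSIONS, the full-size path is returned instead and
# no resized copy is written.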

def GetThumbnailPath( hash, full_size = True ):
    
    path = GetExpectedThumbnailPath( hash, full_size )
    
    if not os.path.exists( path ):
        
        if full_size:
            
            raise HydrusExceptions.NotFoundException( 'Thumbnail not found!' )
            
        else:
            
            full_size_path = GetThumbnailPath( hash, True )
            
            options = HydrusGlobals.client_controller.GetOptions()
            
            thumbnail_dimensions = options[ 'thumbnail_dimensions' ]
            
            if tuple( thumbnail_dimensions ) == HC.UNSCALED_THUMBNAIL_DIMENSIONS:
                
                path = full_size_path
                
            else:
                
                thumbnail_resized = HydrusFileHandling.GenerateThumbnail( full_size_path, thumbnail_dimensions )
                
                with open( path, 'wb' ) as f:
                    
                    f.write( thumbnail_resized )
                    
                
            
        
    
    return path

def GetExpectedContentUpdatePackagePath( service_key, begin, subindex ):
    
    return os.path.join( GetExpectedUpdateDir( service_key ), str( begin ) + '_' + str( subindex ) + '.json' )
    
def GetExpectedServiceUpdatePackagePath( service_key, begin ):
    
    return os.path.join( GetExpectedUpdateDir( service_key ), str( begin ) + '_metadata.json' )
    
def GetExpectedUpdateDir( service_key ):
    
    return os.path.join( HC.CLIENT_UPDATES_DIR, service_key.encode( 'hex' ) )
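
# Update packages are sharded per service: each service key gets a directory
# named with its hex encoding, holding '<begin>_<subindex>.json' content
# packages and '<begin>_metadata.json' service packages.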

def IterateAllFileHashes():
    
    for path in IterateAllFilePaths():
        
        ( base, filename ) = os.path.split( path )
        
        result = filename.split( '.', 1 )
        
        if len( result ) != 2: continue
        
        ( hash_encoded, ext ) = result
        
        try: hash = hash_encoded.decode( 'hex' )
        except TypeError: continue
        
        yield hash

def IterateAllFilePaths():
    
    hex_chars = '0123456789abcdef'
    
    for ( one, two ) in itertools.product( hex_chars, hex_chars ):
        
        dir = os.path.join( HC.CLIENT_FILES_DIR, one + two )
        
        next_paths = os.listdir( dir )
        
        for path in next_paths:
            
            yield os.path.join( dir, path )

def IterateAllThumbnailHashes():
    
    for path in IterateAllThumbnailPaths():
        
        ( base, filename ) = os.path.split( path )
        
        if not filename.endswith( '_resized' ):
            
            try: hash = filename.decode( 'hex' )
            except TypeError: continue
            
            yield hash

def IterateAllThumbnailPaths():
    
    hex_chars = '0123456789abcdef'
    
    for ( one, two ) in itertools.product( hex_chars, hex_chars ):
        
        dir = os.path.join( HC.CLIENT_THUMBNAILS_DIR, one + two )
        
        next_paths = os.listdir( dir )
        
        for path in next_paths:
            
            yield os.path.join( dir, path )

def ParseExportPhrase( phrase ):
    
    try:
        
        terms = [ ( 'string', phrase ) ]
        
        new_terms = []
        
        for ( term_type, term ) in terms:
            
            if term_type == 'string':
                
                while '[' in term:
                    
                    ( pre, term ) = term.split( '[', 1 )
                    
                    ( namespace, term ) = term.split( ']', 1 )
                    
                    new_terms.append( ( 'string', pre ) )
                    new_terms.append( ( 'namespace', namespace ) )
                    
                
            
            new_terms.append( ( term_type, term ) )
            
        
        terms = new_terms
        
        new_terms = []
        
        for ( term_type, term ) in terms:
            
            if term_type == 'string':
                
                while '{' in term:
                    
                    ( pre, term ) = term.split( '{', 1 )
                    
                    ( predicate, term ) = term.split( '}', 1 )
                    
                    new_terms.append( ( 'string', pre ) )
                    new_terms.append( ( 'predicate', predicate ) )
                    
                
            
            new_terms.append( ( term_type, term ) )
            
        
        terms = new_terms
        
        new_terms = []
        
        for ( term_type, term ) in terms:
            
            if term_type == 'string':
                
                while '(' in term:
                    
                    ( pre, term ) = term.split( '(', 1 )
                    
                    ( tag, term ) = term.split( ')', 1 )
                    
                    new_terms.append( ( 'string', pre ) )
                    new_terms.append( ( 'tag', tag ) )
                    
                
            
            new_terms.append( ( term_type, term ) )
            
        
        terms = new_terms
        
    except: raise Exception( 'Could not parse that phrase!' )
    
    return terms
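
# A worked example (hypothetical phrase, for illustration only): parsing
# '[series] - {hash}' yields
#
#   [ ( 'string', '' ), ( 'namespace', 'series' ), ( 'string', ' - ' ), ( 'predicate', 'hash' ), ( 'string', '' ) ]
#
# which GenerateExportFilename renders term by term, so the export phrase
# language is just these three bracket styles plus literal text.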

class ExportFolder( HydrusSerialisable.SerialisableBaseNamed ):
    
    SERIALISABLE_TYPE = HydrusSerialisable.SERIALISABLE_TYPE_EXPORT_FOLDER
    SERIALISABLE_VERSION = 1
    
    def __init__( self, name, export_type = HC.EXPORT_FOLDER_TYPE_REGULAR, file_search_context = None, period = 3600, phrase = '{hash}' ):
        
        HydrusSerialisable.SerialisableBaseNamed.__init__( self, name )
        
        self._export_type = export_type
        self._file_search_context = file_search_context
        self._period = period
        self._phrase = phrase
        self._last_checked = 0
        
    
    def _GetSerialisableInfo( self ):
        
        serialisable_file_search_context = self._file_search_context.GetSerialisableTuple()
        
        return ( self._export_type, serialisable_file_search_context, self._period, self._phrase, self._last_checked )
        
    
    def _InitialiseFromSerialisableInfo( self, serialisable_info ):
        
        ( self._export_type, serialisable_file_search_context, self._period, self._phrase, self._last_checked ) = serialisable_info
        
        self._file_search_context = HydrusSerialisable.CreateFromSerialisableTuple( serialisable_file_search_context )
    
    def DoWork( self ):
        
        if HydrusGlobals.special_debug_mode: HydrusData.ShowText( self._name + ' checking' )
        
        if HydrusData.TimeHasPassed( self._last_checked + self._period ):
            
            if HydrusGlobals.special_debug_mode: HydrusData.ShowText( self._name + ' time to begin' )
            
            folder_path = self._name
            
            if os.path.exists( folder_path ) and os.path.isdir( folder_path ):
                
                if HydrusGlobals.special_debug_mode: HydrusData.ShowText( self._name + ' folder checks out ok' )
                
                query_hash_ids = HydrusGlobals.client_controller.Read( 'file_query_ids', self._file_search_context )
                
                if HydrusGlobals.special_debug_mode: HydrusData.ShowText( self._name + ' results found: ' + str( len( query_hash_ids ) ) )
                
                query_hash_ids = list( query_hash_ids )
                
                random.shuffle( query_hash_ids )
                
                limit = self._file_search_context.GetSystemPredicates().GetLimit()
                
                if limit is not None: query_hash_ids = query_hash_ids[ : limit ]
                
                media_results = []
                
                i = 0
                
                base = 256
                
                while i < len( query_hash_ids ):
                    
                    if HydrusGlobals.special_debug_mode: HydrusData.ShowText( self._name + ' building results: ' + str( i ) + '/' + str( len( query_hash_ids ) ) )
                    
                    if HC.options[ 'pause_export_folders_sync' ]: return
                    
                    if i == 0: ( last_i, i ) = ( 0, base )
                    else: ( last_i, i ) = ( i, i + base )
                    
                    sub_query_hash_ids = query_hash_ids[ last_i : i ]
                    
                    more_media_results = HydrusGlobals.client_controller.Read( 'media_results_from_ids', CC.LOCAL_FILE_SERVICE_KEY, sub_query_hash_ids )
                    
                    media_results.extend( more_media_results )
                    
                
                if HydrusGlobals.special_debug_mode: HydrusData.ShowText( self._name + ' media_results: ' + str( len( media_results ) ) )
                
                #
                
                terms = ParseExportPhrase( self._phrase )
                
                previous_filenames = set( os.listdir( HydrusData.ToUnicode( folder_path ) ) )
                
                if HydrusGlobals.special_debug_mode: HydrusData.ShowText( self._name + ' existing filenames: ' + str( len( previous_filenames ) ) )
                
                if HydrusGlobals.special_debug_mode:
                    
                    for previous_filename in previous_filenames:
                        
                        HydrusData.Print( previous_filename )
                        
                    
                
                sync_filenames = set()
                
                for media_result in media_results:
                    
                    hash = media_result.GetHash()
                    mime = media_result.GetMime()
                    size = media_result.GetSize()
                    
                    source_path = GetFilePath( hash, mime )
                    
                    filename = GenerateExportFilename( media_result, terms )
                    
                    dest_path = os.path.join( folder_path, filename )
                    
                    if HydrusGlobals.special_debug_mode: HydrusData.ShowText( self._name + ' dest path: ' + dest_path )
                    
                    do_copy = True
                    
                    if filename in sync_filenames:
                        
                        if HydrusGlobals.special_debug_mode: HydrusData.ShowText( self._name + ' it was already attempted this run' )
                        
                        do_copy = False
                        
                    elif os.path.exists( dest_path ):
                        
                        if HydrusGlobals.special_debug_mode: HydrusData.ShowText( self._name + ' it exists' )
                        
                        dest_info = os.lstat( dest_path )
                        
                        dest_size = dest_info[6] # st_size
                        
                        if dest_size == size:
                            
                            if HydrusGlobals.special_debug_mode: HydrusData.ShowText( self._name + ' and the file size is the same' )
                            
                            do_copy = False
                            
                        
                    
                    if HydrusGlobals.special_debug_mode: HydrusData.ShowText( self._name + ' copy decision: ' + str( do_copy ) )
                    
                    if do_copy:
                        
                        if HydrusGlobals.special_debug_mode: HydrusData.ShowText( self._name + ' copy started' )
                        
                        shutil.copy( source_path, dest_path )
                        shutil.copystat( source_path, dest_path )
                        
                        try: os.chmod( dest_path, stat.S_IWRITE | stat.S_IREAD )
                        except: pass
                        
                        if HydrusGlobals.special_debug_mode: HydrusData.ShowText( self._name + ' copy ok' )
                        
                    
                    sync_filenames.add( filename )
                    
                
                if HydrusGlobals.special_debug_mode: HydrusData.ShowText( self._name + ' media results done' )
                
                if self._export_type == HC.EXPORT_FOLDER_TYPE_SYNCHRONISE:
                    
                    if HydrusGlobals.special_debug_mode: HydrusData.ShowText( self._name + ' inside sync delete code' )
                    
                    deletee_filenames = previous_filenames.difference( sync_filenames )
                    
                    if HydrusGlobals.special_debug_mode: HydrusData.ShowText( self._name + ' delete filenames: ' + str( len( deletee_filenames ) ) )
                    
                    for deletee_filename in deletee_filenames:
                        
                        deletee_path = os.path.join( folder_path, deletee_filename )
                        
                        if HydrusGlobals.special_debug_mode: HydrusData.Print( deletee_path )
                        
                        ClientData.DeletePath( deletee_path )
                        
                    
                
            
            self._last_checked = HydrusData.GetNow()
            
            if HydrusGlobals.special_debug_mode: HydrusData.ShowText( self._name + ' writing self back to db' )
            
            HydrusGlobals.client_controller.WriteSynchronous( 'serialisable', self )
            
            if HydrusGlobals.special_debug_mode: HydrusData.ShowText( self._name + ' saved ok' )
    
    def ToTuple( self ):
        
        return ( self._name, self._export_type, self._file_search_context, self._period, self._phrase )
        
    
    def SetTuple( self, folder_path, export_type, file_search_context, period, phrase ):
        
        self._name = folder_path
        self._export_type = export_type
        self._file_search_context = file_search_context
        self._period = period
        self._phrase = phrase
        
    

HydrusSerialisable.SERIALISABLE_TYPES_TO_OBJECT_TYPES[ HydrusSerialisable.SERIALISABLE_TYPE_EXPORT_FOLDER ] = ExportFolder
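
# A minimal usage sketch (hypothetical values, for illustration only) - the
# folder's path doubles as its name, and DoWork is expected to be called
# periodically by the client's maintenance loop:
#
#   fsc = ... # a serialisable file search context for the files to export
#   export_folder = ExportFolder( '/home/user/exports', export_type = HC.EXPORT_FOLDER_TYPE_SYNCHRONISE, file_search_context = fsc, period = 3600, phrase = '{hash}' )
#   export_folder.DoWork() # copies matching files in; SYNCHRONISE also deletes strays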