hydrus/include/ClientFiles.py

570 lines
18 KiB
Python
Raw Normal View History

2015-06-24 22:10:14 +00:00
import ClientConstants as CC
import ClientData
2015-03-18 21:46:29 +00:00
import gc
import HydrusConstants as HC
2015-06-24 22:10:14 +00:00
import HydrusData
2015-03-18 21:46:29 +00:00
import HydrusExceptions
import HydrusFileHandling
2015-08-26 21:18:39 +00:00
import HydrusGlobals
2015-11-25 22:00:57 +00:00
import HydrusPaths
2015-06-24 22:10:14 +00:00
import HydrusSerialisable
2015-03-18 21:46:29 +00:00
import itertools
2015-06-24 22:10:14 +00:00
import os
import random
2015-09-02 23:16:09 +00:00
import re
2015-06-24 22:10:14 +00:00
import shutil
import stat
2015-05-13 20:22:39 +00:00
import wx
2015-03-18 21:46:29 +00:00
2015-06-24 22:10:14 +00:00
def GenerateExportFilename( media, terms ):
    """Build an export filename for a media object from parsed phrase terms.
    
    terms is a list of ( term_type, term ) tuples as produced by
    ParseExportPhrase. Characters that are invalid in filenames are replaced
    with underscores and the file's mime extension is appended.
    """
    
    mime = media.GetMime()
    
    # invariant across all terms, so fetch it once rather than per-iteration
    tags_manager = media.GetTagsManager()
    
    filename = ''
    
    for ( term_type, term ) in terms:
        
        if term_type == 'string':
            
            filename += term
            
        elif term_type == 'namespace':
            
            tags = tags_manager.GetNamespaceSlice( ( term, ), collapse_siblings = True )
            
            # split( ':', 1 ) so a tag value that itself contains a colon is kept whole
            filename += ', '.join( [ tag.split( ':', 1 )[1] for tag in tags ] )
            
        elif term_type == 'predicate':
            
            if term in ( 'tags', 'nn tags' ):
                
                current = tags_manager.GetCurrent()
                pending = tags_manager.GetPending()
                
                tags = list( current.union( pending ) )
                
                if term == 'nn tags': tags = [ tag for tag in tags if ':' not in tag ] # unnamespaced tags only
                else: tags = [ tag if ':' not in tag else tag.split( ':', 1 )[1] for tag in tags ] # all tags, namespaces stripped
                
                tags.sort()
                
                filename += ', '.join( tags )
                
            elif term == 'hash':
                
                hash = media.GetHash()
                
                filename += hash.encode( 'hex' )
                
            
        elif term_type == 'tag':
            
            if ':' in term: term = term.split( ':', 1 )[1]
            
            # only include the tag if the media actually has it
            if tags_manager.HasTag( term ): filename += term
            
        
    
    if HC.PLATFORM_WINDOWS:
        
        # strip all characters NTFS forbids in filenames
        filename = re.sub( '\\\\|/|:|\\*|\\?|"|<|>|\\|', '_', filename, flags = re.UNICODE )
        
    else:
        
        filename = re.sub( '/', '_', filename, flags = re.UNICODE )
        
    
    return filename + HC.mime_ext_lookup[ mime ]
2015-06-24 22:10:14 +00:00
2015-03-18 21:46:29 +00:00
def GetAllPaths( raw_paths ):
    # Expand a mixed list of file and directory paths into a flat list of
    # every file path beneath them, walking directories breadth-first.
    
    file_paths = []
    
    current_level = raw_paths
    
    while len( current_level ) > 0:
        
        next_level = []
        
        for path in current_level:
            
            if os.path.isdir( path ):
                
                # queue this directory's children for the next pass
                next_level.extend( os.path.join( path, filename ) for filename in os.listdir( path ) )
                
            else:
                
                file_paths.append( path )
                
            
        
        current_level = next_level
        
    
    # directory listings can be large; prompt a cleanup before returning
    gc.collect()
    
    return file_paths
2015-12-02 22:32:18 +00:00
2015-03-18 21:46:29 +00:00
def GetAllThumbnailHashes():
    # Collect every valid full-size thumbnail hash on disk into a set.
    
    return { hash for hash in IterateAllThumbnailHashes() }
2015-06-24 22:10:14 +00:00
def GetExportPath():
    # Resolve the user's configured export directory, falling back to
    # ~/hydrus_export (created on demand) when none is configured.
    
    options = HydrusGlobals.client_controller.GetOptions()
    
    export_path = options[ 'export_path' ]
    
    if export_path is None:
        
        export_path = os.path.join( os.path.expanduser( '~' ), 'hydrus_export' )
        
        if not os.path.exists( export_path ):
            
            os.makedirs( export_path )
            
        
    
    export_path = os.path.normpath( export_path ) # converts slashes to backslashes for windows
    
    return HydrusPaths.ConvertPortablePathToAbsPath( export_path )
2015-12-02 22:32:18 +00:00
def GetExpectedFilePath( location, hash, mime ):
    # Compute where a file with this hash and mime should live under location;
    # files are bucketed by the first two hex characters of the hash.
    
    hex_hash = hash.encode( 'hex' )
    
    return os.path.join( location, hex_hash[:2], hex_hash + HC.mime_ext_lookup[ mime ] )
def GetExpectedThumbnailPath( hash, full_size = True ):
    # Compute where this file's thumbnail should live; the resized copy of a
    # thumbnail carries a '_resized' suffix next to the full-size one.
    
    hex_hash = hash.encode( 'hex' )
    
    filename = hex_hash if full_size else hex_hash + '_resized'
    
    return os.path.join( HC.CLIENT_THUMBNAILS_DIR, hex_hash[:2], filename )
def GetFilePath( location, hash, mime = None ):
    """Return the on-disk path of the file with the given hash under location.
    
    When mime is None, every allowed mime's expected path is probed until one
    exists on disk. Raises HydrusExceptions.FileMissingException when no file
    can be found.
    """
    
    if mime is None:
        
        path = None
        
        for potential_mime in HC.ALLOWED_MIMES:
            
            potential_path = GetExpectedFilePath( location, hash, potential_mime )
            
            if os.path.exists( potential_path ):
                
                path = potential_path
                
                break
                
            
        
    else:
        
        path = GetExpectedFilePath( location, hash, mime )
        
    
    if path is None:
        
        raise HydrusExceptions.FileMissingException( 'File not found in directory ' + location + '!' )
        
    elif not os.path.exists( path ):
        
        # a mime was specified but the expected path does not exist on disk
        # (fixed message: the original literal read 'File not found in path + ')
        raise HydrusExceptions.FileMissingException( 'File not found in path ' + path + '!' )
        
    
    return path
2015-06-24 22:10:14 +00:00
def GetThumbnailPath( hash, full_size = True ):
    # Return the on-disk path of the thumbnail for this file hash, regenerating
    # the thumbnail if it is missing: the full-size thumbnail is rebuilt from
    # the original file, the resized one from the full-size thumbnail.
    
    if not full_size:
        
        options = HydrusGlobals.client_controller.GetOptions()
        
        thumbnail_dimensions = options[ 'thumbnail_dimensions' ]
        
        # if the configured thumbnail size equals the unscaled size, a resized
        # copy would be identical, so serve the full-size thumbnail instead
        if tuple( thumbnail_dimensions ) == HC.UNSCALED_THUMBNAIL_DIMENSIONS:
            
            full_size = True
            
        
    
    path = GetExpectedThumbnailPath( hash, full_size )
    
    if not os.path.exists( path ):
        
        if full_size:
            
            # regenerate the full-size thumbnail from the original file
            
            client_files_manager = HydrusGlobals.client_controller.GetClientFilesManager()
            
            try:
                
                file_path = client_files_manager.GetFilePath( hash )
                
            except HydrusExceptions.FileMissingException:
                
                # the original is gone too -- nothing we can regenerate from
                raise HydrusExceptions.FileMissingException( 'The thumbnail for file ' + hash.encode( 'hex' ) + ' was missing. It could not be regenerated because the original file was also missing. This event could indicate hard drive corruption or an unplugged external drive. Please check everything is ok.' )
                
            
            try:
                
                thumbnail = HydrusFileHandling.GenerateThumbnail( file_path )
                
            except Exception as e:
                
                HydrusData.ShowException( e )
                
                raise HydrusExceptions.FileMissingException( 'The thumbnail for file ' + hash.encode( 'hex' ) + ' was missing. It could not be regenerated from the original file for the above reason. This event could indicate hard drive corruption. Please check everything is ok.' )
                
            
            try:
                
                with open( path, 'wb' ) as f:
                    
                    f.write( thumbnail )
                    
                
            except Exception as e:
                
                HydrusData.ShowException( e )
                
                raise HydrusExceptions.FileMissingException( 'The thumbnail for file ' + hash.encode( 'hex' ) + ' was missing. It was regenerated from the original file, but hydrus could not write it to the location ' + path + ' for the above reason. This event could indicate hard drive corruption, and it also suggests that hydrus does not have permission to write to its thumbnail folder. Please check everything is ok.' )
                
            
            HydrusData.ShowText( 'The thumbnail for file ' + hash.encode( 'hex' ) + ' was missing. It has been regenerated from the original file, but this event could indicate hard drive corruption. Please check everything is ok.' )
            
        else:
            
            # regenerate the resized thumbnail from the full-size thumbnail
            # (the recursive call regenerates the full-size one if needed)
            
            full_size_path = GetThumbnailPath( hash, True )
            
            try:
                
                thumbnail_resized = HydrusFileHandling.GenerateThumbnail( full_size_path, thumbnail_dimensions )
                
            except:
                
                # the full-size thumbnail would not render; delete it so the
                # recursive call below rebuilds it from the original file
                try:
                    
                    os.remove( full_size_path )
                    
                except:
                    
                    raise HydrusExceptions.FileMissingException( 'The thumbnail for file ' + hash.encode( 'hex' ) + ' was found, but it would not render. An attempt to delete it was made, but that failed as well. This event could indicate hard drive corruption, and it also suggests that hydrus does not have permission to write to its thumbnail folder. Please check everything is ok.' )
                    
                
                full_size_path = GetThumbnailPath( hash, True )
                
                thumbnail_resized = HydrusFileHandling.GenerateThumbnail( full_size_path, thumbnail_dimensions )
                
            
            with open( path, 'wb' ) as f:
                
                f.write( thumbnail_resized )
                
            
        
    
    return path
def GetExpectedContentUpdatePackagePath( service_key, begin, subindex ):
    # Path of the content update package for ( begin, subindex ) in this
    # service's update directory.
    
    filename = str( begin ) + '_' + str( subindex ) + '.json'
    
    return os.path.join( GetExpectedUpdateDir( service_key ), filename )
2015-06-24 22:10:14 +00:00
def GetExpectedServiceUpdatePackagePath( service_key, begin ):
    # Path of the service metadata update package starting at begin, inside
    # this service's update directory.
    
    filename = str( begin ) + '_metadata.json'
    
    return os.path.join( GetExpectedUpdateDir( service_key ), filename )
2015-09-02 23:16:09 +00:00
def GetExpectedUpdateDir( service_key ):
    # Each service keeps its update files in a directory named after its
    # hex-encoded service key.
    
    hex_service_key = service_key.encode( 'hex' )
    
    return os.path.join( HC.CLIENT_UPDATES_DIR, hex_service_key )
2015-06-24 22:10:14 +00:00
2015-11-25 22:00:57 +00:00
def IterateAllThumbnailHashes():
    # Yield the hash of every full-size thumbnail on disk, skipping resized
    # copies and any filename that is not valid hex.
    
    for path in IterateAllThumbnailPaths():
        
        ( base, filename ) = os.path.split( path )
        
        if filename.endswith( '_resized' ):
            
            continue
            
        
        try:
            
            hash = filename.decode( 'hex' )
            
        except TypeError:
            
            continue # not a hex-named thumbnail file
            
        
        yield hash
2015-06-24 22:10:14 +00:00
def IterateAllThumbnailPaths():
    """Yield the full path of every file in every thumbnail prefix directory."""
    
    for prefix in HydrusData.IterateHexPrefixes():
        
        # thumbnails are bucketed into directories named for the first two
        # hex characters of the hash; 'prefix_dir' avoids shadowing builtin dir()
        prefix_dir = os.path.join( HC.CLIENT_THUMBNAILS_DIR, prefix )
        
        for filename in os.listdir( prefix_dir ):
            
            yield os.path.join( prefix_dir, filename )
2015-06-24 22:10:14 +00:00
def _SplitPhraseTerms( terms, opener, closer, new_term_type ):
    # For every 'string' term, extract each opener...closer span as a term of
    # new_term_type, keeping the surrounding text as 'string' terms.
    
    new_terms = []
    
    for ( term_type, term ) in terms:
        
        if term_type == 'string':
            
            while opener in term:
                
                # an opener without a matching closer raises ValueError here,
                # which the caller converts to a parse error
                ( pre, term ) = term.split( opener, 1 )
                ( inner, term ) = term.split( closer, 1 )
                
                new_terms.append( ( 'string', pre ) )
                new_terms.append( ( new_term_type, inner ) )
                
            
        
        new_terms.append( ( term_type, term ) )
        
    
    return new_terms
    
def ParseExportPhrase( phrase ):
    """Parse an export filename phrase into a list of ( term_type, term ) tuples.
    
    [namespace] spans become 'namespace' terms, {predicate} spans become
    'predicate' terms, (tag) spans become 'tag' terms; everything else stays
    as 'string' terms. Raises Exception on a malformed phrase.
    """
    
    try:
        
        terms = [ ( 'string', phrase ) ]
        
        terms = _SplitPhraseTerms( terms, '[', ']', 'namespace' )
        terms = _SplitPhraseTerms( terms, '{', '}', 'predicate' )
        terms = _SplitPhraseTerms( terms, '(', ')', 'tag' )
        
    except Exception: raise Exception( 'Could not parse that phrase!' )
    
    return terms
class ExportFolder( HydrusSerialisable.SerialisableBaseNamed ):
    # A serialisable, periodically-run export job: runs a file search and
    # copies the matching files into a folder on disk under phrase-generated
    # filenames. In SYNCHRONISE mode it also deletes folder entries that no
    # longer match the search. The folder path doubles as the object's name.
    
    SERIALISABLE_TYPE = HydrusSerialisable.SERIALISABLE_TYPE_EXPORT_FOLDER
    SERIALISABLE_VERSION = 1
    
    def __init__( self, name, export_type = HC.EXPORT_FOLDER_TYPE_REGULAR, file_search_context = None, period = 3600, phrase = '{hash}' ):
        # name: the export folder path
        # export_type: HC.EXPORT_FOLDER_TYPE_REGULAR (copy only) or
        #   HC.EXPORT_FOLDER_TYPE_SYNCHRONISE (copy and delete non-matches)
        # file_search_context: the search whose results are exported
        # period: minimum seconds between runs
        # phrase: filename pattern, parsed by ParseExportPhrase
        
        HydrusSerialisable.SerialisableBaseNamed.__init__( self, name )
        
        self._export_type = export_type
        self._file_search_context = file_search_context
        self._period = period
        self._phrase = phrase
        self._last_checked = 0
        
    
    def _GetSerialisableInfo( self ):
        # Flatten state to serialisable plain types.
        
        serialisable_file_search_context = self._file_search_context.GetSerialisableTuple()
        
        return ( self._export_type, serialisable_file_search_context, self._period, self._phrase, self._last_checked )
        
    
    def _InitialiseFromSerialisableInfo( self, serialisable_info ):
        # Restore state from the tuple produced by _GetSerialisableInfo.
        
        ( self._export_type, serialisable_file_search_context, self._period, self._phrase, self._last_checked ) = serialisable_info
        
        self._file_search_context = HydrusSerialisable.CreateFromSerialisableTuple( serialisable_file_search_context )
        
    
    def DoWork( self ):
        # Run the export if the period has elapsed since the last run and the
        # target folder exists, then persist the updated last-checked time.
        
        if HydrusData.TimeHasPassed( self._last_checked + self._period ):
            
            folder_path = HydrusData.ToUnicode( self._name )
            
            if os.path.exists( folder_path ) and os.path.isdir( folder_path ):
                
                query_hash_ids = HydrusGlobals.client_controller.Read( 'file_query_ids', self._file_search_context )
                
                media_results = []
                
                i = 0
                
                base = 256
                
                # fetch media results from the db in batches of 256 ids
                while i < len( query_hash_ids ):
                    
                    # bail out mid-run if the user has paused export folders
                    if HC.options[ 'pause_export_folders_sync' ]:
                        
                        return
                        
                    
                    if i == 0: ( last_i, i ) = ( 0, base )
                    else: ( last_i, i ) = ( i, i + base )
                    
                    sub_query_hash_ids = query_hash_ids[ last_i : i ]
                    
                    more_media_results = HydrusGlobals.client_controller.Read( 'media_results_from_ids', CC.LOCAL_FILE_SERVICE_KEY, sub_query_hash_ids )
                    
                    media_results.extend( more_media_results )
                    
                
                #
                
                terms = ParseExportPhrase( self._phrase )
                
                # what is in the folder now vs what this run produces, for sync deletion
                previous_filenames = set( os.listdir( folder_path ) )
                
                sync_filenames = set()
                
                client_files_manager = HydrusGlobals.client_controller.GetClientFilesManager()
                
                num_copied = 0
                
                for media_result in media_results:
                    
                    hash = media_result.GetHash()
                    mime = media_result.GetMime()
                    size = media_result.GetSize()
                    
                    source_path = client_files_manager.GetFilePath( hash, mime )
                    
                    filename = GenerateExportFilename( media_result, terms )
                    
                    dest_path = os.path.join( folder_path, filename )
                    
                    do_copy = True
                    
                    if filename in sync_filenames:
                        
                        # an earlier media result already produced this filename
                        do_copy = False
                        
                    elif os.path.exists( dest_path ):
                        
                        dest_size = os.path.getsize( dest_path )
                        
                        # same name and same size on disk -- assume already exported
                        if dest_size == size:
                            
                            do_copy = False
                            
                        
                    
                    if do_copy:
                        
                        shutil.copy2( source_path, dest_path )
                        
                        num_copied += 1
                        
                        # best-effort: make the exported copy readable/writable
                        try: os.chmod( dest_path, stat.S_IWRITE | stat.S_IREAD )
                        except: pass
                        
                    
                    sync_filenames.add( filename )
                    
                
                if num_copied > 0:
                    
                    HydrusData.Print( 'Export folder ' + self._name + ' exported ' + HydrusData.ConvertIntToPrettyString( num_copied ) + ' files.' )
                    
                
                if self._export_type == HC.EXPORT_FOLDER_TYPE_SYNCHRONISE:
                    
                    # delete anything that was in the folder but was not produced
                    # by this run's search results
                    deletee_filenames = previous_filenames.difference( sync_filenames )
                    
                    for deletee_filename in deletee_filenames:
                        
                        deletee_path = os.path.join( folder_path, deletee_filename )
                        
                        ClientData.DeletePath( deletee_path )
                        
                    
                    if len( deletee_filenames ) > 0:
                        
                        HydrusData.Print( 'Export folder ' + self._name + ' deleted ' + HydrusData.ConvertIntToPrettyString( len( deletee_filenames ) ) + ' files.' )
                        
                    
                
            
            self._last_checked = HydrusData.GetNow()
            
            # persist the new last-checked time so a restart does not re-run early
            HydrusGlobals.client_controller.WriteSynchronous( 'serialisable', self )
            
        
    
    def ToTuple( self ):
        # Expose the editable settings for the UI dialog.
        
        return ( self._name, self._export_type, self._file_search_context, self._period, self._phrase )
        
    
    def SetTuple( self, folder_path, export_type, file_search_context, period, phrase ):
        # Overwrite settings from the UI dialog; the folder path is the name.
        
        self._name = folder_path
        self._export_type = export_type
        self._file_search_context = file_search_context
        self._period = period
        self._phrase = phrase
2015-08-05 18:42:35 +00:00
HydrusSerialisable.SERIALISABLE_TYPES_TO_OBJECT_TYPES[ HydrusSerialisable.SERIALISABLE_TYPE_EXPORT_FOLDER ] = ExportFolder