602 lines
20 KiB
Python
602 lines
20 KiB
Python
import os
|
|
import typing
|
|
|
|
from hydrus.core import HydrusGlobals as HG
|
|
from hydrus.core import HydrusSerialisable
|
|
from hydrus.core import HydrusText
|
|
|
|
from hydrus.client import ClientConstants as CC
|
|
from hydrus.client import ClientParsing
|
|
from hydrus.client import ClientStrings
|
|
from hydrus.client.media import ClientMediaResult
|
|
from hydrus.client.metadata import ClientMetadataMigrationCore
|
|
from hydrus.client.metadata import ClientTags
|
|
|
|
# TODO: All importers should probably have a string processor
|
|
|
|
class SingleFileMetadataImporter( ClientMetadataMigrationCore.ImporterExporterNode ):
    """
    Common base for the single-file metadata importers in this module.

    It stores the StringProcessor handed to the constructor and exposes it
    through GetStringProcessor. Import and ToString are placeholders that
    raise NotImplementedError at this level.
    """

    def __init__( self, string_processor: ClientStrings.StringProcessor ):
        """Remember the string processor for later use by Import/ToString overrides."""

        self._string_processor = string_processor

    def GetStringProcessor( self ) -> ClientStrings.StringProcessor:
        """Return the string processor supplied at construction."""

        processor = self._string_processor

        return processor

    def Import( self, *args, **kwargs ):
        """Placeholder: always raises NotImplementedError here."""

        raise NotImplementedError

    def ToString( self ) -> str:
        """Placeholder: always raises NotImplementedError here."""

        raise NotImplementedError
|
|
|
|
|
|
|
|
class SingleFileMetadataImporterMedia( SingleFileMetadataImporter ):
    """
    Base for importers whose Import takes a ClientMediaResult.MediaResult.

    Both methods are placeholders that raise NotImplementedError at this level.
    """

    def Import( self, media_result: ClientMediaResult.MediaResult ):
        """Placeholder: always raises NotImplementedError here."""

        raise NotImplementedError

    def ToString( self ) -> str:
        """Placeholder: always raises NotImplementedError here."""

        raise NotImplementedError
|
|
|
|
|
|
|
|
class SingleFileMetadataImporterSidecar( SingleFileMetadataImporter, ClientMetadataMigrationCore.SidecarNode ):
    """
    Base for importers whose Import takes the path of the actual file.

    Combines the string-processor handling of SingleFileMetadataImporter with
    the sidecar naming settings of ClientMetadataMigrationCore.SidecarNode.
    GetExpectedSidecarPath, Import and ToString are placeholders that raise
    NotImplementedError at this level.
    """

    def __init__(
        self,
        string_processor: ClientStrings.StringProcessor,
        remove_actual_filename_ext: bool,
        suffix: str,
        filename_string_converter: ClientStrings.StringConverter
    ):
        """Initialise both parents explicitly: the sidecar node first, then the importer base."""

        ClientMetadataMigrationCore.SidecarNode.__init__(
            self,
            remove_actual_filename_ext,
            suffix,
            filename_string_converter
        )

        SingleFileMetadataImporter.__init__( self, string_processor )

    def GetExpectedSidecarPath( self, path: str ):
        """Placeholder: always raises NotImplementedError here."""

        raise NotImplementedError

    def Import( self, actual_file_path: str ):
        """Placeholder: always raises NotImplementedError here."""

        raise NotImplementedError

    def ToString( self ) -> str:
        """Placeholder: always raises NotImplementedError here."""

        raise NotImplementedError
|
|
|
|
|
|
|
|
class SingleFileMetadataImporterMediaTags( HydrusSerialisable.SerialisableBase, SingleFileMetadataImporterMedia ):
    """
    Importer that reads the current tags of a media result on one tag service.

    Serialised form (version 2) is the pair
    ( serialisable string processor, service key as a hex string ).
    """

    SERIALISABLE_TYPE = HydrusSerialisable.SERIALISABLE_TYPE_METADATA_SINGLE_FILE_IMPORTER_MEDIA_TAGS
    SERIALISABLE_NAME = 'Metadata Single File Importer Media Tags'
    SERIALISABLE_VERSION = 2

    def __init__( self, string_processor = None, service_key = None ):
        """
        Build the importer.

        string_processor: defaults to a fresh ClientStrings.StringProcessor() when None.
        service_key: defaults to CC.COMBINED_TAG_SERVICE_KEY when None.
        """

        if string_processor is None:

            string_processor = ClientStrings.StringProcessor()

        HydrusSerialisable.SerialisableBase.__init__( self )
        SingleFileMetadataImporterMedia.__init__( self, string_processor )

        if service_key is None:

            service_key = CC.COMBINED_TAG_SERVICE_KEY

        self._service_key = service_key

    def _GetSerialisableInfo( self ):
        """Return ( serialisable string processor, service key hex )."""

        serialisable_string_processor = self._string_processor.GetSerialisableTuple()
        serialisable_service_key = self._service_key.hex()

        return ( serialisable_string_processor, serialisable_service_key )

    def _InitialiseFromSerialisableInfo( self, serialisable_info ):
        """Restore the string processor and service key from the pair built by _GetSerialisableInfo."""

        ( serialisable_string_processor, serialisable_service_key ) = serialisable_info

        self._string_processor = HydrusSerialisable.CreateFromSerialisableTuple( serialisable_string_processor )
        self._service_key = bytes.fromhex( serialisable_service_key )

    def _UpdateSerialisableInfo( self, version, old_serialisable_info ):
        """
        Upgrade stored info by one version.

        Version 1 stored only the service key hex; version 2 prepends a default
        string processor. Returns ( new version, new info ) for version 1 and
        falls through (returning None) for any other version.
        """

        if version == 1:

            serialisable_service_key = old_serialisable_info

            string_processor = ClientStrings.StringProcessor()

            serialisable_string_processor = string_processor.GetSerialisableTuple()

            new_serialisable_info = ( serialisable_string_processor, serialisable_service_key )

            return ( 2, new_serialisable_info )

    def GetExampleStrings( self ):
        """Return a fixed list of sample tags."""

        examples = [
            'blue eyes',
            'blonde hair',
            'skirt',
            'character:jane smith',
            'series:jane smith adventures',
            'creator:some guy'
        ]

        return examples

    def GetServiceKey( self ) -> bytes:
        """Return the service key whose tags are imported."""

        return self._service_key

    def Import( self, media_result: ClientMediaResult.MediaResult ):
        """
        Return the media result's current tags for this importer's service key.

        Tags are fetched with ClientTags.TAG_DISPLAY_STORAGE and are passed
        through the string processor only when it reports that it makes changes.
        """

        tags = media_result.GetTagsManager().GetCurrent( self._service_key, ClientTags.TAG_DISPLAY_STORAGE )

        if self._string_processor.MakesChanges():

            tags = self._string_processor.ProcessStrings( tags )

        return tags

    def SetServiceKey( self, service_key: bytes ):
        """Replace the service key whose tags are imported."""

        self._service_key = service_key

    def ToString( self ) -> str:
        """
        Return a one-line description, e.g. '"my tags" tags from media'.

        The service name lookup is best-effort: if it fails for any ordinary
        reason, 'unknown service' is used instead. When the string processor
        makes changes, ', applying <processor description>' is appended.
        """

        try:

            name = HG.client_controller.services_manager.GetName( self._service_key )

        except Exception:

            # Deliberately broad, but not a bare except: KeyboardInterrupt and
            # SystemExit must still propagate out of a mere description call.
            name = 'unknown service'

        if self._string_processor.MakesChanges():

            full_munge_text = ', applying {}'.format( self._string_processor.ToString() )

        else:

            full_munge_text = ''

        return '"{}" tags from media{}'.format( name, full_munge_text )
|
|
|
|
|
|
|
|
# Map the class's serialisable type id to the class in HydrusSerialisable's lookup table
# (presumably what CreateFromSerialisableTuple consults - confirm in HydrusSerialisable).
HydrusSerialisable.SERIALISABLE_TYPES_TO_OBJECT_TYPES[ SingleFileMetadataImporterMediaTags.SERIALISABLE_TYPE ] = SingleFileMetadataImporterMediaTags
|
|
|
|
class SingleFileMetadataImporterMediaURLs( HydrusSerialisable.SerialisableBase, SingleFileMetadataImporterMedia ):
    """
    Importer that reads the URLs held by a media result's locations manager.

    Serialised form (version 2) is just the serialisable string processor.
    """

    SERIALISABLE_TYPE = HydrusSerialisable.SERIALISABLE_TYPE_METADATA_SINGLE_FILE_IMPORTER_MEDIA_URLS
    SERIALISABLE_NAME = 'Metadata Single File Importer Media URLs'
    SERIALISABLE_VERSION = 2

    def __init__( self, string_processor = None ):
        """Build the importer; a None string_processor becomes a fresh StringProcessor()."""

        if string_processor is None:

            string_processor = ClientStrings.StringProcessor()

        HydrusSerialisable.SerialisableBase.__init__( self )
        SingleFileMetadataImporterMedia.__init__( self, string_processor )

    def _GetSerialisableInfo( self ):
        """Return the serialisable tuple of the string processor."""

        return self._string_processor.GetSerialisableTuple()

    def _InitialiseFromSerialisableInfo( self, serialisable_info ):
        """Rebuild the string processor from its serialisable tuple."""

        self._string_processor = HydrusSerialisable.CreateFromSerialisableTuple( serialisable_info )

    def _UpdateSerialisableInfo( self, version, old_serialisable_info ):
        """
        Upgrade stored info by one version.

        The version 1 payload is not carried forward; version 2 stores a default
        string processor. Other versions fall through and return None.
        """

        if version == 1:

            default_processor = ClientStrings.StringProcessor()

            upgraded_info = default_processor.GetSerialisableTuple()

            return ( 2, upgraded_info )

    def GetExampleStrings( self ):
        """Return a fixed list of sample URLs."""

        return [
            'https://example.com/gallery/index.php?post=123456&page=show',
            'https://cdn3.expl.com/files/file_id?id=123456&token=0123456789abcdef'
        ]

    def Import( self, media_result: ClientMediaResult.MediaResult ):
        """
        Return the URLs from the media result's locations manager.

        The URLs go through the string processor only when it reports that it
        makes changes; otherwise they are returned as fetched.
        """

        fetched_urls = media_result.GetLocationsManager().GetURLs()

        if not self._string_processor.MakesChanges():

            return fetched_urls

        return self._string_processor.ProcessStrings( fetched_urls )

    def ToString( self ) -> str:
        """Return 'urls from media', plus the processor description when it makes changes."""

        processor = self._string_processor

        munge_suffix = ', applying {}'.format( processor.ToString() ) if processor.MakesChanges() else ''

        return 'urls from media{}'.format( munge_suffix )
|
|
|
|
|
|
|
|
# Map the class's serialisable type id to the class in HydrusSerialisable's lookup table
# (presumably what CreateFromSerialisableTuple consults - confirm in HydrusSerialisable).
HydrusSerialisable.SERIALISABLE_TYPES_TO_OBJECT_TYPES[ SingleFileMetadataImporterMediaURLs.SERIALISABLE_TYPE ] = SingleFileMetadataImporterMediaURLs
|
|
|
|
class SingleFileMetadataImporterJSON( HydrusSerialisable.SerialisableBase, SingleFileMetadataImporterSidecar ):
    """
    Importer that reads strings out of a JSON sidecar file via a JSON parsing formula.

    Serialised form (version 3) is the tuple
    ( string processor, remove_actual_filename_ext, suffix, filename string converter, json parsing formula ),
    with the three objects stored as their serialisable tuples.
    """

    SERIALISABLE_TYPE = HydrusSerialisable.SERIALISABLE_TYPE_METADATA_SINGLE_FILE_IMPORTER_JSON
    SERIALISABLE_NAME = 'Metadata Single File Importer JSON'
    SERIALISABLE_VERSION = 3

    def __init__( self, string_processor = None, remove_actual_filename_ext = None, suffix = None, filename_string_converter = None, json_parsing_formula = None ):
        """
        Build the importer, filling in a default for every argument left as None.

        Defaults: remove_actual_filename_ext False, suffix '', a StringConverter
        with example 'my_image.jpg.json', an empty StringProcessor, and a
        ParseFormulaJSON with a single all-items rule fetching string content.
        """

        remove_actual_filename_ext = False if remove_actual_filename_ext is None else remove_actual_filename_ext
        suffix = '' if suffix is None else suffix

        if filename_string_converter is None:

            filename_string_converter = ClientStrings.StringConverter( example_string = 'my_image.jpg.json' )

        if string_processor is None:

            string_processor = ClientStrings.StringProcessor()

        HydrusSerialisable.SerialisableBase.__init__( self )
        SingleFileMetadataImporterSidecar.__init__( self, string_processor, remove_actual_filename_ext, suffix, filename_string_converter )

        if json_parsing_formula is None:

            default_rules = [ ( ClientParsing.JSON_PARSE_RULE_TYPE_ALL_ITEMS, None ) ]

            json_parsing_formula = ClientParsing.ParseFormulaJSON( parse_rules = default_rules, content_to_fetch = ClientParsing.JSON_CONTENT_STRING )

        self._json_parsing_formula = json_parsing_formula

    def _GetSerialisableInfo( self ):
        """Return the version 3 tuple described on the class."""

        processor_info = self._string_processor.GetSerialisableTuple()
        converter_info = self._filename_string_converter.GetSerialisableTuple()
        formula_info = self._json_parsing_formula.GetSerialisableTuple()

        return ( processor_info, self._remove_actual_filename_ext, self._suffix, converter_info, formula_info )

    def _InitialiseFromSerialisableInfo( self, serialisable_info ):
        """Restore all five fields from the tuple built by _GetSerialisableInfo."""

        ( processor_info, remove_ext, suffix, converter_info, formula_info ) = serialisable_info

        self._remove_actual_filename_ext = remove_ext
        self._suffix = suffix

        self._string_processor = HydrusSerialisable.CreateFromSerialisableTuple( processor_info )
        self._filename_string_converter = HydrusSerialisable.CreateFromSerialisableTuple( converter_info )
        self._json_parsing_formula = HydrusSerialisable.CreateFromSerialisableTuple( formula_info )

    def _UpdateSerialisableInfo( self, version, old_serialisable_info ):
        """
        Upgrade stored info by one version and return ( new version, new info ).

        1 -> 2 prepends a default string processor.
        2 -> 3 adds remove_actual_filename_ext = False and a default filename converter.
        Any other version falls through and returns None.
        """

        if version == 1:

            ( suffix, formula_info ) = old_serialisable_info

            processor_info = ClientStrings.StringProcessor().GetSerialisableTuple()

            return ( 2, ( processor_info, suffix, formula_info ) )

        if version == 2:

            ( processor_info, suffix, formula_info ) = old_serialisable_info

            remove_ext = False

            default_converter = ClientStrings.StringConverter( example_string = 'my_image.jpg.json' )

            converter_info = default_converter.GetSerialisableTuple()

            return ( 3, ( processor_info, remove_ext, suffix, converter_info, formula_info ) )

    def GetExpectedSidecarPath( self, actual_file_path: str ):
        """Return the 'json' sidecar path that GetSidecarPath derives for the given file path."""

        return ClientMetadataMigrationCore.GetSidecarPath(
            actual_file_path,
            self._remove_actual_filename_ext,
            self._suffix,
            self._filename_string_converter,
            'json'
        )

    def GetJSONParsingFormula( self ) -> ClientParsing.ParseFormulaJSON:
        """Return the formula used to pull strings out of the sidecar JSON."""

        return self._json_parsing_formula

    def Import( self, actual_file_path: str ) -> typing.Collection[ str ]:
        """
        Read the expected sidecar and return the strings the formula parses from it.

        Returns an empty list when the sidecar does not exist. Any failure while
        reading the file is re-raised as a plain Exception naming the path. The
        parsed rows go through the string processor only when it makes changes.
        """

        sidecar_path = self.GetExpectedSidecarPath( actual_file_path )

        if not os.path.exists( sidecar_path ):

            return []

        try:

            with open( sidecar_path, 'r', encoding = 'utf-8' ) as sidecar_file:

                raw_json = sidecar_file.read()

        except Exception as e:

            raise Exception( 'Could not import from {}: {}'.format( sidecar_path, str( e ) ) )

        # an empty parsing context, and newlines are not collapsed
        parsed_rows = self._json_parsing_formula.Parse( {}, raw_json, False )

        if not self._string_processor.MakesChanges():

            return parsed_rows

        return self._string_processor.ProcessStrings( parsed_rows )

    def SetJSONParsingFormula( self, json_parsing_formula: ClientParsing.ParseFormulaJSON ):
        """Replace the formula used to pull strings out of the sidecar JSON."""

        self._json_parsing_formula = json_parsing_formula

    def ToString( self ) -> str:
        """Return 'from JSON sidecar', plus the processor description when it makes changes."""

        processor = self._string_processor

        munge_suffix = ', applying {}'.format( processor.ToString() ) if processor.MakesChanges() else ''

        return 'from JSON sidecar{}'.format( munge_suffix )
|
|
|
|
|
|
|
|
# Map the class's serialisable type id to the class in HydrusSerialisable's lookup table
# (presumably what CreateFromSerialisableTuple consults - confirm in HydrusSerialisable).
HydrusSerialisable.SERIALISABLE_TYPES_TO_OBJECT_TYPES[ SingleFileMetadataImporterJSON.SERIALISABLE_TYPE ] = SingleFileMetadataImporterJSON
|
|
|
|
class SingleFileMetadataImporterTXT( HydrusSerialisable.SerialisableBase, SingleFileMetadataImporterSidecar ):
    """
    Importer that reads strings out of a .txt sidecar file, split on a separator.

    Serialised form (version 4) is the tuple
    ( string processor, remove_actual_filename_ext, suffix, filename string converter, separator ),
    with the two objects stored as their serialisable tuples.
    """

    SERIALISABLE_TYPE = HydrusSerialisable.SERIALISABLE_TYPE_METADATA_SINGLE_FILE_IMPORTER_TXT
    SERIALISABLE_NAME = 'Metadata Single File Importer TXT'
    SERIALISABLE_VERSION = 4

    def __init__( self, string_processor = None, remove_actual_filename_ext = None, suffix = None, filename_string_converter = None, separator = None ):
        """
        Build the importer, filling in a default for every argument left as None.

        Defaults: remove_actual_filename_ext False, suffix '', a StringConverter
        with example 'my_image.jpg.txt', an empty StringProcessor, and a newline
        separator.
        """

        if remove_actual_filename_ext is None:

            remove_actual_filename_ext = False

        if suffix is None:

            suffix = ''

        if filename_string_converter is None:

            filename_string_converter = ClientStrings.StringConverter( example_string = 'my_image.jpg.txt' )

        if string_processor is None:

            string_processor = ClientStrings.StringProcessor()

        if separator is None:

            separator = '\n'

        self._separator = separator

        HydrusSerialisable.SerialisableBase.__init__( self )
        SingleFileMetadataImporterSidecar.__init__( self, string_processor, remove_actual_filename_ext, suffix, filename_string_converter )

    def _GetSerialisableInfo( self ):
        """Return the version 4 tuple described on the class."""

        serialisable_string_processor = self._string_processor.GetSerialisableTuple()
        serialisable_filename_string_converter = self._filename_string_converter.GetSerialisableTuple()

        return ( serialisable_string_processor, self._remove_actual_filename_ext, self._suffix, serialisable_filename_string_converter, self._separator )

    def _InitialiseFromSerialisableInfo( self, serialisable_info ):
        """Restore all five fields from the tuple built by _GetSerialisableInfo."""

        ( serialisable_string_processor, self._remove_actual_filename_ext, self._suffix, serialisable_filename_string_converter, self._separator ) = serialisable_info

        self._string_processor = HydrusSerialisable.CreateFromSerialisableTuple( serialisable_string_processor )
        self._filename_string_converter = HydrusSerialisable.CreateFromSerialisableTuple( serialisable_filename_string_converter )

    def _UpdateSerialisableInfo( self, version, old_serialisable_info ):
        """
        Upgrade stored info by one version and return ( new version, new info ).

        1 -> 2 prepends a default string processor to the bare suffix.
        2 -> 3 adds remove_actual_filename_ext = False and a default filename converter.
        3 -> 4 appends a newline separator.
        Any other version falls through and returns None.
        """

        if version == 1:

            suffix = old_serialisable_info

            string_processor = ClientStrings.StringProcessor()

            serialisable_string_processor = string_processor.GetSerialisableTuple()

            new_serialisable_info = ( serialisable_string_processor, suffix )

            return ( 2, new_serialisable_info )

        if version == 2:

            ( serialisable_string_processor, suffix ) = old_serialisable_info

            remove_actual_filename_ext = False
            filename_string_converter = ClientStrings.StringConverter( example_string = 'my_image.jpg.txt' )

            serialisable_filename_string_converter = filename_string_converter.GetSerialisableTuple()

            new_serialisable_info = ( serialisable_string_processor, remove_actual_filename_ext, suffix, serialisable_filename_string_converter )

            return ( 3, new_serialisable_info )

        if version == 3:

            ( serialisable_string_processor, remove_actual_filename_ext, suffix, serialisable_filename_string_converter ) = old_serialisable_info

            separator = '\n'

            new_serialisable_info = ( serialisable_string_processor, remove_actual_filename_ext, suffix, serialisable_filename_string_converter, separator )

            return ( 4, new_serialisable_info )

    def GetExpectedSidecarPath( self, actual_file_path: str ):
        """Return the 'txt' sidecar path that GetSidecarPath derives for the given file path."""

        return ClientMetadataMigrationCore.GetSidecarPath( actual_file_path, self._remove_actual_filename_ext, self._suffix, self._filename_string_converter, 'txt' )

    def GetSeparator( self ) -> str:
        """Return the separator used to split the sidecar text into rows."""

        return self._separator

    def Import( self, actual_file_path: str ) -> typing.Collection[ str ]:
        """
        Read the expected sidecar and return its rows.

        Returns an empty list when the sidecar does not exist. Any failure while
        reading the file is re-raised as a plain Exception naming the path. The
        text is first passed to HydrusText.DeserialiseNewlinedTexts; with a
        non-newline separator those rows are glued back together and re-split on
        the separator. Rows go through the string processor only when it makes
        changes.
        """

        path = self.GetExpectedSidecarPath( actual_file_path )

        if not os.path.exists( path ):

            return []

        try:

            with open( path, 'r', encoding = 'utf-8' ) as f:

                raw_text = f.read()

        except Exception as e:

            raise Exception( 'Could not import from {}: {}'.format( path, str( e ) ) )

        rows = HydrusText.DeserialiseNewlinedTexts( raw_text )

        if self._separator != '\n':

            # don't want any newlines, so this 'undo' is correct
            rejoined_text = ''.join( rows )

            rows = rejoined_text.split( self._separator )

        if self._string_processor.MakesChanges():

            rows = self._string_processor.ProcessStrings( rows )

        return rows

    def SetSeparator( self, separator: str ):
        """Replace the separator used to split the sidecar text into rows."""

        self._separator = separator

    def ToString( self ) -> str:
        """Return 'from .txt sidecar', plus the processor description when it makes changes."""

        if self._string_processor.MakesChanges():

            full_munge_text = ', applying {}'.format( self._string_processor.ToString() )

        else:

            full_munge_text = ''

        # The template needs the '{}' placeholder; without it str.format silently
        # discards the argument and the processor description never appears.
        return 'from .txt sidecar{}'.format( full_munge_text )
|
|
|
|
|
|
|
|
# Map the class's serialisable type id to the class in HydrusSerialisable's lookup table
# (presumably what CreateFromSerialisableTuple consults - confirm in HydrusSerialisable).
HydrusSerialisable.SERIALISABLE_TYPES_TO_OBJECT_TYPES[ SingleFileMetadataImporterTXT.SERIALISABLE_TYPE ] = SingleFileMetadataImporterTXT
|