# hydrus/hydrus/client/metadata/ClientMetadataMigrationImpo...
#
# 602 lines
# 20 KiB
# Python

import os
import typing
from hydrus.core import HydrusGlobals as HG
from hydrus.core import HydrusSerialisable
from hydrus.core import HydrusText
from hydrus.client import ClientConstants as CC
from hydrus.client import ClientParsing
from hydrus.client import ClientStrings
from hydrus.client.media import ClientMediaResult
from hydrus.client.metadata import ClientMetadataMigrationCore
from hydrus.client.metadata import ClientTags
# TODO: All importers should probably have a string processor
class SingleFileMetadataImporter( ClientMetadataMigrationCore.ImporterExporterNode ):
    """
    Abstract base for importers that produce metadata strings for a single file.

    It stores the StringProcessor that concrete subclasses apply to their results.
    Import and ToString are placeholders that subclasses must override.
    """

    def __init__( self, string_processor: ClientStrings.StringProcessor ):
        """Store the string processor; no parent initialiser is invoked from here."""

        self._string_processor = string_processor

    def GetStringProcessor( self ) -> ClientStrings.StringProcessor:
        """Return the string processor this importer currently holds."""

        return self._string_processor

    def Import( self, *args, **kwargs ):
        """Fetch the metadata strings. The accepted arguments are defined by each subclass."""

        raise NotImplementedError

    def ToString( self ) -> str:
        """Return a text description of this importer. Must be provided by subclasses."""

        raise NotImplementedError
class SingleFileMetadataImporterMedia( SingleFileMetadataImporter ):
    """
    Abstract importer whose source is a MediaResult rather than a path on disk.

    Concrete subclasses override both methods.
    """

    def Import( self, media_result: ClientMediaResult.MediaResult ):
        """Pull the metadata strings out of the given media result (subclass responsibility)."""

        raise NotImplementedError

    def ToString( self ) -> str:
        """Return a text description of this importer (subclass responsibility)."""

        raise NotImplementedError
class SingleFileMetadataImporterSidecar( SingleFileMetadataImporter, ClientMetadataMigrationCore.SidecarNode ):
    """
    Abstract importer whose source is a sidecar file located from an actual file path.

    Combines the importer base (string processor) with SidecarNode (filename options).
    Concrete subclasses override the three placeholder methods.
    """

    def __init__( self, string_processor: ClientStrings.StringProcessor, remove_actual_filename_ext: bool, suffix: str, filename_string_converter: ClientStrings.StringConverter ):
        """Initialise both parents explicitly: the sidecar node first, then the importer base."""

        # each parent takes different arguments, so they are called by name rather than via super()
        ClientMetadataMigrationCore.SidecarNode.__init__(
            self,
            remove_actual_filename_ext,
            suffix,
            filename_string_converter
        )

        SingleFileMetadataImporter.__init__( self, string_processor )

    def GetExpectedSidecarPath( self, path: str ):
        """Return the sidecar path that belongs to the given file path (subclass responsibility)."""

        raise NotImplementedError

    def Import( self, actual_file_path: str ):
        """Read the metadata strings from the sidecar of the given file (subclass responsibility)."""

        raise NotImplementedError

    def ToString( self ) -> str:
        """Return a text description of this importer (subclass responsibility)."""

        raise NotImplementedError
class SingleFileMetadataImporterMediaTags( HydrusSerialisable.SerialisableBase, SingleFileMetadataImporterMedia ):
    """
    Importer that reads a media result's current tags for one tag service.

    The service is identified by a bytes service key, which defaults to
    CC.COMBINED_TAG_SERVICE_KEY. The string processor is applied to the tags when it makes changes.
    """

    SERIALISABLE_TYPE = HydrusSerialisable.SERIALISABLE_TYPE_METADATA_SINGLE_FILE_IMPORTER_MEDIA_TAGS
    SERIALISABLE_NAME = 'Metadata Single File Importer Media Tags'
    SERIALISABLE_VERSION = 2

    def __init__( self, string_processor = None, service_key = None ):
        """
        :param string_processor: ClientStrings.StringProcessor to apply; a fresh one is made when None.
        :param service_key: bytes key of the tag service to read; CC.COMBINED_TAG_SERVICE_KEY when None.
        """

        if string_processor is None:

            string_processor = ClientStrings.StringProcessor()

        HydrusSerialisable.SerialisableBase.__init__( self )
        SingleFileMetadataImporterMedia.__init__( self, string_processor )

        if service_key is None:

            service_key = CC.COMBINED_TAG_SERVICE_KEY

        self._service_key = service_key

    def _GetSerialisableInfo( self ):
        """Return ( serialisable string processor, service key as hex text )."""

        serialisable_string_processor = self._string_processor.GetSerialisableTuple()
        serialisable_service_key = self._service_key.hex()

        return ( serialisable_string_processor, serialisable_service_key )

    def _InitialiseFromSerialisableInfo( self, serialisable_info ):
        """Restore state from the tuple produced by _GetSerialisableInfo."""

        ( serialisable_string_processor, serialisable_service_key ) = serialisable_info

        self._string_processor = HydrusSerialisable.CreateFromSerialisableTuple( serialisable_string_processor )
        self._service_key = bytes.fromhex( serialisable_service_key )

    def _UpdateSerialisableInfo( self, version, old_serialisable_info ):
        """Upgrade stored info one version step; only the 1 -> 2 step exists."""

        if version == 1:

            # version 1 stored just the hex service key; version 2 puts a default string processor in front of it
            serialisable_service_key = old_serialisable_info

            string_processor = ClientStrings.StringProcessor()

            serialisable_string_processor = string_processor.GetSerialisableTuple()

            new_serialisable_info = ( serialisable_string_processor, serialisable_service_key )

            return ( 2, new_serialisable_info )

    def GetExampleStrings( self ):
        """Return a fixed list of sample tag strings."""

        examples = [
            'blue eyes',
            'blonde hair',
            'skirt',
            'character:jane smith',
            'series:jane smith adventures',
            'creator:some guy'
        ]

        return examples

    def GetServiceKey( self ) -> bytes:
        """Return the key of the tag service this importer reads from."""

        return self._service_key

    def Import( self, media_result: ClientMediaResult.MediaResult ):
        """
        Return the media result's current tags for the configured service.

        Tags are fetched with ClientTags.TAG_DISPLAY_STORAGE and passed through the
        string processor only when it reports that it makes changes.
        """

        tags = media_result.GetTagsManager().GetCurrent( self._service_key, ClientTags.TAG_DISPLAY_STORAGE )

        if self._string_processor.MakesChanges():

            tags = self._string_processor.ProcessStrings( tags )

        return tags

    def SetServiceKey( self, service_key: bytes ):
        """Change the tag service this importer reads from."""

        self._service_key = service_key

    def ToString( self ) -> str:
        """
        Return a description naming the tag service and, if active, the string processing.

        If the service name cannot be looked up, 'unknown service' is used instead.
        """

        try:

            name = HG.client_controller.services_manager.GetName( self._service_key )

        except Exception:

            # Best-effort lookup: any ordinary failure falls back to a placeholder name.
            # Exception (not a bare except) so KeyboardInterrupt/SystemExit still propagate.
            name = 'unknown service'

        if self._string_processor.MakesChanges():

            full_munge_text = ', applying {}'.format( self._string_processor.ToString() )

        else:

            full_munge_text = ''

        return '"{}" tags from media{}'.format( name, full_munge_text )
# add SingleFileMetadataImporterMediaTags to HydrusSerialisable's type-constant -> class table
# (presumably what lets stored objects of this type be recreated; confirm in HydrusSerialisable)
HydrusSerialisable.SERIALISABLE_TYPES_TO_OBJECT_TYPES[ HydrusSerialisable.SERIALISABLE_TYPE_METADATA_SINGLE_FILE_IMPORTER_MEDIA_TAGS ] = SingleFileMetadataImporterMediaTags
class SingleFileMetadataImporterMediaURLs( HydrusSerialisable.SerialisableBase, SingleFileMetadataImporterMedia ):
    """
    Importer that reads the URLs held by a media result's locations manager.

    The only persisted state is the string processor.
    """

    SERIALISABLE_TYPE = HydrusSerialisable.SERIALISABLE_TYPE_METADATA_SINGLE_FILE_IMPORTER_MEDIA_URLS
    SERIALISABLE_NAME = 'Metadata Single File Importer Media URLs'
    SERIALISABLE_VERSION = 2

    def __init__( self, string_processor = None ):
        """
        :param string_processor: ClientStrings.StringProcessor to apply; a fresh one is made when None.
        """

        processor = ClientStrings.StringProcessor() if string_processor is None else string_processor

        HydrusSerialisable.SerialisableBase.__init__( self )
        SingleFileMetadataImporterMedia.__init__( self, processor )

    def _GetSerialisableInfo( self ):
        """Return the serialisable tuple of the string processor, which is the whole stored state."""

        return self._string_processor.GetSerialisableTuple()

    def _InitialiseFromSerialisableInfo( self, serialisable_info ):
        """Rebuild the string processor from the stored tuple."""

        self._string_processor = HydrusSerialisable.CreateFromSerialisableTuple( serialisable_info )

    def _UpdateSerialisableInfo( self, version, old_serialisable_info ):
        """Upgrade stored info one version step; only the 1 -> 2 step exists."""

        if version == 1:

            # the version 1 payload is discarded; version 2 holds a default string processor
            default_processor = ClientStrings.StringProcessor()

            return ( 2, default_processor.GetSerialisableTuple() )

    def GetExampleStrings( self ):
        """Return a fixed list of sample URL strings."""

        return [
            'https://example.com/gallery/index.php?post=123456&page=show',
            'https://cdn3.expl.com/files/file_id?id=123456&token=0123456789abcdef'
        ]

    def Import( self, media_result: ClientMediaResult.MediaResult ):
        """
        Return the URLs from the media result's locations manager.

        The string processor is applied only when it reports that it makes changes.
        """

        found_urls = media_result.GetLocationsManager().GetURLs()

        if not self._string_processor.MakesChanges():

            return found_urls

        return self._string_processor.ProcessStrings( found_urls )

    def ToString( self ) -> str:
        """Return a description, with the string processing appended when it is active."""

        suffix_text = ', applying {}'.format( self._string_processor.ToString() ) if self._string_processor.MakesChanges() else ''

        return 'urls from media{}'.format( suffix_text )
# add SingleFileMetadataImporterMediaURLs to HydrusSerialisable's type-constant -> class table
# (presumably what lets stored objects of this type be recreated; confirm in HydrusSerialisable)
HydrusSerialisable.SERIALISABLE_TYPES_TO_OBJECT_TYPES[ HydrusSerialisable.SERIALISABLE_TYPE_METADATA_SINGLE_FILE_IMPORTER_MEDIA_URLS ] = SingleFileMetadataImporterMediaURLs
class SingleFileMetadataImporterJSON( HydrusSerialisable.SerialisableBase, SingleFileMetadataImporterSidecar ):
    """
    Importer that reads strings from a JSON sidecar file.

    The sidecar path is derived from the actual file's path by
    ClientMetadataMigrationCore.GetSidecarPath. The sidecar text is run through a
    JSON parsing formula, and the string processor is applied when it makes changes.
    """

    SERIALISABLE_TYPE = HydrusSerialisable.SERIALISABLE_TYPE_METADATA_SINGLE_FILE_IMPORTER_JSON
    SERIALISABLE_NAME = 'Metadata Single File Importer JSON'
    SERIALISABLE_VERSION = 3

    def __init__( self, string_processor = None, remove_actual_filename_ext = None, suffix = None, filename_string_converter = None, json_parsing_formula = None ):
        """
        Every argument is optional; None selects the default.

        :param string_processor: defaults to a fresh ClientStrings.StringProcessor.
        :param remove_actual_filename_ext: defaults to False.
        :param suffix: defaults to the empty string.
        :param filename_string_converter: defaults to a StringConverter with example 'my_image.jpg.json'.
        :param json_parsing_formula: defaults to a ParseFormulaJSON with one ALL_ITEMS rule fetching string content.
        """

        if remove_actual_filename_ext is None:

            remove_actual_filename_ext = False

        if suffix is None:

            suffix = ''

        if filename_string_converter is None:

            filename_string_converter = ClientStrings.StringConverter( example_string = 'my_image.jpg.json' )

        if string_processor is None:

            string_processor = ClientStrings.StringProcessor()

        HydrusSerialisable.SerialisableBase.__init__( self )
        SingleFileMetadataImporterSidecar.__init__( self, string_processor, remove_actual_filename_ext, suffix, filename_string_converter )

        if json_parsing_formula is None:

            parse_rules = [ ( ClientParsing.JSON_PARSE_RULE_TYPE_ALL_ITEMS, None ) ]

            json_parsing_formula = ClientParsing.ParseFormulaJSON( parse_rules = parse_rules, content_to_fetch = ClientParsing.JSON_CONTENT_STRING )

        self._json_parsing_formula = json_parsing_formula

    def _GetSerialisableInfo( self ):
        """Return ( string processor, remove-ext flag, suffix, filename converter, parsing formula ) in serialisable form."""

        serialisable_string_processor = self._string_processor.GetSerialisableTuple()
        serialisable_filename_string_converter = self._filename_string_converter.GetSerialisableTuple()
        serialisable_json_parsing_formula = self._json_parsing_formula.GetSerialisableTuple()

        return ( serialisable_string_processor, self._remove_actual_filename_ext, self._suffix, serialisable_filename_string_converter, serialisable_json_parsing_formula )

    def _InitialiseFromSerialisableInfo( self, serialisable_info ):
        """Restore state from the tuple produced by _GetSerialisableInfo."""

        ( serialisable_string_processor, self._remove_actual_filename_ext, self._suffix, serialisable_filename_string_converter, serialisable_json_parsing_formula ) = serialisable_info

        self._string_processor = HydrusSerialisable.CreateFromSerialisableTuple( serialisable_string_processor )
        self._filename_string_converter = HydrusSerialisable.CreateFromSerialisableTuple( serialisable_filename_string_converter )
        self._json_parsing_formula = HydrusSerialisable.CreateFromSerialisableTuple( serialisable_json_parsing_formula )

    def _UpdateSerialisableInfo( self, version, old_serialisable_info ):
        """Upgrade stored info one version step (1 -> 2 or 2 -> 3)."""

        if version == 1:

            # version 1 was ( suffix, formula ); version 2 puts a default string processor in front
            ( suffix, serialisable_json_parsing_formula ) = old_serialisable_info

            string_processor = ClientStrings.StringProcessor()

            serialisable_string_processor = string_processor.GetSerialisableTuple()

            new_serialisable_info = ( serialisable_string_processor, suffix, serialisable_json_parsing_formula )

            return ( 2, new_serialisable_info )

        if version == 2:

            # version 3 adds the remove-extension flag and the filename string converter, both at their defaults
            ( serialisable_string_processor, suffix, serialisable_json_parsing_formula ) = old_serialisable_info

            remove_actual_filename_ext = False

            filename_string_converter = ClientStrings.StringConverter( example_string = 'my_image.jpg.json' )

            serialisable_filename_string_converter = filename_string_converter.GetSerialisableTuple()

            new_serialisable_info = ( serialisable_string_processor, remove_actual_filename_ext, suffix, serialisable_filename_string_converter, serialisable_json_parsing_formula )

            return ( 3, new_serialisable_info )

    def GetExpectedSidecarPath( self, actual_file_path: str ):
        """Return the 'json' sidecar path that GetSidecarPath computes for the given file path."""

        return ClientMetadataMigrationCore.GetSidecarPath( actual_file_path, self._remove_actual_filename_ext, self._suffix, self._filename_string_converter, 'json' )

    def GetJSONParsingFormula( self ) -> ClientParsing.ParseFormulaJSON:
        """Return the formula used to pull strings out of the sidecar's JSON."""

        return self._json_parsing_formula

    def Import( self, actual_file_path: str ) -> typing.Collection[ str ]:
        """
        Read and parse the JSON sidecar belonging to the given file.

        :param actual_file_path: path of the file whose sidecar should be read.
        :return: the parsed (and possibly processed) strings, or an empty list when no sidecar exists.
        :raises Exception: when the sidecar exists but cannot be read; the original error is chained as the cause.
        """

        path = self.GetExpectedSidecarPath( actual_file_path )

        if not os.path.exists( path ):

            # a missing sidecar is not an error, there is simply nothing to import
            return []

        try:

            with open( path, 'r', encoding = 'utf-8' ) as f:

                read_raw_json = f.read()

        except Exception as e:

            # same exception type and message as before, now with the cause chained explicitly
            raise Exception( 'Could not import from {}: {}'.format( path, str( e ) ) ) from e

        # the formula is called outside the try block, so whatever it raises propagates unchanged
        parsing_context = {}
        collapse_newlines = False

        rows = self._json_parsing_formula.Parse( parsing_context, read_raw_json, collapse_newlines )

        if self._string_processor.MakesChanges():

            rows = self._string_processor.ProcessStrings( rows )

        return rows

    def SetJSONParsingFormula( self, json_parsing_formula: ClientParsing.ParseFormulaJSON ):
        """Replace the formula used to pull strings out of the sidecar's JSON."""

        self._json_parsing_formula = json_parsing_formula

    def ToString( self ) -> str:
        """Return a description, with the string processing appended when it is active."""

        if self._string_processor.MakesChanges():

            full_munge_text = ', applying {}'.format( self._string_processor.ToString() )

        else:

            full_munge_text = ''

        return 'from JSON sidecar{}'.format( full_munge_text )
# add SingleFileMetadataImporterJSON to HydrusSerialisable's type-constant -> class table
# (presumably what lets stored objects of this type be recreated; confirm in HydrusSerialisable)
HydrusSerialisable.SERIALISABLE_TYPES_TO_OBJECT_TYPES[ HydrusSerialisable.SERIALISABLE_TYPE_METADATA_SINGLE_FILE_IMPORTER_JSON ] = SingleFileMetadataImporterJSON
class SingleFileMetadataImporterTXT( HydrusSerialisable.SerialisableBase, SingleFileMetadataImporterSidecar ):
    """
    Importer that reads strings from a .txt sidecar file.

    The sidecar path is derived from the actual file's path by
    ClientMetadataMigrationCore.GetSidecarPath. The text is split into rows on the
    configured separator (newline by default), and the string processor is applied
    when it makes changes.
    """

    SERIALISABLE_TYPE = HydrusSerialisable.SERIALISABLE_TYPE_METADATA_SINGLE_FILE_IMPORTER_TXT
    SERIALISABLE_NAME = 'Metadata Single File Importer TXT'
    SERIALISABLE_VERSION = 4

    def __init__( self, string_processor = None, remove_actual_filename_ext = None, suffix = None, filename_string_converter = None, separator = None ):
        """
        Every argument is optional; None selects the default.

        :param string_processor: defaults to a fresh ClientStrings.StringProcessor.
        :param remove_actual_filename_ext: defaults to False.
        :param suffix: defaults to the empty string.
        :param filename_string_converter: defaults to a StringConverter with example 'my_image.jpg.txt'.
        :param separator: text that separates rows in the sidecar; defaults to a newline.
        """

        if remove_actual_filename_ext is None:

            remove_actual_filename_ext = False

        if suffix is None:

            suffix = ''

        if filename_string_converter is None:

            filename_string_converter = ClientStrings.StringConverter( example_string = 'my_image.jpg.txt' )

        if string_processor is None:

            string_processor = ClientStrings.StringProcessor()

        if separator is None:

            separator = '\n'

        self._separator = separator

        HydrusSerialisable.SerialisableBase.__init__( self )
        SingleFileMetadataImporterSidecar.__init__( self, string_processor, remove_actual_filename_ext, suffix, filename_string_converter )

    def _GetSerialisableInfo( self ):
        """Return ( string processor, remove-ext flag, suffix, filename converter, separator ) in serialisable form."""

        serialisable_string_processor = self._string_processor.GetSerialisableTuple()
        serialisable_filename_string_converter = self._filename_string_converter.GetSerialisableTuple()

        return ( serialisable_string_processor, self._remove_actual_filename_ext, self._suffix, serialisable_filename_string_converter, self._separator )

    def _InitialiseFromSerialisableInfo( self, serialisable_info ):
        """Restore state from the tuple produced by _GetSerialisableInfo."""

        ( serialisable_string_processor, self._remove_actual_filename_ext, self._suffix, serialisable_filename_string_converter, self._separator ) = serialisable_info

        self._string_processor = HydrusSerialisable.CreateFromSerialisableTuple( serialisable_string_processor )
        self._filename_string_converter = HydrusSerialisable.CreateFromSerialisableTuple( serialisable_filename_string_converter )

    def _UpdateSerialisableInfo( self, version, old_serialisable_info ):
        """Upgrade stored info one version step (1 -> 2, 2 -> 3 or 3 -> 4)."""

        if version == 1:

            # version 1 stored just the suffix; version 2 puts a default string processor in front of it
            suffix = old_serialisable_info

            string_processor = ClientStrings.StringProcessor()

            serialisable_string_processor = string_processor.GetSerialisableTuple()

            new_serialisable_info = ( serialisable_string_processor, suffix )

            return ( 2, new_serialisable_info )

        if version == 2:

            # version 3 adds the remove-extension flag and the filename string converter, both at their defaults
            ( serialisable_string_processor, suffix ) = old_serialisable_info

            remove_actual_filename_ext = False

            filename_string_converter = ClientStrings.StringConverter( example_string = 'my_image.jpg.txt' )

            serialisable_filename_string_converter = filename_string_converter.GetSerialisableTuple()

            new_serialisable_info = ( serialisable_string_processor, remove_actual_filename_ext, suffix, serialisable_filename_string_converter )

            return ( 3, new_serialisable_info )

        if version == 3:

            # version 4 adds the row separator, defaulting to a newline
            ( serialisable_string_processor, remove_actual_filename_ext, suffix, serialisable_filename_string_converter ) = old_serialisable_info

            separator = '\n'

            new_serialisable_info = ( serialisable_string_processor, remove_actual_filename_ext, suffix, serialisable_filename_string_converter, separator )

            return ( 4, new_serialisable_info )

    def GetExpectedSidecarPath( self, actual_file_path: str ):
        """Return the 'txt' sidecar path that GetSidecarPath computes for the given file path."""

        return ClientMetadataMigrationCore.GetSidecarPath( actual_file_path, self._remove_actual_filename_ext, self._suffix, self._filename_string_converter, 'txt' )

    def GetSeparator( self ) -> str:
        """Return the text that separates rows in the sidecar."""

        return self._separator

    def Import( self, actual_file_path: str ) -> typing.Collection[ str ]:
        """
        Read the .txt sidecar belonging to the given file and split it into rows.

        :param actual_file_path: path of the file whose sidecar should be read.
        :return: the (possibly processed) rows, or an empty list when no sidecar exists.
        :raises Exception: when the sidecar exists but cannot be read; the original error is chained as the cause.
        """

        path = self.GetExpectedSidecarPath( actual_file_path )

        if not os.path.exists( path ):

            # a missing sidecar is not an error, there is simply nothing to import
            return []

        try:

            with open( path, 'r', encoding = 'utf-8' ) as f:

                raw_text = f.read()

        except Exception as e:

            # same exception type and message as before, now with the cause chained explicitly
            raise Exception( 'Could not import from {}: {}'.format( path, str( e ) ) ) from e

        rows = HydrusText.DeserialiseNewlinedTexts( raw_text )

        if self._separator != '\n':

            # don't want any newlines, so this 'undo' is correct
            rejoined_text = ''.join( rows )

            rows = rejoined_text.split( self._separator )

        if self._string_processor.MakesChanges():

            rows = self._string_processor.ProcessStrings( rows )

        return rows

    def SetSeparator( self, separator: str ):
        """Change the text that separates rows in the sidecar."""

        self._separator = separator

    def ToString( self ) -> str:
        """Return a description, with the string processing appended when it is active."""

        if self._string_processor.MakesChanges():

            full_munge_text = ', applying {}'.format( self._string_processor.ToString() )

        else:

            full_munge_text = ''

        # the template needs the '{}' placeholder, otherwise str.format silently drops full_munge_text
        return 'from .txt sidecar{}'.format( full_munge_text )
# add SingleFileMetadataImporterTXT to HydrusSerialisable's type-constant -> class table
# (presumably what lets stored objects of this type be recreated; confirm in HydrusSerialisable)
HydrusSerialisable.SERIALISABLE_TYPES_TO_OBJECT_TYPES[ HydrusSerialisable.SERIALISABLE_TYPE_METADATA_SINGLE_FILE_IMPORTER_TXT ] = SingleFileMetadataImporterTXT