hydrus/hydrus/client/db/ClientDBTagSiblings.py

1004 lines
45 KiB
Python

import collections
import itertools
import sqlite3
import typing
from hydrus.core import HydrusConstants as HC
from hydrus.core import HydrusData
from hydrus.core import HydrusDBBase
from hydrus.client import ClientConstants as CC
from hydrus.client.db import ClientDBDefinitionsCache
from hydrus.client.db import ClientDBMaster
from hydrus.client.db import ClientDBModule
from hydrus.client.db import ClientDBServices
from hydrus.client.metadata import ClientTags
from hydrus.client.metadata import ClientTagsHandling
def GenerateTagSiblingsLookupCacheTableName( display_type: int, service_id: int ):
( cache_ideal_tag_siblings_lookup_table_name, cache_actual_tag_siblings_lookup_table_name ) = GenerateTagSiblingsLookupCacheTableNames( service_id )
if display_type == ClientTags.TAG_DISPLAY_IDEAL:
return cache_ideal_tag_siblings_lookup_table_name
elif display_type == ClientTags.TAG_DISPLAY_ACTUAL:
return cache_actual_tag_siblings_lookup_table_name
def GenerateTagSiblingsLookupCacheTableNames( service_id ):
cache_ideal_tag_siblings_lookup_table_name = 'external_caches.ideal_tag_siblings_lookup_cache_{}'.format( service_id )
cache_actual_tag_siblings_lookup_table_name = 'external_caches.actual_tag_siblings_lookup_cache_{}'.format( service_id )
return ( cache_ideal_tag_siblings_lookup_table_name, cache_actual_tag_siblings_lookup_table_name )
class ClientDBTagSiblings( ClientDBModule.ClientDBModule ):
CAN_REPOPULATE_ALL_MISSING_DATA = True
def __init__( self, cursor: sqlite3.Cursor, modules_services: ClientDBServices.ClientDBMasterServices, modules_tags: ClientDBMaster.ClientDBMasterTags, modules_tags_local_cache: ClientDBDefinitionsCache.ClientDBCacheLocalTags ):
self.modules_services = modules_services
self.modules_tags_local_cache = modules_tags_local_cache
self.modules_tags = modules_tags
self._service_ids_to_display_application_status = {}
self._service_ids_to_applicable_service_ids = None
self._service_ids_to_interested_service_ids = None
ClientDBModule.ClientDBModule.__init__( self, 'client tag siblings', cursor )
def _GenerateApplicationDicts( self ):
unsorted_dict = HydrusData.BuildKeyToListDict( ( master_service_id, ( index, application_service_id ) ) for ( master_service_id, index, application_service_id ) in self._Execute( 'SELECT master_service_id, service_index, application_service_id FROM tag_sibling_application;' ) )
self._service_ids_to_applicable_service_ids = collections.defaultdict( list )
self._service_ids_to_applicable_service_ids.update( { master_service_id : [ application_service_id for ( index, application_service_id ) in sorted( index_and_applicable_service_ids ) ] for ( master_service_id, index_and_applicable_service_ids ) in unsorted_dict.items() } )
self._service_ids_to_interested_service_ids = collections.defaultdict( set )
for ( master_service_id, application_service_ids ) in self._service_ids_to_applicable_service_ids.items():
for application_service_id in application_service_ids:
self._service_ids_to_interested_service_ids[ application_service_id ].add( master_service_id )
def _GetInitialIndexGenerationDict( self ) -> dict:
index_generation_dict = {}
index_generation_dict[ 'tag_siblings' ] = [
( [ 'service_id', 'good_tag_id' ], False, 420 )
]
index_generation_dict[ 'tag_sibling_petitions' ] = [
( [ 'service_id', 'good_tag_id' ], False, 420 )
]
return index_generation_dict
def _GetInitialTableGenerationDict( self ) -> dict:
return {
'main.tag_siblings' : ( 'CREATE TABLE IF NOT EXISTS {} ( service_id INTEGER, bad_tag_id INTEGER, good_tag_id INTEGER, status INTEGER, PRIMARY KEY ( service_id, bad_tag_id, status ) );', 414 ),
'main.tag_sibling_petitions' : ( 'CREATE TABLE IF NOT EXISTS {} ( service_id INTEGER, bad_tag_id INTEGER, good_tag_id INTEGER, status INTEGER, reason_id INTEGER, PRIMARY KEY ( service_id, bad_tag_id, status ) );', 414 ),
'main.tag_sibling_application' : ( 'CREATE TABLE IF NOT EXISTS {} ( master_service_id INTEGER, service_index INTEGER, application_service_id INTEGER, PRIMARY KEY ( master_service_id, service_index ) );', 414 )
}
def _GetServiceIndexGenerationDict( self, service_id ) -> dict:
( cache_ideal_tag_siblings_lookup_table_name, cache_actual_tag_siblings_lookup_table_name ) = GenerateTagSiblingsLookupCacheTableNames( service_id )
index_generation_dict = {}
index_generation_dict[ cache_actual_tag_siblings_lookup_table_name ] = [
( [ 'ideal_tag_id' ], False, 414 )
]
index_generation_dict[ cache_ideal_tag_siblings_lookup_table_name ] = [
( [ 'ideal_tag_id' ], False, 414 )
]
return index_generation_dict
def _GetServiceTableGenerationDict( self, service_id ) -> dict:
( cache_ideal_tag_siblings_lookup_table_name, cache_actual_tag_siblings_lookup_table_name ) = GenerateTagSiblingsLookupCacheTableNames( service_id )
return {
cache_actual_tag_siblings_lookup_table_name : ( 'CREATE TABLE IF NOT EXISTS {} ( bad_tag_id INTEGER PRIMARY KEY, ideal_tag_id INTEGER );', 414 ),
cache_ideal_tag_siblings_lookup_table_name : ( 'CREATE TABLE IF NOT EXISTS {} ( bad_tag_id INTEGER PRIMARY KEY, ideal_tag_id INTEGER );', 414 )
}
def _GetServiceIdsWeGenerateDynamicTablesFor( self ):
return self.modules_services.GetServiceIds( HC.REAL_TAG_SERVICES )
def _RepairRepopulateTables( self, repopulate_table_names, cursor_transaction_wrapper: HydrusDBBase.DBCursorTransactionWrapper ):
for service_id in self._GetServiceIdsWeGenerateDynamicTablesFor():
table_generation_dict = self._GetServiceTableGenerationDict( service_id )
this_service_table_names = set( table_generation_dict.keys() )
this_service_needs_repopulation = len( this_service_table_names.intersection( repopulate_table_names ) ) > 0
if this_service_needs_repopulation:
self._service_ids_to_applicable_service_ids = None
self._service_ids_to_interested_service_ids = None
self.Regen( ( service_id, ) )
cursor_transaction_wrapper.CommitAndBegin()
def AddTagSiblings( self, service_id, pairs ):
self._ExecuteMany( 'DELETE FROM tag_siblings WHERE service_id = ? AND bad_tag_id = ? AND good_tag_id = ?;', ( ( service_id, bad_tag_id, good_tag_id ) for ( bad_tag_id, good_tag_id ) in pairs ) )
self._ExecuteMany( 'DELETE FROM tag_sibling_petitions WHERE service_id = ? AND bad_tag_id = ? AND good_tag_id = ? AND status = ?;', ( ( service_id, bad_tag_id, good_tag_id, HC.CONTENT_STATUS_PENDING ) for ( bad_tag_id, good_tag_id ) in pairs ) )
self._ExecuteMany( 'INSERT OR IGNORE INTO tag_siblings ( service_id, bad_tag_id, good_tag_id, status ) VALUES ( ?, ?, ?, ? );', ( ( service_id, bad_tag_id, good_tag_id, HC.CONTENT_STATUS_CURRENT ) for ( bad_tag_id, good_tag_id ) in pairs ) )
def ClearActual( self, service_id ):
cache_actual_tag_sibling_lookup_table_name = GenerateTagSiblingsLookupCacheTableName( ClientTags.TAG_DISPLAY_ACTUAL, service_id )
self._Execute( 'DELETE FROM {};'.format( cache_actual_tag_sibling_lookup_table_name ) )
if service_id in self._service_ids_to_display_application_status:
del self._service_ids_to_display_application_status[ service_id ]
def DeleteTagSiblings( self, service_id, pairs ):
self._ExecuteMany( 'DELETE FROM tag_siblings WHERE service_id = ? AND bad_tag_id = ? AND good_tag_id = ?;', ( ( service_id, bad_tag_id, good_tag_id ) for ( bad_tag_id, good_tag_id ) in pairs ) )
self._ExecuteMany( 'DELETE FROM tag_sibling_petitions WHERE service_id = ? AND bad_tag_id = ? AND good_tag_id = ? AND status = ?;', ( ( service_id, bad_tag_id, good_tag_id, HC.CONTENT_STATUS_PETITIONED ) for ( bad_tag_id, good_tag_id ) in pairs ) )
self._ExecuteMany( 'INSERT OR IGNORE INTO tag_siblings ( service_id, bad_tag_id, good_tag_id, status ) VALUES ( ?, ?, ?, ? );', ( ( service_id, bad_tag_id, good_tag_id, HC.CONTENT_STATUS_DELETED ) for ( bad_tag_id, good_tag_id ) in pairs ) )
def Drop( self, tag_service_id ):
self._Execute( 'DELETE FROM tag_siblings WHERE service_id = ?;', ( tag_service_id, ) )
self._Execute( 'DELETE FROM tag_sibling_petitions WHERE service_id = ?;', ( tag_service_id, ) )
( cache_ideal_tag_siblings_lookup_table_name, cache_actual_tag_siblings_lookup_table_name ) = GenerateTagSiblingsLookupCacheTableNames( tag_service_id )
self._Execute( 'DROP TABLE IF EXISTS {};'.format( cache_actual_tag_siblings_lookup_table_name ) )
self._Execute( 'DROP TABLE IF EXISTS {};'.format( cache_ideal_tag_siblings_lookup_table_name ) )
self._Execute( 'DELETE FROM tag_sibling_application WHERE master_service_id = ? OR application_service_id = ?;', ( tag_service_id, tag_service_id ) )
self._service_ids_to_applicable_service_ids = None
self._service_ids_to_interested_service_ids = None
def FilterChained( self, display_type, tag_service_id, tag_ids ):
if len( tag_ids ) == 0:
return set()
elif len( tag_ids ) == 1:
( tag_id, ) = tag_ids
if self.IsChained( display_type, tag_service_id, tag_id ):
return { tag_id }
else:
return set()
# get the tag_ids that are part of a sibling chain
cache_tag_siblings_lookup_table_name = GenerateTagSiblingsLookupCacheTableName( display_type, tag_service_id )
with self._MakeTemporaryIntegerTable( tag_ids, 'tag_id' ) as temp_table_name:
# keep these separate--older sqlite can't do cross join to an OR ON
# temp tags to lookup
chain_tag_ids = self._STS( self._Execute( 'SELECT tag_id FROM {} CROSS JOIN {} ON ( bad_tag_id = tag_id );'.format( temp_table_name, cache_tag_siblings_lookup_table_name ) ) )
chain_tag_ids.update( self._STI( self._Execute( 'SELECT tag_id FROM {} CROSS JOIN {} ON ( ideal_tag_id = tag_id );'.format( temp_table_name, cache_tag_siblings_lookup_table_name ) ) ) )
return chain_tag_ids
def FilterChainedIdealsIntoTable( self, display_type, tag_service_id, tag_ids_table_name, results_table_name ):
# get the tag_ids that are part of a sibling chain
cache_tag_siblings_lookup_table_name = GenerateTagSiblingsLookupCacheTableName( display_type, tag_service_id )
# keep these separate--older sqlite can't do cross join to an OR ON
# temp tags to lookup
self._Execute( 'INSERT OR IGNORE INTO {} SELECT ideal_tag_id FROM {} CROSS JOIN {} ON ( bad_tag_id = tag_id );'.format( results_table_name, tag_ids_table_name, cache_tag_siblings_lookup_table_name ) )
self._STI( self._Execute( 'INSERT OR IGNORE INTO {} SELECT ideal_tag_id FROM {} CROSS JOIN {} ON ( ideal_tag_id = tag_id );'.format( results_table_name, tag_ids_table_name, cache_tag_siblings_lookup_table_name ) ) )
def FilterChainedIntoTable( self, display_type, tag_service_id, tag_ids_table_name, results_table_name ):
# get the tag_ids that are part of a sibling chain
cache_tag_siblings_lookup_table_name = GenerateTagSiblingsLookupCacheTableName( display_type, tag_service_id )
# keep these separate--older sqlite can't do cross join to an OR ON
# temp tags to lookup
self._Execute( 'INSERT OR IGNORE INTO {} SELECT tag_id FROM {} CROSS JOIN {} ON ( bad_tag_id = tag_id );'.format( results_table_name, tag_ids_table_name, cache_tag_siblings_lookup_table_name ) )
self._STI( self._Execute( 'INSERT OR IGNORE INTO {} SELECT tag_id FROM {} CROSS JOIN {} ON ( ideal_tag_id = tag_id );'.format( results_table_name, tag_ids_table_name, cache_tag_siblings_lookup_table_name ) ) )
def Generate( self, tag_service_id ):
table_generation_dict = self._GetServiceTableGenerationDict( tag_service_id )
for ( table_name, ( create_query_without_name, version_added ) ) in table_generation_dict.items():
self._CreateTable( create_query_without_name, table_name )
index_generation_dict = self._GetServiceIndexGenerationDict( tag_service_id )
for ( table_name, columns, unique, version_added ) in self._FlattenIndexGenerationDict( index_generation_dict ):
self._CreateIndex( table_name, columns, unique = unique )
self._Execute( 'INSERT OR IGNORE INTO tag_sibling_application ( master_service_id, service_index, application_service_id ) VALUES ( ?, ?, ? );', ( tag_service_id, 0, tag_service_id ) )
self._service_ids_to_applicable_service_ids = None
self._service_ids_to_interested_service_ids = None
self.Regen( ( tag_service_id, ) )
def GetAllTagIds( self, display_type, tag_service_id ):
cache_tag_siblings_lookup_table_name = GenerateTagSiblingsLookupCacheTableName( display_type, tag_service_id )
tag_ids = set()
tag_ids.update( self._STI( self._Execute( 'SELECT DISTINCT bad_tag_id FROM {};'.format( cache_tag_siblings_lookup_table_name ) ) ) )
tag_ids.update( self._STI( self._Execute( 'SELECT ideal_tag_id FROM {};'.format( cache_tag_siblings_lookup_table_name ) ) ) )
return tag_ids
def GetApplicableServiceIds( self, tag_service_id ):
if self._service_ids_to_applicable_service_ids is None:
self._GenerateApplicationDicts()
return self._service_ids_to_applicable_service_ids[ tag_service_id ]
def GetApplication( self ):
if self._service_ids_to_applicable_service_ids is None:
self._GenerateApplicationDicts()
service_ids_to_service_keys = {}
service_keys_to_applicable_service_keys = {}
for ( master_service_id, applicable_service_ids ) in self._service_ids_to_applicable_service_ids.items():
all_service_ids = [ master_service_id ] + list( applicable_service_ids )
for service_id in all_service_ids:
if service_id not in service_ids_to_service_keys:
service_ids_to_service_keys[ service_id ] = self.modules_services.GetService( service_id ).GetServiceKey()
service_keys_to_applicable_service_keys[ service_ids_to_service_keys[ master_service_id ] ] = [ service_ids_to_service_keys[ service_id ] for service_id in applicable_service_ids ]
return service_keys_to_applicable_service_keys
def GetApplicationStatus( self, service_id ):
if service_id not in self._service_ids_to_display_application_status:
( cache_ideal_tag_siblings_lookup_table_name, cache_actual_tag_siblings_lookup_table_name ) = GenerateTagSiblingsLookupCacheTableNames( service_id )
actual_sibling_rows = set( self._Execute( 'SELECT bad_tag_id, ideal_tag_id FROM {};'.format( cache_actual_tag_siblings_lookup_table_name ) ) )
ideal_sibling_rows = set( self._Execute( 'SELECT bad_tag_id, ideal_tag_id FROM {};'.format( cache_ideal_tag_siblings_lookup_table_name ) ) )
sibling_rows_to_remove = actual_sibling_rows.difference( ideal_sibling_rows )
sibling_rows_to_add = ideal_sibling_rows.difference( actual_sibling_rows )
num_actual_rows = len( actual_sibling_rows )
num_ideal_rows = len( ideal_sibling_rows )
self._service_ids_to_display_application_status[ service_id ] = ( sibling_rows_to_add, sibling_rows_to_remove, num_actual_rows, num_ideal_rows )
( sibling_rows_to_add, sibling_rows_to_remove, num_actual_rows, num_ideal_rows ) = self._service_ids_to_display_application_status[ service_id ]
return ( sibling_rows_to_add, sibling_rows_to_remove, num_actual_rows, num_ideal_rows )
def GetChainMembersFromIdeal( self, display_type, tag_service_id, ideal_tag_id ) -> typing.Set[ int ]:
cache_tag_siblings_lookup_table_name = GenerateTagSiblingsLookupCacheTableName( display_type, tag_service_id )
sibling_tag_ids = self._STS( self._Execute( 'SELECT bad_tag_id FROM {} WHERE ideal_tag_id = ?;'.format( cache_tag_siblings_lookup_table_name ), ( ideal_tag_id, ) ) )
sibling_tag_ids.add( ideal_tag_id )
return sibling_tag_ids
def GetChainsMembersFromIdeals( self, display_type, tag_service_id, ideal_tag_ids ) -> typing.Set[ int ]:
if len( ideal_tag_ids ) == 0:
return set()
elif len( ideal_tag_ids ) == 1:
( ideal_tag_id, ) = ideal_tag_ids
return self.GetChainMembersFromIdeal( display_type, tag_service_id, ideal_tag_id )
cache_tag_siblings_lookup_table_name = GenerateTagSiblingsLookupCacheTableName( display_type, tag_service_id )
with self._MakeTemporaryIntegerTable( ideal_tag_ids, 'ideal_tag_id' ) as temp_table_name:
# temp tags to lookup
sibling_tag_ids = self._STS( self._Execute( 'SELECT bad_tag_id FROM {} CROSS JOIN {} USING ( ideal_tag_id );'.format( temp_table_name, cache_tag_siblings_lookup_table_name ) ) )
sibling_tag_ids.update( ideal_tag_ids )
return sibling_tag_ids
def GetChainsMembersFromIdealsTables( self, display_type, tag_service_id, ideal_tag_ids_table_name, results_table_name ):
cache_tag_siblings_lookup_table_name = GenerateTagSiblingsLookupCacheTableName( display_type, tag_service_id )
self._Execute( 'INSERT OR IGNORE INTO {} ( tag_id ) SELECT ideal_tag_id FROM {};'.format( results_table_name, ideal_tag_ids_table_name ) )
# tags to lookup
self._Execute( 'INSERT OR IGNORE INTO {} ( tag_id ) SELECT bad_tag_id FROM {} CROSS JOIN {} USING ( ideal_tag_id );'.format( results_table_name, ideal_tag_ids_table_name, cache_tag_siblings_lookup_table_name ) )
def GetIdeals( self, tag_display_type, service_key, tags ) -> typing.Set[ str ]:
if service_key == CC.COMBINED_TAG_SERVICE_KEY:
tag_service_ids = self.modules_services.GetServiceIds( HC.REAL_TAG_SERVICES )
else:
tag_service_ids = ( self.modules_services.GetServiceId( service_key ), )
existing_tags = { tag for tag in tags if self.modules_tags.TagExists( tag ) }
existing_tag_ids = set( self.modules_tags_local_cache.GetTagIdsToTags( tags = existing_tags ).keys() )
result_ideal_tag_ids = set()
for tag_service_id in tag_service_ids:
ideal_tag_ids = self.GetIdealTagIds( tag_display_type, tag_service_id, existing_tag_ids )
result_ideal_tag_ids.update( ideal_tag_ids )
tag_ids_to_tags = self.modules_tags_local_cache.GetTagIdsToTags( tag_ids = result_ideal_tag_ids )
ideal_tags = set( tag_ids_to_tags.values() )
for tag in tags:
if tag not in existing_tags:
ideal_tags.add( tag )
return ideal_tags
def GetIdealTagId( self, display_type, tag_service_id, tag_id ) -> int:
cache_tag_siblings_lookup_table_name = GenerateTagSiblingsLookupCacheTableName( display_type, tag_service_id )
result = self._Execute( 'SELECT ideal_tag_id FROM {} WHERE bad_tag_id = ?;'.format( cache_tag_siblings_lookup_table_name ), ( tag_id, ) ).fetchone()
if result is None:
return tag_id
else:
( ideal_tag_id, ) = result
return ideal_tag_id
def GetIdealTagIds( self, display_type, tag_service_id, tag_ids ) -> typing.Set[ int ]:
if not isinstance( tag_ids, set ):
tag_ids = set( tag_ids )
if len( tag_ids ) == 0:
return set()
elif len( tag_ids ) == 1:
( tag_id, ) = tag_ids
return { self.GetIdealTagId( display_type, tag_service_id, tag_id ) }
cache_tag_siblings_lookup_table_name = GenerateTagSiblingsLookupCacheTableName( display_type, tag_service_id )
with self._MakeTemporaryIntegerTable( tag_ids, 'tag_id' ) as temp_tag_ids_table_name:
magic_case = 'CASE WHEN ideal_tag_id NOT NULL THEN ideal_tag_id ELSE tag_id END'
cursor = self._Execute(
'SELECT {} FROM {} LEFT OUTER JOIN {} ON ( tag_id = bad_tag_id );'.format(
magic_case,
temp_tag_ids_table_name,
cache_tag_siblings_lookup_table_name
)
)
return self._STS( cursor )
'''
no_ideal_found_tag_ids = set( tag_ids )
ideal_tag_ids = set()
with self._MakeTemporaryIntegerTable( tag_ids, 'tag_id' ) as temp_table_name:
# temp tags to lookup
for ( tag_id, ideal_tag_id ) in self._Execute( 'SELECT tag_id, ideal_tag_id FROM {} CROSS JOIN {} ON ( bad_tag_id = tag_id );'.format( temp_table_name, cache_tag_siblings_lookup_table_name ) ):
no_ideal_found_tag_ids.discard( tag_id )
ideal_tag_ids.add( ideal_tag_id )
ideal_tag_ids.update( no_ideal_found_tag_ids )
return ideal_tag_ids
'''
def GetIdealTagIdsIntoTable( self, display_type, tag_service_id, tag_ids_table_name, results_table_name ):
cache_tag_siblings_lookup_table_name = GenerateTagSiblingsLookupCacheTableName( display_type, tag_service_id )
magic_case = 'CASE WHEN ideal_tag_id NOT NULL THEN ideal_tag_id ELSE tag_id END'
cursor = self._Execute(
'INSERT OR IGNORE INTO {} ( ideal_tag_id ) SELECT {} FROM {} LEFT OUTER JOIN {} ON ( tag_id = bad_tag_id );'.format(
results_table_name,
magic_case,
tag_ids_table_name,
cache_tag_siblings_lookup_table_name
)
)
return self._STS( cursor )
def GetIdealTagIdsToChains( self, display_type, tag_service_id, ideal_tag_ids ):
# this only takes ideal_tag_ids
if len( ideal_tag_ids ) == 0:
return {}
elif len( ideal_tag_ids ) == 1:
( ideal_tag_id, ) = ideal_tag_ids
chain_tag_ids = self.GetChainMembersFromIdeal( display_type, tag_service_id, ideal_tag_id )
return { ideal_tag_id : chain_tag_ids }
cache_tag_siblings_lookup_table_name = GenerateTagSiblingsLookupCacheTableName( display_type, tag_service_id )
with self._MakeTemporaryIntegerTable( ideal_tag_ids, 'ideal_tag_id' ) as temp_table_name:
# temp tags to lookup
ideal_tag_ids_to_chain_members = HydrusData.BuildKeyToSetDict( self._Execute( 'SELECT ideal_tag_id, bad_tag_id FROM {} CROSS JOIN {} USING ( ideal_tag_id );'.format( temp_table_name, cache_tag_siblings_lookup_table_name ) ) )
# this returns ideal in the chain, and chains of size 1
for ideal_tag_id in ideal_tag_ids:
ideal_tag_ids_to_chain_members[ ideal_tag_id ].add( ideal_tag_id )
return ideal_tag_ids_to_chain_members
def GetInterestedServiceIds( self, tag_service_id ):
if self._service_ids_to_interested_service_ids is None:
self._GenerateApplicationDicts()
return self._service_ids_to_interested_service_ids[ tag_service_id ]
def GetTablesAndColumnsThatUseDefinitions( self, content_type: int ) -> typing.List[ typing.Tuple[ str, str ] ]:
if content_type == HC.CONTENT_TYPE_TAG:
return [
( 'tag_siblings', 'bad_tag_id' ),
( 'tag_siblings', 'good_tag_id' ),
( 'tag_sibling_petitions', 'bad_tag_id' ),
( 'tag_sibling_petitions', 'good_tag_id' )
]
return []
def GetTagIdsToIdealTagIds( self, display_type, tag_service_id, tag_ids ):
if not isinstance( tag_ids, set ):
tag_ids = set( tag_ids )
if len( tag_ids ) == 0:
return {}
elif len( tag_ids ) == 1:
( tag_id, ) = tag_ids
return { tag_id : self.GetIdealTagId( display_type, tag_service_id, tag_id ) }
cache_tag_siblings_lookup_table_name = GenerateTagSiblingsLookupCacheTableName( display_type, tag_service_id )
no_ideal_found_tag_ids = set( tag_ids )
tag_ids_to_ideal_tag_ids = {}
with self._MakeTemporaryIntegerTable( tag_ids, 'tag_id' ) as temp_table_name:
# temp tags to lookup
for ( tag_id, ideal_tag_id ) in self._Execute( 'SELECT tag_id, ideal_tag_id FROM {} CROSS JOIN {} ON ( bad_tag_id = tag_id );'.format( temp_table_name, cache_tag_siblings_lookup_table_name ) ):
no_ideal_found_tag_ids.discard( tag_id )
tag_ids_to_ideal_tag_ids[ tag_id ] = ideal_tag_id
tag_ids_to_ideal_tag_ids.update( { tag_id : tag_id for tag_id in no_ideal_found_tag_ids } )
return tag_ids_to_ideal_tag_ids
def GetTagSiblingsForTags( self, service_key, tags ) -> typing.Dict[ str, str ]:
if service_key == CC.COMBINED_TAG_SERVICE_KEY:
tag_service_ids = self.modules_services.GetServiceIds( HC.REAL_TAG_SERVICES )
else:
tag_service_ids = ( self.modules_services.GetServiceId( service_key ), )
existing_tags = { tag for tag in tags if self.modules_tags.TagExists( tag ) }
existing_tag_ids = set( self.modules_tags_local_cache.GetTagIdsToTags( tags = existing_tags ).keys() )
tag_ids_to_chain_tag_ids = collections.defaultdict( set )
for tag_service_id in tag_service_ids:
tag_ids_to_ideal_tag_ids = self.GetTagIdsToIdealTagIds( ClientTags.TAG_DISPLAY_ACTUAL, tag_service_id, existing_tag_ids )
ideal_tag_ids = set( tag_ids_to_ideal_tag_ids.values() )
ideal_tag_ids_to_chain_tag_ids = self.GetIdealTagIdsToChains( ClientTags.TAG_DISPLAY_ACTUAL, tag_service_id, ideal_tag_ids )
for tag_id in existing_tag_ids:
chain_tag_ids = ideal_tag_ids_to_chain_tag_ids[ tag_ids_to_ideal_tag_ids[ tag_id ] ]
tag_ids_to_chain_tag_ids[ tag_id ].update( chain_tag_ids )
all_tag_ids = set( tag_ids_to_chain_tag_ids.keys() )
all_tag_ids.update( itertools.chain.from_iterable( tag_ids_to_chain_tag_ids.values() ) )
tag_ids_to_tags = self.modules_tags_local_cache.GetTagIdsToTags( tag_ids = all_tag_ids )
tags_to_siblings = { tag_ids_to_tags[ tag_id ] : { tag_ids_to_tags[ chain_tag_id ] for chain_tag_id in chain_tag_ids } for ( tag_id, chain_tag_ids ) in tag_ids_to_chain_tag_ids.items() }
for tag in tags:
if tag not in existing_tags:
tags_to_siblings[ tag ] = { tag }
return tags_to_siblings
def GetTagSiblingsIdeals( self, service_key ):
tag_service_id = self.modules_services.GetServiceId( service_key )
cache_tag_siblings_lookup_table_name = GenerateTagSiblingsLookupCacheTableName( ClientTags.TAG_DISPLAY_ACTUAL, tag_service_id )
pair_ids = self._Execute( 'SELECT bad_tag_id, ideal_tag_id FROM {};'.format( cache_tag_siblings_lookup_table_name ) ).fetchall()
all_tag_ids = set( itertools.chain.from_iterable( pair_ids ) )
tag_ids_to_tags = self.modules_tags_local_cache.GetTagIdsToTags( tag_ids = all_tag_ids )
tags_to_ideals = { tag_ids_to_tags[ bad_tag_id ] : tag_ids_to_tags[ good_tag_id ] for ( bad_tag_id, good_tag_id ) in pair_ids }
return tags_to_ideals
def GetTagSiblings( self, service_key ):
service_id = self.modules_services.GetServiceId( service_key )
statuses_to_pair_ids = self.GetTagSiblingsIds( service_id )
all_tag_ids = set()
for pair_ids in statuses_to_pair_ids.values():
for ( bad_tag_id, good_tag_id ) in pair_ids:
all_tag_ids.add( bad_tag_id )
all_tag_ids.add( good_tag_id )
tag_ids_to_tags = self.modules_tags_local_cache.GetTagIdsToTags( tag_ids = all_tag_ids )
statuses_to_pairs = collections.defaultdict( set )
statuses_to_pairs.update( { status : { ( tag_ids_to_tags[ bad_tag_id ], tag_ids_to_tags[ good_tag_id ] ) for ( bad_tag_id, good_tag_id ) in pair_ids } for ( status, pair_ids ) in statuses_to_pair_ids.items() } )
return statuses_to_pairs
def GetTagSiblingsIds( self, service_id ):
statuses_and_pair_ids = self._Execute( 'SELECT status, bad_tag_id, good_tag_id FROM tag_siblings WHERE service_id = ? UNION SELECT status, bad_tag_id, good_tag_id FROM tag_sibling_petitions WHERE service_id = ?;', ( service_id, service_id ) ).fetchall()
unsorted_statuses_to_pair_ids = HydrusData.BuildKeyToListDict( ( status, ( bad_tag_id, good_tag_id ) ) for ( status, bad_tag_id, good_tag_id ) in statuses_and_pair_ids )
statuses_to_pair_ids = collections.defaultdict( list )
statuses_to_pair_ids.update( { status : sorted( pair_ids ) for ( status, pair_ids ) in unsorted_statuses_to_pair_ids.items() } )
return statuses_to_pair_ids
def GetTagSiblingsIdsChains( self, service_id, tag_ids ):
done_tag_ids = set()
next_tag_ids = set( tag_ids )
result_rows = set()
while len( next_tag_ids ) > 0:
with self._MakeTemporaryIntegerTable( next_tag_ids, 'tag_id' ) as temp_next_tag_ids_table_name:
done_tag_ids.update( next_tag_ids )
next_tag_ids = set()
# keep these separate--older sqlite can't do cross join to an OR ON
# temp tag_ids to siblings
queries = [
'SELECT status, bad_tag_id, good_tag_id FROM {} CROSS JOIN tag_siblings ON ( bad_tag_id = tag_id ) WHERE service_id = ?'.format( temp_next_tag_ids_table_name ),
'SELECT status, bad_tag_id, good_tag_id FROM {} CROSS JOIN tag_siblings ON ( good_tag_id = tag_id ) WHERE service_id = ?'.format( temp_next_tag_ids_table_name ),
'SELECT status, bad_tag_id, good_tag_id FROM {} CROSS JOIN tag_sibling_petitions ON ( bad_tag_id = tag_id ) WHERE service_id = ?'.format( temp_next_tag_ids_table_name ),
'SELECT status, bad_tag_id, good_tag_id FROM {} CROSS JOIN tag_sibling_petitions ON ( good_tag_id = tag_id ) WHERE service_id = ?'.format( temp_next_tag_ids_table_name )
]
query = ' UNION '.join( queries )
for row in self._Execute( query, ( service_id, service_id, service_id, service_id ) ):
result_rows.add( row )
( status, bad_tag_id, good_tag_id ) = row
for tag_id in ( bad_tag_id, good_tag_id ):
if tag_id not in done_tag_ids:
next_tag_ids.add( tag_id )
unsorted_statuses_to_pair_ids = HydrusData.BuildKeyToListDict( ( status, ( bad_tag_id, good_tag_id ) ) for ( status, bad_tag_id, good_tag_id ) in result_rows )
statuses_to_pair_ids = collections.defaultdict( list )
statuses_to_pair_ids.update( { status : sorted( pair_ids ) for ( status, pair_ids ) in unsorted_statuses_to_pair_ids.items() } )
return statuses_to_pair_ids
def IsChained( self, display_type, tag_service_id, tag_id ):
cache_tag_siblings_lookup_table_name = GenerateTagSiblingsLookupCacheTableName( display_type, tag_service_id )
return self._Execute( 'SELECT 1 FROM {} WHERE bad_tag_id = ? OR ideal_tag_id = ?;'.format( cache_tag_siblings_lookup_table_name ), ( tag_id, tag_id ) ).fetchone() is not None
def NotifySiblingAddRowSynced( self, tag_service_id, row ):
if tag_service_id in self._service_ids_to_display_application_status:
( sibling_rows_to_add, sibling_rows_to_remove, num_actual_rows, num_ideal_rows ) = self._service_ids_to_display_application_status[ tag_service_id ]
sibling_rows_to_add.discard( row )
num_actual_rows += 1
self._service_ids_to_display_application_status[ tag_service_id ] = ( sibling_rows_to_add, sibling_rows_to_remove, num_actual_rows, num_ideal_rows )
def NotifySiblingDeleteRowSynced( self, tag_service_id, row ):
if tag_service_id in self._service_ids_to_display_application_status:
( sibling_rows_to_add, sibling_rows_to_remove, num_actual_rows, num_ideal_rows ) = self._service_ids_to_display_application_status[ tag_service_id ]
sibling_rows_to_remove.discard( row )
num_actual_rows -= 1
self._service_ids_to_display_application_status[ tag_service_id ] = ( sibling_rows_to_add, sibling_rows_to_remove, num_actual_rows, num_ideal_rows )
def PendTagSiblings( self, service_id, triples ):
self._ExecuteMany( 'DELETE FROM tag_sibling_petitions WHERE service_id = ? AND bad_tag_id = ? AND good_tag_id = ?;', ( ( service_id, bad_tag_id, good_tag_id ) for ( bad_tag_id, good_tag_id, reason_id ) in triples ) )
self._ExecuteMany( 'INSERT OR IGNORE INTO tag_sibling_petitions ( service_id, bad_tag_id, good_tag_id, reason_id, status ) VALUES ( ?, ?, ?, ?, ? );', ( ( service_id, bad_tag_id, good_tag_id, reason_id, HC.CONTENT_STATUS_PENDING ) for ( bad_tag_id, good_tag_id, reason_id ) in triples ) )
def PetitionTagSiblings( self, service_id, triples ):
self._ExecuteMany( 'DELETE FROM tag_sibling_petitions WHERE service_id = ? AND bad_tag_id = ? AND good_tag_id = ?;', ( ( service_id, bad_tag_id, good_tag_id ) for ( bad_tag_id, good_tag_id, reason_id ) in triples ) )
self._ExecuteMany( 'INSERT OR IGNORE INTO tag_sibling_petitions ( service_id, bad_tag_id, good_tag_id, reason_id, status ) VALUES ( ?, ?, ?, ?, ? );', ( ( service_id, bad_tag_id, good_tag_id, reason_id, HC.CONTENT_STATUS_PETITIONED ) for ( bad_tag_id, good_tag_id, reason_id ) in triples ) )
def RescindPendingTagSiblings( self, service_id, pairs ):
self._ExecuteMany( 'DELETE FROM tag_sibling_petitions WHERE service_id = ? AND bad_tag_id = ? AND good_tag_id = ? AND status = ?;', ( ( service_id, bad_tag_id, good_tag_id, HC.CONTENT_STATUS_PENDING ) for ( bad_tag_id, good_tag_id ) in pairs ) )
def RescindPetitionedTagSiblings( self, service_id, pairs ):
self._ExecuteMany( 'DELETE FROM tag_sibling_petitions WHERE service_id = ? AND bad_tag_id = ? AND good_tag_id = ? AND status = ?;', ( ( service_id, bad_tag_id, good_tag_id, HC.CONTENT_STATUS_PETITIONED ) for ( bad_tag_id, good_tag_id ) in pairs ) )
def Regen( self, tag_service_ids ):
for tag_service_id in tag_service_ids:
cache_tag_siblings_lookup_table_name = GenerateTagSiblingsLookupCacheTableName( ClientTags.TAG_DISPLAY_IDEAL, tag_service_id )
self._Execute( 'DELETE FROM {};'.format( cache_tag_siblings_lookup_table_name ) )
applicable_service_ids = self.GetApplicableServiceIds( tag_service_id )
tss = ClientTagsHandling.TagSiblingsStructure()
for applicable_service_id in applicable_service_ids:
statuses_to_pair_ids = self.GetTagSiblingsIds( service_id = applicable_service_id )
petitioned_fast_lookup = set( statuses_to_pair_ids[ HC.CONTENT_STATUS_PETITIONED ] )
for ( bad_tag_id, good_tag_id ) in statuses_to_pair_ids[ HC.CONTENT_STATUS_CURRENT ]:
if ( bad_tag_id, good_tag_id ) in petitioned_fast_lookup:
continue
tss.AddPair( bad_tag_id, good_tag_id )
for ( bad_tag_id, good_tag_id ) in statuses_to_pair_ids[ HC.CONTENT_STATUS_PENDING ]:
tss.AddPair( bad_tag_id, good_tag_id )
self._ExecuteMany( 'INSERT OR IGNORE INTO {} ( bad_tag_id, ideal_tag_id ) VALUES ( ?, ? );'.format( cache_tag_siblings_lookup_table_name ), tss.GetBadTagsToIdealTags().items() )
if tag_service_id in self._service_ids_to_display_application_status:
del self._service_ids_to_display_application_status[ tag_service_id ]
def RegenChains( self, tag_service_ids, tag_ids ):
# as this guy can change ideals, the related parent chains need to be regenned afterwards too
if len( tag_ids ) == 0:
return
for tag_service_id in tag_service_ids:
cache_tag_siblings_lookup_table_name = GenerateTagSiblingsLookupCacheTableName( ClientTags.TAG_DISPLAY_IDEAL, tag_service_id )
tag_ids_to_clear_and_regen = set( tag_ids )
ideal_tag_ids = self.GetIdealTagIds( ClientTags.TAG_DISPLAY_IDEAL, tag_service_id, tag_ids )
tag_ids_to_clear_and_regen.update( self.GetChainsMembersFromIdeals( ClientTags.TAG_DISPLAY_IDEAL, tag_service_id, ideal_tag_ids ) )
self._ExecuteMany( 'DELETE FROM {} WHERE bad_tag_id = ? OR ideal_tag_id = ?;'.format( cache_tag_siblings_lookup_table_name ), ( ( tag_id, tag_id ) for tag_id in tag_ids_to_clear_and_regen ) )
applicable_tag_service_ids = self.GetApplicableServiceIds( tag_service_id )
tss = ClientTagsHandling.TagSiblingsStructure()
for applicable_tag_service_id in applicable_tag_service_ids:
service_key = self.modules_services.GetService( applicable_tag_service_id ).GetServiceKey()
statuses_to_pair_ids = self.GetTagSiblingsIdsChains( applicable_tag_service_id, tag_ids_to_clear_and_regen )
petitioned_fast_lookup = set( statuses_to_pair_ids[ HC.CONTENT_STATUS_PETITIONED ] )
for ( bad_tag_id, good_tag_id ) in statuses_to_pair_ids[ HC.CONTENT_STATUS_CURRENT ]:
if ( bad_tag_id, good_tag_id ) in petitioned_fast_lookup:
continue
tss.AddPair( bad_tag_id, good_tag_id )
for ( bad_tag_id, good_tag_id ) in statuses_to_pair_ids[ HC.CONTENT_STATUS_PENDING ]:
tss.AddPair( bad_tag_id, good_tag_id )
self._ExecuteMany( 'INSERT OR IGNORE INTO {} ( bad_tag_id, ideal_tag_id ) VALUES ( ?, ? );'.format( cache_tag_siblings_lookup_table_name ), tss.GetBadTagsToIdealTags().items() )
if tag_service_id in self._service_ids_to_display_application_status:
del self._service_ids_to_display_application_status[ tag_service_id ]
def SetApplication( self, service_keys_to_applicable_service_keys ):
if self._service_ids_to_applicable_service_ids is None:
self._GenerateApplicationDicts()
new_service_ids_to_applicable_service_ids = collections.defaultdict( list )
for ( master_service_key, applicable_service_keys ) in service_keys_to_applicable_service_keys.items():
master_service_id = self.modules_services.GetServiceId( master_service_key )
applicable_service_ids = [ self.modules_services.GetServiceId( service_key ) for service_key in applicable_service_keys ]
new_service_ids_to_applicable_service_ids[ master_service_id ] = applicable_service_ids
old_and_new_master_service_ids = set( self._service_ids_to_applicable_service_ids.keys() )
old_and_new_master_service_ids.update( new_service_ids_to_applicable_service_ids.keys() )
inserts = []
service_ids_to_sync = set()
for master_service_id in old_and_new_master_service_ids:
if master_service_id in new_service_ids_to_applicable_service_ids:
applicable_service_ids = new_service_ids_to_applicable_service_ids[ master_service_id ]
inserts.extend( ( ( master_service_id, i, applicable_service_id ) for ( i, applicable_service_id ) in enumerate( applicable_service_ids ) ) )
if applicable_service_ids != self._service_ids_to_applicable_service_ids[ master_service_id ]:
service_ids_to_sync.add( master_service_id )
else:
service_ids_to_sync.add( master_service_id )
self._Execute( 'DELETE FROM tag_sibling_application;' )
self._ExecuteMany( 'INSERT OR IGNORE INTO tag_sibling_application ( master_service_id, service_index, application_service_id ) VALUES ( ?, ?, ? );', inserts )
self._service_ids_to_applicable_service_ids = None
self._service_ids_to_interested_service_ids = None
return service_ids_to_sync