409 lines
15 KiB
Python
409 lines
15 KiB
Python
import sqlite3
|
|
import typing
|
|
|
|
from hydrus.core import HydrusConstants as HC
|
|
from hydrus.core import HydrusData
|
|
from hydrus.core import HydrusDB
|
|
from hydrus.core import HydrusDBBase
|
|
from hydrus.core import HydrusExceptions
|
|
from hydrus.core import HydrusGlobals as HG
|
|
from hydrus.core import HydrusTags
|
|
|
|
from hydrus.client.db import ClientDBFilesStorage
|
|
from hydrus.client.db import ClientDBMappingsCounts
|
|
from hydrus.client.db import ClientDBMaster
|
|
from hydrus.client.db import ClientDBModule
|
|
from hydrus.client.db import ClientDBServices
|
|
from hydrus.client.metadata import ClientTags
|
|
|
|
class ClientDBCacheLocalHashes( ClientDBModule.ClientDBModule ):
    """
    Fast lookup layer for hash_id <-> hash definitions.
    
    hash_id -> hash lookups are answered from an in-memory dict first, then from the
    'local_hashes_cache' table, and only then from the master hashes module.
    hash -> hash_id lookups check the table and then fall back to the master hashes module.
    """
    
    # NOTE(review): presumably consulted by the ClientDBModule repair machinery--confirm there
    CAN_REPOPULATE_ALL_MISSING_DATA = True
    
    def __init__( self, cursor: sqlite3.Cursor, modules_hashes: ClientDBMaster.ClientDBMasterHashes, modules_services: ClientDBServices.ClientDBMasterServices, modules_files_storage: ClientDBFilesStorage.ClientDBFilesStorage ):
        """
        Store the collaborating modules and start with an empty in-memory cache.
        
        :param cursor: handed straight to ClientDBModule.__init__
        :param modules_hashes: master hash definitions, used as the fallback source
        :param modules_services: used by Repopulate to find the combined local file service id
        :param modules_files_storage: used by Repopulate to list the current local hash_ids
        """
        
        self.modules_hashes = modules_hashes
        self.modules_services = modules_services
        self.modules_files_storage = modules_files_storage
        
        # hash_id -> hash, filled lazily by _PopulateHashIdsToHashesCache
        self._hash_ids_to_hashes_cache: typing.Dict[ int, bytes ] = {}
        
        # the attributes above are assigned before the base initialiser runs, as in the original ordering
        ClientDBModule.ClientDBModule.__init__( self, 'client hashes local cache', cursor )
        
    
    def _GetInitialTableGenerationDict( self ) -> dict:
        """
        Describe the table this module owns.
        
        :return: table name -> ( CREATE statement with a '{}' placeholder, integer )
        """
        
        # NOTE(review): the 429 is presumably the db version that introduced the table--confirm against ClientDBModule
        return {
            'external_caches.local_hashes_cache' : ( 'CREATE TABLE IF NOT EXISTS {} ( hash_id INTEGER PRIMARY KEY, hash BLOB_BYTES UNIQUE );', 429 )
        }
        
    
    def _PopulateHashIdsToHashesCache( self, hash_ids ):
        """
        Make sure every given hash_id has an entry in the in-memory cache.
        
        Missing entries are looked up in 'local_hashes_cache' first. Whatever is still missing
        after that is requested from modules_hashes.
        
        :param hash_ids: iterable of hash_ids. It is iterated once here unless the prune below converts it to a set
        """
        
        if len( self._hash_ids_to_hashes_cache ) > 100000:
            
            # the in-memory dict has grown large, so keep only the entries that belong to this request
            
            if not isinstance( hash_ids, set ):
                
                hash_ids = set( hash_ids )
                
            
            self._hash_ids_to_hashes_cache = { hash_id : hash for ( hash_id, hash ) in self._hash_ids_to_hashes_cache.items() if hash_id in hash_ids }
            
        
        uncached_hash_ids = { hash_id for hash_id in hash_ids if hash_id not in self._hash_ids_to_hashes_cache }
        
        if len( uncached_hash_ids ) > 0:
            
            if len( uncached_hash_ids ) == 1:
                
                ( uncached_hash_id, ) = uncached_hash_ids
                
                # this makes 0 or 1 rows, so iterate the cursor rather than calling fetchone
                local_uncached_hash_ids_to_hashes = { hash_id : hash for ( hash_id, hash ) in self._Execute( 'SELECT hash_id, hash FROM local_hashes_cache WHERE hash_id = ?;', ( uncached_hash_id, ) ) }
                
            else:
                
                with self._MakeTemporaryIntegerTable( uncached_hash_ids, 'hash_id' ) as temp_table_name:
                    
                    # temp hash_ids to actual hashes
                    # SQLite does not reorder a CROSS JOIN, so the temp table stays the outer loop
                    local_uncached_hash_ids_to_hashes = { hash_id : hash for ( hash_id, hash ) in self._Execute( 'SELECT hash_id, hash FROM {} CROSS JOIN local_hashes_cache USING ( hash_id );'.format( temp_table_name ) ) }
                    
                
            
            self._hash_ids_to_hashes_cache.update( local_uncached_hash_ids_to_hashes )
            
            uncached_hash_ids = { hash_id for hash_id in uncached_hash_ids if hash_id not in self._hash_ids_to_hashes_cache }
            
        
        if len( uncached_hash_ids ) > 0:
            
            # not in the local table, so ask the master definitions
            hash_ids_to_hashes = self.modules_hashes.GetHashIdsToHashes( hash_ids = uncached_hash_ids )
            
            self._hash_ids_to_hashes_cache.update( hash_ids_to_hashes )
            
        
    
    def _RepairRepopulateTables( self, table_names, cursor_transaction_wrapper: HydrusDBBase.DBCursorTransactionWrapper ):
        """
        Rebuild the whole table and then commit.
        
        :param table_names: not used here, the full table is always regenerated
        :param cursor_transaction_wrapper: CommitAndBegin is called on it once Repopulate has finished
        """
        
        self.Repopulate()
        
        cursor_transaction_wrapper.CommitAndBegin()
        
    
    def AddHashIdsToCache( self, hash_ids ):
        """
        Copy the definitions for the given hash_ids from modules_hashes into 'local_hashes_cache'.
        
        Rows that conflict with an existing row are skipped (INSERT OR IGNORE).
        """
        
        hash_ids_to_hashes = self.modules_hashes.GetHashIdsToHashes( hash_ids = hash_ids )
        
        self._ExecuteMany( 'INSERT OR IGNORE INTO local_hashes_cache ( hash_id, hash ) VALUES ( ?, ? );', ( ( hash_id, sqlite3.Binary( hash ) ) for ( hash_id, hash ) in hash_ids_to_hashes.items() ) )
        
    
    def ClearCache( self ):
        """
        Delete every row of 'local_hashes_cache'. The in-memory dict is not touched.
        """
        
        self._Execute( 'DELETE FROM local_hashes_cache;' )
        
    
    def DropHashIdsFromCache( self, hash_ids ):
        """
        Delete the rows for the given hash_ids from 'local_hashes_cache'. The in-memory dict is not touched.
        """
        
        self._ExecuteMany( 'DELETE FROM local_hashes_cache WHERE hash_id = ?;', ( ( hash_id, ) for hash_id in hash_ids ) )
        
    
    def GetHash( self, hash_id ) -> bytes:
        """
        Return the hash for one hash_id.
        
        :raises KeyError: if no source supplied an entry for hash_id
        """
        
        self._PopulateHashIdsToHashesCache( ( hash_id, ) )
        
        return self._hash_ids_to_hashes_cache[ hash_id ]
        
    
    def GetHashes( self, hash_ids ) -> typing.List[ bytes ]:
        """
        Return the hashes for hash_ids, in the iteration order of hash_ids.
        
        :param hash_ids: iterated more than once, so it needs to be a re-iterable collection
        :raises KeyError: if no source supplied an entry for one of the hash_ids
        """
        
        self._PopulateHashIdsToHashesCache( hash_ids )
        
        return [ self._hash_ids_to_hashes_cache[ hash_id ] for hash_id in hash_ids ]
        
    
    def GetHashId( self, hash ) -> int:
        """
        Return the hash_id for a hash, preferring the local table.
        
        When the table has no row, the answer comes from modules_hashes.GetHashId.
        NOTE(review): that fallback presumably creates a new hash_id for unknown hashes--confirm in ClientDBMaster.
        """
        
        result = self._Execute( 'SELECT hash_id FROM local_hashes_cache WHERE hash = ?;', ( sqlite3.Binary( hash ), ) ).fetchone()
        
        if result is None:
            
            return self.modules_hashes.GetHashId( hash )
            
        
        ( hash_id, ) = result
        
        return hash_id
        
    
    def GetHashIds( self, hashes ) -> typing.Set[ int ]:
        """
        Return the set of hash_ids for the given hashes.
        
        None entries are skipped. Hashes without a row in the local table are resolved in one
        call to modules_hashes.GetHashIds.
        """
        
        hash_ids = set()
        hashes_not_in_cache = set()
        
        for hash in hashes:
            
            if hash is None:
                
                continue
                
            
            result = self._Execute( 'SELECT hash_id FROM local_hashes_cache WHERE hash = ?;', ( sqlite3.Binary( hash ), ) ).fetchone()
            
            if result is None:
                
                hashes_not_in_cache.add( hash )
                
            else:
                
                ( hash_id, ) = result
                
                hash_ids.add( hash_id )
                
            
        
        if len( hashes_not_in_cache ) > 0:
            
            hash_ids.update( self.modules_hashes.GetHashIds( hashes_not_in_cache ) )
            
        
        return hash_ids
        
    
    def GetHashIdsToHashes( self, hash_ids = None, hashes = None, create_new_hash_ids = True ) -> typing.Dict[ int, bytes ]:
        """
        Build a hash_id -> hash dict from either hash_ids or hashes.
        
        :param hash_ids: if not None, these are resolved and 'hashes' is ignored
        :param hashes: used only when hash_ids is None
        :param create_new_hash_ids: when False, hashes known to neither this cache nor modules_hashes are left out
            instead of being passed on to GetHashId
        :return: the mapping, or an empty dict when both hash_ids and hashes are None
        """
        
        if hash_ids is not None:
            
            self._PopulateHashIdsToHashesCache( hash_ids )
            
            hash_ids_to_hashes = { hash_id : self._hash_ids_to_hashes_cache[ hash_id ] for hash_id in hash_ids }
            
        elif hashes is not None:
            
            if not create_new_hash_ids:
                
                hashes = [ hash for hash in hashes if self.HasHash( hash ) or self.modules_hashes.HasHash( hash ) ]
                
            
            hash_ids_to_hashes = { self.GetHashId( hash ) : hash for hash in hashes }
            
        else:
            
            # nothing was requested. this path used to fall through to an unbound local and raise UnboundLocalError
            hash_ids_to_hashes = {}
            
        
        return hash_ids_to_hashes
        
    
    def GetTablesAndColumnsThatUseDefinitions( self, content_type: int ) -> typing.List[ typing.Tuple[ str, str ] ]:
        """
        Report no tables, whatever the content_type.
        """
        
        # we actually provide a backup, which we may want to automate later in mappings caches etc...
        
        return []
        
    
    def HasHash( self, hash: bytes ) -> bool:
        """
        Return True if 'local_hashes_cache' has a row for this hash.
        """
        
        result = self._Execute( 'SELECT hash_id FROM local_hashes_cache WHERE hash = ?;', ( sqlite3.Binary( hash ), ) ).fetchone()
        
        return result is not None
        
    
    def HasHashId( self, hash_id: int ) -> bool:
        """
        Return True if 'local_hashes_cache' has a row for this hash_id.
        """
        
        result = self._Execute( 'SELECT 1 FROM local_hashes_cache WHERE hash_id = ?;', ( hash_id, ) ).fetchone()
        
        return result is not None
        
    
    def Repopulate( self ):
        """
        Empty the table and refill it with every hash_id currently in the combined local file service.
        
        Progress text is pushed to the client controller's splash status while this runs.
        """
        
        self.ClearCache()
        
        HG.client_controller.frame_splash_status.SetSubtext( 'reading local file data' )
        
        local_hash_ids = self.modules_files_storage.GetCurrentHashIdsList( self.modules_services.combined_local_file_service_id )
        
        BLOCK_SIZE = 10000
        num_to_do = len( local_hash_ids )
        
        for ( i, block_of_hash_ids ) in enumerate( HydrusData.SplitListIntoChunks( local_hash_ids, BLOCK_SIZE ) ):
            
            # i * BLOCK_SIZE is the number of ids handled before this block
            HG.client_controller.frame_splash_status.SetSubtext( 'caching local file data {}'.format( HydrusData.ConvertValueRangeToPrettyString( i * BLOCK_SIZE, num_to_do ) ) )
            
            self.AddHashIdsToCache( block_of_hash_ids )
            
        
    

class ClientDBCacheLocalTags( ClientDBModule.ClientDBModule ):
    """
    Fast lookup layer for tag_id <-> tag definitions.
    
    tag_id -> tag lookups are answered from an in-memory dict first, then from the
    'local_tags_cache' table, and only then from the master tags module.
    tag -> tag_id lookups check the table and then fall back to the master tags module.
    """
    
    # NOTE(review): presumably consulted by the ClientDBModule repair machinery--confirm there
    CAN_REPOPULATE_ALL_MISSING_DATA = True
    
    def __init__( self, cursor: sqlite3.Cursor, modules_tags: ClientDBMaster.ClientDBMasterTags, modules_services: ClientDBServices.ClientDBMasterServices, modules_mappings_counts: ClientDBMappingsCounts.ClientDBMappingsCounts ):
        """
        Store the collaborating modules and start with an empty in-memory cache.
        
        :param cursor: handed straight to ClientDBModule.__init__
        :param modules_tags: master tag definitions, used as the fallback source
        :param modules_services: used by Repopulate to list tag services and the combined local file service id
        :param modules_mappings_counts: used by Repopulate to build the per-service tag_id queries
        """
        
        self.modules_tags = modules_tags
        self.modules_services = modules_services
        self.modules_mappings_counts = modules_mappings_counts
        
        # tag_id -> tag, filled lazily by _PopulateTagIdsToTagsCache
        self._tag_ids_to_tags_cache: typing.Dict[ int, str ] = {}
        
        # the attributes above are assigned before the base initialiser runs, as in the original ordering
        ClientDBModule.ClientDBModule.__init__( self, 'client tags local cache', cursor )
        
    
    def _GetInitialTableGenerationDict( self ) -> dict:
        """
        Describe the table this module owns.
        
        :return: table name -> ( CREATE statement with a '{}' placeholder, integer )
        """
        
        # NOTE(review): the 400 is presumably the db version that introduced the table--confirm against ClientDBModule
        return {
            'external_caches.local_tags_cache' : ( 'CREATE TABLE IF NOT EXISTS {} ( tag_id INTEGER PRIMARY KEY, tag TEXT UNIQUE );', 400 )
        }
        
    
    def _PopulateTagIdsToTagsCache( self, tag_ids ):
        """
        Make sure every given tag_id has an entry in the in-memory cache.
        
        Missing entries are looked up in 'local_tags_cache' first. Whatever is still missing
        after that is requested from modules_tags.
        
        :param tag_ids: iterable of tag_ids. It is iterated once here unless the prune below converts it to a set
        """
        
        if len( self._tag_ids_to_tags_cache ) > 100000:
            
            # the in-memory dict has grown large, so keep only the entries that belong to this request
            
            if not isinstance( tag_ids, set ):
                
                tag_ids = set( tag_ids )
                
            
            self._tag_ids_to_tags_cache = { tag_id : tag for ( tag_id, tag ) in self._tag_ids_to_tags_cache.items() if tag_id in tag_ids }
            
        
        uncached_tag_ids = { tag_id for tag_id in tag_ids if tag_id not in self._tag_ids_to_tags_cache }
        
        if len( uncached_tag_ids ) > 0:
            
            if len( uncached_tag_ids ) == 1:
                
                ( uncached_tag_id, ) = uncached_tag_ids
                
                # this makes 0 or 1 rows, so iterate the cursor rather than calling fetchone
                local_uncached_tag_ids_to_tags = { tag_id : tag for ( tag_id, tag ) in self._Execute( 'SELECT tag_id, tag FROM local_tags_cache WHERE tag_id = ?;', ( uncached_tag_id, ) ) }
                
            else:
                
                with self._MakeTemporaryIntegerTable( uncached_tag_ids, 'tag_id' ) as temp_table_name:
                    
                    # temp tag_ids to actual tags
                    # SQLite does not reorder a CROSS JOIN, so the temp table stays the outer loop
                    local_uncached_tag_ids_to_tags = { tag_id : tag for ( tag_id, tag ) in self._Execute( 'SELECT tag_id, tag FROM {} CROSS JOIN local_tags_cache USING ( tag_id );'.format( temp_table_name ) ) }
                    
                
            
            self._tag_ids_to_tags_cache.update( local_uncached_tag_ids_to_tags )
            
            uncached_tag_ids = { tag_id for tag_id in uncached_tag_ids if tag_id not in self._tag_ids_to_tags_cache }
            
        
        if len( uncached_tag_ids ) > 0:
            
            # not in the local table, so ask the master definitions
            tag_ids_to_tags = self.modules_tags.GetTagIdsToTags( tag_ids = uncached_tag_ids )
            
            self._tag_ids_to_tags_cache.update( tag_ids_to_tags )
            
        
    
    def _RepairRepopulateTables( self, table_names, cursor_transaction_wrapper: HydrusDBBase.DBCursorTransactionWrapper ):
        """
        Rebuild the whole table and then commit.
        
        :param table_names: not used here, the full table is always regenerated
        :param cursor_transaction_wrapper: CommitAndBegin is called on it once Repopulate has finished
        """
        
        self.Repopulate()
        
        cursor_transaction_wrapper.CommitAndBegin()
        
    
    def AddTagIdsToCache( self, tag_ids ):
        """
        Copy the definitions for the given tag_ids from modules_tags into 'local_tags_cache'.
        
        Rows that conflict with an existing row are skipped (INSERT OR IGNORE).
        """
        
        tag_ids_to_tags = self.modules_tags.GetTagIdsToTags( tag_ids = tag_ids )
        
        self._ExecuteMany( 'INSERT OR IGNORE INTO local_tags_cache ( tag_id, tag ) VALUES ( ?, ? );', tag_ids_to_tags.items() )
        
    
    def ClearCache( self ):
        """
        Delete every row of 'local_tags_cache'. The in-memory dict is not touched.
        """
        
        self._Execute( 'DELETE FROM local_tags_cache;' )
        
    
    def DropTagIdsFromCache( self, tag_ids ):
        """
        Delete the rows for the given tag_ids from 'local_tags_cache'. The in-memory dict is not touched.
        """
        
        self._ExecuteMany( 'DELETE FROM local_tags_cache WHERE tag_id = ?;', ( ( tag_id, ) for tag_id in tag_ids ) )
        
    
    def GetTablesAndColumnsThatUseDefinitions( self, content_type: int ) -> typing.List[ typing.Tuple[ str, str ] ]:
        """
        Report no tables, whatever the content_type.
        """
        
        # we actually provide a backup, which we may want to automate later in mappings caches etc...
        
        return []
        
    
    def GetTag( self, tag_id ) -> str:
        """
        Return the tag for one tag_id.
        
        :raises KeyError: if no source supplied an entry for tag_id
        """
        
        self._PopulateTagIdsToTagsCache( ( tag_id, ) )
        
        return self._tag_ids_to_tags_cache[ tag_id ]
        
    
    def GetTagId( self, tag ) -> int:
        """
        Return the tag_id for a tag, preferring the local table.
        
        The tag is cleaned with HydrusTags.CleanTag and rejected if the cleaned form is empty.
        The local table is then searched for the cleaned form. When there is no row, the
        original tag is handed to modules_tags.GetTagId.
        
        :raises HydrusExceptions.TagSizeException: if the tag is empty once cleaned
        """
        
        clean_tag = HydrusTags.CleanTag( tag )
        
        try:
            
            HydrusTags.CheckTagNotEmpty( clean_tag )
            
        except HydrusExceptions.TagSizeException:
            
            raise HydrusExceptions.TagSizeException( '"{}" tag seems not valid--when cleaned, it ends up with zero size!'.format( tag ) )
            
        
        # look up the cleaned form. the raw tag was used here before, so un-normalised input always missed the table
        # NOTE(review): assumes the table holds tags in cleaned form, since it is filled from modules_tags--confirm
        result = self._Execute( 'SELECT tag_id FROM local_tags_cache WHERE tag = ?;', ( clean_tag, ) ).fetchone()
        
        if result is None:
            
            # the fallback still receives the original tag, exactly as before
            return self.modules_tags.GetTagId( tag )
            
        
        ( tag_id, ) = result
        
        return tag_id
        
    
    def GetTagIdsToTags( self, tag_ids = None, tags = None ) -> typing.Dict[ int, str ]:
        """
        Build a tag_id -> tag dict from either tag_ids or tags.
        
        :param tag_ids: if not None, these are resolved and 'tags' is ignored
        :param tags: used only when tag_ids is None. The values of the result are the tags exactly as given
        :return: the mapping, or an empty dict when both tag_ids and tags are None
        """
        
        if tag_ids is not None:
            
            self._PopulateTagIdsToTagsCache( tag_ids )
            
            tag_ids_to_tags = { tag_id : self._tag_ids_to_tags_cache[ tag_id ] for tag_id in tag_ids }
            
        elif tags is not None:
            
            tag_ids_to_tags = { self.GetTagId( tag ) : tag for tag in tags }
            
        else:
            
            # nothing was requested. this path used to fall through to an unbound local and raise UnboundLocalError
            tag_ids_to_tags = {}
            
        
        return tag_ids_to_tags
        
    
    def UpdateTagInCache( self, tag_id, tag ):
        """
        Overwrite the tag stored for tag_id in 'local_tags_cache' and forget any in-memory entry for it.
        """
        
        self._Execute( 'UPDATE local_tags_cache SET tag = ? WHERE tag_id = ?;', ( tag, tag_id ) )
        
        # drop the stale in-memory value so the next lookup re-reads the table
        self._tag_ids_to_tags_cache.pop( tag_id, None )
        
    
    def Repopulate( self ):
        """
        Empty the table and refill it with the current tag_ids of every real tag service.
        
        One query phrase per tag service is obtained from modules_mappings_counts, the phrases are
        combined with UNION, and the resulting tag_ids are cached in chunks.
        """
        
        self.ClearCache()
        
        tag_service_ids = self.modules_services.GetServiceIds( HC.REAL_TAG_SERVICES )
        
        queries = [ self.modules_mappings_counts.GetQueryPhraseForCurrentTagIds( ClientTags.TAG_DISPLAY_STORAGE, self.modules_services.combined_local_file_service_id, tag_service_id ) for tag_service_id in tag_service_ids ]
        
        if len( queries ) == 0:
            
            # with no tag services the join below would yield the bare statement ';', which selects nothing
            return
            
        
        full_query = '{};'.format( ' UNION '.join( queries ) )
        
        for ( block_of_tag_ids, num_done, num_to_do ) in HydrusDB.ReadLargeIdQueryInSeparateChunks( self._c, full_query, 1024 ):
            
            self.AddTagIdsToCache( block_of_tag_ids )
            
        
    