# hydrus/hydrus/client/db/ClientDBMappingsCounts.py
#
# 577 lines
# 22 KiB
# Python

import collections
import sqlite3
import typing
from hydrus.core import HydrusConstants as HC
from hydrus.core import HydrusDBBase
from hydrus.client import ClientData
from hydrus.client.db import ClientDBModule
from hydrus.client.db import ClientDBServices
from hydrus.client.metadata import ClientTags
def GenerateCombinedFilesMappingsCountsCacheTableName( tag_display_type, tag_service_id ):
    """
    Build the name of the 'combined files' counts cache table for one tag service.
    
    :param tag_display_type: ClientTags.TAG_DISPLAY_STORAGE or ClientTags.TAG_DISPLAY_ACTUAL.
    :param tag_service_id: id of the tag service; it becomes the table name suffix.
    :return: 'external_caches.combined_files_ac_cache_<id>' for storage, or
             'external_caches.combined_files_display_ac_cache_<id>' for actual.
    """
    
    if tag_display_type == ClientTags.TAG_DISPLAY_STORAGE:
        
        table_prefix = 'combined_files_ac_cache'
        
    elif tag_display_type == ClientTags.TAG_DISPLAY_ACTUAL:
        
        table_prefix = 'combined_files_display_ac_cache'
        
    
    # any other display type leaves table_prefix unassigned, so the line below raises UnboundLocalError
    return 'external_caches.' + table_prefix + '_' + str( tag_service_id )
def GenerateSpecificCountsCacheTableName( tag_display_type, file_service_id, tag_service_id ):
    """
    Build the name of the counts cache table for one specific ( file service, tag service ) pair.
    
    :param tag_display_type: ClientTags.TAG_DISPLAY_STORAGE or ClientTags.TAG_DISPLAY_ACTUAL.
    :param file_service_id: id of the file service; first part of the table name suffix.
    :param tag_service_id: id of the tag service; second part of the table name suffix.
    :return: 'external_caches.specific_ac_cache_<file>_<tag>' for storage, or
             'external_caches.specific_display_ac_cache_<file>_<tag>' for actual.
    """
    
    if tag_display_type == ClientTags.TAG_DISPLAY_STORAGE:
        
        table_prefix = 'specific_ac_cache'
        
    elif tag_display_type == ClientTags.TAG_DISPLAY_ACTUAL:
        
        table_prefix = 'specific_display_ac_cache'
        
    
    # any other display type leaves table_prefix unassigned, so the line below raises UnboundLocalError
    return 'external_caches.{}_{}_{}'.format( table_prefix, file_service_id, tag_service_id )
class ClientDBMappingsCounts( ClientDBModule.ClientDBModule ):
    """
    DB module that owns the tag count cache tables.
    
    There is one table per ( tag display type, file service, tag service ). Each table has one row per tag_id with
    a current_count and a pending_count. Table names are produced by GetCountsCacheTableName.
    
    Argument order differs between method families, so take care when calling:
    
    - GetCounts, GetCountsEstimate*, and GetAutocompleteCountEstimate* take
      ( tag_display_type, tag_service_id, file_service_id, ... )
    - every other method takes ( tag_display_type, file_service_id, tag_service_id, ... )
    """
    
    # read by the ClientDBModule base machinery, which is not visible in this file
    CAN_REPOPULATE_ALL_MISSING_DATA = True
    
    def __init__( self, cursor: sqlite3.Cursor, modules_services: ClientDBServices.ClientDBMasterServices ):
        """
        :param cursor: sqlite3 cursor handed on to the base module.
        :param modules_services: services module used to look up service ids.
        """
        
        # set before the base initialiser is called; this ordering is kept from the original code
        self.modules_services = modules_services
        
        ClientDBModule.ClientDBModule.__init__( self, 'client mappings counts', cursor )
        
        # ( file_service_id, tag_service_id ) pairs whose tables were reported missing, filled by _RepairRepopulateTables
        self._missing_storage_tag_service_pairs = set()
        self._missing_display_tag_service_pairs = set()
        
    
    def _GetServiceTableGenerationDictSingle( self, tag_display_type, file_service_id, tag_service_id ):
        """
        Describe the single counts table for the given display type and service pair.
        
        :return: dict of table name -> ( CREATE TABLE query with a '{}' placeholder for the name, version number ).
        """
        
        table_name = self.GetCountsCacheTableName( tag_display_type, file_service_id, tag_service_id )
        
        # the version was earlier here but we updated when adding combined delete files and ipfs to these tables
        if file_service_id == self.modules_services.combined_local_media_service_id:
            
            version = 486
            
        else:
            
            version = 465
            
        
        create_query_without_name = 'CREATE TABLE IF NOT EXISTS {} ( tag_id INTEGER PRIMARY KEY, current_count INTEGER, pending_count INTEGER );'
        
        return { table_name : ( create_query_without_name, version ) }
        
    
    def _GetServiceTableGenerationDict( self, service_id ) -> dict:
        """
        Describe every counts table that exists for one tag service.
        
        Covers both display types for each file service in HC.FILE_SERVICES_WITH_SPECIFIC_MAPPING_CACHES, plus the
        combined file service.
        
        :param service_id: the tag service id.
        :return: dict in the same shape as _GetServiceTableGenerationDictSingle, merged across all tables.
        """
        
        tag_service_id = service_id
        
        file_service_ids = list( self.modules_services.GetServiceIds( HC.FILE_SERVICES_WITH_SPECIFIC_MAPPING_CACHES ) )
        file_service_ids.append( self.modules_services.combined_file_service_id )
        
        table_dict = {}
        
        for file_service_id in file_service_ids:
            
            for tag_display_type in ( ClientTags.TAG_DISPLAY_STORAGE, ClientTags.TAG_DISPLAY_ACTUAL ):
                
                table_dict.update( self._GetServiceTableGenerationDictSingle( tag_display_type, file_service_id, tag_service_id ) )
                
            
        
        return table_dict
        
    
    def _GetServiceIdsWeGenerateDynamicTablesFor( self ):
        """Return the ids of all services in HC.REAL_TAG_SERVICES."""
        
        return self.modules_services.GetServiceIds( HC.REAL_TAG_SERVICES )
        
    
    def _RepairRepopulateTables( self, table_names, cursor_transaction_wrapper: HydrusDBBase.DBCursorTransactionWrapper ):
        """
        Record which ( file service, tag service ) pairs own any of the given table names.
        
        Nothing is repopulated here. The pairs are stored on the instance and exposed through
        GetMissingTagCountServicePairs.
        
        :param table_names: collection of table names to match against.
        :param cursor_transaction_wrapper: unused in this method.
        """
        
        # NOTE(review): table generation above uses HC.FILE_SERVICES_WITH_SPECIFIC_MAPPING_CACHES, but this uses
        # HC.FILE_SERVICES_WITH_SPECIFIC_TAG_LOOKUP_CACHES. if the two sets differ, a missing table for a file
        # service only in the former would not be recorded here. confirm which constant is intended
        file_service_ids = list( self.modules_services.GetServiceIds( HC.FILE_SERVICES_WITH_SPECIFIC_TAG_LOOKUP_CACHES ) )
        file_service_ids.append( self.modules_services.combined_file_service_id )
        
        tag_service_ids = list( self.modules_services.GetServiceIds( HC.REAL_TAG_SERVICES ) )
        
        for tag_service_id in tag_service_ids:
            
            for file_service_id in file_service_ids:
                
                pair = ( file_service_id, tag_service_id )
                
                storage_table_names = set( self._GetServiceTableGenerationDictSingle( ClientTags.TAG_DISPLAY_STORAGE, file_service_id, tag_service_id ).keys() )
                
                if not storage_table_names.isdisjoint( table_names ):
                    
                    self._missing_storage_tag_service_pairs.add( pair )
                    
                
                display_table_names = set( self._GetServiceTableGenerationDictSingle( ClientTags.TAG_DISPLAY_ACTUAL, file_service_id, tag_service_id ).keys() )
                
                if not display_table_names.isdisjoint( table_names ):
                    
                    self._missing_display_tag_service_pairs.add( pair )
                    
                
            
        
    
    def AddCounts( self, tag_display_type, file_service_id, tag_service_id, ac_cache_changes ):
        """
        Add count deltas to a counts table, inserting rows for tags that are not there yet.
        
        :param ac_cache_changes: sized, re-iterable collection of ( tag_id, current_delta, pending_delta ).
        :return: ( new_tag_ids, new_local_tag_ids ). new_tag_ids are the tag ids for which a row was inserted;
                 new_local_tag_ids are the same ids, filled only when file_service_id is the combined local file service.
        """
        
        counts_cache_table_name = self.GetCountsCacheTableName( tag_display_type, file_service_id, tag_service_id )
        
        is_combined_local_file_domain = file_service_id == self.modules_services.combined_local_file_service_id # and tag_service_id = all known tags
        
        insert_query = 'INSERT OR IGNORE INTO {} ( tag_id, current_count, pending_count ) VALUES ( ?, ?, ? );'.format( counts_cache_table_name )
        
        new_tag_ids = set()
        new_local_tag_ids = set()
        
        for ( tag_id, current_delta, pending_delta ) in ac_cache_changes:
            
            self._Execute( insert_query, ( tag_id, current_delta, pending_delta ) )
            
            # a positive row count here is taken to mean the insert was not ignored, i.e. the tag is new to this table
            if self._GetRowCount() > 0:
                
                new_tag_ids.add( tag_id )
                
                if is_combined_local_file_domain:
                    
                    new_local_tag_ids.add( tag_id )
                    
                
            
        
        # anything that was not freshly inserted already had a row, so it needs an additive update instead
        if len( new_tag_ids ) < len( ac_cache_changes ):
            
            update_query = 'UPDATE {} SET current_count = current_count + ?, pending_count = pending_count + ? WHERE tag_id = ?;'.format( counts_cache_table_name )
            
            self._ExecuteMany( update_query, ( ( num_current, num_pending, tag_id ) for ( tag_id, num_current, num_pending ) in ac_cache_changes if tag_id not in new_tag_ids ) )
            
        
        return ( new_tag_ids, new_local_tag_ids )
        
    
    def ClearCounts( self, tag_display_type, file_service_id, tag_service_id, keep_current = False, keep_pending = False ):
        """
        Clear counts from a counts table.
        
        - keep_current: zero the pending counts, then delete rows that are zero in both columns.
        - keep_pending (and not keep_current): zero the current counts, then delete rows that are zero in both columns.
        - neither: delete every row.
        
        If both flags are True, keep_current takes precedence and pending counts are still cleared.
        """
        
        table_name = self.GetCountsCacheTableName( tag_display_type, file_service_id, tag_service_id )
        
        if not keep_current and not keep_pending:
            
            self._Execute( 'DELETE FROM {};'.format( table_name ) )
            
            return
            
        
        if keep_current:
            
            self._Execute( 'UPDATE {} SET pending_count = 0 WHERE pending_count > 0;'.format( table_name ) )
            
        else:
            
            self._Execute( 'UPDATE {} SET current_count = 0 WHERE current_count > 0;'.format( table_name ) )
            
        
        # rows left with nothing in either column carry no information
        self._Execute( 'DELETE FROM {} WHERE current_count = 0 AND pending_count = 0;'.format( table_name ) )
        
    
    def CreateTables( self, tag_display_type, file_service_id, tag_service_id, populate_from_storage = False ):
        """
        Create the counts table for the given display type and service pair.
        
        :param populate_from_storage: when True and tag_display_type is ClientTags.TAG_DISPLAY_ACTUAL, the new display
                                      table is filled with a copy of the matching storage table's rows.
        """
        
        table_generation_dict = self._GetServiceTableGenerationDictSingle( tag_display_type, file_service_id, tag_service_id )
        
        for ( table_name, ( create_query_without_name, version_added ) ) in table_generation_dict.items():
            
            self._CreateTable( create_query_without_name, table_name )
            
        
        #
        
        if tag_display_type == ClientTags.TAG_DISPLAY_ACTUAL and populate_from_storage:
            
            display_table_name = self.GetCountsCacheTableName( tag_display_type, file_service_id, tag_service_id )
            storage_table_name = self.GetCountsCacheTableName( ClientTags.TAG_DISPLAY_STORAGE, file_service_id, tag_service_id )
            
            self._Execute( 'INSERT OR IGNORE INTO {} ( tag_id, current_count, pending_count ) SELECT tag_id, current_count, pending_count FROM {};'.format( display_table_name, storage_table_name ) )
            
        
    
    def DropTables( self, tag_display_type, file_service_id, tag_service_id ):
        """Drop the counts table for the given display type and service pair, if it exists."""
        
        table_name = self.GetCountsCacheTableName( tag_display_type, file_service_id, tag_service_id )
        
        self._Execute( 'DROP TABLE IF EXISTS {};'.format( table_name ) )
        
    
    def FilterExistingTagIds( self, tag_display_type, file_service_id, tag_service_id, tag_ids_table_name ):
        """
        Select the tag ids from tag_ids_table_name that also have a row in the counts table.
        
        :param tag_ids_table_name: name of a table with a tag_id column.
        :return: the query result passed through the base helper self._STS (presumably a set of tag ids).
        """
        
        counts_cache_table_name = self.GetCountsCacheTableName( tag_display_type, file_service_id, tag_service_id )
        
        return self._STS( self._Execute( 'SELECT tag_id FROM {} CROSS JOIN {} USING ( tag_id );'.format( tag_ids_table_name, counts_cache_table_name ) ) )
        
    
    def GetAutocompleteCountEstimate( self, tag_display_type: int, tag_service_id: int, file_service_id: int, tag_ids: typing.Collection[ int ], include_current_tags: bool, include_pending_tags: bool ):
        """
        Sum the counts of the given tags into a single number.
        
        :param include_current_tags: add the summed current counts.
        :param include_pending_tags: add the summed pending counts.
        :return: int total; 0 when both include flags are False.
        """
        
        if not include_current_tags and not include_pending_tags:
            
            return 0
            
        
        ( current_count, pending_count ) = self.GetAutocompleteCountEstimateStatuses( tag_display_type, tag_service_id, file_service_id, tag_ids )
        
        count = 0
        
        if include_current_tags:
            
            count += current_count
            
        
        # this previously tested include_current_tags, which mixed pending counts into current-only requests and
        # dropped them from pending-only requests
        if include_pending_tags:
            
            count += pending_count
            
        
        return count
        
    
    def GetAutocompleteCountEstimateStatuses( self, tag_display_type: int, tag_service_id: int, file_service_id: int, tag_ids: typing.Collection[ int ] ):
        """
        Sum the minimum current and pending counts that GetCounts reports for the given tags.
        
        :return: ( current_count, pending_count ).
        """
        
        ids_to_count = self.GetCounts( tag_display_type, tag_service_id, file_service_id, tag_ids, True, True )
        
        current_count = 0
        pending_count = 0
        
        for ( current_min, current_max, pending_min, pending_max ) in ids_to_count.values():
            
            current_count += current_min
            pending_count += pending_min
            
        
        return ( current_count, pending_count )
        
    
    def GetCounts( self, tag_display_type, tag_service_id, file_service_id, tag_ids, include_current, include_pending, domain_is_cross_referenced = True, zero_count_ok = False, job_key = None, tag_ids_table_name = None ):
        """
        Fetch count ranges for the given tags.
        
        If tag_service_id is the combined tag service, every service in HC.REAL_TAG_SERVICES is read and the per-tag
        results are merged with ClientData.MergeCounts.
        
        :param tag_ids: sized collection of tag ids.
        :param include_current: when False, current counts are treated as 0.
        :param include_pending: when False, pending counts are treated as 0.
        :param domain_is_cross_referenced: when True the minimums equal the stored counts; when False they are 0.
        :param zero_count_ok: when True, tags whose counts are all zero are kept, and requested tags with no row are
                              returned as ( 0, 0, 0, 0 ).
        :param job_key: optional object with IsCancelled(); only checked on the multi-tag path.
        :param tag_ids_table_name: optional existing table of the tag ids, used on the multi-tag path instead of
                                   creating a temporary one.
        :return: dict of tag_id -> ( current_min, current_max, pending_min, pending_max ). An empty dict is returned,
                 regardless of zero_count_ok, when tag_ids is empty, when both services are the combined ones, or when
                 the job is cancelled.
        """
        
        if len( tag_ids ) == 0:
            
            return {}
            
        
        if tag_service_id == self.modules_services.combined_tag_service_id and file_service_id == self.modules_services.combined_file_service_id:
            
            return {}
            
        
        if tag_service_id == self.modules_services.combined_tag_service_id:
            
            search_tag_service_ids = self.modules_services.GetServiceIds( HC.REAL_TAG_SERVICES )
            
        else:
            
            search_tag_service_ids = [ tag_service_id ]
            
        
        def fetch_rows_from_table( table_name ):
            
            # read every searched tag service for the tag ids in table_name; None means the job was cancelled
            rows = []
            
            for search_tag_service_id in search_tag_service_ids:
                
                if job_key is not None and job_key.IsCancelled():
                    
                    return None
                    
                
                rows.extend( self.GetCountsForTags( tag_display_type, file_service_id, search_tag_service_id, table_name ) )
                
            
            return rows
            
        
        if len( tag_ids ) > 1:
            
            if tag_ids_table_name is None:
                
                with self._MakeTemporaryIntegerTable( tag_ids, 'tag_id' ) as temp_tag_id_table_name:
                    
                    cache_results = fetch_rows_from_table( temp_tag_id_table_name )
                    
                
            else:
                
                cache_results = fetch_rows_from_table( tag_ids_table_name )
                
            
            if cache_results is None:
                
                return {}
                
            
        else:
            
            # a single tag is looked up directly, with no join and no cancel check
            ( tag_id, ) = tag_ids
            
            cache_results = []
            
            for search_tag_service_id in search_tag_service_ids:
                
                cache_results.extend( self.GetCountsForTag( tag_display_type, file_service_id, search_tag_service_id, tag_id ) )
                
            
        
        #
        
        ids_to_count = {}
        
        for ( tag_id, current_count, pending_count ) in cache_results:
            
            if not include_current:
                
                current_count = 0
                
            
            if not include_pending:
                
                pending_count = 0
                
            
            if current_count == 0 and pending_count == 0 and not zero_count_ok:
                
                continue
                
            
            current_max = current_count
            pending_max = pending_count
            
            if domain_is_cross_referenced:
                
                # file counts are perfectly accurate
                current_min = current_count
                pending_min = pending_count
                
            else:
                
                # for instance this is a search for 'my files' deleted files, but we are searching on 'all deleted files' domain
                current_min = 0
                pending_min = 0
                
            
            if tag_id in ids_to_count:
                
                # the same tag was already seen on another tag service, so merge the two ranges
                ( existing_current_min, existing_current_max, existing_pending_min, existing_pending_max ) = ids_to_count[ tag_id ]
                
                ( current_min, current_max ) = ClientData.MergeCounts( existing_current_min, existing_current_max, current_min, current_max )
                ( pending_min, pending_max ) = ClientData.MergeCounts( existing_pending_min, existing_pending_max, pending_min, pending_max )
                
            
            ids_to_count[ tag_id ] = ( current_min, current_max, pending_min, pending_max )
            
        
        if zero_count_ok:
            
            for tag_id in tag_ids:
                
                if tag_id not in ids_to_count:
                    
                    ids_to_count[ tag_id ] = ( 0, 0, 0, 0 )
                    
                
            
        
        return ids_to_count
        
    
    def GetCountsCacheTableName( self, tag_display_type, file_service_id, tag_service_id ):
        """
        Return the counts table name for the given display type and service pair.
        
        The combined file service has its own naming scheme; every other file service uses the 'specific' scheme.
        """
        
        if file_service_id == self.modules_services.combined_file_service_id:
            
            return GenerateCombinedFilesMappingsCountsCacheTableName( tag_display_type, tag_service_id )
            
        
        return GenerateSpecificCountsCacheTableName( tag_display_type, file_service_id, tag_service_id )
        
    
    def GetCountsEstimate( self, tag_display_type: int, tag_service_id: int, file_service_id: int, tag_ids: typing.Collection[ int ], include_current_tags: bool, include_pending_tags: bool ):
        """
        Get one combined count per tag.
        
        :param include_current_tags: add each tag's current count.
        :param include_pending_tags: add each tag's pending count.
        :return: collections.Counter of tag_id -> count; empty when both include flags are False.
        """
        
        ids_to_count = collections.Counter()
        
        if not include_current_tags and not include_pending_tags:
            
            return ids_to_count
            
        
        ids_to_count_statuses = self.GetCountsEstimateStatuses( tag_display_type, tag_service_id, file_service_id, tag_ids )
        
        for ( tag_id, ( current_count, pending_count ) ) in ids_to_count_statuses.items():
            
            count = 0
            
            if include_current_tags:
                
                count += current_count
                
            
            # this previously tested include_current_tags, which mixed pending counts into current-only requests and
            # dropped them from pending-only requests
            if include_pending_tags:
                
                count += pending_count
                
            
            ids_to_count[ tag_id ] = count
            
        
        return ids_to_count
        
    
    def GetCountsEstimateStatuses( self, tag_display_type: int, tag_service_id: int, file_service_id: int, tag_ids: typing.Collection[ int ] ):
        """
        Get the minimum current and pending counts per tag, as reported by GetCounts.
        
        :return: defaultdict of tag_id -> ( current_min, pending_min ); missing tags default to ( 0, 0 ).
        """
        
        ids_to_count_full = self.GetCounts( tag_display_type, tag_service_id, file_service_id, tag_ids, True, True )
        
        ids_to_count_statuses = collections.defaultdict( lambda: ( 0, 0 ) )
        
        for ( tag_id, ( current_min, current_max, pending_min, pending_max ) ) in ids_to_count_full.items():
            
            ids_to_count_statuses[ tag_id ] = ( current_min, pending_min )
            
        
        return ids_to_count_statuses
        
    
    def GetCountsForTag( self, tag_display_type, file_service_id, tag_service_id, tag_id ):
        """Return the list of ( tag_id, current_count, pending_count ) rows stored for one tag."""
        
        counts_cache_table_name = self.GetCountsCacheTableName( tag_display_type, file_service_id, tag_service_id )
        
        return self._Execute( 'SELECT tag_id, current_count, pending_count FROM {} WHERE tag_id = ?;'.format( counts_cache_table_name ), ( tag_id, ) ).fetchall()
        
    
    def GetCountsForTags( self, tag_display_type, file_service_id, tag_service_id, temp_tag_id_table_name ):
        """
        Return the list of ( tag_id, current_count, pending_count ) rows for every tag in a tag id table.
        
        :param temp_tag_id_table_name: name of a table with a tag_id column.
        """
        
        counts_cache_table_name = self.GetCountsCacheTableName( tag_display_type, file_service_id, tag_service_id )
        
        # temp tags to counts
        return self._Execute( 'SELECT tag_id, current_count, pending_count FROM {} CROSS JOIN {} USING ( tag_id );'.format( temp_tag_id_table_name, counts_cache_table_name ) ).fetchall()
        
    
    def GetCurrentPendingPositiveCountsAndWeights( self, tag_display_type, file_service_id, tag_service_id, tag_ids, tag_ids_table_name = None ):
        """
        Split the given tags into those with a positive current count and those with a positive pending count.
        
        :return: ( current_tag_ids, current_tag_weight, pending_tag_ids, pending_tag_weight ), where each weight is
                 the sum of the positive minimum counts for that status.
        """
        
        # note the swapped service argument order that GetCounts expects
        ids_to_count = self.GetCounts( tag_display_type, tag_service_id, file_service_id, tag_ids, True, True, tag_ids_table_name = tag_ids_table_name )
        
        current_tag_ids = set()
        current_tag_weight = 0
        
        pending_tag_ids = set()
        pending_tag_weight = 0
        
        for ( tag_id, ( current_min, current_max, pending_min, pending_max ) ) in ids_to_count.items():
            
            if current_min > 0:
                
                current_tag_ids.add( tag_id )
                current_tag_weight += current_min
                
            
            if pending_min > 0:
                
                pending_tag_ids.add( tag_id )
                pending_tag_weight += pending_min
                
            
        
        return ( current_tag_ids, current_tag_weight, pending_tag_ids, pending_tag_weight )
        
    
    def GetMissingTagCountServicePairs( self ):
        """
        Return ( missing storage pairs, missing display pairs ) as recorded by _RepairRepopulateTables.
        
        Each is the instance's own set of ( file_service_id, tag_service_id ) tuples, not a copy.
        """
        
        return ( self._missing_storage_tag_service_pairs, self._missing_display_tag_service_pairs )
        
    
    def GetQueryPhraseForCurrentTagIds( self, tag_display_type, file_service_id, tag_service_id ):
        """Return a SELECT fragment, with no trailing semicolon, for the tag ids that have a positive current count."""
        
        counts_cache_table_name = self.GetCountsCacheTableName( tag_display_type, file_service_id, tag_service_id )
        
        return 'SELECT tag_id FROM {} WHERE current_count > 0'.format( counts_cache_table_name )
        
    
    def GetTablesAndColumnsThatUseDefinitions( self, content_type: int ) -> typing.List[ typing.Tuple[ str, str ] ]:
        """
        List the ( table name, column name ) pairs in this module that store ids of the given content type.
        
        Only HC.CONTENT_TYPE_TAG yields results: every table from self._GetServicesTableGenerationDict(), paired
        with 'tag_id'. Any other content type yields an empty list.
        """
        
        if content_type != HC.CONTENT_TYPE_TAG:
            
            return []
            
        
        table_dict = self._GetServicesTableGenerationDict()
        
        return [ ( table_name, 'tag_id' ) for table_name in table_dict.keys() ]
        
    
    def GetTotalCurrentCount( self, tag_display_type, file_service_id, tag_service_id ):
        """Return the sum of current_count over the whole counts table, or 0 when the query yields no value."""
        
        counts_cache_table_name = self.GetCountsCacheTableName( tag_display_type, file_service_id, tag_service_id )
        
        result = self._Execute( 'SELECT SUM( current_count ) FROM {};'.format( counts_cache_table_name ) ).fetchone()
        
        # guard both a missing row and a NULL sum
        if result is None or result[0] is None:
            
            return 0
            
        
        ( count, ) = result
        
        return count
        
    
    def ReduceCounts( self, tag_display_type, file_service_id, tag_service_id, ac_cache_changes ):
        """
        Subtract count deltas from a counts table, deleting rows that the reduction would bring exactly to zero.
        
        :param ac_cache_changes: sized, re-iterable collection of ( tag_id, current_delta, pending_delta ), with
                                 positive deltas.
        :return: ( deleted_tag_ids, deleted_local_tag_ids ). deleted_tag_ids are the tag ids whose row was deleted;
                 deleted_local_tag_ids are the same ids, filled only when file_service_id is the combined local file service.
        """
        
        # this takes positive counts, despite ultimately being a reduce guy
        
        counts_cache_table_name = self.GetCountsCacheTableName( tag_display_type, file_service_id, tag_service_id )
        
        is_combined_local_file_domain = file_service_id == self.modules_services.combined_local_file_service_id # and tag_service_id = all known tags
        
        delete_query = 'DELETE FROM {} WHERE tag_id = ? AND current_count = ? AND pending_count = ?;'.format( counts_cache_table_name )
        
        deleted_tag_ids = set()
        deleted_local_tag_ids = set()
        
        for ( tag_id, current_delta, pending_delta ) in ac_cache_changes:
            
            # the delete only matches when the stored counts equal the deltas, i.e. the row would end up at zero
            self._Execute( delete_query, ( tag_id, current_delta, pending_delta ) )
            
            if self._GetRowCount() > 0:
                
                deleted_tag_ids.add( tag_id )
                
                if is_combined_local_file_domain:
                    
                    deleted_local_tag_ids.add( tag_id )
                    
                
            
        
        # whatever was not deleted still has counts left after the reduction, so subtract in place
        if len( deleted_tag_ids ) < len( ac_cache_changes ):
            
            update_query = 'UPDATE {} SET current_count = current_count - ?, pending_count = pending_count - ? WHERE tag_id = ?;'.format( counts_cache_table_name )
            
            self._ExecuteMany( update_query, ( ( current_delta, pending_delta, tag_id ) for ( tag_id, current_delta, pending_delta ) in ac_cache_changes if tag_id not in deleted_tag_ids ) )
            
        
        return ( deleted_tag_ids, deleted_local_tag_ids )
        
    