import collections import sqlite3 import typing from hydrus.core import HydrusConstants as HC from hydrus.core import HydrusData from hydrus.core import HydrusDB from hydrus.core import HydrusDBBase from hydrus.core import HydrusExceptions from hydrus.client import ClientConstants as CC from hydrus.client import ClientLocation from hydrus.client.db import ClientDBMaster from hydrus.client.db import ClientDBModule from hydrus.client.db import ClientDBServices def GenerateFilesTableNames( service_id: int ) -> typing.Tuple[ str, str, str, str ]: suffix = str( service_id ) current_files_table_name = 'main.current_files_{}'.format( suffix ) deleted_files_table_name = 'main.deleted_files_{}'.format( suffix ) pending_files_table_name = 'main.pending_files_{}'.format( suffix ) petitioned_files_table_name = 'main.petitioned_files_{}'.format( suffix ) return ( current_files_table_name, deleted_files_table_name, pending_files_table_name, petitioned_files_table_name ) def GenerateFilesTableName( service_id: int, status: int ) -> str: ( current_files_table_name, deleted_files_table_name, pending_files_table_name, petitioned_files_table_name ) = GenerateFilesTableNames( service_id ) if status == HC.CONTENT_STATUS_CURRENT: return current_files_table_name elif status == HC.CONTENT_STATUS_DELETED: return deleted_files_table_name elif status == HC.CONTENT_STATUS_PENDING: return pending_files_table_name else: return petitioned_files_table_name class DBLocationContext( object ): def __init__( self, location_context: ClientLocation.LocationContext ): self.location_context = location_context def GetLocationContext( self ) -> ClientLocation.LocationContext: return self.location_context def GetMultipleFilesTableNames( self ): raise HydrusExceptions.DataMissing( 'Sorry, this DB Location Context has no multiple file tables!' ) def GetSingleFilesTableName( self ): raise HydrusExceptions.DataMissing( 'Sorry, this DB Location Context has no single file table!' ) def GetTableJoinIteratedByFileDomain( self, table_phrase: str ) -> str: raise NotImplementedError() def GetTableJoinLimitedByFileDomain( self, table_phrase: str ) -> str: raise NotImplementedError() def SingleTableIsFast( self ) -> bool: return False class DBLocationContextAllKnownFiles( DBLocationContext ): def GetTableJoinIteratedByFileDomain( self, table_phrase: str ) -> str: return table_phrase def GetTableJoinLimitedByFileDomain( self, table_phrase: str ) -> str: return table_phrase class DBLocationContextLeaf( DBLocationContext ): def __init__( self, location_context: ClientLocation.LocationContext, files_table_name: str ): DBLocationContext.__init__( self, location_context ) self._files_table_name = files_table_name def GetMultipleFilesTableNames( self ): return [ self.GetSingleFilesTableName() ] def GetSingleFilesTableName( self ): return self._files_table_name def GetTableJoinIteratedByFileDomain( self, table_phrase: str ) -> str: return '{} CROSS JOIN {} USING ( hash_id )'.format( self._files_table_name, table_phrase ) def GetTableJoinLimitedByFileDomain( self, table_phrase: str ) -> str: return '{} CROSS JOIN {} USING ( hash_id )'.format( table_phrase, self._files_table_name ) def SingleTableIsFast( self ) -> bool: return True class DBLocationContextBranch( DBLocationContext, ClientDBModule.ClientDBModule ): # this still sucks and should be random and then dropped neatly by a manager or something so we can have more than one of these guys at once SINGLE_TABLE_NAME = 'mem.temp_file_storage_hash_id' def __init__( self, cursor: sqlite3.Cursor, location_context: ClientLocation.LocationContext, files_table_names: typing.Collection[ str ] ): ClientDBModule.ClientDBModule.__init__( self, 'db location (branch)', cursor ) DBLocationContext.__init__( self, location_context ) self._files_table_names = files_table_names self._single_table_initialised = False def _InitialiseSingleTableIfNeeded( self ): if self._single_table_initialised: return result = self._Execute( 'SELECT 1 FROM mem.sqlite_master WHERE name = ?;', ( self.SINGLE_TABLE_NAME, ) ).fetchone() if result is None: self._Execute( 'CREATE TABLE IF NOT EXISTS {} ( hash_id INTEGER PRIMARY KEY );'.format( self.SINGLE_TABLE_NAME ) ) else: self._Execute( 'DELETE FROM {};'.format( self.SINGLE_TABLE_NAME ) ) select_query = ' UNION '.join( ( 'SELECT hash_id FROM {}'.format( table_name ) for table_name in self._files_table_names ) ) # these notes are old and transplanted from a previous creation method that prepopulated the table and sent it to this class already made. now it happens on demand # feel free to clear out or reconsider, but my current feeling is we have to bite the bullet a little here. best use of time is working on gettablejoiniterated/limitedby. the callers there may be ok with multiple access too # # ok, so I _can_ just go: # # files_table_name = '({})'.format( select_query ) # # here and not populate an actual temp table. basically making a VIEW. this seems to be SEARCH for two tables and SCAN for three or more, at least on newer SQLite. I'm pretty sure older SQLite has trouble optimising # so it is potentially super fast, but worst case is really bad since that SCAN will happen over and over # the temp table population puts a fixed one-time SCAN overhead so is lame but stable # the ideal answer is to rewrite all dblocationcontext code to handle multiple table names. this isn't possible for some requests like duplicates, although honestly they could do the table population themselves # THE ANSWER: since that is more complicated, what I really need is a new subclass of DBLocationClass object that can handle more complicated situations, write a method for single/multiple, # and if the consumer needs the single (as some dupe code does), then the class itself borrows a temp table name from a manager class, like tempinttables, and populates it there and then # MOVING THIS STUFF TO THIS NEW BRANCH OBJECT IS THIS WORK, MORE CAN BE DONE # # another possible solution might be another file table that I always keep synced, maybe something like ( hash_id, service_id, status ). that might be quickly searchable as a VIEW. quicker than this anyway # some experimentation with this results in some really bad worst case query planning at times. most of the time it is great, but sometimes it can't figure out the hash_id as the thing to SEARCH with # # another idea, if we are going to end up adding 'sync' code, is to have multiple not-so-temp tables for different service combinations and then just invalidate them (or even update them) on file changes # we can just re-use them mate self._Execute( 'INSERT OR IGNORE INTO {} ( hash_id ) {};'.format( self.SINGLE_TABLE_NAME, select_query ) ) self._single_table_initialised = True def GetMultipleFilesTableNames( self ): return self._files_table_names def GetSingleFilesTableName( self ): self._InitialiseSingleTableIfNeeded() return self.SINGLE_TABLE_NAME def GetTableJoinIteratedByFileDomain( self, table_phrase: str ) -> str: self._InitialiseSingleTableIfNeeded() return '{} CROSS JOIN {} USING ( hash_id )'.format( self.SINGLE_TABLE_NAME, table_phrase ) def GetTableJoinLimitedByFileDomain( self, table_phrase: str ) -> str: self._InitialiseSingleTableIfNeeded() return '{} CROSS JOIN {} USING ( hash_id )'.format( table_phrase, self.SINGLE_TABLE_NAME ) def GetTablesAndColumnsThatUseDefinitions( self, content_type: int ) -> typing.List[ typing.Tuple[ str, str ] ]: return [] def SingleTableIsFast( self ) -> bool: return False class ClientDBFilesStorage( ClientDBModule.ClientDBModule ): def __init__( self, cursor: sqlite3.Cursor, cursor_transaction_wrapper: HydrusDBBase.DBCursorTransactionWrapper, modules_services: ClientDBServices.ClientDBMasterServices, modules_hashes: ClientDBMaster.ClientDBMasterHashes, modules_texts: ClientDBMaster.ClientDBMasterTexts ): self._cursor_transaction_wrapper = cursor_transaction_wrapper self.modules_services = modules_services self.modules_hashes = modules_hashes self.modules_texts = modules_texts ClientDBModule.ClientDBModule.__init__( self, 'client file locations', cursor ) def _GetInitialTableGenerationDict( self ) -> dict: return { 'main.local_file_deletion_reasons' : ( 'CREATE TABLE IF NOT EXISTS {} ( hash_id INTEGER PRIMARY KEY, reason_id INTEGER );', 400 ), 'main.deferred_physical_file_deletes' : ( 'CREATE TABLE IF NOT EXISTS {} ( hash_id INTEGER PRIMARY KEY );', 464 ), 'main.deferred_physical_thumbnail_deletes' : ( 'CREATE TABLE IF NOT EXISTS {} ( hash_id INTEGER PRIMARY KEY );', 464 ) } def _GetServiceIndexGenerationDict( self, service_id ) -> dict: ( current_files_table_name, deleted_files_table_name, pending_files_table_name, petitioned_files_table_name ) = GenerateFilesTableNames( service_id ) index_generation_dict = {} index_generation_dict[ current_files_table_name ] = [ ( [ 'timestamp' ], False, 447 ) ] index_generation_dict[ deleted_files_table_name ] = [ ( [ 'timestamp' ], False, 447 ), ( [ 'original_timestamp' ], False, 447 ) ] index_generation_dict[ petitioned_files_table_name ] = [ ( [ 'reason_id' ], False, 447 ) ] return index_generation_dict def _GetServiceTableGenerationDict( self, service_id ) -> dict: ( current_files_table_name, deleted_files_table_name, pending_files_table_name, petitioned_files_table_name ) = GenerateFilesTableNames( service_id ) return { current_files_table_name : ( 'CREATE TABLE IF NOT EXISTS {} ( hash_id INTEGER PRIMARY KEY, timestamp INTEGER );', 447 ), deleted_files_table_name : ( 'CREATE TABLE IF NOT EXISTS {} ( hash_id INTEGER PRIMARY KEY, timestamp INTEGER, original_timestamp INTEGER );', 447 ), pending_files_table_name : ( 'CREATE TABLE IF NOT EXISTS {} ( hash_id INTEGER PRIMARY KEY );', 447 ), petitioned_files_table_name : ( 'CREATE TABLE IF NOT EXISTS {} ( hash_id INTEGER PRIMARY KEY, reason_id INTEGER );', 447 ) } def _GetServiceIdsWeGenerateDynamicTablesFor( self ): return self.modules_services.GetServiceIds( HC.FILE_SERVICES_WITH_SPECIFIC_MAPPING_CACHES ) def AddFiles( self, service_id, insert_rows ): ( current_files_table_name, deleted_files_table_name, pending_files_table_name, petitioned_files_table_name ) = GenerateFilesTableNames( service_id ) self._ExecuteMany( 'INSERT OR IGNORE INTO {} VALUES ( ?, ? );'.format( current_files_table_name ), ( ( hash_id, timestamp ) for ( hash_id, timestamp ) in insert_rows ) ) self._ExecuteMany( 'DELETE FROM {} WHERE hash_id = ?;'.format( pending_files_table_name ), ( ( hash_id, ) for ( hash_id, timestamp ) in insert_rows ) ) if service_id == self.modules_services.combined_local_file_service_id: for ( hash_id, timestamp ) in insert_rows: self.ClearDeferredPhysicalDeleteIds( file_hash_id = hash_id, thumbnail_hash_id = hash_id ) elif self.modules_services.GetService( service_id ).GetServiceType() == HC.FILE_REPOSITORY: # it may be the case the files were just uploaded after being deleted self.DeferFilesDeleteIfNowOrphan( [ hash_id for ( hash_id, timestamp ) in insert_rows ] ) pending_changed = self._GetRowCount() > 0 return pending_changed def ClearDeferredPhysicalDelete( self, file_hash = None, thumbnail_hash = None ): file_hash_id = None if file_hash is None else self.modules_hashes.GetHashId( file_hash ) thumbnail_hash_id = None if thumbnail_hash is None else self.modules_hashes.GetHashId( thumbnail_hash ) self.ClearDeferredPhysicalDeleteIds( file_hash_id = file_hash_id, thumbnail_hash_id = thumbnail_hash_id ) def ClearDeferredPhysicalDeleteIds( self, file_hash_id = None, thumbnail_hash_id = None ): if file_hash_id is not None: self._Execute( 'DELETE FROM deferred_physical_file_deletes WHERE hash_id = ?;', ( file_hash_id, ) ) if thumbnail_hash_id is not None: self._Execute( 'DELETE FROM deferred_physical_thumbnail_deletes WHERE hash_id = ?;', ( thumbnail_hash_id, ) ) def ClearDeleteRecord( self, service_id, hash_ids ): deleted_files_table_name = GenerateFilesTableName( service_id, HC.CONTENT_STATUS_DELETED ) self._ExecuteMany( 'DELETE FROM {} WHERE hash_id = ?;'.format( deleted_files_table_name ), ( ( hash_id, ) for hash_id in hash_ids ) ) num_deleted = self._GetRowCount() return num_deleted def ClearFileDeletionReason( self, hash_ids ): self._ExecuteMany( 'DELETE FROM local_file_deletion_reasons WHERE hash_id = ?;', ( ( hash_id, ) for hash_id in hash_ids ) ) def ClearFilesTables( self, service_id: int, keep_pending = False ): ( current_files_table_name, deleted_files_table_name, pending_files_table_name, petitioned_files_table_name ) = GenerateFilesTableNames( service_id ) self._Execute( 'DELETE FROM {};'.format( current_files_table_name ) ) self._Execute( 'DELETE FROM {};'.format( deleted_files_table_name ) ) if not keep_pending: self._Execute( 'DELETE FROM {};'.format( pending_files_table_name ) ) self._Execute( 'DELETE FROM {};'.format( petitioned_files_table_name ) ) def ClearLocalDeleteRecord( self, hash_ids = None ): # we delete from everywhere, but not for files currently in the trash service_ids_to_nums_cleared = {} local_non_trash_service_ids = self.modules_services.GetServiceIds( ( HC.COMBINED_LOCAL_FILE, HC.COMBINED_LOCAL_MEDIA, HC.LOCAL_FILE_DOMAIN ) ) if hash_ids is None: trash_current_files_table_name = GenerateFilesTableName( self.modules_services.trash_service_id, HC.CONTENT_STATUS_CURRENT ) for service_id in local_non_trash_service_ids: deleted_files_table_name = GenerateFilesTableName( service_id, HC.CONTENT_STATUS_DELETED ) self._Execute( 'DELETE FROM {} WHERE hash_id NOT IN ( SELECT hash_id FROM {} );'.format( deleted_files_table_name, trash_current_files_table_name ) ) num_cleared = self._GetRowCount() service_ids_to_nums_cleared[ service_id ] = num_cleared self._Execute( 'DELETE FROM local_file_deletion_reasons WHERE hash_id NOT IN ( SELECT hash_id FROM {} );'.format( trash_current_files_table_name ) ) else: trashed_hash_ids = self.FilterHashIdsToStatus( self.modules_services.trash_service_id, hash_ids, HC.CONTENT_STATUS_CURRENT ) ok_to_clear_hash_ids = set( hash_ids ).difference( trashed_hash_ids ) if len( ok_to_clear_hash_ids ) > 0: for service_id in local_non_trash_service_ids: deleted_files_table_name = GenerateFilesTableName( service_id, HC.CONTENT_STATUS_DELETED ) self._ExecuteMany( 'DELETE FROM {} WHERE hash_id = ?;'.format( deleted_files_table_name ), ( ( hash_id, ) for hash_id in ok_to_clear_hash_ids ) ) num_cleared = self._GetRowCount() service_ids_to_nums_cleared[ service_id ] = num_cleared self.ClearFileDeletionReason( ok_to_clear_hash_ids ) return service_ids_to_nums_cleared def DeferFilesDeleteIfNowOrphan( self, hash_ids, definitely_no_thumbnails = False, ignore_service_id = None ): orphan_hash_ids = self.FilterOrphanFileHashIds( hash_ids, ignore_service_id = ignore_service_id ) if len( orphan_hash_ids ) > 0: self._ExecuteMany( 'INSERT OR IGNORE INTO deferred_physical_file_deletes ( hash_id ) VALUES ( ? );', ( ( hash_id, ) for hash_id in orphan_hash_ids ) ) self._cursor_transaction_wrapper.pub_after_job( 'notify_new_physical_file_deletes' ) self._cursor_transaction_wrapper.pub_after_job( 'notify_new_physical_file_delete_numbers' ) if not definitely_no_thumbnails: orphan_hash_ids = self.FilterOrphanThumbnailHashIds( hash_ids, ignore_service_id = ignore_service_id ) if len( orphan_hash_ids ) > 0: self._ExecuteMany( 'INSERT OR IGNORE INTO deferred_physical_thumbnail_deletes ( hash_id ) VALUES ( ? );', ( ( hash_id, ) for hash_id in orphan_hash_ids ) ) self._cursor_transaction_wrapper.pub_after_job( 'notify_new_physical_file_deletes' ) self._cursor_transaction_wrapper.pub_after_job( 'notify_new_physical_file_delete_numbers' ) def DeletePending( self, service_id: int ): ( current_files_table_name, deleted_files_table_name, pending_files_table_name, petitioned_files_table_name ) = GenerateFilesTableNames( service_id ) if self.modules_services.GetService( service_id ).GetServiceType() == HC.FILE_REPOSITORY: for ( block_of_hash_ids, num_done, num_to_do ) in HydrusDB.ReadLargeIdQueryInSeparateChunks( self._c, 'SELECT hash_id FROM {};'.format( pending_files_table_name ), 1024 ): self.DeferFilesDeleteIfNowOrphan( block_of_hash_ids, ignore_service_id = service_id ) self._Execute( 'DELETE FROM {};'.format( pending_files_table_name ) ) self._Execute( 'DELETE FROM {};'.format( petitioned_files_table_name ) ) def DropFilesTables( self, service_id: int ): ( current_files_table_name, deleted_files_table_name, pending_files_table_name, petitioned_files_table_name ) = GenerateFilesTableNames( service_id ) if self.modules_services.GetService( service_id ).GetServiceType() == HC.FILE_REPOSITORY: for ( block_of_hash_ids, num_done, num_to_do ) in HydrusDB.ReadLargeIdQueryInSeparateChunks( self._c, 'SELECT hash_id FROM {};'.format( pending_files_table_name ), 1024 ): self.DeferFilesDeleteIfNowOrphan( block_of_hash_ids, ignore_service_id = service_id ) self._Execute( 'DROP TABLE IF EXISTS {};'.format( current_files_table_name ) ) self._Execute( 'DROP TABLE IF EXISTS {};'.format( deleted_files_table_name ) ) self._Execute( 'DROP TABLE IF EXISTS {};'.format( pending_files_table_name ) ) self._Execute( 'DROP TABLE IF EXISTS {};'.format( petitioned_files_table_name ) ) def FilterAllCurrentHashIds( self, hash_ids, just_these_service_ids = None ): if just_these_service_ids is None: service_ids = self.modules_services.GetServiceIds( HC.FILE_SERVICES_WITH_SPECIFIC_MAPPING_CACHES ) else: service_ids = just_these_service_ids current_hash_ids = set() with self._MakeTemporaryIntegerTable( hash_ids, 'hash_id' ) as temp_hash_ids_table_name: for service_id in service_ids: current_files_table_name = GenerateFilesTableName( service_id, HC.CONTENT_STATUS_CURRENT ) hash_id_iterator = self._STI( self._Execute( 'SELECT hash_id FROM {} CROSS JOIN {} USING ( hash_id );'.format( temp_hash_ids_table_name, current_files_table_name ) ) ) current_hash_ids.update( hash_id_iterator ) return current_hash_ids def FilterAllPendingHashIds( self, hash_ids, just_these_service_ids = None ): if just_these_service_ids is None: service_ids = self.modules_services.GetServiceIds( HC.FILE_SERVICES_WITH_SPECIFIC_MAPPING_CACHES ) else: service_ids = just_these_service_ids pending_hash_ids = set() with self._MakeTemporaryIntegerTable( hash_ids, 'hash_id' ) as temp_hash_ids_table_name: for service_id in service_ids: pending_files_table_name = GenerateFilesTableName( service_id, HC.CONTENT_STATUS_PENDING ) hash_id_iterator = self._STI( self._Execute( 'SELECT hash_id FROM {} CROSS JOIN {} USING ( hash_id );'.format( temp_hash_ids_table_name, pending_files_table_name ) ) ) pending_hash_ids.update( hash_id_iterator ) return pending_hash_ids def FilterHashIds( self, location_context: ClientLocation.LocationContext, hash_ids ) -> set: if location_context.IsEmpty(): return set() if location_context.IsAllKnownFiles(): return hash_ids filtered_hash_ids = set() with self._MakeTemporaryIntegerTable( hash_ids, 'hash_id' ) as temp_hash_ids_table_name: for file_service_key in location_context.current_service_keys: service_id = self.modules_services.GetServiceId( file_service_key ) current_files_table_name = GenerateFilesTableName( service_id, HC.CONTENT_STATUS_CURRENT ) matching_hash_ids = self._STS( self._Execute( 'SELECT hash_id FROM {} CROSS JOIN {} USING ( hash_id );'.format( temp_hash_ids_table_name, current_files_table_name ) ) ) if len( matching_hash_ids ) > 0: filtered_hash_ids.update( matching_hash_ids ) if len( filtered_hash_ids ) == len( hash_ids ): return filtered_hash_ids self._ExecuteMany( 'DELETE FROM {} WHERE hash_id = ?;'.format( temp_hash_ids_table_name ), ( ( hash_id, ) for hash_id in matching_hash_ids ) ) for file_service_key in location_context.deleted_service_keys: service_id = self.modules_services.GetServiceId( file_service_key ) deleted_files_table_name = GenerateFilesTableName( service_id, HC.CONTENT_STATUS_DELETED ) matching_hash_ids = self._STS( self._Execute( 'SELECT hash_id FROM {} CROSS JOIN {} USING ( hash_id );'.format( temp_hash_ids_table_name, deleted_files_table_name ) ) ) if len( matching_hash_ids ) > 0: filtered_hash_ids.update( matching_hash_ids ) if len( filtered_hash_ids ) == len( hash_ids ): return filtered_hash_ids self._ExecuteMany( 'DELETE FROM {} WHERE hash_id = ?;'.format( temp_hash_ids_table_name ), ( ( hash_id, ) for hash_id in matching_hash_ids ) ) return filtered_hash_ids def FilterHashIdsToStatus( self, service_id, hash_ids, status ) -> typing.Set[ int ]: if service_id == self.modules_services.combined_file_service_id: if status == HC.CONTENT_STATUS_CURRENT: return set( hash_ids ) else: return set() with self._MakeTemporaryIntegerTable( hash_ids, 'hash_id' ) as temp_hash_ids_table_name: files_table_name = GenerateFilesTableName( service_id, status ) result_hash_ids = self._STS( self._Execute( 'SELECT hash_id FROM {} CROSS JOIN {} USING ( hash_id );'.format( temp_hash_ids_table_name, files_table_name ) ) ) return result_hash_ids def FilterOrphanFileHashIds( self, hash_ids, ignore_service_id = None ): useful_hash_ids = self.FilterHashIdsToStatus( self.modules_services.combined_local_file_service_id, hash_ids, HC.CONTENT_STATUS_CURRENT ) orphan_hash_ids = set( hash_ids ).difference( useful_hash_ids ) if len( orphan_hash_ids ) > 0: just_these_service_ids = self.modules_services.GetServiceIds( ( HC.FILE_REPOSITORY, ) ) if ignore_service_id is not None: just_these_service_ids.discard( ignore_service_id ) # anything pending upload somewhere, we want to keep useful_hash_ids = self.FilterAllPendingHashIds( orphan_hash_ids, just_these_service_ids = just_these_service_ids ) orphan_hash_ids.difference_update( useful_hash_ids ) return orphan_hash_ids def FilterOrphanThumbnailHashIds( self, hash_ids, ignore_service_id = None ): services = self.modules_services.GetServices( ( HC.COMBINED_LOCAL_FILE, HC.FILE_REPOSITORY ) ) current_service_keys = [ service.GetServiceKey() for service in services ] if ignore_service_id is not None: service = self.modules_services.GetService( ignore_service_id ) ignore_service_key = service.GetServiceKey() if ignore_service_key in current_service_keys: current_service_keys.remove( ignore_service_key ) location_context = ClientLocation.LocationContext.STATICCreateAllCurrent( current_service_keys ) current_hash_ids = self.FilterHashIds( location_context, hash_ids ) orphan_hash_ids = set( hash_ids ).difference( current_hash_ids ) if len( orphan_hash_ids ) > 0: just_these_service_ids = self.modules_services.GetServiceIds( ( HC.FILE_REPOSITORY, ) ) if ignore_service_id is not None: just_these_service_ids.discard( ignore_service_id ) # anything pending upload somewhere, we want to keep since we'll be wanting the thumb soon anyway useful_hash_ids = self.FilterAllPendingHashIds( orphan_hash_ids, just_these_service_ids = just_these_service_ids ) orphan_hash_ids.difference_update( useful_hash_ids ) # we could try and be clever and say "and then filter xxx by 'mimes with thumbnails' using files_info", but let's not get too ahead of ourselves # the places where the difference would matter, like some client going back to an earlier version where .clips no longer have thumbs on a file repo, are complicated and would have little benefit when correct # no need to sharpen that knife too much return orphan_hash_ids def GenerateFilesTables( self, service_id: int ): table_generation_dict = self._GetServiceTableGenerationDict( service_id ) for ( table_name, ( create_query_without_name, version_added ) ) in table_generation_dict.items(): self._CreateTable( create_query_without_name, table_name ) index_generation_dict = self._GetServiceIndexGenerationDict( service_id ) for ( table_name, columns, unique, version_added ) in self._FlattenIndexGenerationDict( index_generation_dict ): self._CreateIndex( table_name, columns, unique = unique ) def GetAPendingHashId( self, service_id ): pending_files_table_name = GenerateFilesTableName( service_id, HC.CONTENT_STATUS_PENDING ) result = self._Execute( 'SELECT hash_id FROM {};'.format( pending_files_table_name ) ).fetchone() if result is None: return None else: ( hash_id, ) = result return hash_id def GetAPetitionedHashId( self, service_id ): petitioned_files_table_name = GenerateFilesTableName( service_id, HC.CONTENT_STATUS_PETITIONED ) result = self._Execute( 'SELECT hash_id FROM {};'.format( petitioned_files_table_name ) ).fetchone() if result is None: return None else: ( hash_id, ) = result return hash_id def GetCurrentFilesCount( self, service_id, only_viewable = False ): current_files_table_name = GenerateFilesTableName( service_id, HC.CONTENT_STATUS_CURRENT ) if only_viewable: # hashes to mimes result = self._Execute( 'SELECT COUNT( * ) FROM {} CROSS JOIN files_info USING ( hash_id ) WHERE mime IN {};'.format( current_files_table_name, HydrusData.SplayListForDB( HC.SEARCHABLE_MIMES ) ) ).fetchone() else: result = self._Execute( 'SELECT COUNT( * ) FROM {};'.format( current_files_table_name ) ).fetchone() ( count, ) = result return count def GetCurrentFilesInboxCount( self, service_id ): current_files_table_name = GenerateFilesTableName( service_id, HC.CONTENT_STATUS_CURRENT ) result = self._Execute( 'SELECT COUNT( * ) FROM {} CROSS JOIN file_inbox USING ( hash_id );'.format( current_files_table_name ) ).fetchone() ( count, ) = result return count def GetCurrentHashIdsList( self, service_id ): current_files_table_name = GenerateFilesTableName( service_id, HC.CONTENT_STATUS_CURRENT ) hash_ids = self._STL( self._Execute( 'SELECT hash_id FROM {};'.format( current_files_table_name ) ) ) return hash_ids def GetCurrentFilesTotalSize( self, service_id ): current_files_table_name = GenerateFilesTableName( service_id, HC.CONTENT_STATUS_CURRENT ) # hashes to size result = self._Execute( 'SELECT SUM( size ) FROM {} CROSS JOIN files_info USING ( hash_id );'.format( current_files_table_name ) ).fetchone() ( count, ) = result return count def GetCurrentHashIdsToTimestamps( self, service_id, hash_ids ): current_files_table_name = GenerateFilesTableName( service_id, HC.CONTENT_STATUS_CURRENT ) with self._MakeTemporaryIntegerTable( hash_ids, 'hash_id' ) as temp_hash_ids_table_name: rows = dict( self._Execute( 'SELECT hash_id, timestamp FROM {} CROSS JOIN {} USING ( hash_id );'.format( temp_hash_ids_table_name, current_files_table_name ) ) ) return rows def GetCurrentTimestamp( self, service_id: int, hash_id: int ): current_files_table_name = GenerateFilesTableName( service_id, HC.CONTENT_STATUS_CURRENT ) result = self._Execute( 'SELECT timestamp FROM {} WHERE hash_id = ?;'.format( current_files_table_name ), ( hash_id, ) ).fetchone() if result is None: return None else: ( timestamp, ) = result return timestamp def GetDeferredPhysicalDelete( self ): file_result = self._Execute( 'SELECT hash_id FROM deferred_physical_file_deletes LIMIT 1;' ).fetchone() if file_result is not None: ( hash_id, ) = file_result file_result = self.modules_hashes.GetHash( hash_id ) thumbnail_result = self._Execute( 'SELECT hash_id FROM deferred_physical_thumbnail_deletes LIMIT 1;' ).fetchone() if thumbnail_result is not None: ( hash_id, ) = thumbnail_result thumbnail_result = self.modules_hashes.GetHash( hash_id ) return ( file_result, thumbnail_result ) def GetDeferredPhysicalDeleteCounts( self ): ( num_files, ) = self._Execute( 'SELECT COUNT( * ) FROM deferred_physical_file_deletes;' ).fetchone() ( num_thumbnails, ) = self._Execute( 'SELECT COUNT( * ) FROM deferred_physical_thumbnail_deletes;' ).fetchone() return ( num_files, num_thumbnails ) def GetDeletedFilesCount( self, service_id: int ) -> int: deleted_files_table_name = GenerateFilesTableName( service_id, HC.CONTENT_STATUS_DELETED ) result = self._Execute( 'SELECT COUNT( * ) FROM {};'.format( deleted_files_table_name ) ).fetchone() ( count, ) = result return count def GetDeletedHashIdsList( self, service_id ): deleted_files_table_name = GenerateFilesTableName( service_id, HC.CONTENT_STATUS_DELETED ) hash_ids = self._STL( self._Execute( 'SELECT hash_id FROM {};'.format( deleted_files_table_name ) ) ) return hash_ids def GetDeletionStatus( self, service_id, hash_id ): # can have a value here and just be in trash, so we fetch it whatever the end result result = self._Execute( 'SELECT reason_id FROM local_file_deletion_reasons WHERE hash_id = ?;', ( hash_id, ) ).fetchone() if result is None: file_deletion_reason = 'Unknown deletion reason.' else: ( reason_id, ) = result file_deletion_reason = self.modules_texts.GetText( reason_id ) deleted_files_table_name = GenerateFilesTableName( service_id, HC.CONTENT_STATUS_DELETED ) is_deleted = False timestamp = None result = self._Execute( 'SELECT timestamp FROM {} WHERE hash_id = ?;'.format( deleted_files_table_name ), ( hash_id, ) ).fetchone() if result is not None: is_deleted = True ( timestamp, ) = result return ( is_deleted, timestamp, file_deletion_reason ) def GetDBLocationContext( self, location_context: ClientLocation.LocationContext ): if location_context.IsEmpty(): location_context = ClientLocation.LocationContext.STATICCreateSimple( CC.COMBINED_FILE_SERVICE_KEY ) if location_context.IsAllKnownFiles(): # no table set, obviously return DBLocationContextAllKnownFiles( location_context ) table_names = [] for current_service_key in location_context.current_service_keys: service_id = self.modules_services.GetServiceId( current_service_key ) table_names.append( GenerateFilesTableName( service_id, HC.CONTENT_STATUS_CURRENT ) ) for deleted_service_key in location_context.deleted_service_keys: service_id = self.modules_services.GetServiceId( deleted_service_key ) table_names.append( GenerateFilesTableName( service_id, HC.CONTENT_STATUS_DELETED ) ) if len( table_names ) == 1: files_table_name = table_names[0] return DBLocationContextLeaf( location_context, files_table_name ) else: return DBLocationContextBranch( self._c, location_context, table_names ) def GetHashIdsToCurrentServiceIds( self, temp_hash_ids_table_name ): hash_ids_to_current_file_service_ids = collections.defaultdict( list ) for service_id in self.modules_services.GetServiceIds( HC.FILE_SERVICES_WITH_SPECIFIC_MAPPING_CACHES ): current_files_table_name = GenerateFilesTableName( service_id, HC.CONTENT_STATUS_CURRENT ) for hash_id in self._STI( self._Execute( 'SELECT hash_id FROM {} CROSS JOIN {} USING ( hash_id );'.format( temp_hash_ids_table_name, current_files_table_name ) ) ): hash_ids_to_current_file_service_ids[ hash_id ].append( service_id ) return hash_ids_to_current_file_service_ids def GetHashIdsToFileDeletionReasons( self, hash_ids_table_name ): return dict( self._Execute( 'SELECT hash_id, text FROM {} CROSS JOIN local_file_deletion_reasons USING ( hash_id ) CROSS JOIN texts ON ( reason_id = text_id );'.format( hash_ids_table_name ) ) ) def GetHashIdsToServiceInfoDicts( self, temp_hash_ids_table_name ): hash_ids_to_current_file_service_ids_and_timestamps = collections.defaultdict( list ) hash_ids_to_deleted_file_service_ids_and_timestamps = collections.defaultdict( list ) hash_ids_to_pending_file_service_ids = collections.defaultdict( list ) hash_ids_to_petitioned_file_service_ids = collections.defaultdict( list ) for service_id in self.modules_services.GetServiceIds( HC.FILE_SERVICES_WITH_SPECIFIC_MAPPING_CACHES ): ( current_files_table_name, deleted_files_table_name, pending_files_table_name, petitioned_files_table_name ) = GenerateFilesTableNames( service_id ) for ( hash_id, timestamp ) in self._Execute( 'SELECT hash_id, timestamp FROM {} CROSS JOIN {} USING ( hash_id );'.format( temp_hash_ids_table_name, current_files_table_name ) ): hash_ids_to_current_file_service_ids_and_timestamps[ hash_id ].append( ( service_id, timestamp ) ) for ( hash_id, timestamp, original_timestamp ) in self._Execute( 'SELECT hash_id, timestamp, original_timestamp FROM {} CROSS JOIN {} USING ( hash_id );'.format( temp_hash_ids_table_name, deleted_files_table_name ) ): hash_ids_to_deleted_file_service_ids_and_timestamps[ hash_id ].append( ( service_id, timestamp, original_timestamp ) ) for hash_id in self._Execute( 'SELECT hash_id FROM {} CROSS JOIN {} USING ( hash_id );'.format( temp_hash_ids_table_name, pending_files_table_name ) ): hash_ids_to_pending_file_service_ids[ hash_id ].append( service_id ) for hash_id in self._Execute( 'SELECT hash_id FROM {} CROSS JOIN {} USING ( hash_id );'.format( temp_hash_ids_table_name, petitioned_files_table_name ) ): hash_ids_to_petitioned_file_service_ids[ hash_id ].append( service_id ) return ( hash_ids_to_current_file_service_ids_and_timestamps, hash_ids_to_deleted_file_service_ids_and_timestamps, hash_ids_to_pending_file_service_ids, hash_ids_to_petitioned_file_service_ids ) def GetLocationContextForAllServicesDeletedFiles( self ) -> ClientLocation.LocationContext: deleted_service_keys = { service.GetServiceKey() for service in self.modules_services.GetServices( limited_types = HC.FILE_SERVICES_COVERED_BY_COMBINED_DELETED_FILE ) } location_context = ClientLocation.LocationContext( [], deleted_service_keys ) return location_context def GetNumLocal( self, service_id: int ) -> int: current_files_table_name = GenerateFilesTableName( service_id, HC.CONTENT_STATUS_CURRENT ) combined_local_current_files_table_name = GenerateFilesTableName( self.modules_services.combined_local_file_service_id, HC.CONTENT_STATUS_CURRENT ) ( num_local, ) = self._Execute( 'SELECT COUNT( * ) FROM {} CROSS JOIN {} USING ( hash_id );'.format( current_files_table_name, combined_local_current_files_table_name ) ).fetchone() return num_local def GetPendingFilesCount( self, service_id: int ) -> int: pending_files_table_name = GenerateFilesTableName( service_id, HC.CONTENT_STATUS_PENDING ) result = self._Execute( 'SELECT COUNT( * ) FROM {};'.format( pending_files_table_name ) ).fetchone() ( count, ) = result return count def GetPetitionedFilesCount( self, service_id: int ) -> int: petitioned_files_table_name = GenerateFilesTableName( service_id, HC.CONTENT_STATUS_PETITIONED ) result = self._Execute( 'SELECT COUNT( * ) FROM {};'.format( petitioned_files_table_name ) ).fetchone() ( count, ) = result return count def GetServiceIdCounts( self, hash_ids ) -> typing.Dict[ int, int ]: with self._MakeTemporaryIntegerTable( hash_ids, 'hash_id' ) as temp_hash_ids_table_name: service_ids_to_counts = {} for service_id in self.modules_services.GetServiceIds( HC.FILE_SERVICES_WITH_SPECIFIC_MAPPING_CACHES ): current_files_table_name = GenerateFilesTableName( service_id, HC.CONTENT_STATUS_CURRENT ) # temp hashes to files ( count, ) = self._Execute( 'SELECT COUNT( * ) FROM {} CROSS JOIN {} USING ( hash_id );'.format( temp_hash_ids_table_name, current_files_table_name ) ).fetchone() service_ids_to_counts[ service_id ] = count return service_ids_to_counts def GetSomePetitionedRows( self, service_id: int ): petitioned_files_table_name = GenerateFilesTableName( service_id, HC.CONTENT_STATUS_PETITIONED ) petitioned_rows = list( HydrusData.BuildKeyToListDict( self._Execute( 'SELECT reason_id, hash_id FROM {} ORDER BY reason_id LIMIT 100;'.format( petitioned_files_table_name ) ) ).items() ) return petitioned_rows def GetTableJoinIteratedByFileDomain( self, service_id, table_name, status ): files_table_name = GenerateFilesTableName( service_id, status ) return '{} CROSS JOIN {} USING ( hash_id )'.format( files_table_name, table_name ) def GetTableJoinLimitedByFileDomain( self, service_id, table_name, status ): files_table_name = GenerateFilesTableName( service_id, status ) return '{} CROSS JOIN {} USING ( hash_id )'.format( table_name, files_table_name ) def GetTablesAndColumnsThatUseDefinitions( self, content_type: int ) -> typing.List[ typing.Tuple[ str, str ] ]: tables_and_columns = [ ( 'deferred_physical_file_deletes', 'hash_id' ), ( 'deferred_physical_thumbnail_deletes', 'hash_id' ) ] if content_type == HC.CONTENT_TYPE_HASH: for service_id in self.modules_services.GetServiceIds( HC.FILE_SERVICES_WITH_SPECIFIC_MAPPING_CACHES ): ( current_files_table_name, deleted_files_table_name, pending_files_table_name, petitioned_files_table_name ) = GenerateFilesTableNames( service_id ) tables_and_columns.extend( [ ( current_files_table_name, 'hash_id' ), ( deleted_files_table_name, 'hash_id' ), ( pending_files_table_name, 'hash_id' ), ( petitioned_files_table_name, 'hash_id' ) ] ) return tables_and_columns def GetUndeleteRows( self, service_id, hash_ids ): deleted_files_table_name = GenerateFilesTableName( service_id, HC.CONTENT_STATUS_DELETED ) with self._MakeTemporaryIntegerTable( hash_ids, 'hash_id' ) as temp_hash_ids_table_name: rows = self._Execute( 'SELECT hash_id, original_timestamp FROM {} CROSS JOIN {} USING ( hash_id );'.format( temp_hash_ids_table_name, deleted_files_table_name ) ).fetchall() return rows def GroupHashIdsByTagCachedFileServiceId( self, hash_ids, hash_ids_table_name, hash_ids_to_current_file_service_ids = None ): # when we would love to do a fast cache lookup, it is useful to know if all the hash_ids are on one or two common file domains if hash_ids_to_current_file_service_ids is None: hash_ids_to_current_file_service_ids = self.GetHashIdsToCurrentServiceIds( hash_ids_table_name ) cached_file_service_ids = set( self.modules_services.GetServiceIds( HC.FILE_SERVICES_WITH_SPECIFIC_MAPPING_CACHES ) ) file_service_ids_to_hash_ids = collections.defaultdict( set ) for ( hash_id, file_service_ids ) in hash_ids_to_current_file_service_ids.items(): for file_service_id in file_service_ids: if file_service_id in cached_file_service_ids: file_service_ids_to_hash_ids[ file_service_id ].add( hash_id ) # ok, we have our map, let's sort it out # sorting by most comprehensive service_id first file_service_ids_to_value = sorted( ( ( file_service_id, len( hash_ids ) ) for ( file_service_id, hash_ids ) in file_service_ids_to_hash_ids.items() ), key = lambda p: p[1], reverse = True ) seen_hash_ids = set() # make our mapping non-overlapping for pair in file_service_ids_to_value: file_service_id = pair[0] this_services_hash_ids_set = file_service_ids_to_hash_ids[ file_service_id ] if len( seen_hash_ids ) > 0: this_services_hash_ids_set.difference_update( seen_hash_ids ) if len( this_services_hash_ids_set ) == 0: del file_service_ids_to_hash_ids[ file_service_id ] else: seen_hash_ids.update( this_services_hash_ids_set ) unmapped_hash_ids = set( hash_ids ).difference( seen_hash_ids ) if len( unmapped_hash_ids ) > 0: file_service_ids_to_hash_ids[ self.modules_services.combined_file_service_id ] = unmapped_hash_ids return file_service_ids_to_hash_ids def PendFiles( self, service_id, hash_ids ): pending_files_table_name = GenerateFilesTableName( service_id, HC.CONTENT_STATUS_PENDING ) self._ExecuteMany( 'INSERT OR IGNORE INTO {} ( hash_id ) VALUES ( ? );'.format( pending_files_table_name ), ( ( hash_id, ) for hash_id in hash_ids ) ) def PetitionFiles( self, service_id, reason_id, hash_ids ): petitioned_files_table_name = GenerateFilesTableName( service_id, HC.CONTENT_STATUS_PETITIONED ) self._ExecuteMany( 'DELETE FROM {} WHERE hash_id = ?;'.format( petitioned_files_table_name ), ( ( hash_id, ) for hash_id in hash_ids ) ) self._ExecuteMany( 'INSERT OR IGNORE INTO {} ( hash_id, reason_id ) VALUES ( ?, ? );'.format( petitioned_files_table_name ), ( ( hash_id, reason_id ) for hash_id in hash_ids ) ) def RecordDeleteFiles( self, service_id, insert_rows ): deleted_files_table_name = GenerateFilesTableName( service_id, HC.CONTENT_STATUS_DELETED ) now = HydrusData.GetNow() self._ExecuteMany( 'INSERT OR IGNORE INTO {} ( hash_id, timestamp, original_timestamp ) VALUES ( ?, ?, ? );'.format( deleted_files_table_name ), ( ( hash_id, now, original_timestamp ) for ( hash_id, original_timestamp ) in insert_rows ) ) num_new_deleted_files = self._GetRowCount() return num_new_deleted_files def RescindPendFiles( self, service_id, hash_ids ): pending_files_table_name = GenerateFilesTableName( service_id, HC.CONTENT_STATUS_PENDING ) self._ExecuteMany( 'DELETE FROM {} WHERE hash_id = ?;'.format( pending_files_table_name ), ( ( hash_id, ) for hash_id in hash_ids ) ) self.DeferFilesDeleteIfNowOrphan( hash_ids ) def RescindPetitionFiles( self, service_id, hash_ids ): petitioned_files_table_name = GenerateFilesTableName( service_id, HC.CONTENT_STATUS_PETITIONED ) self._ExecuteMany( 'DELETE FROM {} WHERE hash_id = ?;'.format( petitioned_files_table_name ), ( ( hash_id, ) for hash_id in hash_ids ) ) def RemoveFiles( self, service_id, hash_ids ): ( current_files_table_name, deleted_files_table_name, pending_files_table_name, petitioned_files_table_name ) = GenerateFilesTableNames( service_id ) self._ExecuteMany( 'DELETE FROM {} WHERE hash_id = ?;'.format( current_files_table_name ), ( ( hash_id, ) for hash_id in hash_ids ) ) self._ExecuteMany( 'DELETE FROM {} WHERE hash_id = ?;'.format( petitioned_files_table_name ), ( ( hash_id, ) for hash_id in hash_ids ) ) if self.modules_services.GetService( service_id ).GetServiceType() in ( HC.COMBINED_LOCAL_FILE, HC.FILE_REPOSITORY ): self.DeferFilesDeleteIfNowOrphan( hash_ids ) pending_changed = self._GetRowCount() > 0 return pending_changed def SetFileDeletionReason( self, hash_ids, reason ): reason_id = self.modules_texts.GetTextId( reason ) self._ExecuteMany( 'REPLACE INTO local_file_deletion_reasons ( hash_id, reason_id ) VALUES ( ?, ? );', ( ( hash_id, reason_id ) for hash_id in hash_ids ) )