import os
import random
import sqlite3
import time
import typing

from hydrus.core import HydrusConstants as HC
from hydrus.core import HydrusData
from hydrus.core import HydrusGlobals as HG

from hydrus.client import ClientThreading
from hydrus.client.db import ClientDBModule

class ClientDBMaintenance( ClientDBModule.ClientDBModule ):
    
    def __init__( self, cursor: sqlite3.Cursor, db_dir: str, db_filenames: typing.Collection[ str ] ):
        
        ClientDBModule.ClientDBModule.__init__( self, 'client db maintenance', cursor )
        
        self._db_dir = db_dir
        self._db_filenames = db_filenames
        
    
    def _GetInitialTableGenerationDict( self ) -> dict:
        
        return {
            'main.last_shutdown_work_time' : ( 'CREATE TABLE IF NOT EXISTS {} ( last_shutdown_work_time INTEGER );', 400 ),
            'main.analyze_timestamps' : ( 'CREATE TABLE IF NOT EXISTS {} ( name TEXT, num_rows INTEGER, timestamp INTEGER );', 400 ),
            'main.vacuum_timestamps' : ( 'CREATE TABLE IF NOT EXISTS {} ( name TEXT, timestamp INTEGER );', 400 )
        }
        
    
    def _TableHasAtLeastRowCount( self, name, row_count ):
        
        # fetch rows one at a time so we can bail out early rather than paying for a full COUNT( * ) on a big table
        cursor = self._Execute( 'SELECT 1 FROM {};'.format( name ) )
        
        for i in range( row_count ):
            
            r = cursor.fetchone()
            
            if r is None:
                
                return False
                
            
        
        return True
        
    
    def _TableIsEmpty( self, name ):
        
        result = self._Execute( 'SELECT 1 FROM {};'.format( name ) ).fetchone()
        
        return result is None
        
    
    def AnalyzeDueTables( self, maintenance_mode = HC.MAINTENANCE_FORCED, stop_time = None, force_reanalyze = False ):
        
        names_to_analyze = self.GetTableNamesDueAnalysis( force_reanalyze = force_reanalyze )
        
        if len( names_to_analyze ) > 0:
            
            job_key = ClientThreading.JobKey( maintenance_mode = maintenance_mode, cancellable = True )
            
            try:
                
                job_key.SetStatusTitle( 'database maintenance - analyzing' )
                
                HG.client_controller.pub( 'modal_message', job_key )
                
                random.shuffle( names_to_analyze )
                
                for name in names_to_analyze:
                    
                    HG.client_controller.frame_splash_status.SetText( 'analyzing ' + name )
                    job_key.SetVariable( 'popup_text_1', 'analyzing ' + name )
                    
                    time.sleep( 0.02 )
                    
                    started = HydrusData.GetNowPrecise()
                    
                    self.AnalyzeTable( name )
                    
                    time_took = HydrusData.GetNowPrecise() - started
                    
                    if time_took > 1:
                        
                        HydrusData.Print( 'Analyzed ' + name + ' in ' + HydrusData.TimeDeltaToPrettyTimeDelta( time_took ) )
                        
                    
                    p1 = HG.client_controller.ShouldStopThisWork( maintenance_mode, stop_time = stop_time )
                    p2 = job_key.IsCancelled()
                    
                    if p1 or p2:
                        
                        break
                        
                    
                
                self._Execute( 'ANALYZE sqlite_master;' ) # this reloads the current stats into the query planner
                
                job_key.SetVariable( 'popup_text_1', 'done!' )
                
                HydrusData.Print( job_key.ToString() )
                
            finally:
                
                job_key.Finish()
                
                job_key.Delete( 10 )
                
            
        
    
    def AnalyzeTable( self, name ):
        
        do_it = True
        
        result = self._Execute( 'SELECT num_rows FROM analyze_timestamps WHERE name = ?;', ( name, ) ).fetchone()
        
        if result is not None:
            
            ( num_rows, ) = result
            
            # if we have previously analyzed a table with some data but the table is now empty, we do not want a new analyze
            
            if num_rows > 0 and self._TableIsEmpty( name ):
                
                do_it = False
                
            
        
        if do_it:
            
            self._Execute( 'ANALYZE ' + name + ';' )
            
            ( num_rows, ) = self._Execute( 'SELECT COUNT( * ) FROM ' + name + ';' ).fetchone()
            
            self._Execute( 'DELETE FROM analyze_timestamps WHERE name = ?;', ( name, ) )
            
            self._Execute( 'INSERT OR IGNORE INTO analyze_timestamps ( name, num_rows, timestamp ) VALUES ( ?, ?, ? );', ( name, num_rows, HydrusData.GetNow() ) )
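    
    # For illustration only (not called anywhere): ANALYZE writes its stats to the
    # sqlite_stat1 table, which is why GetTableNamesDueAnalysis below discards that
    # name from its scan. A quick sketch of inspecting what an analyze recorded,
    # using only stock sqlite:
    #
    #   for ( tbl, idx, stat ) in self._Execute( 'SELECT tbl, idx, stat FROM sqlite_stat1;' ):
    #       
    #       # stat is a space-separated string of integers; the first is the approximate row count
    #       print( tbl, idx, stat )
    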
    def GetLastShutdownWorkTime( self ):
        
        result = self._Execute( 'SELECT last_shutdown_work_time FROM last_shutdown_work_time;' ).fetchone()
        
        if result is None:
            
            return 0
            
        
        ( last_shutdown_work_time, ) = result
        
        return last_shutdown_work_time
        
    
    def GetTableNamesDueAnalysis( self, force_reanalyze = False ):
        
        db_names = [ name for ( index, name, path ) in self._Execute( 'PRAGMA database_list;' ) if name not in ( 'mem', 'temp', 'durable_temp' ) ]
        
        all_names = set()
        
        for db_name in db_names:
            
            all_names.update( ( name for ( name, ) in self._Execute( 'SELECT name FROM {}.sqlite_master WHERE type = ?;'.format( db_name ), ( 'table', ) ) ) )
            
        
        all_names.discard( 'sqlite_stat1' )
        
        if force_reanalyze:
            
            names_to_analyze = list( all_names )
            
        else:
            
            # Some tables get huge real fast (usually after syncing to a big repo).
            # If they have only ever been analyzed with incomplete or empty data, they run slow.
            # An analyze on a small table takes ~1ms, so let's do smaller tables more frequently and try to catch them as they grow.
            
            boundaries = []
            
            boundaries.append( ( 100, True, 6 * 3600 ) )
            boundaries.append( ( 10000, True, 3 * 86400 ) )
            boundaries.append( ( 100000, False, 3 * 30 * 86400 ) )
            
            # anything bigger than 100k rows will now not be analyzed
            
            existing_names_to_info = { name : ( num_rows, timestamp ) for ( name, num_rows, timestamp ) in self._Execute( 'SELECT name, num_rows, timestamp FROM analyze_timestamps;' ) }
            
            names_to_analyze = []
            
            for name in all_names:
                
                if name in existing_names_to_info:
                    
                    ( num_rows, timestamp ) = existing_names_to_info[ name ]
                    
                    for ( row_limit_for_this_boundary, can_analyze_immediately, period ) in boundaries:
                        
                        if num_rows > row_limit_for_this_boundary:
                            
                            continue
                            
                        
                        if not HydrusData.TimeHasPassed( timestamp + period ):
                            
                            continue
                            
                        
                        if can_analyze_immediately:
                            
                            # if it has grown, send up to the user, as it could be huge. else do it now
                            
                            if self._TableHasAtLeastRowCount( name, row_limit_for_this_boundary ):
                                
                                names_to_analyze.append( name )
                                
                            else:
                                
                                self.AnalyzeTable( name )
                                
                            
                        else:
                            
                            names_to_analyze.append( name )
                            
                        
                        break # a name matches at most one boundary--without this, a small stale table could be queued several times over
                        
                    
                else:
                    
                    names_to_analyze.append( name )
                    
                
            
        
        return names_to_analyze
        
    
    def GetTablesAndColumnsThatUseDefinitions( self, content_type: int ) -> typing.List[ typing.Tuple[ str, str ] ]:
        
        tables_and_columns = []
        
        return tables_and_columns
        
    
    def GetVacuumData( self ):
        
        vacuum_data = {}
        
        for ( name, filename ) in self._db_filenames.items():
            
            path = os.path.join( self._db_dir, filename )
            
            ( page_size, ) = self._Execute( 'PRAGMA {}.page_size;'.format( name ) ).fetchone()
            ( page_count, ) = self._Execute( 'PRAGMA {}.page_count;'.format( name ) ).fetchone()
            ( freelist_count, ) = self._Execute( 'PRAGMA {}.freelist_count;'.format( name ) ).fetchone()
            
            result = self._Execute( 'SELECT timestamp FROM vacuum_timestamps WHERE name = ?;', ( name, ) ).fetchone()
            
            if result is None:
                
                last_vacuumed = None
                
            else:
                
                ( last_vacuumed, ) = result
                
            
            this_vacuum_data = {}
            
            this_vacuum_data[ 'path' ] = path
            this_vacuum_data[ 'page_size' ] = page_size
            this_vacuum_data[ 'page_count' ] = page_count
            this_vacuum_data[ 'freelist_count' ] = freelist_count
            this_vacuum_data[ 'last_vacuumed' ] = last_vacuumed
            
            vacuum_data[ name ] = this_vacuum_data
            
        
        return vacuum_data
        
    
    def RegisterShutdownWork( self ):
        
        self._Execute( 'DELETE FROM last_shutdown_work_time;' )
        
        self._Execute( 'INSERT INTO last_shutdown_work_time ( last_shutdown_work_time ) VALUES ( ? );', ( HydrusData.GetNow(), ) )
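    
    # A sketch of how a caller might interpret the numbers gathered by GetVacuumData.
    # The 5% threshold and the do_vacuum helper are hypothetical, for illustration
    # only; the real policy lives with the caller:
    #
    #   data = self.GetVacuumData()[ 'main' ]
    #   
    #   db_size = data[ 'page_size' ] * data[ 'page_count' ]
    #   reclaimable = data[ 'page_size' ] * data[ 'freelist_count' ]
    #   
    #   # VACUUM rewrites the whole file, so it is typically only worth doing when the
    #   # freelist is a decent fraction of the file
    #   if db_size > 0 and reclaimable / db_size > 0.05:
    #       
    #       do_vacuum( 'main' ) # hypothetical helper
    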
    def RegisterSuccessfulVacuum( self, name: str ):
        
        self._Execute( 'DELETE FROM vacuum_timestamps WHERE name = ?;', ( name, ) )
        
        self._Execute( 'INSERT OR IGNORE INTO vacuum_timestamps ( name, timestamp ) VALUES ( ?, ? );', ( name, HydrusData.GetNow() ) )
        
    
    def TouchAnalyzeNewTables( self ):
        
        # just a little thing to run after creating and populating tables that will scan any actual new stuff
        # TODO: Actually lmao, this didn't do what I wanted and often caused megalag
        
        pass
        
        # self.GetTableNamesDueAnalysis()
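
# A minimal sketch of how the owning database object might drive this module during
# a maintenance run. The wiring here is hypothetical and for illustration only; in
# the client, the db object owns the cursor and calls these methods itself:
#
#   maintenance = ClientDBMaintenance( cursor, db_dir, db_filenames )
#   
#   maintenance.AnalyzeDueTables( maintenance_mode = HC.MAINTENANCE_FORCED, stop_time = HydrusData.GetNow() + 60 )
#   
#   maintenance.RegisterShutdownWork() # so GetLastShutdownWorkTime reflects this run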