712 lines
22 KiB
Python
712 lines
22 KiB
Python
import collections
|
|
import typing
|
|
|
|
import psutil
|
|
import sqlite3
|
|
|
|
from hydrus.core import HydrusData
|
|
from hydrus.core import HydrusPaths
|
|
from hydrus.core import HydrusGlobals as HG
|
|
from hydrus.core import HydrusTemp
|
|
from hydrus.core import HydrusTime
|
|
|
|
def CheckHasSpaceForDBTransaction( db_dir, num_bytes ):
|
|
|
|
space_needed = int( num_bytes * 1.1 )
|
|
|
|
if HG.no_db_temp_files:
|
|
|
|
approx_available_memory = psutil.virtual_memory().available * 4 / 5
|
|
|
|
if approx_available_memory < num_bytes:
|
|
|
|
raise Exception( 'I believe you need about {} available memory, since you are running in no_db_temp_files mode, but you only seem to have {}.'.format( HydrusData.ToHumanBytes( space_needed ), HydrusData.ToHumanBytes( approx_available_memory ) ) )
|
|
|
|
|
|
db_disk_free_space = HydrusPaths.GetFreeSpace( db_dir )
|
|
|
|
if db_disk_free_space < space_needed:
|
|
|
|
raise Exception( 'I believe you need about {} on your db\'s disk partition, but you only seem to have {}.'.format( HydrusData.ToHumanBytes( space_needed ), HydrusData.ToHumanBytes( db_disk_free_space ) ) )
|
|
|
|
|
|
else:
|
|
|
|
temp_dir = HydrusTemp.GetCurrentTempDir()
|
|
|
|
temp_disk_free_space = HydrusPaths.GetFreeSpace( temp_dir )
|
|
|
|
temp_and_db_on_same_device = HydrusPaths.GetDevice( temp_dir ) == HydrusPaths.GetDevice( db_dir )
|
|
|
|
if temp_and_db_on_same_device:
|
|
|
|
space_needed *= 2
|
|
|
|
if temp_disk_free_space < space_needed:
|
|
|
|
raise Exception( 'I believe you need about {} on your db\'s disk partition, which I think also holds your temporary path, but you only seem to have {}.'.format( HydrusData.ToHumanBytes( space_needed ), HydrusData.ToHumanBytes( temp_disk_free_space ) ) )
|
|
|
|
|
|
else:
|
|
|
|
if temp_disk_free_space < space_needed:
|
|
|
|
message = 'I believe you need about {} on your temporary path\'s disk partition, which I think is {}, but you only seem to have {}.'.format( HydrusData.ToHumanBytes( space_needed ), temp_dir, HydrusData.ToHumanBytes( temp_disk_free_space ) )
|
|
|
|
if HydrusPaths.GetTotalSpace( temp_dir ) <= 4 * 1024 * 1024 * 1024:
|
|
|
|
message += ' I think you might be using a ramdisk! You may want to instead launch hydrus with a different temp dir. Please check the "launch arguments" section of the help.'
|
|
|
|
else:
|
|
|
|
message += ' Please note that temporary paths can be complicated, and if you have a ramdisk or OS settings limiting how large it can get, or you simply cannot free space on your system drive, you may want to instead launch hydrus with a different temp directory. Please check the "launch arguments" section of the help.'
|
|
|
|
|
|
raise Exception( message )
|
|
|
|
|
|
db_disk_free_space = HydrusPaths.GetFreeSpace( db_dir )
|
|
|
|
if db_disk_free_space < space_needed:
|
|
|
|
raise Exception( 'I believe you need about {} on your db\'s disk partition, but you only seem to have {}.'.format( HydrusData.ToHumanBytes( space_needed ), HydrusData.ToHumanBytes( db_disk_free_space ) ) )
|
|
|
|
|
|
|
|
|
|
|
|
def ReadFromCancellableCursor( cursor, largest_group_size, cancelled_hook = None ):
|
|
|
|
if cancelled_hook is None:
|
|
|
|
return cursor.fetchall()
|
|
|
|
|
|
results = []
|
|
|
|
if cancelled_hook():
|
|
|
|
return results
|
|
|
|
|
|
NUM_TO_GET = 1
|
|
|
|
group_of_results = cursor.fetchmany( NUM_TO_GET )
|
|
|
|
while len( group_of_results ) > 0:
|
|
|
|
results.extend( group_of_results )
|
|
|
|
if cancelled_hook():
|
|
|
|
break
|
|
|
|
|
|
if NUM_TO_GET < largest_group_size:
|
|
|
|
NUM_TO_GET *= 2
|
|
|
|
|
|
group_of_results = cursor.fetchmany( NUM_TO_GET )
|
|
|
|
|
|
return results
|
|
|
|
|
|
class TemporaryIntegerTableNameCache( object ):
|
|
|
|
my_instance = None
|
|
|
|
def __init__( self ):
|
|
|
|
TemporaryIntegerTableNameCache.my_instance = self
|
|
|
|
self._column_names_to_table_names = collections.defaultdict( collections.deque )
|
|
self._column_names_counter = collections.Counter()
|
|
|
|
|
|
@staticmethod
|
|
def instance() -> 'TemporaryIntegerTableNameCache':
|
|
|
|
if TemporaryIntegerTableNameCache.my_instance is None:
|
|
|
|
raise Exception( 'TemporaryIntegerTableNameCache is not yet initialised!' )
|
|
|
|
else:
|
|
|
|
return TemporaryIntegerTableNameCache.my_instance
|
|
|
|
|
|
|
|
def Clear( self ):
|
|
|
|
self._column_names_to_table_names = collections.defaultdict( collections.deque )
|
|
self._column_names_counter = collections.Counter()
|
|
|
|
|
|
def GetName( self, column_name ):
|
|
|
|
table_names = self._column_names_to_table_names[ column_name ]
|
|
|
|
initialised = True
|
|
|
|
if len( table_names ) == 0:
|
|
|
|
initialised = False
|
|
|
|
i = self._column_names_counter[ column_name ]
|
|
|
|
table_name = 'mem.temp_int_{}_{}'.format( column_name, i )
|
|
|
|
table_names.append( table_name )
|
|
|
|
self._column_names_counter[ column_name ] += 1
|
|
|
|
|
|
table_name = table_names.pop()
|
|
|
|
return ( initialised, table_name )
|
|
|
|
|
|
def ReleaseName( self, column_name, table_name ):
|
|
|
|
self._column_names_to_table_names[ column_name ].append( table_name )
|
|
|
|
|
|
class TemporaryIntegerTable( object ):
|
|
|
|
def __init__( self, cursor: sqlite3.Cursor, integer_iterable, column_name ):
|
|
|
|
if not isinstance( integer_iterable, set ):
|
|
|
|
integer_iterable = set( integer_iterable )
|
|
|
|
|
|
self._cursor = cursor
|
|
self._integer_iterable = integer_iterable
|
|
self._column_name = column_name
|
|
|
|
( self._initialised, self._table_name ) = TemporaryIntegerTableNameCache.instance().GetName( self._column_name )
|
|
|
|
|
|
def __enter__( self ):
|
|
|
|
if not self._initialised:
|
|
|
|
self._cursor.execute( 'CREATE TABLE IF NOT EXISTS {} ( {} INTEGER PRIMARY KEY );'.format( self._table_name, self._column_name ) )
|
|
|
|
|
|
self._cursor.executemany( 'INSERT INTO {} ( {} ) VALUES ( ? );'.format( self._table_name, self._column_name ), ( ( i, ) for i in self._integer_iterable ) )
|
|
|
|
return self._table_name
|
|
|
|
|
|
def __exit__( self, exc_type, exc_val, exc_tb ):
|
|
|
|
self._cursor.execute( 'DELETE FROM {};'.format( self._table_name ) )
|
|
|
|
TemporaryIntegerTableNameCache.instance().ReleaseName( self._column_name, self._table_name )
|
|
|
|
return False
|
|
|
|
|
|
class DBBase( object ):
|
|
|
|
def __init__( self ):
|
|
|
|
self._c = None
|
|
|
|
|
|
def _AnalyzeTempTable( self, temp_table_name ):
|
|
|
|
# this is useful to do after populating a temp table so the query planner can decide which index to use in a big join that uses it
|
|
|
|
self._Execute( 'ANALYZE {};'.format( temp_table_name ) )
|
|
self._Execute( 'ANALYZE mem.sqlite_master;' ) # this reloads the current stats into the query planner, may no longer be needed
|
|
|
|
|
|
def _CloseCursor( self ):
|
|
|
|
if self._c is not None:
|
|
|
|
self._c.close()
|
|
|
|
del self._c
|
|
|
|
self._c = None
|
|
|
|
|
|
|
|
def _CreateIndex( self, table_name, columns, unique = False ):
|
|
|
|
if unique:
|
|
|
|
create_phrase = 'CREATE UNIQUE INDEX IF NOT EXISTS'
|
|
|
|
else:
|
|
|
|
create_phrase = 'CREATE INDEX IF NOT EXISTS'
|
|
|
|
|
|
ideal_index_name = self._GenerateIdealIndexName( table_name, columns )
|
|
|
|
index_name = ideal_index_name
|
|
|
|
i = 0
|
|
|
|
while self._ActuaIndexExists( index_name ):
|
|
|
|
index_name = f'{ideal_index_name}_{i}'
|
|
|
|
i += 1
|
|
|
|
|
|
if '.' in table_name:
|
|
|
|
table_name_simple = table_name.split( '.' )[1]
|
|
|
|
else:
|
|
|
|
table_name_simple = table_name
|
|
|
|
|
|
statement = '{} {} ON {} ({});'.format( create_phrase, index_name, table_name_simple, ', '.join( columns ) )
|
|
|
|
self._Execute( statement )
|
|
|
|
|
|
def _Execute( self, query, *query_args ) -> sqlite3.Cursor:
|
|
|
|
if HG.query_planner_mode and query not in HG.queries_planned:
|
|
|
|
plan_lines = self._c.execute( 'EXPLAIN QUERY PLAN {}'.format( query ), *query_args ).fetchall()
|
|
|
|
HG.query_planner_query_count += 1
|
|
|
|
HG.controller.PrintQueryPlan( query, plan_lines )
|
|
|
|
|
|
return self._c.execute( query, *query_args )
|
|
|
|
|
|
def _ExecuteCancellable( self, query, query_args, cancelled_hook: typing.Callable[ [], bool ] ):
|
|
|
|
cursor = self._Execute( query, query_args )
|
|
|
|
return ReadFromCancellableCursor( cursor, 1024, cancelled_hook = cancelled_hook )
|
|
|
|
|
|
def _ExecuteMany( self, query, args_iterator ):
|
|
|
|
if HG.query_planner_mode and query not in HG.queries_planned:
|
|
|
|
args_iterator = list( args_iterator )
|
|
|
|
if len( args_iterator ) > 0:
|
|
|
|
plan_lines = self._c.execute( 'EXPLAIN QUERY PLAN {}'.format( query ), args_iterator[0] ).fetchall()
|
|
|
|
HG.query_planner_query_count += 1
|
|
|
|
HG.controller.PrintQueryPlan( query, plan_lines )
|
|
|
|
|
|
|
|
self._c.executemany( query, args_iterator )
|
|
|
|
|
|
def _GenerateIdealIndexName( self, table_name, columns ):
|
|
|
|
return '{}_{}_index'.format( table_name, '_'.join( columns ) )
|
|
|
|
|
|
def _GetAttachedDatabaseNames( self, include_temp = False ):
|
|
|
|
if include_temp:
|
|
|
|
f = lambda schema_name, path: True
|
|
|
|
else:
|
|
|
|
f = lambda schema_name, path: schema_name != 'temp' and path != ''
|
|
|
|
|
|
names = [ schema_name for ( number, schema_name, path ) in self._Execute( 'PRAGMA database_list;' ) if f( schema_name, path ) ]
|
|
|
|
return names
|
|
|
|
|
|
def _GetLastRowId( self ) -> int:
|
|
|
|
return self._c.lastrowid
|
|
|
|
|
|
def _GetRowCount( self ):
|
|
|
|
row_count = self._c.rowcount
|
|
|
|
if row_count == -1:
|
|
|
|
return 0
|
|
|
|
else:
|
|
|
|
return row_count
|
|
|
|
|
|
|
|
def _GetSumResult( self, result: typing.Optional[ typing.Tuple[ typing.Optional[ int ] ] ] ) -> int:
|
|
|
|
if result is None or result[0] is None:
|
|
|
|
sum_value = 0
|
|
|
|
else:
|
|
|
|
( sum_value, ) = result
|
|
|
|
|
|
return sum_value
|
|
|
|
|
|
def _ActuaIndexExists( self, index_name ):
|
|
|
|
if '.' in index_name:
|
|
|
|
( schema, index_name ) = index_name.split( '.', 1 )
|
|
|
|
search_schemas = [ schema ]
|
|
|
|
else:
|
|
|
|
search_schemas = self._GetAttachedDatabaseNames()
|
|
|
|
|
|
for schema in search_schemas:
|
|
|
|
result = self._Execute( f'SELECT 1 FROM {schema}.sqlite_master WHERE name = ? and type = ?;', ( index_name, 'index' ) ).fetchone()
|
|
|
|
if result is not None:
|
|
|
|
return True
|
|
|
|
|
|
|
|
return False
|
|
|
|
|
|
def _IdealIndexExists( self, table_name, columns ):
|
|
|
|
# ok due to deferred delete gubbins, we have overlapping index names. therefore this has to be more flexible than a static name
|
|
# we'll search based on tbl_name in sqlite_master
|
|
|
|
ideal_index_name = self._GenerateIdealIndexName( table_name, columns )
|
|
|
|
if '.' in ideal_index_name:
|
|
|
|
( schema, ideal_index_name ) = ideal_index_name.split( '.', 1 )
|
|
|
|
search_schemas = [ schema ]
|
|
|
|
else:
|
|
|
|
search_schemas = self._GetAttachedDatabaseNames()
|
|
|
|
|
|
if '.' in table_name:
|
|
|
|
table_name = table_name.split( '.', 1 )[1]
|
|
|
|
|
|
for schema in search_schemas:
|
|
|
|
table_result = self._Execute( f'SELECT 1 FROM {schema}.sqlite_master WHERE name = ?;', ( table_name, ) ).fetchone()
|
|
|
|
if table_result is None:
|
|
|
|
continue
|
|
|
|
|
|
# ok the table exists on this db, so let's see if it has our index, whatever its actual name
|
|
|
|
all_indices_of_this_table = self._STL( self._Execute( f'SELECT name FROM {schema}.sqlite_master WHERE tbl_name = ? AND type = ?;', ( table_name, 'index' ) ) )
|
|
|
|
for index_name in all_indices_of_this_table:
|
|
|
|
if ideal_index_name in index_name:
|
|
|
|
return True
|
|
|
|
|
|
|
|
|
|
return False
|
|
|
|
|
|
def _MakeTemporaryIntegerTable( self, integer_iterable, column_name ):
|
|
|
|
return TemporaryIntegerTable( self._c, integer_iterable, column_name )
|
|
|
|
|
|
def _SetCursor( self, c: sqlite3.Cursor ):
|
|
|
|
self._c = c
|
|
|
|
|
|
def _STI( self, iterable_cursor ):
|
|
|
|
# strip singleton tuples to an iterator
|
|
|
|
return ( item for ( item, ) in iterable_cursor )
|
|
|
|
|
|
def _STL( self, iterable_cursor ):
|
|
|
|
# strip singleton tuples to a list
|
|
|
|
return [ item for ( item, ) in iterable_cursor ]
|
|
|
|
|
|
def _STS( self, iterable_cursor ):
|
|
|
|
# strip singleton tuples to a set
|
|
|
|
return { item for ( item, ) in iterable_cursor }
|
|
|
|
|
|
def _TableExists( self, table_name ):
|
|
|
|
if '.' in table_name:
|
|
|
|
( schema, table_name ) = table_name.split( '.', 1 )
|
|
|
|
search_schemas = [ schema ]
|
|
|
|
else:
|
|
|
|
search_schemas = self._GetAttachedDatabaseNames()
|
|
|
|
|
|
for schema in search_schemas:
|
|
|
|
result = self._Execute( f'SELECT 1 FROM {schema}.sqlite_master WHERE name = ? AND type = ?;', ( table_name, 'table' ) ).fetchone()
|
|
|
|
if result is not None:
|
|
|
|
return True
|
|
|
|
|
|
|
|
return False
|
|
|
|
|
|
|
|
JOURNAL_SIZE_LIMIT = 128 * 1024 * 1024
|
|
JOURNAL_ZERO_PERIOD = 900
|
|
MEM_REFRESH_PERIOD = 600
|
|
WAL_PASSIVE_CHECKPOINT_PERIOD = 300
|
|
WAL_TRUNCATE_CHECKPOINT_PERIOD = 900
|
|
|
|
class DBCursorTransactionWrapper( DBBase ):
|
|
|
|
def __init__( self, c: sqlite3.Cursor, transaction_commit_period: int ):
|
|
|
|
DBBase.__init__( self )
|
|
|
|
self._SetCursor( c )
|
|
|
|
self._transaction_commit_period = transaction_commit_period
|
|
|
|
self._transaction_start_time = 0
|
|
self._in_transaction = False
|
|
self._transaction_contains_writes = False
|
|
|
|
self._last_mem_refresh_time = HydrusTime.GetNow()
|
|
self._last_wal_passive_checkpoint_time = HydrusTime.GetNow()
|
|
self._last_wal_truncate_checkpoint_time = HydrusTime.GetNow()
|
|
self._last_journal_zero_time = HydrusTime.GetNow()
|
|
|
|
self._pubsubs = []
|
|
|
|
|
|
def _ZeroJournal( self ):
|
|
|
|
if HG.db_journal_mode not in ( 'PERSIST', 'WAL' ):
|
|
|
|
return
|
|
|
|
|
|
self._Execute( 'BEGIN IMMEDIATE;' )
|
|
|
|
# durable_temp is not excluded here
|
|
db_names = [ name for ( index, name, path ) in self._Execute( 'PRAGMA database_list;' ) if name not in ( 'mem', 'temp' ) ]
|
|
|
|
for db_name in db_names:
|
|
|
|
self._Execute( 'PRAGMA {}.journal_size_limit = {};'.format( db_name, 0 ) )
|
|
|
|
|
|
self._Execute( 'COMMIT;' )
|
|
|
|
for db_name in db_names:
|
|
|
|
self._Execute( 'PRAGMA {}.journal_size_limit = {};'.format( db_name, JOURNAL_SIZE_LIMIT ) )
|
|
|
|
|
|
|
|
def BeginImmediate( self ):
|
|
|
|
if not self._in_transaction:
|
|
|
|
self._Execute( 'BEGIN IMMEDIATE;' )
|
|
self._Execute( 'SAVEPOINT hydrus_savepoint;' )
|
|
|
|
self._transaction_start_time = HydrusTime.GetNow()
|
|
self._in_transaction = True
|
|
self._transaction_contains_writes = False
|
|
|
|
|
|
|
|
def CleanPubSubs( self ):
|
|
|
|
self._pubsubs = []
|
|
|
|
|
|
def Commit( self ):
|
|
|
|
if self._in_transaction:
|
|
|
|
self.DoPubSubs()
|
|
|
|
self.CleanPubSubs()
|
|
|
|
self._Execute( 'COMMIT;' )
|
|
|
|
self._in_transaction = False
|
|
self._transaction_contains_writes = False
|
|
|
|
if HG.db_journal_mode == 'WAL' and HydrusTime.TimeHasPassed( self._last_wal_passive_checkpoint_time + WAL_PASSIVE_CHECKPOINT_PERIOD ):
|
|
|
|
if HydrusTime.TimeHasPassed( self._last_wal_truncate_checkpoint_time + WAL_TRUNCATE_CHECKPOINT_PERIOD ):
|
|
|
|
self._Execute( 'PRAGMA wal_checkpoint(TRUNCATE);' )
|
|
|
|
self._last_wal_truncate_checkpoint_time = HydrusTime.GetNow()
|
|
|
|
else:
|
|
|
|
self._Execute( 'PRAGMA wal_checkpoint(PASSIVE);' )
|
|
|
|
|
|
self._last_wal_passive_checkpoint_time = HydrusTime.GetNow()
|
|
|
|
|
|
if HydrusTime.TimeHasPassed( self._last_mem_refresh_time + MEM_REFRESH_PERIOD ):
|
|
|
|
self._Execute( 'DETACH mem;' )
|
|
self._Execute( 'ATTACH ":memory:" AS mem;' )
|
|
|
|
TemporaryIntegerTableNameCache.instance().Clear()
|
|
|
|
self._last_mem_refresh_time = HydrusTime.GetNow()
|
|
|
|
|
|
if HG.db_journal_mode == 'PERSIST' and HydrusTime.TimeHasPassed( self._last_journal_zero_time + JOURNAL_ZERO_PERIOD ):
|
|
|
|
self._ZeroJournal()
|
|
|
|
self._last_journal_zero_time = HydrusTime.GetNow()
|
|
|
|
|
|
else:
|
|
|
|
HydrusData.Print( 'Received a call to commit, but was not in a transaction!' )
|
|
|
|
|
|
|
|
def CommitAndBegin( self ):
|
|
|
|
if self._in_transaction:
|
|
|
|
self.Commit()
|
|
|
|
self.BeginImmediate()
|
|
|
|
|
|
|
|
def DoPubSubs( self ):
|
|
|
|
for ( topic, args, kwargs ) in self._pubsubs:
|
|
|
|
HG.controller.pub( topic, *args, **kwargs )
|
|
|
|
|
|
|
|
def InTransaction( self ):
|
|
|
|
return self._in_transaction
|
|
|
|
|
|
def NotifyWriteOccuring( self ):
|
|
|
|
self._transaction_contains_writes = True
|
|
|
|
|
|
def pub_after_job( self, topic, *args, **kwargs ):
|
|
|
|
if len( args ) == 0 and len( kwargs ) == 0:
|
|
|
|
if ( topic, args, kwargs ) in self._pubsubs:
|
|
|
|
return
|
|
|
|
|
|
|
|
self._pubsubs.append( ( topic, args, kwargs ) )
|
|
|
|
|
|
def Rollback( self ):
|
|
|
|
if self._in_transaction:
|
|
|
|
self._Execute( 'ROLLBACK TO hydrus_savepoint;' )
|
|
|
|
# any temp int tables created in this lad will be rolled back, so 'initialised' can't be trusted. just reset, no big deal
|
|
TemporaryIntegerTableNameCache.instance().Clear()
|
|
|
|
# still in transaction
|
|
# transaction may no longer contain writes, but it isn't important to figure out that it doesn't
|
|
|
|
else:
|
|
|
|
HydrusData.Print( 'Received a call to rollback, but was not in a transaction!' )
|
|
|
|
|
|
|
|
def Save( self ):
|
|
|
|
if self._in_transaction:
|
|
|
|
try:
|
|
|
|
self._Execute( 'RELEASE hydrus_savepoint;' )
|
|
|
|
except sqlite3.OperationalError:
|
|
|
|
HydrusData.Print( 'Tried to release a database savepoint, but failed!' )
|
|
|
|
|
|
self._Execute( 'SAVEPOINT hydrus_savepoint;' )
|
|
|
|
else:
|
|
|
|
HydrusData.Print( 'Received a call to save, but was not in a transaction!' )
|
|
|
|
|
|
|
|
def TimeToCommit( self ):
|
|
|
|
return self._in_transaction and self._transaction_contains_writes and HydrusTime.TimeHasPassed( self._transaction_start_time + self._transaction_commit_period )
|
|
|
|
|