848 lines
28 KiB
Python
848 lines
28 KiB
Python
import os
|
|
import sqlite3
|
|
import typing
|
|
|
|
from hydrus.core import HydrusConstants as HC
|
|
from hydrus.core import HydrusData
|
|
from hydrus.core import HydrusDBBase
|
|
from hydrus.core import HydrusExceptions
|
|
from hydrus.core import HydrusTags
|
|
|
|
from hydrus.client.db import ClientDBModule
|
|
from hydrus.client.networking import ClientNetworkingFunctions
|
|
|
|
class ClientDBMasterHashes( ClientDBModule.ClientDBModule ):
|
|
|
|
def __init__( self, cursor: sqlite3.Cursor ):
    """Initialise the module under the name 'client hashes master' with an empty hash cache."""
    
    super().__init__( 'client hashes master', cursor )
    
    # hash_id -> hash pairs already read from the hashes table
    self._hash_ids_to_hashes_cache = {}
|
|
|
|
|
|
def _GetCriticalTableNames( self ) -> typing.Collection[ str ]:
|
|
|
|
return {
|
|
'external_master.hashes'
|
|
}
|
|
|
|
|
|
def _GetInitialIndexGenerationDict( self ) -> dict:
|
|
|
|
index_generation_dict = {}
|
|
|
|
index_generation_dict[ 'external_master.local_hashes' ] = [
|
|
( [ 'md5' ], False, 400 ),
|
|
( [ 'sha1' ], False, 400 ),
|
|
( [ 'sha512' ], False, 400 )
|
|
]
|
|
|
|
return index_generation_dict
|
|
|
|
|
|
def _GetInitialTableGenerationDict( self ) -> dict:
|
|
|
|
return {
|
|
'external_master.hashes' : ( 'CREATE TABLE IF NOT EXISTS {} ( hash_id INTEGER PRIMARY KEY, hash BLOB_BYTES UNIQUE );', 400 ),
|
|
'external_master.local_hashes' : ( 'CREATE TABLE IF NOT EXISTS {} ( hash_id INTEGER PRIMARY KEY, md5 BLOB_BYTES, sha1 BLOB_BYTES, sha512 BLOB_BYTES );', 400 )
|
|
}
|
|
|
|
|
|
def _PopulateHashIdsToHashesCache( self, hash_ids, exception_on_error = False ):
|
|
|
|
if len( self._hash_ids_to_hashes_cache ) > 100000:
|
|
|
|
if not isinstance( hash_ids, set ):
|
|
|
|
hash_ids = set( hash_ids )
|
|
|
|
|
|
self._hash_ids_to_hashes_cache = { hash_id : hash for ( hash_id, hash ) in self._hash_ids_to_hashes_cache.items() if hash_id in hash_ids }
|
|
|
|
|
|
uncached_hash_ids = { hash_id for hash_id in hash_ids if hash_id not in self._hash_ids_to_hashes_cache }
|
|
|
|
if len( uncached_hash_ids ) > 0:
|
|
|
|
pubbed_error = False
|
|
|
|
if len( uncached_hash_ids ) == 1:
|
|
|
|
( uncached_hash_id, ) = uncached_hash_ids
|
|
|
|
rows = self._Execute( 'SELECT hash_id, hash FROM hashes WHERE hash_id = ?;', ( uncached_hash_id, ) ).fetchall()
|
|
|
|
else:
|
|
|
|
with self._MakeTemporaryIntegerTable( uncached_hash_ids, 'hash_id' ) as temp_table_name:
|
|
|
|
# temp hash_ids to actual hashes
|
|
rows = self._Execute( 'SELECT hash_id, hash FROM {} CROSS JOIN hashes USING ( hash_id );'.format( temp_table_name ) ).fetchall()
|
|
|
|
|
|
|
|
uncached_hash_ids_to_hashes = dict( rows )
|
|
|
|
if len( uncached_hash_ids_to_hashes ) < len( uncached_hash_ids ):
|
|
|
|
for hash_id in uncached_hash_ids:
|
|
|
|
if hash_id not in uncached_hash_ids_to_hashes:
|
|
|
|
if exception_on_error:
|
|
|
|
raise HydrusExceptions.DataMissing( 'Did not find all entries for those hash ids!' )
|
|
|
|
|
|
HydrusData.DebugPrint( 'Database hash error: hash_id ' + str( hash_id ) + ' was missing!' )
|
|
HydrusData.PrintException( Exception( 'Missing file identifier stack trace.' ) )
|
|
|
|
if not pubbed_error:
|
|
|
|
HydrusData.ShowText( 'A file identifier was missing! This is a serious error that means your client database has an orphan file id! Think about contacting hydrus dev!' )
|
|
|
|
pubbed_error = True
|
|
|
|
|
|
hash = bytes.fromhex( 'aaaaaaaaaaaaaaaa' ) + os.urandom( 16 )
|
|
|
|
uncached_hash_ids_to_hashes[ hash_id ] = hash
|
|
|
|
|
|
|
|
|
|
self._hash_ids_to_hashes_cache.update( uncached_hash_ids_to_hashes )
|
|
|
|
|
|
|
|
def GetExtraHash( self, hash_type, hash_id ) -> bytes:
    """Return the value of the local_hashes column named by hash_type for the given hash_id.
    
    :param hash_type: column name in local_hashes; it is formatted directly into the query
    :raises HydrusExceptions.DataMissing: local_hashes has no row for hash_id
    """
    
    query = 'SELECT {} FROM local_hashes WHERE hash_id = ?;'.format( hash_type )
    
    row = self._Execute( query, ( hash_id, ) ).fetchone()
    
    if row is not None:
        
        ( extra_hash, ) = row
        
        return extra_hash
        
    
    raise HydrusExceptions.DataMissing( '{} not available for file {}!'.format( hash_type, hash_id ) )
|
|
|
|
|
|
def GetFileHashes( self, given_hashes, given_hash_type, desired_hash_type ) -> typing.Dict[ bytes, bytes ]:
    """Translate hashes of one type into hashes of another type for the files this database knows.
    
    'sha256' is resolved through the hashes table (HasHash / GetHashIdsToHashes); any other type is used as
    a column name of local_hashes. Given hashes that are None or unknown are left out of the result.
    
    :param given_hashes: iterable of hashes of type given_hash_type; None entries are ignored
    :param given_hash_type: 'sha256' or a local_hashes column name
    :param desired_hash_type: 'sha256' or a local_hashes column name
    :return: dict of given hash -> desired hash
    """
    
    # None cannot be wrapped in sqlite3.Binary, so drop it up front and both lookup branches agree
    given_hashes = [ given_hash for given_hash in given_hashes if given_hash is not None ]
    
    if given_hash_type == 'sha256':
        
        hashes_we_have = [ given_hash for given_hash in given_hashes if self.HasHash( given_hash ) ]
        
        hash_ids_to_source_hashes = self.GetHashIdsToHashes( hashes = hashes_we_have )
        
    else:
        
        hash_ids_to_source_hashes = {}
        
        lookup_query = 'SELECT hash_id FROM local_hashes WHERE {} = ?;'.format( given_hash_type )
        
        for given_hash in given_hashes:
            
            result = self._Execute( lookup_query, ( sqlite3.Binary( given_hash ), ) ).fetchone()
            
            if result is not None:
                
                ( hash_id, ) = result
                
                hash_ids_to_source_hashes[ hash_id ] = given_hash
                
            
        
    
    if len( hash_ids_to_source_hashes ) == 0:
        
        # nothing matched, so there is nothing to translate and no need for a temp table
        return {}
        
    
    if desired_hash_type == 'sha256':
        
        hash_ids_to_desired_hashes = self.GetHashIdsToHashes( hash_ids = set( hash_ids_to_source_hashes.keys() ) )
        
    else:
        
        with self._MakeTemporaryIntegerTable( set( hash_ids_to_source_hashes.keys() ), 'hash_id' ) as temp_table_name:
            
            hash_ids_to_desired_hashes = dict( self._Execute( 'SELECT hash_id, {} FROM {} CROSS JOIN local_hashes USING ( hash_id );'.format( desired_hash_type, temp_table_name ) ) )
            
        
    
    return { hash_ids_to_source_hashes[ hash_id ] : desired_hash for ( hash_id, desired_hash ) in hash_ids_to_desired_hashes.items() }
|
|
|
|
|
|
def GetHash( self, hash_id ) -> bytes:
    """Return the hash for one hash_id, loading it into the cache first if needed."""
    
    wanted_hash_ids = ( hash_id, )
    
    self._PopulateHashIdsToHashesCache( wanted_hash_ids )
    
    # read the attribute only after populating, since populating may replace the cache dict
    cache = self._hash_ids_to_hashes_cache
    
    return cache[ hash_id ]
|
|
|
|
|
|
def GetHashes( self, hash_ids ) -> typing.List[ bytes ]:
    """Return the hashes for the given hash_ids, in the order the ids are iterated.
    
    hash_ids is iterated twice (once to fill the cache, once to build the result).
    """
    
    self._PopulateHashIdsToHashesCache( hash_ids )
    
    # read the attribute only after populating, since populating may replace the cache dict
    cache = self._hash_ids_to_hashes_cache
    
    return [ cache[ hash_id ] for hash_id in hash_ids ]
|
|
|
|
|
|
def GetHashId( self, hash ) -> int:
    """Return the hash_id for a hash, inserting a new hashes row if the hash is not yet known."""
    
    existing_row = self._Execute( 'SELECT hash_id FROM hashes WHERE hash = ?;', ( sqlite3.Binary( hash ), ) ).fetchone()
    
    if existing_row is not None:
        
        ( hash_id, ) = existing_row
        
        return hash_id
        
    
    self._Execute( 'INSERT INTO hashes ( hash ) VALUES ( ? );', ( sqlite3.Binary( hash ), ) )
    
    return self._GetLastRowId()
|
|
|
|
|
|
def GetHashIdFromExtraHash( self, hash_type, hash ):
    """Return the hash_id of the file whose md5, sha1 or sha512 equals the given hash.
    
    :param hash_type: 'md5', 'sha1' or 'sha512'
    :param hash: the hash bytes to look up in local_hashes
    :raises NotImplementedError: hash_type is not one of the supported types
    :raises HydrusExceptions.DataMissing: no local_hashes row has that hash
    """
    
    if hash_type not in ( 'md5', 'sha1', 'sha512' ):
        
        # previously an unsupported type fell through every branch and died with an UnboundLocalError
        raise NotImplementedError( 'Cannot look up a hash_id from a hash of type "{}"!'.format( hash_type ) )
        
    
    # hash_type is now known to be one of the fixed column names above, so formatting it in is safe
    query = 'SELECT hash_id FROM local_hashes WHERE {} = ?;'.format( hash_type )
    
    result = self._Execute( query, ( sqlite3.Binary( hash ), ) ).fetchone()
    
    if result is None:
        
        raise HydrusExceptions.DataMissing( 'Hash Id not found for {} hash {}!'.format( hash_type, hash.hex() ) )
        
    
    ( hash_id, ) = result
    
    return hash_id
|
|
|
|
|
|
def GetHashIds( self, hashes ) -> typing.Set[ int ]:
    """Return the hash_ids for the given hashes, inserting rows for any hashes not yet in the table.
    
    None entries in hashes are skipped.
    """
    
    select_query = 'SELECT hash_id FROM hashes WHERE hash = ?;'
    
    known_hash_ids = set()
    new_hashes = set()
    
    for hash in hashes:
        
        if hash is None:
            
            continue
            
        
        row = self._Execute( select_query, ( sqlite3.Binary( hash ), ) ).fetchone()
        
        if row is None:
            
            new_hashes.add( hash )
            
        else:
            
            known_hash_ids.add( row[ 0 ] )
            
        
    
    if len( new_hashes ) > 0:
        
        self._ExecuteMany( 'INSERT INTO hashes ( hash ) VALUES ( ? );', ( ( sqlite3.Binary( new_hash ), ) for new_hash in new_hashes ) )
        
        # read back the ids the inserts were given
        for new_hash in new_hashes:
            
            ( new_hash_id, ) = self._Execute( select_query, ( sqlite3.Binary( new_hash ), ) ).fetchone()
            
            known_hash_ids.add( new_hash_id )
            
        
    
    return known_hash_ids
|
|
|
|
|
|
def GetHashIdsToHashes( self, hash_ids = None, hashes = None ):
    """Return a dict of hash_id -> hash, built from either hash ids or hashes.
    
    hash_ids takes precedence when both are given. With hash_ids, missing ids raise, because the cache is
    populated with exception_on_error = True. With hashes, unknown hashes are assigned ids through
    GetHashId.
    
    :param hash_ids: iterable of hash ids, or None
    :param hashes: iterable of hashes, or None
    :return: dict of hash_id -> hash; empty when neither argument is given
    """
    
    if hash_ids is not None:
        
        self._PopulateHashIdsToHashesCache( hash_ids, exception_on_error = True )
        
        return { hash_id : self._hash_ids_to_hashes_cache[ hash_id ] for hash_id in hash_ids }
        
    
    if hashes is not None:
        
        return { self.GetHashId( hash ) : hash for hash in hashes }
        
    
    # neither argument was given; this used to fall through to an UnboundLocalError
    return {}
|
|
|
|
|
|
def GetTablesAndColumnsThatUseDefinitions( self, content_type: int ) -> typing.List[ typing.Tuple[ str, str ] ]:
    """Return the ( table, column ) pairs of this module that hold ids for the given content type.
    
    Only HC.CONTENT_TYPE_HASH has an answer here: the hash_id column of local_hashes.
    """
    
    if content_type != HC.CONTENT_TYPE_HASH:
        
        return []
        
    
    return [ ( 'local_hashes', 'hash_id' ) ]
|
|
|
|
|
|
def HasExtraHashes( self, hash_id ):
    """Return True if local_hashes has a row for this hash_id."""
    
    row = self._Execute( 'SELECT 1 FROM local_hashes WHERE hash_id = ?;', ( hash_id, ) ).fetchone()
    
    if row is None:
        
        return False
        
    
    return True
|
|
|
|
|
|
def HasHash( self, hash ):
    """Return True if the hashes table already has a row for this hash. Nothing is inserted."""
    
    row = self._Execute( 'SELECT 1 FROM hashes WHERE hash = ?;', ( sqlite3.Binary( hash ), ) ).fetchone()
    
    if row is None:
        
        return False
        
    
    return True
|
|
|
|
|
|
def HasHashId( self, hash_id: int ):
    """Return True if the hashes table has a row with this hash_id."""
    
    row = self._Execute( 'SELECT 1 FROM hashes WHERE hash_id = ?;', ( hash_id, ) ).fetchone()
    
    if row is None:
        
        return False
        
    
    return True
|
|
|
|
|
|
def SetExtraHashes( self, hash_id, md5, sha1, sha512 ):
    """Record the md5, sha1 and sha512 of a file in local_hashes.
    
    Uses INSERT OR IGNORE, so an existing row for hash_id is left as it is.
    """
    
    row = ( hash_id, sqlite3.Binary( md5 ), sqlite3.Binary( sha1 ), sqlite3.Binary( sha512 ) )
    
    self._Execute( 'INSERT OR IGNORE INTO local_hashes ( hash_id, md5, sha1, sha512 ) VALUES ( ?, ?, ?, ? );', row )
|
|
|
|
|
|
class ClientDBMasterTexts( ClientDBModule.ClientDBModule ):
|
|
|
|
def __init__( self, cursor: sqlite3.Cursor ):
    """Initialise the module under the name 'client texts master'."""
    
    super().__init__( 'client texts master', cursor )
|
|
|
|
|
|
def _GetInitialTableGenerationDict( self ) -> dict:
|
|
|
|
return {
|
|
'external_master.labels' : ( 'CREATE TABLE IF NOT EXISTS {} ( label_id INTEGER PRIMARY KEY, label TEXT UNIQUE );', 400 ),
|
|
'external_master.notes' : ( 'CREATE TABLE IF NOT EXISTS {} ( note_id INTEGER PRIMARY KEY, note TEXT UNIQUE );', 400 ),
|
|
'external_master.texts' : ( 'CREATE TABLE IF NOT EXISTS {} ( text_id INTEGER PRIMARY KEY, text TEXT UNIQUE );', 400 ),
|
|
'external_caches.notes_fts4' : ( 'CREATE VIRTUAL TABLE IF NOT EXISTS {} USING fts4( note );', 400 )
|
|
}
|
|
|
|
|
|
def _RepairRepopulateTables( self, repopulate_table_names, cursor_transaction_wrapper: HydrusDBBase.DBCursorTransactionWrapper ):
    """Refill the notes fts4 table from the notes table when it is among the tables to repopulate.
    
    cursor_transaction_wrapper is accepted but not used here.
    """
    
    if 'external_caches.notes_fts4' not in repopulate_table_names:
        
        return
        
    
    self._Execute( 'REPLACE INTO notes_fts4 ( docid, note ) SELECT note_id, note FROM notes;' )
|
|
|
|
|
|
|
|
def GetLabelId( self, label ):
    """Return the label_id for a label, inserting a new labels row if it is not yet known."""
    
    existing_row = self._Execute( 'SELECT label_id FROM labels WHERE label = ?;', ( label, ) ).fetchone()
    
    if existing_row is not None:
        
        ( label_id, ) = existing_row
        
        return label_id
        
    
    self._Execute( 'INSERT INTO labels ( label ) VALUES ( ? );', ( label, ) )
    
    return self._GetLastRowId()
|
|
|
|
|
|
def GetNoteId( self, note: str ) -> int:
    """Return the note_id for a note, inserting it if it is not yet known.
    
    A newly inserted note is also written to notes_fts4 under the same id.
    """
    
    existing_row = self._Execute( 'SELECT note_id FROM notes WHERE note = ?;', ( note, ) ).fetchone()
    
    if existing_row is not None:
        
        ( note_id, ) = existing_row
        
        return note_id
        
    
    self._Execute( 'INSERT INTO notes ( note ) VALUES ( ? );', ( note, ) )
    
    new_note_id = self._GetLastRowId()
    
    self._Execute( 'REPLACE INTO notes_fts4 ( docid, note ) VALUES ( ?, ? );', ( new_note_id, note ) )
    
    return new_note_id
|
|
|
|
|
|
def GetTablesAndColumnsThatUseDefinitions( self, content_type: int ) -> typing.List[ typing.Tuple[ str, str ] ]:
    """Return an empty list: this module reports no ( table, column ) pairs for any content type."""
    
    tables_and_columns = []
    
    return tables_and_columns
|
|
|
|
|
|
def GetText( self, text_id ):
    """Return the text stored under text_id.
    
    :raises HydrusExceptions.DataMissing: the texts table has no row with that id
    """
    
    row = self._Execute( 'SELECT text FROM texts WHERE text_id = ?;', ( text_id, ) ).fetchone()
    
    if row is not None:
        
        ( text, ) = row
        
        return text
        
    
    raise HydrusExceptions.DataMissing( 'Text lookup error in database' )
|
|
|
|
|
|
def GetTextId( self, text ):
    """Return the text_id for a text, inserting a new texts row if it is not yet known."""
    
    existing_row = self._Execute( 'SELECT text_id FROM texts WHERE text = ?;', ( text, ) ).fetchone()
    
    if existing_row is not None:
        
        ( text_id, ) = existing_row
        
        return text_id
        
    
    self._Execute( 'INSERT INTO texts ( text ) VALUES ( ? );', ( text, ) )
    
    return self._GetLastRowId()
|
|
|
|
|
|
class ClientDBMasterTags( ClientDBModule.ClientDBModule ):
|
|
|
|
def __init__( self, cursor: sqlite3.Cursor ):
    """Initialise the module under the name 'client tags master' with empty caches."""
    
    super().__init__( 'client tags master', cursor )
    
    # id of the '' namespace; looked up on first use by GetNamespaceId
    self.null_namespace_id = None
    
    # tag_id -> combined tag strings already read from the database
    self._tag_ids_to_tags_cache = {}
|
|
|
|
|
|
def _GetCriticalTableNames( self ) -> typing.Collection[ str ]:
|
|
|
|
return {
|
|
'external_master.namespaces',
|
|
'external_master.subtags',
|
|
'external_master.tags'
|
|
}
|
|
|
|
|
|
def _GetInitialIndexGenerationDict( self ) -> dict:
|
|
|
|
index_generation_dict = {}
|
|
|
|
index_generation_dict[ 'external_master.tags' ] = [
|
|
( [ 'subtag_id' ], False, 400 ),
|
|
( [ 'namespace_id', 'subtag_id' ], True, 412 )
|
|
]
|
|
|
|
return index_generation_dict
|
|
|
|
|
|
def _GetInitialTableGenerationDict( self ) -> dict:
|
|
|
|
return {
|
|
'external_master.namespaces' : ( 'CREATE TABLE IF NOT EXISTS {} ( namespace_id INTEGER PRIMARY KEY, namespace TEXT UNIQUE );', 400 ),
|
|
'external_master.subtags' : ( 'CREATE TABLE IF NOT EXISTS {} ( subtag_id INTEGER PRIMARY KEY, subtag TEXT UNIQUE );', 400 ),
|
|
'external_master.tags' : ( 'CREATE TABLE IF NOT EXISTS {} ( tag_id INTEGER PRIMARY KEY, namespace_id INTEGER, subtag_id INTEGER );', 400 )
|
|
}
|
|
|
|
|
|
def _PopulateTagIdsToTagsCache( self, tag_ids ):
|
|
|
|
if len( self._tag_ids_to_tags_cache ) > 100000:
|
|
|
|
if not isinstance( tag_ids, set ):
|
|
|
|
tag_ids = set( tag_ids )
|
|
|
|
|
|
self._tag_ids_to_tags_cache = { tag_id : tag for ( tag_id, tag ) in self._tag_ids_to_tags_cache.items() if tag_id in tag_ids }
|
|
|
|
|
|
uncached_tag_ids = { tag_id for tag_id in tag_ids if tag_id not in self._tag_ids_to_tags_cache }
|
|
|
|
if len( uncached_tag_ids ) > 0:
|
|
|
|
if len( uncached_tag_ids ) == 1:
|
|
|
|
( uncached_tag_id, ) = uncached_tag_ids
|
|
|
|
rows = self._Execute( 'SELECT tag_id, namespace, subtag FROM tags NATURAL JOIN namespaces NATURAL JOIN subtags WHERE tag_id = ?;', ( uncached_tag_id, ) ).fetchall()
|
|
|
|
else:
|
|
|
|
with self._MakeTemporaryIntegerTable( uncached_tag_ids, 'tag_id' ) as temp_table_name:
|
|
|
|
# temp tag_ids to tags to subtags and namespaces
|
|
rows = self._Execute( 'SELECT tag_id, namespace, subtag FROM {} CROSS JOIN tags USING ( tag_id ) CROSS JOIN subtags USING ( subtag_id ) CROSS JOIN namespaces USING ( namespace_id );'.format( temp_table_name ) ).fetchall()
|
|
|
|
|
|
|
|
uncached_tag_ids_to_tags = { tag_id : HydrusTags.CombineTag( namespace, subtag ) for ( tag_id, namespace, subtag ) in rows }
|
|
|
|
if len( uncached_tag_ids_to_tags ) < len( uncached_tag_ids ):
|
|
|
|
for tag_id in uncached_tag_ids:
|
|
|
|
if tag_id not in uncached_tag_ids_to_tags:
|
|
|
|
tag = 'unknown tag:' + HydrusData.GenerateKey().hex()
|
|
|
|
( namespace, subtag ) = HydrusTags.SplitTag( tag )
|
|
|
|
namespace_id = self.GetNamespaceId( namespace )
|
|
subtag_id = self.GetSubtagId( subtag )
|
|
|
|
self._Execute( 'REPLACE INTO tags ( tag_id, namespace_id, subtag_id ) VALUES ( ?, ?, ? );', ( tag_id, namespace_id, subtag_id ) )
|
|
|
|
uncached_tag_ids_to_tags[ tag_id ] = tag
|
|
|
|
|
|
|
|
|
|
self._tag_ids_to_tags_cache.update( uncached_tag_ids_to_tags )
|
|
|
|
|
|
|
|
def GetNamespaceId( self, namespace ) -> int:
    """Return the namespace_id for a namespace, inserting a new namespaces row if it is not yet known.
    
    The id of the empty namespace '' is remembered in self.null_namespace_id after the first lookup, so
    later calls for '' do not touch the database.
    
    :param namespace: namespace text; '' is the empty namespace
    """
    
    is_null_namespace = namespace == ''
    
    if is_null_namespace and self.null_namespace_id is not None:
        
        return self.null_namespace_id
        
    
    result = self._Execute( 'SELECT namespace_id FROM namespaces WHERE namespace = ?;', ( namespace, ) ).fetchone()
    
    if result is None:
        
        # this also covers a missing '' row, which used to crash on unpacking fetchone()'s None
        self._Execute( 'INSERT INTO namespaces ( namespace ) VALUES ( ? );', ( namespace, ) )
        
        namespace_id = self._GetLastRowId()
        
    else:
        
        ( namespace_id, ) = result
        
    
    if is_null_namespace:
        
        self.null_namespace_id = namespace_id
        
    
    return namespace_id
|
|
|
|
|
|
def GetSubtagId( self, subtag ) -> int:
    """Return the subtag_id for a subtag, inserting a new subtags row if it is not yet known."""
    
    existing_row = self._Execute( 'SELECT subtag_id FROM subtags WHERE subtag = ?;', ( subtag, ) ).fetchone()
    
    if existing_row is not None:
        
        ( subtag_id, ) = existing_row
        
        return subtag_id
        
    
    self._Execute( 'INSERT INTO subtags ( subtag ) VALUES ( ? );', ( subtag, ) )
    
    return self._GetLastRowId()
|
|
|
|
|
|
def GetTablesAndColumnsThatUseDefinitions( self, content_type: int ) -> typing.List[ typing.Tuple[ str, str ] ]:
    """Return an empty list: this module reports no ( table, column ) pairs for any content type."""
    
    # maybe support a subtag/namespace content type here, which would be useful for bad subtags, although
    # that is tricky because the knock-on is killing tag definition rows
    
    tables_and_columns = []
    
    return tables_and_columns
|
|
|
|
|
|
def GetTag( self, tag_id ) -> str:
    """Return the tag for one tag_id, loading it into the cache first if needed."""
    
    wanted_tag_ids = ( tag_id, )
    
    self._PopulateTagIdsToTagsCache( wanted_tag_ids )
    
    # read the attribute only after populating, since populating may replace the cache dict
    cache = self._tag_ids_to_tags_cache
    
    return cache[ tag_id ]
|
|
|
|
|
|
def GetTagId( self, tag ) -> int:
    """Return the tag_id for a tag, creating namespace, subtag and tag rows as needed.
    
    The tag is cleaned with HydrusTags.CleanTag and split into ( namespace, subtag ) before lookup.
    
    :raises HydrusExceptions.TagSizeException: the cleaned tag fails HydrusTags.CheckTagNotEmpty
    """
    
    clean_tag = HydrusTags.CleanTag( tag )
    
    try:
        
        HydrusTags.CheckTagNotEmpty( clean_tag )
        
    except HydrusExceptions.TagSizeException:
        
        # update this to instead go 'hey, does the dirty tag exist?' if it does, run the fix invalid tags routine
        
        raise HydrusExceptions.TagSizeException( '"{}" tag seems not valid--when cleaned, it ends up with zero size!'.format( tag ) )
        
    
    ( namespace, subtag ) = HydrusTags.SplitTag( clean_tag )
    
    namespace_id = self.GetNamespaceId( namespace )
    subtag_id = self.GetSubtagId( subtag )
    
    definition = ( namespace_id, subtag_id )
    
    existing_row = self._Execute( 'SELECT tag_id FROM tags WHERE namespace_id = ? AND subtag_id = ?;', definition ).fetchone()
    
    if existing_row is not None:
        
        ( tag_id, ) = existing_row
        
        return tag_id
        
    
    self._Execute( 'INSERT INTO tags ( namespace_id, subtag_id ) VALUES ( ?, ? );', definition )
    
    return self._GetLastRowId()
|
|
|
|
|
|
def GetTagIdsToTags( self, tag_ids = None, tags = None ) -> typing.Dict[ int, str ]:
    """Return a dict of tag_id -> tag, built from either tag ids or tags.
    
    tag_ids takes precedence when both are given. With tags, each tag is resolved (and created if new)
    through GetTagId.
    
    :param tag_ids: iterable of tag ids, or None
    :param tags: iterable of tags, or None
    :return: dict of tag_id -> tag; empty when neither argument is given
    """
    
    if tag_ids is not None:
        
        self._PopulateTagIdsToTagsCache( tag_ids )
        
        return { tag_id : self._tag_ids_to_tags_cache[ tag_id ] for tag_id in tag_ids }
        
    
    if tags is not None:
        
        return { self.GetTagId( tag ) : tag for tag in tags }
        
    
    # neither argument was given; this used to fall through to an UnboundLocalError
    return {}
|
|
|
|
|
|
def NamespaceExists( self, namespace ):
    """Return True if the namespace has a row in the namespaces table.
    
    The empty namespace '' is reported as existing without a database lookup.
    """
    
    if namespace == '':
        
        return True
        
    
    row = self._Execute( 'SELECT 1 FROM namespaces WHERE namespace = ?;', ( namespace, ) ).fetchone()
    
    return row is not None
|
|
|
|
|
|
|
|
def SubtagExists( self, subtag ):
    """Return True if the subtag has a row in the subtags table.
    
    A subtag that fails HydrusTags.CheckTagNotEmpty is reported as not existing without a lookup.
    """
    
    try:
        
        HydrusTags.CheckTagNotEmpty( subtag )
        
    except HydrusExceptions.TagSizeException:
        
        return False
        
    
    row = self._Execute( 'SELECT 1 FROM subtags WHERE subtag = ?;', ( subtag, ) ).fetchone()
    
    return row is not None
|
|
|
|
|
|
|
|
def TagExists( self, tag ):
    """Return True if the cleaned tag already has a row in the tags table.
    
    A tag that cannot be cleaned, is empty after cleaning, or whose namespace or subtag is unknown is
    reported as not existing.
    """
    
    try:
        
        tag = HydrusTags.CleanTag( tag )
        
    except Exception:
        
        # a tag that cannot be cleaned cannot be stored; unlike the old bare except, this lets
        # KeyboardInterrupt and SystemExit through
        return False
        
    
    try:
        
        HydrusTags.CheckTagNotEmpty( tag )
        
    except HydrusExceptions.TagSizeException:
        
        return False
        
    
    ( namespace, subtag ) = HydrusTags.SplitTag( tag )
    
    # existence is checked before each Get*Id call because those calls insert rows for names they do not find
    if not self.NamespaceExists( namespace ):
        
        return False
        
    
    namespace_id = self.GetNamespaceId( namespace )
    
    if not self.SubtagExists( subtag ):
        
        return False
        
    
    subtag_id = self.GetSubtagId( subtag )
    
    result = self._Execute( 'SELECT 1 FROM tags WHERE namespace_id = ? AND subtag_id = ?;', ( namespace_id, subtag_id ) ).fetchone()
    
    return result is not None
|
|
|
|
|
|
|
|
def UpdateTagId( self, tag_id, namespace_id, subtag_id ):
    """Point an existing tag_id at a new ( namespace_id, subtag_id ) pair and drop its cached tag."""
    
    self._Execute( 'UPDATE tags SET namespace_id = ?, subtag_id = ? WHERE tag_id = ?;', ( namespace_id, subtag_id, tag_id ) )
    
    # the cached tag text is stale now; a missing entry is fine
    self._tag_ids_to_tags_cache.pop( tag_id, None )
|
|
|
|
|
|
|
|
class ClientDBMasterURLs( ClientDBModule.ClientDBModule ):
|
|
|
|
def __init__( self, cursor: sqlite3.Cursor ):
    """Initialise the module under the name 'client urls master'."""
    
    super().__init__( 'client urls master', cursor )
|
|
|
|
|
|
def _GetInitialIndexGenerationDict( self ) -> dict:
|
|
|
|
index_generation_dict = {}
|
|
|
|
index_generation_dict[ 'external_master.urls' ] = [
|
|
( [ 'domain_id' ], False, 400 )
|
|
]
|
|
|
|
return index_generation_dict
|
|
|
|
|
|
def _GetInitialTableGenerationDict( self ) -> dict:
|
|
|
|
return {
|
|
'external_master.url_domains' : ( 'CREATE TABLE IF NOT EXISTS {} ( domain_id INTEGER PRIMARY KEY, domain TEXT UNIQUE );', 400 ),
|
|
'external_master.urls' : ( 'CREATE TABLE IF NOT EXISTS {} ( url_id INTEGER PRIMARY KEY, domain_id INTEGER, url TEXT UNIQUE );', 400 )
|
|
}
|
|
|
|
|
|
def GetTablesAndColumnsThatUseDefinitions( self, content_type: int ) -> typing.List[ typing.Tuple[ str, str ] ]:
    """Return an empty list: this module reports no ( table, column ) pairs for any content type."""
    
    # if content type is a domain, then give urls? bleh
    
    tables_and_columns = []
    
    return tables_and_columns
|
|
|
|
|
|
def GetURLDomainId( self, domain ):
    """Return the domain_id for a domain, inserting a new url_domains row if it is not yet known."""
    
    existing_row = self._Execute( 'SELECT domain_id FROM url_domains WHERE domain = ?;', ( domain, ) ).fetchone()
    
    if existing_row is not None:
        
        ( domain_id, ) = existing_row
        
        return domain_id
        
    
    self._Execute( 'INSERT INTO url_domains ( domain ) VALUES ( ? );', ( domain, ) )
    
    return self._GetLastRowId()
|
|
|
|
|
|
def GetURLDomainAndSubdomainIds( self, domain, only_www_subdomains = False ):
    """Return the domain_ids of a domain and of its known subdomains.
    
    The domain is first passed through ClientNetworkingFunctions.RemoveWWWFromDomain, and its own id is
    always included (GetURLDomainId creates the row if it is missing). Subdomains are found with a LIKE
    search: '%.domain' normally, or 'www%.domain' when only_www_subdomains is True.
    
    :param domain: domain text
    :param only_www_subdomains: restrict the subdomain search to names starting with 'www'
    :return: set of domain ids
    """
    
    domain = ClientNetworkingFunctions.RemoveWWWFromDomain( domain )
    
    domain_ids = { self.GetURLDomainId( domain ) }
    
    # '%' and '_' are LIKE wildcards; escape them so a domain containing either only matches itself literally
    escaped_domain = domain.replace( '\\', '\\\\' ).replace( '%', '\\%' ).replace( '_', '\\_' )
    
    if only_www_subdomains:
        
        search_phrase = 'www%.{}'.format( escaped_domain )
        
    else:
        
        search_phrase = '%.{}'.format( escaped_domain )
        
    
    # the SQL sent to sqlite ends in ESCAPE '\' -- a single backslash as the escape character
    for ( domain_id, ) in self._Execute( 'SELECT domain_id FROM url_domains WHERE domain LIKE ? ESCAPE \'\\\';', ( search_phrase, ) ):
        
        domain_ids.add( domain_id )
        
    
    return domain_ids
|
|
|
|
|
|
def GetURLId( self, url ):
    """Return the url_id for a url, inserting a new urls row if it is not yet known.
    
    A new url is stored with the id of its domain. When ClientNetworkingFunctions.ConvertURLIntoDomain
    raises URLClassException, the domain 'unknown.com' is used.
    """
    
    existing_row = self._Execute( 'SELECT url_id FROM urls WHERE url = ?;', ( url, ) ).fetchone()
    
    if existing_row is not None:
        
        ( url_id, ) = existing_row
        
        return url_id
        
    
    try:
        
        domain = ClientNetworkingFunctions.ConvertURLIntoDomain( url )
        
    except HydrusExceptions.URLClassException:
        
        domain = 'unknown.com'
        
    
    domain_id = self.GetURLDomainId( domain )
    
    self._Execute( 'INSERT INTO urls ( domain_id, url ) VALUES ( ?, ? );', ( domain_id, url ) )
    
    return self._GetLastRowId()
|
|
|
|
|