Adding HydrusTagArchive
This commit is contained in:
parent
33bfb28972
commit
426a70ad04
|
@ -0,0 +1,239 @@
|
|||
import os
|
||||
import sqlite3
|
||||
|
||||
# Identifiers for the kind of digest an archive's hashes are. One of these is
# what gets handed to HydrusTagArchive.SetHashType and returned by GetHashType.

# MD5 digests are 16 bytes long
HASH_TYPE_MD5 = 0

# SHA1 digests are 20 bytes long
HASH_TYPE_SHA1 = 1

# SHA256 digests are 32 bytes long
HASH_TYPE_SHA256 = 2

# SHA512 digests are 64 bytes long
HASH_TYPE_SHA512 = 3
|
||||
|
||||
# Please feel free to use this file however you wish.
|
||||
# None of this is thread-safe, though, so don't try to do anything clever.
|
||||
|
||||
|
||||
# If you want to make a new tag archive for use in hydrus, you want to do something like:
|
||||
|
||||
# import HydrusTagArchive
|
||||
# hta = HydrusTagArchive.HydrusTagArchive( 'my_little_archive.db' )
|
||||
# hta.SetHashType( HydrusTagArchive.HASH_TYPE_MD5 )
|
||||
# hta.BeginBigJob()
|
||||
# for ( hash, tags ) in my_complex_mappings_generator: hta.SetMappings( hash, tags )
|
||||
# -or-
|
||||
# for ( hash, tag ) in my_simple_mapping_generator: hta.AddMapping( hash, tag )
|
||||
# hta.CommitBigJob()
|
||||
# del hta
|
||||
|
||||
|
||||
# If you are only adding a couple tags, you can exclude the BigJob stuff. It just makes millions of sequential writes more efficient.
|
||||
|
||||
|
||||
# Also, this manages hashes as bytes, not hex, so if you have something like:
|
||||
|
||||
# hash = 'ab156e87c5d6e215ab156e87c5d6e215'

# Then go hash = hash.decode( 'hex' ) (on Python 3: hash = bytes.fromhex( hash )) before you pass it to Add/Get/Has/SetMappings
|
||||
|
||||
|
||||
# If you have tags that are namespaced like hydrus (e.g. series:ghost in the shell), then check out:
|
||||
# GetNamespaces
|
||||
# DeleteNamespaces
|
||||
# and
|
||||
# RebuildNamespaces
|
||||
|
||||
# RebuildNamespaces takes namespaces_to_exclude, if you want to curate your namespaces a little better.
|
||||
|
||||
# If your GetNamespaces gives garbage, then just hit DeleteNamespaces. I'll be using the result of GetNamespaces to populate
|
||||
# the advanced tag options widget when people sync with these archives.
|
||||
|
||||
|
||||
# And also feel free to contact me directly at hydrus.admin@gmail.com if you need help.
|
||||
|
||||
class HydrusTagArchive( object ):
    """A hash -> tags archive stored in a single SQLite database file.

    Hashes are handled as raw bytes, not hex. Tags are text; a tag of the form
    'namespace:subtag' also registers its namespace, and the set of known
    namespaces is cached in memory and mirrored in the 'namespaces' table.

    The connection runs in autocommit mode (isolation_level = None), so every
    write is its own transaction unless wrapped in BeginBigJob/CommitBigJob.
    Nothing here is thread-safe.
    """

    def __init__( self, path ):
        """Open the archive at path, creating its tables if no file exists there yet."""

        self._path = path

        # This has to be decided before connecting, as sqlite3.connect creates the file.
        create_db = not os.path.exists( self._path )

        self._InitDBCursor()

        if create_db:

            self._InitDB()

        self._namespaces = { namespace for ( namespace, ) in self._c.execute( 'SELECT namespace FROM namespaces;' ) }

    def _InitDB( self ):
        """Create the tables and indices of a fresh archive."""

        self._c.execute( 'CREATE TABLE hash_type ( hash_type INTEGER );' )

        self._c.execute( 'CREATE TABLE hashes ( hash_id INTEGER PRIMARY KEY, hash BLOB_BYTES );' )
        self._c.execute( 'CREATE UNIQUE INDEX hashes_hash_index ON hashes ( hash );' )

        self._c.execute( 'CREATE TABLE mappings ( hash_id INTEGER, tag_id INTEGER );' )
        self._c.execute( 'CREATE INDEX mappings_hash_id_index ON mappings ( hash_id );' )

        self._c.execute( 'CREATE TABLE namespaces ( namespace TEXT );' )

        self._c.execute( 'CREATE TABLE tags ( tag_id INTEGER PRIMARY KEY, tag TEXT );' )
        self._c.execute( 'CREATE UNIQUE INDEX tags_tag_index ON tags ( tag );' )

    def _InitDBCursor( self ):
        """Connect to the database file and keep one cursor for all later statements."""

        self._db = sqlite3.connect( self._path, isolation_level = None, detect_types = sqlite3.PARSE_DECLTYPES )

        self._c = self._db.cursor()

    def _LookupHashId( self, hash ):
        """Return the hash_id stored for hash, or None if the archive does not have it. Never writes."""

        result = self._c.execute( 'SELECT hash_id FROM hashes WHERE hash = ?;', ( sqlite3.Binary( hash ), ) ).fetchone()

        if result is None:

            return None

        ( hash_id, ) = result

        return hash_id

    def _LookupTagId( self, tag ):
        """Return the tag_id stored for tag, or None if the archive does not have it. Never writes."""

        result = self._c.execute( 'SELECT tag_id FROM tags WHERE tag = ?;', ( tag, ) ).fetchone()

        if result is None:

            return None

        ( tag_id, ) = result

        return tag_id

    def _GetHashId( self, hash, read_only = False ):
        """Return the hash_id for hash, inserting a new row for it if needed.

        If read_only is True and the hash is unknown, nothing is inserted and
        an Exception is raised instead.
        """

        hash_id = self._LookupHashId( hash )

        if hash_id is None:

            if read_only:

                raise Exception( 'That hash is not in this archive.' )

            self._c.execute( 'INSERT INTO hashes ( hash ) VALUES ( ? );', ( sqlite3.Binary( hash ), ) )

            hash_id = self._c.lastrowid

        return hash_id

    def _GetTagId( self, tag ):
        """Return the tag_id for tag, inserting a new row for it if needed.

        If the tag has a non-empty namespace that is not yet known, that
        namespace is recorded as well.
        """

        if ':' in tag:

            ( namespace, subtag ) = tag.split( ':', 1 )

            if namespace != '' and namespace not in self._namespaces:

                self._c.execute( 'INSERT INTO namespaces ( namespace ) VALUES ( ? );', ( namespace, ) )

                self._namespaces.add( namespace )

        tag_id = self._LookupTagId( tag )

        if tag_id is None:

            self._c.execute( 'INSERT INTO tags ( tag ) VALUES ( ? );', ( tag, ) )

            tag_id = self._c.lastrowid

        return tag_id

    def BeginBigJob( self ):
        """Open one explicit transaction so that many sequential writes are batched together."""

        self._c.execute( 'BEGIN IMMEDIATE' )

    def CommitBigJob( self ):
        """Commit the transaction opened by BeginBigJob, then VACUUM the database."""

        self._c.execute( 'COMMIT' )
        self._c.execute( 'VACUUM' )

    def AddMapping( self, hash, tag ):
        """Record that hash has tag. Adding a mapping that already exists changes nothing."""

        hash_id = self._GetHashId( hash )
        tag_id = self._GetTagId( tag )

        # The mappings table has no uniqueness constraint, so INSERT OR IGNORE would never
        # ignore anything. The existence check keeps repeated adds from piling up rows.
        self._c.execute( 'INSERT INTO mappings ( hash_id, tag_id ) SELECT ?, ? WHERE NOT EXISTS ( SELECT 1 FROM mappings WHERE hash_id = ? AND tag_id = ? );', ( hash_id, tag_id, hash_id, tag_id ) )

    def DeleteMapping( self, hash, tag ):
        """Remove the mapping of tag on hash, if there is one.

        An unknown hash or tag means there is nothing to delete; neither is
        added to the archive as a side effect.
        """

        hash_id = self._LookupHashId( hash )

        if hash_id is None:

            return

        tag_id = self._LookupTagId( tag )

        if tag_id is None:

            return

        self._c.execute( 'DELETE FROM mappings WHERE hash_id = ? AND tag_id = ?;', ( hash_id, tag_id ) )

    def DeleteNamespaces( self ):
        """Forget every recorded namespace, both in memory and in the database."""

        # This must stay a set: _GetTagId and RebuildNamespaces call .add on it.
        self._namespaces = set()

        self._c.execute( 'DELETE FROM namespaces;' )

    def GetHashType( self ):
        """Return the stored hash type value.

        Raises an Exception if no hash type has been set on this archive.
        """

        result = self._c.execute( 'SELECT hash_type FROM hash_type;' ).fetchone()

        if result is None:

            raise Exception( 'This archive has no hash type set.' )

        ( hash_type, ) = result

        return hash_type

    def GetMappings( self, hash ):
        """Return the set of tags mapped to hash; an empty set if the hash is unknown."""

        hash_id = self._LookupHashId( hash )

        if hash_id is None:

            return set()

        return { tag for ( tag, ) in self._c.execute( 'SELECT tag FROM mappings JOIN tags USING ( tag_id ) WHERE hash_id = ?;', ( hash_id, ) ) }

    def GetNamespaces( self ):
        """Return the set of namespaces currently recorded for this archive."""

        return self._namespaces

    def HasHash( self, hash ):
        """Return True if the archive has a row for hash, otherwise False."""

        return self._LookupHashId( hash ) is not None

    def RebuildNamespaces( self, namespaces_to_exclude = None ):
        """Recompute the recorded namespaces from every tag in the archive.

        namespaces_to_exclude is an optional collection of namespaces that
        should be left out of the result.
        """

        if namespaces_to_exclude is None:

            namespaces_to_exclude = set()

        self._namespaces = set()

        self._c.execute( 'DELETE FROM namespaces;' )

        for ( tag, ) in self._c.execute( 'SELECT tag FROM tags;' ):

            if ':' in tag:

                ( namespace, subtag ) = tag.split( ':', 1 )

                if namespace != '' and namespace not in namespaces_to_exclude:

                    self._namespaces.add( namespace )

        self._c.executemany( 'INSERT INTO namespaces ( namespace ) VALUES ( ? );', ( ( namespace, ) for namespace in self._namespaces ) )

    def SetHashType( self, hash_type ):
        """Store hash_type as the archive's hash type, replacing any previous value."""

        self._c.execute( 'DELETE FROM hash_type;' )

        self._c.execute( 'INSERT INTO hash_type ( hash_type ) VALUES ( ? );', ( hash_type, ) )

    def SetMappings( self, hash, tags ):
        """Replace all of hash's mappings with the given tags.

        tags may be any iterable, including a one-shot generator; it is walked
        exactly once. Repeated tags are stored once.
        """

        hash_id = self._GetHashId( hash )

        self._c.execute( 'DELETE FROM mappings WHERE hash_id = ?;', ( hash_id, ) )

        tag_ids = { self._GetTagId( tag ) for tag in tags }

        self._c.executemany( 'INSERT INTO mappings ( hash_id, tag_id ) VALUES ( ?, ? );', ( ( hash_id, tag_id ) for tag_id in sorted( tag_ids ) ) )
|
||||
|
||||
|
||||
|
Loading…
Reference in New Issue