From 426a70ad04a4179371937649c8e95df5ef9a0963 Mon Sep 17 00:00:00 2001
From: Hydrus <hydrus.admin@gmail.com>
Date: Wed, 5 Nov 2014 15:17:13 -0600
Subject: [PATCH] Adding HydrusTagArchive

---
 include/HydrusTagArchive.py | 239 ++++++++++++++++++++++++++++++++++++
 1 file changed, 239 insertions(+)
 create mode 100644 include/HydrusTagArchive.py

diff --git a/include/HydrusTagArchive.py b/include/HydrusTagArchive.py
new file mode 100644
index 00000000..a3eed7bb
--- /dev/null
+++ b/include/HydrusTagArchive.py
@@ -0,0 +1,239 @@
+import os
+import sqlite3
+
+HASH_TYPE_MD5 = 0 # 16 bytes long
+HASH_TYPE_SHA1 = 1 # 20 bytes long
+HASH_TYPE_SHA256 = 2 # 32 bytes long
+HASH_TYPE_SHA512 = 3 # 64 bytes long
+
+# Please feel free to use this file however you wish.
+# None of this is thread-safe, though, so don't try to do anything clever.
+
+
+# If you want to make a new tag archive for use in hydrus, you want to do something like:
+
+# import HydrusTagArchive
+# hta = HydrusTagArchive.HydrusTagArchive( 'my_little_archive.db' )
+# hta.SetHashType( HydrusTagArchive.HASH_TYPE_MD5 )
+# hta.BeginBigJob()
+# for ( hash, tags ) in my_complex_mappings_generator: hta.SetMappings( hash, tags )
+  # -or-
+# for ( hash, tag ) in my_simple_mapping_generator: hta.AddMapping( hash, tag )
+# hta.CommitBigJob()
+# del hta
+
+
+# If you are only adding a couple tags, you can exclude the BigJob stuff. It just makes millions of sequential writes more efficient.
+
+
+# Also, this manages hashes as bytes, not hex, so if you have something like:
+
+# hash = ab156e87c5d6e215ab156e87c5d6e215
+
+# Then go hash = hash.decode( 'hex' ) before you pass it to Add/Get/Has/SetMappings
+
+
+# If you have tags that are namespaced like hydrus (e.g. series:ghost in the shell), then check out:
+# GetNamespaces
+# DeleteNamespaces
+# and
+# RebuildNamespaces
+
+# RebuildNamespaces takes namespaces_to_exclude, if you want to curate your namespaces a little better.
+
+# If your GetNamespaces gives garbage, then just hit DeleteNamespaces. I'll be using the result of GetNamespaces to populate
+# the advanced tag options widget when people sync with these archives.
+
+
+# And also feel free to contact me directly at hydrus.admin@gmail.com if you need help.
+
+class HydrusTagArchive( object ):
+    
+    def __init__( self, path ):
+        
+        self._path = path
+        
+        if not os.path.exists( self._path ): create_db = True
+        else: create_db = False
+        
+        self._InitDBCursor()
+        
+        if create_db: self._InitDB()
+        
+        self._namespaces = { namespace for ( namespace, ) in self._c.execute( 'SELECT namespace FROM namespaces;' ) }
+        
+    
+    def _InitDB( self ):
+        
+        self._c.execute( 'CREATE TABLE hash_type ( hash_type INTEGER );', )
+        
+        self._c.execute( 'CREATE TABLE hashes ( hash_id INTEGER PRIMARY KEY, hash BLOB_BYTES );' )
+        self._c.execute( 'CREATE UNIQUE INDEX hashes_hash_index ON hashes ( hash );' )
+        
+        self._c.execute( 'CREATE TABLE mappings ( hash_id INTEGER, tag_id INTEGER );' )
+        self._c.execute( 'CREATE INDEX mappings_hash_id_index ON mappings ( hash_id );' )
+        
+        self._c.execute( 'CREATE TABLE namespaces ( namespace TEXT );' )
+        
+        self._c.execute( 'CREATE TABLE tags ( tag_id INTEGER PRIMARY KEY, tag TEXT );' )
+        self._c.execute( 'CREATE UNIQUE INDEX tags_tag_index ON tags ( tag );' )
+        
+    
+    def _InitDBCursor( self ):
+        
+        self._db = sqlite3.connect( self._path, isolation_level = None, detect_types = sqlite3.PARSE_DECLTYPES )
+        
+        self._c = self._db.cursor()
+        
+    
+    def _GetHashId( self, hash, read_only = False ):
+        
+        result = self._c.execute( 'SELECT hash_id FROM hashes WHERE hash = ?;', ( sqlite3.Binary( hash ), ) ).fetchone()
+        
+        if result is None:
+            
+            if read_only: raise Exception()
+            
+            self._c.execute( 'INSERT INTO hashes ( hash ) VALUES ( ? );', ( sqlite3.Binary( hash ), ) )
+            
+            hash_id = self._c.lastrowid
+            
+        else: ( hash_id, ) = result
+        
+        return hash_id
+        
+    
+    def _GetTagId( self, tag ):
+        
+        if ':' in tag:
+            
+            ( namespace, subtag ) = tag.split( ':', 1 )
+            
+            if namespace != '' and namespace not in self._namespaces:
+                
+                self._c.execute( 'INSERT INTO namespaces ( namespace ) VALUES ( ? );', ( namespace, ) )
+                
+                self._namespaces.add( namespace )
+                
+            
+        
+        result = self._c.execute( 'SELECT tag_id FROM tags WHERE tag = ?;', ( tag, ) ).fetchone()
+        
+        if result is None:
+            
+            self._c.execute( 'INSERT INTO tags ( tag ) VALUES ( ? );', ( tag, ) )
+            
+            tag_id = self._c.lastrowid
+            
+        else: ( tag_id, ) = result
+        
+        return tag_id
+        
+    
+    def BeginBigJob( self ): self._c.execute( 'BEGIN IMMEDIATE' )
+    
+    def CommitBigJob( self ):
+        
+        self._c.execute( 'COMMIT' )
+        self._c.execute( 'VACUUM' )
+        
+    
+    def AddMapping( self, hash, tag ):
+        
+        hash_id = self._GetHashId( hash )
+        tag_id = self._GetTagId( tag )
+        
+        self._c.execute( 'INSERT OR IGNORE INTO mappings ( hash_id, tag_id ) VALUES ( ?, ? );', ( hash_id, tag_id ) )
+        
+    
+    def DeleteMapping( self, hash, tag ):
+        
+        hash_id = self._GetHashId( hash )
+        tag_id = self._GetTagId( tag )
+        
+        self._c.execute( 'DELETE FROM mappings WHERE hash_id = ? AND tag_id = ?;', ( hash_id, tag_id ) )
+        
+    
+    def DeleteNamespaces( self ):
+        
+        self._namespaces = {}
+        
+        self._c.execute( 'DELETE FROM namespaces;' )
+        
+    
+    def GetHashType( self ):
+        
+        try: ( hash_type, ) = self._c.execute( 'SELECT hash_type FROM hash_type;' ).fetchone()
+        except: raise Exception( 'This archive has no hash type set.' )
+        
+        return hash_type
+        
+    
+    def GetMappings( self, hash ):
+        
+        try: hash_id = self._GetHashId( hash, read_only = True )
+        except: return []
+        
+        result = { tag for ( tag, ) in self._c.execute( 'SELECT tag FROM mappings, tags USING ( tag_id ) WHERE hash_id = ?;', ( hash_id, ) ) }
+        
+        return result
+        
+    
+    def GetNamespaces( self ): return self._namespaces
+    
+    def HasHash( self, hash ):
+        
+        try:
+            
+            hash_id = self._GetHashId( hash, read_only = True )
+            
+            return True
+            
+        except: return False
+        
+    
+    def RebuildNamespaces( self, namespaces_to_exclude = set() ):
+        
+        self._namespaces = set()
+        
+        self._c.execute( 'DELETE FROM namespaces;' )
+        
+        for ( tag, ) in self._c.execute( 'SELECT tag FROM tags;' ):
+            
+            if ':' in tag:
+                
+                ( namespace, subtag ) = tag.split( ':', 1 )
+                
+                if namespace != '' and namespace not in self._namespaces and namespace not in namespaces_to_exclude:
+                    
+                    self._namespaces.add( namespace )
+                    
+                
+            
+        
+        self._c.executemany( 'INSERT INTO namespaces ( namespace ) VALUES ( ? );', ( ( namespace, ) for namespace in self._namespaces ) )
+        
+    
+    def SetHashType( self, hash_type ):
+        
+        self._c.execute( 'DELETE FROM hash_type;' )
+        
+        self._c.execute( 'INSERT INTO hash_type ( hash_type ) VALUES ( ? );', ( hash_type, ) )
+        
+    
+    def SetMappings( self, hash, tags ):
+        
+        hash_id = self._GetHashId( hash )
+        
+        self._c.execute( 'DELETE FROM mappings WHERE hash_id = ?;', ( hash_id, ) )
+        
+        tag_ids = [ self._GetTagId( tag ) for tag in tags ]
+        
+        for tag in tags:
+            
+            tag_id = self._GetTagId( tag )
+            
+            self._c.execute( 'INSERT INTO mappings ( hash_id, tag_id ) VALUES ( ?, ? );', ( hash_id, tag_id ) )
+            
+        
+    
\ No newline at end of file