hydrus/include/HydrusTags.py

891 lines
31 KiB
Python

import collections
import HydrusConstants as HC
import itertools
import os
import threading
import time
import traceback
import wx
# important thing here, and reason why it is recursive, is because we want to preserve the parent-grandparent interleaving
def BuildServiceIdentifiersToChildrenToParents( service_identifiers_to_simple_children_to_parents ):
def AddParents( simple_children_to_parents, children_to_parents, child, parents ):
for parent in parents:
children_to_parents[ child ].append( parent )
if parent in simple_children_to_parents:
grandparents = simple_children_to_parents[ parent ]
AddParents( simple_children_to_parents, children_to_parents, child, grandparents )
service_identifiers_to_children_to_parents = collections.defaultdict( HC.default_dict_list )
for ( service_identifier, simple_children_to_parents ) in service_identifiers_to_simple_children_to_parents.items():
children_to_parents = service_identifiers_to_children_to_parents[ service_identifier ]
for ( child, parents ) in simple_children_to_parents.items(): AddParents( simple_children_to_parents, children_to_parents, child, parents )
return service_identifiers_to_children_to_parents
def BuildServiceIdentifiersToSimpleChildrenToParents( service_identifiers_to_pairs_flat ):
service_identifiers_to_simple_children_to_parents = collections.defaultdict( HC.default_dict_set )
for ( service_identifier, pairs ) in service_identifiers_to_pairs_flat.items():
service_identifiers_to_simple_children_to_parents[ service_identifier ] = BuildSimpleChildrenToParents( pairs )
return service_identifiers_to_simple_children_to_parents
def BuildSimpleChildrenToParents( pairs ):
simple_children_to_parents = HC.default_dict_set()
for ( child, parent ) in pairs:
if child == parent: continue
if LoopInSimpleChildrenToParents( simple_children_to_parents, child, parent ): continue
simple_children_to_parents[ child ].add( parent )
return simple_children_to_parents
def CollapseTagSiblingChains( processed_siblings ):
# now to collapse chains
# A -> B and B -> C goes to A -> C and B -> C
siblings = {}
for ( old_tag, new_tag ) in processed_siblings.items():
# adding A -> B
if new_tag in siblings:
# B -> F already calculated and added, so add A -> F
siblings[ old_tag ] = siblings[ new_tag ]
else:
while new_tag in processed_siblings: new_tag = processed_siblings[ new_tag ] # pursue endpoint F
siblings[ old_tag ] = new_tag
reverse_lookup = collections.defaultdict( list )
for ( old_tag, new_tag ) in siblings.items(): reverse_lookup[ new_tag ].append( old_tag )
return ( siblings, reverse_lookup )
def CombineTagParentPairs( tag_service_precedence, service_identifiers_to_statuses_to_pairs ):
service_identifiers_to_pairs_flat = HC.default_dict_set()
combined = service_identifiers_to_pairs_flat[ HC.COMBINED_TAG_SERVICE_IDENTIFIER ]
current_deleted_pairs = set()
for service_identifier in tag_service_precedence:
statuses_to_pairs = service_identifiers_to_statuses_to_pairs[ service_identifier ]
pairs = statuses_to_pairs[ HC.CURRENT ].union( statuses_to_pairs[ HC.PENDING ] )
service_identifiers_to_pairs_flat[ service_identifier ] = pairs
pairs.difference_update( current_deleted_pairs )
combined.update( pairs )
current_deleted_pairs.update( statuses_to_pairs[ HC.DELETED ] )
return service_identifiers_to_pairs_flat
def CombineTagSiblingPairs( tag_service_precedence, service_identifiers_to_statuses_to_pairs ):
# first combine the services
# go from high precedence to low, writing A -> B
# if A map already exists, don't overwrite
# if A -> B forms a loop, don't write it
processed_siblings = {}
current_deleted_pairs = set()
for service_identifier in tag_service_precedence:
statuses_to_pairs = service_identifiers_to_statuses_to_pairs[ service_identifier ]
pairs = statuses_to_pairs[ HC.CURRENT ].union( statuses_to_pairs[ HC.PENDING ] )
pairs.difference_update( current_deleted_pairs )
for ( old, new ) in pairs:
if old == new: continue
if old not in processed_siblings:
next_new = new
we_have_a_loop = False
while next_new in processed_siblings:
next_new = processed_siblings[ next_new ]
if next_new == old:
we_have_a_loop = True
break
if not we_have_a_loop: processed_siblings[ old ] = new
current_deleted_pairs.update( statuses_to_pairs[ HC.DELETED ] )
return processed_siblings
def LoopInSimpleChildrenToParents( simple_children_to_parents, child, parent ):
potential_loop_paths = { parent }
while len( potential_loop_paths.intersection( simple_children_to_parents.keys() ) ) > 0:
new_potential_loop_paths = set()
for potential_loop_path in potential_loop_paths.intersection( simple_children_to_parents.keys() ):
new_potential_loop_paths.update( simple_children_to_parents[ potential_loop_path ] )
potential_loop_paths = new_potential_loop_paths
if child in potential_loop_paths: return True
return False
def MergeTagsManagers( tag_service_precedence, tags_managers ):
def CurrentAndPendingFilter( items ):
for ( service_identifier, statuses_to_tags ) in items:
filtered = { status : tags for ( status, tags ) in statuses_to_tags.items() if status in ( HC.CURRENT, HC.PENDING ) }
yield ( service_identifier, filtered )
# [[( service_identifier, statuses_to_tags )]]
s_i_s_t_t_tupled = ( CurrentAndPendingFilter( tags_manager.GetServiceIdentifiersToStatusesToTags().items() ) for tags_manager in tags_managers )
# [(service_identifier, statuses_to_tags)]
flattened_s_i_s_t_t = itertools.chain.from_iterable( s_i_s_t_t_tupled )
# service_identifier : [ statuses_to_tags ]
s_i_s_t_t_dict = HC.BuildKeyToListDict( flattened_s_i_s_t_t )
# now let's merge so we have service_identifier : statuses_to_tags
merged_service_identifiers_to_statuses_to_tags = collections.defaultdict( HC.default_dict_set )
for ( service_identifier, several_statuses_to_tags ) in s_i_s_t_t_dict.items():
# [[( status, tags )]]
s_t_t_tupled = ( s_t_t.items() for s_t_t in several_statuses_to_tags )
# [( status, tags )]
flattened_s_t_t = itertools.chain.from_iterable( s_t_t_tupled )
statuses_to_tags = HC.default_dict_set()
for ( status, tags ) in flattened_s_t_t: statuses_to_tags[ status ].update( tags )
merged_service_identifiers_to_statuses_to_tags[ service_identifier ] = statuses_to_tags
return TagsManagerSimple( merged_service_identifiers_to_statuses_to_tags )
class NamespaceBlacklistsManager():
def __init__( self ):
self.RefreshData()
HC.pubsub.sub( self, 'RefreshData', 'notify_new_namespace_blacklists' )
def _GetPredicate( self, service_identifier ):
( blacklist, namespaces ) = self._service_identifiers_to_blacklists[ service_identifier ]
tag_matches = lambda tag: True in ( tag.startswith( namespace ) for namespace in namespaces )
if blacklist: predicate = lambda tag: not tag_matches( tag )
else: predicate = tag_matches
return predicate
def GetInfo( self, service_identifier ):
if service_identifier in self._service_identifiers_to_predicates: return self._service_identifiers_to_predicates[ service_identifier ]
else: return ( True, set() )
def RefreshData( self ):
info = HC.app.Read( 'namespace_blacklists' )
self._service_identifiers_to_predicates = {}
for ( service_identifier, blacklist, namespaces ) in info:
unnamespaced = '' in namespaces
ns = [ namespace for namespace in namespaces if namespace != '' ]
namespaced_match = lambda tag: True in ( tag.startswith( namespace ) for namespace in ns )
if unnamespaced:
unnamespaced_match = lambda tag: ':' not in tag
if len( ns ) > 0: tag_match = lambda tag: unnamespaced_match( tag ) or namespaced_match( tag )
else: tag_match = unnamespaced_match
else:
tag_match = namespaced_match
if blacklist: predicate = lambda tag: not tag_match( tag )
else: predicate = tag_match
self._service_identifiers_to_predicates[ service_identifier ] = predicate
def FilterServiceidentifiersToStatusesToTags( self, service_identifiers_to_statuses_to_tags ):
filtered_service_identifiers_to_statuses_to_tags = collections.defaultdict( HC.default_dict_set )
for ( service_identifier, statuses_to_tags ) in service_identifiers_to_statuses_to_tags.items():
if service_identifier in self._service_identifiers_to_predicates:
predicate = self._service_identifiers_to_predicates[ service_identifier ]
for ( status, tags ) in statuses_to_tags.items():
tags = { tag for tag in tags if predicate( tag ) }
filtered_service_identifiers_to_statuses_to_tags[ service_identifier ][ status ] = tags
else: filtered_service_identifiers_to_statuses_to_tags[ service_identifier ] = statuses_to_tags
return filtered_service_identifiers_to_statuses_to_tags
def FilterTags( self, service_identifier, tags ):
if service_identifier in self._service_identifiers_to_predicates:
predicate = self._service_identifiers_to_predicates[ service_identifier ]
tags = { tag for tag in tags if predicate( tag ) }
return tags
class TagsManagerSimple():
def __init__( self, service_identifiers_to_statuses_to_tags ):
namespace_blacklists_manager = HC.app.GetManager( 'namespace_blacklists' )
service_identifiers_to_statuses_to_tags = namespace_blacklists_manager.FilterServiceidentifiersToStatusesToTags( service_identifiers_to_statuses_to_tags )
self._service_identifiers_to_statuses_to_tags = service_identifiers_to_statuses_to_tags
self._combined_namespaces_cache = None
def GetCombinedNamespaces( self, namespaces ):
if self._combined_namespaces_cache is None:
combined_statuses_to_tags = self._service_identifiers_to_statuses_to_tags[ HC.COMBINED_TAG_SERVICE_IDENTIFIER ]
combined_current = combined_statuses_to_tags[ HC.CURRENT ]
combined_pending = combined_statuses_to_tags[ HC.PENDING ]
self._combined_namespaces_cache = HC.BuildKeyToSetDict( tag.split( ':', 1 ) for tag in combined_current.union( combined_pending ) if ':' in tag )
only_int_allowed = ( 'volume', 'chapter', 'page' )
for namespace in only_int_allowed:
tags = self._combined_namespaces_cache[ namespace ]
int_tags = set()
for tag in tags:
try: tag = int( tag )
except: continue
int_tags.add( tag )
self._combined_namespaces_cache[ namespace ] = int_tags
result = { namespace : self._combined_namespaces_cache[ namespace ] for namespace in namespaces }
return result
def GetComparableNamespaceSlice( self, namespaces, collapse = True ):
combined_statuses_to_tags = self._service_identifiers_to_statuses_to_tags[ HC.COMBINED_TAG_SERVICE_IDENTIFIER ]
combined_current = combined_statuses_to_tags[ HC.CURRENT ]
combined_pending = combined_statuses_to_tags[ HC.PENDING ]
combined = combined_current.union( combined_pending )
siblings_manager = HC.app.GetManager( 'tag_siblings' )
slice = []
for namespace in namespaces:
tags = [ tag for tag in combined if tag.startswith( namespace + ':' ) ]
if collapse:
tags = list( siblings_manager.CollapseTags( tags ) )
tags = [ tag.split( ':', 1 )[1] for tag in tags ]
def process_tag( t ):
try: return int( t )
except: return t
tags = [ process_tag( tag ) for tag in tags ]
tags.sort()
tags = tuple( tags )
slice.append( tags )
return tuple( slice )
def GetNamespaceSlice( self, namespaces, collapse = True ):
combined_statuses_to_tags = self._service_identifiers_to_statuses_to_tags[ HC.COMBINED_TAG_SERVICE_IDENTIFIER ]
combined_current = combined_statuses_to_tags[ HC.CURRENT ]
combined_pending = combined_statuses_to_tags[ HC.PENDING ]
slice = { tag for tag in combined_current.union( combined_pending ) if True in ( tag.startswith( namespace + ':' ) for namespace in namespaces ) }
if collapse:
siblings_manager = HC.app.GetManager( 'tag_siblings' )
slice = siblings_manager.CollapseTags( slice )
slice = frozenset( slice )
return slice
class TagsManager( TagsManagerSimple ):
def __init__( self, tag_service_precedence, service_identifiers_to_statuses_to_tags ):
TagsManagerSimple.__init__( self, service_identifiers_to_statuses_to_tags )
self._tag_service_precedence = tag_service_precedence
self._RecalcCombined()
def _RecalcCombined( self ):
t_s_p = list( self._tag_service_precedence )
t_s_p.reverse()
combined_current = set()
combined_pending = set()
for service_identifier in t_s_p:
statuses_to_tags = self._service_identifiers_to_statuses_to_tags[ service_identifier ]
combined_current.update( statuses_to_tags[ HC.CURRENT ] )
combined_current.difference_update( statuses_to_tags[ HC.DELETED ] )
combined_pending.update( statuses_to_tags[ HC.PENDING ] )
combined_statuses_to_tags = collections.defaultdict( set )
combined_statuses_to_tags[ HC.CURRENT ] = combined_current
combined_statuses_to_tags[ HC.PENDING ] = combined_pending
self._service_identifiers_to_statuses_to_tags[ HC.COMBINED_TAG_SERVICE_IDENTIFIER ] = combined_statuses_to_tags
self._combined_namespaces_cache = None
def DeletePending( self, service_identifier ):
statuses_to_tags = self._service_identifiers_to_statuses_to_tags[ service_identifier ]
if len( statuses_to_tags[ HC.PENDING ] ) + len( statuses_to_tags[ HC.PETITIONED ] ) > 0:
statuses_to_tags[ HC.PENDING ] = set()
statuses_to_tags[ HC.PETITIONED ] = set()
self._RecalcCombined()
def GetCurrent( self, service_identifier = HC.COMBINED_TAG_SERVICE_IDENTIFIER ):
statuses_to_tags = self._service_identifiers_to_statuses_to_tags[ service_identifier ]
return set( statuses_to_tags[ HC.CURRENT ] )
def GetDeleted( self, service_identifier = HC.COMBINED_TAG_SERVICE_IDENTIFIER ):
statuses_to_tags = self._service_identifiers_to_statuses_to_tags[ service_identifier ]
return statuses_to_tags[ HC.DELETED ]
def GetNumTags( self, tag_service_identifier, include_current_tags = True, include_pending_tags = False ):
num_tags = 0
statuses_to_tags = self.GetStatusesToTags( tag_service_identifier )
if include_current_tags: num_tags += len( statuses_to_tags[ HC.CURRENT ] )
if include_pending_tags: num_tags += len( statuses_to_tags[ HC.PENDING ] )
return num_tags
def GetPending( self, service_identifier = HC.COMBINED_TAG_SERVICE_IDENTIFIER ):
statuses_to_tags = self._service_identifiers_to_statuses_to_tags[ service_identifier ]
return statuses_to_tags[ HC.PENDING ]
def GetPetitioned( self, service_identifier = HC.COMBINED_TAG_SERVICE_IDENTIFIER ):
statuses_to_tags = self._service_identifiers_to_statuses_to_tags[ service_identifier ]
return set( statuses_to_tags[ HC.PETITIONED ] )
def GetServiceIdentifiersToStatusesToTags( self ): return self._service_identifiers_to_statuses_to_tags
def GetStatusesToTags( self, service_identifier ): return self._service_identifiers_to_statuses_to_tags[ service_identifier ]
def HasTag( self, tag ):
combined_statuses_to_tags = self._service_identifiers_to_statuses_to_tags[ HC.COMBINED_TAG_SERVICE_IDENTIFIER ]
return tag in combined_statuses_to_tags[ HC.CURRENT ] or tag in combined_statuses_to_tags[ HC.PENDING ]
def ProcessContentUpdate( self, service_identifier, content_update ):
statuses_to_tags = self._service_identifiers_to_statuses_to_tags[ service_identifier ]
( data_type, action, row ) = content_update.ToTuple()
if action == HC.CONTENT_UPDATE_PETITION: ( tag, hashes, reason ) = row
else: ( tag, hashes ) = row
if action == HC.CONTENT_UPDATE_ADD:
statuses_to_tags[ HC.CURRENT ].add( tag )
statuses_to_tags[ HC.DELETED ].discard( tag )
statuses_to_tags[ HC.PENDING ].discard( tag )
elif action == HC.CONTENT_UPDATE_DELETE:
statuses_to_tags[ HC.DELETED ].add( tag )
statuses_to_tags[ HC.CURRENT ].discard( tag )
statuses_to_tags[ HC.PETITIONED ].discard( tag )
elif action == HC.CONTENT_UPDATE_PENDING: statuses_to_tags[ HC.PENDING ].add( tag )
elif action == HC.CONTENT_UPDATE_RESCIND_PENDING: statuses_to_tags[ HC.PENDING ].discard( tag )
elif action == HC.CONTENT_UPDATE_PETITION: statuses_to_tags[ HC.PETITIONED ].add( tag )
elif action == HC.CONTENT_UPDATE_RESCIND_PETITION: statuses_to_tags[ HC.PETITIONED ].discard( tag )
self._RecalcCombined()
def ResetService( self, service_identifier ):
if service_identifier in self._service_identifiers_to_statuses_to_tags:
del self._service_identifiers_to_statuses_to_tags[ service_identifier ]
self._RecalcCombined()
class TagParentsManager():
def __init__( self ):
self._tag_service_precedence = HC.app.Read( 'tag_service_precedence' )
self._RefreshParents()
self._lock = threading.Lock()
HC.pubsub.sub( self, 'RefreshParents', 'notify_new_parents' )
def _RefreshParents( self ):
service_identifiers_to_statuses_to_pairs = HC.app.Read( 'tag_parents' )
#
sibling_manager = HC.app.GetManager( 'tag_siblings' )
result = collections.defaultdict( HC.default_dict_set )
for ( service_identifier, statuses_to_pairs ) in service_identifiers_to_statuses_to_pairs.items():
for ( status, pairs ) in statuses_to_pairs.items():
pairs = sibling_manager.CollapsePairs( pairs )
result[ service_identifier ][ status ] = pairs
service_identifiers_to_statuses_to_pairs = result
#
t_s_p = list( self._tag_service_precedence )
service_identifiers_to_pairs_flat = CombineTagParentPairs( t_s_p, service_identifiers_to_statuses_to_pairs )
service_identifiers_to_simple_children_to_parents = BuildServiceIdentifiersToSimpleChildrenToParents( service_identifiers_to_pairs_flat )
self._service_identifiers_to_children_to_parents = BuildServiceIdentifiersToChildrenToParents( service_identifiers_to_simple_children_to_parents )
def ExpandPredicates( self, service_identifier, predicates ):
# for now -- we will make an option, later
service_identifier = HC.COMBINED_TAG_SERVICE_IDENTIFIER
results = []
with self._lock:
for predicate in predicates:
results.append( predicate )
if predicate.GetPredicateType() == HC.PREDICATE_TYPE_TAG:
tag = predicate.GetTag()
parents = self._service_identifiers_to_children_to_parents[ service_identifier ][ tag ]
for parent in parents:
parent_predicate = HC.Predicate( HC.PREDICATE_TYPE_PARENT, parent, None )
results.append( parent_predicate )
return results
def ExpandTags( self, service_identifier, tags ):
with self._lock:
# for now -- we will make an option, later
service_identifier = HC.COMBINED_TAG_SERVICE_IDENTIFIER
tags_results = set( tags )
for tag in tags: tags_results.update( self._service_identifiers_to_children_to_parents[ service_identifier ][ tag ] )
return tags_results
def GetParents( self, service_identifier, tag ):
with self._lock:
# for now -- we will make an option, later
service_identifier = HC.COMBINED_TAG_SERVICE_IDENTIFIER
return self._service_identifiers_to_children_to_parents[ service_identifier ][ tag ]
def RefreshParents( self ):
with self._lock: self._RefreshParents()
class TagSiblingsManager():
def __init__( self ):
self._tag_service_precedence = HC.app.Read( 'tag_service_precedence' )
# I should offload this to a thread (rather than the gui thread), and have an event to say when it is ready
# gui requests should pause until it is ready, which should kick in during refreshes, too!
self._RefreshSiblings()
self._lock = threading.Lock()
HC.pubsub.sub( self, 'RefreshSiblings', 'notify_new_siblings' )
def _RefreshSiblings( self ):
service_identifiers_to_statuses_to_pairs = HC.app.Read( 'tag_siblings' )
tag_service_precedence = list( self._tag_service_precedence )
processed_siblings = CombineTagSiblingPairs( tag_service_precedence, service_identifiers_to_statuses_to_pairs )
( self._siblings, self._reverse_lookup ) = CollapseTagSiblingChains( processed_siblings )
HC.pubsub.pub( 'new_siblings_gui' )
def GetAutocompleteSiblings( self, half_complete_tag ):
with self._lock:
key_based_matching_values = { self._siblings[ key ] for key in self._siblings.keys() if HC.SearchEntryMatchesTag( half_complete_tag, key, search_siblings = False ) }
value_based_matching_values = { value for value in self._siblings.values() if HC.SearchEntryMatchesTag( half_complete_tag, value, search_siblings = False ) }
matching_values = key_based_matching_values.union( value_based_matching_values )
# all the matching values have a matching sibling somewhere in their network
# so now fetch the networks
lists_of_matching_keys = [ self._reverse_lookup[ value ] for value in matching_values ]
matching_keys = itertools.chain.from_iterable( lists_of_matching_keys )
matches = matching_values.union( matching_keys )
return matches
def GetSibling( self, tag ):
with self._lock:
if tag in self._siblings: return self._siblings[ tag ]
else: return None
def GetAllSiblings( self, tag ):
with self._lock:
if tag in self._siblings:
new_tag = self._siblings[ tag ]
elif tag in self._reverse_lookup: new_tag = tag
else: return [ tag ]
all_siblings = list( self._reverse_lookup[ new_tag ] )
all_siblings.append( new_tag )
return all_siblings
def RefreshSiblings( self ):
with self._lock: self._RefreshSiblings()
def CollapseNamespacedTags( self, namespace, tags ):
with self._lock:
results = set()
for tag in tags:
full_tag = namespace + ':' + tag
if full_tag in self._siblings:
sibling = self._siblings[ full_tag ]
if ':' in sibling: sibling = sibling.split( ':', 1 )[1]
results.add( sibling )
else: results.add( tag )
return results
def CollapsePredicates( self, predicates ):
with self._lock:
results = [ predicate for predicate in predicates if predicate.GetPredicateType() != HC.PREDICATE_TYPE_TAG ]
tag_predicates = [ predicate for predicate in predicates if predicate.GetPredicateType() == HC.PREDICATE_TYPE_TAG ]
tags_to_predicates = { predicate.GetTag() : predicate for predicate in predicates if predicate.GetPredicateType() == HC.PREDICATE_TYPE_TAG }
tags = tags_to_predicates.keys()
tags_to_include_in_results = set()
for tag in tags:
if tag in self._siblings:
old_tag = tag
old_predicate = tags_to_predicates[ old_tag ]
new_tag = self._siblings[ old_tag ]
if new_tag not in tags_to_predicates:
( old_operator, old_tag ) = old_predicate.GetValue()
new_predicate = HC.Predicate( HC.PREDICATE_TYPE_TAG, ( old_operator, new_tag ), 0 )
tags_to_predicates[ new_tag ] = new_predicate
tags_to_include_in_results.add( new_tag )
new_predicate = tags_to_predicates[ new_tag ]
count = old_predicate.GetCount()
new_predicate.AddToCount( count )
else: tags_to_include_in_results.add( tag )
results.extend( [ tags_to_predicates[ tag ] for tag in tags_to_include_in_results ] )
return results
def CollapsePairs( self, pairs ):
with self._lock:
result = set()
for ( a, b ) in pairs:
if a in self._siblings: a = self._siblings[ a ]
if b in self._siblings: b = self._siblings[ b ]
result.add( ( a, b ) )
return result
def CollapseTags( self, tags ):
with self._lock: return { self._siblings[ tag ] if tag in self._siblings else tag for tag in tags }
def CollapseTagsToCount( self, tags_to_count ):
with self._lock:
results = collections.Counter()
for ( tag, count ) in tags_to_count.items():
if tag in self._siblings: tag = self._siblings[ tag ]
results[ tag ] += count
return results