import collections
import cProfile
import io
import itertools
import os
import pstats
import psutil
import random
import re
import sqlite3
import struct
import subprocess
import sys
import threading
import time
import traceback
import typing
import yaml
from hydrus.core import HydrusBoot
from hydrus.core import HydrusConstants as HC
from hydrus.core import HydrusExceptions
from hydrus.core import HydrusGlobals as HG
from hydrus.core import HydrusText
def default_dict_list():
    """Return a new, empty defaultdict whose missing keys start out as an empty list."""
    
    return collections.defaultdict( list )
def default_dict_set():
    """Return a new, empty defaultdict whose missing keys start out as an empty set."""
    
    return collections.defaultdict( set )
def BuildKeyToListDict( pairs ):
    """Group ( key, value ) pairs into a defaultdict of key -> list of values, keeping input order."""
    
    key_to_values = collections.defaultdict( list )
    
    for pair in pairs:
        
        ( k, v ) = pair
        
        key_to_values[ k ].append( v )
        
    
    return key_to_values
def BuildKeyToSetDict( pairs ):
    """Group ( key, value ) pairs into a defaultdict of key -> set of values."""
    
    key_to_values = collections.defaultdict( set )
    
    for pair in pairs:
        
        ( k, v ) = pair
        
        key_to_values[ k ].add( v )
        
    
    return key_to_values
def CalculateScoreFromRating( count, rating ):
    """Score count votes with an average rating in [0, 1] using the formula from the linked article.
    
    The constants are z = 1.96 and its derivatives: 1.9208 = z^2/2, 0.9604 = z^2/4, 3.8416 = z^2.
    """
    
    # https://www.evanmiller.org/how-not-to-sort-by-average-rating.html
    
    # positive + negative = count
    positive = count * rating
    negative = count * ( 1.0 - rating )
    
    centre = ( positive + 1.9208 ) / count
    
    root = ( ( positive * negative ) / count + 0.9604 ) ** 0.5
    
    spread = 1.96 * root / count
    
    normaliser = 1 + 3.8416 / count
    
    return ( centre - spread ) / normaliser
def CheckProgramIsNotShuttingDown():
    """Raise HydrusExceptions.ShutdownException when HG.model_shutdown is set; otherwise do nothing."""
    
    if not HG.model_shutdown:
        
        return
        
    
    raise HydrusExceptions.ShutdownException( 'Application is shutting down!' )
def CleanRunningFile( db_path, instance ):
    """Delete the '<instance>_running' marker file inside db_path.
    
    This is best-effort cleanup: a marker that is already gone is not treated as an error.
    """
    
    path = os.path.join( db_path, instance + '_running' )
    
    try:
        
        os.remove( path )
        
    except FileNotFoundError:
        
        # just to be careful--there is nothing left to clean up
        pass
        
    
def ConvertFloatToPercentage( f ):
return '{:.1f}%'.format( f * 100 )
def ConvertIntToPixels( i ):
    """Map a pixel multiplier (1, 1000, 1000000) to its unit name; anything else is reported as 'megapixels'."""
    
    names = { 1 : 'pixels', 1000 : 'kilopixels', 1000000 : 'megapixels' }
    
    return names.get( i, 'megapixels' )
def ConvertIndexToPrettyOrdinalString( index: int ):
    """Describe a zero-based index as an ordinal string.
    
    Non-negative indices are shifted to one-based positions; negative indices are passed through unchanged.
    """
    
    position = index + 1 if index >= 0 else index
    
    return ConvertIntToPrettyOrdinalString( position )
def ConvertIntToPrettyOrdinalString( num: int ):
    """Render num as an English ordinal such as '1st', '22nd' or '113th'.
    
    Zero gives 'unknown position'. A negative num is rendered from its absolute value with ' from last' appended.
    """
    
    if num == 0:
        
        return 'unknown position'
        
    
    magnitude = abs( num )
    
    if ( magnitude % 100 ) // 10 == 1:
        
        # 11th, 12th, 13th--the teens never take st/nd/rd
        ordinal = 'th'
        
    else:
        
        ordinal = { 1 : 'st', 2 : 'nd', 3 : 'rd' }.get( magnitude % 10, 'th' )
        
    
    s = '{}{}'.format( ToHumanInt( magnitude ), ordinal )
    
    if num < 0:
        
        s = '{} from last'.format( s )
        
    
    return s
def ConvertIntToUnit( unit ):
    """Map a byte multiplier (1, 1024, 1048576, 1073741824) to 'B', 'KB', 'MB' or 'GB'; other values give None."""
    
    labels = { 1 : 'B', 1024 : 'KB', 1048576 : 'MB', 1073741824 : 'GB' }
    
    return labels.get( unit )
def ConvertMillisecondsToPrettyTime( ms ):
    """Describe a millisecond duration using its two most significant units.
    
    Gives 'H hours M minutes', 'M minutes S seconds', 'S.s seconds' or 'N milliseconds' depending on magnitude.
    """
    
    ( hours, remainder ) = divmod( ms, 3600000 )
    ( minutes, remainder ) = divmod( remainder, 60000 )
    
    seconds = remainder // 1000
    
    minutes_result = '1 minute' if minutes == 1 else str( minutes ) + ' minutes'
    
    if hours > 0:
        
        hours_result = '1 hour' if hours == 1 else str( hours ) + ' hours'
        
        return hours_result + ' ' + minutes_result
        
    
    if minutes > 0:
        
        seconds_result = '1 second' if seconds == 1 else str( seconds ) + ' seconds'
        
        return minutes_result + ' ' + seconds_result
        
    
    if seconds > 0:
        
        # under a minute we show tenths of a second
        return '{:.1f} seconds'.format( remainder / 1000 )
        
    
    leftover_ms = int( remainder % 1000 )
    
    if leftover_ms == 1:
        
        return '1 millisecond'
        
    
    return '{} milliseconds'.format( leftover_ms )
def ConvertNumericalRatingToPrettyString( lower, upper, rating, rounded_result = False, out_of = True ):
    """Scale a 0-1 rating onto [lower, upper] and format it with two decimals.
    
    When out_of is set and lower is 0 or 1, '/<upper>' is appended. rounded_result rounds the scaled value first.
    """
    
    span = upper - lower
    
    rating_converted = ( rating * span ) + lower
    
    if rounded_result:
        
        rating_converted = round( rating_converted )
        
    
    pieces = [ '{:.2f}'.format( rating_converted ) ]
    
    if out_of and lower in ( 0, 1 ):
        
        pieces.append( '/{:.2f}'.format( upper ) )
        
    
    return ''.join( pieces )
def ConvertPixelsToInt( unit ):
    """Map 'pixels', 'kilopixels' or 'megapixels' to its multiplier; other values give None."""
    
    multipliers = { 'pixels' : 1, 'kilopixels' : 1000, 'megapixels' : 1000000 }
    
    return multipliers.get( unit )
def ConvertPrettyStringsToUglyNamespaces( pretty_strings ):
    """Return the display strings as a set of namespaces, with 'no namespace' replaced by the empty string."""
    
    ugly_namespaces = set()
    
    for pretty_string in pretty_strings:
        
        if pretty_string != 'no namespace':
            
            ugly_namespaces.add( pretty_string )
            
        
    
    if 'no namespace' in pretty_strings:
        
        ugly_namespaces.add( '' )
        
    
    return ugly_namespaces
def ConvertResolutionToPrettyString( resolution ):
    """Format a ( width, height ) pair as '<width>x<height>', each number rendered by ToHumanInt."""
    
    ( width, height ) = resolution
    
    pretty_width = ToHumanInt( width )
    pretty_height = ToHumanInt( height )
    
    return pretty_width + 'x' + pretty_height
def ConvertStatusToPrefix( status ):
    """Return the short display prefix for a content status, or None for a status not listed here."""
    
    prefixes = {
        HC.CONTENT_STATUS_CURRENT : '',
        HC.CONTENT_STATUS_PENDING : '(+) ',
        HC.CONTENT_STATUS_PETITIONED : '(-) ',
        HC.CONTENT_STATUS_DELETED : '(X) '
    }
    
    return prefixes.get( status )
def TimeDeltaToPrettyTimeDelta( seconds, show_seconds = True ):
    """Describe a duration in seconds as short human text.
    
    None gives 'per month' and 0 gives '0 seconds'. The sign is ignored.
    From a minute upwards the result is at most two adjacent units, e.g. '1 month 2 days' or '1 year'.
    Below a minute the result is seconds, milliseconds or microseconds.
    show_seconds = False drops the seconds unit from the minute-and-above form.
    """
    
    if seconds is None:
        
        return 'per month'
        
    
    if seconds == 0:
        
        return '0 seconds'
        
    
    seconds = abs( seconds )
    
    if seconds >= 60:
        
        seconds = int( seconds )
        
        MINUTE = 60
        HOUR = 60 * MINUTE
        DAY = 24 * HOUR
        MONTH = 30 * DAY
        YEAR = 365 * DAY
        
        lines = [
            ( 'year', YEAR ),
            ( 'month', MONTH ),
            ( 'day', DAY ),
            ( 'hour', HOUR ),
            ( 'minute', MINUTE )
        ]
        
        if show_seconds:
            
            lines.append( ( 'second', 1 ) )
            
        
        result_components = []
        
        for ( time_string, duration ) in lines:
            
            ( time_quantity, seconds ) = divmod( seconds, duration )
            
            if time_quantity > 0:
                
                s = ToHumanInt( time_quantity ) + ' ' + time_string
                
                if time_quantity > 1:
                    
                    s += 's'
                    
                
                result_components.append( s )
                
                if len( result_components ) == 2: # we now have 1 month 2 days
                    
                    break
                    
                
            elif len( result_components ) > 0: # something like '1 year' -- in which case we do not care about the days and hours
                
                break
                
            
        
        result = ' '.join( result_components )
        
    elif seconds > 1:
        
        if int( seconds ) == seconds:
            
            result = ToHumanInt( seconds ) + ' seconds'
            
        else:
            
            result = '{:.1f} seconds'.format( seconds )
            
        
    elif seconds == 1:
        
        result = '1 second'
        
    elif seconds > 0.1:
        
        result = '{} milliseconds'.format( int( seconds * 1000 ) )
        
    elif seconds > 0.01:
        
        # no int() here--truncating first would make the decimal places always zero
        result = '{:.1f} milliseconds'.format( seconds * 1000 )
        
    elif seconds > 0.001:
        
        result = '{:.2f} milliseconds'.format( seconds * 1000 )
        
    else:
        
        result = '{} microseconds'.format( int( seconds * 1000000 ) )
        
    
    return result
def ConvertTimestampToPrettyExpires( timestamp ):
    """Describe an expiry timestamp relative to now.
    
    None gives 'does not expire' and 0 gives 'unknown expiration'.
    Otherwise the result is 'expired <delta>' or 'expires <delta>'.
    A timestamp that cannot be processed gives 'unparseable time <timestamp>'.
    """
    
    if timestamp is None:
        
        return 'does not expire'
        
    
    if timestamp == 0:
        
        return 'unknown expiration'
        
    
    try:
        
        time_delta_string = TimestampToPrettyTimeDelta( timestamp )
        
        if TimeHasPassed( timestamp ):
            
            return 'expired ' + time_delta_string
            
        
        return 'expires ' + time_delta_string
        
    except Exception:
        
        # display helper--never let a bad timestamp propagate into the caller
        return 'unparseable time {}'.format( timestamp )
        
    
def ConvertTimestampToPrettyTime( timestamp, in_utc = False, include_24h_time = True ):
    """Format a unix timestamp as 'YYYY-MM-DD HH:MM:SS', or 'YYYY-MM-DD' when include_24h_time is False.
    
    in_utc renders in UTC and appends ' UTC'; otherwise local time is used.
    None gives 'no time given'. A timestamp the platform cannot convert gives 'unparseable time <timestamp>'.
    """
    
    if timestamp is None:
        
        return 'no time given'
        
    
    if include_24h_time:
        
        phrase = '%Y-%m-%d %H:%M:%S'
        
    else:
        
        phrase = '%Y-%m-%d'
        
    
    try:
        
        if in_utc:
            
            struct_time = time.gmtime( timestamp )
            
            phrase = phrase + ' UTC'
            
        else:
            
            struct_time = time.localtime( timestamp )
            
        
        return time.strftime( phrase, struct_time )
        
    except ( TypeError, ValueError, OverflowError, OSError ):
        
        # out-of-range or non-numeric timestamps fail in the time module
        return 'unparseable time {}'.format( timestamp )
        
    
def TimestampToPrettyTimeDelta( timestamp, just_now_string = 'now', just_now_threshold = 3, show_seconds = True ):
    """Describe a timestamp relative to the current time, e.g. '2 hours ago' or 'in 3 days'.
    
    None is treated as timestamp 0. Differences within just_now_threshold seconds give just_now_string;
    the threshold is raised to at least 60 when show_seconds is False.
    A timestamp that cannot be processed gives 'unparseable time <timestamp>'.
    """
    
    if timestamp is None:
        
        timestamp = 0
        
    
    if not show_seconds:
        
        just_now_threshold = max( just_now_threshold, 60 )
        
    
    try:
        
        time_delta = abs( timestamp - GetNow() )
        
        if time_delta <= just_now_threshold:
            
            return just_now_string
            
        
        time_delta_string = TimeDeltaToPrettyTimeDelta( time_delta, show_seconds = show_seconds )
        
        if TimeHasPassed( timestamp ):
            
            return time_delta_string + ' ago'
            
        
        return 'in ' + time_delta_string
        
    except Exception:
        
        # display helper--never let a bad timestamp propagate into the caller
        return 'unparseable time {}'.format( timestamp )
        
    
def ConvertUglyNamespaceToPrettyString( namespace ):
    """Return the namespace for display, substituting 'no namespace' for None or the empty string."""
    
    is_blank = namespace is None or namespace == ''
    
    return 'no namespace' if is_blank else namespace
def ConvertUglyNamespacesToPrettyStrings( namespaces ):
    """Sort the namespaces and return their display strings as a list."""
    
    pretty_strings = []
    
    for namespace in sorted( namespaces ):
        
        pretty_strings.append( ConvertUglyNamespaceToPrettyString( namespace ) )
        
    
    return pretty_strings
def ConvertUnitToInt( unit ):
    """Map 'B', 'KB', 'MB' or 'GB' to its byte multiplier; other values give None."""
    
    multipliers = { 'B' : 1, 'KB' : 1024, 'MB' : 1048576, 'GB' : 1073741824 }
    
    return multipliers.get( unit )
def ConvertValueRangeToBytes( value, range ):
    """Format progress as '<value>/<range>', both rendered by ToHumanBytes."""
    
    pretty_value = ToHumanBytes( value )
    pretty_range = ToHumanBytes( range )
    
    return pretty_value + '/' + pretty_range
def ConvertValueRangeToPrettyString( value, range ):
    """Format progress as '<value>/<range>', both rendered by ToHumanInt."""
    
    pretty_value = ToHumanInt( value )
    pretty_range = ToHumanInt( range )
    
    return pretty_value + '/' + pretty_range
def ConvertValueRangeToScanbarTimestampsMS( value_ms, range_ms ):
    """Format a position and total duration, both in milliseconds, as 'position/total' timestamps.
    
    The layout is chosen from the total so both halves share it:
    hours ('0:01:23.033/1:12:57.067'), minutes ('01:23.033/12:57.067') or seconds ('3.033/7.067').
    value_ms is rounded to an int first.
    """
    
    value_ms = int( round( value_ms ) )
    
    range_hours = range_ms // 3600000
    value_hours = value_ms // 3600000
    range_minutes = ( range_ms % 3600000 ) // 60000
    value_minutes = ( value_ms % 3600000 ) // 60000
    range_seconds = ( range_ms % 60000 ) // 1000
    value_seconds = ( value_ms % 60000 ) // 1000
    range_ms = range_ms % 1000
    value_ms = value_ms % 1000
    
    if range_hours > 0:
        
        # 0:01:23.033/1:12:57.067
        time_phrase = '{}:{:0>2}:{:0>2}.{:0>3}'
        
        args = ( value_hours, value_minutes, value_seconds, value_ms, range_hours, range_minutes, range_seconds, range_ms )
        
    elif range_minutes > 0:
        
        # 01:23.033/12:57.067 or 0:23.033/1:57.067
        if range_minutes > 9:
            
            time_phrase = '{:0>2}:{:0>2}.{:0>3}'
            
        else:
            
            time_phrase = '{:0>1}:{:0>2}.{:0>3}'
            
        
        args = ( value_minutes, value_seconds, value_ms, range_minutes, range_seconds, range_ms )
        
    else:
        
        # 23.033/57.067 or 3.033/7.067 or 0.033/0.067
        if range_seconds > 9:
            
            time_phrase = '{:0>2}.{:0>3}'
            
        else:
            
            time_phrase = '{:0>1}.{:0>3}'
            
        
        args = ( value_seconds, value_ms, range_seconds, range_ms )
        
    
    full_phrase = '{}/{}'.format( time_phrase, time_phrase )
    
    return full_phrase.format( *args )
def DebugPrint( debug_info ):
    """Send debug_info to Print."""
    
    Print( debug_info )
def DedupeList( xs ):
    """Return a list of the hashable items of xs with duplicates removed, keeping first-seen order."""
    
    xs_seen = set()
    
    xs_return = []
    
    for x in xs:
        
        if x in xs_seen:
            
            # already emitted once
            continue
            
        
        xs_return.append( x )
        
        xs_seen.add( x )
        
    
    return xs_return
def GenerateKey():
    """Return HC.HYDRUS_KEY_LENGTH random bytes from os.urandom."""
    
    key_length = HC.HYDRUS_KEY_LENGTH
    
    return os.urandom( key_length )
def Get64BitHammingDistance( phash1, phash2 ):
    """Count the bits that differ between two 8-byte hashes."""
    
    # read each hash as a network-order unsigned long long, then xor--every set bit is a difference
    ( first, ) = struct.unpack( '!Q', phash1 )
    ( second, ) = struct.unpack( '!Q', phash2 )
    
    n = first ^ second
    
    # parallel bit count: each fold adds neighbouring fields into sums twice as wide
    # https://stackoverflow.com/questions/9829578/fast-way-of-counting-non-zero-bits-in-positive-integer/9830282#9830282
    folds = (
        ( 0x5555555555555555, 0xAAAAAAAAAAAAAAAA, 1 ), # 10101010, 01010101
        ( 0x3333333333333333, 0xCCCCCCCCCCCCCCCC, 2 ), # 11001100, 00110011
        ( 0x0F0F0F0F0F0F0F0F, 0xF0F0F0F0F0F0F0F0, 4 ), # 11110000, 00001111
        ( 0x00FF00FF00FF00FF, 0xFF00FF00FF00FF00, 8 ),
        ( 0x0000FFFF0000FFFF, 0xFFFF0000FFFF0000, 16 )
    )
    
    for ( low_mask, high_mask, shift ) in folds:
        
        n = ( n & low_mask ) + ( ( n & high_mask ) >> shift )
        
    
    # the high half needs no mask here, since the >> 32 discards everything below it
    return ( n & 0x00000000FFFFFFFF ) + ( n >> 32 )
def GetEmptyDataDict():
    """Return a two-level defaultdict: missing outer keys become defaultdicts of lists (via default_dict_list)."""
    
    return collections.defaultdict( default_dict_list )
def GetNonDupeName( original_name, disallowed_names ):
    """Return original_name, or the first of 'name (1)', 'name (2)', ... that is not in disallowed_names."""
    
    candidate = original_name
    
    for suffix_number in itertools.count( 1 ):
        
        if candidate not in disallowed_names:
            
            return candidate
            
        
        candidate = original_name + ' (' + str( suffix_number ) + ')'
        
    
def GetNow():
    """Return the current time.time() truncated to an int."""
    
    now = time.time()
    
    return int( now )
def GetNowFloat():
    """Return the current time.time() as a float."""
    
    now = time.time()
    
    return now
def GetNowPrecise():
    """Return time.perf_counter(); only differences between two readings are meaningful."""
    
    reading = time.perf_counter()
    
    return reading
def GetSiblingProcessPorts( db_path, instance ):
    """Return the ports the process recorded in '<instance>_running' is listening on.
    
    Returns a list of ints when the recorded pid exists.
    Returns None when there is no marker file, it cannot be parsed, the pid is gone, or psutil raises.
    """
    
    path = os.path.join( db_path, instance + '_running' )
    
    if not os.path.exists( path ):
        
        return None
        
    
    with open( path, 'r', encoding = 'utf-8' ) as f:
        
        file_text = f.read()
        
    
    try:
        
        ( pid, create_time ) = HydrusText.DeserialiseNewlinedTexts( file_text )
        
        pid = int( pid )
        create_time = float( create_time ) # not used below, but a malformed value still marks the file as bad
        
    except ValueError:
        
        return None
        
    
    try:
        
        if psutil.pid_exists( pid ):
            
            p = psutil.Process( pid )
            
            return [ int( conn.laddr[1] ) for conn in p.connections() if conn.status == 'LISTEN' ]
            
        
    except psutil.Error:
        
        return None
        
    
    return None
def GetSubprocessEnv():
    """Build the environment for child processes, undoing the changes PyInstaller makes to it.
    
    Returns a modified copy of os.environ, or None when nothing needed changing.
    """
    
    if HG.subprocess_report_mode:
        
        ShowText( 'Your unmodified env is: {}'.format( os.environ.copy() ) )
        
    
    # NOTE(review): the condition that decides when to skip this clean-up was not visible here.
    # sys.frozen is the marker PyInstaller sets--confirm against the project's own frozen-build flag
    if not getattr( sys, 'frozen', False ):
        
        return None
        
    
    env = os.environ.copy()
    
    if HydrusBoot.ORIGINAL_PATH is not None:
        
        env[ 'PATH' ] = HydrusBoot.ORIGINAL_PATH
        
    
    # let's make a proper env for subprocess that doesn't have pyinstaller woo woo in it
    
    changes_made = False
    
    orig_swaperoo_strings = [ 'LD_LIBRARY_PATH', 'XDG_DATA_DIRS' ]
    ok_to_remove_absent_orig = [ 'LD_LIBRARY_PATH' ]
    
    for key in orig_swaperoo_strings:
        
        orig_key = '{}_ORIG'.format( key )
        
        if orig_key in env:
            
            env[ key ] = env[ orig_key ]
            
            changes_made = True
            
        elif key in env and key in ok_to_remove_absent_orig:
            
            del env[ key ]
            
            changes_made = True
            
        
    
    remove_if_hydrus_base_dir = [ 'QT_PLUGIN_PATH', 'QML2_IMPORT_PATH', 'SSL_CERT_FILE' ]
    
    hydrus_base_dir = HG.controller.GetDBDir()
    
    for key in remove_if_hydrus_base_dir:
        
        if key in env and env[ key ].startswith( hydrus_base_dir ):
            
            del env[ key ]
            
            changes_made = True
            
        
    
    # the fixes below use ':'-separated lists and /usr paths, so they only make sense on posix
    # NOTE(review): confirm this matches the project's own platform check
    if os.name == 'posix':
        
        if 'PATH' in env:
            
            # fix for pyinstaller, which drops this stuff for some reason and hence breaks ffmpeg
            
            path = env[ 'PATH' ]
            
            path_locations = set( path.split( ':' ) )
            
            for desired_path_location in [ '/usr/bin', '/usr/local/bin' ]:
                
                if desired_path_location not in path_locations:
                    
                    path = desired_path_location + ':' + path
                    
                    env[ 'PATH' ] = path
                    
                    changes_made = True
                    
                
            
        
        if 'XDG_DATA_DIRS' in env:
            
            xdg_data_dirs = env[ 'XDG_DATA_DIRS' ]
            
            # pyinstaller can just replace this nice usually long str with multiple paths with base_dir/share
            # absent the _orig above to rescue this, we'll populate with basic
            
            if ':' not in xdg_data_dirs and HC.BASE_DIR in xdg_data_dirs:
                
                # store the replacement--a local assignment alone would never reach the child process
                env[ 'XDG_DATA_DIRS' ] = '/usr/local/share:/usr/share'
                
                changes_made = True
                
            
        
    
    if not changes_made:
        
        env = None
        
    
    return env
def GetSubprocessHideTerminalStartupInfo():
    """Return a STARTUPINFO with STARTF_USESHOWWINDOW set where subprocess provides one, else None."""
    
    if not hasattr( subprocess, 'STARTUPINFO' ):
        
        # subprocess only provides STARTUPINFO on Windows
        return None
        
    
    # This suppresses the terminal window that tends to pop up when calling ffmpeg or whatever
    
    startupinfo = subprocess.STARTUPINFO()
    
    startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW
    
    return startupinfo
def GetSubprocessKWArgs( hide_terminal = True, text = False ):
    """Build the keyword arguments dict to pass to subprocess calls.
    
    Always includes 'env' (from GetSubprocessEnv).
    text = True adds text-mode options suited to the running Python version.
    hide_terminal = True adds 'startupinfo' (from GetSubprocessHideTerminalStartupInfo).
    """
    
    sbp_kwargs = {}
    
    sbp_kwargs[ 'env' ] = GetSubprocessEnv()
    
    if text:
        
        # probably need to override the stdXXX pipes with i/o encoding wrappers in the case of 3.5 here
        
        # compare whole version tuples so a future major version does not fail a minor-only test
        if sys.version_info >= ( 3, 6 ):
            
            sbp_kwargs[ 'encoding' ] = 'utf-8'
            
        
        if sys.version_info >= ( 3, 7 ):
            
            sbp_kwargs[ 'text' ] = True
            
        else:
            
            # 'text' only exists from 3.7--this is its older name
            sbp_kwargs[ 'universal_newlines' ] = True
            
        
    
    if hide_terminal:
        
        sbp_kwargs[ 'startupinfo' ] = GetSubprocessHideTerminalStartupInfo()
        
    
    if HG.subprocess_report_mode:
        
        message = 'KWargs are: {}'.format( sbp_kwargs )
        
        ShowText( message )
        
    
    return sbp_kwargs
def GetTimeDeltaSinceTime( timestamp ):
    """Return how many seconds ago timestamp was, or 0 when it is not in the past."""
    
    time_since = timestamp - GetNow()
    
    # a future timestamp has a positive difference, which is clipped to zero before the sign flip
    clipped = min( time_since, 0 )
    
    return - clipped
def GetTimeDeltaUntilTime( timestamp ):
    """Return the seconds from GetNow() until timestamp, never less than 0."""
    
    return max( timestamp - GetNow(), 0 )
def GetTimeDeltaUntilTimeFloat( timestamp ):
    """Return the seconds from GetNowFloat() until timestamp, never less than 0.0."""
    
    return max( timestamp - GetNowFloat(), 0.0 )
def GetTimeDeltaUntilTimePrecise( t ):
    """Return the seconds from GetNowPrecise() until t, never less than 0.0."""
    
    return max( t - GetNowPrecise(), 0.0 )
def GetTypeName( obj_type ):
    """Return obj_type.__name__ when it has one, otherwise its repr."""
    
    has_name = hasattr( obj_type, '__name__' )
    
    return obj_type.__name__ if has_name else repr( obj_type )
def GenerateHumanTextSortKey():
    """Solves the 19, 20, 200, 21, 22 issue when sorting 'Page 21.jpg' type strings.
    Breaks the string into groups of text and int (i.e. ( "Page ", 21, ".jpg" ) ).
    
    Returns a function suitable for use as a sort key. Text is lowercased before splitting.
    """
    
    def split_alphanum( t ):
        
        # with a capturing group, re.split alternates text, digits, text, ...
        # so the odd positions are exactly the [0-9]+ runs
        parts = re.split( '([0-9]+)', t.lower() )
        
        # convert by position, not by content: digit-like text that the regex did not capture stays a string,
        # so every key has strings at even positions and ints at odd ones and any two keys can be compared
        return tuple( int( part ) if i % 2 == 1 else part for ( i, part ) in enumerate( parts ) )
        
    
    return split_alphanum

HumanTextSortKey = GenerateHumanTextSortKey()
def HumanTextSort( texts ):
    """Sort the list of strings in place using HumanTextSortKey."""
    
    sort_key = HumanTextSortKey
    
    texts.sort( key = sort_key )
def IntelligentMassIntersect( sets_to_reduce ):
    """Return the intersection of every collection in sets_to_reduce as a new set.
    
    Stops as soon as the answer is known to be empty. No collections at all gives an empty set.
    """
    
    running = None
    
    for candidate in sets_to_reduce:
        
        if len( candidate ) == 0:
            
            return set()
            
        
        if running is None:
            
            # first collection--copy it so the caller's object is never mutated
            running = set( candidate )
            
            continue
            
        
        if len( running ) == 0:
            
            return set()
            
        
        running.intersection_update( candidate )
        
    
    return set() if running is None else running
def IsAlreadyRunning( db_path, instance ):
    """Report whether a different live process owns the '<instance>_running' marker in db_path.
    
    Returns True only when the recorded pid exists, is running, and has the recorded create time.
    Returns False when there is no marker, it cannot be parsed, it describes this very process, or psutil raises.
    """
    
    path = os.path.join( db_path, instance + '_running' )
    
    if not os.path.exists( path ):
        
        return False
        
    
    with open( path, 'r', encoding = 'utf-8' ) as f:
        
        file_text = f.read()
        
    
    try:
        
        ( pid, create_time ) = HydrusText.DeserialiseNewlinedTexts( file_text )
        
        pid = int( pid )
        create_time = float( create_time )
        
    except ValueError:
        
        return False
        
    
    try:
        
        me = psutil.Process()
        
        if me.pid == pid and me.create_time() == create_time:
            
            # this is me! there is no conflict, lol!
            # this happens when a linux process restarts with os.execl(), for instance (unlike Windows, it keeps its pid)
            
            return False
            
        
        if psutil.pid_exists( pid ):
            
            p = psutil.Process( pid )
            
            # the create time check guards against the pid having been reused by another process
            if p.create_time() == create_time and p.is_running():
                
                return True
                
            
        
    except psutil.Error:
        
        return False
        
    
    return False
def IterateHexPrefixes():
    """Yield every two-character lowercase hex string from '00' to 'ff' in ascending order."""
    
    hex_chars = '0123456789abcdef'
    
    for first in hex_chars:
        
        for second in hex_chars:
            
            yield first + second
            
        
    
def LastShutdownWasBad( db_path, instance ):
    """Return True when the '<instance>_running' marker file still exists in db_path, else False."""
    
    marker_path = os.path.join( db_path, instance + '_running' )
    
    return os.path.exists( marker_path )
def MassUnion( lists ):
    """Return a set of every item found in any of the given iterables."""
    
    all_items = itertools.chain.from_iterable( lists )
    
    return set( all_items )
def MedianPop( population ):
    """Remove and return the item at index len // 2 of the list."""
    
    # assume it has at least one and comes sorted
    
    return population.pop( len( population ) // 2 )
def MergeKeyToListDicts( key_to_list_dicts ):
    """Merge several key -> list dicts into one defaultdict, concatenating the lists of shared keys in order."""
    
    merged = collections.defaultdict( list )
    
    for source in key_to_list_dicts:
        
        for key in list( source.keys() ):
            
            merged[ key ].extend( source[ key ] )
            
        
    
    return merged
def PartitionIterator( pred: typing.Callable[ [ object ], bool ], stream: typing.Iterable[ object ] ):
    """Split stream into two lazy iterators: ( items failing pred, items passing pred )."""
    
    ( for_rejects, for_accepts ) = itertools.tee( stream )
    
    rejected = itertools.filterfalse( pred, for_rejects )
    accepted = filter( pred, for_accepts )
    
    return ( rejected, accepted )
def PartitionIteratorIntoLists( pred: typing.Callable[ [ object ], bool ], stream: typing.Iterable[ object ] ):
    """Like PartitionIterator, but both halves of the result are materialised as lists."""
    
    ( first, second ) = PartitionIterator( pred, stream )
    
    first_list = list( first )
    second_list = list( second )
    
    return ( first_list, second_list )
def ParseHashesFromRawHexText( hash_type, hex_hashes_raw ):
    """Parse newline-separated hex hashes into a de-duplicated tuple of bytes.
    
    hash_type is one of 'md5', 'sha1', 'sha256', 'sha512' and fixes the expected hex length.
    A 'type:' prefix on a line is dropped and each line is passed through HydrusText.HexFilter.
    
    Raises KeyError for an unknown hash_type, and Exception listing every hash of the wrong length.
    """
    
    hash_type_to_hex_length = {
        'md5' : 32,
        'sha1' : 40,
        'sha256' : 64,
        'sha512' : 128
    }
    
    hex_hashes = HydrusText.DeserialiseNewlinedTexts( hex_hashes_raw )
    
    # convert md5:abcd to abcd
    hex_hashes = [ hex_hash.split( ':' )[-1] for hex_hash in hex_hashes ]
    
    hex_hashes = [ HydrusText.HexFilter( hex_hash ) for hex_hash in hex_hashes ]
    
    expected_hex_length = hash_type_to_hex_length[ hash_type ]
    
    bad_hex_hashes = [ hex_hash for hex_hash in hex_hashes if len( hex_hash ) != expected_hex_length ]
    
    if len( bad_hex_hashes ):
        
        m = 'Sorry, {} hashes should have {} hex characters! These did not:'.format( hash_type, expected_hex_length )
        m += os.linesep * 2
        m += os.linesep.join( ( '{} ({} characters)'.format( bad_hex_hash, len( bad_hex_hash ) ) for bad_hex_hash in bad_hex_hashes ) )
        
        raise Exception( m )
        
    
    # every hash now has the expected (even) length, so bytes.fromhex needs no further length filtering
    hex_hashes = DedupeList( hex_hashes )
    
    hashes = tuple( [ bytes.fromhex( hex_hash ) for hex_hash in hex_hashes ] )
    
    return hashes
def Print( text ):
    """Print str( text ) to stdout, falling back to repr( text ) if that fails."""
    
    try:
        
        print( str( text ) )
        
    except Exception:
        
        # e.g. the output stream cannot encode the text--repr is a safer way to show it
        print( repr( text ) )
        
    

ShowText = Print
def PrintException( e, do_wait = True ):
    """Print an exception along with its traceback and the current call stack.
    
    A HydrusExceptions.ShutdownException is ignored.
    The traceback comes from the exception currently being handled, if there is one;
    otherwise only the type and message of e are shown.
    When do_wait is set, sleeps for one second after printing.
    """
    
    if isinstance( e, HydrusExceptions.ShutdownException ):
        
        return
        
    
    ( etype, value, tb ) = sys.exc_info()
    
    if etype is None:
        
        # called outside of an except block, so there is no live traceback to format
        etype = type( e )
        value = str( e )
        
        trace = 'No error trace'
        
    else:
        
        trace = ''.join( traceback.format_exception( etype, value, tb ) )
        
    
    stack = ''.join( traceback.format_stack() )
    
    message = str( etype.__name__ ) + ': ' + str( value ) + os.linesep + trace + os.linesep + stack
    
    Print( '' )
    
    Print( 'Exception:' )
    
    DebugPrint( message )
    
    if do_wait:
        
        time.sleep( 1 )
        
    

ShowException = PrintException
def Profile( summary, code, g, l, min_duration_ms = 20 ):
    """Run code under cProfile with globals g and locals l and report through HG.controller.PrintProfile.
    
    Runs longer than min_duration_ms get a details text holding the stats and callers tables, sorted by 'tottime'.
    Shorter runs get empty details and the time taken appended to summary.
    """
    
    profile = cProfile.Profile()
    
    started = GetNowPrecise()
    
    profile.runctx( code, g, l )
    
    finished = GetNowPrecise()
    
    time_took = finished - started
    time_took_ms = int( time_took * 1000.0 )
    
    if time_took_ms > min_duration_ms:
        
        output = io.StringIO()
        
        stats = pstats.Stats( profile, stream = output )
        
        stats.sort_stats( 'tottime' )
        
        output.write( 'Stats' )
        output.write( os.linesep * 2 )
        
        stats.print_stats()
        
        output.write( 'Callers' )
        output.write( os.linesep * 2 )
        
        stats.print_callers()
        
        details = output.getvalue()
        
    else:
        
        summary += ' - It took ' + TimeDeltaToPrettyTimeDelta( time_took ) + '.'
        
        details = ''
        
    
    HG.controller.PrintProfile( summary, details )
def PullNFromIterator( iterator, n ):
    """Take items from iterator until n have been collected or it runs out, and return them as a list."""
    
    pulled = []
    
    for item in iterator:
        
        pulled.append( item )
        
        if len( pulled ) == n:
            
            break
            
        
    
    return pulled
def RandomPop( population ):
    """Remove and return a randomly chosen item of the list."""
    
    last_index = len( population ) - 1
    
    return population.pop( random.randint( 0, last_index ) )
def RecordRunningStart( db_path, instance ):
    """Write this process's pid and create time, one per line, to '<instance>_running' in db_path.
    
    Nothing is written when psutil cannot describe the current process.
    """
    
    path = os.path.join( db_path, instance + '_running' )
    
    try:
        
        me = psutil.Process()
        
        record_string = str( me.pid ) + os.linesep + str( me.create_time() )
        
    except psutil.Error:
        
        # NOTE(review): the handler body was not visible; skipping the write avoids recording a half-filled marker
        return
        
    
    with open( path, 'w', encoding = 'utf-8' ) as f:
        
        f.write( record_string )
        
    
def RestartProcess():
    """Replace the current process with a fresh copy of itself via os.execv, after a one second pause."""
    
    time.sleep( 1 ) # time for ports to unmap
    
    exe = sys.executable
    me = sys.argv[0]
    
    # NOTE(review): the original condition was not visible; sys.frozen is the marker PyInstaller sets--confirm against the project's own flag
    if getattr( sys, 'frozen', False ):
        
        # we are running a frozen release--both exe and me are the built exe
        # wrap it in quotes because pyinstaller passes it on as raw text, breaking any path with spaces :/
        
        if not me.startswith( '"' ):
            
            me = '"{}"'.format( me )
            
        
        args = [ me ] + sys.argv[1:]
        
    else:
        
        # exe is python's exe, me is the script
        
        args = [ exe ] + sys.argv
        
    
    os.execv( exe, args )
def SampleSetByGettingFirst( s: set, n ):
    """Return a set of up to n items taken from the front of s's iteration order."""
    
    # sampling from a big set can be slow, so if we don't care about super random, let's just rip off the front and let __hash__ be our random
    
    n = min( len( s ), n )
    
    sample = set()
    
    if n == 0:
        
        return sample
        
    
    final_index = n - 1
    
    for ( position, obj ) in enumerate( s ):
        
        sample.add( obj )
        
        if position >= final_index:
            
            return sample
            
        
    
def SetsIntersect( a, b ):
    """Return True when a and b share at least one item; non-set arguments are converted to sets first."""
    
    # not a.isdisjoint( b )
    
    a = a if isinstance( a, set ) else set( a )
    b = b if isinstance( b, set ) else set( b )
    
    # walk the smaller collection and probe the larger one
    ( smaller, larger ) = ( b, a ) if len( a ) > len( b ) else ( a, b )
    
    return any( item in larger for item in smaller )
def SmoothOutMappingIterator( xs, n ):
    """Yield ( tag_item, chunk ) pairs from ( tag_item, hash_items ) pairs, with hash_items split into chunks of at most n."""
    
    # de-spikifies mappings, so if there is ( tag, 20k files ), it breaks that up into manageable chunks
    
    for ( tag_item, hash_items ) in xs:
        
        for chunk_of_hash_items in SplitIteratorIntoChunks( hash_items, n ):
            
            yield ( tag_item, chunk_of_hash_items )
            
        
    
def SplayListForDB( xs ):
    """Render the items as a parenthesised, comma-separated string such as '(1,2,3)'."""
    
    joined = ','.join( str( x ) for x in xs )
    
    return '(' + joined + ')'
def SplitIteratorIntoChunks( iterator, n ):
    """Yield lists of n consecutive items from iterator; the last list may be shorter."""
    
    current = []
    
    for item in iterator:
        
        current.append( item )
        
        if len( current ) != n:
            
            continue
            
        
        yield current
        
        current = []
        
    
    if len( current ) > 0:
        
        yield current
        
    
def SplitIteratorIntoAutothrottledChunks( iterator, starting_n, precise_time_to_stop ):
    """Yield chunks from iterator, resizing each from how fast the consumer handled the previous one.
    
    The first chunk has starting_n items. Afterwards the size may grow by at most four times per step
    and is capped at the number of items expected to fit before precise_time_to_stop.
    Once that time has passed, chunks shrink to one item.
    """
    
    # a plain list would otherwise be restarted from its beginning on every pull
    iterator = iter( iterator )
    
    n = starting_n
    
    chunk = PullNFromIterator( iterator, n )
    
    while len( chunk ) > 0:
        
        time_work_started = GetNowPrecise()
        
        yield chunk
        
        # the consumer does its work while we are suspended at the yield
        work_time = GetNowPrecise() - time_work_started
        
        if TimeHasPassedPrecise( precise_time_to_stop ):
            
            n = 1
            
        else:
            
            quad_speed = n * 4
            
            if work_time > 0:
                
                items_per_second = n / work_time
                
                time_remaining = precise_time_to_stop - GetNowPrecise()
                
                expected_items_in_remaining_time = max( 1, int( time_remaining * items_per_second ) )
                
                n = min( quad_speed, expected_items_in_remaining_time )
                
            else:
                
                # the work finished within timer resolution, so there is no rate to estimate--just speed up
                n = quad_speed
                
            
        
        chunk = PullNFromIterator( iterator, n )
        
    
def SplitListIntoChunks( xs, n ):
    """Yield consecutive slices of xs holding at most n items each; a set is converted to a list first."""
    
    items = list( xs ) if isinstance( xs, set ) else xs
    
    chunk_starts = range( 0, len( items ), n )
    
    for start in chunk_starts:
        
        yield items[ start : start + n ]
        
    
def SplitMappingIteratorIntoAutothrottledChunks( iterator, starting_n, precise_time_to_stop ):
    """Yield lists of ( tag_item, hash_items ) pairs whose combined hash_items count reaches a moving target.
    
    The target starts at starting_n and is resized after each chunk from how fast the consumer handled it.
    It may grow by at most four times per step, capped at what is expected to fit before precise_time_to_stop.
    Once that time has passed, the target drops to one. Any leftover pairs are yielded at the end.
    """
    
    n = starting_n
    
    chunk_weight = 0
    chunk = []
    
    for ( tag_item, hash_items ) in iterator:
        
        chunk.append( ( tag_item, hash_items ) )
        
        chunk_weight += len( hash_items )
        
        if chunk_weight < n:
            
            continue
            
        
        time_work_started = GetNowPrecise()
        
        yield chunk
        
        # the consumer does its work while we are suspended at the yield
        work_time = GetNowPrecise() - time_work_started
        
        chunk_weight = 0
        chunk = []
        
        if TimeHasPassedPrecise( precise_time_to_stop ):
            
            n = 1
            
        else:
            
            quad_speed = n * 4
            
            if work_time > 0:
                
                items_per_second = n / work_time
                
                time_remaining = precise_time_to_stop - GetNowPrecise()
                
                expected_items_in_remaining_time = max( 1, int( time_remaining * items_per_second ) )
                
                n = min( quad_speed, expected_items_in_remaining_time )
                
            else:
                
                # the work finished within timer resolution, so there is no rate to estimate--just speed up
                n = quad_speed
                
            
        
    
    if len( chunk ) > 0:
        
        yield chunk
        
    
def TimeHasPassed( timestamp ):
    """Return True when GetNow() is later than timestamp; a None timestamp never counts as passed."""
    
    return timestamp is not None and GetNow() > timestamp
def TimeHasPassedFloat( timestamp ):
    """Return True when GetNowFloat() is later than timestamp."""
    
    now = GetNowFloat()
    
    return now > timestamp
def TimeHasPassedPrecise( precise_timestamp ):
    """Return True when GetNowPrecise() is later than precise_timestamp."""
    
    now = GetNowPrecise()
    
    return now > precise_timestamp
def TimeUntil( timestamp ):
    """Return timestamp minus GetNow(); the result is negative for a timestamp in the past."""
    
    now = GetNow()
    
    return timestamp - now
def ToHumanBytes( size ):
    """Format a byte count with a 1024-based suffix, e.g. '3.1MB' or '23MB'.
    
    None gives 'unknown size'. Counts below 1024 are shown as whole bytes.
    Scaled values under 10 keep one decimal place; larger ones are shown without decimals.
    Sizes beyond the largest suffix stay in that suffix rather than failing.
    """
    
    if size is None:
        
        return 'unknown size'
        
    
    if size < 1024:
        
        return ToHumanInt( size ) + 'B'
        
    
    suffixes = ( '', 'K', 'M', 'G', 'T', 'P' )
    
    last_suffix_index = len( suffixes ) - 1
    
    suffix_index = 0
    
    # stop at the last suffix so that enormous sizes do not run off the end of the table
    while size >= 1024 and suffix_index < last_suffix_index:
        
        size = size / 1024
        
        suffix_index += 1
        
    
    suffix = suffixes[ suffix_index ]
    
    if size < 10.0:
        
        # 3.1MB
        
        return '{:.1f}{}B'.format( size, suffix )
        
    
    # 23MB
    
    return '{:.0f}{}B'.format( size, suffix )
def ToHumanInt( num ):
    """Convert num to an int and format it with comma thousands separators, e.g. '1,234,567'."""
    
    # this got stomped on by mpv, which resets locale
    #text = locale.format_string( '%d', num, grouping = True )
    
    return '{:,}'.format( int( num ) )
def WaitForProcessToFinish( p, timeout ):
    """Poll process p every two seconds until it exits.
    
    Raises Exception once more than timeout seconds have passed while it is still running.
    """
    
    started = GetNow()
    
    while True:
        
        if p.poll() is not None:
            
            return
            
        
        if TimeHasPassed( started + timeout ):
            
            raise Exception( 'Process did not finish within ' + ToHumanInt( timeout ) + ' seconds!' )
            
        
        time.sleep( 2 )
        
    
class HydrusYAMLBase( yaml.YAMLObject ):
    """Base class for YAML-serialisable objects in this module, bound to PyYAML's safe loader and dumper."""
    
    # use the safe variants for both directions
    yaml_dumper = yaml.SafeDumper
    yaml_loader = yaml.SafeLoader
    
class AccountType( HydrusYAMLBase ):
    """An account type: a title, a collection of permissions and a ( max bytes, max requests ) monthly allowance.
    
    Serialised to YAML under the '!AccountType' tag.
    """
    
    yaml_tag = '!AccountType'
    
    def __init__( self, title, permissions, max_monthly_data ):
        
        HydrusYAMLBase.__init__( self )
        
        self._title = title
        self._permissions = permissions
        self._max_monthly_data = max_monthly_data # ( max_num_bytes, max_num_requests ), where None means no limit
        
    
    def __repr__( self ):
        
        return self.ConvertToString()
        
    
    def GetPermissions( self ):
        
        return self._permissions
        
    
    def GetTitle( self ):
        
        return self._title
        
    
    def GetMaxBytes( self ):
        
        ( max_num_bytes, _ ) = self._max_monthly_data
        
        return max_num_bytes
        
    
    def GetMaxRequests( self ):
        
        ( _, max_num_requests ) = self._max_monthly_data
        
        return max_num_requests
        
    
    def GetMaxBytesString( self ):
        
        ( max_num_bytes, _ ) = self._max_monthly_data
        
        if max_num_bytes is None:
            
            return 'No limit'
            
        
        return ToHumanBytes( max_num_bytes )
        
    
    def GetMaxRequestsString( self ):
        
        ( _, max_num_requests ) = self._max_monthly_data
        
        if max_num_requests is None:
            
            return 'No limit'
            
        
        return ToHumanInt( max_num_requests )
        
    
    def ConvertToString( self ):
        
        if self._permissions == [ HC.UNKNOWN_PERMISSION ]:
            
            permissions_string = 'no permissions'
            
        else:
            
            permissions_string = ', '.join( [ HC.permissions_string_lookup[ permission ] for permission in self._permissions ] ) + ' permissions'
            
        
        return self._title + ' with ' + permissions_string
        
    
    def IsUnknownAccountType( self ):
        
        return self._permissions == [ HC.UNKNOWN_PERMISSION ]
        
    
    def HasPermission( self, permission ):
        
        return permission in self._permissions
        
    

# let sqlite3 store an AccountType by dumping it to YAML text
sqlite3.register_adapter( AccountType, yaml.safe_dump )
class BigJobPauser( object ):
    """Sleeps for wait_time seconds in Pause(), at most once every period seconds, so long jobs yield regularly."""
    
    def __init__( self, period = 10, wait_time = 0.1 ):
        
        self._period = period
        self._wait_time = wait_time
        
        self._next_pause = GetNow() + self._period
        
    
    def Pause( self ):
        """Sleep if the next pause is due, then schedule the one after; otherwise return at once."""
        
        if not TimeHasPassed( self._next_pause ):
            
            return
            
        
        time.sleep( self._wait_time )
        
        self._next_pause = GetNow() + self._period
        
    
class Call( object ):
    """A callable bundled with its arguments, to be invoked later with no parameters.
    
    Calling the object runs the function and discards its return value.
    """
    
    def __init__( self, func, *args, **kwargs ):
        
        ( self._func, self._args, self._kwargs ) = ( func, args, kwargs )
        
    
    def __call__( self ):
        
        func = self._func
        
        func( *self._args, **self._kwargs )
        
    
    def __repr__( self ):
        
        description = repr( ( self._func, self._args, self._kwargs ) )
        
        return 'Call: ' + description
        
    
class ContentUpdate( object ):
    """One content change: a data type, an action, a row payload whose shape depends on both, and an optional reason.
    
    Equality and hashing use the data type, the action and the repr of the row. The reason is ignored.
    """
    
    def __init__( self, data_type, action, row, reason = None ):
        
        self._data_type = data_type
        self._action = action
        self._row = row
        self._reason = reason
        
    
    def __eq__( self, other ):
        
        if isinstance( other, ContentUpdate ):
            
            # compare the identifying values themselves--equal hashes alone do not prove equality
            return ( self._data_type, self._action, repr( self._row ) ) == ( other._data_type, other._action, repr( other._row ) )
            
        
        return NotImplemented
        
    
    def __hash__( self ):
        
        return hash( ( self._data_type, self._action, repr( self._row ) ) )
        
    
    def __repr__( self ):
        
        return 'Content Update: ' + str( ( self._data_type, self._action, self._row, self._reason ) )
        
    
    def GetAction( self ):
        
        return self._action
        
    
    def GetDataType( self ):
        
        return self._data_type
        
    
    def GetHashes( self ):
        """Return the set of file hashes this update touches; empty when the row carries none."""
        
        # any data type or action not handled below keeps this empty default
        hashes = set()
        
        if self._data_type == HC.CONTENT_TYPE_FILES:
            
            if self._action == HC.CONTENT_UPDATE_ADVANCED:
                
                hashes = set()
                
            elif self._action == HC.CONTENT_UPDATE_ADD:
                
                ( file_info_manager, timestamp ) = self._row
                
                hashes = { file_info_manager.hash }
                
            else:
                
                hashes = self._row
                
            
        elif self._data_type == HC.CONTENT_TYPE_DIRECTORIES:
            
            hashes = set()
            
        elif self._data_type == HC.CONTENT_TYPE_URLS:
            
            ( urls, hashes ) = self._row
            
        elif self._data_type == HC.CONTENT_TYPE_MAPPINGS:
            
            if self._action == HC.CONTENT_UPDATE_ADVANCED:
                
                hashes = set()
                
            else:
                
                ( tag, hashes ) = self._row
                
            
        elif self._data_type == HC.CONTENT_TYPE_RATINGS:
            
            if self._action == HC.CONTENT_UPDATE_ADD:
                
                ( rating, hashes ) = self._row
                
            
        elif self._data_type == HC.CONTENT_TYPE_NOTES:
            
            if self._action == HC.CONTENT_UPDATE_SET:
                
                ( hash, name, note ) = self._row
                
                hashes = { hash }
                
            elif self._action == HC.CONTENT_UPDATE_DELETE:
                
                ( hash, name ) = self._row
                
                hashes = { hash }
                
            
        elif self._data_type == HC.CONTENT_TYPE_FILE_VIEWING_STATS:
            
            if self._action == HC.CONTENT_UPDATE_ADD:
                
                ( hash, preview_views_delta, preview_viewtime_delta, media_views_delta, media_viewtime_delta ) = self._row
                
                hashes = { hash }
                
            elif self._action == HC.CONTENT_UPDATE_DELETE:
                
                hashes = self._row
                
            
        
        if not isinstance( hashes, set ):
            
            hashes = set( hashes )
            
        
        return hashes
        
    
    def GetReason( self ):
        
        if self._reason is None:
            
            return 'No reason given.'
            
        
        return self._reason
        
    
    def GetRow( self ):
        
        return self._row
        
    
    def GetWeight( self ):
        
        return len( self.GetHashes() )
        
    
    def IsInboxRelated( self ):
        
        # NOTE(review): this method had no body here; reconstructed as 'the action archives or inboxes'--confirm these constant names against HydrusConstants
        return self._action in ( HC.CONTENT_UPDATE_ARCHIVE, HC.CONTENT_UPDATE_INBOX )
        
    
    def ToTuple( self ):
        
        return ( self._data_type, self._action, self._row )
        
    
class JobDatabase( object ):
    """A unit of database work: a job type, an action with its arguments, and a slot for the result.
    
    The worker hands the outcome over with PutResult and the requester blocks in GetResult until it arrives.
    """
    
    def __init__( self, job_type, synchronous, action, *args, **kwargs ):
        
        self._type = job_type
        self._synchronous = synchronous
        self._action = action
        self._args = args
        self._kwargs = kwargs
        
        self._result = None
        self._result_ready = threading.Event()
        
    
    def __str__( self ):
        
        return 'DB Job: {}'.format( self.ToString() )
        
    
    def _DoDelayedResultRelief( self ):
        
        # NOTE(review): this method had no body here; kept as a no-op hook that GetResult calls while the result is overdue
        pass
        
    
    def GetCallableTuple( self ):
        
        return ( self._action, self._args, self._kwargs )
        
    
    def GetResult( self ):
        """Block until a result has been put, then return it. A result that is an Exception is raised instead.
        
        Raises HydrusExceptions.ShutdownException if HG.model_shutdown is set while still waiting.
        """
        
        time.sleep( 0.00001 ) # this one neat trick can save hassle on superquick jobs as event.wait can be laggy
        
        while True:
            
            if self._result_ready.wait( 2 ):
                
                break
                
            elif HG.model_shutdown:
                
                raise HydrusExceptions.ShutdownException( 'Application quit before db could serve result!' )
                
            
            self._DoDelayedResultRelief()
            
        
        if isinstance( self._result, Exception ):
            
            e = self._result
            
            raise e
            
        
        return self._result
        
    
    def GetType( self ):
        
        return self._type
        
    
    def IsSynchronous( self ):
        
        return self._synchronous
        
    
    def PutResult( self, result ):
        
        self._result = result
        
        # wake the requester--without this GetResult would wait forever
        self._result_ready.set()
        
    
    def ToString( self ):
        
        return '{} {}'.format( self._type, self._action )
        
    
class ServiceUpdate( object ):
    """A service-level change: an action plus an optional row payload."""
    
    def __init__( self, action, row = None ):
        
        ( self._action, self._row ) = ( action, row )
        
    
    def ToTuple( self ):
        """Return ( action, row )."""
        
        as_tuple = ( self._action, self._row )
        
        return as_tuple
        
    