hydrus/include/HydrusData.py

1541 lines
36 KiB
Python
Raw Normal View History

2015-03-25 22:04:19 +00:00
import bs4
import collections
2015-11-25 22:00:57 +00:00
import cProfile
import cStringIO
2015-03-25 22:04:19 +00:00
import HydrusConstants as HC
import HydrusExceptions
2017-05-10 21:33:58 +00:00
import HydrusGlobals as HG
2015-06-03 21:05:13 +00:00
import HydrusSerialisable
2017-12-13 22:33:07 +00:00
import HydrusText
2015-03-25 22:04:19 +00:00
import locale
import os
2015-11-25 22:00:57 +00:00
import pstats
2015-08-19 21:48:21 +00:00
import psutil
2016-11-30 20:24:17 +00:00
import random
2018-07-18 21:07:15 +00:00
import re
2015-09-16 18:11:00 +00:00
import shutil
2015-03-25 22:04:19 +00:00
import sqlite3
2017-01-25 22:56:55 +00:00
import struct
2015-06-17 20:01:41 +00:00
import subprocess
2015-03-25 22:04:19 +00:00
import sys
import threading
import time
import traceback
import yaml
import itertools
def default_dict_list():
    """Return a fresh defaultdict whose missing keys start out as empty lists."""
    
    return collections.defaultdict( list )
def default_dict_set():
    """Return a fresh defaultdict whose missing keys start out as empty sets."""
    
    return collections.defaultdict( set )
def BuildKeyToListDict( pairs ):
    """Group ( key, value ) pairs into a defaultdict of key -> list of values, in the order encountered."""
    
    key_to_values = collections.defaultdict( list )
    
    for pair in pairs:
        
        ( key, value ) = pair
        
        key_to_values[ key ].append( value )
        
    
    return key_to_values
def BuildKeyToSetDict( pairs ):
    """Group ( key, value ) pairs into a defaultdict of key -> set of distinct values."""
    
    key_to_values = collections.defaultdict( set )
    
    for pair in pairs:
        
        ( key, value ) = pair
        
        key_to_values[ key ].add( value )
        
    
    return key_to_values
def CalculateScoreFromRating( count, rating ):
    """Turn an average rating (0.0-1.0) over 'count' votes into a confidence-adjusted score.
    
    The formula is taken from http://www.evanmiller.org/how-not-to-sort-by-average-rating.html
    The constants are consistent with z = 1.96 (1.9208 = z^2 / 2, 0.9604 = z^2 / 4, 3.8416 = z^2).
    The original author was not certain the transcription from that page is exact.
    A count of zero divides by zero.
    """
    
    count = float( count )
    
    # positive + negative = count
    positive = count * rating
    negative = count * ( 1.0 - rating )
    
    centre = ( positive + 1.9208 ) / count
    spread = 1.96 * ( ( ( positive * negative ) / count + 0.9604 ) ** 0.5 ) / count
    
    return ( centre - spread ) / ( 1 + 3.8416 / count )
2016-06-15 18:59:44 +00:00
def CleanRunningFile( db_path, instance ):
    """Delete the '<instance>_running' marker file inside db_path, if it is there.
    
    This is best-effort: if the file is missing or cannot be removed, the failure is ignored.
    """
    
    path = os.path.join( db_path, instance + '_running' )
    
    try:
        
        os.remove( path )
        
    except OSError:
        
        # already gone, locked, no permission--nothing useful we can do about it here
        pass
2017-04-05 21:16:40 +00:00
def ConvertFloatToPercentage( f ):
    """Render a fraction as a percentage with one decimal place, e.g. 0.5 -> '50.0%'."""
    
    percentage = f * 100
    
    return '%.1f%%' % percentage
2015-03-25 22:04:19 +00:00
def ConvertIntToBytes( size ):
    """Render a byte count as short human text such as '512B', '1.5KB' or '20MB'.
    
    None gives 'unknown size'. Values under 1024 are shown as a whole number of bytes.
    Larger values are divided by 1024 until they fit, showing one decimal place below 10 and none above.
    Anything too big for the largest suffix (PB) is shown as a large number of PB.
    """
    
    if size is None:
        
        return 'unknown size'
        
    
    if size < 1024:
        
        return ToHumanInt( size ) + 'B'
        
    
    suffixes = ( '', 'K', 'M', 'G', 'T', 'P' )
    
    last_suffix_index = len( suffixes ) - 1
    
    suffix_index = 0
    
    size = float( size )
    
    # stop at the last suffix we have, so an absurdly large number cannot run off the end of the tuple
    while size >= 1024.0 and suffix_index < last_suffix_index:
        
        size = size / 1024.0
        
        suffix_index += 1
        
    
    if size < 10.0:
        
        number_text = '%.1f' % size
        
    else:
        
        number_text = '%.0f' % size
        
    
    return number_text + suffixes[ suffix_index ] + 'B'
2015-03-25 22:04:19 +00:00
2016-09-07 20:01:05 +00:00
def ConvertIntToFirst( n ):
    """Return the integer n followed by its English ordinal suffix, e.g. 1 -> '1st', 12 -> '12th', 23 -> '23rd'."""
    
    last_digit = n % 10
    
    # explicit floor division, so the 'teens' test does not depend on what '/' means for ints
    tens_digit_is_one = n // 10 % 10 == 1
    
    if last_digit < 4 and not tens_digit_is_one:
        
        suffix = ( 'th', 'st', 'nd', 'rd' )[ last_digit ]
        
    else:
        
        # 11, 12, 13 (and 111, 212...) and everything ending 4-9
        suffix = 'th'
        
    
    return '%d%s' % ( n, suffix )
2015-04-08 18:10:50 +00:00
def ConvertIntToPixels( i ):
    """Map a pixel multiplier to its unit name: 1 -> 'pixels', 1000 -> 'kilopixels', anything else -> 'megapixels'."""
    
    smaller_units = ( ( 1, 'pixels' ), ( 1000, 'kilopixels' ) )
    
    for ( multiplier, unit_name ) in smaller_units:
        
        if i == multiplier:
            
            return unit_name
            
        
    
    # 1000000 and any unrecognised value both land here
    return 'megapixels'
2015-04-08 18:10:50 +00:00
2016-10-19 20:02:56 +00:00
def ConvertIntToPrettyOrdinalString( num ):
    """Return num formatted by ToHumanInt with its English ordinal suffix appended ('st', 'nd', 'rd' or 'th')."""
    
    if 11 <= num % 100 <= 13:
        
        # eleventh, twelfth, thirteenth are the exceptions to the last-digit rule
        ordinal = 'th'
        
    else:
        
        last_digit_to_ordinal = { 1 : 'st', 2 : 'nd', 3 : 'rd' }
        
        ordinal = last_digit_to_ordinal.get( num % 10, 'th' )
        
    
    return ToHumanInt( num ) + ordinal
2015-11-11 21:20:41 +00:00
2015-04-22 22:57:25 +00:00
def ConvertIntToUnit( unit ):
    """Map a byte multiplier (1, 1024, 1048576, 1073741824) to 'B', 'KB', 'MB' or 'GB'. Other values give None."""
    
    multipliers_to_names = ( ( 1, 'B' ), ( 1024, 'KB' ), ( 1048576, 'MB' ), ( 1073741824, 'GB' ) )
    
    for ( multiplier, unit_name ) in multipliers_to_names:
        
        if unit == multiplier:
            
            return unit_name
            
        
    
    return None
2015-03-25 22:04:19 +00:00
def ConvertMillisecondsToPrettyTime( ms ):
    """Render an integer number of milliseconds as short human text, showing the two most relevant units.
    
    An hour or more gives 'H hours M minutes', a minute or more gives 'M minutes S seconds',
    a second or more gives seconds to one decimal place (e.g. '1.5 seconds'), and anything
    smaller gives whole milliseconds.
    """
    
    def pluralise( quantity, unit ):
        
        if quantity == 1:
            
            return '1 ' + unit
            
        
        return str( quantity ) + ' ' + unit + 's'
        
    
    # divmod/floor division keep these as whole units regardless of what '/' does with ints
    ( hours, ms ) = divmod( ms, 3600000 )
    ( minutes, ms ) = divmod( ms, 60000 )
    
    seconds = ms // 1000
    
    if hours > 0:
        
        return pluralise( hours, 'hour' ) + ' ' + pluralise( minutes, 'minute' )
        
    
    if minutes > 0:
        
        return pluralise( minutes, 'minute' ) + ' ' + pluralise( seconds, 'second' )
        
    
    if seconds > 0:
        
        # under a minute we show tenths of a second; note exactly one second reads '1.0 seconds'
        return '%.1f' % ( float( ms ) / 1000.0 ) + ' seconds'
        
    
    # here ms is less than 1000, so it is already the millisecond remainder
    return pluralise( ms, 'millisecond' )
def ConvertNumericalRatingToPrettyString( lower, upper, rating, rounded_result = False, out_of = True ):
    """Scale a 0.0-1.0 rating onto the lower-upper range and render it to two decimal places.
    
    With rounded_result, the scaled value is rounded to a whole number before formatting.
    With out_of, '/<upper>' is appended, but only when lower is 0 or 1.
    """
    
    rating_converted = lower + ( rating * ( upper - lower ) )
    
    if rounded_result:
        
        rating_converted = round( rating_converted )
        
    
    pretty = '%.2f' % rating_converted
    
    if out_of and lower in ( 0, 1 ):
        
        pretty += '/%.2f' % upper
        
    
    return pretty
2015-04-22 22:57:25 +00:00
def ConvertPixelsToInt( unit ):
    """Map 'pixels', 'kilopixels' or 'megapixels' to 1, 1000 or 1000000. Other values give None."""
    
    names_to_multipliers = ( ( 'pixels', 1 ), ( 'kilopixels', 1000 ), ( 'megapixels', 1000000 ) )
    
    for ( unit_name, multiplier ) in names_to_multipliers:
        
        if unit == unit_name:
            
            return multiplier
            
        
    
    return None
2015-03-25 22:04:19 +00:00
def ConvertPrettyStringsToUglyNamespaces( pretty_strings ):
    """Return the given display strings as a set of namespaces, with 'no namespace' replaced by the empty string."""
    
    ugly_namespaces = set()
    
    for pretty_string in pretty_strings:
        
        if pretty_string != 'no namespace':
            
            ugly_namespaces.add( pretty_string )
            
        
    
    if 'no namespace' in pretty_strings:
        
        ugly_namespaces.add( '' )
        
    
    return ugly_namespaces
2018-02-28 22:30:36 +00:00
def ConvertResolutionToPrettyString( resolution ):
    """Render a ( width, height ) pair as '<width>x<height>', each number formatted by ToHumanInt.
    
    The pair is unpacked in the body rather than in the signature, since tuple parameters
    are Python 2-only syntax. Callers still pass one two-item argument exactly as before.
    """
    
    ( width, height ) = resolution
    
    return ToHumanInt( width ) + 'x' + ToHumanInt( height )
2018-02-28 22:30:36 +00:00
def ConvertStatusToPrefix( status ):
    """Return the short display prefix for a content status: '' for current, '(+) ' pending, '(-) ' petitioned, '(X) ' deleted.
    
    Any other status gives None.
    """
    
    statuses_to_prefixes = (
        ( HC.CONTENT_STATUS_CURRENT, '' ),
        ( HC.CONTENT_STATUS_PENDING, '(+) ' ),
        ( HC.CONTENT_STATUS_PETITIONED, '(-) ' ),
        ( HC.CONTENT_STATUS_DELETED, '(X) ' )
    )
    
    for ( known_status, prefix ) in statuses_to_prefixes:
        
        if status == known_status:
            
            return prefix
            
        
    
    return None
2018-07-04 20:48:28 +00:00
def TimeDeltaToPrettyTimeDelta( seconds ):
    """Render a duration in seconds as short human text.
    
    None gives 'per month'. Negative durations are shown by their absolute value.
    A minute or more is truncated to whole seconds and shown as at most two adjacent units
    (e.g. '1 month 2 days', or just '1 year' if the next unit down is zero), where a month
    counts as 30 days and a year as 12 such months.
    Under a minute, the result is in seconds, milliseconds or microseconds, with more
    decimal places as the value gets smaller.
    """
    
    if seconds is None:
        
        return 'per month'
        
    
    if seconds < 0:
        
        seconds = abs( seconds )
        
    
    if seconds >= 60:
        
        seconds = int( seconds )
        
        MINUTE = 60
        HOUR = 60 * MINUTE
        DAY = 24 * HOUR
        MONTH = 30 * DAY
        YEAR = 12 * MONTH
        
        units = ( ( 'year', YEAR ), ( 'month', MONTH ), ( 'day', DAY ), ( 'hour', HOUR ), ( 'minute', MINUTE ), ( 'second', 1 ) )
        
        components = []
        
        for ( unit_name, unit_length ) in units:
            
            ( quantity, seconds ) = divmod( seconds, unit_length )
            
            if quantity > 0:
                
                component = ToHumanInt( quantity ) + ' ' + unit_name
                
                if quantity > 1:
                    
                    component += 's'
                    
                
                components.append( component )
                
                if len( components ) == 2: # we now have 1 month 2 days
                    
                    break
                    
                
            elif len( components ) > 0: # something like '1 year' -- in which case we do not care about the days and hours
                
                break
                
            
        
        return ' '.join( components )
        
    
    if seconds > 1:
        
        if int( seconds ) == seconds:
            
            return ToHumanInt( seconds ) + ' seconds'
            
        
        return '%.1f' % seconds + ' seconds'
        
    
    if seconds == 1:
        
        return '1 second'
        
    
    if seconds > 0.1:
        
        return '%d' % ( seconds * 1000 ) + ' milliseconds'
        
    
    if seconds > 0.01:
        
        return '%.1f' % ( seconds * 1000 ) + ' milliseconds'
        
    
    if seconds > 0.001:
        
        return '%.2f' % ( seconds * 1000 ) + ' milliseconds'
        
    
    return '%d' % ( seconds * 1000000 ) + ' microseconds'
2015-03-25 22:04:19 +00:00
def ConvertTimestampToPrettyExpires( timestamp ):
    """Describe an expiry timestamp as text.
    
    None gives 'does not expire' and 0 gives 'unknown expiration'. Otherwise the result is
    'expired ' or 'expires ' followed by whatever TimestampToPrettyTimeDelta says.
    """
    
    if timestamp is None:
        
        return 'does not expire'
        
    
    if timestamp == 0:
        
        return 'unknown expiration'
        
    
    time_delta_string = TimestampToPrettyTimeDelta( timestamp )
    
    prefix = 'expired ' if TimeHasPassed( timestamp ) else 'expires '
    
    return prefix + time_delta_string
2017-11-29 21:48:23 +00:00
2018-02-07 23:40:33 +00:00
def ConvertTimestampToPrettyTime( timestamp, in_gmt = False, include_24h_time = True ):
    """Format a unix timestamp as 'YYYY/MM/DD HH:MM:SS', or just 'YYYY/MM/DD' without include_24h_time.
    
    By default the time is local. With in_gmt it is GMT and ' GMT' is appended.
    """
    
    phrase = '%Y/%m/%d %H:%M:%S' if include_24h_time else '%Y/%m/%d'
    
    if in_gmt:
        
        return time.strftime( phrase + ' GMT', time.gmtime( timestamp ) )
        
    
    return time.strftime( phrase, time.localtime( timestamp ) )
2017-09-20 19:47:31 +00:00
2018-07-18 21:07:15 +00:00
def TimestampToPrettyTimeDelta( timestamp, just_now_string = 'now', just_now_threshold = 3 ):
    """Describe a timestamp relative to now, e.g. '2 days ago' or 'in 5 minutes'.
    
    If the client's 'always_show_iso_time' option is set, the absolute time is returned instead.
    Timestamps within just_now_threshold seconds of now give just_now_string.
    """
    
    if HG.client_controller.new_options.GetBoolean( 'always_show_iso_time' ):
        
        return ConvertTimestampToPrettyTime( timestamp )
        
    
    time_delta = abs( timestamp - GetNow() )
    
    if time_delta <= just_now_threshold:
        
        return just_now_string
        
    
    time_delta_string = TimeDeltaToPrettyTimeDelta( time_delta )
    
    if not TimeHasPassed( timestamp ):
        
        return 'in ' + time_delta_string
        
    
    return time_delta_string + ' ago'
2017-09-20 19:47:31 +00:00
2015-03-25 22:04:19 +00:00
2017-05-31 21:50:53 +00:00
def ConvertUglyNamespaceToPrettyString( namespace ):
    """Return the namespace for display: None and the empty string become 'no namespace', anything else is unchanged."""
    
    is_empty = namespace is None or namespace == ''
    
    return 'no namespace' if is_empty else namespace
2015-03-25 22:04:19 +00:00
def ConvertUglyNamespacesToPrettyStrings( namespaces ):
    """Return the namespaces sorted and converted for display, as a list."""
    
    return [ ConvertUglyNamespaceToPrettyString( namespace ) for namespace in sorted( namespaces ) ]
2015-04-22 22:57:25 +00:00
def ConvertUnitToInt( unit ):
    """Map 'B', 'KB', 'MB' or 'GB' to its size in bytes. Other values give None."""
    
    names_to_multipliers = ( ( 'B', 1 ), ( 'KB', 1024 ), ( 'MB', 1048576 ), ( 'GB', 1073741824 ) )
    
    for ( unit_name, multiplier ) in names_to_multipliers:
        
        if unit == unit_name:
            
            return multiplier
            
        
    
    return None
2016-03-23 19:42:56 +00:00
def ConvertValueRangeToBytes( value, range ):
    """Render progress through a byte count as '<value>/<range>', both formatted by ConvertIntToBytes."""
    
    value_text = ConvertIntToBytes( value )
    range_text = ConvertIntToBytes( range )
    
    return value_text + '/' + range_text
2015-06-10 19:40:25 +00:00
def ConvertValueRangeToPrettyString( value, range ):
    """Render progress through a count as '<value>/<range>', both formatted by ToHumanInt."""
    
    value_text = ToHumanInt( value )
    range_text = ToHumanInt( range )
    
    return value_text + '/' + range_text
2015-06-10 19:40:25 +00:00
2015-03-25 22:04:19 +00:00
def DebugPrint( debug_info ):
    """Print debug_info, then flush stdout and stderr so it shows up immediately."""
    
    Print( debug_info )
    
    for stream in ( sys.stdout, sys.stderr ):
        
        stream.flush()
2018-07-04 20:48:28 +00:00
def DedupeList( xs ):
    """Return a new list of the items of xs with later duplicates dropped, keeping first-seen order.
    
    Items must be hashable.
    """
    
    seen = set()
    deduped = []
    
    for x in xs:
        
        if x not in seen:
            
            deduped.append( x )
            
            seen.add( x )
            
        
    
    return deduped
2016-11-02 21:09:14 +00:00
def EncodeBytes( encoding, data ):
    """Convert data to a byte string and encode it as raw (unchanged), hex or base64, according to the HC.ENCODING_* value.
    
    Raises ValueError for an encoding that is not one of those three.
    """
    
    data = ToByteString( data )
    
    if encoding == HC.ENCODING_RAW:
        
        return data
        
    elif encoding == HC.ENCODING_HEX:
        
        return data.encode( 'hex' )
        
    elif encoding == HC.ENCODING_BASE64:
        
        return data.encode( 'base64' )
        
    
    # previously this fell through to an UnboundLocalError, which said nothing about the real problem
    raise ValueError( 'Unknown encoding: ' + repr( encoding ) )
2015-07-01 22:02:07 +00:00
def GenerateKey():
    """Return HC.HYDRUS_KEY_LENGTH random bytes from os.urandom."""
    
    key_length = HC.HYDRUS_KEY_LENGTH
    
    return os.urandom( key_length )
2017-01-25 22:56:55 +00:00
def Get64BitHammingDistance( phash1, phash2 ):
    """Return the number of bits that differ between two 8-byte strings.
    
    Each argument is unpacked as one big-endian unsigned 64-bit integer, so anything that is
    not exactly 8 bytes long raises struct.error.
    """
    
    # old way of doing this was:
    #while xor > 0:
    #    
    #    distance += 1
    #    xor &= xor - 1
    #    
    
    # convert to unsigned long long, then xor
    # then count the set bits by summing ever-wider adjacent fields in parallel
    # Here it is: https://stackoverflow.com/questions/9829578/fast-way-of-counting-non-zero-bits-in-positive-integer/9830282#9830282
    
    ( a, ) = struct.unpack( '!Q', phash1 )
    ( b, ) = struct.unpack( '!Q', phash2 )
    
    bits = a ^ b
    
    # each line adds neighbouring fields together: 1-bit fields into 2-bit sums, then 2 into 4, and so on
    bits = ( bits & 0x5555555555555555 ) + ( ( bits >> 1 ) & 0x5555555555555555 )
    bits = ( bits & 0x3333333333333333 ) + ( ( bits >> 2 ) & 0x3333333333333333 )
    bits = ( bits & 0x0F0F0F0F0F0F0F0F ) + ( ( bits >> 4 ) & 0x0F0F0F0F0F0F0F0F )
    bits = ( bits & 0x00FF00FF00FF00FF ) + ( ( bits >> 8 ) & 0x00FF00FF00FF00FF )
    bits = ( bits & 0x0000FFFF0000FFFF ) + ( ( bits >> 16 ) & 0x0000FFFF0000FFFF )
    
    # no mask needed on the shifted half here--after >> 32 only the top 32 bits are left
    bits = ( bits & 0x00000000FFFFFFFF ) + ( bits >> 32 )
    
    return bits
2015-03-25 22:04:19 +00:00
2017-06-07 22:05:15 +00:00
def GetEmptyDataDict():
    """Return an empty two-level defaultdict: missing outer keys become defaultdicts of lists."""
    
    return collections.defaultdict( default_dict_list )
def GetHideTerminalSubprocessStartupInfo():
    """Return a subprocess STARTUPINFO with STARTF_USESHOWWINDOW set on Windows, or None on other platforms."""
    
    if not HC.PLATFORM_WINDOWS:
        
        return None
        
    
    # This suppresses the terminal window that tends to pop up when calling ffmpeg or whatever
    
    startupinfo = subprocess.STARTUPINFO()
    
    startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW
    
    return startupinfo
2017-06-14 21:19:11 +00:00
def GetNow():
    """Return the current unix time, truncated to a whole number of seconds."""
    
    now_float = time.time()
    
    return int( now_float )
2018-02-14 21:47:18 +00:00
def GetNowFloat():
    """Return the current unix time as a float, exactly as time.time() reports it."""
    
    now_float = time.time()
    
    return now_float
2015-03-25 22:04:19 +00:00
def GetNowPrecise():
    """Return a float time in seconds for measuring intervals: time.clock() on Windows, time.time() elsewhere.
    
    Since the source differs by platform, only compare the result with other GetNowPrecise values.
    """
    
    return time.clock() if HC.PLATFORM_WINDOWS else time.time()
2015-03-25 22:04:19 +00:00
2016-06-15 18:59:44 +00:00
def GetSiblingProcessPorts( db_path, instance ):
    """Return the ports the process recorded in the '<instance>_running' file is listening on.
    
    The file is expected to hold a pid and a process creation time, newline-separated.
    Returns a list of ints (possibly empty) if that pid exists, and None if there is no file,
    the file cannot be parsed, the pid does not exist, or psutil raises an error.
    """
    
    path = os.path.join( db_path, instance + '_running' )
    
    if not os.path.exists( path ):
        
        return None
        
    
    with open( path, 'rb' ) as f:
        
        file_contents = f.read()
        
    
    try:
        
        ( pid_text, create_time_text ) = HydrusText.DeserialiseNewlinedTexts( file_contents )
        
        pid = int( pid_text )
        
        float( create_time_text ) # only parsed to validate the file--the value is not used here
        
    except ValueError:
        
        return None
        
    
    try:
        
        if psutil.pid_exists( pid ):
            
            process = psutil.Process( pid )
            
            return [ int( conn.laddr[1] ) for conn in process.connections() if conn.status == 'LISTEN' ]
            
        
    except psutil.Error:
        
        return None
        
    
    return None
2018-07-04 20:48:28 +00:00
def GetTimeDeltaSinceTime( timestamp ):
    """Return how many seconds ago timestamp was, or zero if it is still in the future."""
    
    seconds_until = timestamp - GetNow()
    
    # a past timestamp gives a negative 'until', which we flip. a future one is clamped to zero
    return - min( seconds_until, 0 )
2018-02-14 21:47:18 +00:00
def GetTimeDeltaUntilTime( timestamp ):
    """Return the seconds from now (whole-second clock) until timestamp, or 0 if it has already passed."""
    
    return max( timestamp - GetNow(), 0 )
def GetTimeDeltaUntilTimeFloat( timestamp ):
    """Return the seconds from now (float clock) until timestamp, or 0.0 if it has already passed."""
    
    return max( timestamp - GetNowFloat(), 0.0 )
def GetTimeDeltaUntilTimePrecise( t ):
    """Return the seconds from now until t on the GetNowPrecise clock, or 0.0 if t has already passed."""
    
    return max( t - GetNowPrecise(), 0.0 )
2018-06-20 20:20:22 +00:00
def GetTypeName( obj_type ):
    """Return obj_type.__name__ if it has one, otherwise its repr."""
    
    return obj_type.__name__ if hasattr( obj_type, '__name__' ) else repr( obj_type )
2018-07-18 21:07:15 +00:00
def HumanTextSort( texts ):
    """Sort the list of strings in place so embedded numbers order numerically, case-insensitively.
    
    Solves the 19, 20, 200, 21, 22 issue when sorting 'Page 21.jpg' type strings.
    Breaks each string into groups of text and int and sorts on those.
    """
    
    def alphanum_key( text ):
        
        # splitting on a captured group means the pieces strictly alternate text, digits, text, digits...
        # so the odd-indexed pieces, and only those, are runs of 0-9
        # converting by position works for byte and unicode strings alike, and a piece of non-ASCII
        # digits stays text, so every key has text at even positions and ints at odd ones
        pieces = re.split( '([0-9]+)', text.lower(), flags = re.UNICODE )
        
        return [ int( piece ) if index % 2 == 1 else piece for ( index, piece ) in enumerate( pieces ) ]
        
    
    texts.sort( key = alphanum_key )
2015-03-25 22:04:19 +00:00
def IntelligentMassIntersect( sets_to_reduce ):
    """Return the intersection of all the given collections as a new set.
    
    Bails out with an empty set as soon as an empty input is seen or the running answer
    becomes empty. No inputs at all also gives an empty set. Each input must support len().
    """
    
    answer = None
    
    for set_to_reduce in sets_to_reduce:
        
        if len( set_to_reduce ) == 0:
            
            return set()
            
        
        if answer is None:
            
            # copy, so we never alter the caller's first collection
            answer = set( set_to_reduce )
            
        elif len( answer ) == 0:
            
            return set()
            
        else:
            
            answer.intersection_update( set_to_reduce )
            
        
    
    if answer is None:
        
        return set()
        
    
    return answer
2015-03-25 22:04:19 +00:00
2016-06-15 18:59:44 +00:00
def IsAlreadyRunning( db_path, instance ):
    """Return True if the process recorded in the '<instance>_running' file is alive and is not this process.
    
    The file is expected to hold a pid and a process creation time, newline-separated. Both
    must match a live process for it to count. A missing or unparseable file, or any psutil
    error, gives False.
    """
    
    path = os.path.join( db_path, instance + '_running' )
    
    if not os.path.exists( path ):
        
        return False
        
    
    with open( path, 'rb' ) as f:
        
        file_contents = f.read()
        
    
    try:
        
        ( pid_text, create_time_text ) = HydrusText.DeserialiseNewlinedTexts( file_contents )
        
        pid = int( pid_text )
        create_time = float( create_time_text )
        
    except ValueError:
        
        return False
        
    
    try:
        
        me = psutil.Process()
        
        if me.pid == pid and me.create_time() == create_time:
            
            # this is me! there is no conflict, lol!
            # this happens when a linux process restarts with os.execl(), for instance (unlike Windows, it keeps its pid)
            
            return False
            
        
        if psutil.pid_exists( pid ):
            
            other_process = psutil.Process( pid )
            
            if other_process.create_time() == create_time and other_process.is_running():
                
                return True
                
            
        
    except psutil.Error:
        
        return False
        
    
    return False
2015-12-02 22:32:18 +00:00
def IterateHexPrefixes():
    """Yield every two-character lowercase hex string, from '00' up to 'ff', in order."""
    
    hex_chars = '0123456789abcdef'
    
    for first_char in hex_chars:
        
        for second_char in hex_chars:
            
            yield first_char + second_char
2016-08-31 19:55:14 +00:00
def LastShutdownWasBad( db_path, instance ):
    """Return True if the '<instance>_running' marker file still exists in db_path, False otherwise."""
    
    running_marker_path = os.path.join( db_path, instance + '_running' )
    
    return os.path.exists( running_marker_path )
2017-01-04 22:48:23 +00:00
def MassUnion( lists ):
    """Return a set of every item found in any of the given iterables."""
    
    all_items = itertools.chain.from_iterable( lists )
    
    return set( all_items )
2016-08-31 19:55:14 +00:00
2016-12-07 22:12:52 +00:00
def MedianPop( population ):
    """Remove and return the middle item of the list (the upper-middle one for even lengths).
    
    Assumes the list has at least one item and comes sorted.
    """
    
    # floor division, so the index is an int no matter what '/' does with ints
    median_index = len( population ) // 2
    
    return population.pop( median_index )
2015-03-25 22:04:19 +00:00
def MergeKeyToListDicts( key_to_list_dicts ):
    """Merge several key -> list dicts into one defaultdict, concatenating the lists of shared keys in order."""
    
    merged = collections.defaultdict( list )
    
    for key_to_list_dict in key_to_list_dicts:
        
        for ( key, values ) in key_to_list_dict.items():
            
            merged[ key ].extend( values )
            
        
    
    return merged
2015-11-18 22:44:07 +00:00
def Print( text ):
    """Print text converted by ToUnicode, falling back to printing its repr if that fails.
    
    ShowText, defined just below, is another name for this function.
    """
    
    try:
        
        print( ToUnicode( text ) )
        
    except Exception:
        
        # conversion or the output stream's encoding failed--repr output is better than nothing
        # we catch Exception rather than everything, so ctrl+c and sys.exit still get through
        print( repr( text ) )
        
    

ShowText = Print
2017-01-04 22:48:23 +00:00
def PrintException( e, do_wait = True ):
    """Print a report on an exception: its type, its value, a traceback if there is one, and the current call stack.
    
    HydrusExceptions.ShutdownException is ignored. If an exception is currently being handled,
    the type, value and traceback come from sys.exc_info(). Otherwise they come from e itself
    and there is no traceback. With do_wait, this sleeps for a second after printing.
    
    ShowException, defined just below, is another name for this function.
    """
    
    if isinstance( e, HydrusExceptions.ShutdownException ):
        
        return
        
    
    ( etype, value, tb ) = sys.exc_info()
    
    if etype is None:
        
        # we are not inside an except block, so all we have is the exception object we were given
        etype = type( e )
        value = ToUnicode( e )
        
        trace = 'No error trace'
        
    else:
        
        trace = ''.join( traceback.format_exception( etype, value, tb ) )
        
    
    stack = ''.join( traceback.format_stack() )
    
    message = ToUnicode( etype.__name__ ) + ': ' + ToUnicode( value ) + os.linesep + ToUnicode( trace ) + os.linesep + ToUnicode( stack )
    
    Print( '' )
    Print( 'Exception:' )
    
    DebugPrint( message )
    
    if do_wait:
        
        time.sleep( 1 )
        
    

ShowException = PrintException
2017-06-14 21:19:11 +00:00
def Profile( summary, code, g, l, min_duration_ms = 20 ):
    """Run code under cProfile with globals g and locals l, then pass the outcome to HG.controller.PrintProfile.
    
    If the run took more than min_duration_ms, the details are the profile's stats and
    callers, sorted by 'tottime'. Otherwise the details are empty and the time taken is
    appended to summary.
    """
    
    profile = cProfile.Profile()
    
    started = GetNowPrecise()
    
    profile.runctx( code, g, l )
    
    time_took = GetNowPrecise() - started
    
    if int( time_took * 1000.0 ) > min_duration_ms:
        
        output = cStringIO.StringIO()
        
        stats = pstats.Stats( profile, stream = output )
        
        stats.strip_dirs()
        
        stats.sort_stats( 'tottime' )
        
        output.write( 'Stats' + os.linesep * 2 )
        
        stats.print_stats()
        
        output.write( 'Callers' + os.linesep * 2 )
        
        stats.print_callers()
        
        details = output.getvalue()
        
    else:
        
        summary += ' - It took ' + TimeDeltaToPrettyTimeDelta( time_took ) + '.'
        
        details = ''
        
    
    HG.controller.PrintProfile( summary, details )
2015-11-25 22:00:57 +00:00
2016-11-30 20:24:17 +00:00
def RandomPop( population ):
    """Remove and return a randomly chosen item from the list. An empty list raises ValueError."""
    
    last_index = len( population ) - 1
    
    return population.pop( random.randint( 0, last_index ) )
2016-06-15 18:59:44 +00:00
def RecordRunningStart( db_path, instance ):
    """Write this process's pid and creation time, one per line, to the '<instance>_running' file in db_path.
    
    If psutil cannot provide them, nothing is written.
    """
    
    path = os.path.join( db_path, instance + '_running' )
    
    try:
        
        me = psutil.Process()
        
        pid_text = str( me.pid )
        create_time_text = str( me.create_time() )
        
    except psutil.Error:
        
        return
        
    
    record_string = pid_text + os.linesep + create_time_text
    
    with open( path, 'wb' ) as f:
        
        f.write( ToByteString( record_string ) )
2015-09-02 23:16:09 +00:00
2015-12-16 22:41:06 +00:00
def RestartProcess():
    """Replace the current process with a fresh copy of itself via os.execv, after a one second pause."""
    
    time.sleep( 1 ) # time for ports to unmap
    
    exe = sys.executable
    me = sys.argv[0]
    
    if not HC.RUNNING_FROM_SOURCE:
        
        # we are running a frozen release--both exe and me are the built exe
        # wrap it in quotes because pyinstaller passes it on as raw text, breaking any path with spaces :/
        
        args = [ '"' + me + '"' ] + sys.argv[1:]
        
    else:
        
        # exe is python's exe, me is the script
        
        args = [ exe ] + sys.argv
        
    
    os.execv( exe, args )
2015-12-16 22:41:06 +00:00
2017-12-13 22:33:07 +00:00
def SplayListForDB( xs ):
    """Render the items as a parenthesised, comma-separated string such as '(1,2,3)', using str() on each item.
    
    Items are not quoted or escaped.
    """
    
    item_texts = [ str( x ) for x in xs ]
    
    return '(' + ','.join( item_texts ) + ')'
2015-11-04 22:30:28 +00:00
2015-12-30 23:44:09 +00:00
def SplitIteratorIntoChunks( iterator, n ):
    """Yield the items of any iterable as consecutive lists of n items. The last list may be shorter."""
    
    current_chunk = []
    
    for item in iterator:
        
        current_chunk.append( item )
        
        if len( current_chunk ) == n:
            
            yield current_chunk
            
            current_chunk = []
            
        
    
    # whatever is left over after the last full chunk
    if len( current_chunk ) > 0:
        
        yield current_chunk
2015-04-08 18:10:50 +00:00
def SplitListIntoChunks( xs, n ):
    """Yield consecutive slices of xs, each n items long. The last slice may be shorter.
    
    A set is converted to a list first. Anything else must support len() and slicing.
    """
    
    if isinstance( xs, set ):
        
        xs = list( xs )
        
    
    chunk_starts = xrange( 0, len( xs ), n )
    
    for chunk_start in chunk_starts:
        
        yield xs[ chunk_start : chunk_start + n ]
def SplitMappingListIntoChunks( xs, n ):
    """Yield lists of ( tag_item, hash_items ) pairs, cut so each list carries roughly n hash items.
    
    Each pair's hash items are first split into pieces of at most n. Pieces are gathered
    until their total count exceeds n, then that list is yielded and a new one started.
    A yielded list can therefore hold a little more than n hash items.
    """
    
    # NOTE(review): loop nesting rebuilt from a copy of the source with indentation stripped--confirm the weight check sits inside the inner loop
    
    current_chunk = []
    current_weight = 0
    
    for ( tag_item, hash_items ) in xs:
        
        for hash_items_piece in SplitListIntoChunks( hash_items, n ):
            
            current_chunk.append( ( tag_item, hash_items_piece ) )
            
            current_weight += len( hash_items_piece )
            
            if current_weight > n:
                
                yield current_chunk
                
                current_chunk = []
                current_weight = 0
                
            
        
    
    if len( current_chunk ) > 0:
        
        yield current_chunk
2015-04-08 18:10:50 +00:00
2015-06-24 22:10:14 +00:00
def TimeHasPassed( timestamp ):
    """Return True if the current whole-second time is later than timestamp. A timestamp of None never passes."""
    
    return timestamp is not None and GetNow() > timestamp
2018-02-14 21:47:18 +00:00
def TimeHasPassedFloat( timestamp ):
    """Return True if the current float time is later than timestamp."""
    
    now = GetNowFloat()
    
    return now > timestamp
2015-07-15 20:28:26 +00:00
def TimeHasPassedPrecise( precise_timestamp ):
    """Return True if the GetNowPrecise clock is now later than precise_timestamp."""
    
    now = GetNowPrecise()
    
    return now > precise_timestamp
2015-08-05 18:42:35 +00:00
def TimeUntil( timestamp ):
    """Return the seconds from now until timestamp. Unlike GetTimeDeltaUntilTime, this goes negative once it has passed."""
    
    now = GetNow()
    
    return timestamp - now
2015-11-04 22:30:28 +00:00
def ToByteString( text_producing_object ):
    """Convert anything to a byte string.
    
    Unicode text is encoded as utf-8 and byte strings are returned unchanged. Any other
    object goes through str(), or str( repr() ) if str() fails.
    """
    
    if isinstance( text_producing_object, unicode ):
        
        return text_producing_object.encode( 'utf-8' )
        
    elif isinstance( text_producing_object, str ):
        
        return text_producing_object
        
    else:
        
        try:
            
            return str( text_producing_object )
            
        except Exception:
            
            # typically an object whose __str__ produces non-ascii unicode
            # we catch Exception rather than everything, so ctrl+c and sys.exit still get through
            return str( repr( text_producing_object ) )
2015-03-25 22:04:19 +00:00
2018-07-04 20:48:28 +00:00
def ToHumanInt( num ):
    """Format an integer with the current locale's digit grouping (e.g. thousands separators), as unicode text."""
    
    # don't feed this a unicode string u'%d'--locale can't handle it
    # format_string rather than format, since format is deprecated in later versions of python
    
    text = locale.format_string( '%d', num, grouping = True )
    
    try:
        
        # the separator characters come back in the locale's own encoding
        text = text.decode( locale.getpreferredencoding() )
        
    except Exception:
        
        # could not decode it that way, so leave it to ToUnicode's fallbacks
        pass
        
    
    return ToUnicode( text )
2015-11-04 22:30:28 +00:00
def ToUnicode( text_producing_object ):
    """Convert anything to unicode text, trying hard not to raise.
    
    Strings (and bs4 NavigableStrings) are used as they are. Other objects go through str(),
    then unicode(), then repr(). If the result is still a byte string, it is decoded as
    utf-8, then with the locale's preferred encoding, then utf-16, and as a last resort its
    repr is used.
    """
    
    if isinstance( text_producing_object, ( str, unicode, bs4.element.NavigableString ) ):
        
        text = text_producing_object
        
    else:
        
        # we catch Exception rather than everything below, so ctrl+c and sys.exit still get through
        
        try:
            
            text = str( text_producing_object ) # dealing with exceptions, etc...
            
        except Exception:
            
            try:
                
                text = unicode( text_producing_object )
                
            except Exception:
                
                text = repr( text_producing_object )
                
            
        
    
    if not isinstance( text, unicode ):
        
        try:
            
            text = text.decode( 'utf-8' )
            
        except UnicodeDecodeError:
            
            try:
                
                text = text.decode( locale.getpreferredencoding() )
                
            except Exception:
                
                try:
                    
                    text = text.decode( 'utf-16' )
                    
                except Exception:
                    
                    text = unicode( repr( text ) )
                    
                
            
        
    
    return text
2016-06-22 20:59:24 +00:00
def WaitForProcessToFinish( p, timeout ):
    """Block until the process p exits, checking every two seconds.
    
    If it is still running more than timeout seconds after this call started, it is killed
    and an Exception is raised.
    """
    
    started = GetNow()
    
    while True:
        
        if p.poll() is not None:
            
            return
            
        
        if TimeHasPassed( started + timeout ):
            
            p.kill()
            
            raise Exception( 'Process did not finish within ' + ToHumanInt( timeout ) + ' seconds!' )
            
        
        time.sleep( 2 )
2015-03-25 22:04:19 +00:00
class HydrusYAMLBase( yaml.YAMLObject ):
    """Base class for YAML-serialisable objects, set to use PyYAML's SafeLoader and SafeDumper."""
    
    yaml_dumper = yaml.SafeDumper
    yaml_loader = yaml.SafeLoader
2015-10-14 21:02:25 +00:00
class AccountIdentifier( HydrusSerialisable.SerialisableBase ):
    """Identifies an account either by its account key or by a content object.
    
    Exactly one of the two is stored, along with a type flag saying which. Account keys are
    serialised as hex. Content is serialised through its own serialisable tuple.
    """
    
    SERIALISABLE_TYPE = HydrusSerialisable.SERIALISABLE_TYPE_ACCOUNT_IDENTIFIER
    SERIALISABLE_NAME = 'Account Identifier'
    SERIALISABLE_VERSION = 1
    
    TYPE_ACCOUNT_KEY = 1
    TYPE_CONTENT = 2
    
    def __init__( self, account_key = None, content = None ):
        
        # this class derives from SerialisableBase, not HydrusYAMLBase, so that is the parent to initialise
        HydrusSerialisable.SerialisableBase.__init__( self )
        
        # if both are given, the account key wins
        # NOTE(review): if neither is given, _type and _data are left unset--presumably deserialisation fills them in afterwards, confirm
        
        if account_key is not None:
            
            self._type = self.TYPE_ACCOUNT_KEY
            self._data = account_key
            
        elif content is not None:
            
            self._type = self.TYPE_CONTENT
            self._data = content
            
        
    
    # equality is defined purely by comparing hashes of ( type, data )
    
    def __eq__( self, other ): return self.__hash__() == other.__hash__()
    
    def __hash__( self ): return ( self._type, self._data ).__hash__()
    
    def __ne__( self, other ): return self.__hash__() != other.__hash__()
    
    def __repr__( self ): return 'Account Identifier: ' + ToUnicode( ( self._type, self._data ) )
    
    def _GetSerialisableInfo( self ):
        
        if self._type == self.TYPE_ACCOUNT_KEY:
            
            serialisable_data = self._data.encode( 'hex' )
            
        elif self._type == self.TYPE_CONTENT:
            
            serialisable_data = self._data.GetSerialisableTuple()
            
        
        return ( self._type, serialisable_data )
        
    
    def _InitialiseFromSerialisableInfo( self, serialisable_info ):
        
        ( self._type, serialisable_data ) = serialisable_info
        
        if self._type == self.TYPE_ACCOUNT_KEY:
            
            self._data = serialisable_data.decode( 'hex' )
            
        elif self._type == self.TYPE_CONTENT:
            
            self._data = HydrusSerialisable.CreateFromSerialisableTuple( serialisable_data )
            
        
    
    def GetData( self ): return self._data
    
    def HasAccountKey( self ): return self._type == self.TYPE_ACCOUNT_KEY
    
    def HasContent( self ): return self._type == self.TYPE_CONTENT
    

HydrusSerialisable.SERIALISABLE_TYPES_TO_OBJECT_TYPES[ HydrusSerialisable.SERIALISABLE_TYPE_ACCOUNT_IDENTIFIER ] = AccountIdentifier
2015-03-25 22:04:19 +00:00
class AccountType( HydrusYAMLBase ):
    """A named set of permissions with monthly limits, stored as YAML under the '!AccountType' tag.
    
    max_monthly_data is a ( max_num_bytes, max_num_requests ) pair, where None means no limit.
    """
    
    yaml_tag = u'!AccountType'
    
    def __init__( self, title, permissions, max_monthly_data ):
        
        HydrusYAMLBase.__init__( self )
        
        self._title = title
        self._permissions = permissions
        self._max_monthly_data = max_monthly_data
        
    
    def __repr__( self ):
        
        return self.ConvertToString()
        
    
    def GetPermissions( self ):
        
        return self._permissions
        
    
    def GetTitle( self ):
        
        return self._title
        
    
    def GetMaxBytes( self ):
        
        ( max_num_bytes, _ ) = self._max_monthly_data
        
        return max_num_bytes
        
    
    def GetMaxRequests( self ):
        
        ( _, max_num_requests ) = self._max_monthly_data
        
        return max_num_requests
        
    
    def GetMaxBytesString( self ):
        
        ( max_num_bytes, _ ) = self._max_monthly_data
        
        if max_num_bytes is None:
            
            return 'No limit'
            
        
        return ConvertIntToBytes( max_num_bytes )
        
    
    def GetMaxRequestsString( self ):
        
        ( _, max_num_requests ) = self._max_monthly_data
        
        if max_num_requests is None:
            
            return 'No limit'
            
        
        return ToHumanInt( max_num_requests )
        
    
    def ConvertToString( self ):
        
        if self._permissions == [ HC.UNKNOWN_PERMISSION ]:
            
            permissions_text = 'no permissions'
            
        else:
            
            permission_names = [ HC.permissions_string_lookup[ permission ] for permission in self._permissions ]
            
            permissions_text = ', '.join( permission_names ) + ' permissions'
            
        
        return self._title + ' with ' + permissions_text
        
    
    def IsUnknownAccountType( self ):
        
        return self._permissions == [ HC.UNKNOWN_PERMISSION ]
        
    
    def HasPermission( self, permission ):
        
        return permission in self._permissions
        
    

# lets sqlite3 store an AccountType directly, as its YAML dump
sqlite3.register_adapter( AccountType, yaml.safe_dump )
2016-01-06 21:17:20 +00:00
class BigJobPauser( object ):
    """Makes a long-running loop take a short sleep every so often.
    
    Call Pause() as often as you like. It only sleeps (for wait_time seconds) once at least
    period seconds have gone by since construction or since the last sleep.
    """
    
    def __init__( self, period = 10, wait_time = 0.1 ):
        
        self._period = period
        self._wait_time = wait_time
        
        self._next_pause = GetNow() + self._period
        
    
    def Pause( self ):
        
        if not TimeHasPassed( self._next_pause ):
            
            return
            
        
        time.sleep( self._wait_time )
        
        self._next_pause = GetNow() + self._period
2017-01-25 22:56:55 +00:00
class Call( object ):
    """Bundles a function with its arguments so it can be called later with no arguments.
    
    Calling the object runs the function. The function's return value is discarded.
    """
    
    def __init__( self, func, *args, **kwargs ):
        
        ( self._func, self._args, self._kwargs ) = ( func, args, kwargs )
        
    
    def __call__( self ):
        
        self._func( *self._args, **self._kwargs )
        
    
    def __repr__( self ):
        
        call_info = ( self._func, self._args, self._kwargs )
        
        return 'Call: ' + repr( call_info )
2015-03-25 22:04:19 +00:00
class ContentUpdate( object ):
    """A single content change described by a ( data_type, action, row ) triple.
    
    data_type and action are compared against HC.CONTENT_TYPE_* and
    HC.CONTENT_UPDATE_* constants; the layout of row depends on both and is
    unpacked in GetHashes.
    """
    
    def __init__( self, data_type, action, row ):
        
        self._data_type = data_type
        self._action = action
        self._row = row
        
    
    def _GetComparisonKey( self ):
        """Return the tuple that both equality and hashing are based on."""
        
        # repr() is used for the row because a row may not itself be hashable.
        return ( self._data_type, self._action, repr( self._row ) )
        
    
    def __eq__( self, other ):
        
        # Compare the actual key rather than hash values: comparing hashes makes any
        # object whose hash happens to collide with ours compare equal.
        if not isinstance( other, ContentUpdate ):
            
            return NotImplemented
            
        
        return self._GetComparisonKey() == other._GetComparisonKey()
        
    
    def __ne__( self, other ):
        
        result = self.__eq__( other )
        
        if result is NotImplemented:
            
            return result
            
        
        return not result
        
    
    def __hash__( self ):
        
        return hash( self._GetComparisonKey() )
        
    
    def __repr__( self ):
        
        return 'Content Update: ' + ToUnicode( ( self._data_type, self._action, self._row ) )
        
    
    def GetAction( self ):
        
        return self._action
        
    
    def GetDataType( self ):
        
        return self._data_type
        
    
    def GetHashes( self ):
        """Return the set of hashes found in this update's row.
        
        Returns an empty set for data type / action combinations that carry
        no hashes, including combinations this method does not recognise.
        """
        
        # Default for every branch that does not extract hashes. Without it, an
        # unhandled combination (e.g. a ratings update whose action is not ADD)
        # reached the final isinstance check with 'hashes' unbound.
        hashes = set()
        
        if self._data_type == HC.CONTENT_TYPE_FILES:
            
            if self._action == HC.CONTENT_UPDATE_ADD:
                
                ( file_info_manager, timestamp ) = self._row
                
                hashes = { file_info_manager.hash }
                
            elif self._action in ( HC.CONTENT_UPDATE_ARCHIVE, HC.CONTENT_UPDATE_DELETE, HC.CONTENT_UPDATE_UNDELETE, HC.CONTENT_UPDATE_INBOX, HC.CONTENT_UPDATE_PEND, HC.CONTENT_UPDATE_RESCIND_PEND, HC.CONTENT_UPDATE_RESCIND_PETITION ):
                
                # for these actions the row is the collection of hashes itself
                hashes = self._row
                
            elif self._action == HC.CONTENT_UPDATE_PETITION:
                
                ( hashes, reason ) = self._row
                
            
            # HC.CONTENT_UPDATE_ADVANCED keeps the empty default
            
        elif self._data_type == HC.CONTENT_TYPE_URLS:
            
            ( urls, hashes ) = self._row
            
        elif self._data_type == HC.CONTENT_TYPE_MAPPINGS:
            
            if self._action == HC.CONTENT_UPDATE_ADVANCED:
                
                pass # keeps the empty default; the row is not unpacked
                
            elif self._action == HC.CONTENT_UPDATE_PETITION:
                
                ( tag, hashes, reason ) = self._row
                
            else:
                
                ( tag, hashes ) = self._row
                
            
        elif self._data_type == HC.CONTENT_TYPE_RATINGS:
            
            if self._action == HC.CONTENT_UPDATE_ADD:
                
                ( rating, hashes ) = self._row
                
            
        elif self._data_type == HC.CONTENT_TYPE_NOTES:
            
            if self._action == HC.CONTENT_UPDATE_SET:
                
                ( notes, file_hash ) = self._row
                
                hashes = { file_hash }
                
            
        
        # HC.CONTENT_TYPE_DIRECTORIES, HC.CONTENT_TYPE_TAG_PARENTS and
        # HC.CONTENT_TYPE_TAG_SIBLINGS keep the empty default.
        
        if not isinstance( hashes, set ):
            
            hashes = set( hashes )
            
        
        return hashes
        
    
    def GetWeight( self ):
        """Return the number of hashes this update touches."""
        
        return len( self.GetHashes() )
        
    
    def IsInboxRelated( self ):
        """Return True when the action is an archive or inbox action."""
        
        return self._action in ( HC.CONTENT_UPDATE_ARCHIVE, HC.CONTENT_UPDATE_INBOX )
        
    
    def ToTuple( self ):
        
        return ( self._data_type, self._action, self._row )
2015-03-25 22:04:19 +00:00
class JobDatabase( object ):
    """A unit of work handed to the database, plus a slot the result is delivered through.
    
    One side calls PutResult() when the work is done; the other side blocks
    in GetResult() until that happens.
    """
    
    def __init__( self, job_type, synchronous, action, *args, **kwargs ):
        
        self._type = job_type
        self._synchronous = synchronous
        self._action = action
        self._args = args
        self._kwargs = kwargs
        
        # Defined up front so the attribute always exists; PutResult overwrites it.
        self._result = None
        
        self._result_ready = threading.Event()
        
    
    def GetCallableTuple( self ):
        """Return ( action, args, kwargs ) as given to the constructor."""
        
        return ( self._action, self._args, self._kwargs )
        
    
    def GetResult( self ):
        """Block until PutResult has been called, then return the result.
        
        If the delivered result is an Exception instance it is raised instead
        of returned. Raises HydrusExceptions.ShutdownException if
        HG.model_shutdown is set when a 2-second wait times out with no result.
        """
        
        time.sleep( 0.00001 ) # this one neat trick can save hassle on superquick jobs as event.wait can be laggy
        
        # Wake every two seconds so a shutdown is noticed even if no result ever arrives.
        while not self._result_ready.wait( 2 ):
            
            if HG.model_shutdown:
                
                raise HydrusExceptions.ShutdownException( 'Application quit before db could serve result!' )
                
            
        
        if isinstance( self._result, Exception ):
            
            raise self._result
            
        
        return self._result
        
    
    def GetType( self ):
        
        return self._type
        
    
    def IsSynchronous( self ):
        
        return self._synchronous
        
    
    def PutResult( self, result ):
        """Store the result and wake anything blocked in GetResult."""
        
        self._result = result
        
        self._result_ready.set()
        
    
    def ToString( self ):
        
        return self._type + ' ' + self._action
2015-03-25 22:04:19 +00:00
class ServiceUpdate( object ):
    """An ( action, row ) pair describing a service update; row is optional."""
    
    def __init__( self, action, row = None ):
        
        self._action = action
        self._row = row
        
    
    def __repr__( self ):
        
        # Show the contents in logs and debugging output instead of the default object address.
        return 'Service Update: ' + repr( ( self._action, self._row ) )
        
    
    def ToTuple( self ):
        """Return ( action, row ) as given to the constructor."""
        
        return ( self._action, self._row )
2015-03-25 22:04:19 +00:00