2020-05-20 21:36:02 +00:00
from functools import reduce
2019-05-08 21:06:42 +00:00
import numpy
2015-11-18 22:44:07 +00:00
import numpy . core . multiarray # important this comes before cv!
2020-05-20 21:36:02 +00:00
2015-11-18 22:44:07 +00:00
import cv2
2020-05-20 21:36:02 +00:00
from hydrus . client import ClientConstants as CC
2020-04-22 21:00:35 +00:00
from hydrus . core import HydrusData
from hydrus . core import HydrusImageHandling
from hydrus . core import HydrusGlobals as HG
2023-04-19 20:38:13 +00:00
from hydrus . core import HydrusTime
2015-11-18 22:44:07 +00:00
2016-09-21 19:54:04 +00:00
# lookup from the client's zoom quality constants to the matching OpenCV interpolation flags
cv_interpolation_enum_lookup = {
    CC.ZOOM_NEAREST: cv2.INTER_NEAREST,
    CC.ZOOM_LINEAR: cv2.INTER_LINEAR,
    CC.ZOOM_AREA: cv2.INTER_AREA,
    CC.ZOOM_CUBIC: cv2.INTER_CUBIC,
    CC.ZOOM_LANCZOS4: cv2.INTER_LANCZOS4,
}
2017-11-08 22:07:12 +00:00
2021-12-15 22:16:22 +00:00
def DiscardBlankPerceptualHashes(perceptual_hashes):
    """Return the set of given perceptual hashes that are not (nearly) blank.

    A hash is kept only when its 64-bit hamming distance from
    CC.BLANK_PERCEPTUAL_HASH is greater than 4.
    """
    useful_hashes = set()

    for candidate in perceptual_hashes:
        distance_from_blank = HydrusData.Get64BitHammingDistance(candidate, CC.BLANK_PERCEPTUAL_HASH)

        if distance_from_blank > 4:
            useful_hashes.add(candidate)

    return useful_hashes
2019-05-15 20:35:00 +00:00
2023-05-24 20:44:12 +00:00
2019-05-08 21:06:42 +00:00
def GenerateNumPyImage(path, mime):
    """Load the file at path as a numpy image via HydrusImageHandling.

    The client's load_images_with_pil boolean option is read on every call
    and forwarded as force_pil.
    """
    client_options = HG.client_controller.new_options

    use_pil = client_options.GetBoolean(' load_images_with_pil ')

    return HydrusImageHandling.GenerateNumPyImage(path, mime, force_pil=use_pil)
2015-11-18 22:44:07 +00:00
2023-05-24 20:44:12 +00:00
2017-08-23 21:34:25 +00:00
def GenerateShapePerceptualHashes(path, mime):
    """Generate the set of shape perceptual hashes for the image file at path.

    This is best-effort: if the image cannot be loaded or hashed, an empty
    set is returned instead of raising.
    """
    if HG.phash_generation_report_mode:
        HydrusData.ShowText(' phash generation: loading image ')

    try:
        numpy_image = GenerateNumPyImage(path, mime)

        return GenerateShapePerceptualHashesNumPy(numpy_image)

    except Exception as e:
        # still best-effort, but unlike a bare except this lets
        # KeyboardInterrupt/SystemExit propagate, and report mode now
        # says why there are no hashes
        if HG.phash_generation_report_mode:
            HydrusData.ShowText('phash generation: could not generate hashes: {}'.format(repr(e)))

        return set()
2023-05-24 20:44:12 +00:00
def GenerateShapePerceptualHashesNumPy(numpy_image):
    """Compute the DCT-median shape perceptual hash of a numpy image.

    The image is flattened to greyscale (a 4-channel image is first composited
    onto white using its alpha channel), squashed to 32x32, and run through a
    DCT. The top-left 8x8 of the DCT is compared against its median to give a
    64-bit hash, packed into 8 bytes.

    Expects numpy_image.shape to unpack as ( height, width, channels ).

    Returns a set holding that hash, or an empty set if
    DiscardBlankPerceptualHashes throws it away as blank.
    """
    report_mode = HG.phash_generation_report_mode

    if report_mode:
        HydrusData.ShowText(' phash generation: image shape: {} '.format(numpy_image.shape))

    (height, width, channel_count) = numpy_image.shape

    if channel_count == 4:
        # doing the alpha maths on 10000x10000 pngs eats ram like mad, so shrink first
        # we don't want to do GetThumbnailResolutionAndClipRegion as for extremely wide or tall images,
        # we'll then scale below 32 pixels for one dimension, losing information!
        # however, it does not matter if we stretch the image a bit, since we'll be coercing 32x32 in a minute
        capped_size = (min(256, width), min(256, height))

        numpy_image = cv2.resize(numpy_image, capped_size, interpolation=cv2.INTER_AREA)

        # split off the alpha weight and turn the colour channels into greyscale
        alpha_channel = numpy_image[:, :, 3]
        grey_without_alpha = cv2.cvtColor(numpy_image[:, :, :3], cv2.COLOR_RGB2GRAY)

        # paste the greyscale image onto a white canvas using: pixel * alpha_float + white * ( 1 - alpha_float )
        # note alpha 255 = opaque, alpha 0 = transparent
        # also, note:
        # white * ( 1 - alpha_float ) = 255 * ( 1 - ( alpha / 255 ) ) = 255 - alpha
        weighted_grey = grey_without_alpha * (alpha_channel / 255.0)
        white_showing_through = 255.0 - alpha_channel

        numpy_image_gray = numpy.uint8(weighted_grey + white_showing_through)

    else:
        # this single step is nice and fast, so we won't scale to 256x256 beforehand
        numpy_image_gray = cv2.cvtColor(numpy_image, cv2.COLOR_RGB2GRAY)

    if report_mode:
        HydrusData.ShowText(' phash generation: grey image shape: {} '.format(numpy_image_gray.shape))

    tiny_grey = cv2.resize(numpy_image_gray, (32, 32), interpolation=cv2.INTER_AREA)

    if report_mode:
        HydrusData.ShowText(' phash generation: tiny image shape: {} '.format(tiny_grey.shape))

    # the dct wants floats
    tiny_grey_float = numpy.float32(tiny_grey)

    if report_mode:
        HydrusData.ShowText(' phash generation: tiny float image shape: {} '.format(tiny_grey_float.shape))
        HydrusData.ShowText(' phash generation: generating dct ')

    # only the top left 8x8 of the dct is used
    low_frequencies = cv2.dct(tiny_grey_float)[:8, :8]

    # median of the 8x8, skipping [0,0], which represents flat colour
    # this [0,0] exclusion is apparently important for mean, but maybe it ain't so important for median--w/e
    median = numpy.median(low_frequencies.ravel()[1:])

    if report_mode:
        HydrusData.ShowText(' phash generation: median: {} '.format(median))

    # a monochromatic, 64-bit picture of whether each entry is above the median
    above_median = low_frequencies > median

    if report_mode:
        HydrusData.ShowText(' phash generation: collapsing bytes ')

    # each row of eight bools becomes one byte, first bool as the most significant bit: TTTFTFTF -> 11101010
    perceptual_hash = numpy.packbits(above_median, axis=1).tobytes()

    if report_mode:
        HydrusData.ShowText(' phash generation: perceptual_hash: {} '.format(perceptual_hash.hex()))

    # now discard the blank hash, which is 1000000... and not useful
    perceptual_hashes = DiscardBlankPerceptualHashes({perceptual_hash})

    if report_mode:
        HydrusData.ShowText(' phash generation: final perceptual_hashes: {} '.format(len(perceptual_hashes)))

    # we good
    return perceptual_hashes
2016-09-21 19:54:04 +00:00
2019-05-08 21:06:42 +00:00
def ResizeNumPyImageForMediaViewer(mime, numpy_image, target_resolution):
    """Resize numpy_image to target_resolution for display in the media viewer.

    target_resolution is ( width, height ). The interpolation used comes from
    the client's media zoom quality options for the given mime: the scale-up
    quality if either dimension grows, otherwise the scale-down quality.

    If the image is already at the target resolution, the same array object is
    returned untouched.
    """
    (target_width, target_height) = target_resolution

    new_options = HG.client_controller.new_options

    (scale_up_quality, scale_down_quality) = new_options.GetMediaZoomQuality(mime)

    # numpy shape is ( rows, columns, ... ), i.e. height first
    # slicing rather than unpacking three values means a 2-D array does not fail here
    (image_height, image_width) = numpy_image.shape[:2]

    # compare width with width and height with height
    # the old check had these swapped, so it only ever matched square images correctly
    if (target_width, target_height) == (image_width, image_height):
        return numpy_image

    if target_width > image_width or target_height > image_height:
        interpolation = cv_interpolation_enum_lookup[scale_up_quality]
    else:
        interpolation = cv_interpolation_enum_lookup[scale_down_quality]

    # cv2 wants ( width, height ) here, the reverse of the numpy shape order
    return cv2.resize(numpy_image, (target_width, target_height), interpolation=interpolation)
2016-09-21 19:54:04 +00:00
2017-01-18 22:52:39 +00:00