diff --git a/hydrus/core/HydrusConstants.py b/hydrus/core/HydrusConstants.py index 865d757b..eb64f763 100644 --- a/hydrus/core/HydrusConstants.py +++ b/hydrus/core/HydrusConstants.py @@ -713,13 +713,14 @@ UNDETERMINED_MP4 = 50 APPLICATION_CBOR = 51 APPLICATION_WINDOWS_EXE = 52 AUDIO_WAVPACK = 53 APPLICATION_SAI2 = 54 +IMAGE_SVG = 55 APPLICATION_OCTET_STREAM = 100 APPLICATION_UNKNOWN = 101 GENERAL_FILETYPES = { GENERAL_APPLICATION, GENERAL_AUDIO, GENERAL_IMAGE, GENERAL_VIDEO, GENERAL_ANIMATION } -SEARCHABLE_MIMES = { IMAGE_JPEG, IMAGE_PNG, IMAGE_APNG, IMAGE_GIF, IMAGE_WEBP, IMAGE_TIFF, IMAGE_ICON, APPLICATION_FLASH, VIDEO_AVI, VIDEO_FLV, VIDEO_MOV, VIDEO_MP4, VIDEO_MKV, VIDEO_REALMEDIA, VIDEO_WEBM, VIDEO_OGV, VIDEO_MPEG, APPLICATION_CLIP, APPLICATION_PSD, APPLICATION_SAI2, APPLICATION_PDF, APPLICATION_ZIP, APPLICATION_RAR, APPLICATION_7Z, AUDIO_M4A, AUDIO_MP3, AUDIO_REALMEDIA, AUDIO_OGG, AUDIO_FLAC, AUDIO_WAVE, AUDIO_TRUEAUDIO, AUDIO_WMA, VIDEO_WMV, AUDIO_MKV, AUDIO_MP4, AUDIO_WAVPACK } +SEARCHABLE_MIMES = { IMAGE_JPEG, IMAGE_PNG, IMAGE_APNG, IMAGE_GIF, IMAGE_WEBP, IMAGE_TIFF, IMAGE_ICON, IMAGE_SVG, APPLICATION_FLASH, VIDEO_AVI, VIDEO_FLV, VIDEO_MOV, VIDEO_MP4, VIDEO_MKV, VIDEO_REALMEDIA, VIDEO_WEBM, VIDEO_OGV, VIDEO_MPEG, APPLICATION_CLIP, APPLICATION_PSD, APPLICATION_SAI2, APPLICATION_PDF, APPLICATION_ZIP, APPLICATION_RAR, APPLICATION_7Z, AUDIO_M4A, AUDIO_MP3, AUDIO_REALMEDIA, AUDIO_OGG, AUDIO_FLAC, AUDIO_WAVE, AUDIO_TRUEAUDIO, AUDIO_WMA, VIDEO_WMV, AUDIO_MKV, AUDIO_MP4, AUDIO_WAVPACK } STORABLE_MIMES = set( SEARCHABLE_MIMES ).union( { APPLICATION_HYDRUS_UPDATE_CONTENT, APPLICATION_HYDRUS_UPDATE_DEFINITIONS } ) @@ -727,7 +728,7 @@ ALLOWED_MIMES = set( STORABLE_MIMES ).union( { IMAGE_BMP } ) DECOMPRESSION_BOMB_IMAGES = { IMAGE_JPEG, IMAGE_PNG } -IMAGES = { IMAGE_JPEG, IMAGE_PNG, IMAGE_BMP, IMAGE_WEBP, IMAGE_TIFF, IMAGE_ICON } +IMAGES = { IMAGE_JPEG, IMAGE_PNG, IMAGE_BMP, IMAGE_WEBP, IMAGE_TIFF, IMAGE_ICON, IMAGE_SVG } ANIMATIONS = { 
IMAGE_GIF, IMAGE_APNG } @@ -768,8 +769,8 @@ FILES_THAT_CAN_HAVE_EXIF = { IMAGE_JPEG, IMAGE_TIFF } # images and animations that PIL can handle FILES_THAT_CAN_HAVE_HUMAN_READABLE_EMBEDDED_METADATA = { IMAGE_JPEG, IMAGE_PNG, IMAGE_BMP, IMAGE_WEBP, IMAGE_TIFF, IMAGE_ICON, IMAGE_GIF, IMAGE_APNG } -FILES_THAT_CAN_HAVE_PIXEL_HASH = set( IMAGES ).union( { IMAGE_GIF } ) -FILES_THAT_HAVE_PERCEPTUAL_HASH = set( IMAGES ) +FILES_THAT_CAN_HAVE_PIXEL_HASH = set( IMAGES ).union( { IMAGE_GIF } ).difference( { IMAGE_SVG } ) +FILES_THAT_HAVE_PERCEPTUAL_HASH = set( IMAGES ).difference( { IMAGE_SVG } ) HYDRUS_UPDATE_FILES = ( APPLICATION_HYDRUS_UPDATE_DEFINITIONS, APPLICATION_HYDRUS_UPDATE_CONTENT ) @@ -786,6 +787,7 @@ mime_enum_lookup = { 'image/webp' : IMAGE_WEBP, 'image/tiff' : IMAGE_TIFF, 'image/x-icon' : IMAGE_ICON, + 'image/svg+xml' : IMAGE_SVG, 'image/vnd.microsoft.icon' : IMAGE_ICON, 'image' : IMAGES, 'application/x-shockwave-flash' : APPLICATION_FLASH, @@ -844,6 +846,7 @@ mime_string_lookup = { IMAGE_WEBP : 'webp', IMAGE_TIFF : 'tiff', IMAGE_ICON : 'icon', + IMAGE_SVG : 'svg', APPLICATION_FLASH : 'flash', APPLICATION_OCTET_STREAM : 'application/octet-stream', APPLICATION_YAML : 'yaml', @@ -904,6 +907,7 @@ mime_mimetype_string_lookup = { IMAGE_WEBP : 'image/webp', IMAGE_TIFF : 'image/tiff', IMAGE_ICON : 'image/x-icon', + IMAGE_SVG : 'image/svg+xml', APPLICATION_FLASH : 'application/x-shockwave-flash', APPLICATION_OCTET_STREAM : 'application/octet-stream', APPLICATION_YAML : 'application/x-yaml', @@ -965,6 +969,7 @@ mime_ext_lookup = { IMAGE_WEBP : '.webp', IMAGE_TIFF : '.tiff', IMAGE_ICON : '.ico', + IMAGE_SVG : '.svg', APPLICATION_FLASH : '.swf', APPLICATION_OCTET_STREAM : '.bin', APPLICATION_YAML : '.yaml', diff --git a/hydrus/core/HydrusFileHandling.py b/hydrus/core/HydrusFileHandling.py index efebc342..45d7f621 100644 --- a/hydrus/core/HydrusFileHandling.py +++ b/hydrus/core/HydrusFileHandling.py @@ -460,6 +460,9 @@ def GetMime( path, ok_to_look_for_hydrus_updates = False ): 
return HC.TEXT_HTML + if HydrusText.LooksLikeSVG( bit_to_check ): + + return HC.IMAGE_SVG # it is important this goes at the end, because ffmpeg has a billion false positives! # for instance, it once thought some hydrus update files were mpegs diff --git a/hydrus/core/HydrusText.py b/hydrus/core/HydrusText.py index 4504e525..77ec56c4 100644 --- a/hydrus/core/HydrusText.py +++ b/hydrus/core/HydrusText.py @@ -96,11 +96,33 @@ def LooksLikeHTML( file_data ): if isinstance( file_data, bytes ): - search_elements = ( b'