Add thumbnails for PDFs (#1421)

* Add thumbnails for PDFs

* Update HydrusPDFHandling.py

* Handle password protected and otherwise unsupported PDFs

* Fix DPI calculation

* Use new exception for encrypted PDFs

* Formatting

* Update filetype docs

* Add notes about PDF handling to filetypes doc

* Handle scale to fill thumbnails for PDFs, ROUND THE RESOLUTION
This commit is contained in:
Paul Friederichsen 2023-09-02 14:36:17 -05:00 committed by GitHub
parent 368309645f
commit 8360fcc527
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 183 additions and 15 deletions

View File

@ -70,7 +70,7 @@ This is a list of all filetypes Hydrus can import. Hydrus determines the filetyp
| Filetype | Extension | MIME type | Thumbnails | Viewable in Hydrus | Notes |
| -------- | --------- | ------------------------------- | :--------: | :----------------: | ----- |
| flash | `.swf` | `application/x-shockwave-flash` | ✅ | ❌ | |
| pdf | `.pdf` | `application/pdf` | ❌ | ❌ | |
| pdf | `.pdf` | `application/pdf` | ✅ | ❌ | 300 DPI assumed for resolution. No thumbnails for encrypted PDFs. |
## Image Project Files

View File

@ -23,6 +23,7 @@ from hydrus.client import ClientFilesPhysical
from hydrus.client import ClientImageHandling
from hydrus.client import ClientPaths
from hydrus.client import ClientSVGHandling # important to keep this in, despite not being used, since there's initialisation stuff in here
from hydrus.client import ClientPDFHandling # important to keep this in, despite not being used, since there's initialisation stuff in here
from hydrus.client import ClientThreading
from hydrus.client.metadata import ClientTags

View File

@ -0,0 +1,131 @@
import typing
from qtpy import QtPdf
from qtpy import QtGui as QG
from qtpy import QtCore as QC
from hydrus.core import HydrusExceptions
from hydrus.core import HydrusImageHandling
from hydrus.core import HydrusPDFHandling
from hydrus.core import HydrusData
from hydrus.client.gui import ClientGUIFunctions
def LoadPDF( path: str ):
    """
    Open the PDF at `path` with Qt's QPdfDocument and return the loaded document.
    
    Callers in this module close() the returned document when they are done with it.
    
    Raises:
        HydrusExceptions.EncryptedFileException: the document reports an
            IncorrectPassword or UnsupportedSecurityScheme error.
        HydrusExceptions.DamagedOrUnusualFileException: constructing/loading the
            document raised, or the document ended up in any other non-Ready state.
    """
    
    try:
        
        document = QtPdf.QPdfDocument()
        
        document.load( path )
        
    except Exception as e:
        
        # 'Exception' rather than a bare except, so KeyboardInterrupt/SystemExit are not
        # reported as a damaged file; the original error is chained for debugging
        raise HydrusExceptions.DamagedOrUnusualFileException( 'Could not load PDF file.' ) from e
        
    
    status = document.status()
    
    # enum members are compared by value, which does not rely on the binding handing back
    # the very same Python object for a given member
    if status == QtPdf.QPdfDocument.Status.Ready:
        
        return document
        
    
    if status != QtPdf.QPdfDocument.Status.Error:
        
        raise HydrusExceptions.DamagedOrUnusualFileException( f'PDF document status: {status}!' )
        
    
    error = document.error()
    
    if error == QtPdf.QPdfDocument.Error.IncorrectPassword:
        
        raise HydrusExceptions.EncryptedFileException( 'PDF is password protected!' )
        
    elif error == QtPdf.QPdfDocument.Error.UnsupportedSecurityScheme:
        
        raise HydrusExceptions.EncryptedFileException( 'PDF uses an unsupported security scheme' )
        
    else:
        
        raise HydrusExceptions.DamagedOrUnusualFileException( f'PDF document error: {error}!' )
        
    
def GenerateThumbnailBytesFromPDFPath( path: str, target_resolution: typing.Tuple[int, int], clip_rect = None ) -> bytes:
    """
    Render page index 0 of the PDF at `path` and return thumbnail bytes for it.
    
    When `clip_rect` is None the page is rendered directly at `target_resolution`.
    Otherwise the page is rendered at the resolution reported by _GetPDFResolution,
    clipped to `clip_rect`, and then resized to `target_resolution`.
    
    Raises:
        Whatever LoadPDF raises if the document cannot be opened.
        HydrusExceptions.DamagedOrUnusualFileException: rendering, conversion, or
            thumbnail encoding failed.
    """
    
    document = LoadPDF( path )
    
    try:
        
        if clip_rect is None:
            
            ( render_width, render_height ) = target_resolution
            
        else:
            
            ( render_width, render_height ) = _GetPDFResolution( document )
            
        
        render_size = QC.QSize( render_width, render_height )
        
        try:
            
            qt_image = document.render( 0, render_size )
            
            # ClientGUIFunctions.ConvertQtImageToNumPy doesn't handle other formats well
            # convertToFormat returns a converted copy and leaves the source image untouched,
            # so the result has to be kept or the conversion is lost
            qt_image = qt_image.convertToFormat( QG.QImage.Format_RGBA8888 )
            
            numpy_image = ClientGUIFunctions.ConvertQtImageToNumPy( qt_image )
            
            if clip_rect is None:
                
                thumbnail_numpy_image = numpy_image
                
            else:
                
                clipped_numpy_image = HydrusImageHandling.ClipNumPyImage( numpy_image, clip_rect )
                
                thumbnail_numpy_image = HydrusImageHandling.ResizeNumPyImage( clipped_numpy_image, target_resolution )
                
            
            return HydrusImageHandling.GenerateThumbnailBytesNumPy( thumbnail_numpy_image )
            
        except Exception as e:
            
            raise HydrusExceptions.DamagedOrUnusualFileException( 'Could not render PDF thumbnail.' ) from e
            
        
    finally:
        
        # release the document on every path, including failed renders
        document.close()
        
    
# Rebind the HydrusPDFHandling module attribute to the Qt-backed thumbnailer defined in this module.
HydrusPDFHandling.GenerateThumbnailBytesFromPDFPath = GenerateThumbnailBytesFromPDFPath
# DPI assumed when converting a PDF page size in points (1/72 inch) to a pixel resolution.
PDF_ASSUMED_DPI = 300
def GetPDFResolution( path: str ):
    """
    Return the ( width, height ) resolution of the first page of the PDF at `path`.
    
    Returns ( None, None ) when the PDF is encrypted and so cannot be opened.
    
    Raises:
        HydrusExceptions.DamagedOrUnusualFileException: propagated from LoadPDF
            when the document cannot be loaded for any other reason.
    """
    
    try:
        
        document = LoadPDF( path )
        
    except HydrusExceptions.EncryptedFileException:
        
        # LoadPDF is the call that detects encryption, so the 'no known resolution'
        # fallback has to be applied here
        return ( None, None )
        
    
    try:
        
        return _GetPDFResolution( document )
        
    finally:
        
        # close the document even if measuring the page failed
        document.close()
        
    
def _GetPDFResolution( document: QtPdf.QPdfDocument ):
    """
    Return the ( width, height ) of page index 0 of an already loaded document.
    
    The page size is reported in points, which are 1/72 of an inch; it is scaled
    to PDF_ASSUMED_DPI dots per inch and each dimension is rounded.
    """
    
    # nothing in here loads the file, so no EncryptedFileException can originate from
    # this function; callers that open the document handle that case themselves
    first_page_size = document.pagePointSize( 0 )
    
    # pts are 1/72 of an inch, so this is the number of pixels per point at the assumed dpi
    pixels_per_point = PDF_ASSUMED_DPI / 72
    
    width = first_page_size.width() * pixels_per_point
    height = first_page_size.height() * pixels_per_point
    
    return ( round( width ), round( height ) )
    
# Rebind the HydrusPDFHandling module attribute to the Qt-backed resolution lookup defined in this module.
HydrusPDFHandling.GetPDFResolution = GetPDFResolution

View File

@ -61,20 +61,7 @@ class FileInfoManager( object ):
if mime is None:
mime = HC.APPLICATION_UNKNOWN
if mime in HC.MIMES_WITH_THUMBNAILS:
if width is None or width <= 0:
width = 1
if height is None or height <= 0:
height = 1
self.hash_id = hash_id
self.hash = hash
self.size = size

View File

@ -916,7 +916,7 @@ PIL_HEIF_MIMES = {
MIMES_THAT_DEFINITELY_HAVE_AUDIO = tuple( [ APPLICATION_FLASH ] + list( AUDIO ) )
MIMES_THAT_MAY_HAVE_AUDIO = tuple( list( MIMES_THAT_DEFINITELY_HAVE_AUDIO ) + list( VIDEO ) )
APPLICATIONS_WITH_THUMBNAILS = set( { IMAGE_SVG, APPLICATION_FLASH, APPLICATION_CLIP, APPLICATION_KRITA, APPLICATION_PROCREATE } ).union( VIEWABLE_IMAGE_PROJECT_FILES )
APPLICATIONS_WITH_THUMBNAILS = set( { IMAGE_SVG, APPLICATION_PDF, APPLICATION_FLASH, APPLICATION_CLIP, APPLICATION_KRITA, APPLICATION_PROCREATE } ).union( VIEWABLE_IMAGE_PROJECT_FILES )
MIMES_WITH_THUMBNAILS = set( IMAGES ).union( ANIMATIONS ).union( VIDEO ).union( APPLICATIONS_WITH_THUMBNAILS )

View File

@ -57,6 +57,7 @@ class FileImportBlockException( HydrusException ): pass
# The three classes below all subclass UnsupportedFileException, so a single
# 'except UnsupportedFileException' also catches each of them.
class UnsupportedFileException( HydrusException ): pass
class ZeroSizeFileException( UnsupportedFileException ): pass
class DamagedOrUnusualFileException( UnsupportedFileException ): pass
# Signals a file that is encrypted (e.g. a password protected PDF).
class EncryptedFileException( UnsupportedFileException ): pass
# Derives directly from HydrusException; it is not an UnsupportedFileException.
class VetoException( HydrusException ): pass

View File

@ -16,6 +16,7 @@ from hydrus.core import HydrusProcreateHandling
from hydrus.core import HydrusPaths
from hydrus.core import HydrusSerialisable
from hydrus.core import HydrusSVGHandling
from hydrus.core import HydrusPDFHandling
from hydrus.core import HydrusTemp
from hydrus.core import HydrusText
from hydrus.core import HydrusVideoHandling
@ -229,6 +230,24 @@ def GenerateThumbnailBytes( path, target_resolution, mime, duration, num_frames,
thumb_path = os.path.join( HC.STATIC_DIR, 'svg.png' )
thumbnail_bytes = HydrusImageHandling.GenerateThumbnailBytesFromStaticImagePath( thumb_path, target_resolution, HC.IMAGE_PNG, clip_rect = clip_rect )
elif mime == HC.APPLICATION_PDF:
try:
thumbnail_bytes = HydrusPDFHandling.GenerateThumbnailBytesFromPDFPath( path, target_resolution, clip_rect = clip_rect )
except Exception as e:
if not isinstance( e, HydrusExceptions.UnsupportedFileException ):
HydrusData.Print( 'Problem generating thumbnail for "{}":'.format( path ) )
HydrusData.PrintException( e )
thumb_path = os.path.join( HC.STATIC_DIR, 'pdf.png' )
thumbnail_bytes = HydrusImageHandling.GenerateThumbnailBytesFromStaticImagePath( thumb_path, target_resolution, HC.IMAGE_PNG, clip_rect = clip_rect )
elif mime == HC.APPLICATION_FLASH:
@ -424,6 +443,10 @@ def GetFileInfo( path, mime = None, ok_to_look_for_hydrus_updates = False ):
elif mime == HC.IMAGE_SVG:
( width, height ) = HydrusSVGHandling.GetSVGResolution( path )
elif mime == HC.APPLICATION_PDF:
( width, height ) = HydrusPDFHandling.GetPDFResolution( path )
elif mime == HC.APPLICATION_FLASH:

View File

@ -793,6 +793,15 @@ def GetThumbnailResolutionAndClipRegion( image_resolution: typing.Tuple[ int, in
bounding_height = int( bounding_height * thumbnail_dpr )
bounding_width = int( bounding_width * thumbnail_dpr )
if im_width is None:
im_width = bounding_width
if im_height is None:
im_height = bounding_height
# TODO SVG thumbs should always scale up to the bounding dimensions

View File

@ -0,0 +1,16 @@
import typing
from hydrus.core import HydrusExceptions
def BaseGenerateThumbnailBytesFromPDFPath( path: str, target_resolution: typing.Tuple[int, int], clip_rect = None ) -> bytes:
    """
    Default PDF thumbnailer: it never produces a thumbnail.
    
    All arguments are ignored and HydrusExceptions.UnsupportedFileException is
    always raised.
    """
    
    no_thumbnail_available = HydrusExceptions.UnsupportedFileException()
    
    raise no_thumbnail_available
    
def BaseGetPDFResolution( path: str ):
    """
    Default PDF resolution lookup: the resolution is always unknown.
    
    `path` is ignored and the result is a ( width, height ) pair of Nones.
    """
    
    unknown_width = None
    unknown_height = None
    
    return ( unknown_width, unknown_height )
    
# Module-level entry points. They start out bound to the Base* defaults above and are
# plain module attributes, so other code can rebind them to a different implementation.
GenerateThumbnailBytesFromPDFPath = BaseGenerateThumbnailBytesFromPDFPath
GetPDFResolution = BaseGetPDFResolution