From 8360fcc52751176d70ca51db5602a2f559b582b4 Mon Sep 17 00:00:00 2001 From: Paul Friederichsen Date: Sat, 2 Sep 2023 14:36:17 -0500 Subject: [PATCH] Add thumbnails for PDFs (#1421) * Add thumbnails for PDFs * Update HydrusPDFHandling.py * Handle password protected and otherwise unsupported PDFs * Fix DPI calculation * Use new exception for encrypted PDFs * Formatting * Update filetype docs * Add notes about PDF handling to filetypes doc * Handle scale to fill thumbnails for PDFs, ROUND THE RESOLUTION --- docs/filetypes.md | 2 +- hydrus/client/ClientFiles.py | 1 + hydrus/client/ClientPDFHandling.py | 131 +++++++++++++++++++++ hydrus/client/media/ClientMediaManagers.py | 13 -- hydrus/core/HydrusConstants.py | 2 +- hydrus/core/HydrusExceptions.py | 1 + hydrus/core/HydrusFileHandling.py | 23 ++++ hydrus/core/HydrusImageHandling.py | 9 ++ hydrus/core/HydrusPDFHandling.py | 16 +++ 9 files changed, 183 insertions(+), 15 deletions(-) create mode 100644 hydrus/client/ClientPDFHandling.py create mode 100644 hydrus/core/HydrusPDFHandling.py diff --git a/docs/filetypes.md b/docs/filetypes.md index c09370cc..818b5464 100644 --- a/docs/filetypes.md +++ b/docs/filetypes.md @@ -70,7 +70,7 @@ This is a list of all filetypes Hydrus can import. Hydrus determines the filetyp | Filetype | Extension | MIME type | Thumbnails | Viewable in Hydrus | Notes | | -------- | --------- | ------------------------------- | :--------: | :----------------: | ----- | | flash | `.swf` | `application/x-shockwave-flash` | ✅ | ❌ | | -| pdf | `.pdf` | `application/pdf` | ❌ | ❌ | | +| pdf | `.pdf` | `application/pdf` | ✅ | ❌ | 300 DPI assumed for resolution. No thumbnails for encrypted PDFs. | ## Image Project Files diff --git a/hydrus/client/ClientFiles.py b/hydrus/client/ClientFiles.py index 49f23c00..e0ba660e 100644 --- a/hydrus/client/ClientFiles.py +++ b/hydrus/client/ClientFiles.py @@ -23,6 +23,7 @@ from hydrus.client import ClientFilesPhysical from hydrus.client import ClientImageHandling from hydrus.client import ClientPaths from hydrus.client import ClientSVGHandling # important to keep this in, despite not being used, since there's initialisation stuff in here +from hydrus.client import ClientPDFHandling # important to keep this in, despite not being used, since there's initialisation stuff in here from hydrus.client import ClientThreading from hydrus.client.metadata import ClientTags diff --git a/hydrus/client/ClientPDFHandling.py b/hydrus/client/ClientPDFHandling.py new file mode 100644 index 00000000..5c1e7080 --- /dev/null +++ b/hydrus/client/ClientPDFHandling.py @@ -0,0 +1,131 @@ +import typing + +from qtpy import QtPdf +from qtpy import QtGui as QG +from qtpy import QtCore as QC + +from hydrus.core import HydrusExceptions +from hydrus.core import HydrusImageHandling +from hydrus.core import HydrusPDFHandling +from hydrus.core import HydrusData + +from hydrus.client.gui import ClientGUIFunctions + +def LoadPDF( path: str ): + + try: + + document = QtPdf.QPdfDocument() + + document.load(path) + + except: + + raise HydrusExceptions.DamagedOrUnusualFileException( 'Could not load PDF file.' ) + + status = document.status() + + if status is not QtPdf.QPdfDocument.Status.Ready: + + if status is QtPdf.QPdfDocument.Status.Error: + + error = document.error() + + if error is QtPdf.QPdfDocument.Error.IncorrectPassword: + + raise HydrusExceptions.EncryptedFileException( f'PDF is password protected!' ) + + elif error is QtPdf.QPdfDocument.Error.UnsupportedSecurityScheme: + + raise HydrusExceptions.EncryptedFileException( f'PDF uses an unsupported security scheme' ) + + else: + + raise HydrusExceptions.DamagedOrUnusualFileException( f'PDF document error: {document.error()}!' ) + + else: + + raise HydrusExceptions.DamagedOrUnusualFileException( f'PDF document status: {status}!' ) + + return document + + +def GenerateThumbnailBytesFromPDFPath( path: str, target_resolution: typing.Tuple[int, int], clip_rect = None ) -> bytes: + + document = LoadPDF( path ) + + if clip_rect is None: + + ( target_width, target_height ) = target_resolution + + resolution = QC.QSize( target_width, target_height ) + + else: + + ( pdf_width, pdf_height ) = _GetPDFResolution(document) + + resolution = QC.QSize( pdf_width, pdf_height ) + + try: + + qt_image = document.render(0, resolution) + + # ClientGUIFunctions.ConvertQtImageToNumPy doesn't handle other formats well + qt_image.convertToFormat(QG.QImage.Format_RGBA8888) + + + numpy_image = ClientGUIFunctions.ConvertQtImageToNumPy( qt_image ) + + document.close() + + if clip_rect is None: + + thumbnail_numpy_image = numpy_image + + else: + + numpy_image = HydrusImageHandling.ClipNumPyImage( numpy_image, clip_rect ) + + thumbnail_numpy_image = HydrusImageHandling.ResizeNumPyImage( numpy_image, target_resolution ) + + return HydrusImageHandling.GenerateThumbnailBytesNumPy( thumbnail_numpy_image ) + + except: + + raise HydrusExceptions.DamagedOrUnusualFileException() + + +HydrusPDFHandling.GenerateThumbnailBytesFromPDFPath = GenerateThumbnailBytesFromPDFPath + +PDF_ASSUMED_DPI = 300 + +def GetPDFResolution( path: str ): + + document = LoadPDF( path ) + + resolution = _GetPDFResolution(document) + + document.close() + + return resolution + + +def _GetPDFResolution( document: QtPdf.QPdfDocument ): + + try: + + pointSize = document.pagePointSize(0) + + # pointSize is in pts which are 1/72 of an inch. + # this calculates the "resolution" assuming PDF_ASSUMED_DPI dpi + width = pointSize.width() * (PDF_ASSUMED_DPI/72) + height = pointSize.height() * (PDF_ASSUMED_DPI/72) + + return (round(width), round(height)) + + except HydrusExceptions.EncryptedFileException: + + return (None, None) + + +HydrusPDFHandling.GetPDFResolution = GetPDFResolution diff --git a/hydrus/client/media/ClientMediaManagers.py b/hydrus/client/media/ClientMediaManagers.py index 312f8a57..318696ad 100644 --- a/hydrus/client/media/ClientMediaManagers.py +++ b/hydrus/client/media/ClientMediaManagers.py @@ -61,20 +61,7 @@ class FileInfoManager( object ): if mime is None: mime = HC.APPLICATION_UNKNOWN - - - if mime in HC.MIMES_WITH_THUMBNAILS: - - if width is None or width <= 0: - width = 1 - - - if height is None or height <= 0: - - height = 1 - - self.hash_id = hash_id self.hash = hash self.size = size diff --git a/hydrus/core/HydrusConstants.py b/hydrus/core/HydrusConstants.py index f39d107e..5f8d9330 100644 --- a/hydrus/core/HydrusConstants.py +++ b/hydrus/core/HydrusConstants.py @@ -916,7 +916,7 @@ PIL_HEIF_MIMES = { MIMES_THAT_DEFINITELY_HAVE_AUDIO = tuple( [ APPLICATION_FLASH ] + list( AUDIO ) ) MIMES_THAT_MAY_HAVE_AUDIO = tuple( list( MIMES_THAT_DEFINITELY_HAVE_AUDIO ) + list( VIDEO ) ) -APPLICATIONS_WITH_THUMBNAILS = set( { IMAGE_SVG, APPLICATION_FLASH, APPLICATION_CLIP, APPLICATION_KRITA, APPLICATION_PROCREATE } ).union( VIEWABLE_IMAGE_PROJECT_FILES ) +APPLICATIONS_WITH_THUMBNAILS = set( { IMAGE_SVG, APPLICATION_PDF, APPLICATION_FLASH, APPLICATION_CLIP, APPLICATION_KRITA, APPLICATION_PROCREATE } ).union( VIEWABLE_IMAGE_PROJECT_FILES ) MIMES_WITH_THUMBNAILS = set( IMAGES ).union( ANIMATIONS ).union( VIDEO ).union( APPLICATIONS_WITH_THUMBNAILS ) diff --git a/hydrus/core/HydrusExceptions.py b/hydrus/core/HydrusExceptions.py index 1a19d3d0..0b4e7f61 100644 --- a/hydrus/core/HydrusExceptions.py +++ b/hydrus/core/HydrusExceptions.py @@ -57,6 +57,7 @@ class FileImportBlockException( HydrusException ): pass class UnsupportedFileException( HydrusException ): pass class ZeroSizeFileException( UnsupportedFileException ): pass class DamagedOrUnusualFileException( UnsupportedFileException ): pass +class EncryptedFileException( UnsupportedFileException ): pass class VetoException( HydrusException ): pass diff --git a/hydrus/core/HydrusFileHandling.py b/hydrus/core/HydrusFileHandling.py index 3c24a67f..a1271860 100644 --- a/hydrus/core/HydrusFileHandling.py +++ b/hydrus/core/HydrusFileHandling.py @@ -16,6 +16,7 @@ from hydrus.core import HydrusProcreateHandling from hydrus.core import HydrusPaths from hydrus.core import HydrusSerialisable from hydrus.core import HydrusSVGHandling +from hydrus.core import HydrusPDFHandling from hydrus.core import HydrusTemp from hydrus.core import HydrusText from hydrus.core import HydrusVideoHandling @@ -229,6 +230,24 @@ def GenerateThumbnailBytes( path, target_resolution, mime, duration, num_frames, thumb_path = os.path.join( HC.STATIC_DIR, 'svg.png' ) thumbnail_bytes = HydrusImageHandling.GenerateThumbnailBytesFromStaticImagePath( thumb_path, target_resolution, HC.IMAGE_PNG, clip_rect = clip_rect ) + + elif mime == HC.APPLICATION_PDF: + + try: + + thumbnail_bytes = HydrusPDFHandling.GenerateThumbnailBytesFromPDFPath( path, target_resolution, clip_rect = clip_rect ) + + except Exception as e: + + if not isinstance( e, HydrusExceptions.UnsupportedFileException ): + + HydrusData.Print( 'Problem generating thumbnail for "{}":'.format( path ) ) + HydrusData.PrintException( e ) + + + thumb_path = os.path.join( HC.STATIC_DIR, 'pdf.png' ) + + thumbnail_bytes = HydrusImageHandling.GenerateThumbnailBytesFromStaticImagePath( thumb_path, target_resolution, HC.IMAGE_PNG, clip_rect = clip_rect ) elif mime == HC.APPLICATION_FLASH: @@ -424,6 +443,10 @@ def GetFileInfo( path, mime = None, ok_to_look_for_hydrus_updates = False ): elif mime == HC.IMAGE_SVG: ( width, height ) = HydrusSVGHandling.GetSVGResolution( path ) + + elif mime == HC.APPLICATION_PDF: + + ( width, height ) = HydrusPDFHandling.GetPDFResolution( path ) elif mime == HC.APPLICATION_FLASH: diff --git a/hydrus/core/HydrusImageHandling.py b/hydrus/core/HydrusImageHandling.py index 9a8c8403..586a32d0 100644 --- a/hydrus/core/HydrusImageHandling.py +++ b/hydrus/core/HydrusImageHandling.py @@ -793,6 +793,15 @@ def GetThumbnailResolutionAndClipRegion( image_resolution: typing.Tuple[ int, in bounding_height = int( bounding_height * thumbnail_dpr ) bounding_width = int( bounding_width * thumbnail_dpr ) + + if im_width is None: + + im_width = bounding_width + + if im_height is None: + + im_height = bounding_height + # TODO SVG thumbs should always scale up to the bounding dimensions diff --git a/hydrus/core/HydrusPDFHandling.py b/hydrus/core/HydrusPDFHandling.py new file mode 100644 index 00000000..e75060e0 --- /dev/null +++ b/hydrus/core/HydrusPDFHandling.py @@ -0,0 +1,16 @@ +import typing + +from hydrus.core import HydrusExceptions + +def BaseGenerateThumbnailBytesFromPDFPath( path: str, target_resolution: typing.Tuple[int, int], clip_rect = None ) -> bytes: + + raise HydrusExceptions.UnsupportedFileException() + + +def BaseGetPDFResolution( path: str ): + + return ( None, None ) + + +GenerateThumbnailBytesFromPDFPath = BaseGenerateThumbnailBytesFromPDFPath +GetPDFResolution = BaseGetPDFResolution