Improvements to MS Office filetype support (#1528)
* Update icons for MS Office filetypes * Remove unused old PDF word count code * Add more strict office file detection * Remove old HydrusDocumentHandling import * Add word count for docx and pptx and thumbnails for pptx * Update filetypes doc * Update filetype docs with pptx dpi * Remove prints and clean up formatting * Center default thumbnails inside target resolution
This commit is contained in:
parent
31211945d9
commit
77ab38e50e
|
@ -70,16 +70,16 @@ The filetype for a file can be overridden with `manage -> force filetype` in the
|
|||
|
||||
## Applications
|
||||
|
||||
| Filetype | Extension | MIME type | Thumbnails | Viewable in Hydrus | Notes |
|
||||
|----------|-----------|-----------------------------------------------------------------------------| :--------: | :----------------: | ----------------------------------------------------------------- |
|
||||
| flash | `.swf` | `application/x-shockwave-flash` | ✅ | ❌ | |
|
||||
| pdf | `.pdf` | `application/pdf` | ✅ | ❌ | 300 DPI assumed for resolution. No thumbnails for encrypted PDFs. |
|
||||
| epub | `.epub` | `application/epub+zip` | ❌ | ❌ | |
|
||||
| djvu | `.djvu` | `image/vnd.djvu` | ❌ | ❌ | |
|
||||
| docx | `.docx` | `application/vnd.openxmlformats-officedocument.wordprocessingml.document` | ❌ | ❌ | |
|
||||
| xlsx | `.xlsx` | `application/vnd.openxmlformats-officedocument.spreadsheetml.sheet` | ❌ | ❌ | |
|
||||
| pptx | `.pptx` | `application/vnd.openxmlformats-officedocument.presentationml.presentation` | ❌ | ❌ | |
|
||||
| rtf | `.rtf` | `application/rtf` | ❌ | ❌ | |
|
||||
| Filetype | Extension | MIME type | Thumbnails | Viewable in Hydrus | Notes |
|
||||
| -------- | --------- | --------------------------------------------------------------------------- | :--------: | :----------------: | -------------------------------------------------------------------------- |
|
||||
| flash | `.swf` | `application/x-shockwave-flash` | ✅ | ❌ | |
|
||||
| pdf | `.pdf` | `application/pdf` | ✅ | ❌ | 300 DPI assumed for resolution. No thumbnails for encrypted PDFs. |
|
||||
| epub | `.epub` | `application/epub+zip` | ❌ | ❌ | |
|
||||
| djvu | `.djvu` | `image/vnd.djvu` | ❌ | ❌ | |
|
||||
| docx | `.docx` | `application/vnd.openxmlformats-officedocument.wordprocessingml.document` | ❌ | ❌ | |
|
||||
| xlsx | `.xlsx` | `application/vnd.openxmlformats-officedocument.spreadsheetml.sheet` | ❌ | ❌ | |
|
||||
| pptx | `.pptx` | `application/vnd.openxmlformats-officedocument.presentationml.presentation` | ✅ | ❌ | 300 DPI assumed for resolution. Thumbnail only if embedded in the document |
|
||||
| rtf | `.rtf` | `application/rtf` | ❌ | ❌ | |
|
||||
|
||||
|
||||
## Image Project Files
|
||||
|
|
|
@ -741,9 +741,9 @@ APPLICATION_DJVU = 72
|
|||
APPLICATION_CBZ = 73
|
||||
ANIMATION_UGOIRA = 74
|
||||
APPLICATION_RTF = 75
|
||||
APPLICATION_MICROSOFT_OPEN_XML_DOCX = 76
|
||||
APPLICATION_MICROSOFT_OPEN_XML_XLSX = 77
|
||||
APPLICATION_MICROSOFT_OPEN_XML_PPTX = 78
|
||||
APPLICATION_DOCX = 76
|
||||
APPLICATION_XLSX = 77
|
||||
APPLICATION_PPTX = 78
|
||||
APPLICATION_OCTET_STREAM = 100
|
||||
APPLICATION_UNKNOWN = 101
|
||||
|
||||
|
@ -794,9 +794,9 @@ SEARCHABLE_MIMES = {
|
|||
APPLICATION_XCF,
|
||||
APPLICATION_PROCREATE,
|
||||
APPLICATION_PDF,
|
||||
APPLICATION_MICROSOFT_OPEN_XML_DOCX,
|
||||
APPLICATION_MICROSOFT_OPEN_XML_XLSX,
|
||||
APPLICATION_MICROSOFT_OPEN_XML_PPTX,
|
||||
APPLICATION_DOCX,
|
||||
APPLICATION_XLSX,
|
||||
APPLICATION_PPTX,
|
||||
APPLICATION_EPUB,
|
||||
APPLICATION_DJVU,
|
||||
APPLICATION_RTF,
|
||||
|
@ -895,9 +895,9 @@ APPLICATIONS = [
|
|||
APPLICATION_PDF,
|
||||
APPLICATION_EPUB,
|
||||
APPLICATION_DJVU,
|
||||
APPLICATION_MICROSOFT_OPEN_XML_DOCX,
|
||||
APPLICATION_MICROSOFT_OPEN_XML_XLSX,
|
||||
APPLICATION_MICROSOFT_OPEN_XML_PPTX,
|
||||
APPLICATION_DOCX,
|
||||
APPLICATION_XLSX,
|
||||
APPLICATION_PPTX,
|
||||
APPLICATION_RTF
|
||||
]
|
||||
|
||||
|
@ -925,7 +925,7 @@ VIEWABLE_IMAGE_PROJECT_FILES = { APPLICATION_PSD, APPLICATION_KRITA }
|
|||
OPEN_DOCUMENT_ZIPS = { APPLICATION_KRITA, APPLICATION_EPUB }
|
||||
|
||||
# zip files that have a `[Content_Types].xml` file inside
|
||||
MICROSOFT_OPEN_XML_DOCUMENT_ZIPS = { APPLICATION_MICROSOFT_OPEN_XML_DOCX, APPLICATION_MICROSOFT_OPEN_XML_XLSX, APPLICATION_MICROSOFT_OPEN_XML_PPTX }
|
||||
MICROSOFT_OPEN_XML_DOCUMENT_ZIPS = { APPLICATION_DOCX, APPLICATION_XLSX, APPLICATION_PPTX }
|
||||
|
||||
general_mimetypes_to_mime_groups = {
|
||||
GENERAL_APPLICATION : APPLICATIONS,
|
||||
|
@ -988,7 +988,7 @@ MIMES_THAT_MAY_THEORETICALLY_HAVE_TRANSPARENCY = MIMES_THAT_WE_CAN_CHECK_FOR_TRA
|
|||
ANIMATION_APNG
|
||||
} )
|
||||
|
||||
APPLICATIONS_WITH_THUMBNAILS = { IMAGE_SVG, APPLICATION_PDF, APPLICATION_FLASH, APPLICATION_CLIP, APPLICATION_PROCREATE }.union( VIEWABLE_IMAGE_PROJECT_FILES ).union( { APPLICATION_CBZ } )
|
||||
APPLICATIONS_WITH_THUMBNAILS = { IMAGE_SVG, APPLICATION_PDF, APPLICATION_FLASH, APPLICATION_CLIP, APPLICATION_PROCREATE, APPLICATION_CBZ, APPLICATION_PPTX }.union( VIEWABLE_IMAGE_PROJECT_FILES )
|
||||
|
||||
MIMES_WITH_THUMBNAILS = set( IMAGES ).union( ANIMATIONS ).union( VIDEO ).union( APPLICATIONS_WITH_THUMBNAILS )
|
||||
|
||||
|
@ -1041,9 +1041,9 @@ mime_enum_lookup = {
|
|||
'application/x-yaml' : APPLICATION_YAML,
|
||||
'PDF document' : APPLICATION_PDF,
|
||||
'application/pdf' : APPLICATION_PDF,
|
||||
'application/vnd.openxmlformats-officedocument.wordprocessingml.document' : APPLICATION_MICROSOFT_OPEN_XML_DOCX,
|
||||
'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet' : APPLICATION_MICROSOFT_OPEN_XML_XLSX,
|
||||
'application/vnd.openxmlformats-officedocument.presentationml.presentation' : APPLICATION_MICROSOFT_OPEN_XML_PPTX,
|
||||
'application/vnd.openxmlformats-officedocument.wordprocessingml.document' : APPLICATION_DOCX,
|
||||
'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet' : APPLICATION_XLSX,
|
||||
'application/vnd.openxmlformats-officedocument.presentationml.presentation' : APPLICATION_PPTX,
|
||||
'application/epub+zip' : APPLICATION_EPUB,
|
||||
'image/vnd.djvu' : APPLICATION_DJVU,
|
||||
'image/vnd.djvu+multipage' : APPLICATION_DJVU,
|
||||
|
@ -1116,9 +1116,9 @@ mime_string_lookup = {
|
|||
APPLICATION_JSON : 'json',
|
||||
APPLICATION_CBOR : 'cbor',
|
||||
APPLICATION_PDF : 'pdf',
|
||||
APPLICATION_MICROSOFT_OPEN_XML_DOCX : 'docx',
|
||||
APPLICATION_MICROSOFT_OPEN_XML_XLSX : 'xlsx',
|
||||
APPLICATION_MICROSOFT_OPEN_XML_PPTX : 'pptx',
|
||||
APPLICATION_DOCX : 'docx',
|
||||
APPLICATION_XLSX : 'xlsx',
|
||||
APPLICATION_PPTX : 'pptx',
|
||||
APPLICATION_EPUB : 'epub',
|
||||
APPLICATION_DJVU : 'djvu',
|
||||
APPLICATION_RTF : 'rtf',
|
||||
|
@ -1201,9 +1201,9 @@ mime_mimetype_string_lookup = {
|
|||
APPLICATION_JSON : 'application/json',
|
||||
APPLICATION_CBOR : 'application/cbor',
|
||||
APPLICATION_PDF : 'application/pdf',
|
||||
APPLICATION_MICROSOFT_OPEN_XML_DOCX : 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
|
||||
APPLICATION_MICROSOFT_OPEN_XML_XLSX : 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
|
||||
APPLICATION_MICROSOFT_OPEN_XML_PPTX : 'application/vnd.openxmlformats-officedocument.presentationml.presentation',
|
||||
APPLICATION_DOCX : 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
|
||||
APPLICATION_XLSX : 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
|
||||
APPLICATION_PPTX : 'application/vnd.openxmlformats-officedocument.presentationml.presentation',
|
||||
APPLICATION_EPUB : 'application/epub+zip',
|
||||
APPLICATION_DJVU : 'image/vnd.djvu',
|
||||
APPLICATION_RTF: 'application/rtf',
|
||||
|
@ -1284,9 +1284,9 @@ mime_ext_lookup = {
|
|||
APPLICATION_YAML : '.yaml',
|
||||
APPLICATION_JSON : '.json',
|
||||
APPLICATION_PDF : '.pdf',
|
||||
APPLICATION_MICROSOFT_OPEN_XML_DOCX : '.docx',
|
||||
APPLICATION_MICROSOFT_OPEN_XML_XLSX : '.xlsx',
|
||||
APPLICATION_MICROSOFT_OPEN_XML_PPTX : '.pptx',
|
||||
APPLICATION_DOCX : '.docx',
|
||||
APPLICATION_XLSX : '.xlsx',
|
||||
APPLICATION_PPTX : '.pptx',
|
||||
APPLICATION_EPUB : '.epub',
|
||||
APPLICATION_DJVU : '.djvu',
|
||||
APPLICATION_RTF : '.rtf',
|
||||
|
|
|
@ -306,31 +306,5 @@ def MimeFromOpenDocument( path ):
|
|||
|
||||
|
||||
|
||||
def MimeFromMicrosoftOpenXMLDocument( path ):
    """
    Guess which Microsoft Open XML filetype the zip at path is.
    
    Reads '[Content_Types].xml' from inside the zip and looks for the main-part content type
    string of a word processing document, a spreadsheet or a presentation, in that order.
    
    Returns the matching HC.APPLICATION_MICROSOFT_OPEN_XML_* constant, or None when nothing
    matches or the file cannot be read as such a zip.
    """
    
    try:
        
        content_types = GetZipAsPath( path, '[Content_Types].xml' ).read_text()
        
        if 'application/vnd.openxmlformats-officedocument.wordprocessingml.document.main+xml' in content_types:
            
            return HC.APPLICATION_MICROSOFT_OPEN_XML_DOCX
            
        elif 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet.main+xml' in content_types:
            
            return HC.APPLICATION_MICROSOFT_OPEN_XML_XLSX
            
        elif 'application/vnd.openxmlformats-officedocument.presentationml.presentation.main+xml' in content_types:
            
            return HC.APPLICATION_MICROSOFT_OPEN_XML_PPTX
            
        else:
            
            return None
            
        
    except Exception:
        
        # Detection is best-effort: any read/parse failure just means "not one of ours".
        # Exception (not a bare except) so KeyboardInterrupt/SystemExit still propagate.
        return None
        
    
|
||||
|
||||
|
||||
|
|
|
@ -1,43 +0,0 @@
|
|||
#import PyPDF2
|
||||
import re
|
||||
|
||||
def GetNumWordsFromString( s ):
    """
    Count the whitespace-separated words in the string s.
    
    Any run of whitespace (spaces, tabs, newlines) counts as one separator, and leading or
    trailing whitespace does not produce phantom words, so an empty or all-whitespace
    string has zero words.
    """
    
    # str.split() with no separator splits on whitespace runs and drops empty fields.
    # Splitting on a literal ' ' would count '' as one word and padded text as extra words.
    return len( s.split() )
    
|
||||
|
||||
def GetPDFNumWords( path ):
    """
    Return the number of words in the PDF at path.
    
    Word counting for PDFs is currently disabled, so this always returns None and never
    opens the file.
    """
    
    # I discovered a pdf that pulled this into an infinite loop due to malformed header.
    # This gives bunk data anyway, so let's just cut it out until we have a better solution here all around
    
    # The old PyPDF2-based estimate that used to follow this return was unreachable and has been removed.
    return None
    
|
||||
|
|
@ -14,7 +14,6 @@ from hydrus.core import HydrusTime
|
|||
from hydrus.core.files import HydrusAnimationHandling
|
||||
from hydrus.core.files import HydrusArchiveHandling
|
||||
from hydrus.core.files import HydrusClipHandling
|
||||
from hydrus.core.files import HydrusDocumentHandling
|
||||
from hydrus.core.files import HydrusFlashHandling
|
||||
from hydrus.core.files import HydrusKritaHandling
|
||||
from hydrus.core.files import HydrusPDFHandling
|
||||
|
@ -23,6 +22,7 @@ from hydrus.core.files import HydrusPSDHandling
|
|||
from hydrus.core.files import HydrusSVGHandling
|
||||
from hydrus.core.files import HydrusUgoiraHandling
|
||||
from hydrus.core.files import HydrusVideoHandling
|
||||
from hydrus.core.files import HydrusOfficeOpenXMLHandling
|
||||
from hydrus.core.files.images import HydrusImageHandling
|
||||
from hydrus.core.networking import HydrusNetwork
|
||||
|
||||
|
@ -75,9 +75,9 @@ for mime in HC.IMAGES:
|
|||
mimes_to_default_thumbnail_paths[ HC.APPLICATION_UNKNOWN ] = os.path.join( HC.STATIC_DIR, 'hydrus.png' )
|
||||
|
||||
mimes_to_default_thumbnail_paths[ HC.APPLICATION_PDF ] = os.path.join( HC.STATIC_DIR, 'pdf.png' )
|
||||
mimes_to_default_thumbnail_paths[ HC.APPLICATION_MICROSOFT_OPEN_XML_DOCX ] = os.path.join( HC.STATIC_DIR, 'docx.png' )
|
||||
mimes_to_default_thumbnail_paths[ HC.APPLICATION_MICROSOFT_OPEN_XML_XLSX ] = os.path.join( HC.STATIC_DIR, 'xlsx.png' )
|
||||
mimes_to_default_thumbnail_paths[ HC.APPLICATION_MICROSOFT_OPEN_XML_PPTX ] = os.path.join( HC.STATIC_DIR, 'pptx.png' )
|
||||
mimes_to_default_thumbnail_paths[ HC.APPLICATION_DOCX ] = os.path.join( HC.STATIC_DIR, 'docx.png' )
|
||||
mimes_to_default_thumbnail_paths[ HC.APPLICATION_XLSX ] = os.path.join( HC.STATIC_DIR, 'xlsx.png' )
|
||||
mimes_to_default_thumbnail_paths[ HC.APPLICATION_PPTX ] = os.path.join( HC.STATIC_DIR, 'pptx.png' )
|
||||
mimes_to_default_thumbnail_paths[ HC.APPLICATION_EPUB ] = os.path.join( HC.STATIC_DIR, 'epub.png' )
|
||||
mimes_to_default_thumbnail_paths[ HC.APPLICATION_DJVU ] = os.path.join( HC.STATIC_DIR, 'djvu.png' )
|
||||
mimes_to_default_thumbnail_paths[ HC.APPLICATION_PSD ] = os.path.join( HC.STATIC_DIR, 'psd.png' )
|
||||
|
@ -90,6 +90,13 @@ mimes_to_default_thumbnail_paths[ HC.APPLICATION_PROCREATE ] = os.path.join( HC.
|
|||
mimes_to_default_thumbnail_paths[ HC.APPLICATION_RTF ] = os.path.join( HC.STATIC_DIR, 'rtf.png' )
|
||||
mimes_to_default_thumbnail_paths[ HC.IMAGE_SVG ] = os.path.join( HC.STATIC_DIR, 'svg.png' )
|
||||
|
||||
def GenerateDefaultThumbnail( mime: int, target_resolution: typing.Tuple[ int, int ] ):
    """
    Build the stock placeholder thumbnail for a filetype.
    
    Looks up the static image registered for mime in mimes_to_default_thumbnail_paths and
    hands it, with target_resolution, to HydrusImageHandling.GenerateDefaultThumbnailNumPyFromPath,
    returning whatever that produces.
    """
    
    return HydrusImageHandling.GenerateDefaultThumbnailNumPyFromPath(
        mimes_to_default_thumbnail_paths[mime],
        target_resolution
    )
    
|
||||
|
||||
|
||||
def GenerateThumbnailBytes( path, target_resolution, mime, duration, num_frames, percentage_in = 35 ):
|
||||
|
||||
thumbnail_numpy = GenerateThumbnailNumPy( path, target_resolution, mime, duration, num_frames, percentage_in = percentage_in )
|
||||
|
@ -130,9 +137,7 @@ def GenerateThumbnailNumPy( path, target_resolution, mime, duration, num_frames,
|
|||
|
||||
PrintMoreThumbErrorInfo( e, f'Problem generating thumbnail for "{path}".', extra_description = extra_description )
|
||||
|
||||
thumb_path = os.path.join( HC.STATIC_DIR, 'zip.png' )
|
||||
|
||||
thumbnail_numpy = HydrusImageHandling.GenerateThumbnailNumPyFromStaticImagePath( thumb_path, target_resolution, HC.IMAGE_PNG )
|
||||
thumbnail_numpy = GenerateDefaultThumbnail(mime, target_resolution)
|
||||
|
||||
finally:
|
||||
|
||||
|
@ -153,9 +158,7 @@ def GenerateThumbnailNumPy( path, target_resolution, mime, duration, num_frames,
|
|||
|
||||
PrintMoreThumbErrorInfo( e, f'Problem generating thumbnail for "{path}".', extra_description = extra_description )
|
||||
|
||||
thumb_path = os.path.join( HC.STATIC_DIR, 'clip.png' )
|
||||
|
||||
thumbnail_numpy = HydrusImageHandling.GenerateThumbnailNumPyFromStaticImagePath( thumb_path, target_resolution, HC.IMAGE_PNG )
|
||||
thumbnail_numpy = GenerateDefaultThumbnail(mime, target_resolution)
|
||||
|
||||
finally:
|
||||
|
||||
|
@ -171,10 +174,8 @@ def GenerateThumbnailNumPy( path, target_resolution, mime, duration, num_frames,
|
|||
except Exception as e:
|
||||
|
||||
PrintMoreThumbErrorInfo( e, f'Problem generating thumbnail for "{path}".', extra_description = extra_description )
|
||||
|
||||
thumb_path = os.path.join( HC.STATIC_DIR, 'krita.png' )
|
||||
|
||||
thumbnail_numpy = HydrusImageHandling.GenerateThumbnailNumPyFromStaticImagePath( thumb_path, target_resolution, HC.IMAGE_PNG )
|
||||
|
||||
thumbnail_numpy = GenerateDefaultThumbnail(mime, target_resolution)
|
||||
|
||||
|
||||
elif mime == HC.APPLICATION_PROCREATE:
|
||||
|
@ -191,9 +192,7 @@ def GenerateThumbnailNumPy( path, target_resolution, mime, duration, num_frames,
|
|||
|
||||
PrintMoreThumbErrorInfo( e, f'Problem generating thumbnail for "{path}".', extra_description = extra_description )
|
||||
|
||||
thumb_path = os.path.join( HC.STATIC_DIR, 'procreate.png' )
|
||||
|
||||
thumbnail_numpy = HydrusImageHandling.GenerateThumbnailNumPyFromStaticImagePath( thumb_path, target_resolution, HC.IMAGE_PNG )
|
||||
thumbnail_numpy = GenerateDefaultThumbnail(mime, target_resolution)
|
||||
|
||||
finally:
|
||||
|
||||
|
@ -224,9 +223,7 @@ def GenerateThumbnailNumPy( path, target_resolution, mime, duration, num_frames,
|
|||
|
||||
PrintMoreThumbErrorInfo( e, f'Secondary problem generating thumbnail for "{path}".', extra_description = extra_description )
|
||||
|
||||
thumb_path = os.path.join( HC.STATIC_DIR, 'psd.png' )
|
||||
|
||||
thumbnail_numpy = HydrusImageHandling.GenerateThumbnailNumPyFromStaticImagePath( thumb_path, target_resolution, HC.IMAGE_PNG )
|
||||
thumbnail_numpy = GenerateDefaultThumbnail(mime, target_resolution)
|
||||
|
||||
finally:
|
||||
|
||||
|
@ -244,9 +241,7 @@ def GenerateThumbnailNumPy( path, target_resolution, mime, duration, num_frames,
|
|||
|
||||
PrintMoreThumbErrorInfo( e, f'Problem generating thumbnail for "{path}".', extra_description = extra_description )
|
||||
|
||||
thumb_path = os.path.join( HC.STATIC_DIR, 'svg.png' )
|
||||
|
||||
thumbnail_numpy = HydrusImageHandling.GenerateThumbnailNumPyFromStaticImagePath( thumb_path, target_resolution, HC.IMAGE_PNG )
|
||||
thumbnail_numpy = GenerateDefaultThumbnail(mime, target_resolution)
|
||||
|
||||
|
||||
elif mime == HC.APPLICATION_PDF:
|
||||
|
@ -259,10 +254,20 @@ def GenerateThumbnailNumPy( path, target_resolution, mime, duration, num_frames,
|
|||
|
||||
PrintMoreThumbErrorInfo( e, f'Problem generating thumbnail for "{path}".', extra_description = extra_description )
|
||||
|
||||
thumb_path = os.path.join( HC.STATIC_DIR, 'pdf.png' )
|
||||
thumbnail_numpy = GenerateDefaultThumbnail(mime, target_resolution)
|
||||
|
||||
thumbnail_numpy = HydrusImageHandling.GenerateThumbnailNumPyFromStaticImagePath( thumb_path, target_resolution, HC.IMAGE_PNG )
|
||||
|
||||
elif mime == HC.APPLICATION_PPTX:
|
||||
|
||||
try:
|
||||
|
||||
thumbnail_numpy = HydrusOfficeOpenXMLHandling.GenerateThumbnailNumPyFromOfficePath( path, target_resolution )
|
||||
|
||||
except Exception as e:
|
||||
|
||||
PrintMoreThumbErrorInfo( e, f'Problem generating thumbnail for "{path}".', extra_description = extra_description )
|
||||
|
||||
thumbnail_numpy = GenerateDefaultThumbnail(mime, target_resolution)
|
||||
|
||||
elif mime == HC.APPLICATION_FLASH:
|
||||
|
||||
|
@ -278,9 +283,7 @@ def GenerateThumbnailNumPy( path, target_resolution, mime, duration, num_frames,
|
|||
|
||||
PrintMoreThumbErrorInfo( e, f'Problem generating thumbnail for "{path}".', extra_description = extra_description )
|
||||
|
||||
thumb_path = os.path.join( HC.STATIC_DIR, 'flash.png' )
|
||||
|
||||
thumbnail_numpy = HydrusImageHandling.GenerateThumbnailNumPyFromStaticImagePath( thumb_path, target_resolution, HC.IMAGE_PNG )
|
||||
thumbnail_numpy = GenerateDefaultThumbnail(mime, target_resolution)
|
||||
|
||||
finally:
|
||||
|
||||
|
@ -299,9 +302,7 @@ def GenerateThumbnailNumPy( path, target_resolution, mime, duration, num_frames,
|
|||
|
||||
PrintMoreThumbErrorInfo( e, f'Problem generating thumbnail for "{path}".', extra_description = extra_description )
|
||||
|
||||
thumb_path = os.path.join( HC.STATIC_DIR, 'hydrus.png' )
|
||||
|
||||
thumbnail_numpy = HydrusImageHandling.GenerateThumbnailNumPyFromStaticImagePath( thumb_path, target_resolution, HC.IMAGE_PNG )
|
||||
thumbnail_numpy = GenerateDefaultThumbnail(mime, target_resolution)
|
||||
|
||||
|
||||
elif mime == HC.ANIMATION_UGOIRA:
|
||||
|
@ -322,9 +323,7 @@ def GenerateThumbnailNumPy( path, target_resolution, mime, duration, num_frames,
|
|||
|
||||
PrintMoreThumbErrorInfo( e, f'Problem generating thumbnail for "{path}".', extra_description = extra_description )
|
||||
|
||||
thumb_path = os.path.join( HC.STATIC_DIR, 'zip.png' )
|
||||
|
||||
thumbnail_numpy = HydrusImageHandling.GenerateThumbnailNumPyFromStaticImagePath( thumb_path, target_resolution, HC.IMAGE_PNG )
|
||||
thumbnail_numpy = GenerateDefaultThumbnail(mime, target_resolution)
|
||||
|
||||
finally:
|
||||
|
||||
|
@ -379,9 +378,7 @@ def GenerateThumbnailNumPy( path, target_resolution, mime, duration, num_frames,
|
|||
|
||||
if numpy_image is None:
|
||||
|
||||
thumb_path = os.path.join( HC.STATIC_DIR, 'hydrus.png' )
|
||||
|
||||
thumbnail_numpy = HydrusImageHandling.GenerateThumbnailNumPyFromStaticImagePath( thumb_path, target_resolution, HC.IMAGE_PNG )
|
||||
thumbnail_numpy = GenerateDefaultThumbnail(mime, target_resolution)
|
||||
|
||||
else:
|
||||
|
||||
|
@ -539,14 +536,33 @@ def GetFileInfo( path, mime = None, ok_to_look_for_hydrus_updates = False ):
|
|||
pass
|
||||
|
||||
|
||||
elif mime == HC.APPLICATION_PPTX:
|
||||
|
||||
try:
|
||||
|
||||
( num_words, ( width, height ) ) = HydrusOfficeOpenXMLHandling.GetPPTXInfo( path )
|
||||
|
||||
except HydrusExceptions.LimitedSupportFileException:
|
||||
|
||||
pass
|
||||
|
||||
|
||||
|
||||
elif mime == HC.APPLICATION_DOCX:
|
||||
|
||||
try:
|
||||
|
||||
( num_words ) = HydrusOfficeOpenXMLHandling.GetDOCXInfo( path )
|
||||
|
||||
except HydrusExceptions.LimitedSupportFileException:
|
||||
|
||||
pass
|
||||
|
||||
|
||||
elif mime == HC.APPLICATION_FLASH:
|
||||
|
||||
( ( width, height ), duration, num_frames ) = HydrusFlashHandling.GetFlashProperties( path )
|
||||
|
||||
elif mime == HC.APPLICATION_PDF:
|
||||
|
||||
num_words = HydrusDocumentHandling.GetPDFNumWords( path ) # this now give None until a better solution can be found
|
||||
|
||||
elif mime == HC.APPLICATION_PSD:
|
||||
|
||||
try:
|
||||
|
@ -775,7 +791,7 @@ def GetMime( path, ok_to_look_for_hydrus_updates = False ):
|
|||
return opendoc_mime
|
||||
|
||||
|
||||
microsoft_mime = HydrusArchiveHandling.MimeFromMicrosoftOpenXMLDocument( path )
|
||||
microsoft_mime = HydrusOfficeOpenXMLHandling.MimeFromMicrosoftOpenXMLDocument( path )
|
||||
|
||||
if microsoft_mime is not None:
|
||||
|
||||
|
|
|
@ -0,0 +1,130 @@
|
|||
import typing
|
||||
|
||||
from hydrus.core import HydrusConstants as HC
|
||||
from hydrus.core.files.HydrusArchiveHandling import GetZipAsPath
|
||||
from hydrus.core.files.images import HydrusImageHandling
|
||||
|
||||
import xml.etree.ElementTree as ET
|
||||
|
||||
from PIL import Image as PILImage
|
||||
|
||||
|
||||
DOCX_XPATH = ".//{*}Override[@PartName='/word/document.xml'][@ContentType='application/vnd.openxmlformats-officedocument.wordprocessingml.document.main+xml']"
|
||||
XLSX_XPATH = ".//{*}Override[@PartName='/xl/workbook.xml'][@ContentType='application/vnd.openxmlformats-officedocument.spreadsheetml.sheet.main+xml']"
|
||||
PPTX_XPATH = ".//{*}Override[@PartName='/ppt/presentation.xml'][@ContentType='application/vnd.openxmlformats-officedocument.presentationml.presentation.main+xml']"
|
||||
|
||||
def MimeFromMicrosoftOpenXMLDocument(path: str):
    """
    Guess which Microsoft Office Open XML filetype the zip at path is.
    
    Parses '[Content_Types].xml' from inside the zip and checks, in order, for the Override
    element described by DOCX_XPATH, XLSX_XPATH and PPTX_XPATH.
    
    Returns HC.APPLICATION_DOCX, HC.APPLICATION_XLSX or HC.APPLICATION_PPTX for the first
    match, or None when nothing matches or the file cannot be read/parsed.
    """
    
    try:
        
        # Parse inside a with-block so the zip member handle is closed even if parsing fails.
        with GetZipAsPath( path, '[Content_Types].xml' ).open( 'rb' ) as file:
            
            root = ET.parse( file )
            
        
        if root.find( DOCX_XPATH ) is not None:
            
            return HC.APPLICATION_DOCX
            
        elif root.find( XLSX_XPATH ) is not None:
            
            return HC.APPLICATION_XLSX
            
        elif root.find( PPTX_XPATH ) is not None:
            
            return HC.APPLICATION_PPTX
            
        else:
            
            return None
            
        
    except Exception:
        
        # Detection is best-effort: a missing member, bad zip or malformed XML all mean "no".
        # Exception (not a bare except) so KeyboardInterrupt/SystemExit still propagate.
        return None
        
    
|
||||
|
||||
|
||||
def GenerateThumbnailNumPyFromOfficePath( path: str, target_resolution: typing.Tuple[ int, int ] ):
    """
    Make a thumbnail from the preview image embedded in an Office Open XML file.
    
    Opens 'docProps/thumbnail.jpeg' inside the zip at path, loads it as a PIL image and
    resizes it to exactly target_resolution with Lanczos resampling.
    
    Returns the result of HydrusImageHandling.GenerateNumPyImageFromPILImage on the resized
    image (not bytes). Errors from a missing or unreadable embedded thumbnail are not
    caught here and propagate to the caller.
    """
    
    # The handle is only needed until the pixels are read; resize() forces that read and
    # returns a new in-memory image, so the handle can be closed right after.
    with GetZipAsPath( path, 'docProps/thumbnail.jpeg' ).open( 'rb' ) as zip_path_file_obj:
        
        pil_image = HydrusImageHandling.GeneratePILImage( zip_path_file_obj )
        
        thumbnail_pil_image = pil_image.resize( target_resolution, PILImage.LANCZOS )
        
    
    numpy_image = HydrusImageHandling.GenerateNumPyImageFromPILImage( thumbnail_pil_image )
    
    return numpy_image
    
|
||||
|
||||
|
||||
PPTX_ASSUMED_DPI = 300
|
||||
|
||||
# https://startbigthinksmall.wordpress.com/2010/01/04/points-inches-and-emus-measuring-units-in-office-open-xml/
|
||||
# PowerPoint uses English Metric Unit (EMU) for vector coordinates
|
||||
# 1 inch = 914400 EMU
|
||||
|
||||
PPTX_PIXEL_PER_EMU = PPTX_ASSUMED_DPI / 914400
|
||||
|
||||
def PowerPointResolution( path: str ):
    """
    Return the slide size of the pptx at path as ( width, height ) in pixels.
    
    Reads the cx/cy attributes of the p:sldSz element in 'ppt/presentation.xml' and scales
    them from EMU to pixels with PPTX_PIXEL_PER_EMU, rounding to the nearest integer.
    
    Nothing is caught here: a missing member, malformed XML or an absent sldSz element
    raises to the caller.
    """
    
    # Parse inside a with-block so the zip member handle is closed even if parsing fails.
    with GetZipAsPath( path, 'ppt/presentation.xml' ).open( 'rb' ) as file:
        
        root = ET.parse( file )
        
    
    sldSz = root.find('./p:sldSz', {'p': 'http://schemas.openxmlformats.org/presentationml/2006/main'})
    
    x_emu = int(sldSz.get('cx'))
    
    y_emu = int(sldSz.get('cy'))
    
    width = round(x_emu * PPTX_PIXEL_PER_EMU)
    
    height = round(y_emu * PPTX_PIXEL_PER_EMU)
    
    return ( width, height )
    
|
||||
|
||||
|
||||
def OfficeDocumentWordCount( path: str ):
    """
    Return the word count recorded in an Office Open XML file's extended properties.
    
    Reads the text of the ep:Words element in 'docProps/app.xml' inside the zip at path and
    converts it to int. This is the count stored in the document's metadata, not a count
    computed from the content.
    
    Nothing is caught here: a missing member, malformed XML, or an absent or non-numeric
    Words element raises to the caller.
    """
    
    # Parse inside a with-block so the zip member handle is closed even if parsing fails.
    with GetZipAsPath( path, 'docProps/app.xml' ).open( 'rb' ) as file:
        
        root = ET.parse( file )
        
    
    words = root.findtext('./ep:Words', namespaces = {'ep' : 'http://schemas.openxmlformats.org/officeDocument/2006/extended-properties'})
    
    num_words = int(words)
    
    return num_words
    
|
||||
|
||||
|
||||
def GetPPTXInfo( path: str ):
    """
    Gather what metadata can be read from the pptx at path.
    
    Returns ( num_words, ( width, height ) ). The resolution and the word count are fetched
    independently; whichever one fails is reported as None (or ( None, None ) for the
    resolution) without affecting the other.
    """
    
    # Exception (not a bare except) so KeyboardInterrupt/SystemExit still propagate.
    try:
        
        ( width, height ) = PowerPointResolution( path )
        
    except Exception:
        
        ( width, height ) = ( None, None )
        
    
    try:
        
        num_words = OfficeDocumentWordCount( path )
        
    except Exception:
        
        num_words = None
        
    
    return ( num_words, ( width, height ) )
    
|
||||
|
||||
|
||||
def GetDOCXInfo( path:str ):
    """
    Gather what metadata can be read from the docx at path.
    
    Returns the word count from OfficeDocumentWordCount as a plain value (not a tuple), or
    None if it could not be read.
    """
    
    try:
        
        num_words = OfficeDocumentWordCount( path )
        
    except Exception:
        
        # Best-effort: a document without usable metadata just has an unknown word count.
        # Exception (not a bare except) so KeyboardInterrupt/SystemExit still propagate.
        num_words = None
        
    
    return num_words
    
|
||||
|
|
@ -9,6 +9,7 @@ import warnings
|
|||
|
||||
from PIL import ImageFile as PILImageFile
|
||||
from PIL import Image as PILImage
|
||||
from PIL import ImageOps as PILImageOps
|
||||
|
||||
try:
|
||||
|
||||
|
@ -663,3 +664,12 @@ def ResizeNumPyImage( numpy_image: numpy.array, target_resolution, forced_interp
|
|||
|
||||
return cv2.resize( numpy_image, ( target_width, target_height ), interpolation = interpolation )
|
||||
|
||||
|
||||
def GenerateDefaultThumbnailNumPyFromPath( path: str, target_resolution: typing.Tuple[ int, int ] ):
    """
    Load the static image at path and fit it into target_resolution for use as a default thumbnail.
    
    The image is passed through PIL's ImageOps.pad with Lanczos resampling, so the output
    has the requested size. The padded image is then converted with
    GenerateNumPyImageFromPILImage, keeping any alpha channel (strip_useless_alpha = False).
    """
    
    padded_pil_image = PILImageOps.pad(
        GeneratePILImage( path ),
        target_resolution,
        PILImage.Resampling.LANCZOS
    )
    
    return GenerateNumPyImageFromPILImage( padded_pil_image, strip_useless_alpha = False )
    
|
||||
|
||||
|
|
BIN
static/docx.png
BIN
static/docx.png
Binary file not shown.
Before Width: | Height: | Size: 8.7 KiB After Width: | Height: | Size: 1.6 KiB |
BIN
static/pptx.png
BIN
static/pptx.png
Binary file not shown.
Before Width: | Height: | Size: 8.3 KiB After Width: | Height: | Size: 1.5 KiB |
BIN
static/xlsx.png
BIN
static/xlsx.png
Binary file not shown.
Before Width: | Height: | Size: 8.5 KiB After Width: | Height: | Size: 1.3 KiB |
Loading…
Reference in New Issue