Broaden exception handling in FontInfo extraction

ashariyar · ashariyar · commit 6c791ff11965 · 2025-11-04T22:10:06.000-05:00
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,4 +1,5 @@
 # NEXT RELEASE
+* Broaden exception handling in `FontInfo` extraction
 
 ### 1.17.8
 * Handle `AttributeError` in `FontInfo` extraction
diff --git a/pdfalyzer/decorators/pdf_file.py b/pdfalyzer/decorators/pdf_file.py
@@ -173,8 +173,8 @@ def extract_text(
         except EmptyFileError:
             log.warning("Skipping empty file!")
         except PdfStreamError as e:
-            print_error(f"Error parsing PDF file '{self.file_path}': {e}")
             stderr_console.print_exception()
+            print_error(f"Error parsing PDF file '{self.file_path}': {e}")
 
         return "\n\n".join(extracted_pages).strip()
 
diff --git a/pdfalyzer/helpers/rich_text_helper.py b/pdfalyzer/helpers/rich_text_helper.py
@@ -106,4 +106,5 @@ def attention_getting_panel(text: Text, title: str, style: str = 'white on red')
 
 
 def print_error(text: Union[str, Text]) -> Text:
+    console.line()
     console.print(error_text(text))
diff --git a/pdfalyzer/output/pdfalyzer_presenter.py b/pdfalyzer/output/pdfalyzer_presenter.py
@@ -20,19 +20,27 @@
 from pdfalyzer.config import PdfalyzerConfig
 from pdfalyzer.decorators.pdf_tree_node import DECODE_FAILURE_LEN
 from pdfalyzer.detection.yaralyzer_helper import get_bytes_yaralyzer, get_file_yaralyzer
+from pdfalyzer.helpers.rich_text_helper import print_error
 from pdfalyzer.helpers.string_helper import pp
 from pdfalyzer.output.layout import (print_fatal_error_panel, print_section_header, print_section_subheader,
      print_section_sub_subheader)
 from pdfalyzer.output.tables.decoding_stats_table import build_decoding_stats_table
 from pdfalyzer.output.tables.pdf_node_rich_table import generate_rich_tree, get_symlink_representation
 from pdfalyzer.output.tables.stream_objects_table import stream_objects_table
 from pdfalyzer.pdfalyzer import Pdfalyzer
-# from pdfalyzer.util.adobe_strings import *
 
 INTERNAL_YARA_ERROR_MSG = "Internal YARA error! YARA's error codes can be checked here: https://github.com/VirusTotal/yara/blob/master/libyara/include/yara/error.h"  # noqa: E501
 
 
 class PdfalyzerPresenter:
+    """
+    Handles formatting of console text output for Pdfalyzer class.
+
+    Attributes:
+        pdfalyzer (Pdfalyzer): Pdfalyzer for a given PDF file
+        yaralyzer (Yaralyzer): Yaralyzer for a given PDF file
+    """
+
     def __init__(self, pdfalyzer: Pdfalyzer):
         self.pdfalyzer = pdfalyzer
         self.yaralyzer = get_file_yaralyzer(self.pdfalyzer.pdf_path)
@@ -83,6 +91,9 @@ def print_font_info(self, font_idnum=None) -> None:
         """Print information about all fonts that appear in this PDF."""
         print_section_header(f'{len(self.pdfalyzer.font_infos)} fonts found in {self.pdfalyzer.pdf_basename}')
 
+        if self.pdfalyzer.font_info_extraction_error:
+            print_error(f"Failed to extract font information (error: {self.pdfalyzer.font_info_extraction_error})")
+
         for font_info in [fi for fi in self.pdfalyzer.font_infos if font_idnum is None or font_idnum == fi.idnum]:
             font_info.print_summary()
 
diff --git a/pdfalyzer/pdfalyzer.py b/pdfalyzer/pdfalyzer.py
@@ -19,6 +19,7 @@
 from pdfalyzer.decorators.pdf_tree_node import PdfTreeNode
 from pdfalyzer.decorators.pdf_tree_verifier import PdfTreeVerifier
 from pdfalyzer.font_info import FontInfo
+from pdfalyzer.helpers.rich_text_helper import print_error
 from pdfalyzer.pdf_object_relationship import PdfObjectRelationship
 from pdfalyzer.util.adobe_strings import *
 from pdfalyzer.util.exceptions import PdfWalkError
@@ -37,6 +38,7 @@ class Pdfalyzer:
 
     Attributes:
         font_infos (List[FontInfo]): Font summary objects
+        font_info_extraction_error (Optional[Exception]): Error encountered extracting FontInfo (if any)
         max_generation (int): Max revision number ("generation") encountered in this PDF.
         nodes_encountered (Dict[int, PdfTreeNode]): Nodes we've traversed already.
         pdf_basename (str): The base name of the PDF file (with extension).
@@ -70,6 +72,7 @@ def __init__(self, pdf_path: str):
 
         # Initialize tracking variables
         self.font_infos: List[FontInfo] = []  # Font summary objects
+        self.font_info_extraction_error: Optional[Exception] = None
         self.max_generation = 0  # PDF revisions are "generations"; this is the max generation encountered
         self.nodes_encountered: Dict[int, PdfTreeNode] = {}  # Nodes we've seen already
         self._indeterminate_ids = set()  # See INDETERMINATE_REF_KEYS comment
@@ -231,9 +234,11 @@ def _extract_font_infos(self) -> None:
                     fi for fi in FontInfo.extract_font_infos(node.obj)
                     if fi.idnum not in known_font_ids
                 ]
-            except AttributeError as e:
-                console.print_exception()
-                log.error(f"Failed to extract font information from node: {node}")
+            except Exception as e:
+                self.font_info_extraction_error = e
+                console.line()
+                log.warning(f"Failed to extract font information from node: {node} (error: {e})")
+                console.line()
 
     def _build_or_find_node(self, relationship: IndirectObject, relationship_key: str) -> PdfTreeNode:
         """If node in self.nodes_encountered already then return it, otherwise build a node and store it."""

Original file line number	Diff line number	Diff line change
`@@ -1,4 +1,5 @@`
`1`	`1`	`# NEXT RELEASE`
	`2`	+* Broaden exception handling in `FontInfo` extraction
`2`	`3`
`3`	`4`	`### 1.17.8`
`4`	`5`	* Handle `AttributeError` in `FontInfo` extraction
Original file line number	Diff line number	Diff line change
`@@ -106,4 +106,5 @@ def attention_getting_panel(text: Text, title: str, style: str = 'white on red')`
`106`	`106`
`107`	`107`
`108`	`108`	`def print_error(text: Union[str, Text]) -> Text:`
	`109`	`+ console.line()`
`109`	`110`	`console.print(error_text(text))`