From a6fe676c56b3a27d09e0cd31f4b448b6bf0fb9c8 Mon Sep 17 00:00:00 2001 From: "Jorj X. McKie" Date: Fri, 11 Jul 2025 07:30:41 -0400 Subject: [PATCH] Rework / remove method reload_page We previously refreshed a Page in memory by simulating navigating away to another page and returning. This has been necessary after certain updates or inserts of annotations and links. This fix basically replaces it with the feature provided by mupdf function pdf_sync_page(), which is now what Page method refresh() does. This is a tentative update yet: As MuPDF propagates calling pdf_sync_page(9 over time, we should reduce and eventually retire Page.refresh() and Document.reload_page(). --- src/__init__.py | 53 ++------------------------------- src/utils.py | 4 +++ tests/test_annots.py | 1 - tests/test_general.py | 2 +- tests/test_mupdf_regressions.py | 2 +- tests/test_named_links.py | 2 -- tests/test_textbox.py | 1 - tests/test_widgets.py | 8 ----- 8 files changed, 9 insertions(+), 64 deletions(-) diff --git a/src/__init__.py b/src/__init__.py index 016473169..20a0a5987 100644 --- a/src/__init__.py +++ b/src/__init__.py @@ -5232,35 +5232,8 @@ def prev_location(self, page_id): def reload_page(self, page: Page) -> Page: """Make a fresh copy of a page.""" - old_annots = {} # copy annot references to here pno = page.number # save the page number - for k, v in page._annot_refs.items(): # save the annot dictionary - old_annots[k] = v - - # When we call `self.load_page()` below, it will end up in - # fz_load_chapter_page(), which will return any matching page in the - # document's list of non-ref-counted loaded pages, instead of actually - # reloading the page. - # - # We want to assert that we have actually reloaded the fz_page, and not - # simply returned the same `fz_page*` pointer from the document's list - # of non-ref-counted loaded pages. - # - # So we first remove our reference to the `fz_page*`. This will - # decrement .refs, and if .refs was 1, this is guaranteed to free the - # `fz_page*` and remove it from the document's list if it was there. So - # we are guaranteed that our returned `fz_page*` is from a genuine - # reload, even if it happens to reuse the original block of memory. - # - # However if the original .refs is greater than one, there must be - # other references to the `fz_page` somewhere, and we require that - # these other references are not keeping the page in the document's - # list. We check that we are returning a newly loaded page by - # asserting that our returned `fz_page*` is different from the original - # `fz_page*` - the original was not freed, so a new `fz_page` cannot - # reuse the same block of memory. - # - + page.refresh() # synchronize links & annotations refs_old = page.this.m_internal.refs m_internal_old = page.this.m_internal_value() @@ -5269,24 +5242,6 @@ def reload_page(self, page: Page) -> Page: page = None TOOLS.store_shrink(100) page = self.load_page(pno) # reload the page - - # copy annot refs over to the new dictionary - #page_proxy = weakref.proxy(page) - for k, v in old_annots.items(): - annot = old_annots[k] - #annot.parent = page_proxy # refresh parent to new page - page._annot_refs[k] = annot - if refs_old == 1: - # We know that `page.this = None` will have decremented the ref - # count to zero so we are guaranteed that the new `fz_page` is a - # new page even if it happens to have reused the same block of - # memory. - pass - else: - # Check that the new `fz_page*` is different from the original. - m_internal_new = page.this.m_internal_value() - assert m_internal_new != m_internal_old, \ - f'{refs_old=} {m_internal_old=:#x} {m_internal_new=:#x}' return page def resolve_link(self, uri=None, chapters=0): @@ -9742,10 +9697,8 @@ def read_contents(self): def refresh(self): """Refresh page after link/annot/widget updates.""" CheckParent(self) - doc = self.parent - page = doc.reload_page(self) - # fixme this looks wrong. - self.this = page + pdf_page = _as_pdf_page(self) + mupdf.pdf_sync_page(pdf_page) @property def rotation(self): diff --git a/src/utils.py b/src/utils.py index cefa8a4dd..48ac7bd1b 100644 --- a/src/utils.py +++ b/src/utils.py @@ -2244,6 +2244,7 @@ def insert_link(page: pymupdf.Page, lnk: dict, mark: bool = True) -> None: if annot == "": raise ValueError("link kind not supported") page._addAnnot_FromString((annot,)) + page.refresh() def insert_textbox( @@ -2494,6 +2495,8 @@ def rect_function(*args): link["from"] *= mat page.insert_link(link) + # ensure any new links are available + page.refresh() return spare_height, scale @@ -4271,6 +4274,7 @@ def center_rect(annot_rect, new_text, font, fsize): ) fsize -= 0.5 # reduce font if unsuccessful shape.commit() # append new contents object + page.refresh() # synchronize links and annotations return True diff --git a/tests/test_annots.py b/tests/test_annots.py index 2c67f906d..f986c1ef1 100644 --- a/tests/test_annots.py +++ b/tests/test_annots.py @@ -151,7 +151,6 @@ def test_stamp(): annot_xref = annot.xref page.load_annot(annot_id) page.load_annot(annot_xref) - page = doc.reload_page(page) def test_image_stamp(): diff --git a/tests/test_general.py b/tests/test_general.py index 1288c2279..ca4ab4c4e 100644 --- a/tests/test_general.py +++ b/tests/test_general.py @@ -811,7 +811,7 @@ def test_2957_1(): page.apply_redactions() # reload page to finalize updates - page = doc.reload_page(page) + #page = doc.reload_page(page) # the two string must retain their positions (except rounding errors) rects1 = page.search_for("6e9f73dfb4384a2b8af6ebba") diff --git a/tests/test_mupdf_regressions.py b/tests/test_mupdf_regressions.py index 8816260f7..d8b6c259d 100644 --- a/tests/test_mupdf_regressions.py +++ b/tests/test_mupdf_regressions.py @@ -44,7 +44,7 @@ def test_707727(): page = doc[0] pix0 = page.get_pixmap() page.clean_contents(sanitize=True) - page = doc.reload_page(page) # required to prevent re-use + # page = doc.reload_page(page) # required to prevent re-use pix1 = page.get_pixmap() rms = gentle_compare.pixmaps_rms(pix0, pix1) print(f'{rms=}', flush=1) diff --git a/tests/test_named_links.py b/tests/test_named_links.py index 0ff070da4..8ffe82566 100644 --- a/tests/test_named_links.py +++ b/tests/test_named_links.py @@ -20,8 +20,6 @@ def test_2886(): # insert this link in an arbitrary page & rect page = doc[-1] page.insert_link(link) - # need this to update the internal MuPDF annotations array - page = doc.reload_page(page) # our new link must be the last in the following list links = page.get_links() diff --git a/tests/test_textbox.py b/tests/test_textbox.py index d8003624d..0db54764d 100644 --- a/tests/test_textbox.py +++ b/tests/test_textbox.py @@ -184,7 +184,6 @@ def test_htmlbox1(): spare_height, scale = page.insert_htmlbox(rect, text, rotate=rot, scale_low=0) assert spare_height == 0 assert 0 < scale < 1 - page = doc.reload_page(page) link = page.get_links()[0] # extracts the links on the page assert link["uri"] == "https://www.artifex.com" diff --git a/tests/test_widgets.py b/tests/test_widgets.py index 9eafd0246..e9619e09f 100644 --- a/tests/test_widgets.py +++ b/tests/test_widgets.py @@ -398,8 +398,6 @@ def test_4055(): w.field_value = w.on_state() w.update() - page = doc.reload_page(page) # reload page to make sure we start fresh - # Round 2: confirm that fields contain the PDF's own on values for w in page.widgets(types=[2]): # confirm each value coincides with the "Yes" value @@ -407,16 +405,12 @@ def test_4055(): w.field_value = False # switch to "Off" using False w.update() - page = doc.reload_page(page) - # Round 3: confirm that 'False' achieved "Off" values for w in page.widgets(types=[2]): assert w.field_value == "Off" w.field_value = True # use True for the next round w.update() - page = doc.reload_page(page) - # Round 4: confirm that setting to True also worked for w in page.widgets(types=[2]): assert w.field_value == w.on_state() @@ -425,8 +419,6 @@ def test_4055(): w.field_value = "Yes" w.update() - page = doc.reload_page(page) - # Round 5: final check: setting to "Yes" also does work for w in page.widgets(types=[2]): assert w.field_value == w.on_state()