From a6fe676c56b3a27d09e0cd31f4b448b6bf0fb9c8 Mon Sep 17 00:00:00 2001
From: "Jorj X. McKie" <jorj.x.mckie@outlook.de>
Date: Fri, 11 Jul 2025 07:30:41 -0400
Subject: [PATCH] Rework / remove method reload_page

We previously refreshed a Page in memory by simulating navigating away to another page and returning.
This has been necessary after certain updates or inserts of annotations and links.
This fix basically replaces it with the feature provided by mupdf function pdf_sync_page(), which is now what Page method refresh() does.

This is a tentative update yet: As MuPDF propagates calling pdf_sync_page(9 over time, we should reduce and eventually retire Page.refresh() and Document.reload_page().
---
 src/__init__.py                 | 53 ++-------------------------------
 src/utils.py                    |  4 +++
 tests/test_annots.py            |  1 -
 tests/test_general.py           |  2 +-
 tests/test_mupdf_regressions.py |  2 +-
 tests/test_named_links.py       |  2 --
 tests/test_textbox.py           |  1 -
 tests/test_widgets.py           |  8 -----
 8 files changed, 9 insertions(+), 64 deletions(-)

diff --git a/src/__init__.py b/src/__init__.py
index 016473169..20a0a5987 100644
--- a/src/__init__.py
+++ b/src/__init__.py
@@ -5232,35 +5232,8 @@ def prev_location(self, page_id):
 
     def reload_page(self, page: Page) -> Page:
         """Make a fresh copy of a page."""
-        old_annots = {}  # copy annot references to here
         pno = page.number  # save the page number
-        for k, v in page._annot_refs.items():  # save the annot dictionary
-            old_annots[k] = v
-        
-        # When we call `self.load_page()` below, it will end up in
-        # fz_load_chapter_page(), which will return any matching page in the
-        # document's list of non-ref-counted loaded pages, instead of actually
-        # reloading the page.
-        #
-        # We want to assert that we have actually reloaded the fz_page, and not
-        # simply returned the same `fz_page*` pointer from the document's list
-        # of non-ref-counted loaded pages.
-        #
-        # So we first remove our reference to the `fz_page*`. This will
-        # decrement .refs, and if .refs was 1, this is guaranteed to free the
-        # `fz_page*` and remove it from the document's list if it was there. So
-        # we are guaranteed that our returned `fz_page*` is from a genuine
-        # reload, even if it happens to reuse the original block of memory.
-        #
-        # However if the original .refs is greater than one, there must be
-        # other references to the `fz_page` somewhere, and we require that
-        # these other references are not keeping the page in the document's
-        # list.  We check that we are returning a newly loaded page by
-        # asserting that our returned `fz_page*` is different from the original
-        # `fz_page*` - the original was not freed, so a new `fz_page` cannot
-        # reuse the same block of memory.
-        #
-        
+        page.refresh()  # synchronize links & annotations
         refs_old = page.this.m_internal.refs
         m_internal_old = page.this.m_internal_value()
         
@@ -5269,24 +5242,6 @@ def reload_page(self, page: Page) -> Page:
         page = None
         TOOLS.store_shrink(100)
         page = self.load_page(pno)  # reload the page
-
-        # copy annot refs over to the new dictionary
-        #page_proxy = weakref.proxy(page)
-        for k, v in old_annots.items():
-            annot = old_annots[k]
-            #annot.parent = page_proxy  # refresh parent to new page
-            page._annot_refs[k] = annot
-        if refs_old == 1:
-            # We know that `page.this = None` will have decremented the ref
-            # count to zero so we are guaranteed that the new `fz_page` is a
-            # new page even if it happens to have reused the same block of
-            # memory.
-            pass
-        else:
-            # Check that the new `fz_page*` is different from the original.
-            m_internal_new = page.this.m_internal_value()
-            assert m_internal_new != m_internal_old, \
-                    f'{refs_old=} {m_internal_old=:#x} {m_internal_new=:#x}'
         return page
 
     def resolve_link(self, uri=None, chapters=0):
@@ -9742,10 +9697,8 @@ def read_contents(self):
     def refresh(self):
         """Refresh page after link/annot/widget updates."""
         CheckParent(self)
-        doc = self.parent
-        page = doc.reload_page(self)
-        # fixme this looks wrong.
-        self.this = page
+        pdf_page = _as_pdf_page(self)
+        mupdf.pdf_sync_page(pdf_page)
 
     @property
     def rotation(self):
diff --git a/src/utils.py b/src/utils.py
index cefa8a4dd..48ac7bd1b 100644
--- a/src/utils.py
+++ b/src/utils.py
@@ -2244,6 +2244,7 @@ def insert_link(page: pymupdf.Page, lnk: dict, mark: bool = True) -> None:
     if annot == "":
         raise ValueError("link kind not supported")
     page._addAnnot_FromString((annot,))
+    page.refresh()
 
 
 def insert_textbox(
@@ -2494,6 +2495,8 @@ def rect_function(*args):
         link["from"] *= mat
         page.insert_link(link)
 
+    # ensure any new links are available
+    page.refresh()
     return spare_height, scale
 
 
@@ -4271,6 +4274,7 @@ def center_rect(annot_rect, new_text, font, fsize):
                 )
                 fsize -= 0.5  # reduce font if unsuccessful
     shape.commit()  # append new contents object
+    page.refresh()  # synchronize links and annotations
     return True
 
 
diff --git a/tests/test_annots.py b/tests/test_annots.py
index 2c67f906d..f986c1ef1 100644
--- a/tests/test_annots.py
+++ b/tests/test_annots.py
@@ -151,7 +151,6 @@ def test_stamp():
     annot_xref = annot.xref
     page.load_annot(annot_id)
     page.load_annot(annot_xref)
-    page = doc.reload_page(page)
 
 
 def test_image_stamp():
diff --git a/tests/test_general.py b/tests/test_general.py
index 1288c2279..ca4ab4c4e 100644
--- a/tests/test_general.py
+++ b/tests/test_general.py
@@ -811,7 +811,7 @@ def test_2957_1():
     page.apply_redactions()
 
     # reload page to finalize updates
-    page = doc.reload_page(page)
+    #page = doc.reload_page(page)
 
     # the two string must retain their positions (except rounding errors)
     rects1 = page.search_for("6e9f73dfb4384a2b8af6ebba")
diff --git a/tests/test_mupdf_regressions.py b/tests/test_mupdf_regressions.py
index 8816260f7..d8b6c259d 100644
--- a/tests/test_mupdf_regressions.py
+++ b/tests/test_mupdf_regressions.py
@@ -44,7 +44,7 @@ def test_707727():
     page = doc[0]
     pix0 = page.get_pixmap()
     page.clean_contents(sanitize=True)
-    page = doc.reload_page(page)  # required to prevent re-use
+    # page = doc.reload_page(page)  # required to prevent re-use
     pix1 = page.get_pixmap()
     rms = gentle_compare.pixmaps_rms(pix0, pix1)
     print(f'{rms=}', flush=1)
diff --git a/tests/test_named_links.py b/tests/test_named_links.py
index 0ff070da4..8ffe82566 100644
--- a/tests/test_named_links.py
+++ b/tests/test_named_links.py
@@ -20,8 +20,6 @@ def test_2886():
     # insert this link in an arbitrary page & rect
     page = doc[-1]
     page.insert_link(link)
-    # need this to update the internal MuPDF annotations array
-    page = doc.reload_page(page)
 
     # our new link must be the last in the following list
     links = page.get_links()
diff --git a/tests/test_textbox.py b/tests/test_textbox.py
index d8003624d..0db54764d 100644
--- a/tests/test_textbox.py
+++ b/tests/test_textbox.py
@@ -184,7 +184,6 @@ def test_htmlbox1():
         spare_height, scale = page.insert_htmlbox(rect, text, rotate=rot, scale_low=0)
         assert spare_height == 0
         assert 0 < scale < 1
-        page = doc.reload_page(page)
         link = page.get_links()[0]  # extracts the links on the page
 
         assert link["uri"] == "https://www.artifex.com"
diff --git a/tests/test_widgets.py b/tests/test_widgets.py
index 9eafd0246..e9619e09f 100644
--- a/tests/test_widgets.py
+++ b/tests/test_widgets.py
@@ -398,8 +398,6 @@ def test_4055():
         w.field_value = w.on_state()
         w.update()
 
-    page = doc.reload_page(page)  # reload page to make sure we start fresh
-
     # Round 2: confirm that fields contain the PDF's own on values
     for w in page.widgets(types=[2]):
         # confirm each value coincides with the "Yes" value
@@ -407,16 +405,12 @@ def test_4055():
         w.field_value = False  # switch to "Off" using False
         w.update()
 
-    page = doc.reload_page(page)
-
     # Round 3: confirm that 'False' achieved "Off" values
     for w in page.widgets(types=[2]):
         assert w.field_value == "Off"
         w.field_value = True  # use True for the next round
         w.update()
 
-    page = doc.reload_page(page)
-
     # Round 4: confirm that setting to True also worked
     for w in page.widgets(types=[2]):
         assert w.field_value == w.on_state()
@@ -425,8 +419,6 @@ def test_4055():
         w.field_value = "Yes"
         w.update()
 
-    page = doc.reload_page(page)
-
     # Round 5: final check: setting to "Yes" also does work
     for w in page.widgets(types=[2]):
         assert w.field_value == w.on_state()