Fix: handle client-side meta refresh redirects (#40)

aliyanishfaq · web-flow · commit a429dc788b5f · 2025-07-22T16:22:39.000-04:00
Fixes the "Redirecting..." response issue by adding support for HTML
meta refresh redirects in `mcpdoc/main.py`.

- Parses `<meta http-equiv="refresh">` tags to follow client-side
redirects
- Meta refresh redirects are only followed when `--follow-redirects` is set, consistent with the flag's existing behavior
- Resolves cases where documentation sites use meta refresh instead of
HTTP redirects

Modified: `mcpdoc/main.py`
diff --git a/mcpdoc/main.py b/mcpdoc/main.py
@@ -1,7 +1,8 @@
 """MCP Llms-txt server for docs."""
 
 import os
-from urllib.parse import urlparse
+import re
+from urllib.parse import urlparse, urljoin
 
 import httpx
 from markdownify import markdownify
@@ -228,7 +229,8 @@ def list_doc_sources() -> str:
 
     @server.tool(description=fetch_docs_description)
     async def fetch_docs(url: str) -> str:
-        nonlocal domains
+        nonlocal domains, follow_redirects
+        url = url.strip()
         # Handle local file paths (either as file:// URLs or direct filesystem paths)
         if not _is_http_or_https(url):
             abs_path = _normalize_path(url)
@@ -255,7 +257,33 @@ async def fetch_docs(url: str) -> str:
             try:
                 response = await httpx_client.get(url, timeout=timeout)
                 response.raise_for_status()
-                return markdownify(response.text)
+                content = response.text
+
+                if follow_redirects:
+                    # Check for meta refresh tag which indicates a client-side redirect
+                    match = re.search(
+                        r'<meta http-equiv="refresh" content="[^;]+;\s*url=([^"]+)"',
+                        content,
+                        re.IGNORECASE,
+                    )
+
+                    if match:
+                        redirect_url = match.group(1)
+                        new_url = urljoin(str(response.url), redirect_url)
+
+                        if "*" not in domains and not any(
+                            new_url.startswith(domain) for domain in domains
+                        ):
+                            return (
+                                "Error: Redirect URL not allowed. Must start with one of the following domains: "
+                                + ", ".join(domains)
+                            )
+
+                        response = await httpx_client.get(new_url, timeout=timeout)
+                        response.raise_for_status()
+                        content = response.text
+
+                return markdownify(content)
             except (httpx.HTTPStatusError, httpx.RequestError) as e:
                 return f"Encountered an HTTP error: {str(e)}"