Skip to content

Commit a429dc7

Browse files
authored
Fix: handle client-side meta refresh redirects (#40)
Fixes the "Redirecting..." response issue by adding support for HTML meta refresh redirects in `mcpdoc/main.py`.

- Parses `<meta http-equiv="refresh">` tags to follow client-side redirects
- Consistent with existing `--follow-redirects` flag behavior
- Resolves cases where documentation sites use meta refresh instead of HTTP redirects

Modified: `mcpdoc/main.py`
1 parent d1db631 commit a429dc7

File tree

1 file changed

+31
-3
lines changed

1 file changed

+31
-3
lines changed

mcpdoc/main.py

Lines changed: 31 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,8 @@
11
"""MCP Llms-txt server for docs."""
22

33
import os
4-
from urllib.parse import urlparse
4+
import re
5+
from urllib.parse import urlparse, urljoin
56

67
import httpx
78
from markdownify import markdownify
@@ -228,7 +229,8 @@ def list_doc_sources() -> str:
228229

229230
@server.tool(description=fetch_docs_description)
230231
async def fetch_docs(url: str) -> str:
231-
nonlocal domains
232+
nonlocal domains, follow_redirects
233+
url = url.strip()
232234
# Handle local file paths (either as file:// URLs or direct filesystem paths)
233235
if not _is_http_or_https(url):
234236
abs_path = _normalize_path(url)
@@ -255,7 +257,33 @@ async def fetch_docs(url: str) -> str:
255257
try:
256258
response = await httpx_client.get(url, timeout=timeout)
257259
response.raise_for_status()
258-
return markdownify(response.text)
260+
content = response.text
261+
262+
if follow_redirects:
263+
# Check for meta refresh tag which indicates a client-side redirect
264+
match = re.search(
265+
r'<meta http-equiv="refresh" content="[^;]+;\s*url=([^"]+)"',
266+
content,
267+
re.IGNORECASE,
268+
)
269+
270+
if match:
271+
redirect_url = match.group(1)
272+
new_url = urljoin(str(response.url), redirect_url)
273+
274+
if "*" not in domains and not any(
275+
new_url.startswith(domain) for domain in domains
276+
):
277+
return (
278+
"Error: Redirect URL not allowed. Must start with one of the following domains: "
279+
+ ", ".join(domains)
280+
)
281+
282+
response = await httpx_client.get(new_url, timeout=timeout)
283+
response.raise_for_status()
284+
content = response.text
285+
286+
return markdownify(content)
259287
except (httpx.HTTPStatusError, httpx.RequestError) as e:
260288
return f"Encountered an HTTP error: {str(e)}"
261289

0 commit comments

Comments
 (0)