Skip to content

Commit af8b7e6

Browse files
authored
Merge pull request #1063 from python-openapi/feature/cache-compiled-parsers
Cache compiled path parsers
2 parents 73b236f + 44444bb commit af8b7e6

File tree

6 files changed

+213
-48
lines changed

6 files changed

+213
-48
lines changed

Makefile

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,3 +38,6 @@ docs-cleanup:
3838
@rm -rf docs_build
3939

4040
cleanup: dist-cleanup test-cleanup
41+
42+
bench-paths:
43+
@PYTHONHASHSEED=0 python tests/benchmarks/bench_paths.py --paths 500 --templates-ratio 0.7 --lookups 2000 --output bench-paths.json

openapi_core/templating/paths/iterators.py

Lines changed: 17 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
from functools import lru_cache
12
from typing import Iterator
23
from typing import List
34
from typing import Optional
@@ -12,9 +13,8 @@
1213
from openapi_core.templating.paths.datatypes import PathOperation
1314
from openapi_core.templating.paths.datatypes import PathOperationServer
1415
from openapi_core.templating.paths.exceptions import PathsNotFound
16+
from openapi_core.templating.paths.parsers import PathParser
1517
from openapi_core.templating.paths.util import template_path_len
16-
from openapi_core.templating.util import parse
17-
from openapi_core.templating.util import search
1818

1919

2020
class SimplePathsIterator:
@@ -52,14 +52,19 @@ def __call__(
5252
yield Path(path, path_result)
5353
# template path
5454
else:
55-
result = search(path_pattern, name)
55+
path_parser = self._get_path_parser(path_pattern)
56+
result = path_parser.search(name)
5657
if result:
5758
path_result = TemplateResult(path_pattern, result.named)
5859
template_paths.append(Path(path, path_result))
5960

6061
# Fewer variables -> more concrete path
6162
yield from sorted(template_paths, key=template_path_len)
6263

64+
@staticmethod
@lru_cache(maxsize=4096)
def _get_path_parser(path_pattern: str) -> PathParser:
    """Return the cached parser for a templated path pattern.

    The parser is built with ``post_expression="$"`` so the pattern has to
    match up to the end of the searched text.

    The cache is keyed on ``path_pattern`` only. Wrapping an instance
    method with ``lru_cache`` would put ``self`` into every cache key,
    which keeps each iterator instance alive for the lifetime of the cache
    and prevents instances from sharing compiled parsers. The built parser
    depends on nothing but the pattern, so a cached static method is
    sufficient; it can still be called as ``self._get_path_parser(...)``.
    """
    return PathParser(path_pattern, post_expression="$")
67+
6368

6469
class SimpleOperationsIterator:
6570
def __call__(
@@ -156,7 +161,10 @@ def __call__(
156161
)
157162
# template path
158163
else:
159-
result = parse(server["url"], server_url_pattern)
164+
server_url_parser = self._get_server_url_parser(
165+
server["url"]
166+
)
167+
result = server_url_parser.parse(server_url_pattern)
160168
if result:
161169
server_result = TemplateResult(
162170
server["url"], result.named
@@ -171,7 +179,7 @@ def __call__(
171179
# servers shouldn't end with trailing slash
172180
# but let's search for this too
173181
server_url_pattern += "/"
174-
result = parse(server["url"], server_url_pattern)
182+
result = server_url_parser.parse(server_url_pattern)
175183
if result:
176184
server_result = TemplateResult(
177185
server["url"], result.named
@@ -183,3 +191,7 @@ def __call__(
183191
path_result,
184192
server_result,
185193
)
194+
195+
@staticmethod
@lru_cache(maxsize=1024)
def _get_server_url_parser(server_url: str) -> PathParser:
    """Return the cached parser for a templated server URL.

    The parser is built with ``pre_expression="^"`` so the server URL
    template has to match from the start of the parsed text.

    The cache is keyed on ``server_url`` only. Wrapping an instance method
    with ``lru_cache`` would put ``self`` into every cache key, which
    keeps each iterator instance alive for the lifetime of the cache. The
    built parser depends on nothing but the URL template, so a cached
    static method is sufficient; it can still be called as
    ``self._get_server_url_parser(...)``.
    """
    return PathParser(server_url, pre_expression="^")
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
from typing import Any
2+
3+
from parse import Parser
4+
5+
6+
class PathParameter:
    """Converter object describing a single templated path segment.

    ``pattern`` is the regular expression used for the segment: any run of
    characters (possibly empty) that contains no ``/``. ``name`` is the
    identifier under which the converter is referenced. Calling the
    instance returns the matched text unchanged.
    """

    pattern = r"[^\/]*"
    name = "PathParameter"

    def __call__(self, text: str) -> str:
        # No conversion: the captured segment is used verbatim.
        return text
12+
13+
14+
class PathParser(Parser):  # type: ignore
    """``parse.Parser`` variant that treats every field as a path parameter.

    Each field of the pattern is rewritten to use the ``PathParameter``
    converter, and the expression generated by the base class can be
    wrapped with an extra prefix and/or suffix (for example ``^`` or ``$``).
    """

    parse_path_parameter = PathParameter()

    def __init__(
        self, pattern: str, pre_expression: str = "", post_expression: str = ""
    ) -> None:
        """Build the parser for ``pattern``.

        ``pre_expression`` and ``post_expression`` are placed before and
        after the expression produced by the base class.
        """
        converter = self.parse_path_parameter
        super().__init__(pattern, {converter.name: converter})
        generated = self._expression
        self._expression: str = "".join(
            (pre_expression, generated, post_expression)
        )

    def _handle_field(self, field: str) -> Any:
        """Handle ``field`` as a ``PathParameter``-typed field."""
        # Drop the first and last character (presumably the surrounding
        # braces) and re-wrap the remainder with the PathParameter type.
        inner = field[1:-1]
        return super()._handle_field("{%s:PathParameter}" % inner)

openapi_core/templating/util.py

Lines changed: 0 additions & 38 deletions
This file was deleted.

tests/benchmarks/bench_paths.py

Lines changed: 151 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,151 @@
1+
#!/usr/bin/env python3
2+
import argparse
3+
import gc
4+
import json
5+
import random
6+
import statistics
7+
import time
8+
from dataclasses import dataclass
9+
from typing import Any
10+
from typing import Dict
11+
from typing import List
12+
13+
from jsonschema_path import SchemaPath
14+
15+
from openapi_core.templating.paths.finders import APICallPathFinder
16+
17+
18+
@dataclass(frozen=True)
class Result:
    """Benchmark parameters together with the measured timings.

    ``seconds`` holds one elapsed wall-clock time per measured repeat; the
    remaining fields echo the settings the benchmark was run with.
    """

    paths: int
    templates_ratio: float
    lookups: int
    repeats: int
    warmup: int
    seconds: List[float]

    def as_dict(self) -> Dict[str, Any]:
        """Return a JSON-serializable summary of the run.

        The summary statistics (``median_s``, ``mean_s``, ``stdev_s``) are
        ``None`` when no timings were recorded, and ``ops_per_sec_median``
        is ``None`` whenever the median is missing or zero. This keeps
        reporting from failing with ``StatisticsError`` or
        ``ZeroDivisionError`` on degenerate runs.
        """
        median_s = mean_s = stdev_s = ops_per_sec_median = None
        if self.seconds:
            # Compute the median once; it is reported and reused below.
            median_s = statistics.median(self.seconds)
            mean_s = statistics.mean(self.seconds)
            stdev_s = statistics.pstdev(self.seconds)
            if median_s > 0:
                ops_per_sec_median = self.lookups / median_s

        return {
            "paths": self.paths,
            "templates_ratio": self.templates_ratio,
            "lookups": self.lookups,
            "repeats": self.repeats,
            "warmup": self.warmup,
            "seconds": self.seconds,
            "median_s": median_s,
            "mean_s": mean_s,
            "stdev_s": stdev_s,
            "ops_per_sec_median": ops_per_sec_median,
        }
41+
42+
43+
def build_spec(paths: int, templates_ratio: float) -> SchemaPath:
    """Build a minimal OpenAPI 3.0 spec with exact and templated paths.

    ``int(paths * templates_ratio)`` of the paths are templated; the rest
    are exact. Every path exposes a single ``get`` operation so that the
    benchmark measures finder cost rather than schema complexity.
    """
    template_count = int(paths * templates_ratio)
    exact_count = paths - template_count

    def operation() -> Dict[str, Any]:
        # A fresh dict per path, so path items never share state.
        return {"get": {"responses": {"200": {"description": "ok"}}}}

    # Exact paths first (fast case) ...
    paths_obj: Dict[str, Any] = {
        f"/resource/{i}/sub": operation() for i in range(exact_count)
    }
    # ... followed by the templated paths (slow case).
    paths_obj.update(
        (f"/resource/{i}" + "/{item_id}/sub/{sub_id}", operation())
        for i in range(template_count)
    )

    return SchemaPath.from_dict(
        {
            "openapi": "3.0.0",
            "info": {"title": "bench", "version": "0"},
            "servers": [{"url": "http://example.com"}],
            "paths": paths_obj,
        }
    )
68+
69+
70+
def build_urls(
    paths: int, templates_ratio: float, lookups: int, seed: int
) -> List[str]:
    """Generate ``lookups`` request URLs using a seeded random generator.

    Each URL targets either the templated or the exact path population.
    The choice is random and weighted by the share of templated paths, so
    the same ``seed`` always yields the same list.
    """
    rng = random.Random(seed)
    template_count = int(paths * templates_ratio)
    exact_count = paths - template_count

    def wants_template() -> bool:
        # No templated paths at all: always fall back to an exact URL.
        if template_count <= 0:
            return False
        # Only templated paths exist: no random draw is needed.
        if exact_count == 0:
            return True
        return rng.random() < (template_count / paths)

    generated: List[str] = []
    for _ in range(lookups):
        if wants_template():
            i = rng.randrange(template_count)  # index of a templated path
            item_id = rng.randrange(1_000_000)
            sub_id = rng.randrange(1_000_000)
            url = f"http://example.com/resource/{i}/{item_id}/sub/{sub_id}"
        else:
            i = rng.randrange(exact_count) if exact_count > 0 else 0
            url = f"http://example.com/resource/{i}/sub"
        generated.append(url)
    return generated
91+
92+
93+
def run_once(finder: APICallPathFinder, urls: List[str]) -> float:
    """Look up every URL once with ``finder`` and return the elapsed seconds.

    Each URL is resolved as a ``get`` request; the time is measured with
    ``time.perf_counter``.
    """
    started = time.perf_counter()
    for url in urls:
        finder.find("get", url)
    finished = time.perf_counter()
    return finished - started
98+
99+
100+
def main() -> None:
    """Parse the CLI options, run the path-finder benchmark and report it.

    The summary is printed to stdout as JSON and, when ``--output`` is
    given, also written to that file.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--paths", type=int, default=2000)
    ap.add_argument("--templates-ratio", type=float, default=0.6)
    ap.add_argument("--lookups", type=int, default=100_000)
    ap.add_argument("--repeats", type=int, default=7)
    ap.add_argument("--warmup", type=int, default=2)
    ap.add_argument("--seed", type=int, default=1)
    ap.add_argument("--output", type=str, default="")
    ap.add_argument("--no-gc", action="store_true")
    args = ap.parse_args()

    # Without at least one measured run there is nothing to summarize;
    # fail with a usage error instead of a traceback from the statistics.
    if args.repeats < 1:
        ap.error("--repeats must be at least 1")

    spec = build_spec(args.paths, args.templates_ratio)
    finder = APICallPathFinder(spec)

    urls = build_urls(
        args.paths, args.templates_ratio, args.lookups, args.seed
    )

    # Remember the collector state so it is restored exactly as found,
    # even when a lookup raises in the middle of the benchmark.
    gc_was_enabled = gc.isenabled()
    if args.no_gc:
        gc.disable()
    try:
        # Warmup (JIT-less, but warms caches, alloc patterns, etc.)
        for _ in range(args.warmup):
            run_once(finder, urls)

        seconds: List[float] = [
            run_once(finder, urls) for _ in range(args.repeats)
        ]
    finally:
        if args.no_gc and gc_was_enabled:
            gc.enable()

    result = Result(
        paths=args.paths,
        templates_ratio=args.templates_ratio,
        lookups=args.lookups,
        repeats=args.repeats,
        warmup=args.warmup,
        seconds=seconds,
    )

    payload = result.as_dict()
    print(json.dumps(payload, indent=2, sort_keys=True))

    if args.output:
        with open(args.output, "w", encoding="utf-8") as f:
            json.dump(payload, f, indent=2, sort_keys=True)
148+
149+
150+
if __name__ == "__main__":
151+
main()

tests/unit/templating/test_templating_util.py renamed to tests/unit/templating/test_paths_parsers.py

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,24 +1,26 @@
11
import pytest
22

3-
from openapi_core.templating.util import search
3+
from openapi_core.templating.paths.parsers import PathParser
44

55

66
class TestSearch:
77
def test_endswith(self):
88
path_pattern = "/{test}/test"
9+
parser = PathParser(path_pattern, post_expression="$")
910
full_url_pattern = "/test1/test/test2/test"
1011

11-
result = search(path_pattern, full_url_pattern)
12+
result = parser.search(full_url_pattern)
1213

1314
assert result.named == {
1415
"test": "test2",
1516
}
1617

1718
def test_exact(self):
1819
path_pattern = "/{test}/test"
20+
parser = PathParser(path_pattern, post_expression="$")
1921
full_url_pattern = "/test/test"
2022

21-
result = search(path_pattern, full_url_pattern)
23+
result = parser.search(full_url_pattern)
2224

2325
assert result.named == {
2426
"test": "test",
@@ -33,9 +35,10 @@ def test_exact(self):
3335
],
3436
)
3537
def test_chars_valid(self, path_pattern, expected):
38+
parser = PathParser(path_pattern, post_expression="$")
3639
full_url_pattern = "/test/test"
3740

38-
result = search(path_pattern, full_url_pattern)
41+
result = parser.search(full_url_pattern)
3942

4043
assert result.named == expected
4144

@@ -53,8 +56,9 @@ def test_chars_valid(self, path_pattern, expected):
5356
],
5457
)
5558
def test_special_chars_valid(self, path_pattern, expected):
59+
parser = PathParser(path_pattern, post_expression="$")
5660
full_url_pattern = "/test/test"
5761

58-
result = search(path_pattern, full_url_pattern)
62+
result = parser.search(full_url_pattern)
5963

6064
assert result.named == expected

0 commit comments

Comments
 (0)