Skip to content

Commit e244200

Browse files
committed
port tocJSON scraper to jsdom
1 parent f40a4ec commit e244200

File tree

5 files changed

+49
-52
lines changed

5 files changed

+49
-52
lines changed

.editorconfig

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
[*]
2+
indent_size = 2
3+
indent_style = space

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
11
*.html
22
node_modules
33

4+
schema.json
5+
46
*.pdf

src/deprecated_view_nav.js

Lines changed: 0 additions & 50 deletions
This file was deleted.

src/env.js

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,10 @@ const {JSDOM} = require("jsdom");
77
* JSDOM chosen due to availability of standard apis
88
* in a browser-free environment.
99
*/
10-
const createDOM = (category) => {
10+
const createDOM = (category, scripts = false) => {
1111
const fData = readFileSync("./" + category + ".html")
12-
const dom = new JSDOM(fData);
12+
const options = scripts ? { runScripts: "dangerously" } : undefined;
13+
const dom = new JSDOM(fData, options);
1314
return dom;
1415
}
1516

view_nav.js

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
#!/usr/bin/env node
2+
const {categories} =require('./config');
3+
4+
const {normalize} = require('./src/util');
5+
const {createDOM} = require('./src/env');
6+
7+
/**
 * Build a predicate for `Array.prototype.find`/`filter` that accepts
 * entries whose `title` property is strictly equal to the given title.
 */
const byTitle = (title) => function hasTitle(entry) {
  return entry.title === title;
};
8+
9+
/**
 * Predicate: true when the entry's `link` ends with "-objects.html".
 */
const onlyObjects = ({link}) => {
  const objectsPage = /-objects\.html$/;
  return objectsPage.test(link);
};
10+
/**
 * Extract the object name from an entry whose `link` ends in
 * "<name>-objects.html" and return the "<name>" part.
 *
 * Only the trailing run of ASCII letters/underscores directly before
 * "-objects.html" is captured.
 *
 * @param {{link: string}} a - TOC entry carrying a `link` property.
 * @returns {string} the captured object name.
 * @throws {TypeError} when the link contains no such name.
 */
const toRepresentation = (a) => {
  // Explicit ranges: `[A-z]` spans code points 65-122 and therefore also
  // matched `[`, `\`, `]`, `^` and the backtick. Underscore is kept on purpose.
  const match = /([A-Za-z_]+)-objects\.html$/.exec(a.link);
  if (match === null) {
    // Fail with a readable message instead of "cannot read properties of null".
    throw new TypeError(`no object name found in link: ${a.link}`);
  }
  return match[1];
};
11+
12+
/**
13+
* Grab all category definitions for navigation
14+
* from JavaScript in the page
15+
*/
16+
for (const _category of categories) {
17+
const category = normalize(_category);
18+
const {window} = createDOM(category, true);
19+
const {tocJSON} = window;
20+
21+
if (!tocJSON) {
22+
console.log('failure to parse TOC on page', category);
23+
continue;
24+
}
25+
26+
const items = tocJSON
27+
.find(byTitle('Reference'))
28+
.children[0]
29+
.children
30+
.find(byTitle('Objects and Attributes'))
31+
.children
32+
.filter(onlyObjects)
33+
.map(toRepresentation);
34+
35+
console.log('objects are', items);
36+
// only need a single version of tocJSON
37+
break;
38+
}
39+
40+
41+

0 commit comments

Comments
 (0)