@@ -3,6 +3,15 @@ import ort from './ort'


class Model {
+    /**
+     * create a base model.
+     * @param {string} modelURL model URL
+     * @param {object} sessionOption onnxruntime session options
+     * @param {(model: Model) => void} init model init function
+     * @param {(...any) => object} preProcess preprocess function
+     * @param {(resultsTensors: object, ...any) => any} postProcess postprocess function
+     * @returns {Promise<Model>} base model object
+     */
    static async create(modelURL, sessionOption = { logSeverityLevel: 4 }, init = null, preProcess = null, postProcess = null) {
        await WebAI.waitForOpenCV()
        let model = new this();
@@ -19,6 +28,11 @@ class Model {
        return model
    }

+    /**
+     * base model infer function.
+     * @param {...any} args model infer parameters
+     * @returns {any} model infer results
+     */
    async infer(...args) {
        console.time('Infer');

@@ -45,7 +59,17 @@ class Model {
    }
}

+
class CV extends Model {
+    /**
+     * create a base CV model.
+     * @param {string} modelURL model URL
+     * @param {string} inferConfig model infer config URL
+     * @param {object} sessionOption onnxruntime session options
+     * @param {(imgTensor: ort.Tensor, imScaleX: number, imScaleY: number) => object} getFeeds get infer session feeds function
+     * @param {(resultsTensors: object, ...any) => any} postProcess postprocess function
+     * @returns {Promise<CV>} base CV model object
+     */
    static async create(modelURL, inferConfig, sessionOption = { logSeverityLevel: 4 }, getFeeds = null, postProcess = null) {
        let model = await super.create(modelURL, sessionOption, null, null, postProcess)
        model.loadConfigs(inferConfig);
@@ -55,6 +79,10 @@ class CV extends Model {
        return model
    }

+    /**
+     * load infer configs.
+     * @param {string} inferConfig model infer config URL
+     */
    loadConfigs(inferConfig) {
        let inferConfigs = JSON.parse(WebAI.loadText(inferConfig));
        let preProcess = inferConfigs.Preprocess;
@@ -123,6 +151,11 @@ class CV extends Model {
        })
    }

+    /**
+     * model preprocess function.
+     * @param {...any} args preprocess args
+     * @returns {object} session infer feeds
+     */
    preProcess(...args) {
        let [imgRGBA, height, width] = args.slice(0, 3)
        let imgResize, imScaleX, imScaleY
@@ -169,7 +202,15 @@ class CV extends Model {
    }
}

+
class Det extends CV {
+    /**
+     * get session infer feeds.
+     * @param {ort.Tensor} imgTensor image tensor
+     * @param {number} imScaleX image scale factor of x axis
+     * @param {number} imScaleY image scale factor of y axis
+     * @returns {object} session infer feeds
+     */
    getFeeds(imgTensor, imScaleX, imScaleY) {
        let inputNames = this.session.inputNames;
        let _feeds = {
@@ -184,6 +225,12 @@ class Det extends CV {
        return feeds
    }

+    /**
+     * detection postprocess.
+     * @param {object} resultsTensors result tensors
+     * @param {...any} args postprocess args
+     * @returns {{label: string, color: [number, number, number, number], score: number, x1: number, y1: number, x2: number, y2: number}[]} bboxes of the detection
+     */
    postProcess(resultsTensors, ...args) {
        let [height, width, drawThreshold] = args.slice(1, 4)
        let bboxesTensor = Object.values(resultsTensors)[0];
@@ -219,16 +266,33 @@ class Det extends CV {
        return bboxes
    }

+    /**
+     * detection infer.
+     * @param {cv.Mat} imgRGBA RGBA image
+     * @param {number} drawThreshold threshold of detection
+     * @returns {{label: string, color: [number, number, number, number], score: number, x1: number, y1: number, x2: number, y2: number}[]} bboxes of the detection
+     */
    async infer(imgRGBA, drawThreshold = 0.5) {
        return super.infer(imgRGBA, imgRGBA.rows, imgRGBA.cols, drawThreshold)
    }
}

class Cls extends CV {
+    /**
+     * get the feeds of the infer session.
+     * @param {ort.Tensor} imgTensor image tensor
+     * @returns {object} feeds of the infer session {x: image tensor}
+     */
    getFeeds(imgTensor) {
        return { x: imgTensor }
    }

+    /**
+     * classification postprocess.
+     * @param {object} resultsTensors result tensors
+     * @param {...any} args postprocess args
+     * @returns {{label: string, prob: number}[]} probs of the classification
+     */
    postProcess(resultsTensors, ...args) {
        let topK = args[3];
        let probsTensor = Object.values(resultsTensors)[0];
@@ -249,16 +313,32 @@ class Cls extends CV {
        }
    }

+    /**
+     * classification infer.
+     * @param {cv.Mat} imgRGBA RGBA image
+     * @param {number} topK probs top K
+     * @returns {{label: string, prob: number}[]} probs of the classification
+     */
    async infer(imgRGBA, topK = 5) {
        return super.infer(imgRGBA, imgRGBA.rows, imgRGBA.cols, topK)
    }
}

class Seg extends CV {
+    /**
+     * get the feeds of the infer session.
+     * @param {ort.Tensor} imgTensor image tensor
+     * @returns {object} feeds of the infer session {x: image tensor}
+     */
    getFeeds(imgTensor) {
        return { x: imgTensor }
    }

+    /**
+     * segmentation postprocess.
+     * @param {object} resultsTensors result tensors
+     * @returns {{gray: cv.Mat, colorRGBA: cv.Mat, colorMap: {label: string, color: [number, number, number, number]}[], delete: () => void}} segmentation results
+     */
    postProcess(resultsTensors) {
        let segTensor = Object.values(resultsTensors)[0];
        let data = segTensor.data
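
Taken together, the JSDoc above documents a small high-level API: `create()` loads the model and `infer()` wraps pre/postprocessing. A minimal usage sketch for the detection path, assuming `Det` and `WebAI` are in scope, opencv.js (`cv`) is loaded, and using placeholder model/config URLs (none of these URLs come from this diff):

async function runDetection() {
    // Det.create(modelURL, inferConfig) resolves to a ready model; the remaining
    // arguments keep their documented defaults. Both URLs here are placeholders.
    const det = await Det.create('model.onnx', 'infer_cfg.json');

    const imgRGBA = cv.imread('canvasInput');           // opencv.js returns an RGBA cv.Mat
    const bboxes = await det.infer(imgRGBA, 0.5);       // [{label, color, score, x1, y1, x2, y2}, ...]
    const imgShow = WebAI.drawBBoxes(imgRGBA, bboxes);  // helper documented further down in this diff
    cv.imshow('canvasOutput', imgShow);

    imgRGBA.delete();                                   // cv.Mat memory must be freed manually
    imgShow.delete();
}

runDetection();                                         // call once opencv.js has finished loading
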
@@ -297,12 +377,22 @@ class Seg extends CV {
        }
    }

+    /**
+     * segmentation infer.
+     * @param {cv.Mat} imgRGBA RGBA image
+     * @returns {{gray: cv.Mat, colorRGBA: cv.Mat, colorMap: {label: string, color: [number, number, number, number]}[], delete: () => void}} segmentation results
+     */
    infer(imgRGBA) {
        return super.infer(imgRGBA, imgRGBA.rows, imgRGBA.cols)
    }
}

class WebAI {
+    /**
+     * get the index of the max value of the array.
+     * @param {number[]} arr array
+     * @returns {number} the index of the max value of the array
+     */
    static argmax(arr) {
        let max = Math.max.apply(null, arr);
        let index = arr.findIndex(
@@ -318,6 +408,15 @@ class WebAI {
        return index
    }

+    /**
+     * get image scale.
+     * @param {number} height image height
+     * @param {number} width image width
+     * @param {[number, number]} targetSize target size [h, w]
+     * @param {boolean} keepRatio whether to keep the image aspect ratio
+     * @param {boolean} limitMax whether to limit the max image size
+     * @returns {[number, number]} [scale factor of x axis, scale factor of y axis]
+     */
    static getIMScale(height, width, targetSize, keepRatio, limitMax) {
        let imScaleX, imScaleY;
        if (keepRatio) {
@@ -343,25 +442,52 @@ class WebAI {
        return [imScaleX, imScaleY]
    }

+    /**
+     * RGBA -> RGB image.
+     * @param {cv.Mat} imgRGBA RGBA image
+     * @returns {cv.Mat} RGB image
+     */
    static rgba2rgb(imgRGBA) {
        let imgRGB = new this.cv.Mat();
        this.cv.cvtColor(imgRGBA, imgRGB, this.cv.COLOR_RGBA2RGB);
        return imgRGB
    }

+    /**
+     * RGBA -> BGR image.
+     * @param {cv.Mat} imgRGBA RGBA image
+     * @returns {cv.Mat} BGR image
+     */
    static rgba2bgr(imgRGBA) {
        let imgBGR = new this.cv.Mat();
        this.cv.cvtColor(imgRGBA, imgBGR, this.cv.COLOR_RGBA2BGR);
        return imgBGR
    }

+    /**
+     * image resize.
+     * @param {cv.Mat} img image mat
+     * @param {number} height image height
+     * @param {number} width image width
+     * @param {[number, number]} targetSize target size [h, w]
+     * @param {boolean} keepRatio whether to keep the image aspect ratio
+     * @param {boolean} limitMax whether to limit the max image size
+     * @param {number} interp interpolation method
+     * @returns {[cv.Mat, number, number]} [resized image, scale factor of x axis, scale factor of y axis]
+     */
    static resize(img, height, width, targetSize, keepRatio, limitMax, interp) {
        let [imScaleX, imScaleY] = this.getIMScale(height, width, targetSize, keepRatio, limitMax);
        let imgResize = new this.cv.Mat();
        this.cv.resize(img, imgResize, { width: 0, height: 0 }, imScaleX, imScaleY, interp);
        return [imgResize, imScaleX, imScaleY]
    }

+    /**
+     * image center crop.
+     * @param {cv.Mat} img image mat
+     * @param {[number, number]} cropSize crop size [h, w]
+     * @returns {cv.Mat} cropped image
+     */
    static crop(img, cropSize) {
        let imgCrop = img.roi({
            x: Math.ceil((img.cols - cropSize[1]) / 2),
@@ -373,29 +499,43 @@ class WebAI {
        return imgCrop
    }

-    static normalize(imgRGB, scale, mean, std, isScale) {
-        imgRGB.convertTo(imgRGB, this.cv.CV_32F);
+    /**
+     * image normalize.
+     * @param {cv.Mat} img image mat
+     * @param {[number, number, number, number]} scale normalize scale
+     * @param {[number, number, number, number]} mean normalize mean
+     * @param {[number, number, number, number]} std normalize std
+     * @param {boolean} isScale whether to scale the image
+     * @returns {cv.Mat} normalized image
+     */
+    static normalize(img, scale, mean, std, isScale) {
+        img.convertTo(img, this.cv.CV_32F);

        if (isScale) {
-            let imgScale = new this.cv.Mat(imgRGB.rows, imgRGB.cols, this.cv.CV_32FC3, scale);
-            this.cv.divide(imgRGB, imgScale, imgRGB);
+            let imgScale = new this.cv.Mat(img.rows, img.cols, this.cv.CV_32FC3, scale);
+            this.cv.divide(img, imgScale, img);
            imgScale.delete();
        }

-        let imgMean = new this.cv.Mat(imgRGB.rows, imgRGB.cols, this.cv.CV_32FC3, mean);
-        this.cv.subtract(imgRGB, imgMean, imgRGB);
+        let imgMean = new this.cv.Mat(img.rows, img.cols, this.cv.CV_32FC3, mean);
+        this.cv.subtract(img, imgMean, img);
        imgMean.delete();

-        let imgStd = new this.cv.Mat(imgRGB.rows, imgRGB.cols, this.cv.CV_32FC3, std);
-        this.cv.divide(imgRGB, imgStd, imgRGB);
+        let imgStd = new this.cv.Mat(img.rows, img.cols, this.cv.CV_32FC3, std);
+        this.cv.divide(img, imgStd, img);
        imgStd.delete();

-        return imgRGB
+        return img
    }

-    static permute(imgRGB) {
+    /**
+     * permute HWC -> CHW.
+     * @param {cv.Mat} img image mat
+     * @returns {Float32Array} image data
+     */
+    static permute(img) {
        let rgbPlanes = new this.cv.MatVector();
-        this.cv.split(imgRGB, rgbPlanes);
+        this.cv.split(img, rgbPlanes);
        let R = rgbPlanes.get(0);
        let G = rgbPlanes.get(1);
        let B = rgbPlanes.get(2);
@@ -408,17 +548,27 @@ class WebAI {
        R.delete();
        G.delete();
        B.delete();
-        imgRGB.delete();
+        img.delete();
        return imgData
    }

+    /**
+     * load text content.
+     * @param {string} textURL text URL
+     * @returns {string} content of the text
+     */
    static loadText(textURL) {
        let xhr = new XMLHttpRequest();
        xhr.open('get', textURL, false);
        xhr.send(null);
        return xhr.responseText
    }

+    /**
+     * get color map of label list.
+     * @param {string[]} labelList label list
+     * @returns {{label: string, color: [number, number, number, number]}[]} color map of label list
+     */
    static getColorMap(labelList) {
        let classNum = labelList.length
        let colorMap = []
@@ -444,6 +594,18 @@ class WebAI {
        return colorMap
    }

+    /**
+     * draw bboxes onto the image.
+     * @param {cv.Mat} img image mat
+     * @param {{label: string, color: [number, number, number, number], score: number, x1: number, y1: number, x2: number, y2: number}[]} bboxes bboxes of detection
+     * @param {boolean} withLabel whether to draw the label
+     * @param {boolean} withScore whether to draw the score
+     * @param {number} thickness line thickness
+     * @param {number} lineType line type
+     * @param {number} fontFace font face
+     * @param {number} fontScale font scale
+     * @returns {cv.Mat} drawn image
+     */
    static drawBBoxes(img, bboxes, withLabel = true, withScore = true, thickness = 2.0, lineType = 8, fontFace = 0, fontScale = 0.7) {
        let imgShow = img.clone()
        for (let i = 0; i < bboxes.length; i++) {
@@ -462,6 +624,10 @@ class WebAI {
        return imgShow
    }

+    /**
+     * wait for OpenCV to load.
+     * @returns {Promise<boolean>} promise that resolves once opencv.js has loaded
+     */
    static waitForOpenCV() {
        return new Promise(resolve => {
            if (typeof cv.onRuntimeInitialized == 'undefined') {
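
As a reference for the `imgRGB` -> `img` rename in the helpers above, here is a sketch of how `rgba2rgb`, `resize`, `normalize`, and `permute` chain into an input tensor, roughly what `CV.preProcess` appears to assemble; the target size and normalization constants below are illustrative placeholders, not values taken from this diff:

// assumes opencv.js (cv) and onnxruntime-web (ort) are already loaded,
// and imgRGBA is an RGBA cv.Mat obtained elsewhere
const imgRGB = WebAI.rgba2rgb(imgRGBA);                        // drop the alpha channel
const [imgResize, imScaleX, imScaleY] = WebAI.resize(
    imgRGB, imgRGB.rows, imgRGB.cols,
    [640, 640], false, false, cv.INTER_LINEAR);                // targetSize is [h, w]
const imgNorm = WebAI.normalize(
    imgResize,
    [255.0, 255.0, 255.0, 0.0],                                // scale: divide first (isScale = true)
    [0.485, 0.456, 0.406, 0.0],                                // mean: then subtract
    [0.229, 0.224, 0.225, 0.0],                                // std: then divide
    true);
const imgData = WebAI.permute(imgNorm);                        // HWC -> CHW Float32Array
const imgTensor = new ort.Tensor('float32', imgData, [1, 3, 640, 640]);
imgRGB.delete();                                               // imgResize is consumed by permute

Note that `normalize` works in place and returns its input Mat, and `permute` deletes the Mat it is given, so only `imgRGB` still needs an explicit `delete()` here.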