Skip to content

Commit 856fd49

Browse files
authored
feat: luhn mask (#62)
* docs: fix typo * feat(luhn): add venom test * feat(luhn): add mask skeleton * feat(luhn): implement mask * feat(luhn): update json schema * feat(luhn): add venom test * docs(luhn): add mask example * docs(luhn): add mask example * test(luhn): ok * docs(luhn): add universe example
1 parent 4ed5aa6 commit 856fd49

File tree

6 files changed

+204
-1
lines changed

6 files changed

+204
-1
lines changed

README.md

Lines changed: 29 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,7 @@ The following types of masks can be used :
8484
* [`fluxUri`](#fluxUri) is to replace by a sequence of values defined in an external resource.
8585
* [`replacement`](#replacement) is to mask a data with another data from the jsonline.
8686
* [`pipe`](#pipe) is a mask to handle complex nested array structures, it can read an array as an object stream and process it with a sub-pipeline.
87+
* [`luhn`](#luhn) can generate valid numbers using the Luhn algorithm (e.g. French SIRET or SIREN).
8788

8889
A full `masking.yml` file example, using every kind of mask, is given with the source code.
8990

@@ -256,7 +257,7 @@ This example will mask the `town` field of the input jsonlines with a value from
256257
- selector:
257258
jsonpath: "name"
258259
mask:
259-
hashInUri: "pimo://nameFR
260+
hashInUri: "pimo://nameFR"
260261
```
261262

262263
This example will mask the `name` field of the input jsonlines with a value from the list nameFR contained in pimo, the same way as for the `hash` mask. The URI schemes usable with this selector are: `pimo`, `file` and `http`/`https`.
@@ -513,6 +514,7 @@ This mask will replace an integer value `{"age": 27}` with a range like this `{"
513514
If the data structure contains arrays of object like in the example below, this mask can pipe the objects into a sub pipeline definition.
514515

515516
**`data.jsonl`**
517+
516518
```json
517519
{
518520
"organizations": [
@@ -550,7 +552,33 @@ If the data structure contains arrays of object like in the example below, this
550552
}
551553
```
552554

555+
### Luhn
556+
557+
The [Luhn](https://en.wikipedia.org/wiki/Luhn_algorithm) algorithm is a simple checksum formula used to validate a variety of identification numbers.
558+
559+
The `luhn` mask can calculate the checksum for any value.
560+
561+
```yaml
562+
- selector:
563+
jsonpath: "siret"
564+
mask:
565+
luhn: {}
566+
```
567+
568+
In this example, the correct checksum will be appended to the `siret` value, to create a valid SIRET number (French business identifier).
569+
570+
The mask can be configured to use a different universe of valid characters, internally using the [Luhn mod N](https://en.wikipedia.org/wiki/Luhn_mod_N_algorithm) algorithm. The universe must contain an even number of characters.
571+
572+
```yaml
573+
- selector:
574+
jsonpath: "siret"
575+
mask:
576+
luhn:
577+
universe: "abcdef"
578+
```
579+
553580
**`masking.yml`**
581+
554582
```yaml
555583
version: "1"
556584
seed: 42

cmd/pimo/main.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ import (
3939
"github.com/cgi-fr/pimo/pkg/hash"
4040
"github.com/cgi-fr/pimo/pkg/increment"
4141
"github.com/cgi-fr/pimo/pkg/jsonline"
42+
"github.com/cgi-fr/pimo/pkg/luhn"
4243
"github.com/cgi-fr/pimo/pkg/model"
4344
"github.com/cgi-fr/pimo/pkg/pipe"
4445
"github.com/cgi-fr/pimo/pkg/randdate"
@@ -260,6 +261,7 @@ func injectMaskFactories() []model.MaskFactory {
260261
randomdecimal.Factory,
261262
dateparser.Factory,
262263
ff1.Factory,
264+
luhn.Factory,
263265
}
264266
}
265267

pkg/luhn/luhn.go

Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
// Copyright (C) 2021 CGI France
2+
//
3+
// This file is part of PIMO.
4+
//
5+
// PIMO is free software: you can redistribute it and/or modify
6+
// it under the terms of the GNU General Public License as published by
7+
// the Free Software Foundation, either version 3 of the License, or
8+
// (at your option) any later version.
9+
//
10+
// PIMO is distributed in the hope that it will be useful,
11+
// but WITHOUT ANY WARRANTY; without even the implied warranty of
12+
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13+
// GNU General Public License for more details.
14+
//
15+
// You should have received a copy of the GNU General Public License
16+
// along with PIMO. If not, see <http://www.gnu.org/licenses/>.
17+
18+
package luhn
19+
20+
import (
21+
"bytes"
22+
"fmt"
23+
24+
"github.com/cgi-fr/pimo/pkg/model"
25+
"github.com/rs/zerolog/log"
26+
)
27+
28+
// MaskEngine is a mask that appends a Luhn mod N check character to a value.
type MaskEngine struct {
	// Universe is the ordered set of valid characters. The index of a byte
	// in Universe is its code point in the checksum computation, and the
	// length of Universe is the base N of the algorithm.
	Universe []byte
}

// NewMask creates a Luhn mask working on the given universe of valid characters.
func NewMask(universe []byte) MaskEngine {
	return MaskEngine{Universe: universe}
}
37+
38+
// Mask return the value with luhn checksum
39+
func (l MaskEngine) Mask(e model.Entry, context ...model.Dictionary) (model.Entry, error) {
40+
if e == nil {
41+
// Cannot use a nil value so we leave it untouched
42+
log.Warn().Msg("Mask luhn - ignored null value")
43+
return e, nil
44+
}
45+
46+
log.Info().Msg("Mask luhn")
47+
48+
factor := 2
49+
sum := 0
50+
n := len(l.Universe)
51+
input := e.(string)
52+
53+
// Starting from the right and working leftwards is easier since
54+
// the initial "factor" will always be "2".
55+
for i := len(input) - 1; i >= 0; i-- {
56+
codePoint := bytes.IndexByte(l.Universe, input[i])
57+
addend := factor * codePoint
58+
59+
// Alternate the "factor" that each "codePoint" is multiplied by
60+
if factor == 2 {
61+
factor = 1
62+
} else {
63+
factor = 2
64+
}
65+
66+
// Sum the digits of the "addend" as expressed in base "n"
67+
addend = addend/n + (addend % n)
68+
sum += addend
69+
}
70+
71+
// Calculate the number that must be added to the "sum"
72+
// to make it divisible by "n".
73+
remainder := sum % n
74+
checkCodePoint := (n - remainder) % n
75+
76+
return input + string(l.Universe[checkCodePoint]), nil
77+
}
78+
79+
// Create a mask from a configuration
80+
func Factory(conf model.Masking, seed int64, caches map[string]model.Cache) (model.MaskEngine, bool, error) {
81+
if conf.Mask.Luhn != nil {
82+
if conf.Mask.Luhn.Universe != "" {
83+
if len(conf.Mask.Luhn.Universe)%2 != 0 {
84+
return nil, true, fmt.Errorf("luhn universe size must be divisible by 2")
85+
}
86+
return NewMask([]byte(conf.Mask.Luhn.Universe)), true, nil
87+
}
88+
return NewMask([]byte("0123456789")), true, nil
89+
}
90+
return nil, false, nil
91+
}

pkg/model/model.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,10 @@ type TemplateEachType struct {
122122
Template string `yaml:"template,omitempty"`
123123
}
124124

125+
// LuhnType holds the optional configuration of the luhn mask.
type LuhnType struct {
126+
// Universe is read from the "universe" YAML key and may be omitted.
Universe string `yaml:"universe,omitempty"`
127+
}
128+
125129
type MaskType struct {
126130
Add Entry `yaml:"add,omitempty" jsonschema:"oneof_required=Add"`
127131
AddTransient Entry `yaml:"add-transient,omitempty" jsonschema:"oneof_required=AddTransient"`
@@ -150,6 +154,7 @@ type MaskType struct {
150154
FF1 FF1Type `yaml:"ff1,omitempty" jsonschema:"oneof_required=FF1"`
151155
Pipe PipeType `yaml:"pipe,omitempty" jsonschema:"oneof_required=Pipe"`
152156
FromJSON string `yaml:"fromjson,omitempty" jsonschema:"oneof_required=FromJSON"`
157+
Luhn *LuhnType `yaml:"luhn,omitempty" jsonschema:"oneof_required=Luhn"`
153158
}
154159

155160
type Masking struct {

schema/v1/pimo.schema.json

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,15 @@
9292
"additionalProperties": false,
9393
"type": "object"
9494
},
95+
"LuhnType": {
96+
"properties": {
97+
"universe": {
98+
"type": "string"
99+
}
100+
},
101+
"additionalProperties": false,
102+
"type": "object"
103+
},
95104
"MaskType": {
96105
"properties": {
97106
"add": {
@@ -193,6 +202,10 @@
193202
},
194203
"fromjson": {
195204
"type": "string"
205+
},
206+
"luhn": {
207+
"$schema": "http://json-schema.org/draft-04/schema#",
208+
"$ref": "#/definitions/LuhnType"
196209
}
197210
},
198211
"additionalProperties": false,
@@ -359,6 +372,12 @@
359372
"fromjson"
360373
],
361374
"title": "FromJSON"
375+
},
376+
{
377+
"required": [
378+
"luhn"
379+
],
380+
"title": "Luhn"
362381
}
363382
]
364383
},

test/suites/masking_luhn.yml

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
name: luhn mask
2+
testcases:
3+
- name: default universe
4+
steps:
5+
- script: rm -f masking.yml
6+
- script: |-
7+
cat > masking.yml <<EOF
8+
version: "1"
9+
masking:
10+
- selector:
11+
jsonpath: "siret"
12+
mask:
13+
luhn: {}
14+
EOF
15+
- script: |-
16+
echo '{"siret": "12345678"}' | pimo
17+
assertions:
18+
- result.code ShouldEqual 0
19+
- result.systemoutjson.siret ShouldEqual 123456782
20+
- result.systemerr ShouldBeEmpty
21+
- name: custom universe
22+
steps:
23+
- script: rm -f masking.yml
24+
- script: |-
25+
cat > masking.yml <<EOF
26+
version: "1"
27+
masking:
28+
- selector:
29+
jsonpath: "siret"
30+
mask:
31+
luhn:
32+
universe: "abcdef"
33+
EOF
34+
- script: |-
35+
echo '{"siret": "abcdef"}' | pimo
36+
assertions:
37+
- result.code ShouldEqual 0
38+
- result.systemoutjson.siret ShouldEqual abcdefe
39+
- result.systemerr ShouldBeEmpty
40+
41+
- name: default universe - additional test
42+
steps:
43+
- script: rm -f masking.yml
44+
- script: |-
45+
cat > masking.yml <<EOF
46+
version: "1"
47+
masking:
48+
- selector:
49+
jsonpath: "siret"
50+
mask:
51+
luhn: {}
52+
EOF
53+
- script: |-
54+
echo '{"siret": "1234567821234"}' | pimo
55+
assertions:
56+
- result.code ShouldEqual 0
57+
- result.systemoutjson.siret ShouldEqual 12345678212346
58+
- result.systemerr ShouldBeEmpty

0 commit comments

Comments
 (0)