From 448a2fe0a57b428d62b3fc5eab7ab48b51aa4519 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Youen=20P=C3=A9ron?= Date: Wed, 19 Feb 2025 22:55:24 +0100 Subject: [PATCH 1/3] feat(pull): filter by scanning all rows --- internal/app/pull/cli.go | 57 +++++++--- internal/app/pull/http.go | 2 +- pkg/pull/driver.go | 10 +- pkg/pull/driver_parallel.go | 9 +- pkg/pull/driver_test.go | 4 +- tests/suites/pull/pull_with_file_filters.yml | 111 +++++++++++++++++++ 6 files changed, 174 insertions(+), 19 deletions(-) diff --git a/internal/app/pull/cli.go b/internal/app/pull/cli.go index f295133e..0c923bd5 100755 --- a/internal/app/pull/cli.go +++ b/internal/app/pull/cli.go @@ -84,6 +84,7 @@ func NewCommand(fullName string, err *os.File, out *os.File, in *os.File) *cobra var initialFilters map[string]string var diagnostic bool var filters pull.RowReader + var scann bool var parallel uint cmd := &cobra.Command{ @@ -99,6 +100,7 @@ func NewCommand(fullName string, err *os.File, out *os.File, in *os.File) *cobra Bool("diagnostic", diagnostic). Bool("distinct", distinct). Str("filter-from-file", filefilter). + Bool("scann", scann). Str("exclude-from-file", fileexclude). Str("table", table). Str("where", where). @@ -133,19 +135,47 @@ func NewCommand(fullName string, err *os.File, out *os.File, in *os.File) *cobra tracer = traceListener } - switch filefilter { - case "": - filters = pull.NewOneEmptyRowReader() - case "-": - filters = rowReaderFactory(in) - default: - filterReader, e3 := os.Open(filefilter) - if e3 != nil { - fmt.Fprintln(err, e3.Error()) - os.Exit(1) + var filtersIn pull.KeyStore + if scann { + switch filefilter { + case "": + filtersIn = nil + case "-": + var eks error + filtersIn, eks = keyStoreFactory(in, start.Keys) + if err != nil { + fmt.Fprintln(err, eks.Error()) + os.Exit(1) + } + default: + filterReader, e3 := os.Open(filefilter) + if e3 != nil { + fmt.Fprintln(err, e3.Error()) + os.Exit(1) + } + var eks error + filtersIn, eks = keyStoreFactory(filterReader, start.Keys) + if err != nil { + fmt.Fprintln(err, eks.Error()) + os.Exit(1) + } + log.Trace().Str("file", filefilter).Msg("reading file") + } + } else { + switch filefilter { + case "": + filters = pull.NewOneEmptyRowReader() + case "-": + filters = rowReaderFactory(in) + default: + filterReader, e3 := os.Open(filefilter) + if e3 != nil { + fmt.Fprintln(err, e3.Error()) + os.Exit(1) + } + filters = rowReaderFactory(filterReader) + log.Trace().Str("file", filefilter).Msg("reading file") } - filters = rowReaderFactory(filterReader) - log.Trace().Str("file", filefilter).Msg("reading file") } var filtersEx pull.KeyStore @@ -175,7 +205,7 @@ func NewCommand(fullName string, err *os.File, out *os.File, in *os.File) *cobra } puller := pull.NewPullerParallel(plan, datasource, pullExporterFactory(out), tracer, parallel) - if e3 := puller.Pull(start, filter, startSelect, filters, filtersEx); e3 != nil { + if e3 := puller.Pull(start, filter, startSelect, filters, filtersEx, filtersIn); e3 != nil { log.Fatal().AnErr("error", e3).Msg("Fatal error stop the pull command") os.Exit(1) } @@ -192,6 +222,7 @@ func NewCommand(fullName string, err *os.File, out *os.File, in *os.File) *cobra cmd.Flags().BoolVarP(&diagnostic, "diagnostic", "d", false, "Set diagnostic debug on") cmd.Flags().BoolVarP(&distinct, "distinct", "D", false, "select distinct values from start table") cmd.Flags().StringVarP(&filefilter, "filter-from-file", "F", "", "Use file to filter start table") + cmd.Flags().BoolVarP(&scann, "scann", "s", false, "read all rows from start table and apply filter `filter-from-file` in memory") cmd.Flags().StringVarP(&fileexclude, "exclude-from-file", "X", "", "Use file to filter out start table") cmd.Flags().StringVarP(&table, "table", "t", "", "pull content of table without relations instead of ingress descriptor definition") cmd.Flags().StringVarP(&where, "where", "w", "", "Advanced SQL where clause to filter") diff --git a/internal/app/pull/http.go b/internal/app/pull/http.go index 7912b788..ff8d989e 100644 --- a/internal/app/pull/http.go +++ b/internal/app/pull/http.go @@ -141,7 +141,7 @@ func HandlerFactory(ingressDescriptor string) func(w http.ResponseWriter, r *htt pullExporter := pullExporterFactory(w) puller := pull.NewPuller(plan, datasource, pullExporter, pull.NoTraceListener{}) - e3 := puller.Pull(start, pull.Filter{Limit: limit, Values: pull.Row{}, Where: where, Distinct: distinct}, startSelect, nil, nil) + e3 := puller.Pull(start, pull.Filter{Limit: limit, Values: pull.Row{}, Where: where, Distinct: distinct}, startSelect, nil, nil, nil) if e3 != nil { log.Error().Err(e3).Msg("") w.WriteHeader(http.StatusInternalServerError) diff --git a/pkg/pull/driver.go b/pkg/pull/driver.go index 1efcf428..ce67890d 100755 --- a/pkg/pull/driver.go +++ b/pkg/pull/driver.go @@ -46,7 +46,7 @@ func NewStep(puller *puller, out ExportedRow, entry Relation) *Step { } type Puller interface { - Pull(start Table, filter Filter, selectColumns []string, filterCohort RowReader, excluded KeyStore) error + Pull(start Table, filter Filter, selectColumns []string, filterCohort RowReader, excluded KeyStore, included KeyStore) error } type puller struct { @@ -65,7 +65,7 @@ func NewPuller(plan Plan, datasource DataSource, exporter RowExporter, diagnosti } } -func (p *puller) Pull(start Table, filter Filter, selectColumns []string, filterCohort RowReader, excluded KeyStore) error { +func (p *puller) Pull(start Table, filter Filter, selectColumns []string, filterCohort RowReader, excluded KeyStore, included KeyStore) error { start.selectColumns(selectColumns...) start = p.graph.addMissingColumns(start) @@ -78,9 +78,11 @@ func (p *puller) Pull(start Table, filter Filter, selectColumns []string, filter Reset() filters := []Filter{} + // If scann is on all rows are pulled and filter after if filterCohort != nil { for filterCohort.Next() { fc := filterCohort.Value() + values := Row{} for key, val := range fc { values[key] = val @@ -119,6 +121,10 @@ func (p *puller) Pull(start Table, filter Filter, selectColumns []string, filter continue } + if included != nil && !included.Has(extract(row, start.Keys)) { + continue + } + if err := p.pull(start, row); err != nil { return fmt.Errorf("%w", err) } diff --git a/pkg/pull/driver_parallel.go b/pkg/pull/driver_parallel.go index 23a71b1c..319d687b 100644 --- a/pkg/pull/driver_parallel.go +++ b/pkg/pull/driver_parallel.go @@ -36,6 +36,7 @@ type pullerParallel struct { outChan chan ExportedRow errors []error excluded KeyStore + included KeyStore } func NewPullerParallel(plan Plan, datasource DataSource, exporter RowExporter, diagnostic TraceListener, nbworkers uint) Puller { //nolint:lll @@ -55,13 +56,14 @@ func NewPullerParallel(plan Plan, datasource DataSource, exporter RowExporter, d outChan: nil, errors: nil, excluded: nil, + included: nil, } } return puller } -func (p *pullerParallel) Pull(start Table, filter Filter, selectColumns []string, filterCohort RowReader, excluded KeyStore) error { +func (p *pullerParallel) Pull(start Table, filter Filter, selectColumns []string, filterCohort RowReader, excluded KeyStore, included KeyStore) error { start.selectColumns(selectColumns...) start = p.graph.addMissingColumns(start) @@ -103,6 +105,7 @@ func (p *pullerParallel) Pull(start Table, filter Filter, selectColumns []string p.outChan = make(chan ExportedRow) p.errors = []error{} p.excluded = excluded + p.included = included wg := &sync.WaitGroup{} @@ -172,6 +175,10 @@ LOOP: continue } + if p.included != nil && !p.included.Has(extract(out, start.Keys)) { + continue + } + err := p.pull(start, out) if err != nil { p.errChan <- err diff --git a/pkg/pull/driver_test.go b/pkg/pull/driver_test.go index a9bfaae9..9abf9282 100644 --- a/pkg/pull/driver_test.go +++ b/pkg/pull/driver_test.go @@ -74,7 +74,7 @@ func RunTest(t *testing.T, test *Test) { for _, execution := range test.Executions { collector.Reset() - assert.NoError(t, puller.Pull(execution.Start, execution.Filter, execution.Select, nil, nil)) + assert.NoError(t, puller.Pull(execution.Start, execution.Filter, execution.Select, nil, nil, nil)) assert.Len(t, collector.Result, len(execution.Result)) for i := 0; i < len(execution.Result); i++ { @@ -94,7 +94,7 @@ func RunBench(b *testing.B, test *Test) { for _, execution := range test.Executions { collector.Reset() - assert.NoError(b, puller.Pull(execution.Start, execution.Filter, execution.Select, nil, nil)) + assert.NoError(b, puller.Pull(execution.Start, execution.Filter, execution.Select, nil, nil, nil)) assert.Len(b, collector.Result, len(execution.Result)) } } diff --git a/tests/suites/pull/pull_with_file_filters.yml b/tests/suites/pull/pull_with_file_filters.yml index f538774b..2d28e1c7 100644 --- a/tests/suites/pull/pull_with_file_filters.yml +++ b/tests/suites/pull/pull_with_file_filters.yml @@ -38,6 +38,16 @@ testcases: - result.systemout ShouldEqual {"active":1,"activebool":true,"address_id":13,"create_date":"2006-02-14T00:00:00Z","customer_id":9,"email":"MARGARET.MOORE@sakilacustomer.org","first_name":"MARGARET","last_name":"MOORE","last_update":"2006-02-15T09:57:20Z","store_id":2} - result.systemerr ShouldBeEmpty + - name: pull one value scann mode + steps: + - script: sed -i "s/true/false/g" ingress-descriptor.yaml + - script: echo '{"customer_id":9}' > customer_filter.jsonl + - script: lino pull source --scann --filter-from-file customer_filter.jsonl + assertions: + - result.code ShouldEqual 0 + - result.systemout ShouldEqual {"active":1,"activebool":true,"address_id":13,"create_date":"2006-02-14T00:00:00Z","customer_id":9,"email":"MARGARET.MOORE@sakilacustomer.org","first_name":"MARGARET","last_name":"MOORE","last_update":"2006-02-15T09:57:20Z","store_id":2} + - result.systemerr ShouldBeEmpty + - name: pull error value steps: - script: sed -i "s/true/false/g" ingress-descriptor.yaml @@ -47,6 +57,15 @@ testcases: - result.code ShouldEqual 1 - result.systemout ShouldBeEmpty + - name: pull error value scann mode should not be detected + steps: + - script: sed -i "s/true/false/g" ingress-descriptor.yaml + - script: echo '{"no_column_id":9}' > customer_filter.jsonl + - script: lino pull source --scann --filter-from-file customer_filter.jsonl + assertions: + - result.code ShouldEqual 0 + - result.systemout ShouldBeEmpty + - name: pull no value steps: - script: sed -i "s/true/false/g" ingress-descriptor.yaml @@ -57,6 +76,16 @@ testcases: - result.systemout ShouldBeEmpty - result.systemerr ShouldBeEmpty + - name: pull no value scann mode + steps: + - script: sed -i "s/true/false/g" ingress-descriptor.yaml + - script: echo '{"customer_id":-1}' > customer_filter.jsonl + - script: lino pull source --scann --filter-from-file customer_filter.jsonl + assertions: + - result.code ShouldEqual 0 + - result.systemout ShouldBeEmpty + - result.systemerr ShouldBeEmpty + - name: pull many value with implicit limit to 1 steps: - script: sed -i "s/true/false/g" ingress-descriptor.yaml @@ -67,6 +96,16 @@ testcases: - result.systemout ShouldEqual {"active":1,"activebool":true,"address_id":5,"create_date":"2006-02-14T00:00:00Z","customer_id":1,"email":"MARY.SMITH@sakilacustomer.org","first_name":"MARY","last_name":"SMITH","last_update":"2006-02-15T09:57:20Z","store_id":1} - result.systemerr ShouldBeEmpty + - name: pull many value with implicit limit to 1 scann mode + steps: + - script: sed -i "s/true/false/g" ingress-descriptor.yaml + - script: echo '{"active":1}' > customer_filter.jsonl + - script: lino pull source --scann --filter-from-file customer_filter.jsonl + assertions: + - result.code ShouldEqual 0 + - result.systemout ShouldEqual {"active":1,"activebool":true,"address_id":5,"create_date":"2006-02-14T00:00:00Z","customer_id":1,"email":"MARY.SMITH@sakilacustomer.org","first_name":"MARY","last_name":"SMITH","last_update":"2006-02-15T09:57:20Z","store_id":1} + - result.systemerr ShouldBeEmpty + - name: pull many values with explicit limit to 2 steps: - script: sed -i "s/true/false/g" ingress-descriptor.yaml @@ -82,6 +121,21 @@ testcases: - result.code ShouldEqual 0 - result.systemout ShouldBeEmpty + - name: pull many values with explicit limit to 2 scann mode + steps: + - script: sed -i "s/true/false/g" ingress-descriptor.yaml + - script: echo '{"active":1}' > customer_filter.jsonl + - script: echo '{"active":1,"activebool":true,"address_id":5,"create_date":"2006-02-14T00:00:00Z","customer_id":1,"email":"MARY.SMITH@sakilacustomer.org","first_name":"MARY","last_name":"SMITH","last_update":"2006-02-15T09:57:20Z","store_id":1}' > expected.jsonl + - script: echo '{"active":1,"activebool":true,"address_id":6,"create_date":"2006-02-14T00:00:00Z","customer_id":2,"email":"PATRICIA.JOHNSON@sakilacustomer.org","first_name":"PATRICIA","last_name":"JOHNSON","last_update":"2006-02-15T09:57:20Z","store_id":1}' >> expected.jsonl + - script: lino pull source --scann --filter-from-file customer_filter.jsonl --limit 2 > actual.jsonl + assertions: + - result.code ShouldEqual 0 + - result.systemerr ShouldBeEmpty + - script: diff expected.jsonl actual.jsonl + assertions: + - result.code ShouldEqual 0 + - result.systemout ShouldBeEmpty + - name: pull values with reversed order steps: - script: sed -i "s/true/false/g" ingress-descriptor.yaml @@ -97,6 +151,21 @@ testcases: - result.code ShouldEqual 0 - result.systemout ShouldBeEmpty + - name: pull values with reversed order not respected in scann mode + steps: + - script: sed -i "s/true/false/g" ingress-descriptor.yaml + - script: echo '{"customer_id":2}\n{"customer_id":1}' > customer_filter.jsonl + - script: echo '{"active":1,"activebool":true,"address_id":5,"create_date":"2006-02-14T00:00:00Z","customer_id":1,"email":"MARY.SMITH@sakilacustomer.org","first_name":"MARY","last_name":"SMITH","last_update":"2006-02-15T09:57:20Z","store_id":1}' >> expected.jsonl + - script: echo '{"active":1,"activebool":true,"address_id":6,"create_date":"2006-02-14T00:00:00Z","customer_id":2,"email":"PATRICIA.JOHNSON@sakilacustomer.org","first_name":"PATRICIA","last_name":"JOHNSON","last_update":"2006-02-15T09:57:20Z","store_id":1}' > expected.jsonl + - script: lino pull source scann --filter-from-file customer_filter.jsonl --limit 2 > actual.jsonl + assertions: + - result.code ShouldEqual 0 + - result.systemerr ShouldBeEmpty + - script: diff expected.jsonl actual.jsonl + assertions: + - result.code ShouldEqual 0 + - result.systemout ShouldBeEmpty + - name: pull values with aditional filter steps: - script: sed -i "s/true/false/g" ingress-descriptor.yaml @@ -109,6 +178,18 @@ testcases: result.systemout ShouldEqual "1 0" + - name: pull values with aditional filter scann mode + steps: + - script: sed -i "s/true/false/g" ingress-descriptor.yaml + - script: echo '{"active":1}\n{"active":0}' > customer_filter.jsonl + - script: lino pull source --scann --filter-from-file customer_filter.jsonl --filter store_id=2 | jq .active + assertions: + - result.code ShouldEqual 0 + - result.systemerr ShouldBeEmpty + - | + result.systemout ShouldEqual "1 + 0" + - name: pull values with override filter steps: - script: sed -i "s/true/false/g" ingress-descriptor.yaml @@ -124,6 +205,21 @@ testcases: - result.code ShouldEqual 0 - result.systemout ShouldBeEmpty + - name: pull values with override filter scann mode + steps: + - script: sed -i "s/true/false/g" ingress-descriptor.yaml + - script: echo '{"customer_id":1}\n{"customer_id":3}' > customer_filter.jsonl + - script: echo '{"active":1,"activebool":true,"address_id":6,"create_date":"2006-02-14T00:00:00Z","customer_id":2,"email":"PATRICIA.JOHNSON@sakilacustomer.org","first_name":"PATRICIA","last_name":"JOHNSON","last_update":"2006-02-15T09:57:20Z","store_id":1}' > expected.jsonl + - script: echo '{"active":1,"activebool":true,"address_id":6,"create_date":"2006-02-14T00:00:00Z","customer_id":2,"email":"PATRICIA.JOHNSON@sakilacustomer.org","first_name":"PATRICIA","last_name":"JOHNSON","last_update":"2006-02-15T09:57:20Z","store_id":1}' >> expected.jsonl + - script: lino pull source --scann --filter-from-file customer_filter.jsonl --filter customer_id=2 > actual.jsonl + assertions: + - result.code ShouldEqual 0 + - result.systemerr ShouldBeEmpty + - script: diff expected.jsonl actual.jsonl + assertions: + - result.code ShouldEqual 0 + - result.systemout ShouldBeEmpty + - name: pull values with filter from stdin steps: - script: sed -i "s/true/false/g" ingress-descriptor.yaml @@ -139,6 +235,21 @@ testcases: - result.code ShouldEqual 0 - result.systemout ShouldBeEmpty + - name: pull values with filter scann mode from stdin + steps: + - script: sed -i "s/true/false/g" ingress-descriptor.yaml + - script: echo '{"customer_id":1}\n{"customer_id":2}' > customer_filter.jsonl + - script: echo '{"active":1,"activebool":true,"address_id":5,"create_date":"2006-02-14T00:00:00Z","customer_id":1,"email":"MARY.SMITH@sakilacustomer.org","first_name":"MARY","last_name":"SMITH","last_update":"2006-02-15T09:57:20Z","store_id":1}' > expected.jsonl + - script: echo '{"active":1,"activebool":true,"address_id":6,"create_date":"2006-02-14T00:00:00Z","customer_id":2,"email":"PATRICIA.JOHNSON@sakilacustomer.org","first_name":"PATRICIA","last_name":"JOHNSON","last_update":"2006-02-15T09:57:20Z","store_id":1}' >> expected.jsonl + - script: lino pull source --scann --filter-from-file - < customer_filter.jsonl > actual.jsonl + assertions: + - result.code ShouldEqual 0 + - result.systemerr ShouldBeEmpty + - script: diff expected.jsonl actual.jsonl + assertions: + - result.code ShouldEqual 0 + - result.systemout ShouldBeEmpty + - name: pull all exclude one value steps: - script: sed -i "s/true/false/g" ingress-descriptor.yaml From f04cda2b075582b26c445fff00648be1f77eef55 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Youen=20P=C3=A9ron?= Date: Wed, 19 Feb 2025 23:36:45 +0100 Subject: [PATCH 2/3] fix(pull): bad error test --- internal/app/pull/cli.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/internal/app/pull/cli.go b/internal/app/pull/cli.go index 0c923bd5..5c817899 100755 --- a/internal/app/pull/cli.go +++ b/internal/app/pull/cli.go @@ -143,7 +143,7 @@ func NewCommand(fullName string, err *os.File, out *os.File, in *os.File) *cobra case "-": var eks error filtersIn, eks = keyStoreFactory(in, start.Keys) - if err != nil { + if eks != nil { fmt.Fprintln(err, eks.Error()) os.Exit(1) } @@ -155,7 +155,7 @@ func NewCommand(fullName string, err *os.File, out *os.File, in *os.File) *cobra } var eks error filtersIn, eks = keyStoreFactory(filterReader, start.Keys) - if err != nil { + if eks != nil { fmt.Fprintln(err, eks.Error()) os.Exit(1) } From 382ea7c762d1cd84bf175e88cb87af068bf550e9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Youen=20P=C3=A9ron?= Date: Mon, 24 Feb 2025 16:55:50 +0100 Subject: [PATCH 3/3] fix(pull): rename scann to scan --- internal/app/pull/cli.go | 8 +-- internal/infra/pull/keystore_json_test.go | 58 +++++++++++++++++++ pkg/pull/driver.go | 11 +++- tests/suites/pull/pull_with_file_filters.yml | 60 +++++--------------- 4 files changed, 86 insertions(+), 51 deletions(-) create mode 100644 internal/infra/pull/keystore_json_test.go diff --git a/internal/app/pull/cli.go b/internal/app/pull/cli.go index 5c817899..9f6d39d0 100755 --- a/internal/app/pull/cli.go +++ b/internal/app/pull/cli.go @@ -84,7 +84,7 @@ func NewCommand(fullName string, err *os.File, out *os.File, in *os.File) *cobra var initialFilters map[string]string var diagnostic bool var filters pull.RowReader - var scann bool + var scan bool var parallel uint cmd := &cobra.Command{ @@ -100,7 +100,7 @@ func NewCommand(fullName string, err *os.File, out *os.File, in *os.File) *cobra Bool("diagnostic", diagnostic). Bool("distinct", distinct). Str("filter-from-file", filefilter). - Bool("scann", scann). + Bool("scan", scan). Str("exclude-from-file", fileexclude). Str("table", table). Str("where", where). @@ -136,7 +136,7 @@ func NewCommand(fullName string, err *os.File, out *os.File, in *os.File) *cobra } var filtersIn pull.KeyStore - if scann { + if scan { switch filefilter { case "": filtersIn = nil @@ -222,7 +222,7 @@ func NewCommand(fullName string, err *os.File, out *os.File, in *os.File) *cobra cmd.Flags().BoolVarP(&diagnostic, "diagnostic", "d", false, "Set diagnostic debug on") cmd.Flags().BoolVarP(&distinct, "distinct", "D", false, "select distinct values from start table") cmd.Flags().StringVarP(&filefilter, "filter-from-file", "F", "", "Use file to filter start table") - cmd.Flags().BoolVarP(&scann, "scann", "s", false, "read all rows from start table and apply filter `filter-from-file` in memory") + cmd.Flags().BoolVarP(&scan, "scan", "s", false, "read all rows from start table and apply filter `filter-from-file` in memory") cmd.Flags().StringVarP(&fileexclude, "exclude-from-file", "X", "", "Use file to filter out start table") cmd.Flags().StringVarP(&table, "table", "t", "", "pull content of table without relations instead of ingress descriptor definition") cmd.Flags().StringVarP(&where, "where", "w", "", "Advanced SQL where clause to filter") diff --git a/internal/infra/pull/keystore_json_test.go b/internal/infra/pull/keystore_json_test.go new file mode 100644 index 00000000..b3ab8108 --- /dev/null +++ b/internal/infra/pull/keystore_json_test.go @@ -0,0 +1,58 @@ +// Copyright (C) 2023 CGI France +// +// This file is part of LINO. +// +// LINO is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// LINO is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with LINO. If not, see . + +package pull + +import ( + "bytes" + "strings" + "testing" + + "github.com/cgi-fr/lino/pkg/pull" + "github.com/stretchr/testify/assert" +) + +func TestJSONKeyStore(t *testing.T) { + var buffer bytes.Buffer + buffer.WriteString(`{"ID_INDIVIDU":1000006456,"ID_LANGUE":472355}`) + + ks, err := NewJSONKeyStore(&buffer, []string{"ID_INDIVIDU", "ID_LANGUE"}) + + assert.Nil(t, err) + assert.True(t, ks.Has(pull.Row{"ID_INDIVIDU": 1000006456, "ID_LANGUE": 472355})) + + assert.True(t, ks.Has(pull.Row{"ID_INDIVIDU": 1000006456, "ID_LANGUE": 472355, "OTHER": 42})) + + assert.False(t, ks.Has(pull.Row{"ID_INDIVIDU": 1000008957, "ID_LANGUE": 472355})) + assert.False(t, ks.Has(pull.Row{"ID_INDIVIDU": 1000006456, "ID_LANGUE": 472354, "OTHER": 42})) + assert.False(t, ks.Has(pull.Row{"ID_INDIVIDU": 1000006456, "OTHER": 42})) +} + +func TestJSONKeyStoreFromJSON(t *testing.T) { + ks, err := NewJSONKeyStore(strings.NewReader( + `{"ID_INDIVIDU":1000006456,"ID_LANGUE":472355}`), + []string{"ID_INDIVIDU", "ID_LANGUE"}) + + assert.Nil(t, err) + + jrr := NewJSONRowReader(strings.NewReader(`{"ID_INDIVIDU":1000006456,"ID_LANGUE":472355,"CODE_LANGUE":"AN","NIVEAU":"2","COMPLEMENT":null,"JSON_LIAISON":null,"ORIGINE_DONNEE":"MIGAUDEPP","VISIBILITE_DONNEE":"C"}`)) + + assert.True(t, jrr.Next()) + + row := jrr.Value() + assert.True(t, ks.Has(row)) +} diff --git a/pkg/pull/driver.go b/pkg/pull/driver.go index ce67890d..34c5df88 100755 --- a/pkg/pull/driver.go +++ b/pkg/pull/driver.go @@ -78,7 +78,6 @@ func (p *puller) Pull(start Table, filter Filter, selectColumns []string, filter Reset() filters := []Filter{} - // If scann is on all rows are pulled and filter after if filterCohort != nil { for filterCohort.Next() { fc := filterCohort.Value() @@ -105,6 +104,7 @@ func (p *puller) Pull(start Table, filter Filter, selectColumns []string, filter Distinct: filter.Distinct, }) } + log.Trace().Interface("included", included).Msg("filtrer") for _, f := range filters { IncFiltersCount() @@ -116,12 +116,19 @@ func (p *puller) Pull(start Table, filter Filter, selectColumns []string, filter for reader.Next() { IncLinesPerStepCount(string(start.Name)) row := start.export(reader.Value()) - + log.Trace().Interface("row", row).Msg("read from DB") + row_keys := extract(row, start.Keys) + log.Trace().Interface("row_keys", row_keys).Msg("read from DB extract keys") + log.Trace().Interface("included", included).Msg("incl") + log.Trace().Interface("excluded", excluded).Msg("excluded") if excluded != nil && excluded.Has(extract(row, start.Keys)) { + log.Trace().Interface("row", row).Msg("in excluded") continue } if included != nil && !included.Has(extract(row, start.Keys)) { + log.Trace().Interface("row", row).Msg("not in included") + continue } diff --git a/tests/suites/pull/pull_with_file_filters.yml b/tests/suites/pull/pull_with_file_filters.yml index 2d28e1c7..2aa32d11 100644 --- a/tests/suites/pull/pull_with_file_filters.yml +++ b/tests/suites/pull/pull_with_file_filters.yml @@ -38,11 +38,11 @@ testcases: - result.systemout ShouldEqual {"active":1,"activebool":true,"address_id":13,"create_date":"2006-02-14T00:00:00Z","customer_id":9,"email":"MARGARET.MOORE@sakilacustomer.org","first_name":"MARGARET","last_name":"MOORE","last_update":"2006-02-15T09:57:20Z","store_id":2} - result.systemerr ShouldBeEmpty - - name: pull one value scann mode + - name: pull one value scan mode steps: - script: sed -i "s/true/false/g" ingress-descriptor.yaml - script: echo '{"customer_id":9}' > customer_filter.jsonl - - script: lino pull source --scann --filter-from-file customer_filter.jsonl + - script: lino pull source --scan --filter-from-file customer_filter.jsonl -l 0 assertions: - result.code ShouldEqual 0 - result.systemout ShouldEqual {"active":1,"activebool":true,"address_id":13,"create_date":"2006-02-14T00:00:00Z","customer_id":9,"email":"MARGARET.MOORE@sakilacustomer.org","first_name":"MARGARET","last_name":"MOORE","last_update":"2006-02-15T09:57:20Z","store_id":2} @@ -57,11 +57,11 @@ testcases: - result.code ShouldEqual 1 - result.systemout ShouldBeEmpty - - name: pull error value scann mode should not be detected + - name: pull error value scan mode should not be detected steps: - script: sed -i "s/true/false/g" ingress-descriptor.yaml - script: echo '{"no_column_id":9}' > customer_filter.jsonl - - script: lino pull source --scann --filter-from-file customer_filter.jsonl + - script: lino pull source --scan --filter-from-file customer_filter.jsonl assertions: - result.code ShouldEqual 0 - result.systemout ShouldBeEmpty @@ -76,11 +76,11 @@ testcases: - result.systemout ShouldBeEmpty - result.systemerr ShouldBeEmpty - - name: pull no value scann mode + - name: pull no value scan mode steps: - script: sed -i "s/true/false/g" ingress-descriptor.yaml - script: echo '{"customer_id":-1}' > customer_filter.jsonl - - script: lino pull source --scann --filter-from-file customer_filter.jsonl + - script: lino pull source --scan --filter-from-file customer_filter.jsonl assertions: - result.code ShouldEqual 0 - result.systemout ShouldBeEmpty @@ -96,16 +96,6 @@ testcases: - result.systemout ShouldEqual {"active":1,"activebool":true,"address_id":5,"create_date":"2006-02-14T00:00:00Z","customer_id":1,"email":"MARY.SMITH@sakilacustomer.org","first_name":"MARY","last_name":"SMITH","last_update":"2006-02-15T09:57:20Z","store_id":1} - result.systemerr ShouldBeEmpty - - name: pull many value with implicit limit to 1 scann mode - steps: - - script: sed -i "s/true/false/g" ingress-descriptor.yaml - - script: echo '{"active":1}' > customer_filter.jsonl - - script: lino pull source --scann --filter-from-file customer_filter.jsonl - assertions: - - result.code ShouldEqual 0 - - result.systemout ShouldEqual {"active":1,"activebool":true,"address_id":5,"create_date":"2006-02-14T00:00:00Z","customer_id":1,"email":"MARY.SMITH@sakilacustomer.org","first_name":"MARY","last_name":"SMITH","last_update":"2006-02-15T09:57:20Z","store_id":1} - - result.systemerr ShouldBeEmpty - - name: pull many values with explicit limit to 2 steps: - script: sed -i "s/true/false/g" ingress-descriptor.yaml @@ -121,20 +111,15 @@ testcases: - result.code ShouldEqual 0 - result.systemout ShouldBeEmpty - - name: pull many values with explicit limit to 2 scann mode + - name: pull many values with explicit limit to 2 in scan mode is not garanted steps: - script: sed -i "s/true/false/g" ingress-descriptor.yaml - script: echo '{"active":1}' > customer_filter.jsonl - - script: echo '{"active":1,"activebool":true,"address_id":5,"create_date":"2006-02-14T00:00:00Z","customer_id":1,"email":"MARY.SMITH@sakilacustomer.org","first_name":"MARY","last_name":"SMITH","last_update":"2006-02-15T09:57:20Z","store_id":1}' > expected.jsonl - - script: echo '{"active":1,"activebool":true,"address_id":6,"create_date":"2006-02-14T00:00:00Z","customer_id":2,"email":"PATRICIA.JOHNSON@sakilacustomer.org","first_name":"PATRICIA","last_name":"JOHNSON","last_update":"2006-02-15T09:57:20Z","store_id":1}' >> expected.jsonl - - script: lino pull source --scann --filter-from-file customer_filter.jsonl --limit 2 > actual.jsonl - assertions: - - result.code ShouldEqual 0 - - result.systemerr ShouldBeEmpty - - script: diff expected.jsonl actual.jsonl + - script: lino pull source --scan --filter-from-file customer_filter.jsonl --limit 2 > actual.jsonl assertions: - result.code ShouldEqual 0 - result.systemout ShouldBeEmpty + - result.systemerr ShouldBeEmpty - name: pull values with reversed order steps: @@ -151,13 +136,13 @@ testcases: - result.code ShouldEqual 0 - result.systemout ShouldBeEmpty - - name: pull values with reversed order not respected in scann mode + - name: pull values with reversed order not respected in scan mode steps: - script: sed -i "s/true/false/g" ingress-descriptor.yaml - script: echo '{"customer_id":2}\n{"customer_id":1}' > customer_filter.jsonl - script: echo '{"active":1,"activebool":true,"address_id":5,"create_date":"2006-02-14T00:00:00Z","customer_id":1,"email":"MARY.SMITH@sakilacustomer.org","first_name":"MARY","last_name":"SMITH","last_update":"2006-02-15T09:57:20Z","store_id":1}' >> expected.jsonl - script: echo '{"active":1,"activebool":true,"address_id":6,"create_date":"2006-02-14T00:00:00Z","customer_id":2,"email":"PATRICIA.JOHNSON@sakilacustomer.org","first_name":"PATRICIA","last_name":"JOHNSON","last_update":"2006-02-15T09:57:20Z","store_id":1}' > expected.jsonl - - script: lino pull source scann --filter-from-file customer_filter.jsonl --limit 2 > actual.jsonl + - script: lino pull source --scan --filter-from-file customer_filter.jsonl --limit 0 > actual.jsonl assertions: - result.code ShouldEqual 0 - result.systemerr ShouldBeEmpty @@ -178,18 +163,6 @@ testcases: result.systemout ShouldEqual "1 0" - - name: pull values with aditional filter scann mode - steps: - - script: sed -i "s/true/false/g" ingress-descriptor.yaml - - script: echo '{"active":1}\n{"active":0}' > customer_filter.jsonl - - script: lino pull source --scann --filter-from-file customer_filter.jsonl --filter store_id=2 | jq .active - assertions: - - result.code ShouldEqual 0 - - result.systemerr ShouldBeEmpty - - | - result.systemout ShouldEqual "1 - 0" - - name: pull values with override filter steps: - script: sed -i "s/true/false/g" ingress-descriptor.yaml @@ -205,19 +178,16 @@ testcases: - result.code ShouldEqual 0 - result.systemout ShouldBeEmpty - - name: pull values with override filter scann mode + - name: pull values with override filter scan mode steps: - script: sed -i "s/true/false/g" ingress-descriptor.yaml - script: echo '{"customer_id":1}\n{"customer_id":3}' > customer_filter.jsonl - script: echo '{"active":1,"activebool":true,"address_id":6,"create_date":"2006-02-14T00:00:00Z","customer_id":2,"email":"PATRICIA.JOHNSON@sakilacustomer.org","first_name":"PATRICIA","last_name":"JOHNSON","last_update":"2006-02-15T09:57:20Z","store_id":1}' > expected.jsonl - script: echo '{"active":1,"activebool":true,"address_id":6,"create_date":"2006-02-14T00:00:00Z","customer_id":2,"email":"PATRICIA.JOHNSON@sakilacustomer.org","first_name":"PATRICIA","last_name":"JOHNSON","last_update":"2006-02-15T09:57:20Z","store_id":1}' >> expected.jsonl - - script: lino pull source --scann --filter-from-file customer_filter.jsonl --filter customer_id=2 > actual.jsonl + - script: lino pull source --scan --filter-from-file customer_filter.jsonl --filter customer_id=2 > actual.jsonl assertions: - result.code ShouldEqual 0 - result.systemerr ShouldBeEmpty - - script: diff expected.jsonl actual.jsonl - assertions: - - result.code ShouldEqual 0 - result.systemout ShouldBeEmpty - name: pull values with filter from stdin @@ -235,13 +205,13 @@ testcases: - result.code ShouldEqual 0 - result.systemout ShouldBeEmpty - - name: pull values with filter scann mode from stdin + - name: pull values with filter scan mode from stdin steps: - script: sed -i "s/true/false/g" ingress-descriptor.yaml - script: echo '{"customer_id":1}\n{"customer_id":2}' > customer_filter.jsonl - script: echo '{"active":1,"activebool":true,"address_id":5,"create_date":"2006-02-14T00:00:00Z","customer_id":1,"email":"MARY.SMITH@sakilacustomer.org","first_name":"MARY","last_name":"SMITH","last_update":"2006-02-15T09:57:20Z","store_id":1}' > expected.jsonl - script: echo '{"active":1,"activebool":true,"address_id":6,"create_date":"2006-02-14T00:00:00Z","customer_id":2,"email":"PATRICIA.JOHNSON@sakilacustomer.org","first_name":"PATRICIA","last_name":"JOHNSON","last_update":"2006-02-15T09:57:20Z","store_id":1}' >> expected.jsonl - - script: lino pull source --scann --filter-from-file - < customer_filter.jsonl > actual.jsonl + - script: lino pull source --scan --filter-from-file - -l 0 < customer_filter.jsonl > actual.jsonl assertions: - result.code ShouldEqual 0 - result.systemerr ShouldBeEmpty