Skip to content

Commit 3c24865

Browse files
authored
feat: autotruncate (#261)
* feat(autotruncate): add venom test * feat(autotruncate): implement autotruncate * feat(autotruncate): read autotruncate flag * feat(autotruncate): add byte based truncate mode * feat(autotruncate): add flags to add-column command * feat(autotruncate): fix truncate in bytes * feat(autotruncate): docs * fix(table): export defaults to make push work * fix(table): export defaults to make push work
1 parent b367c39 commit 3c24865

File tree

16 files changed

+325
-68
lines changed

16 files changed

+325
-68
lines changed

CHANGELOG.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,9 @@ Types of changes
1919
- `Added` columns information and export type using the `lino table extract` command, columns and keys organized according to the database order.
2020
- `Added` flag `--only-tables` to `lino table extract` command. This flag allows for the extraction of table information exclusively, excluding columns. It has been included to maintain the previous behavior.
2121
- `Added` flag `--with-db-infos` to `lino table extract` command. This flag enables the extraction of information regarding column types, length, size, and precision if the column has been configured with these specifications.
22+
- `Added` flag `--autotruncate` to `lino push` command. This flag enables truncation of each value based on the `dbinfo`.`length` parameter set in the table.yaml file for each column.
23+
- `Added` property `dbinfo`.`bytes` to column definition in table.yaml file. Set it to true to truncate the value based on a maximum number of bytes and not characters (assuming utf-8 encoding for now).
24+
- `Added` flags `--max-length` and `--bytes` to `lino table add-column` command. Use them to edit the properties `dbinfo`.`length` and `dbinfo`.`bytes` of the table.yaml file.
2225

2326
## [2.6.1]
2427

README.md

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -343,6 +343,36 @@ Each line is a filter and `lino` apply it to the start table to extract data.
343343
344344
The `push` sub-command imports a **json** line stream (jsonline format http://jsonlines.org/) into each table, following the ingress descriptor defined in the current directory.
345345
346+
### Autotruncate values
347+
348+
Use the `autotruncate` flag to automatically truncate string values that overflow the maximum length accepted by the database.
349+
350+
```
351+
$ lino push truncate dest --table actor --autotruncate < actors.jsonl
352+
```
353+
354+
LINO will truncate each value based on the `dbinfo`.`length` parameter set in the table.yaml file for each column.
355+
356+
Additionally, if your database's maximum length is defined in number of bytes rather than in number of characters, set `dbinfo`.`bytes` to true. LINO will then truncate the value based on a maximum number of bytes and not characters (assuming UTF-8 encoding for now).
357+
358+
```yaml
359+
version: v1
360+
tables:
361+
- name: actor
362+
keys:
363+
- actor_id
364+
columns:
365+
- name: actor_id
366+
dbinfo:
367+
type: INT4
368+
- name: first_name
369+
export: string
370+
dbinfo:
371+
type: VARCHAR
372+
length: 45
373+
bytes: true
374+
```
375+
346376
### How to update primary key
347377

348378
Let's say you have this record in database :

internal/app/push/cli.go

Lines changed: 14 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,7 @@ func NewCommand(fullName string, err *os.File, out *os.File, in *os.File) *cobra
8080
pkTranslations map[string]string
8181
whereField string
8282
savepoint string
83+
autoTruncate bool
8384
)
8485

8586
cmd := &cobra.Command{
@@ -127,7 +128,7 @@ func NewCommand(fullName string, err *os.File, out *os.File, in *os.File) *cobra
127128
os.Exit(1)
128129
}
129130

130-
plan, e2 := getPlan(idStorageFactory(table, ingressDescriptor))
131+
plan, e2 := getPlan(idStorageFactory(table, ingressDescriptor), autoTruncate)
131132
if e2 != nil {
132133
fmt.Fprintln(err, e2.Error())
133134
os.Exit(2)
@@ -151,7 +152,7 @@ func NewCommand(fullName string, err *os.File, out *os.File, in *os.File) *cobra
151152
os.Exit(1)
152153
}
153154

154-
e3 := push.Push(rowIteratorFactory(in), datadestination, plan, mode, commitSize, disableConstraints, rowExporter, translator, whereField, savepoint)
155+
e3 := push.Push(rowIteratorFactory(in), datadestination, plan, mode, commitSize, disableConstraints, rowExporter, translator, whereField, savepoint, autoTruncate)
155156
if e3 != nil {
156157
log.Fatal().AnErr("error", e3).Msg("Fatal error stop the push command")
157158
os.Exit(1)
@@ -172,6 +173,7 @@ func NewCommand(fullName string, err *os.File, out *os.File, in *os.File) *cobra
172173
cmd.Flags().StringToStringVar(&pkTranslations, "pk-translation", map[string]string{}, "list of dictionaries old value / new value for primary key update")
173174
cmd.Flags().StringVar(&whereField, "using-pk-field", "__usingpk__", "Name of the data field that can be used as pk for update queries")
174175
cmd.Flags().StringVar(&savepoint, "savepoint", "", "Name of a file to write primary keys of effectively processed lines (commit to database)")
176+
cmd.Flags().BoolVarP(&autoTruncate, "autotruncate", "a", false, "Automatically truncate values to the maximum length defined in table.yaml")
175177
cmd.SetOut(out)
176178
cmd.SetErr(err)
177179
cmd.SetIn(in)
@@ -241,7 +243,7 @@ func getDataDestination(dataconnectorName string) (push.DataDestination, *push.E
241243
return datadestinationFactory.New(u.URL.String(), alias.Schema), nil
242244
}
243245

244-
func getPlan(idStorage id.Storage) (push.Plan, *push.Error) {
246+
func getPlan(idStorage id.Storage, autoTruncate bool) (push.Plan, *push.Error) {
245247
id, err1 := idStorage.Read()
246248
if err1 != nil {
247249
return nil, &push.Error{Description: err1.Error()}
@@ -274,7 +276,7 @@ func getPlan(idStorage id.Storage) (push.Plan, *push.Error) {
274276
pushtmap: map[string]push.Table{},
275277
}
276278

277-
return converter.getPlan(id), nil
279+
return converter.getPlan(id, autoTruncate), nil
278280
}
279281

280282
type idToPushConverter struct {
@@ -285,7 +287,7 @@ type idToPushConverter struct {
285287
pushtmap map[string]push.Table
286288
}
287289

288-
func (c idToPushConverter) getTable(name string) push.Table {
290+
func (c idToPushConverter) getTable(name string, autoTruncate bool) push.Table {
289291
if pushtable, ok := c.pushtmap[name]; ok {
290292
return pushtable
291293
}
@@ -300,13 +302,13 @@ func (c idToPushConverter) getTable(name string) push.Table {
300302

301303
columns := []push.Column{}
302304
for _, col := range table.Columns {
303-
columns = append(columns, push.NewColumn(col.Name, col.Export, col.Import))
305+
columns = append(columns, push.NewColumn(col.Name, col.Export, col.Import, col.DBInfo.Length, col.DBInfo.ByteBased, autoTruncate))
304306
}
305307

306308
return push.NewTable(table.Name, table.Keys, push.NewColumnList(columns))
307309
}
308310

309-
func (c idToPushConverter) getRelation(name string) push.Relation {
311+
func (c idToPushConverter) getRelation(name string, autoTruncate bool) push.Relation {
310312
if pushrelation, ok := c.pushrmap[name]; ok {
311313
return pushrelation
312314
}
@@ -321,12 +323,12 @@ func (c idToPushConverter) getRelation(name string) push.Relation {
321323

322324
return push.NewRelation(
323325
relation.Name,
324-
c.getTable(relation.Parent.Name),
325-
c.getTable(relation.Child.Name),
326+
c.getTable(relation.Parent.Name, autoTruncate),
327+
c.getTable(relation.Child.Name, autoTruncate),
326328
)
327329
}
328330

329-
func (c idToPushConverter) getPlan(idesc id.IngressDescriptor) push.Plan {
331+
func (c idToPushConverter) getPlan(idesc id.IngressDescriptor, autoTruncate bool) push.Plan {
330332
relations := []push.Relation{}
331333

332334
activeTables, err := id.GetActiveTables(idesc)
@@ -338,9 +340,9 @@ func (c idToPushConverter) getPlan(idesc id.IngressDescriptor) push.Plan {
338340
rel := idesc.Relations().Relation(idx)
339341
if (activeTables.Contains(rel.Child().Name()) && rel.LookUpChild()) ||
340342
(activeTables.Contains(rel.Parent().Name()) && rel.LookUpParent()) {
341-
relations = append(relations, c.getRelation(rel.Name()))
343+
relations = append(relations, c.getRelation(rel.Name(), autoTruncate))
342344
}
343345
}
344346

345-
return push.NewPlan(c.getTable(idesc.StartTable().Name()), relations)
347+
return push.NewPlan(c.getTable(idesc.StartTable().Name(), autoTruncate), relations)
346348
}

internal/app/push/http.go

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,23 @@ func Handler(w http.ResponseWriter, r *http.Request, mode push.Mode, ingressDesc
8181
return
8282
}
8383

84-
plan, e2 := getPlan(idStorageFactory(query.Get("table"), ingressDescriptor))
84+
autoTruncate := false
85+
if query.Get("auto-truncate") != "" {
86+
var err error
87+
autoTruncate, err = strconv.ParseBool(query.Get("auto-truncate"))
88+
if err != nil {
89+
log.Error().Err(err).Msg("can't parse auto-truncate")
90+
w.WriteHeader(http.StatusBadRequest)
91+
_, ew := w.Write([]byte("{\"error\" : \"param auto-truncate must be a boolean\"}\n"))
92+
if ew != nil {
93+
log.Error().Err(ew).Msg("Write failed")
94+
return
95+
}
96+
return
97+
}
98+
}
99+
100+
plan, e2 := getPlan(idStorageFactory(query.Get("table"), ingressDescriptor), autoTruncate)
85101
if e2 != nil {
86102
log.Error().Err(e2).Msg("")
87103
w.WriteHeader(http.StatusNotFound)
@@ -130,7 +146,7 @@ func Handler(w http.ResponseWriter, r *http.Request, mode push.Mode, ingressDesc
130146

131147
log.Debug().Msg(fmt.Sprintf("call Push with mode %s", mode))
132148

133-
e3 := push.Push(rowIteratorFactory(r.Body), datadestination, plan, mode, commitSize, disableConstraints, push.NoErrorCaptureRowWriter{}, nil, query.Get("using-pk-field"), "")
149+
e3 := push.Push(rowIteratorFactory(r.Body), datadestination, plan, mode, commitSize, disableConstraints, push.NoErrorCaptureRowWriter{}, nil, query.Get("using-pk-field"), "", false)
134150
if e3 != nil {
135151
log.Error().Err(e3).Msg("")
136152
w.WriteHeader(http.StatusNotFound)

internal/app/table/add-columns.go

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,8 @@ import (
2929
func newAddColumnCommand(fullName string, err *os.File, out *os.File, in *os.File) *cobra.Command {
3030
// local flags
3131
var exportType, importType string
32+
var maxLength int64
33+
var byteBased bool
3234

3335
cmd := &cobra.Command{
3436
Use: "add-column [Table Name] [Column Name]",
@@ -40,7 +42,7 @@ func newAddColumnCommand(fullName string, err *os.File, out *os.File, in *os.Fil
4042
tableName := args[0]
4143
columnName := args[1]
4244

43-
_, e1 := table.AddOrUpdateColumn(tableStorage, tableName, columnName, exportType, importType)
45+
_, e1 := table.AddOrUpdateColumn(tableStorage, tableName, columnName, exportType, importType, maxLength, byteBased)
4446
if e1 != nil {
4547
fmt.Fprintln(err, e1.Description)
4648
os.Exit(1)
@@ -54,5 +56,7 @@ func newAddColumnCommand(fullName string, err *os.File, out *os.File, in *os.Fil
5456
cmd.SetIn(in)
5557
cmd.Flags().StringVarP(&exportType, "export", "e", "", "export type for the column")
5658
cmd.Flags().StringVarP(&importType, "import", "i", "", "import type for the column")
59+
cmd.Flags().Int64VarP(&maxLength, "max-length", "l", 0, "set optional maximum length for this column that can be used with --autotruncate flag on push")
60+
cmd.Flags().BoolVarP(&byteBased, "bytes", "b", false, "maximum length is expressed in bytes, not in characters")
5761
return cmd
5862
}

internal/infra/table/extractor_postgres.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -79,19 +79,19 @@ func (d PostgresDialect) GetExportType(dbtype string) (string, bool) {
7979
return "string", true
8080
// Numeric types
8181
case "NUMERIC", "DECIMAL", "FLOAT", "REAL", "DOUBLE PRECISION", "MONEY", "INTEGER", "BIGINT",
82-
"NUMBER", "BINARY_FLOAT", "BINARY_DOUBLE", "INT", "TINYINT", "SMALLINT", "MEDIUMINT":
82+
"NUMBER", "BINARY_FLOAT", "BINARY_DOUBLE", "INT", "TINYINT", "SMALLINT", "MEDIUMINT", "INT4", "INT2", "BOOL":
8383
return "numeric", true
8484
// Timestamp types
8585
case "TIMESTAMP", "TIMESTAMPTZ",
8686
"TIMESTAMP WITH TIME ZONE", "TIMESTAMP WITH LOCAL TIME ZONE":
87-
return "timestamp", true
87+
return "datetime", true // export timestamps to datetime is the only option that works well with lino push
8888
// Datetime types
8989
case "DATE", "DATETIME2", "SMALLDATETIME", "DATETIME":
9090
return "datetime", true
9191
// Base64 types
9292
case "BYTEA", "BLOB":
9393
return "base64", true
9494
default:
95-
return "", false
95+
return "string", true // default to export string since it will work most of the time (binary types are already handled)
9696
}
9797
}

internal/infra/table/storage_yaml.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,7 @@ type YAMLDBInfo struct {
5656
Length int64 `yaml:"length,omitempty"`
5757
Size int64 `yaml:"size,omitempty"`
5858
Precision int64 `yaml:"precision,omitempty"`
59+
ByteBased bool `yaml:"bytes,omitempty"`
5960
}
6061

6162
// YAMLStorage provides storage in a local YAML file

pkg/push/driver.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ import (
2626
)
2727

2828
// Push write rows to target table
29-
func Push(ri RowIterator, destination DataDestination, plan Plan, mode Mode, commitSize uint, disableConstraints bool, catchError RowWriter, translator Translator, whereField string, savepointPath string) (err *Error) {
29+
func Push(ri RowIterator, destination DataDestination, plan Plan, mode Mode, commitSize uint, disableConstraints bool, catchError RowWriter, translator Translator, whereField string, savepointPath string, autotruncate bool) (err *Error) {
3030
err1 := destination.Open(plan, mode, disableConstraints)
3131
if err1 != nil {
3232
return err1

pkg/push/driver_test.go

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ func TestSimplePush(t *testing.T) {
4949
}
5050
dest := memoryDataDestination{tables, false, false, false}
5151

52-
err := push.Push(&ri, &dest, plan, push.Insert, 2, true, push.NoErrorCaptureRowWriter{}, nil, "", "")
52+
err := push.Push(&ri, &dest, plan, push.Insert, 2, true, push.NoErrorCaptureRowWriter{}, nil, "", "", false)
5353

5454
assert.Nil(t, err)
5555
assert.Equal(t, true, dest.closed)
@@ -88,7 +88,7 @@ func TestRelationPush(t *testing.T) {
8888
}
8989
dest := memoryDataDestination{tables, false, false, false}
9090

91-
err := push.Push(&ri, &dest, plan, push.Insert, 2, true, push.NoErrorCaptureRowWriter{}, nil, "", "")
91+
err := push.Push(&ri, &dest, plan, push.Insert, 2, true, push.NoErrorCaptureRowWriter{}, nil, "", "", false)
9292

9393
// no error
9494
assert.Nil(t, err)
@@ -137,7 +137,7 @@ func TestRelationPushWithEmptyRelation(t *testing.T) {
137137
}
138138
dest := memoryDataDestination{tables, false, false, false}
139139

140-
err := push.Push(&ri, &dest, plan, push.Insert, 2, true, push.NoErrorCaptureRowWriter{}, nil, "", "")
140+
err := push.Push(&ri, &dest, plan, push.Insert, 2, true, push.NoErrorCaptureRowWriter{}, nil, "", "", false)
141141

142142
// no error
143143
assert.Nil(t, err)
@@ -188,7 +188,7 @@ func TestInversseRelationPush(t *testing.T) {
188188
}
189189
dest := memoryDataDestination{tables, false, false, false}
190190

191-
err := push.Push(&ri, &dest, plan, push.Insert, 5, true, push.NoErrorCaptureRowWriter{}, nil, "", "")
191+
err := push.Push(&ri, &dest, plan, push.Insert, 5, true, push.NoErrorCaptureRowWriter{}, nil, "", "", false)
192192

193193
// no error
194194
assert.Nil(t, err)

pkg/push/model.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,9 @@ type Column interface {
4545
Name() string
4646
Export() string
4747
Import() string
48+
Length() int64
49+
LengthInBytes() bool
50+
Truncate() bool
4851
}
4952

5053
// Plan describe how to push data

0 commit comments

Comments
 (0)