Escape string and blob results from dump more correctly

dumpTables currently badly handles BLOB and TEXT data containing control
characters:

* MySQL will interpret and unescape string literals e.g.`\r` will become
carriage return.

* Postgres will not allow string literals to contain NUL nor will
SQLite so BLOBs will not dump correctly.

* Schemas should not be set on the destination dump

* MSSQL needs the N prefix to correctly ensure that UTF-8 data is
correctly transferred.

Signed-off-by: Andrew Thornton <art27@cantab.net>
This commit is contained in:
Andrew Thornton 2022-01-03 21:55:07 +00:00
parent 57365108ae
commit 7dbf688ae9
No known key found for this signature in database
GPG Key ID: 3CDE74631F13A748
1 changed files with 140 additions and 1 deletions

141
engine.go
View File

@ -11,7 +11,9 @@ import (
"io"
"os"
"reflect"
"regexp"
"runtime"
"strconv"
"strings"
"time"
@ -450,6 +452,8 @@ func formatBool(s string, dstDialect dialects.Dialect) string {
return s
}
var controlCharactersRe = regexp.MustCompile(`[\x00-\x1f\x7f]+`)
// dumpTables dump database all table structs and data to w with specify db type
func (engine *Engine) dumpTables(ctx context.Context, tables []*schemas.Table, w io.Writer, tp ...schemas.DBType) error {
var dstDialect dialects.Dialect
@ -465,7 +469,7 @@ func (engine *Engine) dumpTables(ctx context.Context, tables []*schemas.Table, w
destURI := dialects.URI{
DBType: tp[0],
DBName: uri.DBName,
Schema: uri.Schema,
// DO NOT SET SCHEMA HERE
}
if err := dstDialect.Init(&destURI); err != nil {
return err
@ -480,6 +484,13 @@ func (engine *Engine) dumpTables(ctx context.Context, tables []*schemas.Table, w
return err
}
if dstDialect.URI().DBType == schemas.MYSQL {
// For MySQL set NO_BACKLASH_ESCAPES so that strings work properly
if _, err := io.WriteString(w, "SET sql_mode='NO_BACKSLASH_ESCAPES';\n"); err != nil {
return err
}
}
for i, table := range tables {
dstTable := table
if table.Type != nil {
@ -594,7 +605,135 @@ func (engine *Engine) dumpTables(ctx context.Context, tables []*schemas.Table, w
if _, err = io.WriteString(w, "'"+r+"'"); err != nil {
return err
}
} else if len(s.String) == 0 {
if _, err := io.WriteString(w, "''"); err != nil {
return err
}
} else if dstDialect.URI().DBType == schemas.POSTGRES {
if dstTable.Columns()[i].SQLType.IsBlob() {
// Postgres has the escape format and we should use that for bytea data
if _, err := fmt.Fprintf(w, "'\\x%x'", s.String); err != nil {
return err
}
} else {
// Postgres concatentates strings using || (NOTE: a NUL byte in a text segment will fail)
toCheck := strings.ReplaceAll(s.String, "'", "''")
for len(toCheck) > 0 {
loc := controlCharactersRe.FindStringIndex(toCheck)
if loc == nil {
if _, err := io.WriteString(w, "'"+toCheck+"'"); err != nil {
return err
}
break
}
if loc[0] > 0 {
if _, err := io.WriteString(w, "'"+toCheck[:loc[0]]+"' || "); err != nil {
return err
}
}
if _, err := io.WriteString(w, "e'"); err != nil {
return err
}
for i := loc[0]; i < loc[1]; i++ {
if _, err := fmt.Fprintf(w, "\\x%0x", toCheck[i]); err != nil {
return err
}
}
toCheck = toCheck[loc[1]:]
if len(toCheck) > 0 {
if _, err := io.WriteString(w, "' || "); err != nil {
return err
}
} else {
if _, err := io.WriteString(w, "'"); err != nil {
return err
}
}
}
}
} else if dstDialect.URI().DBType == schemas.MYSQL {
loc := controlCharactersRe.FindStringIndex(s.String)
if loc == nil {
if _, err := io.WriteString(w, "'"+strings.ReplaceAll(s.String, "'", "''")+"'"); err != nil {
return err
}
} else {
if _, err := io.WriteString(w, "CONCAT("); err != nil {
return err
}
toCheck := strings.ReplaceAll(s.String, "'", "''")
for len(toCheck) > 0 {
loc := controlCharactersRe.FindStringIndex(toCheck)
if loc == nil {
if _, err := io.WriteString(w, "'"+toCheck+"')"); err != nil {
return err
}
break
}
if loc[0] > 0 {
if _, err := io.WriteString(w, "'"+toCheck[:loc[0]]+"', "); err != nil {
return err
}
}
for i := loc[0]; i < loc[1]-1; i++ {
if _, err := io.WriteString(w, "CHAR("+strconv.Itoa(int(toCheck[i]))+"), "); err != nil {
return err
}
}
char := toCheck[loc[1]-1]
toCheck = toCheck[loc[1]:]
if len(toCheck) > 0 {
if _, err := io.WriteString(w, "CHAR("+strconv.Itoa(int(char))+"), "); err != nil {
return err
}
} else {
if _, err = io.WriteString(w, "CHAR("+strconv.Itoa(int(char))+"))"); err != nil {
return err
}
}
}
}
} else if dstDialect.URI().DBType == schemas.SQLITE {
if dstTable.Columns()[i].SQLType.IsBlob() {
// SQLite has its escape format
if _, err := fmt.Fprintf(w, "X'%x'", s.String); err != nil {
return err
}
} else {
// SQLite concatentates strings using || (NOTE: a NUL byte in a text segment will fail)
toCheck := strings.ReplaceAll(s.String, "'", "''")
for len(toCheck) > 0 {
loc := controlCharactersRe.FindStringIndex(toCheck)
if loc == nil {
if _, err := io.WriteString(w, "'"+toCheck+"'"); err != nil {
return err
}
break
}
if loc[0] > 0 {
if _, err := io.WriteString(w, "'"+toCheck[:loc[0]]+"' || "); err != nil {
return err
}
}
if _, err := fmt.Fprintf(w, "X'%x'", toCheck[loc[0]:loc[1]]); err != nil {
return err
}
toCheck = toCheck[loc[1]:]
if len(toCheck) > 0 {
if _, err := io.WriteString(w, " || "); err != nil {
return err
}
}
}
}
} else {
// In MSSQL we have to use NChar format to get unicode strings.
if dstDialect.URI().DBType == schemas.MSSQL && dstTable.Columns()[i].SQLType.IsText() {
if _, err = io.WriteString(w, "N"); err != nil {
return err
}
}
if _, err = io.WriteString(w, "'"+strings.ReplaceAll(s.String, "'", "''")+"'"); err != nil {
return err
}