From cd36b112ae482f4fa9f0c590e103e32211a2ad80 Mon Sep 17 00:00:00 2001 From: Andrew Thornton Date: Fri, 7 Jan 2022 16:04:01 +0800 Subject: [PATCH] Escape string and blob results from dump more correctly (#2091) dumpTables currently badly handles BLOB and TEXT data containing control characters: * MySQL will interpret and unescape string literals e.g.`\r` will become carriage return. * Postgres will not allow string literals to contain NUL nor will SQLite so BLOBs will not dump correctly. * Schemas should not be set on the destination dump * MSSQL needs the N prefix to correctly ensure that UTF-8 data is correctly transferred. Signed-off-by: Andrew Thornton Co-authored-by: Lunny Xiao Reviewed-on: https://gitea.com/xorm/xorm/pulls/2091 Reviewed-by: Lunny Xiao Co-authored-by: Andrew Thornton Co-committed-by: Andrew Thornton --- engine.go | 191 +++++++++++++++++++++++++++++++++++- integrations/engine_test.go | 13 ++- 2 files changed, 199 insertions(+), 5 deletions(-) diff --git a/engine.go b/engine.go index 1257de20..b7dcf5a2 100644 --- a/engine.go +++ b/engine.go @@ -11,6 +11,7 @@ import ( "io" "os" "reflect" + "regexp" "runtime" "strconv" "strings" @@ -449,6 +450,8 @@ func formatBool(s bool, dstDialect dialects.Dialect) string { return strconv.FormatBool(s) } +var controlCharactersRe = regexp.MustCompile(`[\x00-\x1f\x7f]+`) + // dumpTables dump database all table structs and data to w with specify db type func (engine *Engine) dumpTables(ctx context.Context, tables []*schemas.Table, w io.Writer, tp ...schemas.DBType) error { var dstDialect dialects.Dialect @@ -464,7 +467,10 @@ func (engine *Engine) dumpTables(ctx context.Context, tables []*schemas.Table, w destURI := dialects.URI{ DBType: tp[0], DBName: uri.DBName, - Schema: uri.Schema, + // DO NOT SET SCHEMA HERE + } + if tp[0] == schemas.POSTGRES { + destURI.Schema = engine.dialect.URI().Schema } if err := dstDialect.Init(&destURI); err != nil { return err @@ -479,6 +485,13 @@ func (engine *Engine) dumpTables(ctx 
context.Context, tables []*schemas.Table, w return err } + if dstDialect.URI().DBType == schemas.MYSQL { + // For MySQL set NO_BACKSLASH_ESCAPES so that strings work properly + if _, err := io.WriteString(w, "SET sql_mode='NO_BACKSLASH_ESCAPES';\n"); err != nil { + return err + } + } + for i, table := range tables { dstTable := table if table.Type != nil { @@ -598,6 +611,182 @@ func (engine *Engine) dumpTables(ctx context.Context, tables []*schemas.Table, w if _, err = io.WriteString(w, "'"+r+"'"); err != nil { return err } + } else if len(s.String) == 0 { + if _, err := io.WriteString(w, "''"); err != nil { + return err + } + } else if dstDialect.URI().DBType == schemas.POSTGRES { + if dstTable.Columns()[i].SQLType.IsBlob() { + // Postgres has the escape format and we should use that for bytea data + if _, err := fmt.Fprintf(w, "'\\x%x'", s.String); err != nil { + return err + } + } else { + // Postgres concatenates strings using || (NOTE: a NUL byte in a text segment will fail) + toCheck := strings.ReplaceAll(s.String, "'", "''") + for len(toCheck) > 0 { + loc := controlCharactersRe.FindStringIndex(toCheck) + if loc == nil { + if _, err := io.WriteString(w, "'"+toCheck+"'"); err != nil { + return err + } + break + } + if loc[0] > 0 { + if _, err := io.WriteString(w, "'"+toCheck[:loc[0]]+"' || "); err != nil { + return err + } + } + if _, err := io.WriteString(w, "e'"); err != nil { + return err + } + for i := loc[0]; i < loc[1]; i++ { + if _, err := fmt.Fprintf(w, "\\x%02x", toCheck[i]); err != nil { + return err + } + } + toCheck = toCheck[loc[1]:] + if len(toCheck) > 0 { + if _, err := io.WriteString(w, "' || "); err != nil { + return err + } + } else { + if _, err := io.WriteString(w, "'"); err != nil { + return err + } + } + } + } + } else if dstDialect.URI().DBType == schemas.MYSQL { + loc := controlCharactersRe.FindStringIndex(s.String) + if loc == nil { + if _, err := io.WriteString(w, "'"+strings.ReplaceAll(s.String, "'", "''")+"'"); err != nil { + return
err + } + } else { + if _, err := io.WriteString(w, "CONCAT("); err != nil { + return err + } + toCheck := strings.ReplaceAll(s.String, "'", "''") + for len(toCheck) > 0 { + loc := controlCharactersRe.FindStringIndex(toCheck) + if loc == nil { + if _, err := io.WriteString(w, "'"+toCheck+"')"); err != nil { + return err + } + break + } + if loc[0] > 0 { + if _, err := io.WriteString(w, "'"+toCheck[:loc[0]]+"', "); err != nil { + return err + } + } + for i := loc[0]; i < loc[1]-1; i++ { + if _, err := io.WriteString(w, "CHAR("+strconv.Itoa(int(toCheck[i]))+"), "); err != nil { + return err + } + } + char := toCheck[loc[1]-1] + toCheck = toCheck[loc[1]:] + if len(toCheck) > 0 { + if _, err := io.WriteString(w, "CHAR("+strconv.Itoa(int(char))+"), "); err != nil { + return err + } + } else { + if _, err = io.WriteString(w, "CHAR("+strconv.Itoa(int(char))+"))"); err != nil { + return err + } + } + } + } + } else if dstDialect.URI().DBType == schemas.SQLITE { + if dstTable.Columns()[i].SQLType.IsBlob() { + // SQLite has its escape format + if _, err := fmt.Fprintf(w, "X'%x'", s.String); err != nil { + return err + } + } else { + // SQLite concatenates strings using || (NOTE: a NUL byte in a text segment will fail) + toCheck := strings.ReplaceAll(s.String, "'", "''") + for len(toCheck) > 0 { + loc := controlCharactersRe.FindStringIndex(toCheck) + if loc == nil { + if _, err := io.WriteString(w, "'"+toCheck+"'"); err != nil { + return err + } + break + } + if loc[0] > 0 { + if _, err := io.WriteString(w, "'"+toCheck[:loc[0]]+"' || "); err != nil { + return err + } + } + if _, err := fmt.Fprintf(w, "X'%x'", toCheck[loc[0]:loc[1]]); err != nil { + return err + } + toCheck = toCheck[loc[1]:] + if len(toCheck) > 0 { + if _, err := io.WriteString(w, " || "); err != nil { + return err + } + } + } + } + } else if dstDialect.URI().DBType == schemas.DAMENG || dstDialect.URI().DBType == schemas.ORACLE { + if dstTable.Columns()[i].SQLType.IsBlob() { + // ORACLE/DAMENG uses HEXTORAW
+ if _, err := fmt.Fprintf(w, "HEXTORAW('%x')", s.String); err != nil { + return err + } + } else { + // ORACLE/DAMENG concatenates strings in multiple ways but uses CHAR and has CONCAT + // (NOTE: a NUL byte in a text segment will fail) + if _, err := io.WriteString(w, "CONCAT("); err != nil { + return err + } + toCheck := strings.ReplaceAll(s.String, "'", "''") + for len(toCheck) > 0 { + loc := controlCharactersRe.FindStringIndex(toCheck) + if loc == nil { + if _, err := io.WriteString(w, "'"+toCheck+"')"); err != nil { + return err + } + break + } + if loc[0] > 0 { + if _, err := io.WriteString(w, "'"+toCheck[:loc[0]]+"', "); err != nil { + return err + } + } + for i := loc[0]; i < loc[1]-1; i++ { + if _, err := io.WriteString(w, "CHAR("+strconv.Itoa(int(toCheck[i]))+"), "); err != nil { + return err + } + } + char := toCheck[loc[1]-1] + toCheck = toCheck[loc[1]:] + if len(toCheck) > 0 { + if _, err := io.WriteString(w, "CHAR("+strconv.Itoa(int(char))+"), "); err != nil { + return err + } + } else { + if _, err = io.WriteString(w, "CHAR("+strconv.Itoa(int(char))+"))"); err != nil { + return err + } + } + } + } + } else if dstDialect.URI().DBType == schemas.MSSQL { + if dstTable.Columns()[i].SQLType.IsBlob() { + // MSSQL uses CONVERT(VARBINARY(MAX), '0xDEADBEEF', 1) + if _, err := fmt.Fprintf(w, "CONVERT(VARBINARY(MAX), '0x%x', 1)", s.String); err != nil { + return err + } + } else { + if _, err = io.WriteString(w, "N'"+strings.ReplaceAll(s.String, "'", "''")+"'"); err != nil { + return err + } + } } else { if _, err = io.WriteString(w, "'"+strings.ReplaceAll(s.String, "'", "''")+"'"); err != nil { return err diff --git a/integrations/engine_test.go b/integrations/engine_test.go index dbe17571..cdcdd6be 100644 --- a/integrations/engine_test.go +++ b/integrations/engine_test.go @@ -143,6 +143,7 @@ func TestDumpTables(t *testing.T) { type TestDumpTableStruct struct { Id int64 + Data []byte `xorm:"BLOB"` Name string IsMan bool Created time.Time `xorm:"created"` @@
-152,10 +153,14 @@ func TestDumpTables(t *testing.T) { _, err := testEngine.Insert([]TestDumpTableStruct{ {Name: "1", IsMan: true}, - {Name: "2\n"}, - {Name: "3;"}, - {Name: "4\n;\n''"}, - {Name: "5'\n"}, + {Name: "2\n", Data: []byte{'\000', '\001', '\002'}}, + {Name: "3;", Data: []byte("0x000102")}, + {Name: "4\n;\n''", Data: []byte("Help")}, + {Name: "5'\n", Data: []byte("0x48656c70")}, + {Name: "6\\n'\n", Data: []byte("48656c70")}, + {Name: "7\\n'\r\n", Data: []byte("7\\n'\r\n")}, + {Name: "x0809ee"}, + {Name: "090a10"}, }) assert.NoError(t, err)