Commit 961e365a authored by gwenn's avatar gwenn

Improve CSV virtual table module.

One may specify column names/types as module arguments.
parent d6f93349
...@@ -6,7 +6,7 @@ There are two layers: ...@@ -6,7 +6,7 @@ There are two layers:
* one matching the SQLite API (with Backup, Blob, user-defined Function/Module, ...). * one matching the SQLite API (with Backup, Blob, user-defined Function/Module, ...).
* and another implementing the "database/sql/driver" interface. * and another implementing the "database/sql/driver" interface.
[![GoDoc](https://godoc.org/github.com/gwenn/gosqlite?status.png)](https://godoc.org/github.com/gwenn/gosqlite) [![GoDoc](https://godoc.org/github.com/gwenn/gosqlite?status.svg)](https://godoc.org/github.com/gwenn/gosqlite)
[![Build Status][1]][2] [![Build Status][1]][2]
......
...@@ -18,13 +18,20 @@ import ( ...@@ -18,13 +18,20 @@ import (
type csvModule struct { type csvModule struct {
} }
// args[0] => module name
// args[1] => db name
// args[2] => table name
// TODO http://www.ch-werner.de/sqliteodbc/html/csvtable_8c.html make possible to specify the column/type name // TODO http://www.ch-werner.de/sqliteodbc/html/csvtable_8c.html make possible to specify the column/type name
// TODO https://github.com/karbarcca/SQLite.jl & infer // TODO https://github.com/karbarcca/SQLite.jl & infer
// args[0] => module name
// args[1] => db name
// args[2] => table name
// args[3] => filename (maybe quoted: '...')
// args[i>3] :
// - contains HEADER ignoring case => use first line in file as column names or skip first line if NAMES are specified
// - contains NO_QUOTE ignoring case => no double quoted field expected in file
// - single char (;) or quoted char (';') => values separator in file
// - contains NAMES ignoring case => use args[i+1], ... as column names (until an arg equal to _TYPES_, ignoring case)
// - contains TYPES ignoring case => use args[i+1], ... as column types
// Beware, empty args are skipped (..., ,...), use '' empty SQL string instead (..., '', ...).
func (m csvModule) Create(c *Conn, args []string) (VTab, error) { func (m csvModule) Create(c *Conn, args []string) (VTab, error) {
if len(args) < 4 { if len(args) < 4 {
return nil, errors.New("no CSV file specified") return nil, errors.New("no CSV file specified")
...@@ -37,22 +44,38 @@ func (m csvModule) Create(c *Conn, args []string) (VTab, error) { ...@@ -37,22 +44,38 @@ func (m csvModule) Create(c *Conn, args []string) (VTab, error) {
/* if a custom delimiter specified, pull it out */ /* if a custom delimiter specified, pull it out */
var separator byte = ',' var separator byte = ','
/* should the header zRow be used */ /* should the header zRow be used */
useHeaderRow := false header := false
quoted := true quoted := true
guess := true guess := true
var cols, types []string
for i := 4; i < len(args); i++ { for i := 4; i < len(args); i++ {
arg := args[i] arg := args[i]
switch { switch {
case strings.Contains(strings.ToUpper(arg), "HEADER"): case types != nil:
useHeaderRow = true if arg[0] == '\'' {
case strings.Contains(strings.ToUpper(arg), "NO_QUOTE"): arg = arg[1 : len(arg)-1]
quoted = false }
types = append(types, arg)
case cols != nil:
if strings.ToUpper(arg) == "_TYPES_" {
types = make([]string, 0, len(cols))
} else {
cols = append(cols, arg)
}
case len(arg) == 1: case len(arg) == 1:
separator = arg[0] separator = arg[0]
guess = false guess = false
case len(arg) == 3 && arg[0] == '\'': case len(arg) == 3 && arg[0] == '\'':
separator = arg[1] separator = arg[1]
guess = false guess = false
case strings.Contains(strings.ToUpper(arg), "HEADER"):
header = true
case strings.Contains(strings.ToUpper(arg), "NO_QUOTE"):
quoted = false
case strings.Contains(strings.ToUpper(arg), "NAMES"):
cols = make([]string, 0, 10)
case strings.Contains(strings.ToUpper(arg), "TYPES"):
types = make([]string, 0, 10)
} }
} }
/* open the source csv file */ /* open the source csv file */
...@@ -67,15 +90,25 @@ func (m csvModule) Create(c *Conn, args []string) (VTab, error) { ...@@ -67,15 +90,25 @@ func (m csvModule) Create(c *Conn, args []string) (VTab, error) {
vTab.maxColumn = int(c.Limit(LimitColumn)) vTab.maxColumn = int(c.Limit(LimitColumn))
reader := yacr.NewReader(file, separator, quoted, guess) reader := yacr.NewReader(file, separator, quoted, guess)
if useHeaderRow { if header {
reader.Split(vTab.split(reader.ScanField)) reader.Split(vTab.split(reader.ScanField))
} }
if err = vTab.readRow(reader); err != nil || len(vTab.cols) == 0 { if err = vTab.readRow(reader); err != nil {
if err == nil {
err = errors.New("no columns found")
}
return nil, err return nil, err
} }
named := header
if len(cols) > 0 { // headers ignored
// TODO check len(cols) == len(vTab.cols) ?
vTab.cols = cols
named = true
}
if len(vTab.cols) == 0 {
if len(types) == 0 {
return nil, errors.New("no column name/type specified")
}
vTab.cols = types
}
if guess { if guess {
vTab.sep = reader.Sep() vTab.sep = reader.Sep()
} }
...@@ -89,13 +122,17 @@ func (m csvModule) Create(c *Conn, args []string) (VTab, error) { ...@@ -89,13 +122,17 @@ func (m csvModule) Create(c *Conn, args []string) (VTab, error) {
if i == len(vTab.cols)-1 { if i == len(vTab.cols)-1 {
tail = ");" tail = ");"
} }
if useHeaderRow { colType := ""
if len(types) > i {
colType = " " + types[i]
}
if named {
if len(col) == 0 { if len(col) == 0 {
return nil, errors.New("no column name found") return nil, errors.New("no column name found")
} }
sql = fmt.Sprintf("%s\"%s\"%s", sql, col, tail) sql = fmt.Sprintf("%s\"%s\"%s%s", sql, col, colType, tail)
} else { } else {
sql = fmt.Sprintf("%scol%d%s", sql, i+1, tail) sql = fmt.Sprintf("%scol%d%s%s", sql, i+1, colType, tail)
} }
} }
if err = c.DeclareVTab(sql); err != nil { if err = c.DeclareVTab(sql); err != nil {
...@@ -363,6 +400,7 @@ func (db *Conn) ImportCSV(in io.Reader, ic ImportConfig, dbName, table string) e ...@@ -363,6 +400,7 @@ func (db *Conn) ImportCSV(in io.Reader, ic ImportConfig, dbName, table string) e
return err return err
} }
} }
var sql string var sql string
if len(dbName) == 0 { if len(dbName) == 0 {
sql = fmt.Sprintf(`INSERT INTO "%s" VALUES (?%s)`, escapeQuote(table), strings.Repeat(", ?", nCol-1)) sql = fmt.Sprintf(`INSERT INTO "%s" VALUES (?%s)`, escapeQuote(table), strings.Repeat(", ?", nCol-1))
......
...@@ -45,6 +45,120 @@ func TestCsvModule(t *testing.T) { ...@@ -45,6 +45,120 @@ func TestCsvModule(t *testing.T) {
checkNoError(t, err, "couldn't drop CSV virtual table: %s") checkNoError(t, err, "couldn't drop CSV virtual table: %s")
} }
// csvModuleTests is the table of cases run by TestCsvModuleArguments.
// Each case describes the module arguments placed in a
// CREATE VIRTUAL TABLE ... USING csv(...) statement and what is expected.
var csvModuleTests = []struct {
// Name identifies the case in failure messages.
Name string
// Args are the module arguments, joined with ", " into the DDL.
Args []string
// Count is the expected number of rows; it is only checked when > 0.
Count int
// Names are the expected column names; only checked when non-empty.
Names []string
// Types are the expected column data types; only checked when non-empty.
Types []string
// Error, when not empty, is a substring expected in the creation error.
Error string
}{
{
// No argument at all: the module has no file name to open.
Name: "No file",
Args: []string{},
Error: "no CSV file specified",
},
{
Name: "File not found",
Args: []string{"blam.csv"},
Error: "error opening CSV file: 'blam.csv'",
},
{
// Without header nor names, columns are named col1, col2, ...
Name: "No header",
Args: []string{"test.csv"},
Count: 6,
Names: []string{"col1", "col2", "col3"},
Types: []string{"", "", ""},
},
{
// The argument only has to contain HEADER: first line gives the names.
Name: "Headers",
Args: []string{"test.csv", "USE_HEADER_ROW"},
Count: 5,
Names: []string{"colA", "colB", "colC"},
Types: []string{"", "", ""},
},
{
Name: "Names",
Args: []string{"test.csv", "COL_NAMES", "C1", "C2", "C3"},
Count: 6,
Names: []string{"C1", "C2", "C3"},
Types: []string{"", "", ""},
},
{
// Explicit names win over the header line, which is still skipped.
Name: "Names & Headers",
Args: []string{"test.csv", "HEADERS", "COL_NAMES", "C1", "C2", "C3"},
Count: 5,
Names: []string{"C1", "C2", "C3"},
Types: []string{"", "", ""},
},
{
// '' is used for a column without declared type (empty args are skipped).
Name: "Types",
Args: []string{"test.csv", "TYPES", "TEXT", "''", "TEXT"},
Names: []string{"col1", "col2", "col3"},
Types: []string{"TEXT", "", "TEXT"},
},
}
// TestCsvModuleArguments creates a CSV virtual table for every entry of
// csvModuleTests and compares the outcome with the entry's expectations:
// the creation error (substring match), the row count, the column names and
// the column types. The table is dropped at the end of each case so that the
// same table name can be reused by the next one.
func TestCsvModuleArguments(t *testing.T) {
	db := open(t)
	defer checkClose(db, t)
	err := LoadCsvModule(db)
	checkNoError(t, err, "couldn't create CSV module: %s")
	for _, tt := range csvModuleTests {
		// Module arguments are pasted verbatim between the parentheses.
		ddl := "CREATE VIRTUAL TABLE vtab USING csv(" + strings.Join(tt.Args, ", ") + ")"
		//println("DDL: ", ddl)
		err = db.Exec(ddl)
		if tt.Error != "" {
			if err == nil || !strings.Contains(err.Error(), tt.Error) {
				t.Errorf("%s: error %v, want error %q", tt.Name, err, tt.Error)
			}
			if err == nil {
				// The table was unexpectedly created: remove it, otherwise
				// every following case fails on CREATE.
				err = db.Exec("DROP TABLE vtab")
				checkNoError(t, err, "couldn't drop CSV virtual table: %s")
			}
			continue
		}
		checkNoError(t, err, "couldn't create CSV virtual table: %s")

		if tt.Count > 0 {
			var count int
			err = db.OneValue("SELECT count(1) FROM vtab", &count)
			checkNoError(t, err, "couldn't select from CSV virtual table: %s")
			assert.Equalf(t, tt.Count, count, "%s: got %d rows, want %d", tt.Name, count, tt.Count)
		}

		/* Debugging aid:
		var schema string
		err = db.OneValue("SELECT sql FROM sqlite_master WHERE name like ? and type = ?", &schema, "vtab", "table")
		checkNoError(t, err, "couldn't get schema of CSV virtual table: %s")
		println("SCHEMA:", schema)*/

		// Column metadata is fetched once and shared by both checks.
		if len(tt.Names) > 0 || len(tt.Types) > 0 {
			cols, cerr := db.Columns("", "vtab")
			checkNoError(t, cerr, "couldn't get columns of CSV virtual table: %s")
			if len(tt.Names) > 0 {
				assert.Equalf(t, len(tt.Names), len(cols), "%s: got %d columns, want %d", tt.Name, len(cols), len(tt.Names))
				for i, col := range cols {
					if i >= len(tt.Names) {
						// Length mismatch already reported above; avoid an index panic.
						break
					}
					assert.Equalf(t, tt.Names[i], col.Name, "%s: got %s, want %s as column name at %d", tt.Name, col.Name, tt.Names[i], i+1)
				}
			}
			if len(tt.Types) > 0 {
				assert.Equalf(t, len(tt.Types), len(cols), "%s: got %d columns, want %d", tt.Name, len(cols), len(tt.Types))
				for i, col := range cols {
					if i >= len(tt.Types) {
						// Length mismatch already reported above; avoid an index panic.
						break
					}
					assert.Equalf(t, tt.Types[i], col.DataType, "%s: got %s, want %s as column type at %d", tt.Name, col.DataType, tt.Types[i], i+1)
				}
			}
		}

		err = db.Exec("DROP TABLE vtab")
		checkNoError(t, err, "couldn't drop CSV virtual table: %s")
	}
}
func TestImportCSV(t *testing.T) { func TestImportCSV(t *testing.T) {
db := open(t) db := open(t)
defer checkClose(db, t) defer checkClose(db, t)
......
...@@ -94,7 +94,7 @@ type intArray struct { ...@@ -94,7 +94,7 @@ type intArray struct {
// CreateIntArray create a specific instance of an intarray object. // CreateIntArray create a specific instance of an intarray object.
// //
// Each intarray object corresponds to a virtual table in the TEMP table // Each intarray object corresponds to a virtual table in the TEMP database
// with the specified name. // with the specified name.
// //
// Destroy the intarray object by dropping the virtual table. If not done // Destroy the intarray object by dropping the virtual table. If not done
......
...@@ -202,25 +202,29 @@ func (s *Stmt) ColumnTypeAffinity(index int) Affinity { ...@@ -202,25 +202,29 @@ func (s *Stmt) ColumnTypeAffinity(index int) Affinity {
} }
} }
declType := s.ColumnDeclaredType(index) declType := s.ColumnDeclaredType(index)
affinity := typeAffinity(declType)
s.affinities[index] = affinity
return affinity
}
// Affinity returns the type affinity computed by typeAffinity from the
// column's declared data type (c.DataType).
func (c Column) Affinity() Affinity {
return typeAffinity(c.DataType)
}
func typeAffinity(declType string) Affinity {
if declType == "" { if declType == "" {
s.affinities[index] = None
return None return None
} }
declType = strings.ToUpper(declType) declType = strings.ToUpper(declType)
if strings.Contains(declType, "INT") { if strings.Contains(declType, "INT") {
s.affinities[index] = Integral
return Integral return Integral
} else if strings.Contains(declType, "TEXT") || strings.Contains(declType, "CHAR") || strings.Contains(declType, "CLOB") { } else if strings.Contains(declType, "TEXT") || strings.Contains(declType, "CHAR") || strings.Contains(declType, "CLOB") {
s.affinities[index] = Textual
return Textual return Textual
} else if strings.Contains(declType, "BLOB") { } else if strings.Contains(declType, "BLOB") {
s.affinities[index] = None
return None return None
} else if strings.Contains(declType, "REAL") || strings.Contains(declType, "FLOA") || strings.Contains(declType, "DOUB") { } else if strings.Contains(declType, "REAL") || strings.Contains(declType, "FLOA") || strings.Contains(declType, "DOUB") {
s.affinities[index] = Real
return Real return Real
} }
s.affinities[index] = Numerical
return Numerical return Numerical
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment