Skip to content

Commit a8a3492

Browse files
committed
fix(startf/ds.set_body): infer structure when ds.set_body is called
Lots of tiny problems were arising from Starlark breaking the assertion that any dataset with a body must also have a structure that dictates how to read the body. This commit fixes the problem at the source.
1 parent 30e9aa9 commit a8a3492

File tree

9 files changed

+20
-150
lines changed

9 files changed

+20
-150
lines changed

base/dataset_prepare.go

Lines changed: 3 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,15 @@
11
package base
22

33
import (
4-
"bytes"
54
"context"
65
"errors"
76
"fmt"
8-
"io"
97
"path/filepath"
108
"strings"
119

1210
"github.com/qri-io/dataset"
1311
"github.com/qri-io/dataset/detect"
1412
"github.com/qri-io/dataset/validate"
15-
"github.com/qri-io/qfs"
1613
"github.com/qri-io/qri/dsref"
1714
qerr "github.com/qri-io/qri/errors"
1815
"github.com/qri-io/qri/logbook"
@@ -140,14 +137,10 @@ func InferValues(pro *profile.Profile, ds *dataset.Dataset) error {
140137
// NOTE: add author ProfileID here to keep the dataset package agnostic to
141138
// all identity stuff except keypair crypto
142139
ds.Commit.Author = &dataset.User{ID: pro.ID.String()}
143-
// TODO - infer title & message
144140

145-
// if we don't have a structure or schema then attempt to determine one
146-
body := ds.BodyFile()
147-
if body != nil && (ds.Structure == nil || ds.Structure.Schema == nil) {
148-
if err := InferStructure(ds); err != nil {
149-
return err
150-
}
141+
// add any missing structure fields
142+
if err := detect.Structure(ds); err != nil && !errors.Is(err, dataset.ErrNoBody) {
143+
return err
151144
}
152145

153146
if ds.Transform != nil && ds.Transform.ScriptFile() == nil && ds.Transform.IsEmpty() {
@@ -157,57 +150,6 @@ func InferValues(pro *profile.Profile, ds *dataset.Dataset) error {
157150
return nil
158151
}
159152

160-
// InferStructure infers the Structure field of the dataset, guaranteeing
161-
// that the structure will contain a Format, FormatConfig, and Schema, based
162-
// on the given dataset body. It will not write over any Structure, Format,
163-
// FormatConfig, or Schema that already exists.
164-
func InferStructure(ds *dataset.Dataset) error {
165-
if ds == nil {
166-
return fmt.Errorf("empty dataset")
167-
}
168-
169-
body := ds.BodyFile()
170-
if body == nil {
171-
return fmt.Errorf("empty body")
172-
}
173-
// use a TeeReader that writes to a buffer to preserve data
174-
buf := &bytes.Buffer{}
175-
tr := io.TeeReader(body, buf)
176-
var df dataset.DataFormat
177-
178-
df, err := detect.ExtensionDataFormat(body.FileName())
179-
if err != nil {
180-
log.Debug(err.Error())
181-
return fmt.Errorf("invalid data format: %s", err.Error())
182-
}
183-
184-
guessedStructure, _, err := detect.FromReader(df, tr)
185-
if err != nil {
186-
log.Debug(err.Error())
187-
return fmt.Errorf("determining dataset structure: %s", err.Error())
188-
}
189-
190-
// attach the structure, schema, and formatConfig, as appropriate
191-
if ds.Structure == nil {
192-
ds.Structure = guessedStructure
193-
}
194-
if ds.Structure.Schema == nil {
195-
ds.Structure.Schema = guessedStructure.Schema
196-
}
197-
if ds.Structure.FormatConfig == nil {
198-
ds.Structure.FormatConfig = guessedStructure.FormatConfig
199-
}
200-
if ds.Structure.Format == "" {
201-
ds.Structure.Format = guessedStructure.Format
202-
}
203-
204-
// glue whatever we just read back onto the reader
205-
// TODO (b5)- this may ruin readers that transparently depend on a read-closer
206-
// we should consider a method on qfs.File that allows this non-destructive read pattern
207-
ds.SetBodyFile(qfs.NewMemfileReader(body.FileName(), io.MultiReader(buf, body)))
208-
return nil
209-
}
210-
211153
// ValidateDataset checks that a dataset is semantically valid
212154
func ValidateDataset(ds *dataset.Dataset) (err error) {
213155
// Ensure that dataset structure is valid
@@ -216,6 +158,5 @@ func ValidateDataset(ds *dataset.Dataset) (err error) {
216158
err = fmt.Errorf("invalid dataset: %s", err.Error())
217159
return
218160
}
219-
220161
return nil
221162
}

base/dataset_prepare_test.go

Lines changed: 3 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -104,60 +104,6 @@ func TestInferValues(t *testing.T) {
104104
}
105105
}
106106

107-
func TestInferStructure(t *testing.T) {
108-
ds := &dataset.Dataset{
109-
Name: "animals",
110-
}
111-
ds.SetBodyFile(qfs.NewMemfileBytes("animals.csv",
112-
[]byte("Animal,Sound,Weight\ncat,meow,1.4\ndog,bark,3.7\n")))
113-
114-
if err := InferStructure(ds); err != nil {
115-
t.Error(err)
116-
}
117-
118-
if ds.Structure.Format != "csv" {
119-
t.Errorf("expected format CSV, got %s", ds.Structure.Format)
120-
}
121-
if ds.Structure.FormatConfig["headerRow"] != true {
122-
t.Errorf("expected format config to set headerRow set to true")
123-
}
124-
125-
actual := datasetSchemaToJSON(ds)
126-
expect := `{"items":{"items":[{"title":"animal","type":"string"},{"title":"sound","type":"string"},{"title":"weight","type":"number"}],"type":"array"},"type":"array"}`
127-
128-
if expect != actual {
129-
t.Errorf("mismatched schema, expected \"%s\", got \"%s\"", expect, actual)
130-
}
131-
}
132-
133-
func TestInferStructureSchema(t *testing.T) {
134-
ds := &dataset.Dataset{
135-
Name: "animals",
136-
Structure: &dataset.Structure{
137-
Format: "csv",
138-
},
139-
}
140-
ds.SetBodyFile(qfs.NewMemfileBytes("animals.csv",
141-
[]byte("Animal,Sound,Weight\ncat,meow,1.4\ndog,bark,3.7\n")))
142-
if err := InferStructure(ds); err != nil {
143-
t.Error(err)
144-
}
145-
146-
if ds.Structure.Format != "csv" {
147-
t.Errorf("expected format CSV, got %s", ds.Structure.Format)
148-
}
149-
if ds.Structure.FormatConfig["headerRow"] != true {
150-
t.Errorf("expected format config to set headerRow set to true")
151-
}
152-
153-
actual := datasetSchemaToJSON(ds)
154-
expect := `{"items":{"items":[{"title":"animal","type":"string"},{"title":"sound","type":"string"},{"title":"weight","type":"number"}],"type":"array"},"type":"array"}`
155-
156-
if expect != actual {
157-
t.Errorf("mismatched schema, expected \"%s\", got \"%s\"", expect, actual)
158-
}
159-
}
160-
161107
func TestInferValuesDontOverwriteSchema(t *testing.T) {
162108
r := newTestRepo(t)
163109
pro := r.Profiles().Owner()
@@ -188,15 +134,15 @@ func TestInferValuesDontOverwriteSchema(t *testing.T) {
188134
if ds.Structure.Format != "csv" {
189135
t.Errorf("expected format CSV, got %s", ds.Structure.Format)
190136
}
191-
if ds.Structure.FormatConfig != nil {
192-
t.Errorf("expected format config to be nil")
137+
if ds.Structure.FormatConfig == nil {
138+
t.Errorf("expected format config to be non-nil")
193139
}
194140

195141
actual := datasetSchemaToJSON(ds)
196142
expect := `{"items":{"items":[{"title":"animal","type":"number"},{"title":"noise","type":"number"},{"title":"height","type":"number"}],"type":"array"},"type":"array"}`
197143

198144
if expect != actual {
199-
t.Errorf("mismatched schema, expected \"%s\", got \"%s\"", expect, actual)
145+
t.Errorf("mismatched schema, expected %q, got %q", expect, actual)
200146
}
201147
}
202148

changes/changes_test.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ import (
77

88
"github.com/google/go-cmp/cmp"
99
"github.com/qri-io/dataset"
10+
"github.com/qri-io/dataset/detect"
1011
"github.com/qri-io/dataset/tabular"
1112
"github.com/qri-io/qfs"
1213
"github.com/qri-io/qri/base"
@@ -743,7 +744,7 @@ func (run *testRunner) updateDataset(t *testing.T, ds *dataset.Dataset, newBody
743744

744745
// force recalculate structure as that is what we rely on for the change reports
745746
ds.Structure = nil
746-
if err := base.InferStructure(ds); err != nil {
747+
if err := detect.Structure(ds); err != nil {
747748
t.Fatal(err.Error())
748749
}
749750

go.mod

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ require (
4242
github.com/olekukonko/tablewriter v0.0.4
4343
github.com/pkg/errors v0.9.1
4444
github.com/qri-io/dag v0.2.2-0.20201208212257-ae00241c4b48
45-
github.com/qri-io/dataset v0.2.1-0.20210304141850-a4a809d46350
45+
github.com/qri-io/dataset v0.2.1-0.20210312210644-ba8eaa336c8d
4646
github.com/qri-io/deepdiff v0.2.1-0.20200807143746-d02d9f531f5b
4747
github.com/qri-io/didmod v0.0.0-20201123165422-8b2e224c993a
4848
github.com/qri-io/doggos v0.1.0

go.sum

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1131,10 +1131,8 @@ github.com/qri-io/compare v0.1.0 h1:A/MRx3uEnJ/iMjfJY1VOqH9CYs9zFSEYaFVeXuGfmis=
11311131
github.com/qri-io/compare v0.1.0/go.mod h1:i/tVuDGRXVxhuZ8ZUieF23u6rQ6wLGJl7KKWpoMRaTE=
11321132
github.com/qri-io/dag v0.2.2-0.20201208212257-ae00241c4b48 h1:6fTW2iHGbaEKQt9u8+04kB3m33KSGLqxF/2pWNleeEg=
11331133
github.com/qri-io/dag v0.2.2-0.20201208212257-ae00241c4b48/go.mod h1:1AwOy3yhcZTAXzaF4wGSdnrp87u3PBOrsWXUjOtQCXo=
1134-
github.com/qri-io/dataset v0.2.1-0.20210128201320-3b1209495e96 h1:SiP48nzhKLJbvM6SA+5wK53PKUs0FY0DWDylMPyi8S4=
1135-
github.com/qri-io/dataset v0.2.1-0.20210128201320-3b1209495e96/go.mod h1:vlq9+Nu37koO3mrp25QGNOt68CLe2d2rAtB9cnDLV6E=
1136-
github.com/qri-io/dataset v0.2.1-0.20210304141850-a4a809d46350 h1:uXvx2/y+eqV5o77HLmw51S1aiskOvlq+b6WNTtXHAGk=
1137-
github.com/qri-io/dataset v0.2.1-0.20210304141850-a4a809d46350/go.mod h1:vlq9+Nu37koO3mrp25QGNOt68CLe2d2rAtB9cnDLV6E=
1134+
github.com/qri-io/dataset v0.2.1-0.20210312210644-ba8eaa336c8d h1:5KfPirdkABg/R3/8S9xwNZUDuwBj32BHrw/BgQ9DiXw=
1135+
github.com/qri-io/dataset v0.2.1-0.20210312210644-ba8eaa336c8d/go.mod h1:vlq9+Nu37koO3mrp25QGNOt68CLe2d2rAtB9cnDLV6E=
11381136
github.com/qri-io/deepdiff v0.2.1-0.20200807143746-d02d9f531f5b h1:T8qEIv+qLi5mVWvSS329wJ+HbN7cfMwCWjRVzh/+upo=
11391137
github.com/qri-io/deepdiff v0.2.1-0.20200807143746-d02d9f531f5b/go.mod h1:NrL/b7YvexgpGb4HEO3Rlx5RrMLDfxuKDf/XDAq5ac0=
11401138
github.com/qri-io/didmod v0.0.0-20201123165422-8b2e224c993a h1:40BIa59lae2xZ7iieb3UU4/X57jZsWZ6QgqwdjDQhig=

lib/datasets.go

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1437,11 +1437,9 @@ func (m *DatasetMethods) Validate(ctx context.Context, p *ValidateParams) (*Vali
14371437
// Schema is set to the provided filename if given, otherwise the dataset's schema
14381438
if schemaFlagType == "" {
14391439
st = ds.Structure
1440-
if ds.Structure == nil || ds.Structure.Schema == nil {
1441-
if err := base.InferStructure(ds); err != nil {
1442-
log.Debug("lib.Validate: InferStructure error: %w", err)
1443-
return nil, err
1444-
}
1440+
if err := detect.Structure(ds); err != nil {
1441+
log.Debug("lib.Validate: InferStructure error: %w", err)
1442+
return nil, err
14451443
}
14461444
} else {
14471445
data, err := ioutil.ReadFile(schemaFilename)

lib/transform.go

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@ import (
77

88
"github.com/qri-io/dataset"
99
"github.com/qri-io/dataset/preview"
10-
"github.com/qri-io/qri/base"
1110
"github.com/qri-io/qri/dsref"
1211
"github.com/qri-io/qri/event"
1312
"github.com/qri-io/qri/transform"
@@ -116,13 +115,6 @@ func (m *TransformMethods) Apply(ctx context.Context, p *ApplyParams) (*ApplyRes
116115
}
117116

118117
if p.Wait {
119-
if ds.Structure == nil {
120-
if err := base.InferStructure(ds); err != nil {
121-
log.Debugw("inferring structure", "err", err)
122-
return nil, err
123-
}
124-
}
125-
126118
ds, err := preview.Create(ctx, ds)
127119
if err != nil {
128120
return nil, err

transform/startf/ds/dataset.go

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ import (
88
"sync"
99

1010
"github.com/qri-io/dataset"
11+
"github.com/qri-io/dataset/detect"
1112
"github.com/qri-io/dataset/dsio"
1213
"github.com/qri-io/qfs"
1314
"github.com/qri-io/starlib/util"
@@ -310,6 +311,11 @@ func (d *Dataset) SetBody(thread *starlark.Thread, _ *starlark.Builtin, args sta
310311
d.write.SetBodyFile(qfs.NewMemfileBytes(fmt.Sprintf("body.%s", df), []byte(string(str))))
311312
d.modBody = true
312313
d.bodyCache = nil
314+
315+
if err := detect.Structure(d.write); err != nil {
316+
return nil, err
317+
}
318+
313319
return starlark.None, nil
314320
}
315321

@@ -324,7 +330,6 @@ func (d *Dataset) SetBody(thread *starlark.Thread, _ *starlark.Builtin, args sta
324330
if err != nil {
325331
return starlark.None, err
326332
}
327-
328333
r := NewEntryReader(d.write.Structure, iter)
329334
if err := dsio.Copy(r, w); err != nil {
330335
return starlark.None, err

transform/startf/exec_step.go

Lines changed: 0 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,6 @@ import (
99
"github.com/qri-io/dataset"
1010
"github.com/qri-io/dataset/preview"
1111
"github.com/qri-io/qfs"
12-
"github.com/qri-io/qri/base"
1312
"github.com/qri-io/qri/dsref"
1413
"github.com/qri-io/qri/event"
1514
skyctx "github.com/qri-io/qri/transform/startf/context"
@@ -159,16 +158,6 @@ func (r *StepRunner) callTransformFunc(ctx context.Context, thread *starlark.Thr
159158
return err
160159
}
161160

162-
// TODO (b5) - this should happen in ds.set_body method call
163-
if f := ds.BodyFile(); f != nil {
164-
if ds.Structure == nil {
165-
if err := base.InferStructure(ds); err != nil {
166-
log.Debugw("inferring structure", "err", err)
167-
return err
168-
}
169-
}
170-
}
171-
172161
if r.eventsCh != nil {
173162
pview, err := preview.Create(ctx, ds)
174163
if err != nil {

0 commit comments

Comments
 (0)