From cf0d004205e906be1e7385b9c8f37b4096885489 Mon Sep 17 00:00:00 2001 From: ramfox Date: Thu, 2 Sep 2021 10:46:49 -0400 Subject: [PATCH 1/6] refactor(dsref): Add aggregate fields to `VersionInfo` Add "aggregate" fields to `VersionInfo`, keeping track of follower count, open issue count, download count, on top of commit count. Renames `NumVersions` field to `CommitCount` In the future, these fields will likely be moved to a different struct, store, or subsystem. --- cmd/cmd_test.go | 2 +- cmd/list.go | 11 +++++------ cmd/stringers.go | 8 ++++---- dscache/dscache.go | 6 +++--- dscache/dscachefb/RefEntryInfo.go | 8 ++++---- dsref/version_info.go | 27 +++++++++++++++++++++++---- lib/params.go | 3 --- logbook/logbook.go | 4 ++-- repo/ref/convert.go | 2 +- 9 files changed, 43 insertions(+), 28 deletions(-) diff --git a/cmd/cmd_test.go b/cmd/cmd_test.go index a550789ab..45540bb13 100644 --- a/cmd/cmd_test.go +++ b/cmd/cmd_test.go @@ -626,7 +626,7 @@ func TestListFormatJson(t *testing.T) { "commitTime": "2001-01-01T01:01:01.000000001Z", "commitTitle": "created dataset from body_ten.csv", "commitMessage": "created dataset from body_ten.csv", - "numVersions": 1 + "commitCount": 1 } ]`, map[string]string{ "profileID": "QmeL2mdVka1eahKENjehK6tBxkkpk5dNQ1qMcgWi7Hrb4B", diff --git a/cmd/list.go b/cmd/list.go index 8e285aa68..ab3b1e82f 100644 --- a/cmd/list.go +++ b/cmd/list.go @@ -105,12 +105,11 @@ func (o *ListOptions) Run() (err error) { } p := &lib.ListParams{ - Term: o.Term, - Username: o.Username, - Limit: page.Limit(), - Offset: page.Offset(), - Public: o.Public, - ShowNumVersions: o.ShowNumVersions, + Term: o.Term, + Username: o.Username, + Limit: page.Limit(), + Offset: page.Offset(), + Public: o.Public, } infos, err := o.inst.Collection().List(ctx, p) if err != nil { diff --git a/cmd/stringers.go b/cmd/stringers.go index cf44d72cf..273928e7a 100644 --- a/cmd/stringers.go +++ b/cmd/stringers.go @@ -138,12 +138,12 @@ func (vis versionInfoStringer) String() string { } 
else { fmt.Fprintf(w, ", %d errors", vis.NumErrors) } - if vis.NumVersions == 0 { + if vis.CommitCount == 0 { // nothing - } else if vis.NumVersions == 1 { - fmt.Fprintf(w, ", %d version", vis.NumVersions) + } else if vis.CommitCount == 1 { + fmt.Fprintf(w, ", %d version", vis.CommitCount) } else { - fmt.Fprintf(w, ", %d versions", vis.NumVersions) + fmt.Fprintf(w, ", %d versions", vis.CommitCount) } fmt.Fprintf(w, "\n\n") diff --git a/dscache/dscache.go b/dscache/dscache.go index 20ea58b84..b4231bfae 100644 --- a/dscache/dscache.go +++ b/dscache/dscache.go @@ -374,8 +374,8 @@ func (d *Dscache) updateChangeCursor(act dsref.VersionInfo) error { // Start building a ref object, by mutating an existing ref object. refStartMutationFunc(builder) // Add only the fields we want to change. - dscachefb.RefEntryInfoAddTopIndex(builder, int32(act.NumVersions)) - dscachefb.RefEntryInfoAddCursorIndex(builder, int32(act.NumVersions)) + dscachefb.RefEntryInfoAddTopIndex(builder, int32(act.CommitCount)) + dscachefb.RefEntryInfoAddCursorIndex(builder, int32(act.CommitCount)) dscachefb.RefEntryInfoAddMetaTitle(builder, metaTitle) dscachefb.RefEntryInfoAddCommitTime(builder, act.CommitTime.Unix()) dscachefb.RefEntryInfoAddBodySize(builder, int64(act.BodySize)) @@ -429,7 +429,7 @@ func convertEntryToVersionInfo(r *dscachefb.RefEntryInfo) dsref.VersionInfo { BodyFormat: string(r.BodyFormat()), NumErrors: int(r.NumErrors()), CommitTime: time.Unix(r.CommitTime(), 0), - NumVersions: int(r.NumVersions()), + CommitCount: int(r.CommitCount()), } } diff --git a/dscache/dscachefb/RefEntryInfo.go b/dscache/dscachefb/RefEntryInfo.go index 3d29c127d..a0263c47b 100644 --- a/dscache/dscachefb/RefEntryInfo.go +++ b/dscache/dscachefb/RefEntryInfo.go @@ -177,7 +177,7 @@ func (rcv *RefEntryInfo) MutateCommitTime(n int64) bool { return rcv._tab.MutateInt64Slot(30, n) } -func (rcv *RefEntryInfo) NumVersions() int32 { +func (rcv *RefEntryInfo) CommitCount() int32 { o := 
flatbuffers.UOffsetT(rcv._tab.Offset(32)) if o != 0 { return rcv._tab.GetInt32(o + rcv._tab.Pos) @@ -185,7 +185,7 @@ func (rcv *RefEntryInfo) NumVersions() int32 { return 0 } -func (rcv *RefEntryInfo) MutateNumVersions(n int32) bool { +func (rcv *RefEntryInfo) MutateCommitCount(n int32) bool { return rcv._tab.MutateInt32Slot(32, n) } @@ -294,8 +294,8 @@ func RefEntryInfoAddNumErrors(builder *flatbuffers.Builder, numErrors int32) { func RefEntryInfoAddCommitTime(builder *flatbuffers.Builder, commitTime int64) { builder.PrependInt64Slot(13, commitTime, 0) } -func RefEntryInfoAddNumVersions(builder *flatbuffers.Builder, numVersions int32) { - builder.PrependInt32Slot(14, numVersions, 0) +func RefEntryInfoAddCommitCount(builder *flatbuffers.Builder, commitCount int32) { + builder.PrependInt32Slot(14, commitCount, 0) } func RefEntryInfoAddHeadRef(builder *flatbuffers.Builder, headRef flatbuffers.UOffsetT) { builder.PrependUOffsetTSlot(15, flatbuffers.UOffsetT(headRef), 0) diff --git a/dsref/version_info.go b/dsref/version_info.go index 841da07cd..efaa2c867 100644 --- a/dsref/version_info.go +++ b/dsref/version_info.go @@ -79,10 +79,6 @@ type VersionInfo struct { // Message field from the commit CommitMessage string `json:"commitMessage,omitempty"` // - // About the dataset's history and location - // - // Number of versions that the dataset has - NumVersions int `json:"numVersions,omitempty"` // // Workflow fields // @@ -106,6 +102,29 @@ type VersionInfo struct { // RunDuration is not stored on a dataset version, and instead must come from // either run state or a cache of run state RunDuration int64 `json:"runDuration,omitempty"` + // + // + // Aggregate Fields + // TODO (ramfox): These fields are only temporarily living on `VersionInfo`. + // They are needed by the frontend to display "details" about the head of + // of the dataset. 
When we get more user feedback and settle what info + // users want about their datasets, these fields may move to a new struct + // store, or subsystem. + // These fields are not derived from any `dataset.Dataset` fields. + // These fields should only be used in the `collection` package. + // + // RunCount is the number of times this dataset's transform has been run + RunCount int `json:"runCount,omitempty"` + // CommitCount is the number of commits in this dataset's history + CommitCount int `json:"commitCount,omitempty"` + // DownloadCount is the number of times this dataset has been directly + // downloaded from this Qri node + DownloadCount int `json:"downloadCount,omitempty"` + // FollowerCount is the number of followers this dataset has on this Qri node + FollowerCount int `json:"followerCount,omitempty"` + // OpenIssueCount is the number of open issues this dataset has on this + // Qri node + OpenIssueCount int `json:"openIssueCount,omitempty"` } // NewVersionInfoFromRef creates a sparse-populated VersionInfo from a dsref.Ref diff --git a/lib/params.go b/lib/params.go index 1583cbc8b..6bf8b0688 100644 --- a/lib/params.go +++ b/lib/params.go @@ -42,9 +42,6 @@ type ListParams struct { // Public only applies to listing datasets, shows only datasets that are // set to visible Public bool `json:"public"` - // ShowNumVersions only applies to listing datasets - // TODO (b5): deprecate this once collection subsystem is up to speed - ShowNumVersions bool `json:"showNumVersions" docs:"hidden"` } // SetNonZeroDefaults sets OrderBy to "created" if it's value is the empty string diff --git a/logbook/logbook.go b/logbook/logbook.go index 193378868..51f4cac85 100644 --- a/logbook/logbook.go +++ b/logbook/logbook.go @@ -557,7 +557,7 @@ func (book *Book) WriteVersionSave(ctx context.Context, author *profile.Profile, } info := dsref.ConvertDatasetToVersionInfo(ds) - info.NumVersions = topIndex + info.CommitCount = topIndex if err = book.publisher.Publish(ctx, 
event.ETDatasetCommitChange, info); err != nil { log.Error(err) @@ -694,7 +694,7 @@ func (book *Book) WriteVersionDelete(ctx context.Context, author *profile.Profil if len(items) > 0 { lastItem := items[len(items)-1] lastItem.InitID = initID - lastItem.NumVersions = len(items) + lastItem.CommitCount = len(items) if err = book.publisher.Publish(ctx, event.ETDatasetCommitChange, lastItem); err != nil { log.Error(err) diff --git a/repo/ref/convert.go b/repo/ref/convert.go index 96755b636..89bd8fc60 100644 --- a/repo/ref/convert.go +++ b/repo/ref/convert.go @@ -39,7 +39,7 @@ func ConvertToVersionInfo(r *DatasetRef) dsref.VersionInfo { build.CommitMessage = ds.Commit.Message } if ds != nil { - build.NumVersions = ds.NumVersions + build.CommitCount = ds.NumVersions } return build } From 2cc343a3d99ea27f99964963e760cc895e160c7d Mon Sep 17 00:00:00 2001 From: ramfox Date: Thu, 2 Sep 2021 10:48:37 -0400 Subject: [PATCH 2/6] feat(event): add events to track aggregate dataset information adds `ETDatasetDownload` `ETRemoteDatasetFollowed` `ETRemoteDatasetUnfollowed` `ETRemoteDatasetIssueOpened` `ETRemoteDatasetIssueClosed` All the "remote" events are expected to be emitted by a remote that implements followers and issues. 
--- event/dataset.go | 3 +++ event/remote.go | 12 ++++++++++++ 2 files changed, 15 insertions(+) diff --git a/event/dataset.go b/event/dataset.go index 84f4e6cc3..84cb61c4d 100644 --- a/event/dataset.go +++ b/event/dataset.go @@ -16,6 +16,9 @@ const ( // ETDatasetCreateLink occurs when a dataset gets linked to a working directory // payload is a dsref.VersionInfo ETDatasetCreateLink = Type("dataset:CreateLink") + // ETDatasetDownload indicates that a dataset has been downloaded + // payload is an `InitID` string + ETDatasetDownload = Type("dataset:Download") // ETDatasetSaveStarted occurs when a dataset starts being saved // this event is sent asynchronously; the publisher is not blocked diff --git a/event/remote.go b/event/remote.go index fb2db4851..a93d40fce 100644 --- a/event/remote.go +++ b/event/remote.go @@ -46,6 +46,18 @@ const ( // (logbook + versions) remove completed // payload will be a RemoteEvent ETRemoteClientRemoveDatasetCompleted = Type("remoteClient:RemoveDatasetCompleted") + // ETRemoteDatasetFollowed indicates that the dataset has been followed by a user + // payload is an `InitID` string + ETRemoteDatasetFollowed = Type("remote:DatasetFollowed") + // ETRemoteDatasetUnfollowed indicates that the dataset has been unfollowed by a user + // payload is an `InitID` string + ETRemoteDatasetUnfollowed = Type("remote:DatasetUnfollowed") + // ETRemoteDatasetIssueOpened indicates that an issue has been opened for this dataset + // payload is an `initID` string + ETRemoteDatasetIssueOpened = Type("remote:DatasetIssueOpened") + // ETRemoteDatasetIssueClosed indicates that an issue has been closed for this dataset + // payload is an `initID` string + ETRemoteDatasetIssueClosed = Type("remote:DatasetIssueClosed") ) // RemoteEvent encapsulates the push / pull progress of a dataset version From 144011a9cfef17c46b547a12a8f9f73709aa0ee5 Mon Sep 17 00:00:00 2001 From: ramfox Date: Thu, 2 Sep 2021 10:54:48 -0400 Subject: [PATCH 3/6] refactor(collection): handle 
aggregate events the handler now listens for `event.ETDatasetDownload`, `event.ETRemoteDatasetFollowed`, `event.ETRemoteDatasetUnfollowed`, `event.ETRemoteDatasetIssueOpened`, `event.ETRemoteDatasetIssueClosed` events, and adjusts the collection accordingly. --- collection/collection.go | 53 +++++++++++-- collection/collection_assert_test.go | 107 +++++++++++++++++++++++++-- 2 files changed, 146 insertions(+), 14 deletions(-) diff --git a/collection/collection.go b/collection/collection.go index d74715329..f857c9ad5 100644 --- a/collection/collection.go +++ b/collection/collection.go @@ -56,11 +56,16 @@ func (sm *SetMaintainer) subscribe(bus event.Bus) { event.ETDatasetCommitChange, event.ETDatasetRename, event.ETDatasetDeleteAll, + event.ETDatasetDownload, // remote & registry events event.ETDatasetPushed, event.ETDatasetPulled, event.ETRegistryProfileCreated, + event.ETRemoteDatasetFollowed, + event.ETRemoteDatasetUnfollowed, + event.ETRemoteDatasetIssueOpened, + event.ETRemoteDatasetIssueClosed, // automation events event.ETAutomationWorkflowStarted, @@ -116,6 +121,12 @@ func (sm *SetMaintainer) handleEvent(ctx context.Context, e event.Event) error { } } } + case event.ETDatasetDownload: + if initID, ok := e.Payload.(string); ok { + sm.UpdateEverywhere(ctx, initID, func(vi *dsref.VersionInfo) { + vi.DownloadCount++ + }) + } case event.ETRegistryProfileCreated: if p, ok := e.Payload.(event.RegistryProfileCreated); ok { pid, err := profile.IDB58Decode(p.ProfileID) @@ -140,6 +151,36 @@ func (sm *SetMaintainer) handleEvent(ctx context.Context, e event.Event) error { } } } + case event.ETRemoteDatasetFollowed: + if initID, ok := e.Payload.(string); ok { + sm.UpdateEverywhere(ctx, initID, func(vi *dsref.VersionInfo) { + vi.FollowerCount++ + }) + } + case event.ETRemoteDatasetUnfollowed: + if initID, ok := e.Payload.(string); ok { + sm.UpdateEverywhere(ctx, initID, func(vi *dsref.VersionInfo) { + vi.FollowerCount-- + if vi.FollowerCount < 0 { + vi.FollowerCount = 0 + } 
+ }) + } + case event.ETRemoteDatasetIssueOpened: + if initID, ok := e.Payload.(string); ok { + sm.UpdateEverywhere(ctx, initID, func(vi *dsref.VersionInfo) { + vi.OpenIssueCount++ + }) + } + case event.ETRemoteDatasetIssueClosed: + if initID, ok := e.Payload.(string); ok { + sm.UpdateEverywhere(ctx, initID, func(vi *dsref.VersionInfo) { + vi.OpenIssueCount-- + if vi.OpenIssueCount < 0 { + vi.OpenIssueCount = 0 + } + }) + } case event.ETAutomationWorkflowStarted: if evt, ok := e.Payload.(event.WorkflowStartedEvent); ok { err := sm.UpdateEverywhere(ctx, evt.InitID, func(vi *dsref.VersionInfo) { @@ -194,7 +235,7 @@ type Set interface { // List the collection of a single user List(ctx context.Context, pid profile.ID, lp params.List) ([]dsref.VersionInfo, error) // Get info about a single dataset in a single user's collection - Get(ctx context.Context, pid profile.ID, initID string) (dsref.VersionInfo, error) + Get(ctx context.Context, pid profile.ID, initID string) (*dsref.VersionInfo, error) // Add adds a dataset or datasets to a user's collection Add(ctx context.Context, pid profile.ID, add ...dsref.VersionInfo) error // RenameUser changes a user's name @@ -312,24 +353,24 @@ func (s *localSet) List(ctx context.Context, pid profile.ID, lp params.List) ([] return results, nil } -func (s *localSet) Get(ctx context.Context, pid profile.ID, initID string) (dsref.VersionInfo, error) { +func (s *localSet) Get(ctx context.Context, pid profile.ID, initID string) (*dsref.VersionInfo, error) { s.Lock() defer s.Unlock() if err := pid.Validate(); err != nil { - return dsref.VersionInfo{}, err + return nil, err } collection, ok := s.collections[pid] if !ok { - return dsref.VersionInfo{}, fmt.Errorf("%w: no collection for profile ID %q", ErrNotFound, pid.Encode()) + return nil, fmt.Errorf("%w: no collection for profile ID %q", ErrNotFound, pid.Encode()) } for _, vi := range collection { if vi.InitID == initID { - return vi, nil + return &vi, nil } } - return dsref.VersionInfo{}, 
ErrNotFound + return nil, ErrNotFound } func (s *localSet) Add(ctx context.Context, pid profile.ID, items ...dsref.VersionInfo) error { diff --git a/collection/collection_assert_test.go b/collection/collection_assert_test.go index 37fa9c4f9..bc7360e3a 100644 --- a/collection/collection_assert_test.go +++ b/collection/collection_assert_test.go @@ -2,6 +2,7 @@ package collection_test import ( "context" + "errors" "fmt" "testing" "time" @@ -112,7 +113,6 @@ func AssertSetSpec(t *testing.T, constructor Constructor) { t.Fatalf("error adding items: %s", err) } }) - t.Run("list", func(t *testing.T) { assertCollectionList(ctx, t, kermit, params.ListAll, ec, []dsref.VersionInfo{ { @@ -189,6 +189,40 @@ func AssertSetSpec(t *testing.T, constructor Constructor) { assertCollectionList(ctx, t, missPiggy, params.ListAll, ec, []dsref.VersionInfo{}) }) + + t.Run("get", func(t *testing.T) { + muppetDSInitID := "muppet_DS_init_id" + expect := &dsref.VersionInfo{ + ProfileID: kermit.ID.Encode(), + InitID: muppetDSInitID, + Username: "kermit", + Name: "muppet_names", + CommitTime: time.Date(2021, 1, 1, 0, 0, 0, 0, time.UTC), + } + err = ec.Add(ctx, kermit.ID, *expect) + if err != nil { + t.Fatalf("error adding items: %s", err) + } + + got, err := ec.Get(ctx, kermit.ID, muppetDSInitID) + if err != nil { + t.Fatalf("error getting version info: %s", err) + } + if diff := cmp.Diff(expect, got); diff != "" { + t.Errorf("collection get version info mismatch (-want +got):\n%s", diff) + } + + got, err = ec.Get(ctx, kermit.ID, "bad_init_id") + if !errors.Is(err, collection.ErrNotFound) { + t.Errorf("error mismatch, expected %q, got %q", collection.ErrNotFound, err) + } + + got, err = ec.Get(ctx, missPiggy.ID, muppetDSInitID) + if !errors.Is(err, collection.ErrNotFound) { + t.Errorf("error mismatch, expected %q, got %q", collection.ErrNotFound, err) + } + }) + } // AssertCollectionEventListenerSpec defines expected behaviours for collections @@ -241,6 +275,11 @@ func 
AssertCollectionEventListenerSpec(t *testing.T, constructor Constructor) { } assertCollectionList(ctx, t, kermit, params.ListAll, s, expect) + // simulate dataset download + mustPublish(ctx, t, bus, event.ETDatasetDownload, muppetNamesInitID) + expect[0].DownloadCount = 1 + assertCollectionList(ctx, t, kermit, params.ListAll, s, expect) + // simulate version creation mustPublish(ctx, t, bus, event.ETDatasetCommitChange, dsref.VersionInfo{ InitID: muppetNamesInitID, @@ -248,7 +287,7 @@ func AssertCollectionEventListenerSpec(t *testing.T, constructor Constructor) { Path: "/mem/PathToMuppetNamesVersionOne", Username: kermit.Peername, Name: muppetNamesName1, - NumVersions: 2, + CommitCount: 2, BodySize: 20, }) @@ -258,7 +297,7 @@ func AssertCollectionEventListenerSpec(t *testing.T, constructor Constructor) { ProfileID: kermit.ID.Encode(), Username: kermit.Peername, Name: muppetNamesName1, - NumVersions: 2, + CommitCount: 2, Path: "/mem/PathToMuppetNamesVersionOne", BodySize: 20, }, @@ -278,7 +317,7 @@ func AssertCollectionEventListenerSpec(t *testing.T, constructor Constructor) { ProfileID: kermit.ID.Encode(), Username: kermit.Peername, Name: muppetNamesName2, - NumVersions: 2, + CommitCount: 2, Path: "/mem/PathToMuppetNamesVersionOne", BodySize: 20, }, @@ -329,6 +368,58 @@ func AssertCollectionEventListenerSpec(t *testing.T, constructor Constructor) { // assert default ordering of datasets }) + t.Run("user_1_remote_actions", func(t *testing.T) { + muppetNamesInitID := "initID" + muppetNamesName1 := "muppet_names" + + // initialize a dataset with the given name, initID, and profileID + mustPublish(ctx, t, bus, event.ETDatasetNameInit, dsref.VersionInfo{ + InitID: muppetNamesInitID, + ProfileID: kermit.ID.Encode(), + Username: kermit.Peername, + Name: muppetNamesName1, + }) + + expect := []dsref.VersionInfo{ + { + InitID: muppetNamesInitID, + ProfileID: kermit.ID.Encode(), + Username: kermit.Peername, + Name: muppetNamesName1, + }, + } + assertCollectionList(ctx, t, 
kermit, params.ListAll, s, expect) + + // simulate that someone has followed the dataset + mustPublish(ctx, t, bus, event.ETRemoteDatasetFollowed, muppetNamesInitID) + expect[0].FollowerCount = 1 + assertCollectionList(ctx, t, kermit, params.ListAll, s, expect) + + // simulate that someone has unfollowed the dataset + mustPublish(ctx, t, bus, event.ETRemoteDatasetUnfollowed, muppetNamesInitID) + expect[0].FollowerCount = 0 + assertCollectionList(ctx, t, kermit, params.ListAll, s, expect) + + // simulate that someone has opened an issue on the dataset + mustPublish(ctx, t, bus, event.ETRemoteDatasetIssueOpened, muppetNamesInitID) + expect[0].OpenIssueCount = 1 + assertCollectionList(ctx, t, kermit, params.ListAll, s, expect) + + // simulate that someone has closed an issue on the dataset + mustPublish(ctx, t, bus, event.ETRemoteDatasetIssueClosed, muppetNamesInitID) + expect[0].OpenIssueCount = 0 + assertCollectionList(ctx, t, kermit, params.ListAll, s, expect) + + // dataset deleted using a scope associated with the owning profile + { + scopedCtx := profile.AddIDToContext(ctx, kermit.ID.Encode()) + mustPublish(scopedCtx, t, bus, event.ETDatasetDeleteAll, muppetNamesInitID) + } + + expect = []dsref.VersionInfo{} + assertCollectionList(ctx, t, kermit, params.ListAll, s, expect) + }) + t.Run("user_1_ordering_and_filtering", func(t *testing.T) { t.Skip("TODO (b5): add a third dataset") }) @@ -346,7 +437,7 @@ func AssertCollectionEventListenerSpec(t *testing.T, constructor Constructor) { mustPublish(ctx, t, bus, event.ETDatasetNameInit, dsref.VersionInfo{ InitID: missPiggyDatasetInitID, - NumVersions: 1, + CommitCount: 1, Path: "/mem/PathToMissPiggyDatasetVersionOne", ProfileID: missPiggy.ID.Encode(), Username: missPiggy.Peername, @@ -359,7 +450,7 @@ func AssertCollectionEventListenerSpec(t *testing.T, constructor Constructor) { ProfileID: missPiggy.ID.Encode(), Username: missPiggy.Peername, Name: missPiggyDatasetName, - NumVersions: 1, + CommitCount: 1, Path: 
"/mem/PathToMissPiggyDatasetVersionOne", }, } @@ -379,13 +470,13 @@ func AssertCollectionEventListenerSpec(t *testing.T, constructor Constructor) { mustPublish(ctx, t, bus, event.ETDatasetCommitChange, dsref.VersionInfo{ InitID: missPiggyDatasetInitID, - NumVersions: 2, + CommitCount: 2, Path: "/mem/PathToMissPiggyDatasetVersionTwo", ProfileID: missPiggy.ID.Encode(), Username: missPiggy.Peername, Name: missPiggyDatasetName, }) - expect[0].NumVersions = 2 + expect[0].CommitCount = 2 expect[0].Path = "/mem/PathToMissPiggyDatasetVersionTwo" assertCollectionList(ctx, t, missPiggy, params.ListAll, s, expect) From 068e4abe855875cafcad91cf59928a6e5486aa5c Mon Sep 17 00:00:00 2001 From: ramfox Date: Thu, 2 Sep 2021 10:57:10 -0400 Subject: [PATCH 4/6] refactor(lib): add `Get` from collection `CollectionMethods.Get` gets a since version info from the collection Also, we are adding an api endpoint "/collection/get", to fetch a single version info from the collection via the api --- lib/collection.go | 42 ++++++++++++++++++++++++++++++++++++++++- lib/collection_test.go | 43 ++++++++++++++++++++++++++++++++++++++++++ lib/http/api.go | 2 ++ 3 files changed, 86 insertions(+), 1 deletion(-) diff --git a/lib/collection.go b/lib/collection.go index 092737dfb..562768278 100644 --- a/lib/collection.go +++ b/lib/collection.go @@ -35,6 +35,7 @@ func (m CollectionMethods) Attributes() map[string]AttributeSet { return map[string]AttributeSet{ "list": {Endpoint: qhttp.AEList, HTTPVerb: "POST"}, "listrawrefs": {Endpoint: qhttp.DenyHTTP}, + "get": {Endpoint: qhttp.AECollectionGet, HTTPVerb: "POST"}, } } @@ -59,6 +60,29 @@ func (m CollectionMethods) ListRawRefs(ctx context.Context, p *EmptyParams) (str return "", dispatchReturnError(got, err) } +// CollectionGetParams defines parameters for looking up the head of a dataset from the collection +type CollectionGetParams struct { + Ref string `json:"ref"` + InitID string `json:"initID"` +} + +// Validate returns an error if CollectionGetParams 
fields are in an invalid state +func (p *CollectionGetParams) Validate() error { + if p.Ref == "" && p.InitID == "" { + return fmt.Errorf("either ref or initID are required") + } + return nil +} + +// Get gets the head of a dataset as a VersionInfo from the collection +func (m CollectionMethods) Get(ctx context.Context, p *CollectionGetParams) (*dsref.VersionInfo, error) { + got, _, err := m.d.Dispatch(ctx, dispatchMethodName(m, "get"), p) + if res, ok := got.(*dsref.VersionInfo); ok { + return res, err + } + return nil, dispatchReturnError(got, err) +} + // collectionImpl holds the method implementations for CollectionMethods type collectionImpl struct{} @@ -142,7 +166,7 @@ func (collectionImpl) List(scope scope, p *ListParams) ([]dsref.VersionInfo, err infos[i] = reporef.ConvertToVersionInfo(&r) } } else if listProfile.Peername == "" || reqProfile.Peername == listProfile.Peername { - infos, err = base.ListDatasets(scope.Context(), scope.Repo(), p.Term, restrictPid, p.Offset, p.Limit, p.Public, p.ShowNumVersions) + infos, err = base.ListDatasets(scope.Context(), scope.Repo(), p.Term, restrictPid, p.Offset, p.Limit, p.Public, true) if errors.Is(err, ErrListWarning) { // This warning can happen when there's conflicts between usernames and // profileIDs. This type of conflict should not break listing functionality. 
@@ -198,3 +222,19 @@ func (collectionImpl) ListRawRefs(scope scope, p *EmptyParams) (string, error) { } return base.RawDatasetRefs(scope.Context(), scope.ActiveProfile().ID, scope.CollectionSet()) } + +// Get gets the head of a dataset as a VersionInfo from the collection +func (collectionImpl) Get(scope scope, p *CollectionGetParams) (*dsref.VersionInfo, error) { + s := scope.CollectionSet() + if s == nil { + return nil, fmt.Errorf("no collection") + } + if p.InitID == "" { + ref, _, err := scope.ParseAndResolveRef(scope.Context(), p.Ref) + if err != nil { + return nil, err + } + p.InitID = ref.InitID + } + return s.Get(scope.Context(), scope.ActiveProfile().ID, p.InitID) +} diff --git a/lib/collection_test.go b/lib/collection_test.go index 4e06f96c7..9d6bb8363 100644 --- a/lib/collection_test.go +++ b/lib/collection_test.go @@ -120,6 +120,49 @@ func compareVersionInfoAsSimple(a, b dsref.VersionInfo) error { return nil } +func TestGetFromCollection(t *testing.T) { + tr := newTestRunner(t) + defer tr.Delete() + + // Save a dataset with a body + _, err := tr.SaveWithParams(&SaveParams{ + Ref: "me/cities_ds", + BodyPath: "testdata/cities_2/body.csv", + }) + if err != nil { + t.Fatal(err) + } + + // get from the repo + ds := tr.MustGet(t, "me/cities_ds") + expect := dsref.ConvertDatasetToVersionInfo(ds) + pro, err := tr.Instance.activeProfile(tr.Ctx) + if err != nil { + t.Fatal(err) + } + expect.ProfileID = pro.ID.Encode() + expect.CommitCount = 1 + + // fetch from the collection + got, err := tr.Instance.Collection().Get(tr.Ctx, &CollectionGetParams{Ref: "me/cities_ds"}) + if err != nil { + t.Fatalf("error getting from collection by ref: %s", err) + } + + if diff := cmp.Diff(expect, *got); diff != "" { + t.Errorf("get from collection mistmatch (-want +got):\n%s", diff) + } + + got, err = tr.Instance.Collection().Get(tr.Ctx, &CollectionGetParams{InitID: expect.InitID}) + if err != nil { + t.Fatalf("error getting from collection by initID: %s", err) + } + + if diff := 
cmp.Diff(expect, *got); diff != "" { + t.Errorf("get from collection mistmatch (-want +got):\n%s", diff) + } +} + func TestDatasetRequestsListP2p(t *testing.T) { ctx, done := context.WithCancel(context.Background()) defer done() diff --git a/lib/http/api.go b/lib/http/api.go index c9a2c0297..9b118efa5 100644 --- a/lib/http/api.go +++ b/lib/http/api.go @@ -33,6 +33,8 @@ const ( // AEList lists all datasets in your collection AEList APIEndpoint = "/list" + // AECollectionGet returns info on a head dataset in your collection + AECollectionGet APIEndpoint = "/collection/get" // AEDiff is an endpoint for generating dataset diffs AEDiff APIEndpoint = "/diff" // AEChanges is an endpoint for generating dataset change reports From 8bb28381893ef341a8f4c49804c29203bcba1918 Mon Sep 17 00:00:00 2001 From: ramfox Date: Thu, 2 Sep 2021 10:57:45 -0400 Subject: [PATCH 5/6] test(api): add spec test for new "collection/get" endpoint & update open api doc --- api/open_api_3.yaml | 1341 ++++++++++++++++-------------- api/spec/testdata/aggregate.json | 16 +- 2 files changed, 724 insertions(+), 633 deletions(-) diff --git a/api/open_api_3.yaml b/api/open_api_3.yaml index d39d64e1d..5a5cea6f0 100644 --- a/api/open_api_3.yaml +++ b/api/open_api_3.yaml @@ -51,6 +51,62 @@ paths: allOf: - $ref: '#/components/schemas/RawResponse' + description: OK + '400': + content: + application/json: + schema: + allOf: + - $ref: '#/components/schemas/APIResponse' + - properties: + meta: + allOf: + - $ref: '#/components/schemas/APIMetaError' + description: Bad request + '500': + content: + application/json: + schema: + type: string + nullable: true + description: Server error + default: + content: + application/json: + schema: + allOf: + - $ref: '#/components/schemas/APIResponse' + - properties: + meta: + allOf: + - $ref: '#/components/schemas/APIMetaError' + description: Error + '/collection/get': + post: + description: Get gets the head of a dataset as a VersionInfo from the collection + operationId: 
'collection.Get' + tags: + - collection + requestBody: + required: true + content: + application/json: + schema: + '$ref': '#/components/schemas/CollectionGetParams' + + + responses: + '200': + content: + application/json: + schema: + allOf: + - $ref: '#/components/schemas/APIResponse' + - properties: + data: + allOf: + - $ref: '#/components/schemas/VersionInfo' + description: OK '400': content: @@ -2468,133 +2524,136 @@ paths: description: Error components: schemas: - ValidateParams: + RemoveResponse: type: object properties: ref: type: string - bodyFilename: - type: string + numDeleted: + type: number - schemaFilename: + message: type: string - structureFilename: - type: string + unlinked: + type: boolean - MemResolver: + ActivityParams: type: object properties: - Username: + ref: + type: string + description: "Reference to data to fetch history for" + example: "b5/world_bank_population" + pull: + type: boolean + description: "if true, pull any datasets that aren't stored locally" + example: "false" + ApplyParams: + type: object + properties: + ref: type: string - RefMap: + transform: type: object - IDMap: + secrets: type: object - DAGInfoParams: - type: object - properties: - ref: - type: string + wait: + type: boolean - label: - type: string + ScriptOutput: + type: object + description: "TODO(arqu): substitute with websockets when working over the wire " + Hooks: + type: object - RunParams: + DiffParams: type: object properties: - ref: + leftPath: type: string - - initID: + description: "File paths or reference to datasets " + rightPath: type: string - workflowID: + WorkingDir: + type: string + description: "If not null, the working directory that the diff is using " + UseLeftPrevVersion: + type: boolean + description: "Whether to get the previous version of the left parameter " + Selector: type: string + description: "Which component or part of a dataset to compare " + ProfileParams: + type: object + SetProfileParams: + type: object + properties: + pro: + 
type: object - RefListParams: + ListParams: type: object properties: - Ref: + term: type: string - description: "String value of a reference " - Offset: + description: "term to filter list by" + example: "population" + username: + type: string + description: "username to filter collection by" + example: "ramfox" + orderBy: + type: string + description: "field name to order list by" + example: "created" + limit: type: number - description: "Pagination Parameters " - VersionInfo: + description: "maximum number of datasets to use. use -1 to list all datasets" + example: "50" + offset: + type: number + description: "number of items to skip" + example: "0" + public: + type: boolean + description: "Public only applies to listing datasets, shows only datasets that are set to visible " + Ref: type: object properties: initID: type: string - description: "Key as a stable identifier InitID is derived from the logbook for the dataset " + description: "InitID is the canonical identifer for a dataset history " username: type: string - description: "Fields from dsref.Ref Username of dataset owner " + description: "Username of dataset owner " profileID: type: string - description: "ProfileID of dataset owner " + description: "ProfileID of dataset owner deprecated - avoid using this field, we're working towards removing it generally profile IDs should come from request scopes, or be fetched from stores of identity info (profile.Store) " name: type: string description: "Unique name reference for this dataset " path: type: string description: "Content-addressed path for this dataset " - published: - type: boolean - description: "State about the dataset that can change If true, this dataset has published versions " - foreign: - type: boolean - description: "If true, this reference doesn't exist locally. Only makes sense if path is set, as this flag refers to specific versions, not to entire dataset histories. 
" - metaTitle: - type: string - description: "Meta fields Title from the meta structure " - themeList: - type: string - description: "List of themes from the meta structure, comma-separated list " - bodySize: - type: number - description: "Structure fields Size of the body in bytes " - bodyRows: - type: number - description: "Num of rows in the body " - bodyFormat: - type: string - description: "Format of the body, such as 'csv' or 'json' " - numErrors: - type: number - description: "Number of errors from the structure " - commitTime: - type: object - description: "Commit fields Timestamp field from the commit " - commitTitle: - type: string - description: "Title field from the commit " - commitMessage: - type: string - description: "Message field from the commit " - numVersions: - type: number - description: "About the dataset's history and location Number of versions that the dataset has " - workflowID: - type: string - description: "Workflow fields " - workflowtriggerDescription: + PeerInfoParams: + type: object + properties: + peername: type: string - runID: - type: string - description: "Run Fields RunID is derived from from either the Commit.RunID, field or the runID of a failed run. In the latter case the Path value will be empty " - runStatus: + profileID: type: string - description: "RunStatus is a string version of the run.Status enumeration. This value will always be one of: ''|'waiting'|'running'|'succeeded'|'failed'|'unchanged'|'skipped' RunStatus is not stored on a dataset version, and instead must come from either run state or a cache of run state it's of type string to follow the 'plain old data' pattern " - runDuration: - type: object - description: "RunDuration is how long the run took/has currently taken in nanoseconds default value of 0 means no duration data is available. 
RunDuration is not stored on a dataset version, and instead must come from either run state or a cache of run state " - ConnectionsParams: + + verbose: + type: boolean + description: "Verbose adds network details from the p2p Peerstore " + PeerListParams: type: object properties: limit: @@ -2603,216 +2662,155 @@ components: offset: type: number - User: + cached: + type: boolean + description: "Cached == true will return offline peers from the repo as well as online peers, default is to list connected peers only " + RawLogbookParams: + type: object + Viz: type: object properties: - id: + format: type: string - - name: + description: "Format designates the visualization configuration syntax. currently the only supported syntax is 'html' " + path: type: string - - email: + description: "Path is the location of a viz, transient derived " + qri: type: string - - DeployParams: + description: "Qri should always be 'vc:0' derived " + scriptPath: + type: string + description: "ScriptPath is the path to the script that created this " + text: + type: string + description: "Text contains the contents of the script, transient " + renderedPath: + type: string + description: "RenderedPath is the path to the file rendered using the viz script and the body " + ManifestMissingParams: type: object properties: - Run: - type: boolean - - Workflow: - type: object - - Dataset: + manifest: type: object - PushParams: + GetConfigParams: type: object properties: - ref: + Field: type: string - remote: - type: string - - all: + WithPrivateKey: type: boolean - description: "All indicates all versions of a dataset and the dataset namespace should be either published or removed " - Rev: - type: object - properties: - Field: + + Format: type: string - description: "field scopt, currently can only be a component name, or the entire dataset " - Gen: - type: number - description: "the nth-generational ancestor of a history " - CSVOptions: - type: object - properties: - headerRow: - type: boolean - 
description: "HeaderRow specifies weather this csv file has a header row or not " - lazyQuotes: - type: boolean - description: "If LazyQuotes is true, a quote may appear in an unquoted field and a non-doubled quote may appear in a quoted field. " - separator: - type: object - description: "Separator is the field delimiter. It is set to comma (',') by NewReader. Comma must be a valid rune and must not be \r, \n, or the Unicode replacement character (0xFFFD). " - variadicFields: + + Concise: type: boolean - description: "VariadicFields sets permits records to have a variable number of fields avoid using this " - RenderParams: + + RenameParams: type: object properties: - ref: - type: string - description: "Ref is a string reference to the dataset to render " - dataset: - type: object - description: "Optionally pass an entire dataset in for rendering, if providing a dataset, the Ref field must be empty " - template: - type: object - description: "Optional template override " - useFSI: - type: boolean - description: "TODO (b5): investigate if this field is still in use " - format: + current: type: string - description: "Output format. defaults to 'html' " - selector: + + next: type: string - description: "Selector " - Dataset: + + Structure: type: object properties: - body: - type: object - description: "Body represents dataset data with native go types. Datasets have at most one body. Body, BodyBytes, and BodyPath work together, often with only one field used at a time " - bodyBytes: - type: object - description: "BodyBytes is for representing dataset data as a slice of bytes " - bodyPath: - type: string - description: "BodyPath is the path to the hash of raw data as it resolves on the network " - commit: - type: object - description: "Commit contains author & change message information that describes this version of a dataset " - id: - type: string - description: "ID is an identifier string for this dataset. 
" - meta: - type: object - description: "Meta contains all human-readable meta about this dataset intended to aid in discovery and organization of this document " - name: - type: string - description: "name reference for this dataset, transient " - path: + checksum: type: string - description: "Location of this dataset, transient " - peername: + description: "Checksum is a bas58-encoded multihash checksum of the entire data file this structure points to. This is different from IPFS hashes, which are calculated after breaking the file into blocks derived " + compression: type: string - description: "Peername of dataset owner, transient " - previousPath: + description: "Compression specifies any compression on the source data, if empty assume no compression " + depth: + type: number + description: "Maximum nesting level of composite types in the dataset. eg: depth 1 == [], depth 2 == [[]] derived " + encoding: type: string - description: "PreviousPath connects datasets to form a historical merkle-DAG of snapshots of this document, creating a version history " - profileID: + description: "Encoding specifics character encoding, assume utf-8 if not specified " + errCount: + type: number + description: "ErrCount is the number of errors returned by validating data against this schema. required derived " + entries: + type: number + description: "Entries is number of top-level entries in the dataset. With tablular data this is the same as the number of 'rows' derived " + format: type: string - description: "ProfileID of dataset owner, transient " - readme: + description: "Format specifies the format of the raw data MIME type " + formatConfig: type: object - description: "Readme is a path to the readme file for this dataset " - numVersions: + description: "FormatConfig removes as much ambiguity as possible about how to interpret the speficied format. 
FormatConfig FormatConfig `json:'formatConfig,omitempty'` " + length: type: number - description: "Number of versions this dataset has, transient " + description: "Length is the length of the data object in bytes. must always match & be present derived " + path: + type: string + description: "location of this structure, transient derived " qri: type: string - description: "Qri is a key for both identifying this document type, and versioning the dataset document definition itself. derived " - structure: - type: object - description: "Structure of this dataset " - stats: - type: object - description: "Stats is a component containing statistical metadata about the dataset body " - transform: - type: object - description: "Transform is a path to the transformation that generated this resource " - viz: + description: "Qri should always be KindStructure derived " + schema: type: object - description: "Viz stores configuration data related to representing a dataset as a visualization " - GetConfigParams: + description: "Schema contains the schema definition for the underlying data, schemas are defined using the IETF json-schema specification. for more info on json-schema see: https://json-schema.org " + strict: + type: boolean + description: "Strict requires schema validation to pass without error. Datasets with strict: true can have additional functionality and performance speedups that comes with being able to assume that all data is valid " + FileParams: type: object properties: - Field: - type: string - - WithPrivateKey: - type: boolean - - Format: + filename: type: string - - Concise: - type: boolean - - TransformStep: + description: "url to download data from. either Url or Data is required Url string Filename of data file. 
extension is used for filetype detection " + data: + type: object + description: "Data is the file as slice of bytes " + ValidateParams: type: object properties: - name: - type: string - - path: - type: string - - syntax: + ref: type: string - category: + bodyFilename: type: string - script: - type: object - - RenameParams: - type: object - properties: - current: + schemaFilename: type: string - next: + structureFilename: type: string - Theme: + Commit: type: object properties: - description: - type: string - - display_name: + author: + type: object + description: "Author of this commit " + message: type: string - - image_display_url: + description: "Message is an optional " + path: type: string - - id: + description: "Path is the location of this commit, transient derived " + qri: type: string - - name: + description: "Qri is this commit's qri kind derived " + signature: type: string - + description: "Signature is a base58 encoded privateKey signing of Title " + timestamp: + type: object + description: "Time this dataset was created. Required. " title: type: string - - FileParams: - type: object - properties: - filename: + description: "Title of the commit. Required. " + runID: type: string - description: "url to download data from. either Url or Data is required Url string Filename of data file. extension is used for filetype detection " - data: - type: object - description: "Data is the file as slice of bytes " + description: "RunID is only present if an automated script was executed durning the commit time Commits with non-empty `RunID`s imply the existance of a transform component " GetParams: type: object properties: @@ -2830,147 +2828,312 @@ components: offset: type: number description: "number of results to skip. 
only applies when selector is 'body' " - EmptyParams: - type: object - ProfileParams: - type: object - ActivityParams: - type: object - properties: - ref: - type: string - description: "Reference to data to fetch history for" - example: "b5/world_bank_population" - pull: - type: boolean - description: "if true, pull any datasets that aren't stored locally" - example: "false" - RegistryProfileParams: + Transform: type: object properties: - Profile: + config: type: object - - VersionInfoAggregator: - type: object - TransformResource: - type: object - properties: + description: "Config outlines any configuration that would affect the resulting hash " path: type: string - - ListParams: - type: object - properties: - term: - type: string - description: "term to filter list by" - example: "population" - username: + description: "location of the transform object, transient " + qri: type: string - description: "username to filter collection by" - example: "ramfox" - orderBy: + description: "Kind should always equal KindTransform " + resources: + type: object + description: "Resources is a map of all datasets referenced in this transform, with alphabetical keys generated by datasets in order of appearance within the transform " + scriptPath: type: string - description: "field name to order list by" - example: "created" - limit: - type: number - description: "maximum number of datasets to use. use -1 to list all datasets" - example: "50" - offset: - type: number - description: "number of items to skip" - example: "0" - public: - type: boolean - description: "Public only applies to listing datasets, shows only datasets that are set to visible " - SaveParams: - type: object - properties: - Dataset: + description: "ScriptPath is the path to the script that produced this transformation. 
Deprecated - use Steps instead " + text: + type: string + description: "Text contains the contents of the script, transient " + secrets: type: object - description: "dataset supplies params directly, all other param fields override values supplied by dataset " - ref: + description: "Secrets is a map of secret values used in the transformation, transient. TODO (b5): make this not-transient by censoring the values used, but not keys " + steps: + type: object + + syntax: type: string - description: "dataset reference string, the name to save to" - example: "b5/world_bank_population" - title: + description: "Syntax this transform was written in Deprecated - syntax is defined per-step " + syntaxVersion: type: string - description: "commit title, defaults to a generated string based on diff" - example: "update dataset meta" - Message: + description: "SyntaxVersion is an identifier for the application and version number that produced the result Deprecated - use steps.Syntax with a version suffix instead " + syntaxes: + type: object + description: "map of syntaxes used in this transform to their version identifier. 
" + CreateAuthTokenParams: + type: object + properties: + granteeUsername: type: string - description: "commit message, defaults to blank" - example: "reaname title & fill in supported langages" - bodyPath: + description: "username to grant auth" + example: "keyboard_cat" + granteeProfileID: type: string - description: "path to body data " - filePaths: + description: "profile Identifier to grant token for" + example: "QmemJQrK7PTQvD3n8gmo9JhyaByyLmETiNR1Y8wS7hv4sP" + ttl: type: object - description: "absolute path or URL to the list of dataset files or components to load " - secrets: + description: "lifespan of token in nanoseconds" + example: "2000000000000" + RenderParams: + type: object + properties: + ref: + type: string + description: "Ref is a string reference to the dataset to render " + dataset: type: object - description: "secrets for transform execution. Should be a set of key: value pairs " - ScriptOutput: + description: "Optionally pass an entire dataset in for rendering, if providing a dataset, the Ref field must be empty " + template: type: object - description: "optional writer to have transform script record standard output to note: this won't work over RPC, only on local calls " - apply: - type: boolean - description: "Apply runs a transform script to create the next version to save " - replace: + description: "Optional template override " + useFSI: type: boolean - description: "Replace writes the entire given dataset as a new snapshot instead of applying save params as augmentations to the existing history " - private: + description: "TODO (b5): investigate if this field is still in use " + format: + type: string + description: "Output format. 
defaults to 'html' " + selector: + type: string + description: "Selector " + User: + type: object + properties: + id: + type: string + + name: + type: string + + email: + type: string + + VersionInfo: + type: object + properties: + initID: + type: string + description: "Key as a stable identifier InitID is derived from the logbook for the dataset " + username: + type: string + description: "Fields from dsref.Ref Username of dataset owner " + profileID: + type: string + description: "ProfileID of dataset owner " + name: + type: string + description: "Unique name reference for this dataset " + path: + type: string + description: "Content-addressed path for this dataset " + published: type: boolean - description: "option to make dataset private. private data is not currently implimented, see https://github.com/qri-io/qri/issues/291 for updates " - convertFormatToPrev: + description: "State about the dataset that can change If true, this dataset has published versions " + foreign: type: boolean - description: "if true, convert body to the format of the previous version, if applicable " - drop: + description: "If true, this reference doesn't exist locally. Only makes sense if path is set, as this flag refers to specific versions, not to entire dataset histories. 
" + metaTitle: type: string - description: "comma separated list of component names to delete before saving " + description: "Meta fields Title from the meta structure " + themeList: + type: string + description: "List of themes from the meta structure, comma-separated list " + bodySize: + type: number + description: "Structure fields Size of the body in bytes " + bodyRows: + type: number + description: "Num of rows in the body " + bodyFormat: + type: string + description: "Format of the body, such as 'csv' or 'json' " + numErrors: + type: number + description: "Number of errors from the structure " + commitTime: + type: object + description: "Commit fields Timestamp field from the commit " + commitTitle: + type: string + description: "Title field from the commit " + commitMessage: + type: string + description: "Message field from the commit " + workflowID: + type: string + description: "Workflow fields " + workflowtriggerDescription: + type: string + + runID: + type: string + description: "Run Fields RunID is derived from from either the Commit.RunID, field or the runID of a failed run. In the latter case the Path value will be empty " + runStatus: + type: string + description: "RunStatus is a string version of the run.Status enumeration. This value will always be one of: ''|'waiting'|'running'|'succeeded'|'failed'|'unchanged'|'skipped' RunStatus is not stored on a dataset version, and instead must come from either run state or a cache of run state it's of type string to follow the 'plain old data' pattern " + runDuration: + type: object + description: "RunDuration is how long the run took/has currently taken in nanoseconds default value of 0 means no duration data is available. RunDuration is not stored on a dataset version, and instead must come from either run state or a cache of run state " + runCount: + type: number + description: "Aggregate Fields TODO (ramfox): These fields are only temporarily living on `VersionInfo`. 
They are needed by the frontend to display 'details' about the head of of the dataset. When we get more user feedback and settle what info users want about their datasets, these fields may move to a new struct store, or subsystem. These fields are not derived from any `dataset.Dataset` fields. These fields should only be used in the `collection` package. RunCount is the number of times this dataset's transform has been run " + commitCount: + type: number + description: "CommitCount is the number of commits in this dataset's history " + downloadCount: + type: number + description: "DownloadCount is the number of times this dataset has been directly downloaded from this Qri node " + followerCount: + type: number + description: "FollowerCount is the number of followers this dataset has on this Qri node " + openIssueCount: + type: number + description: "OpenIssueCount is the number of open issues this dataset has on this Qri node " + RemoveParams: + type: object + properties: + ref: + type: string + + revision: + type: object + force: type: boolean - description: "force a new commit, even if no changes are detected " - shouldRender: - type: boolean - description: "save a rendered version of the template along with the dataset " - newName: - type: boolean - description: "new dataset only, don't create a commit on an existing dataset, name will be unused " - ApplyParams: + + SearchParams: + type: object + properties: + q: + type: string + + limit: + type: number + + offset: + type: number + + EmptyParams: + type: object + PushParams: type: object properties: ref: type: string - transform: + remote: + type: string + + all: + type: boolean + description: "All indicates all versions of a dataset and the dataset namespace should be either published or removed " + DiffResponse: + type: object + properties: + stat: type: object - secrets: + schemaStat: type: object - wait: + schema: + type: object + + diff: + type: object + + CSVOptions: + type: object + properties: + 
headerRow: + type: boolean + description: "HeaderRow specifies weather this csv file has a header row or not " + lazyQuotes: type: boolean + description: "If LazyQuotes is true, a quote may appear in an unquoted field and a non-doubled quote may appear in a quoted field. " + separator: + type: object + description: "Separator is the field delimiter. It is set to comma (',') by NewReader. Comma must be a valid rune and must not be \r, \n, or the Unicode replacement character (0xFFFD). " + variadicFields: + type: boolean + description: "VariadicFields sets permits records to have a variable number of fields avoid using this " + ApplyResult: + type: object + properties: + Data: + type: object - ScriptOutput: + runID: + type: string + + ConnectionsParams: + type: object + properties: + limit: + type: number + + offset: + type: number + + TeardownParams: + type: object + properties: + Config: type: object - description: "TODO(arqu): substitute with websockets when working over the wire " - Hooks: + + RepoPath: + type: string + + ConfigFilepath: + type: string + + VersionInfoAggregator: + type: object + DeployParams: + type: object + properties: + Run: + type: boolean + + Workflow: type: object - WorkflowParams: + Dataset: + type: object + + RefListParams: + type: object + properties: + Ref: + type: string + description: "String value of a reference " + Offset: + type: number + description: "Pagination Parameters " + CollectionGetParams: + type: object + properties: + ref: + type: string + + initID: + type: string + + Stats: type: object properties: - workflowID: + path: type: string - initID: + qri: type: string - ref: - type: string + stats: + type: object Meta: type: object @@ -3026,73 +3189,73 @@ components: version: type: string description: "Version is the version identifier for this dataset " - Commit: + Dataset: type: object properties: - author: + body: type: object - description: "Author of this commit " - message: - type: string - description: "Message is an 
optional " - path: - type: string - description: "Path is the location of this commit, transient derived " - qri: + description: "Body represents dataset data with native go types. Datasets have at most one body. Body, BodyBytes, and BodyPath work together, often with only one field used at a time " + bodyBytes: + type: object + description: "BodyBytes is for representing dataset data as a slice of bytes " + bodyPath: type: string - description: "Qri is this commit's qri kind derived " - signature: + description: "BodyPath is the path to the hash of raw data as it resolves on the network " + commit: + type: object + description: "Commit contains author & change message information that describes this version of a dataset " + id: type: string - description: "Signature is a base58 encoded privateKey signing of Title " - timestamp: + description: "ID is an identifier string for this dataset. " + meta: type: object - description: "Time this dataset was created. Required. " - title: + description: "Meta contains all human-readable meta about this dataset intended to aid in discovery and organization of this document " + name: type: string - description: "Title of the commit. Required. 
" - runID: + description: "name reference for this dataset, transient " + path: type: string - description: "RunID is only present if an automated script was executed durning the commit time Commits with non-empty `RunID`s imply the existance of a transform component " - ConnectParamsPod: - type: object - properties: + description: "Location of this dataset, transient " peername: type: string - - profileID: + description: "Peername of dataset owner, transient " + previousPath: type: string - - networkID: + description: "PreviousPath connects datasets to form a historical merkle-DAG of snapshots of this document, creating a version history " + profileID: type: string - - multiaddr: + description: "ProfileID of dataset owner, transient " + readme: + type: object + description: "Readme is a path to the readme file for this dataset " + numVersions: + type: number + description: "Number of versions this dataset has, transient " + qri: type: string - - Viz: + description: "Qri is a key for both identifying this document type, and versioning the dataset document definition itself. derived " + structure: + type: object + description: "Structure of this dataset " + stats: + type: object + description: "Stats is a component containing statistical metadata about the dataset body " + transform: + type: object + description: "Transform is a path to the transformation that generated this resource " + viz: + type: object + description: "Viz stores configuration data related to representing a dataset as a visualization " + JSONOptions: type: object properties: - format: - type: string - description: "Format designates the visualization configuration syntax. 
currently the only supported syntax is 'html' " - path: - type: string - description: "Path is the location of a viz, transient derived " - qri: - type: string - description: "Qri should always be 'vc:0' derived " - scriptPath: - type: string - description: "ScriptPath is the path to the script that created this " - text: - type: string - description: "Text contains the contents of the script, transient " - renderedPath: - type: string - description: "RenderedPath is the path to the file rendered using the viz script and the body " - ManifestParams: + Options: + type: object + + TransformResource: type: object properties: - ref: + path: type: string SetupParams: @@ -3119,264 +3282,141 @@ components: Generator: type: object description: "setup requires a crypto source " - ChangeReportParams: + ConnectParamsPod: type: object properties: - leftRef: + peername: type: string - rightRef: + profileID: type: string - Structure: - type: object - properties: - checksum: - type: string - description: "Checksum is a bas58-encoded multihash checksum of the entire data file this structure points to. This is different from IPFS hashes, which are calculated after breaking the file into blocks derived " - compression: - type: string - description: "Compression specifies any compression on the source data, if empty assume no compression " - depth: - type: number - description: "Maximum nesting level of composite types in the dataset. eg: depth 1 == [], depth 2 == [[]] derived " - encoding: - type: string - description: "Encoding specifics character encoding, assume utf-8 if not specified " - errCount: - type: number - description: "ErrCount is the number of errors returned by validating data against this schema. required derived " - entries: - type: number - description: "Entries is number of top-level entries in the dataset. 
With tablular data this is the same as the number of 'rows' derived " - format: - type: string - description: "Format specifies the format of the raw data MIME type " - formatConfig: - type: object - description: "FormatConfig removes as much ambiguity as possible about how to interpret the speficied format. FormatConfig FormatConfig `json:'formatConfig,omitempty'` " - length: - type: number - description: "Length is the length of the data object in bytes. must always match & be present derived " - path: - type: string - description: "location of this structure, transient derived " - qri: - type: string - description: "Qri should always be KindStructure derived " - schema: - type: object - description: "Schema contains the schema definition for the underlying data, schemas are defined using the IETF json-schema specification. for more info on json-schema see: https://json-schema.org " - strict: - type: boolean - description: "Strict requires schema validation to pass without error. Datasets with strict: true can have additional functionality and performance speedups that comes with being able to assume that all data is valid " - PreviewParams: - type: object - properties: - ref: + networkID: type: string - SearchParams: - type: object - properties: - q: + multiaddr: type: string - limit: - type: number - - offset: - type: number - - ParseError: + WorkflowParams: type: object properties: - Message: + workflowID: type: string - Ref: - type: object - properties: initID: type: string - description: "InitID is the canonical identifer for a dataset history " - username: - type: string - description: "Username of dataset owner " - profileID: - type: string - description: "ProfileID of dataset owner deprecated - avoid using this field, we're working towards removing it generally profile IDs should come from request scopes, or be fetched from stores of identity info (profile.Store) " - name: - type: string - description: "Unique name reference for this dataset " - path: - 
type: string - description: "Content-addressed path for this dataset " - ManifestMissingParams: - type: object - properties: - manifest: - type: object - PullParams: - type: object - properties: ref: type: string - logsOnly: - type: boolean - description: "only fetch logbook data " - RemoveResponse: + ManifestParams: type: object properties: ref: - type: string - - numDeleted: - type: number - - message: - type: string - - unlinked: - type: boolean - - PeerInfoParams: - type: object - properties: - peername: - type: string - - profileID: - type: string - - verbose: - type: boolean - description: "Verbose adds network details from the p2p Peerstore " - SetProfileParams: - type: object - properties: - pro: - type: object - - License: - type: object - properties: - type: - type: string - - url: - type: string - - PeerListParams: - type: object - properties: - limit: - type: number - - offset: - type: number - - cached: - type: boolean - description: "Cached == true will return offline peers from the repo as well as online peers, default is to list connected peers only " - JSONOptions: + type: string + + RunParams: type: object properties: - Options: - type: object + ref: + type: string - CreateAuthTokenParams: + initID: + type: string + + workflowID: + type: string + + Readme: type: object properties: - granteeUsername: + format: type: string - description: "username to grant auth" - example: "keyboard_cat" - granteeProfileID: + description: "Format designates the visualization configuration syntax. 
Only supported formats are 'html' and 'md' " + path: type: string - description: "profile Identifier to grant token for" - example: "QmemJQrK7PTQvD3n8gmo9JhyaByyLmETiNR1Y8wS7hv4sP" - ttl: - type: object - description: "lifespan of token in nanoseconds" - example: "2000000000000" - GetResult: + description: "Path is the location of a readme, transient derived " + qri: + type: string + description: "Qri should always be 'rm:0' derived " + scriptPath: + type: string + description: "ScriptPath is the path to the script that created this " + text: + type: string + description: "Text contains the contents of the script, transient " + renderedPath: + type: string + description: "RenderedPath is the path to the file rendered using the readme script and the body " + MemResolver: type: object properties: - value: + Username: + type: string + + RefMap: type: object - bytes: + IDMap: type: object - TeardownParams: + DAGInfoParams: type: object properties: - Config: - type: object - - RepoPath: + ref: type: string - ConfigFilepath: + label: type: string - DiffResponse: + RegistryProfileParams: type: object properties: - stat: - type: object - - schemaStat: - type: object - - schema: - type: object - - diff: + Profile: type: object - ApplyResult: + Rev: type: object properties: - Data: - type: object - - runID: + Field: + type: string + description: "field scopt, currently can only be a component name, or the entire dataset " + Gen: + type: number + description: "the nth-generational ancestor of a history " + XLSXOptions: + type: object + properties: + sheetName: type: string - RemoveParams: + PullParams: type: object properties: ref: type: string - revision: - type: object - - force: + logsOnly: type: boolean - - DiffParams: + description: "only fetch logbook data " + ChangeReportParams: type: object properties: - leftPath: - type: string - description: "File paths or reference to datasets " - rightPath: + leftRef: type: string - WorkingDir: - type: string - description: "If 
not null, the working directory that the diff is using " - UseLeftPrevVersion: - type: boolean - description: "Whether to get the previous version of the left parameter " - Selector: + rightRef: type: string - description: "Which component or part of a dataset to compare " + + GetResult: + type: object + properties: + value: + type: object + + bytes: + type: object + ValidateResponse: type: object properties: @@ -3386,81 +3426,94 @@ components: errors: type: object description: "Validation Errors " - RawLogbookParams: - type: object - Stats: + License: type: object properties: - path: + type: type: string - qri: + url: type: string - stats: - type: object - - Readme: + Theme: type: object properties: - format: + description: type: string - description: "Format designates the visualization configuration syntax. Only supported formats are 'html' and 'md' " - path: + + display_name: type: string - description: "Path is the location of a readme, transient derived " - qri: + + image_display_url: type: string - description: "Qri should always be 'rm:0' derived " - scriptPath: + + id: type: string - description: "ScriptPath is the path to the script that created this " - text: + + name: type: string - description: "Text contains the contents of the script, transient " - renderedPath: + + title: type: string - description: "RenderedPath is the path to the file rendered using the readme script and the body " - Transform: + + SaveParams: type: object properties: - config: + Dataset: type: object - description: "Config outlines any configuration that would affect the resulting hash " - path: + description: "dataset supplies params directly, all other param fields override values supplied by dataset " + ref: type: string - description: "location of the transform object, transient " - qri: + description: "dataset reference string, the name to save to" + example: "b5/world_bank_population" + title: type: string - description: "Kind should always equal KindTransform " - 
resources: - type: object - description: "Resources is a map of all datasets referenced in this transform, with alphabetical keys generated by datasets in order of appearance within the transform " - scriptPath: + description: "commit title, defaults to a generated string based on diff" + example: "update dataset meta" + Message: type: string - description: "ScriptPath is the path to the script that produced this transformation. Deprecated - use Steps instead " - text: + description: "commit message, defaults to blank" + example: "reaname title & fill in supported langages" + bodyPath: type: string - description: "Text contains the contents of the script, transient " + description: "path to body data " + filePaths: + type: object + description: "absolute path or URL to the list of dataset files or components to load " secrets: type: object - description: "Secrets is a map of secret values used in the transformation, transient. TODO (b5): make this not-transient by censoring the values used, but not keys " - steps: + description: "secrets for transform execution. Should be a set of key: value pairs " + ScriptOutput: type: object - - syntax: - type: string - description: "Syntax this transform was written in Deprecated - syntax is defined per-step " - syntaxVersion: + description: "optional writer to have transform script record standard output to note: this won't work over RPC, only on local calls " + apply: + type: boolean + description: "Apply runs a transform script to create the next version to save " + replace: + type: boolean + description: "Replace writes the entire given dataset as a new snapshot instead of applying save params as augmentations to the existing history " + private: + type: boolean + description: "option to make dataset private. 
private data is not currently implimented, see https://github.com/qri-io/qri/issues/291 for updates " + convertFormatToPrev: + type: boolean + description: "if true, convert body to the format of the previous version, if applicable " + drop: type: string - description: "SyntaxVersion is an identifier for the application and version number that produced the result Deprecated - use steps.Syntax with a version suffix instead " - syntaxes: - type: object - description: "map of syntaxes used in this transform to their version identifier. " - XLSXOptions: + description: "comma separated list of component names to delete before saving " + force: + type: boolean + description: "force a new commit, even if no changes are detected " + shouldRender: + type: boolean + description: "save a rendered version of the template along with the dataset " + newName: + type: boolean + description: "new dataset only, don't create a commit on an existing dataset, name will be unused " + ParseError: type: object properties: - sheetName: + Message: type: string Citation: @@ -3475,6 +3528,30 @@ components: email: type: string + TransformStep: + type: object + properties: + name: + type: string + + path: + type: string + + syntax: + type: string + + category: + type: string + + script: + type: object + + PreviewParams: + type: object + properties: + ref: + type: string + ### Response Schemas ## Base APIResponse: diff --git a/api/spec/testdata/aggregate.json b/api/spec/testdata/aggregate.json index fccf4bd3d..7652a0f04 100644 --- a/api/spec/testdata/aggregate.json +++ b/api/spec/testdata/aggregate.json @@ -28,6 +28,20 @@ } } }, + { + "endpoint": "/collection/get", + "method": "POST", + "headers": { + "Content-Type": "application/json" + }, + "body": { + "ref": "peer/movies" + }, + "expect": { + "code": 200, + "Content-Type": "application/json" + } + }, { "endpoint": "/diff", "method": "POST", @@ -62,4 +76,4 @@ } } } -] \ No newline at end of file +] From 6dc15c80d604a55d068a81b3b11710be7737271e 
Mon Sep 17 00:00:00 2001 From: ramfox Date: Thu, 2 Sep 2021 11:23:55 -0400 Subject: [PATCH 6/6] feat(api): add `ETDatasetDownload` when a user hits certain "get" endpoints The frontend considers using the "http://localhost:2503/ds/get/username/dataset_name/body", "http://localhost:2503/ds/get/username/dataset_name/body.csv" and "http://localhost:2503/ds/get/username/dataset_name?format=zip" endpoints as performing a "dataset download". These events need to be tracked in the collection, so we emit the `ETDatasetDownload` event for each circumstance. --- api/handlers.go | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/api/handlers.go b/api/handlers.go index 5cf13c642..9e9544e72 100644 --- a/api/handlers.go +++ b/api/handlers.go @@ -1,6 +1,7 @@ package api import ( + "context" "encoding/json" "fmt" "io/ioutil" @@ -8,6 +9,7 @@ import ( "github.com/qri-io/qri/api/util" "github.com/qri-io/qri/base/archive" + "github.com/qri-io/qri/event" "github.com/qri-io/qri/lib" ) @@ -38,6 +40,7 @@ func GetBodyCSVHandler(inst *lib.Instance) http.HandlerFunc { util.RespondWithError(w, err) return } + publishDownloadEvent(r.Context(), inst, p.Ref) writeFileResponse(w, outBytes, "body.csv", "csv") } } @@ -71,6 +74,8 @@ func GetHandler(inst *lib.Instance, routePrefix string) http.HandlerFunc { util.RespondWithError(w, err) return } + + publishDownloadEvent(r.Context(), inst, p.Ref) writeFileResponse(w, outBytes, "body.csv", "csv") return @@ -87,6 +92,7 @@ func GetHandler(inst *lib.Instance, routePrefix string) http.HandlerFunc { util.RespondWithError(w, err) return } + publishDownloadEvent(r.Context(), inst, p.Ref) writeFileResponse(w, zipResults.Bytes, zipResults.GeneratedName, "zip") return @@ -192,3 +198,12 @@ func arrayContains(subject []string, target string) bool { } return false } + +func publishDownloadEvent(ctx context.Context, inst *lib.Instance, refStr string) { + ref, _, err := inst.ParseAndResolveRef(ctx, refStr, "local") + if err != nil { + 
log.Debugw("publishDownloadEvent: unable to resolve ref", "refstring", refStr, "error", err)
+		return
+	}
+	inst.Bus().Publish(ctx, event.ETDatasetDownload, ref.InitID)
+}