Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion packages/ddex/ingester/common/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,8 @@ type Release struct {
PublishErrors []string `bson:"publish_errors"`
FailureCount int `bson:"failure_count"`
ReleaseStatus string `bson:"release_status"`
CreatedAt time.Time `bson:"created_at"`
IsUpdate bool `bson:"is_update"`
LastParsed time.Time `bson:"last_parsed"`

// Parsed from the release's XML
ReleaseProfile ReleaseProfile `bson:"release_profile"` // "ReleaseProfileVersionId" from the DDEX XML
Expand Down
20 changes: 12 additions & 8 deletions packages/ddex/ingester/constants/constants.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,18 @@ const (
)

// Values stored in a release's ReleaseStatus field.
const (
ReleaseStatusAwaitingParse = "awaiting_parse" // The release is waiting to be parsed
ReleaseStatusAwaitingPublish = "awaiting_publish" // The release is waiting to be uploaded to Audius
ReleaseStatusErrorUserMatch = "error_user_match" // The release didn't have a user that matched with an OAuthed Audius user
ReleaseStatusErrorGenreMatch = "error_genre_match" // The release didn't have a genre that matched with an Audius genre
ReleaseStatusErrorParsing = "error_parsing" // Some other error occurred during parsing. See ParseErrors
ReleaseStatusErrorDuringUpload = "error_during_upload" // An error occurred while trying to publish to Audius
ReleaseStatusErrorAfterUpload = "error_after_upload" // The release was published to Audius, but there was an error after publishing
ReleaseStatusPublished = "published" // The release was successfully published to Audius
ReleaseStatusAwaitingParse = "awaiting_parse" // The release is waiting to be parsed
ReleaseStatusAwaitingPublish = "awaiting_publish" // The release is waiting to be uploaded to Audius
ReleaseStatusErrorUserMatch = "error_user_match" // The release didn't have a user that matched with an OAuthed Audius user
ReleaseStatusErrorGenreMatch = "error_genre_match" // The release didn't have a genre that matched with an Audius genre
ReleaseStatusErrorParsing = "error_parsing" // Some other error occurred during parsing. See ParseErrors
ReleaseStatusFailedDuringUpload = "failed_during_upload" // An error occurred while trying to publish to Audius
ReleaseStatusFailedAfterUpload = "failed_after_upload" // The release was published to Audius, but there was an error after publishing
ReleaseStatusPublished = "published" // The release was successfully published to Audius
ReleaseStatusAwaitingDelete = "awaiting_delete" // The release is waiting to be removed from Audius
ReleaseStatusDeleted = "deleted" // The release was successfully removed from Audius
ReleaseStatusFailedDuringDelete = "failed_during_delete" // An error occurred while trying to delete from Audius
ReleaseStatusFailedAfterDelete = "failed_after_delete" // The release was removed from Audius, but there was an error after deletion
)

var SkipFiles = []string{".DS_STORE", "__MACOSX"}
2 changes: 1 addition & 1 deletion packages/ddex/ingester/crawler/crawler.go
Original file line number Diff line number Diff line change
Expand Up @@ -230,7 +230,7 @@ func (c *Crawler) upsertXML(key string, lastModified time.Time) (err error) {
ReleaseID: strings.TrimSuffix(filepath.Base(key), ".xml"),
XMLRemotePath: remotePath,
RawXML: primitive.Binary{Data: xmlBytes, Subtype: 0x00},
CreatedAt: lastModified,
LastParsed: lastModified,
ParseErrors: []string{},
PublishErrors: []string{},
FailureCount: 0,
Expand Down
107 changes: 81 additions & 26 deletions packages/ddex/ingester/parser/ern38x.go
Original file line number Diff line number Diff line change
@@ -1,15 +1,20 @@
package parser

import (
	"context"
	"errors"
	"fmt"
	"ingester/common"
	"ingester/constants"
	"reflect"
	"regexp"
	"slices"
	"sort"
	"strconv"
	"time"

	"github.com/antchfx/xmlquery"
	"go.mongodb.org/mongo-driver/bson"
	"go.mongodb.org/mongo-driver/mongo"
)

// SoundRecording represents the parsed details of a sound recording
Expand Down Expand Up @@ -103,9 +108,45 @@ type ResourceGroupContentItem struct {
Image *Image
}

// purgeERN38x parses the given XML data and marks a release to be taken down from Audius.
// NOTE: This expects the ERN 3 format. See https://kb.ddex.net/implementing-each-standard/electronic-release-notification-message-suite-(ern)/ern-3-explained/
func purgeERN38x(doc *xmlquery.Node, release *common.Release, releasesColl *mongo.Collection) error {
// Parse <Release>s from <ReleaseList>
releaseNodes := xmlquery.Find(doc, "//ReleaseList/Release")
if len(releaseNodes) == 0 {
return fmt.Errorf("no <Release> found")
}
for _, rNode := range releaseNodes {
releaseIDNode := rNode.SelectElement("ReleaseId")
if releaseIDNode == nil {
return fmt.Errorf("no <ReleaseId> found")
}
releaseIDs := getReleaseIDs(releaseIDNode)

@michellebrier michellebrier Apr 30, 2024

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

parse all known release IDs (GRid, ISRC, ICPN) in a <ReleaseId> node. When required, as is here, try each of these IDs when searching the releases collection until the first match.

The _id in a releases document is the ID we pull from the filename. We don't actually know which ID type this corresponds to and in the examples we have, there are often multiple ID types listed in the <ReleaseId> node.

releaseIDsVal := reflect.ValueOf(releaseIDs)
for i := 0; i < releaseIDsVal.NumField(); i++ {
releaseID := releaseIDsVal.Field(i).String()
if releaseID == "" {
continue
}

// Take down the release with this ID, if any
existingRelease, err := getExistingRelease(releaseID, releasesColl)
if err == mongo.ErrNoDocuments {
continue
} else if err != nil {
return err
} else {
takedownRelease(existingRelease, release)
break
}
}
}
return nil
}

// parseERN38x parses the given XML data and returns a release ready to be uploaded to Audius.
// NOTE: This expects the ERN 3 format. See https://kb.ddex.net/implementing-each-standard/electronic-release-notification-message-suite-(ern)/ern-3-explained/
func parseERN38x(doc *xmlquery.Node, crawledBucket string, release *common.Release) (errs []error) {
func parseERN38x(doc *xmlquery.Node, crawledBucket string, release *common.Release, releasesColl *mongo.Collection) (errs []error) {
var (
soundRecordings []SoundRecording
images []Image
Expand Down Expand Up @@ -173,8 +214,19 @@ func parseERN38x(doc *xmlquery.Node, crawledBucket string, release *common.Relea
// Parse <ReleaseDeal>s from <DealList>
dealNodes := xmlquery.Find(doc, "//DealList/ReleaseDeal")
if len(dealNodes) == 0 {
errs = append(errs, fmt.Errorf("no <ReleaseDeal> found"))
return
// Check for an existing release to determine whether this is a takedown request or an invalid NewReleaseMessage
existingRelease, err := getExistingRelease(release.ReleaseID, releasesColl)
if err == mongo.ErrNoDocuments {
// This is a NewReleaseMessage that should have a deal
errs = append(errs, fmt.Errorf("no <ReleaseDeal> found"))
return
} else if err != nil {
errs = append(errs, err)
return
} else {
// This is a takedown request. Mark the release for deletion
takedownRelease(existingRelease, release)
}
}
for _, dNode := range dealNodes {
err := processDealNode(dNode, release)
Expand Down Expand Up @@ -444,7 +496,6 @@ func processReleaseNode(rNode *xmlquery.Node, soundRecordings *[]SoundRecording,
releaseRef := safeInnerText(rNode.SelectElement("ReleaseReference"))
globalOriginalReleaseDateStr := safeInnerText(rNode.SelectElement("GlobalOriginalReleaseDate")) // Some suppliers (not Fuga) use this
durationISOStr := safeInnerText(rNode.SelectElement("Duration")) // Only the Sony example uses this. Other suppliers use it in the SoundRecording
isrc := safeInnerText(rNode.SelectElement("ReleaseId/ISRC"))
copyrightYear := safeInnerText(rNode.SelectElement("CLine/Year"))
copyrightText := safeInnerText(rNode.SelectElement("CLine/CLineText"))
producerCopyrightYear := safeInnerText(rNode.SelectElement("PLine/Year"))
Expand Down Expand Up @@ -613,28 +664,12 @@ func processReleaseNode(rNode *xmlquery.Node, soundRecordings *[]SoundRecording,
}

r = &common.ParsedReleaseElement{
IsMainRelease: rNode.SelectAttr("IsMainRelease") == "true",
ReleaseRef: releaseRef,
ReleaseDate: releaseDate,
Resources: resources,
ReleaseType: releaseType,
ReleaseIDs: common.ReleaseIDs{
PartyID: safeInnerText(rNode.SelectElement("ReleaseId/PartyId")),
CatalogNumber: safeInnerText(rNode.SelectElement("ReleaseId/CatalogNumber")),
ICPN: safeInnerText(rNode.SelectElement("ReleaseId/ICPN")),
GRid: safeInnerText(rNode.SelectElement("ReleaseId/GRid")),
ISAN: safeInnerText(rNode.SelectElement("ReleaseId/ISAN")),
ISBN: safeInnerText(rNode.SelectElement("ReleaseId/ISBN")),
ISMN: safeInnerText(rNode.SelectElement("ReleaseId/ISMN")),
ISRC: isrc,
ISSN: safeInnerText(rNode.SelectElement("ReleaseId/ISSN")),
ISTC: safeInnerText(rNode.SelectElement("ReleaseId/ISTC")),
ISWC: safeInnerText(rNode.SelectElement("ReleaseId/ISWC")),
MWLI: safeInnerText(rNode.SelectElement("ReleaseId/MWLI")),
SICI: safeInnerText(rNode.SelectElement("ReleaseId/SICI")),
ProprietaryID: safeInnerText(rNode.SelectElement("ReleaseId/ProprietaryId")),
},

IsMainRelease: rNode.SelectAttr("IsMainRelease") == "true",
ReleaseRef: releaseRef,
ReleaseDate: releaseDate,
Resources: resources,
ReleaseType: releaseType,
ReleaseIDs: getReleaseIDs(rNode.SelectElement("ReleaseId")),
DisplayTitle: safeInnerText(releaseDetails.SelectElement("Title[@TitleType='DisplayTitle']/TitleText")), // TODO: This assumes there aren't multiple titles in different languages (ie, different `LanguageAndScriptCode` attributes)
DisplaySubtitle: stringPtr(safeInnerText(releaseDetails.SelectElement("Title[@TitleType='DisplayTitle']/SubTitle"))),
FormalTitle: stringPtr(safeInnerText(releaseDetails.SelectElement("Title[@TitleType='FormalTitle']/TitleText"))),
Expand Down Expand Up @@ -1240,6 +1275,26 @@ func processResourceGroup(node *xmlquery.Node, parentSequence int, contentItems
}
}

// getExistingRelease fetches the document in releasesColl whose _id equals releaseID and
// decodes it into a common.Release. The decode error is returned unchanged; callers in this
// file treat mongo.ErrNoDocuments as "no such release". The returned release is only
// meaningful when the error is nil.
func getExistingRelease(releaseID string, releasesColl *mongo.Collection) (common.Release, error) {
	var found common.Release
	result := releasesColl.FindOne(context.Background(), bson.M{"_id": releaseID})
	decodeErr := result.Decode(&found)
	return found, decodeErr
}

// takedownRelease sets the status of releaseToUpsert according to the state of the release
// that is already stored (existingRelease).
//
// If the stored release is, or may still be, live on Audius, releaseToUpsert is marked
// awaiting_delete and inherits EntityID and SDKUploadMetadata from the stored release.
// Otherwise it is marked deleted directly.
func takedownRelease(existingRelease common.Release, releaseToUpsert *common.Release) {
	switch existingRelease.ReleaseStatus {
	case constants.ReleaseStatusPublished,
		constants.ReleaseStatusFailedAfterUpload,
		constants.ReleaseStatusFailedDuringDelete,
		// A takedown seen again before the pending delete ran must stay pending;
		// marking it deleted here would leave the release live on Audius.
		constants.ReleaseStatusAwaitingDelete:
		// Has been published to Audius. Mark for deletion by the publisher
		releaseToUpsert.ReleaseStatus = constants.ReleaseStatusAwaitingDelete
		releaseToUpsert.EntityID = existingRelease.EntityID
		releaseToUpsert.SDKUploadMetadata = existingRelease.SDKUploadMetadata
	default:
		// Has not yet been published to Audius. Mark as deleted
		releaseToUpsert.ReleaseStatus = constants.ReleaseStatusDeleted
	}
}

func safeInnerText(node *xmlquery.Node) string {
if node != nil {
return node.InnerText()
Expand Down
Loading