Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 3 additions & 11 deletions fieldpath/element.go
Original file line number Diff line number Diff line change
Expand Up @@ -226,27 +226,19 @@ func KeyByFields(nameValues ...interface{}) *value.FieldList {
// PathElementSet is a set of path elements.
// TODO: serialize as a list.
type PathElementSet struct {
members sortedPathElements
members []PathElement
}

func MakePathElementSet(size int) PathElementSet {
return PathElementSet{
members: make(sortedPathElements, 0, size),
members: make([]PathElement, 0, size),
}
}

type sortedPathElements []PathElement

// Implement the sort interface; this would permit bulk creation, which would
// be faster than doing it one at a time via Insert.
func (spe sortedPathElements) Len() int { return len(spe) }
func (spe sortedPathElements) Less(i, j int) bool { return spe[i].Less(spe[j]) }
func (spe sortedPathElements) Swap(i, j int) { spe[i], spe[j] = spe[j], spe[i] }

// Copy returns a copy of the PathElementSet.
// This is not a full deep copy as any contained value.Value is not copied.
func (s PathElementSet) Copy() PathElementSet {
out := make(sortedPathElements, len(s.members))
out := make([]PathElement, len(s.members))
for i := range s.members {
out[i] = s.members[i].Copy()
}
Expand Down
151 changes: 58 additions & 93 deletions fieldpath/serialize-pe.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,103 +17,70 @@ limitations under the License.
package fieldpath

import (
"bytes"
"errors"
"fmt"
"io"
"strconv"
"strings"

jsoniter "github.com/json-iterator/go"
"github.com/go-json-experiment/json"
"sigs.k8s.io/structured-merge-diff/v6/value"
)

var ErrUnknownPathElementType = errors.New("unknown path element type")

const (
// Field indicates that the content of this path element is a field's name
peField = "f"
peField = 'f'

// Value indicates that the content of this path element is a field's value
peValue = "v"
peValue = 'v'

// Index indicates that the content of this path element is an index in an array
peIndex = "i"
peIndex = 'i'

// Key indicates that the content of this path element is a key value map
peKey = "k"
peKey = 'k'

// Separator separates the type of a path element from the contents
peSeparator = ":"
peSeparator = ':'
)

var (
peFieldSepBytes = []byte(peField + peSeparator)
peValueSepBytes = []byte(peValue + peSeparator)
peIndexSepBytes = []byte(peIndex + peSeparator)
peKeySepBytes = []byte(peKey + peSeparator)
peSepBytes = []byte(peSeparator)
peFieldSepBytes = []byte{peField, peSeparator}
peValueSepBytes = []byte{peValue, peSeparator}
peIndexSepBytes = []byte{peIndex, peSeparator}
peKeySepBytes = []byte{peKey, peSeparator}
)

// readJSONIter reads a Value from a JSON iterator.
// DO NOT EXPORT
// TODO: eliminate this https://github.com/kubernetes-sigs/structured-merge-diff/issues/202
func readJSONIter(iter *jsoniter.Iterator) (value.Value, error) {
v := iter.Read()
if iter.Error != nil && iter.Error != io.EOF {
return nil, iter.Error
}
return value.NewValueInterface(v), nil
}

// writeJSONStream writes a value into a JSON stream.
// DO NOT EXPORT
// TODO: eliminate this https://github.com/kubernetes-sigs/structured-merge-diff/issues/202
func writeJSONStream(v value.Value, stream *jsoniter.Stream) {
stream.WriteVal(v.Unstructured())
}

// DeserializePathElement parses a serialized path element
func DeserializePathElement(s string) (PathElement, error) {
Copy link
Contributor

@liggitt liggitt Dec 4, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@jpbetz @lalitc375: let's augment the unit tests that exercise successful DeserializePathElement calls (TestPathElementRoundTrip?) to also assert the in-memory state returned, not just the round-trip

b := []byte(s)
if len(b) < 2 {
return PathElement{}, errors.New("key must be 2 characters long:")
if len(s) < 2 {
return PathElement{}, errors.New("key must be 2 characters long")
}
typeSep, b := b[:2], b[2:]
if typeSep[1] != peSepBytes[0] {
typeSep0, typeSep1 := s[0], s[1]
if typeSep1 != peSeparator {
return PathElement{}, fmt.Errorf("missing colon: %v", s)
}
switch typeSep[0] {
switch typeSep0 {
case peFieldSepBytes[0]:
// Slice s rather than convert b, to save on
// allocations.
str := s[2:]
return PathElement{
FieldName: &str,
}, nil
case peValueSepBytes[0]:
iter := readPool.BorrowIterator(b)
defer readPool.ReturnIterator(iter)
v, err := readJSONIter(iter)
if err != nil {
var v any
if err := json.UnmarshalRead(strings.NewReader(s[2:]), &v); err != nil {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@jpbetz @lalitc375: can we add test-cases to ensure behave identically between readJSONIter / json.UnmarshalRead

  • zero-length - json-iter masks, json/v2 returns EOF error? https://go.dev/play/p/D80jALZ7ZRq
  • non-zero length with EOF (not sure this is possible)
  • leading / trailing whitespace in value
  • multi-token should fail identically

return PathElement{}, err
}
return PathElement{Value: &v}, nil
interfaceValue := value.NewValueInterface(v)
return PathElement{Value: &interfaceValue}, nil
case peKeySepBytes[0]:
iter := readPool.BorrowIterator(b)
defer readPool.ReturnIterator(iter)
fields := value.FieldList{}

iter.ReadObjectCB(func(iter *jsoniter.Iterator, key string) bool {
v, err := readJSONIter(iter)
if err != nil {
iter.Error = err
return false
}
fields = append(fields, value.Field{Name: key, Value: v})
return true
})
fields.Sort()
return PathElement{Key: &fields}, iter.Error
var fields value.FieldList
if err := json.UnmarshalRead(strings.NewReader(s[2:]), &fields); err != nil {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@jpbetz @lalitc375: can we add test-cases to exercise

  • zero-length
  • EOF
  • leading / trailing whitespace in value
  • multi-token should fail identically

return PathElement{}, err
}
return PathElement{Key: &fields}, nil
case peIndexSepBytes[0]:
i, err := strconv.Atoi(s[2:])
if err != nil {
Expand All @@ -127,60 +94,58 @@ func DeserializePathElement(s string) (PathElement, error) {
}
}

var (
readPool = jsoniter.NewIterator(jsoniter.ConfigCompatibleWithStandardLibrary).Pool()
writePool = jsoniter.NewStream(jsoniter.ConfigCompatibleWithStandardLibrary, nil, 1024).Pool()
)

// SerializePathElement serializes a path element
func SerializePathElement(pe PathElement) (string, error) {
buf := strings.Builder{}
err := serializePathElementToWriter(&buf, pe)
return buf.String(), err
serializer := pathElementSerializer{}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

there was another question about whether the pool is giving us value, but if we end up keeping it, is there a reason to construct this instance here instead of using one from the pool?

if err := serializer.serialize(pe); err != nil {
return "", err
}
return serializer.builder.String(), nil
}

func serializePathElementToWriter(w io.Writer, pe PathElement) error {
stream := writePool.BorrowStream(w)
defer writePool.ReturnStream(stream)
type pathElementSerializer struct {
builder bytes.Buffer
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

since we ultimately want a string out of SerializePathElement, is it possible to use a strings.Builder here and get the same performance out of json/v2?

let's try each of these steps, benchmarking as we go:

  1. switch to strings.Builder
  2. see if we still need the pathElementSerializer pool and drop it if not

fastValue value.FastMarshalValue
}

func (pes *pathElementSerializer) reset() {
pes.builder.Reset()
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If we end up keeping the pool, if the builder is "big", we want to nil it out instead of Resetting it and retaining a potentially very large buffer. Make a constant, set it to something like 1kb for now, and if the builder is bigger than that, nil it out instead of calling reset

pes.fastValue.Value = nil
}

func (pes *pathElementSerializer) serialize(pe PathElement) error {
switch {
case pe.FieldName != nil:
if _, err := stream.Write(peFieldSepBytes); err != nil {
if _, err := pes.builder.Write(peFieldSepBytes); err != nil {
return err
}
if _, err := pes.builder.WriteString(*pe.FieldName); err != nil {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@jpbetz @lalitc375: ensure there's test coverage hitting this path with values like these:

  • values that must be escaped/encoded ("\u... \n ...")
  • multi-byte keys ("Iñtërnâtiônàlizætiøn,💝🐹🌇⛔")
  • keys containing bytes that are invalid unicode characters

we want to make sure the new code behaves like json-iterator (or that we understand the differences)

return err
}
stream.WriteRaw(*pe.FieldName)
case pe.Key != nil:
if _, err := stream.Write(peKeySepBytes); err != nil {
if _, err := pes.builder.Write(peKeySepBytes); err != nil {
return err
}
stream.WriteObjectStart()

for i, field := range *pe.Key {
if i > 0 {
stream.WriteMore()
}
stream.WriteObjectField(field.Name)
writeJSONStream(field.Value, stream)
if err := json.MarshalWrite(&pes.builder, pe.Key, json.Deterministic(true)); err != nil {
return err
}
stream.WriteObjectEnd()
case pe.Value != nil:
if _, err := stream.Write(peValueSepBytes); err != nil {
if _, err := pes.builder.Write(peValueSepBytes); err != nil {
return err
}
pes.fastValue.Value = pe.Value
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

are we only using the fastValue field to wrap the Value to get the FastMarshalValue#MarshalJSONTo signature?

it's pretty strange to mutate pathElementSerializer as part of calling serialize

is there a different way to get the custom encoding behavior without that struct field? something like one of these:

  • making FastMarshalValue a type alias instead of an instance, like json.MarshalWrite(&pes.builder, value.FastMarshalValue(pe.Value), json.Deterministic(true))
  • defining custom marshalers for reflectValue and unstructuredValue, like json.MarshalWrite(&pes.builder, pe.Value, json.Deterministic(true), json.WithMarshalers(... custom Value marshalers ...))

That would let us drop the fastValue struct field, avoid mutating the serializer, while still avoiding allocs of a new FastMarshalValue instance

cc @jpbetz @lalitc375 for help investigating these two options and picking one

if err := json.MarshalWrite(&pes.builder, &pes.fastValue, json.Deterministic(true)); err != nil {
return err
}
writeJSONStream(*pe.Value, stream)
case pe.Index != nil:
if _, err := stream.Write(peIndexSepBytes); err != nil {
if _, err := pes.builder.Write(peIndexSepBytes); err != nil {
return err
}
if _, err := pes.builder.WriteString(strconv.Itoa(*pe.Index)); err != nil {
return err
}
stream.WriteInt(*pe.Index)
default:
return errors.New("invalid PathElement")
}
b := stream.Buffer()
err := stream.Flush()
// Help jsoniter manage its buffers--without this, the next
// use of the stream is likely to require an allocation. Look
// at the jsoniter stream code to understand why. They were probably
// optimizing for folks using the buffer directly.
stream.SetBuffer(b[:0])
return err
return nil
}
Loading