-
Notifications
You must be signed in to change notification settings - Fork 196
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
yamldot: Dotted-path YAML manipulation #4465
Open
weikanglim
wants to merge
2
commits into
Azure:main
Choose a base branch
from
weikanglim:weilim-yamlnode
base: main
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
Open
Changes from all commits
Commits
Show all changes
2 commits
Select commit
Hold shift + click to select a range
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -227,5 +227,6 @@ webfrontend | |
westus2 | ||
wireinject | ||
yacspin | ||
yamlnode | ||
ymlt | ||
zerr |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,203 @@ | ||
// Copyright (c) Microsoft Corporation. All rights reserved. | ||
// Licensed under the MIT License. | ||
|
||
// yamlnode allows for manipulation of YAML nodes using a dotted-path syntax. | ||
// | ||
// Examples of dotted-path syntax: | ||
// - a.object.key | ||
// - b.item_list[1] | ||
package yamlnode | ||
|
||
import ( | ||
"errors" | ||
"fmt" | ||
"strconv" | ||
"strings" | ||
|
||
"github.com/braydonk/yaml" | ||
) | ||
|
||
var ErrNodeNotFound = errors.New("node not found") | ||
|
||
// ErrNodeWrongKind is returned when the node kind is not as expected. | ||
// | ||
// This error may be useful for nodes that have multiple possible kinds. | ||
var ErrNodeWrongKind = errors.New("unexpected node kind") | ||
|
||
// Find retrieves a node at the given path. | ||
func Find(root *yaml.Node, path string) (*yaml.Node, error) { | ||
parts, err := parsePath(path) | ||
if err != nil { | ||
return nil, err | ||
} | ||
|
||
found := find(root, parts) | ||
if found == nil { | ||
return nil, fmt.Errorf("%w: %s", ErrNodeNotFound, path) | ||
} | ||
|
||
return found, nil | ||
} | ||
|
||
// Set sets the node at the given path to the provided value. | ||
func Set(root *yaml.Node, path string, value *yaml.Node) error { | ||
parts, err := parsePath(path) | ||
if err != nil { | ||
return err | ||
} | ||
|
||
// find the anchor node | ||
anchor := find(root, parts[:len(parts)-1]) | ||
if anchor == nil { | ||
return fmt.Errorf("%w: %s", ErrNodeNotFound, path) | ||
} | ||
|
||
// set the node | ||
seek, isKey := parts[len(parts)-1].(string) | ||
idx, isSequence := parts[len(parts)-1].(int) | ||
|
||
if isKey { | ||
if anchor.Kind != yaml.MappingNode { | ||
return fmt.Errorf("%w: %s is not a mapping node", ErrNodeWrongKind, parts[len(parts)-1]) | ||
} | ||
|
||
for i := 0; i < len(anchor.Content); i += 2 { | ||
if anchor.Content[i].Value == seek { | ||
anchor.Content[i+1] = value | ||
return nil | ||
} | ||
} | ||
|
||
anchor.Content = append(anchor.Content, &yaml.Node{Kind: yaml.ScalarNode, Value: seek}) | ||
anchor.Content = append(anchor.Content, value) | ||
} else if isSequence { | ||
if anchor.Kind != yaml.SequenceNode { | ||
return fmt.Errorf("%w: %s is not a sequence node", ErrNodeWrongKind, parts[len(parts)-1]) | ||
} | ||
|
||
if idx < 0 || idx > len(anchor.Content) { | ||
return fmt.Errorf("array index out of bounds: %d", idx) | ||
} | ||
|
||
anchor.Content[idx] = value | ||
} | ||
|
||
return nil | ||
} | ||
|
||
// Append appends a node to the sequence (array) node at the given path. | ||
// | ||
// If the node at the path is not a sequence node, ErrNodeWrongKind is returned. | ||
func Append(root *yaml.Node, path string, node *yaml.Node) error { | ||
parts, err := parsePath(path) | ||
if err != nil { | ||
return err | ||
} | ||
|
||
// find the anchor node | ||
found := find(root, parts) | ||
if found == nil { | ||
return fmt.Errorf("%w: %s", ErrNodeNotFound, path) | ||
} | ||
|
||
if found.Kind != yaml.SequenceNode { | ||
return fmt.Errorf("%w %d for append", ErrNodeWrongKind, found.Kind) | ||
} | ||
|
||
found.Content = append(found.Content, node) | ||
return nil | ||
} | ||
|
||
// Encode encodes a value into a YAML node. | ||
func Encode(value interface{}) (*yaml.Node, error) { | ||
var node yaml.Node | ||
err := node.Encode(value) | ||
if err != nil { | ||
return nil, fmt.Errorf("encoding yaml node: %w", err) | ||
} | ||
|
||
return &node, nil | ||
} | ||
|
||
func find(current *yaml.Node, parts []any) *yaml.Node { | ||
if len(parts) == 0 { | ||
// we automatically skip the document node to avoid having to specify it in the path | ||
if current.Kind == yaml.DocumentNode { | ||
return current.Content[0] | ||
} | ||
|
||
return current | ||
} | ||
|
||
seek, _ := parts[0].(string) | ||
idx, isArray := parts[0].(int) | ||
|
||
switch current.Kind { | ||
case yaml.DocumentNode: | ||
// we automatically skip the document node to avoid having to specify it in the path | ||
return find(current.Content[0], parts) | ||
case yaml.MappingNode: | ||
for i := 0; i < len(current.Content); i += 2 { | ||
if current.Content[i].Value == seek { | ||
return find(current.Content[i+1], parts[1:]) | ||
} | ||
} | ||
case yaml.SequenceNode: | ||
if isArray && idx < len(current.Content) { | ||
return find(current.Content[idx], parts[1:]) | ||
} | ||
} | ||
|
||
return nil | ||
} | ||
|
||
// parsePath parses a dotted path into a slice of parts, where each part is either a string or an integer. | ||
// The integer parts represent array indexes, and the string parts represent keys. | ||
func parsePath(path string) ([]any, error) { | ||
if path == "" { | ||
return nil, errors.New("empty path") | ||
} | ||
|
||
// future: support escaping dots | ||
parts := strings.Split(path, ".") | ||
expanded, err := expandArrays(parts) | ||
if err != nil { | ||
return nil, err | ||
} | ||
|
||
return expanded, nil | ||
} | ||
|
||
// expandArrays expands array indexing into individual elements. | ||
func expandArrays(parts []string) (expanded []any, err error) { | ||
expanded = make([]interface{}, 0, len(parts)) | ||
for _, s := range parts { | ||
before, after := cutBrackets(s) | ||
expanded = append(expanded, before) | ||
|
||
if len(after) > 0 { | ||
content := after[1 : len(after)-1] | ||
idx, err := strconv.Atoi(content) | ||
if err != nil { | ||
return nil, fmt.Errorf("invalid array index: %s in %s", content, after) | ||
} | ||
|
||
expanded = append(expanded, idx) | ||
} | ||
} | ||
|
||
return expanded, nil | ||
} | ||
|
||
// cutBrackets splits a string into two parts, before the brackets, and after the brackets. | ||
func cutBrackets(s string) (before string, after string) { | ||
if len(s) > 0 && s[len(s)-1] == ']' { // reverse check for faster exit | ||
for i := len(s) - 1; i >= 0; i-- { | ||
if s[i] == '[' { | ||
return s[:i], s[i:] | ||
} | ||
} | ||
} | ||
|
||
return s, "" | ||
} |
Oops, something went wrong.
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Just FYI, I wrote this after not finding any suitable libraries out there that we could benefit from directly:
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm asuming the expectation from the consuption side is to call
Encode( someYaml )
to get the yaml.node abstracion and then work directly with that object to set, find, append, etc. (using dotted-path syntax) and then write the ouput yaml at any moment.Can we just use the
Config
interface we have already, which works withmap[string]any
?The expectation would be to parse a YAML file into a
Config
object. The config object already supports dottet-notation. Then, when you want a YAML back, you would call Raw() and use the YAML library to produce the yaml out of it.I think that's what we are doing for JSON (for azd config) and should be the same for YAML
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We would lose important AST metadata (comments, whitespacing) that we'd want to preserve while unmarshaling a YAML structure into any different type, including
Config
.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The use case here is to programmatically manipulate an existing YAML file. While doing this, we want to preserve as much of the file content as possible. Perhaps the only acceptable thing for us to do is to format the file in a consistent way, like
gofmt
. Otherwise, we should preserve as much of the existing file as possible.For these reasons, we want to interact with YAML at the AST layer (syntax) and not at the semantic layer.
The other thing I'd mention is that in our defacto library for
yaml
,Unmarshal
will not preserve items like binary data, and multiline blocks. These items ofyaml
are also captured here as part of theyamlfmt
tool.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I wonder if this is enough reason to move the resources definition out of
Azure.yaml
I don't think it is a good idea, neither a good design/practice, to have a file that is both
auto-generated
andmanually-human-created
. Usually, whatever that is system-autogenerated has a title about it and it's not expected to be manually manipulated.It looks like it might be appreciated by customers, but I think it is a ticket for easy trouble and bugs.
For example, after customers run
azd init
to generate azure.yaml, it is not expected that if you runazd init
again, azd would save any changes you previously added to azure.yaml.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Good point, but that isn't the design being proposed here, we won't be regenerating files from scratch. We are modifying an existing file with an
azd add
gesture.Do you feel that any CLI configuration file that can be manually edited inside the file, as well as via it's CLI
config
command to be an exception to this rule?Another popular example here that comes to mind is VSCode's UI-based editor that builds on top of an existing
settings.json
file. A programmatic layer built on top of a file layer.Also, consider
go.mod
-- it's a configuration file that is auto generated, can be manually edited, and programmatically updated as well.For various reasons, I wouldn't recommend this. In my mind, our north star goal is to create a complete "app model" in azure.yaml. Splitting the file wouldn't necessarily move us in that direction.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Leaving a potential suggestion here: if we're unhappy about having these types of lower-level libraries in azd, I'm also more than happy to tender this as a personal library so that we shift the maintenance burden elsewhere.
The thing I'd point out is that the deeper we can integrate with YAML as a language (which we will have to do by simply supporting AKS), the more feature-heavy rich experiences we can offer. For that reason, I would recommend us developing technologies in this area.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
My push is not about the new library, but the expected experience.
In the mentioned examples, since it is JSON, comments are not allowed. And if you manually add extra spaces to it, it is re-formated as soon you use the VSCode settings UI. I think it can even change the ordering of the config.
In that model, the user needs to adjust to how the system expect those files, never the other way around.
I'm OK with expecting folks to manually update azure.yaml, but I am not happy creating the expectation for Folks that the system will also update the file and keep all the details, like ordering, style, spaces, comments, etc. etc.
As a user, I know that I can manually update system/app files, but I need to make sure that I don't break anything and the only thing that I expect is that it makes the system/app to work the way I want. I don't care about maintaining the config file with my own style