diff --git a/README.md b/README.md
index 56786f5..f66dfc2 100644
--- a/README.md
+++ b/README.md
@@ -155,7 +155,7 @@ type Attributes interface {
 
 // Finder represents a set of methods for finding nodes.
 type Finder interface {
-	// Find searches for a single node in the parse tree based on the specified find method and filters.
+	// Find searches for the first matched node in the parse tree based on the specified find method and filters.
 	Find(FindMethod, TagFilter, ...Filter) Node
 
 	// FindN searches for up to n nodes in the parse tree based on the specified find method and filters.
@@ -164,7 +164,7 @@ type Finder interface {
 	// FindAll searches for all nodes in the parse tree based on the specified find method and filters.
 	FindAll(FindMethod, TagFilter, ...Filter) []Node
 
-	// FindString searches for a single text node in the parse tree based on the specified find method and filters.
+	// FindString searches for the first matched text node in the parse tree based on the specified find method and filters.
 	FindString(FindMethod, StringFilter) TextNode
 
 	// FindStringN searches for up to n text nodes in the parse tree based on the specified find method and filters.
@@ -172,6 +172,25 @@ type Finder interface {
 
 	// FindAllString searches for all text nodes in the parse tree based on the specified find method and filters.
 	FindAllString(FindMethod, StringFilter) []TextNode
+
+	// CSS selectors support
+
+	// Select searches for the first matched node in the parse tree based on the CSS selector.
+	// Panics if the selector cannot be parsed.
+	Select(string) Node
+
+	// SelectAll searches for all nodes in the parse tree based on the CSS selector.
+	// Panics if the selector cannot be parsed.
+	SelectAll(string) []Node
+
+	// XPath support
+
+	// XPath searches for all nodes that match the specified XPath expression. Panics if the expression cannot be parsed.
+	XPath(string) []Node
+
+	// Evaluate returns the result of the XPath expression.
+	// The result type of the expression is one of the following: bool, float64, string, *xpath.NodeIterator.
+	Evaluate(string) (any, error)
 }
 
 // FindMethod represents the method used to search for nodes in the parse tree.
@@ -223,6 +242,13 @@ type StringFilter interface {
 }
 ```
 
+## Credits
+
+This repo relies on the following third-party projects:
+
+ * [ericchiang/css](https://github.com/ericchiang/css)
+ * [antchfx/xpath](https://github.com/antchfx/xpath)
+
 ## License
 
 [The MIT License (MIT)](https://raw.githubusercontent.com/sunshineplan/node/main/LICENSE)
diff --git a/class_test.go b/class_test.go
index d06e6b0..9514d4b 100644
--- a/class_test.go
+++ b/class_test.go
@@ -9,11 +9,7 @@ func TestClass(t *testing.T) {
 	if nodes := soup.FindAll(0, A, Class("sister")); len(nodes) != 3 {
 		t.Errorf("expected nodes %d; got %d", 3, len(nodes))
 	} else {
-		expected := []string{
-			`<a href="http://example.com/elsie" class="sister" id="link1">Elsie</a>`,
-			`<a href="http://example.com/lacie" class="sister" id="link2">Lacie</a>`,
-			`<a href="http://example.com/tillie" class="sister" id="link3">Tillie</a>`,
-		}
+		expected := []string{elsie, lacie, tillie}
 		for i, node := range nodes {
 			if html := node.Readable(); html != expected[i] {
 				t.Errorf("expected html #%d %q; got %q", i, expected[i], html)
@@ -22,21 +18,15 @@ func TestClass(t *testing.T) {
 	}
 	if nodes := soup.FindAll(0, nil, Class(regexp.MustCompile("itl"))); len(nodes) != 1 {
 		t.Errorf("expected nodes %d; got %d", 1, len(nodes))
-	} else {
-		if html := nodes[0].Readable(); html != `<p class="title"><b>The Dormouse's story</b></p>` {
-			t.Errorf("expected html %q; got %q", `<p class="title"><b>The Dormouse's story</b></p>`, html)
-		}
+	} else if html := nodes[0].Readable(); html != `<p class="title"><b>The Dormouse's story</b></p>` {
+		t.Errorf("expected html %q; got %q", `<p class="title"><b>The Dormouse's story</b></p>`, html)
 	}
 	if nodes := soup.FindAll(0, A, Class(func(class string, node Node) bool {
 		return node.HasAttr("class") && len(class) == 6
 	})); len(nodes) != 3 {
 		t.Errorf("expected nodes %d; got %d", 3, len(nodes))
 	} else {
-		expected := []string{
-			`<a href="http://example.com/elsie" class="sister" id="link1">Elsie</a>`,
-			`<a href="http://example.com/lacie" class="sister" id="link2">Lacie</a>`,
-			`<a href="http://example.com/tillie" class="sister" id="link3">Tillie</a>`,
-		}
+		expected := []string{elsie, lacie, tillie}
 		for i, node := range nodes {
 			if html := node.Readable(); html != expected[i] {
 				t.Errorf("expected html #%d %q; got %q", i, expected[i], html)
@@ -46,11 +36,7 @@ func TestClass(t *testing.T) {
 	if nodes := soup.FindAll(0, A, Attr("class", "sister")); len(nodes) != 3 {
 		t.Errorf("expected nodes %d; got %d", 3, len(nodes))
 	} else {
-		expected := []string{
-			`<a href="http://example.com/elsie" class="sister" id="link1">Elsie</a>`,
-			`<a href="http://example.com/lacie" class="sister" id="link2">Lacie</a>`,
-			`<a href="http://example.com/tillie" class="sister" id="link3">Tillie</a>`,
-		}
+		expected := []string{elsie, lacie, tillie}
 		for i, node := range nodes {
 			if html := node.Readable(); html != expected[i] {
 				t.Errorf("expected html #%d %q; got %q", i, expected[i], html)
diff --git a/example_test.go b/example_test.go
index 7e0f8d3..4b0af49 100644
--- a/example_test.go
+++ b/example_test.go
@@ -19,7 +19,12 @@ func ExampleAttr() {
 	if err != nil {
 		log.Fatal(err)
 	}
-	if nodes := node.FindAll(0, nil, Attr("name", "email")); len(nodes) != 1 {
+	if nodes := node.SelectAll(`[name="email"]`); len(nodes) != 1 {
+		log.Fatalf("expected nodes %d; got %d", 1, len(nodes))
+	} else {
+		fmt.Println(nodes[0].Readable())
+	}
+	if nodes := node.XPath(`//*[@name="email"]`); len(nodes) != 1 {
 		log.Fatalf("expected nodes %d; got %d", 1, len(nodes))
 	} else {
 		fmt.Println(nodes[0].Readable())
@@ -27,6 +32,7 @@ func ExampleAttr() {
 	// Output:
 	//
foo!
 	//
+	//
 }
 
 func ExampleClass() {
diff --git a/filter_test.go b/filter_test.go
index 7ea0c22..348d8ab 100644
--- a/filter_test.go
+++ b/filter_test.go
@@ -11,10 +11,8 @@ import (
 func TestFilter(t *testing.T) {
 	if nodes := soup.FindAll(0, B); len(nodes) != 1 {
 		t.Errorf("expected b %d; got %d", 1, len(nodes))
-	} else {
-		if html := nodes[0].Readable(); html != "<b>The Dormouse's story</b>" {
-			t.Errorf("expected html %q; got %q", "<b>The Dormouse's story</b>", html)
-		}
+	} else if html := nodes[0].Readable(); html != "<b>The Dormouse's story</b>" {
+		t.Errorf("expected html %q; got %q", "<b>The Dormouse's story</b>", html)
 	}
 	if nodes := soup.FindAll(0, Tag(regexp.MustCompile("^b"))); len(nodes) != 2 {
 		t.Errorf("expected ^b %d; got %d", 2, len(nodes))
@@ -39,12 +37,7 @@ func TestFilter(t *testing.T) {
 	if nodes := soup.FindAll(0, Tags("a", "b")); len(nodes) != 4 {
 		t.Errorf("expected nodes %d; got %d", 4, len(nodes))
 	} else {
-		expected := []string{
-			"<b>The Dormouse's story</b>",
-			`<a href="http://example.com/elsie" class="sister" id="link1">Elsie</a>`,
-			`<a href="http://example.com/lacie" class="sister" id="link2">Lacie</a>`,
-			`<a href="http://example.com/tillie" class="sister" id="link3">Tillie</a>`,
-		}
+		expected := []string{"<b>The Dormouse's story</b>", elsie, lacie, tillie}
 		for i, node := range nodes {
 			if html := node.Readable(); html != expected[i] {
 				t.Errorf("expected html #%d %q; got %q", i, expected[i], html)
@@ -82,10 +75,7 @@ func TestFilter(t *testing.T) {
 	})); len(nodes) != 2 {
 		t.Errorf("expected nodes %d; got %d", 2, len(nodes))
 	} else {
-		expected := []string{
-			`<a href="http://example.com/elsie" class="sister" id="link1">Elsie</a>`,
-			`<a href="http://example.com/tillie" class="sister" id="link3">Tillie</a>`,
-		}
+		expected := []string{elsie, tillie}
 		for i, node := range nodes {
 			if html := node.Readable(); !strings.HasPrefix(html, expected[i]) {
 				t.Errorf("expected html #%d %q; got %q", i, expected[i], html)
diff --git a/finder.go b/finder.go
index 2daaa8a..ba58646 100644
--- a/finder.go
+++ b/finder.go
@@ -3,12 +3,15 @@ package node
 import (
 	"context"
 
+	"github.com/antchfx/htmlquery"
+	"github.com/antchfx/xpath"
+	"github.com/ericchiang/css"
 	"golang.org/x/net/html"
 )
 
 // Finder represents a set of methods for finding nodes.
 type Finder interface {
-	// Find searches for a single node in the parse tree based on the specified find method and filters.
+	// Find searches for the first matched node in the parse tree based on the specified find method and filters.
 	Find(FindMethod, TagFilter, ...Filter) Node
 
 	// FindN searches for up to n nodes in the parse tree based on the specified find method and filters.
@@ -17,7 +20,7 @@ type Finder interface {
 	// FindAll searches for all nodes in the parse tree based on the specified find method and filters.
 	FindAll(FindMethod, TagFilter, ...Filter) []Node
 
-	// FindString searches for a single text node in the parse tree based on the specified find method and filters.
+	// FindString searches for the first matched text node in the parse tree based on the specified find method and filters.
 	FindString(FindMethod, StringFilter) TextNode
 
 	// FindStringN searches for up to n text nodes in the parse tree based on the specified find method and filters.
@@ -25,6 +28,25 @@ type Finder interface {
 
 	// FindAllString searches for all text nodes in the parse tree based on the specified find method and filters.
 	FindAllString(FindMethod, StringFilter) []TextNode
+
+	// CSS selectors support
+
+	// Select searches for the first matched node in the parse tree based on the CSS selector.
+	// Panics if the selector cannot be parsed.
+	Select(string) Node
+
+	// SelectAll searches for all nodes in the parse tree based on the CSS selector.
+	// Panics if the selector cannot be parsed.
+	SelectAll(string) []Node
+
+	// XPath support
+
+	// XPath searches for all nodes that match the specified XPath expression. Panics if the expression cannot be parsed.
+	XPath(string) []Node
+
+	// Evaluate returns the result of the XPath expression.
+	// The result type of the expression is one of the following: bool, float64, string, *xpath.NodeIterator.
+	Evaluate(string) (any, error)
 }
 
 // FindMethod represents the method used to search for nodes in the parse tree.
@@ -177,3 +199,33 @@ func (n *htmlNode) FindAllString(method FindMethod, filter StringFilter) (res []
 	}
 	return
 }
+
+func (n *htmlNode) Select(sel string) Node {
+	nodes := n.SelectAll(sel)
+	if len(nodes) == 0 {
+		return nil
+	}
+	return nodes[0]
+}
+
+func (n *htmlNode) SelectAll(sel string) (res []Node) {
+	for _, i := range css.MustParse(sel).Select(n.Raw()) {
+		res = append(res, NewNode(i))
+	}
+	return
+}
+
+func (n *htmlNode) XPath(expr string) (res []Node) {
+	for _, i := range htmlquery.Find(n.Raw(), expr) {
+		res = append(res, NewNode(i))
+	}
+	return
+}
+
+func (n *htmlNode) Evaluate(expr string) (any, error) {
+	exp, err := xpath.Compile(expr)
+	if err != nil {
+		return nil, err
+	}
+	return exp.Evaluate(htmlquery.CreateXPathNavigator(n.Raw())), nil
+}
diff --git a/finder_test.go b/finder_test.go
index f470e93..ba44542 100644
--- a/finder_test.go
+++ b/finder_test.go
@@ -1,6 +1,7 @@
 package node
 
 import (
+	"reflect"
 	"regexp"
 	"strings"
 	"testing"
@@ -9,26 +10,18 @@
 func TestFindAll(t *testing.T) {
 	if nodes := soup.FindAll(0, Title); len(nodes) != 1 {
 		t.Errorf("expected nodes %d; got %d", 1, len(nodes))
-	} else {
-		if html := nodes[0].Readable(); html != "<title>The Dormouse's story</title>" {
-			t.Errorf("expected html %q; got %q", "<title>The Dormouse's story</title>", html)
-		}
+	} else if html := nodes[0].Readable(); html != "<title>The Dormouse's story</title>" {
+		t.Errorf("expected html %q; got %q", "<title>The Dormouse's story</title>", html)
 	}
 	if nodes := soup.FindAll(0, P, Class("title")); len(nodes) != 1 {
 		t.Errorf("expected nodes %d; got %d", 1, len(nodes))
-	} else {
-		if html := nodes[0].Readable(); html != `

The Dormouse's story

` { - t.Errorf("expected html %q; got %q", `

The Dormouse's story

`, html) - } + } else if html := nodes[0].Readable(); html != `

The Dormouse's story

` { + t.Errorf("expected html %q; got %q", `

The Dormouse's story

`, html) } if nodes := soup.FindAll(0, A); len(nodes) != 3 { t.Errorf("expected nodes %d; got %d", 3, len(nodes)) } else { - expected := []string{ - `Elsie`, - `Lacie`, - `Tillie`, - } + expected := []string{elsie, lacie, tillie} for i, node := range nodes { if html := node.Readable(); html != expected[i] { t.Errorf("expected html #%d %q; got %q", i, expected[i], html) @@ -37,33 +30,23 @@ func TestFindAll(t *testing.T) { } if nodes := soup.FindAll(0, nil, Id("link2")); len(nodes) != 1 { t.Errorf("expected nodes %d; got %d", 1, len(nodes)) - } else { - if html := nodes[0].Readable(); html != `Lacie` { - t.Errorf("expected html %q; got %q", `Lacie`, html) - } + } else if html := nodes[0].Readable(); html != lacie { + t.Errorf("expected html %q; got %q", lacie, html) } if nodes := soup.FindAll(0, nil, String(regexp.MustCompile("sisters"))); len(nodes) != 1 { t.Errorf("expected nodes %d; got %d", 1, len(nodes)) - } else { - if text := nodes[0].Readable(); text != "Once upon a time there were three little sisters; and their names were\n" { - t.Errorf("expected text %q; got %q", "Once upon a time there were three little sisters; and their names were\n", text) - } + } else if text := nodes[0].Readable(); text != "Once upon a time there were three little sisters; and their names were\n" { + t.Errorf("expected text %q; got %q", "Once upon a time there were three little sisters; and their names were\n", text) } if nodes := soup.FindAll(0, nil, Attr("href", regexp.MustCompile("elsie"))); len(nodes) != 1 { t.Errorf("expected nodes %d; got %d", 1, len(nodes)) - } else { - if html := nodes[0].Readable(); html != `Elsie` { - t.Errorf("expected html %q; got %q", `Elsie`, html) - } + } else if html := nodes[0].Readable(); html != elsie { + t.Errorf("expected html %q; got %q", elsie, html) } if nodes := soup.FindAll(0, nil, Id(True)); len(nodes) != 3 { t.Errorf("expected nodes %d; got %d", 3, len(nodes)) } else { - expected := []string{ - `Elsie`, - `Lacie`, - `Tillie`, - } + expected := []string{elsie, lacie, tillie} for i, node := range nodes { if html := node.Readable(); html != expected[i] { t.Errorf("expected html #%d %q; got %q", i, expected[i], html) @@ -72,10 +55,8 @@ func TestFindAll(t *testing.T) { } if nodes := soup.FindAll(0, nil, Attr("href", regexp.MustCompile("elsie")), Id("link1")); len(nodes) != 1 { t.Errorf("expected nodes %d; got %d", 1, len(nodes)) - } else { - if html := nodes[0].Readable(); html != `Elsie` { - t.Errorf("expected html %q; got %q", `Elsie`, html) - } + } else if html := nodes[0].Readable(); html != elsie { + t.Errorf("expected html %q; got %q", elsie, html) } } @@ -83,10 +64,7 @@ func TestFindN(t *testing.T) { if nodes := soup.FindN(0, 2, A); len(nodes) != 2 { t.Errorf("expected nodes %d; got %d", 2, len(nodes)) } else { - expected := []string{ - `Elsie`, - `Lacie`, - } + expected := []string{elsie, lacie} for i, node := range nodes { if html := node.Readable(); html != expected[i] { t.Errorf("expected html #%d %q; got %q", i, expected[i], html) @@ -98,10 +76,8 @@ func TestFindN(t *testing.T) { func TestFind(t *testing.T) { if nodes := soup.FindN(0, 1, Title); len(nodes) != 1 { t.Errorf("expected nodes %d; got %d", 1, len(nodes)) - } else { - if html := nodes[0].Readable(); html != "The Dormouse's story" { - t.Errorf("expected html %q; got %q", "The Dormouse's story", html) - } + } else if html := nodes[0].Readable(); html != "The Dormouse's story" { + t.Errorf("expected html %q; got %q", "The Dormouse's story", html) } if html := soup.Find(0, Title).Readable(); html != 
"The Dormouse's story" { t.Errorf("expected html %q; got %q", "The Dormouse's story", html) @@ -121,10 +97,8 @@ func TestFindMethod(t *testing.T) { } if nodes := aString.FindAll(Parent, A); len(nodes) != 1 { t.Errorf("expected nodes %d; got %d", 1, len(nodes)) - } else { - if html := nodes[0].Readable(); html != `Lacie` { - t.Errorf("expected html %q; got %q", `Lacie`, html) - } + } else if html := nodes[0].Readable(); html != lacie { + t.Errorf("expected html %q; got %q", lacie, html) } if html := aString.Find(Parent, P).Readable(); !strings.HasPrefix(html, `

Once upon a time there were`) { t.Errorf("expected html %q; got %q", `

Once upon a time there were`, html) @@ -136,10 +110,7 @@ func TestFindMethod(t *testing.T) { if nodes := firstLink.FindAll(NextSibling, A); len(nodes) != 2 { t.Errorf("expected nodes %d; got %d", 2, len(nodes)) } else { - expected := []string{ - `Lacie`, - `Tillie`, - } + expected := []string{lacie, tillie} for i, node := range nodes { if html := node.Readable(); html != expected[i] { t.Errorf("expected html #%d %q; got %q", i, expected[i], html) @@ -153,10 +124,7 @@ func TestFindMethod(t *testing.T) { if nodes := lastLink.FindAll(PrevSibling, A); len(nodes) != 2 { t.Errorf("expected nodes %d; got %d", 2, len(nodes)) } else { - expected := []string{ - `Lacie`, - `Elsie`, - } + expected := []string{lacie, elsie} for i, node := range nodes { if html := node.Readable(); html != expected[i] { t.Errorf("expected html #%d %q; got %q", i, expected[i], html) @@ -200,3 +168,183 @@ func TestFindMethod(t *testing.T) { t.Errorf("expected nodes %d; got %d", 0, len(nodes)) } } + +func TestSelectAll(t *testing.T) { + if nodes := soup.SelectAll("title"); len(nodes) != 1 { + t.Errorf("expected nodes %d; got %d", 1, len(nodes)) + } else if html := nodes[0].Readable(); html != "The Dormouse's story" { + t.Errorf("expected html %q; got %q", "The Dormouse's story", html) + } + if nodes := soup.SelectAll("p:nth-of-type(3)"); len(nodes) != 1 { + t.Errorf("expected nodes %d; got %d", 1, len(nodes)) + } else if html := nodes[0].Readable(); html != `

...

` { + t.Errorf("expected html %q; got %q", `

...

`, html) + } + if nodes := soup.SelectAll("body a"); len(nodes) != 3 { + t.Errorf("expected nodes %d; got %d", 3, len(nodes)) + } else { + expected := []string{elsie, lacie, tillie} + for i, node := range nodes { + if html := node.Readable(); html != expected[i] { + t.Errorf("expected html #%d %q; got %q", i, expected[i], html) + } + } + } + if nodes := soup.SelectAll("html head title"); len(nodes) != 1 { + t.Errorf("expected nodes %d; got %d", 1, len(nodes)) + } else if html := nodes[0].Readable(); html != "The Dormouse's story" { + t.Errorf("expected html %q; got %q", "The Dormouse's story", html) + } + if nodes := soup.SelectAll("head > title"); len(nodes) != 1 { + t.Errorf("expected nodes %d; got %d", 1, len(nodes)) + } else if html := nodes[0].Readable(); html != "The Dormouse's story" { + t.Errorf("expected html %q; got %q", "The Dormouse's story", html) + } + if nodes := soup.SelectAll("p > a"); len(nodes) != 3 { + t.Errorf("expected nodes %d; got %d", 3, len(nodes)) + } else { + expected := []string{elsie, lacie, tillie} + for i, node := range nodes { + if html := node.Readable(); html != expected[i] { + t.Errorf("expected html #%d %q; got %q", i, expected[i], html) + } + } + } + if nodes := soup.SelectAll("p > a:nth-of-type(2)"); len(nodes) != 1 { + t.Errorf("expected nodes %d; got %d", 1, len(nodes)) + } else if html := nodes[0].Readable(); html != lacie { + t.Errorf("expected html %q; got %q", lacie, html) + } + if nodes := soup.SelectAll("p > #link1"); len(nodes) != 1 { + t.Errorf("expected nodes %d; got %d", 1, len(nodes)) + } else if html := nodes[0].Readable(); html != elsie { + t.Errorf("expected html %q; got %q", elsie, html) + } + if nodes := soup.SelectAll("body > a"); len(nodes) != 0 { + t.Errorf("expected nodes %d; got %d", 0, len(nodes)) + } + if nodes := soup.SelectAll(".sister"); len(nodes) != 3 { + t.Errorf("expected nodes %d; got %d", 3, len(nodes)) + } else { + expected := []string{elsie, lacie, tillie} + for i, node := range nodes { + if html := node.Readable(); html != expected[i] { + t.Errorf("expected html #%d %q; got %q", i, expected[i], html) + } + } + } + if nodes := soup.SelectAll("[class~=sister]"); len(nodes) != 3 { + t.Errorf("expected nodes %d; got %d", 3, len(nodes)) + } else { + expected := []string{elsie, lacie, tillie} + for i, node := range nodes { + if html := node.Readable(); html != expected[i] { + t.Errorf("expected html #%d %q; got %q", i, expected[i], html) + } + } + } + if nodes := soup.SelectAll("#link1"); len(nodes) != 1 { + t.Errorf("expected nodes %d; got %d", 1, len(nodes)) + } else if html := nodes[0].Readable(); html != elsie { + t.Errorf("expected html %q; got %q", elsie, html) + } + if nodes := soup.SelectAll("a#link2"); len(nodes) != 1 { + t.Errorf("expected nodes %d; got %d", 1, len(nodes)) + } else if html := nodes[0].Readable(); html != lacie { + t.Errorf("expected html %q; got %q", lacie, html) + } + if nodes := soup.SelectAll("#link1,#link2"); len(nodes) != 2 { + t.Errorf("expected nodes %d; got %d", 2, len(nodes)) + } else { + expected := []string{elsie, lacie} + for i, node := range nodes { + if html := node.Readable(); html != expected[i] { + t.Errorf("expected html #%d %q; got %q", i, expected[i], html) + } + } + } + if nodes := soup.SelectAll("a[href]"); len(nodes) != 3 { + t.Errorf("expected nodes %d; got %d", 3, len(nodes)) + } else { + expected := []string{elsie, lacie, tillie} + for i, node := range nodes { + if html := node.Readable(); html != expected[i] { + t.Errorf("expected html #%d %q; got %q", i, expected[i], 
html) + } + } + } + if nodes := soup.SelectAll(`a[href="http://example.com/elsie"]`); len(nodes) != 1 { + t.Errorf("expected nodes %d; got %d", 1, len(nodes)) + } else if html := nodes[0].Readable(); html != elsie { + t.Errorf("expected html %q; got %q", elsie, html) + } + if nodes := soup.SelectAll(`a[href^="http://example.com/"]`); len(nodes) != 3 { + t.Errorf("expected nodes %d; got %d", 3, len(nodes)) + } else { + expected := []string{elsie, lacie, tillie} + for i, node := range nodes { + if html := node.Readable(); html != expected[i] { + t.Errorf("expected html #%d %q; got %q", i, expected[i], html) + } + } + } + if nodes := soup.SelectAll(`a[href$="tillie"]`); len(nodes) != 1 { + t.Errorf("expected nodes %d; got %d", 1, len(nodes)) + } else if html := nodes[0].Readable(); html != tillie { + t.Errorf("expected html %q; got %q", tillie, html) + } + if nodes := soup.SelectAll(`a[href*=".com/el"]`); len(nodes) != 1 { + t.Errorf("expected nodes %d; got %d", 1, len(nodes)) + } else if html := nodes[0].Readable(); html != elsie { + t.Errorf("expected html %q; got %q", elsie, html) + } +} + +func TestSelect(t *testing.T) { + if html := soup.Select(".sister").Readable(); html != elsie { + t.Errorf("expected html %q; got %q", elsie, html) + } +} + +func TestXPath(t *testing.T) { + if nodes := soup.XPath("//title[1]"); len(nodes) != 1 { + t.Errorf("expected nodes %d; got %d", 1, len(nodes)) + } else if html := nodes[0].Readable(); html != "The Dormouse's story" { + t.Errorf("expected html %q; got %q", "The Dormouse's story", html) + } + if node := soup.XPath("//nosuchtag"); len(node) != 0 { + t.Errorf("expected node nil; got %q", node[0].Readable()) + } + if nodes := soup.XPath("//head/title"); len(nodes) != 1 { + t.Errorf("expected nodes %d; got %d", 1, len(nodes)) + } else if html := nodes[0].Readable(); html != "The Dormouse's story" { + t.Errorf("expected html %q; got %q", "The Dormouse's story", html) + } +} + +func TestEvaluate(t *testing.T) { + if _, err := soup.Evaluate("$test"); err == nil { + t.Errorf("expected error; got nil") + } + if res, err := soup.Evaluate("count(//a)"); err != nil { + t.Error(err) + } else if v, ok := res.(float64); !ok { + t.Errorf("expect type float64; got %s", reflect.TypeOf(res)) + } else if v != 3 { + t.Errorf("expected count 3; got %g", v) + } + if res, err := soup.Evaluate(`local-name(//a)`); err != nil { + t.Error(err) + } else if v, ok := res.(string); !ok { + t.Errorf("expect type string; got %s", reflect.TypeOf(res)) + } else if v != "a" { + t.Errorf("expected %q; got %q", "a", v) + } + if res, err := soup.Evaluate(`boolean(//table)`); err != nil { + t.Error(err) + } else if v, ok := res.(bool); !ok { + t.Errorf("expect type bool; got %s", reflect.TypeOf(res)) + } else if v { + t.Error("expected false; got true") + } +} diff --git a/go.mod b/go.mod index 15e88d6..bbb8999 100644 --- a/go.mod +++ b/go.mod @@ -1,5 +1,15 @@ module github.com/sunshineplan/node -go 1.21.0 +go 1.21 -require golang.org/x/net v0.18.0 +require ( + github.com/antchfx/htmlquery v1.3.0 + github.com/antchfx/xpath v1.2.3 + github.com/ericchiang/css v1.3.0 + golang.org/x/net v0.18.0 +) + +require ( + github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect + golang.org/x/text v0.14.0 // indirect +) diff --git a/go.sum b/go.sum index ee9cb73..b13cfe3 100644 --- a/go.sum +++ b/go.sum @@ -1,2 +1,45 @@ +github.com/antchfx/htmlquery v1.3.0 h1:5I5yNFOVI+egyia5F2s/5Do2nFWxJz41Tr3DyfKD25E= +github.com/antchfx/htmlquery v1.3.0/go.mod 
h1:zKPDVTMhfOmcwxheXUsx4rKJy8KEY/PU6eXr/2SebQ8= +github.com/antchfx/xpath v1.2.3 h1:CCZWOzv5bAqjVv0offZ2LVgVYFbeldKQVuLNbViZdes= +github.com/antchfx/xpath v1.2.3/go.mod h1:i54GszH55fYfBmoZXapTHN8T8tkcHfRgLyVwwqzXNcs= +github.com/ericchiang/css v1.3.0 h1:e0vS+vpujMjtT3/SYu7qTHn1LVzXWcLCCDjlfq3YlLY= +github.com/ericchiang/css v1.3.0/go.mod h1:sVSdL+MFR9Q4cKJMQzpIkHIDOLiK+7Wmjjhq7D+MubA= +github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da h1:oI5xCqsCo564l8iNU+DwB5epxmsaqB+rhGL0m5jtYqE= +github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= +github.com/google/go-cmp v0.5.6 h1:BKbKCqvP6I+rmFHt06ZmyQtvB8xAkWdhFyr0ZUNZcxQ= +github.com/google/go-cmp v0.5.6/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= +golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= +golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= +golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= +golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= +golang.org/x/net v0.0.0-20211216030914-fe4d6282115f/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= +golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= +golang.org/x/net v0.5.0/go.mod h1:DivGGAXEgPSlEBzxGzZI+ZLohi+xUj054jfeKui00ws= golang.org/x/net v0.18.0 h1:mIYleuAkSbHh0tCv7RvjL3F6ZVbLjq4+R7zbOn3Kokg= golang.org/x/net v0.18.0/go.mod h1:/czyP5RqHAH4odGYxBJ1qz0+CE5WZ+2j1YgoEo8F2jQ= +golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.4.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= +golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= +golang.org/x/term v0.4.0/go.mod h1:9P2UbLfCdcvo3p/nzKvsmas4TnlujnuoV9hGgYzW1lQ= +golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= +golang.org/x/text v0.6.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= +golang.org/x/text 
v0.14.0 h1:ScX5w1eTa3QqT8oi6+ziP7dTV1S2+ALU0bI+0zXKWiQ= +golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= +golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= +golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= diff --git a/node_test.go b/node_test.go index 98c8b72..318d694 100644 --- a/node_test.go +++ b/node_test.go @@ -2,7 +2,8 @@ package node import "testing" -var soup, _ = ParseHTML(`The Dormouse's story +var ( + soup, _ = ParseHTML(`The Dormouse's story

The Dormouse's story

@@ -15,6 +16,11 @@ and they lived at the bottom of a well.

...

`) + elsie = `Elsie` + lacie = `Lacie` + tillie = `Tillie` +) + func TestSoup(t *testing.T) { title := soup.Find(0, Title) if html := title.Readable(); html != "The Dormouse's story" { @@ -36,8 +42,8 @@ func TestSoup(t *testing.T) { if class, _ := p.Attrs().Get("class"); class != "title" { t.Errorf("expected class %q; got %q", "title", class) } - if a := soup.Find(0, A).Readable(); a != `Elsie` { - t.Errorf("expected a %q; got %q", `Elsie`, a) + if a := soup.Find(0, A).Readable(); a != elsie { + t.Errorf("expected a %q; got %q", elsie, a) } if a := soup.FindAll(0, A); len(a) != 3 { t.Errorf("expected a %d; got %d", 3, len(a)) @@ -49,8 +55,8 @@ func TestSoup(t *testing.T) { } } } - if a := soup.Find(0, nil, Id("link3")).Readable(); a != `Tillie` { - t.Errorf("expected a %q; got %q", `Tillie`, a) + if a := soup.Find(0, nil, Id("link3")).Readable(); a != tillie { + t.Errorf("expected a %q; got %q", tillie, a) } s := `The Dormouse's story @@ -176,17 +182,17 @@ func TestGoingUp(t *testing.T) { func TestGoingSideways(t *testing.T) { if node := soup.Find(0, A).NextSibling(); node.Readable() != ",\n" { t.Errorf("expected string %q; got %q", ",\n ", node.GetText()) - } else if html := node.NextSibling().Readable(); html != `Lacie` { - t.Errorf("expected html %q; got %q", `Lacie`, html) + } else if html := node.NextSibling().Readable(); html != lacie { + t.Errorf("expected html %q; got %q", lacie, html) } if nextSiblings := soup.Find(0, A).NextSiblings(); len(nextSiblings) != 5 { t.Errorf("expected next_siblings %d; got %d", 5, len(nextSiblings)) } else { expected := []string{ ",\n", - `Lacie`, + lacie, " and\n", - `Tillie`, + tillie, ";\nand they lived at the bottom of a well.", } for i, nextSibling := range nextSiblings { @@ -200,9 +206,9 @@ func TestGoingSideways(t *testing.T) { } else { expected := []string{ " and\n", - `Lacie`, + lacie, ",\n", - `Elsie`, + elsie, "Once upon a time there were three little sisters; and their names were\n", } for i, nextSibling := range prevSiblings { @@ -215,8 +221,8 @@ func TestGoingSideways(t *testing.T) { func TestGoingBackAndForth(t *testing.T) { a := soup.Find(0, A, Id("link3")) - if html := a.Readable(); html != `Tillie` { - t.Errorf("expected html %q; got %q", `Tillie`, html) + if html := a.Readable(); html != tillie { + t.Errorf("expected html %q; got %q", tillie, html) } if html := a.NextSibling().Readable(); html != ";\nand they lived at the bottom of a well." 
{ t.Errorf("expected html %q; got %q", ";\nand they lived at the bottom of a well.", html) @@ -227,8 +233,8 @@ func TestGoingBackAndForth(t *testing.T) { if html := a.PrevNode().Readable(); html != " and\n" { t.Errorf("expected html %q; got %q", " and\n", html) } - if html := a.PrevNode().NextNode().Readable(); html != `Tillie` { - t.Errorf("expected html %q; got %q", `Tillie`, html) + if html := a.PrevNode().NextNode().Readable(); html != tillie { + t.Errorf("expected html %q; got %q", tillie, html) } if nextNodes := a.NextNodes(); len(nextNodes) != 6 { t.Errorf("expected next_elements %d; got %d", 6, len(nextNodes)) diff --git a/string_test.go b/string_test.go index c364f3f..26e2b55 100644 --- a/string_test.go +++ b/string_test.go @@ -8,10 +8,8 @@ import ( func TestString(t *testing.T) { if nodes := soup.FindAllString(0, String("Elsie")); len(nodes) != 1 { t.Errorf("expected nodes %d; got %d", 1, len(nodes)) - } else { - if text := nodes[0].String(); text != "Elsie" { - t.Errorf("expected string %q; got %q", "Elsie", text) - } + } else if text := nodes[0].String(); text != "Elsie" { + t.Errorf("expected string %q; got %q", "Elsie", text) } if nodes := soup.FindAllString(0, String([]string{"Tillie", "Elsie", "Lacie"})); len(nodes) != 3 { t.Errorf("expected nodes %d; got %d", 3, len(nodes)) @@ -49,9 +47,7 @@ func TestString(t *testing.T) { } if nodes := soup.FindAllString(0, Text("Elsie")); len(nodes) != 1 { t.Errorf("expected nodes %d; got %d", 1, len(nodes)) - } else { - if text := nodes[0].String(); text != "Elsie" { - t.Errorf("expected string %q; got %q", "Elsie", text) - } + } else if text := nodes[0].String(); text != "Elsie" { + t.Errorf("expected string %q; got %q", "Elsie", text) } } diff --git a/tag.go b/tag.go index 070a14c..b3e7211 100644 --- a/tag.go +++ b/tag.go @@ -16,6 +16,8 @@ var ( B = Tag("b") Body = Tag("body") Div = Tag("div") + Em = Tag("em") + Form = Tag("form") H1 = Tag("h1") H2 = Tag("h2") Head = Tag("head")