diff --git a/README.md b/README.md
index 56786f5..f66dfc2 100644
--- a/README.md
+++ b/README.md
@@ -155,7 +155,7 @@ type Attributes interface {
// Finder represents a set of methods for finding nodes.
type Finder interface {
- // Find searches for a single node in the parse tree based on the specified find method and filters.
+ // Find searches for the first matching node in the parse tree based on the specified find method and filters.
Find(FindMethod, TagFilter, ...Filter) Node
// FindN searches for up to n nodes in the parse tree based on the specified find method and filters.
@@ -164,7 +164,7 @@ type Finder interface {
// FindAll searches for all nodes in the parse tree based on the specified find method and filters.
FindAll(FindMethod, TagFilter, ...Filter) []Node
- // FindString searches for a single text node in the parse tree based on the specified find method and filters.
+ // FindString searches for the first matching text node in the parse tree based on the specified find method and filters.
FindString(FindMethod, StringFilter) TextNode
// FindStringN searches for up to n text nodes in the parse tree based on the specified find method and filters.
@@ -172,6 +172,25 @@ type Finder interface {
// FindAllString searches for all text nodes in the parse tree based on the specified find method and filters.
FindAllString(FindMethod, StringFilter) []TextNode
+
+ // CSS selector support
+
+ // Select searches for the first node in the parse tree that matches the CSS selector.
+ // It panics if the selector cannot be parsed.
+ Select(string) Node
+
+ // SelectAll searches for all nodes in the parse tree that match the CSS selector.
+ // It panics if the selector cannot be parsed.
+ SelectAll(string) []Node
+
+ // XPath support
+
+ // XPath searches for all nodes in the parse tree that match the specified XPath expression. It panics if the expression cannot be parsed.
+ XPath(string) []Node
+
+ // Evaluate returns the result of the XPath expression.
+ // The result type is one of the following: bool, float64, string, *xpath.NodeIterator.
+ Evaluate(string) (any, error)
}
// FindMethod represents the method used to search for nodes in the parse tree.
@@ -223,6 +242,13 @@ type StringFilter interface {
}
```
+## Credits
+
+This repo relies on the following third-party projects:
+
+ * [ericchiang/css](https://github.com/ericchiang/css)
+ * [antchfx/xpath](https://github.com/antchfx/xpath)
+
## License
[The MIT License (MIT)](https://raw.githubusercontent.com/sunshineplan/node/main/LICENSE)
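A minimal usage sketch of the new CSS selector and XPath methods added above. It assumes, as the tests below do, that `ParseHTML` accepts an HTML string and that nodes expose `Readable`; the sample document and the `main` wrapper are illustrative only.

```go
package main

import (
	"fmt"
	"log"

	"github.com/sunshineplan/node"
)

func main() {
	// Illustrative document; ParseHTML and Readable come from the existing package API.
	doc, err := node.ParseHTML(`<p class="story"><a id="link1" href="http://example.com/elsie">Elsie</a></p>`)
	if err != nil {
		log.Fatal(err)
	}
	// CSS selectors: Select returns the first match (or nil), SelectAll returns every match.
	if a := doc.Select(`p.story > a#link1`); a != nil {
		fmt.Println(a.Readable())
	}
	// XPath: returns every node matching the expression.
	for _, a := range doc.XPath(`//a[@href]`) {
		fmt.Println(a.Readable())
	}
	// Evaluate: for non-node-set results such as counts (float64 per the doc comment).
	if res, err := doc.Evaluate(`count(//a)`); err == nil {
		fmt.Println(res)
	}
}
```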
diff --git a/class_test.go b/class_test.go
index d06e6b0..9514d4b 100644
--- a/class_test.go
+++ b/class_test.go
@@ -9,11 +9,7 @@ func TestClass(t *testing.T) {
if nodes := soup.FindAll(0, A, Class("sister")); len(nodes) != 3 {
t.Errorf("expected nodes %d; got %d", 3, len(nodes))
} else {
- expected := []string{
- `Elsie`,
- `Lacie`,
- `Tillie`,
- }
+ expected := []string{elsie, lacie, tillie}
for i, node := range nodes {
if html := node.Readable(); html != expected[i] {
t.Errorf("expected html #%d %q; got %q", i, expected[i], html)
@@ -22,21 +18,15 @@ func TestClass(t *testing.T) {
}
if nodes := soup.FindAll(0, nil, Class(regexp.MustCompile("itl"))); len(nodes) != 1 {
t.Errorf("expected nodes %d; got %d", 1, len(nodes))
- } else {
- if html := nodes[0].Readable(); html != `The Dormouse's story` {
- t.Errorf("expected html %q; got %q", `The Dormouse's story`, html)
- }
+ } else if html := nodes[0].Readable(); html != `The Dormouse's story` {
+ t.Errorf("expected html %q; got %q", `The Dormouse's story`, html)
}
if nodes := soup.FindAll(0, A, Class(func(class string, node Node) bool {
return node.HasAttr("class") && len(class) == 6
})); len(nodes) != 3 {
t.Errorf("expected nodes %d; got %d", 3, len(nodes))
} else {
- expected := []string{
- `Elsie`,
- `Lacie`,
- `Tillie`,
- }
+ expected := []string{elsie, lacie, tillie}
for i, node := range nodes {
if html := node.Readable(); html != expected[i] {
t.Errorf("expected html #%d %q; got %q", i, expected[i], html)
@@ -46,11 +36,7 @@ func TestClass(t *testing.T) {
if nodes := soup.FindAll(0, A, Attr("class", "sister")); len(nodes) != 3 {
t.Errorf("expected nodes %d; got %d", 3, len(nodes))
} else {
- expected := []string{
- `Elsie`,
- `Lacie`,
- `Tillie`,
- }
+ expected := []string{elsie, lacie, tillie}
for i, node := range nodes {
if html := node.Readable(); html != expected[i] {
t.Errorf("expected html #%d %q; got %q", i, expected[i], html)
diff --git a/example_test.go b/example_test.go
index 7e0f8d3..4b0af49 100644
--- a/example_test.go
+++ b/example_test.go
@@ -19,7 +19,12 @@ func ExampleAttr() {
if err != nil {
log.Fatal(err)
}
- if nodes := node.FindAll(0, nil, Attr("name", "email")); len(nodes) != 1 {
+ if nodes := node.SelectAll(`[name="email"]`); len(nodes) != 1 {
+ log.Fatalf("expected nodes %d; got %d", 1, len(nodes))
+ } else {
+ fmt.Println(nodes[0].Readable())
+ }
+ if nodes := node.XPath(`//*[@name="email"]`); len(nodes) != 1 {
log.Fatalf("expected nodes %d; got %d", 1, len(nodes))
} else {
fmt.Println(nodes[0].Readable())
@@ -27,6 +32,7 @@ func ExampleAttr() {
// Output:
// foo!
+ // foo!
}
func ExampleClass() {
diff --git a/filter_test.go b/filter_test.go
index 7ea0c22..348d8ab 100644
--- a/filter_test.go
+++ b/filter_test.go
@@ -11,10 +11,8 @@ import (
func TestFilter(t *testing.T) {
if nodes := soup.FindAll(0, B); len(nodes) != 1 {
t.Errorf("expected b %d; got %d", 1, len(nodes))
- } else {
- if html := nodes[0].Readable(); html != "The Dormouse's story" {
- t.Errorf("expected html %q; got %q", "The Dormouse's story", html)
- }
+ } else if html := nodes[0].Readable(); html != "The Dormouse's story" {
+ t.Errorf("expected html %q; got %q", "The Dormouse's story", html)
}
if nodes := soup.FindAll(0, Tag(regexp.MustCompile("^b"))); len(nodes) != 2 {
t.Errorf("expected ^b %d; got %d", 2, len(nodes))
@@ -39,12 +37,7 @@ func TestFilter(t *testing.T) {
if nodes := soup.FindAll(0, Tags("a", "b")); len(nodes) != 4 {
t.Errorf("expected nodes %d; got %d", 4, len(nodes))
} else {
- expected := []string{
- "The Dormouse's story",
- `Elsie`,
- `Lacie`,
- `Tillie`,
- }
+ expected := []string{"The Dormouse's story", elsie, lacie, tillie}
for i, node := range nodes {
if html := node.Readable(); html != expected[i] {
t.Errorf("expected html #%d %q; got %q", i, expected[i], html)
@@ -82,10 +75,7 @@ func TestFilter(t *testing.T) {
})); len(nodes) != 2 {
t.Errorf("expected nodes %d; got %d", 2, len(nodes))
} else {
- expected := []string{
- `Elsie`,
- `Tillie`,
- }
+ expected := []string{elsie, tillie}
for i, node := range nodes {
if html := node.Readable(); !strings.HasPrefix(html, expected[i]) {
t.Errorf("expected html #%d %q; got %q", i, expected[i], html)
diff --git a/finder.go b/finder.go
index 2daaa8a..ba58646 100644
--- a/finder.go
+++ b/finder.go
@@ -3,12 +3,15 @@ package node
import (
"context"
+ "github.com/antchfx/htmlquery"
+ "github.com/antchfx/xpath"
+ "github.com/ericchiang/css"
"golang.org/x/net/html"
)
// Finder represents a set of methods for finding nodes.
type Finder interface {
- // Find searches for a single node in the parse tree based on the specified find method and filters.
+ // Find searches for the first matching node in the parse tree based on the specified find method and filters.
Find(FindMethod, TagFilter, ...Filter) Node
// FindN searches for up to n nodes in the parse tree based on the specified find method and filters.
@@ -17,7 +20,7 @@ type Finder interface {
// FindAll searches for all nodes in the parse tree based on the specified find method and filters.
FindAll(FindMethod, TagFilter, ...Filter) []Node
- // FindString searches for a single text node in the parse tree based on the specified find method and filters.
+ // FindString searches for the first matching text node in the parse tree based on the specified find method and filters.
FindString(FindMethod, StringFilter) TextNode
// FindStringN searches for up to n text nodes in the parse tree based on the specified find method and filters.
@@ -25,6 +28,25 @@ type Finder interface {
// FindAllString searches for all text nodes in the parse tree based on the specified find method and filters.
FindAllString(FindMethod, StringFilter) []TextNode
+
+ // CSS selector support
+
+ // Select searches for the first node in the parse tree that matches the CSS selector.
+ // It panics if the selector cannot be parsed.
+ Select(string) Node
+
+ // SelectAll searches for all nodes in the parse tree that match the CSS selector.
+ // It panics if the selector cannot be parsed.
+ SelectAll(string) []Node
+
+ // XPath support
+
+ // XPath searches for all nodes in the parse tree that match the specified XPath expression. It panics if the expression cannot be parsed.
+ XPath(string) []Node
+
+ // Evaluate returns the result of the XPath expression.
+ // The result type is one of the following: bool, float64, string, *xpath.NodeIterator.
+ Evaluate(string) (any, error)
}
// FindMethod represents the method used to search for nodes in the parse tree.
@@ -177,3 +199,33 @@ func (n *htmlNode) FindAllString(method FindMethod, filter StringFilter) (res []
}
return
}
+
+func (n *htmlNode) Select(sel string) Node {
+ nodes := n.SelectAll(sel)
+ if len(nodes) == 0 {
+ return nil
+ }
+ return nodes[0]
+}
+
+func (n *htmlNode) SelectAll(sel string) (res []Node) {
+ for _, i := range css.MustParse(sel).Select(n.Raw()) {
+ res = append(res, NewNode(i))
+ }
+ return
+}
+
+func (n *htmlNode) XPath(expr string) (res []Node) {
+ for _, i := range htmlquery.Find(n.Raw(), expr) {
+ res = append(res, NewNode(i))
+ }
+ return
+}
+
+func (n *htmlNode) Evaluate(expr string) (any, error) {
+ exp, err := xpath.Compile(expr)
+ if err != nil {
+ return nil, err
+ }
+ return exp.Evaluate(htmlquery.CreateXPathNavigator(n.Raw())), nil
+}
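As documented above, Select, SelectAll, and XPath panic when the selector or expression cannot be parsed (css.MustParse and htmlquery.Find both panic on invalid input). Below is a minimal sketch, not part of this change, of a recovering wrapper a caller might add when selectors come from untrusted input; `SafeSelectAll` is a hypothetical name.

```go
package node

import "fmt"

// SafeSelectAll is a hypothetical wrapper (not part of this change): it converts
// the documented parse panic from SelectAll into an error so callers can handle
// invalid selectors without crashing.
func SafeSelectAll(n Node, sel string) (res []Node, err error) {
	defer func() {
		if r := recover(); r != nil {
			err = fmt.Errorf("invalid selector %q: %v", sel, r)
		}
	}()
	return n.SelectAll(sel), nil
}
```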
diff --git a/finder_test.go b/finder_test.go
index f470e93..ba44542 100644
--- a/finder_test.go
+++ b/finder_test.go
@@ -1,6 +1,7 @@
package node
import (
+ "reflect"
"regexp"
"strings"
"testing"
@@ -9,26 +10,18 @@ import (
func TestFindAll(t *testing.T) {
if nodes := soup.FindAll(0, Title); len(nodes) != 1 {
t.Errorf("expected nodes %d; got %d", 1, len(nodes))
- } else {
- if html := nodes[0].Readable(); html != "The Dormouse's story" {
- t.Errorf("expected html %q; got %q", "The Dormouse's story", html)
- }
+ } else if html := nodes[0].Readable(); html != "The Dormouse's story" {
+ t.Errorf("expected html %q; got %q", "The Dormouse's story", html)
}
if nodes := soup.FindAll(0, P, Class("title")); len(nodes) != 1 {
t.Errorf("expected nodes %d; got %d", 1, len(nodes))
- } else {
- if html := nodes[0].Readable(); html != `The Dormouse's story` {
- t.Errorf("expected html %q; got %q", `The Dormouse's story`, html)
- }
+ } else if html := nodes[0].Readable(); html != `The Dormouse's story` {
+ t.Errorf("expected html %q; got %q", `The Dormouse's story`, html)
}
if nodes := soup.FindAll(0, A); len(nodes) != 3 {
t.Errorf("expected nodes %d; got %d", 3, len(nodes))
} else {
- expected := []string{
- `Elsie`,
- `Lacie`,
- `Tillie`,
- }
+ expected := []string{elsie, lacie, tillie}
for i, node := range nodes {
if html := node.Readable(); html != expected[i] {
t.Errorf("expected html #%d %q; got %q", i, expected[i], html)
@@ -37,33 +30,23 @@ func TestFindAll(t *testing.T) {
}
if nodes := soup.FindAll(0, nil, Id("link2")); len(nodes) != 1 {
t.Errorf("expected nodes %d; got %d", 1, len(nodes))
- } else {
- if html := nodes[0].Readable(); html != `Lacie` {
- t.Errorf("expected html %q; got %q", `Lacie`, html)
- }
+ } else if html := nodes[0].Readable(); html != lacie {
+ t.Errorf("expected html %q; got %q", lacie, html)
}
if nodes := soup.FindAll(0, nil, String(regexp.MustCompile("sisters"))); len(nodes) != 1 {
t.Errorf("expected nodes %d; got %d", 1, len(nodes))
- } else {
- if text := nodes[0].Readable(); text != "Once upon a time there were three little sisters; and their names were\n" {
- t.Errorf("expected text %q; got %q", "Once upon a time there were three little sisters; and their names were\n", text)
- }
+ } else if text := nodes[0].Readable(); text != "Once upon a time there were three little sisters; and their names were\n" {
+ t.Errorf("expected text %q; got %q", "Once upon a time there were three little sisters; and their names were\n", text)
}
if nodes := soup.FindAll(0, nil, Attr("href", regexp.MustCompile("elsie"))); len(nodes) != 1 {
t.Errorf("expected nodes %d; got %d", 1, len(nodes))
- } else {
- if html := nodes[0].Readable(); html != `Elsie` {
- t.Errorf("expected html %q; got %q", `Elsie`, html)
- }
+ } else if html := nodes[0].Readable(); html != elsie {
+ t.Errorf("expected html %q; got %q", elsie, html)
}
if nodes := soup.FindAll(0, nil, Id(True)); len(nodes) != 3 {
t.Errorf("expected nodes %d; got %d", 3, len(nodes))
} else {
- expected := []string{
- `Elsie`,
- `Lacie`,
- `Tillie`,
- }
+ expected := []string{elsie, lacie, tillie}
for i, node := range nodes {
if html := node.Readable(); html != expected[i] {
t.Errorf("expected html #%d %q; got %q", i, expected[i], html)
@@ -72,10 +55,8 @@ func TestFindAll(t *testing.T) {
}
if nodes := soup.FindAll(0, nil, Attr("href", regexp.MustCompile("elsie")), Id("link1")); len(nodes) != 1 {
t.Errorf("expected nodes %d; got %d", 1, len(nodes))
- } else {
- if html := nodes[0].Readable(); html != `Elsie` {
- t.Errorf("expected html %q; got %q", `Elsie`, html)
- }
+ } else if html := nodes[0].Readable(); html != elsie {
+ t.Errorf("expected html %q; got %q", elsie, html)
}
}
@@ -83,10 +64,7 @@ func TestFindN(t *testing.T) {
if nodes := soup.FindN(0, 2, A); len(nodes) != 2 {
t.Errorf("expected nodes %d; got %d", 2, len(nodes))
} else {
- expected := []string{
- `Elsie`,
- `Lacie`,
- }
+ expected := []string{elsie, lacie}
for i, node := range nodes {
if html := node.Readable(); html != expected[i] {
t.Errorf("expected html #%d %q; got %q", i, expected[i], html)
@@ -98,10 +76,8 @@ func TestFindN(t *testing.T) {
func TestFind(t *testing.T) {
if nodes := soup.FindN(0, 1, Title); len(nodes) != 1 {
t.Errorf("expected nodes %d; got %d", 1, len(nodes))
- } else {
- if html := nodes[0].Readable(); html != "The Dormouse's story" {
- t.Errorf("expected html %q; got %q", "The Dormouse's story", html)
- }
+ } else if html := nodes[0].Readable(); html != "The Dormouse's story" {
+ t.Errorf("expected html %q; got %q", "The Dormouse's story", html)
}
if html := soup.Find(0, Title).Readable(); html != "The Dormouse's story" {
t.Errorf("expected html %q; got %q", "The Dormouse's story", html)
@@ -121,10 +97,8 @@ func TestFindMethod(t *testing.T) {
}
if nodes := aString.FindAll(Parent, A); len(nodes) != 1 {
t.Errorf("expected nodes %d; got %d", 1, len(nodes))
- } else {
- if html := nodes[0].Readable(); html != `Lacie` {
- t.Errorf("expected html %q; got %q", `Lacie`, html)
- }
+ } else if html := nodes[0].Readable(); html != lacie {
+ t.Errorf("expected html %q; got %q", lacie, html)
}
if html := aString.Find(Parent, P).Readable(); !strings.HasPrefix(html, `Once upon a time there were`) {
t.Errorf("expected html %q; got %q", `Once upon a time there were`, html)
@@ -136,10 +110,7 @@ func TestFindMethod(t *testing.T) {
if nodes := firstLink.FindAll(NextSibling, A); len(nodes) != 2 {
t.Errorf("expected nodes %d; got %d", 2, len(nodes))
} else {
- expected := []string{
- `Lacie`,
- `Tillie`,
- }
+ expected := []string{lacie, tillie}
for i, node := range nodes {
if html := node.Readable(); html != expected[i] {
t.Errorf("expected html #%d %q; got %q", i, expected[i], html)
@@ -153,10 +124,7 @@ func TestFindMethod(t *testing.T) {
if nodes := lastLink.FindAll(PrevSibling, A); len(nodes) != 2 {
t.Errorf("expected nodes %d; got %d", 2, len(nodes))
} else {
- expected := []string{
- `Lacie`,
- `Elsie`,
- }
+ expected := []string{lacie, elsie}
for i, node := range nodes {
if html := node.Readable(); html != expected[i] {
t.Errorf("expected html #%d %q; got %q", i, expected[i], html)
@@ -200,3 +168,183 @@ func TestFindMethod(t *testing.T) {
t.Errorf("expected nodes %d; got %d", 0, len(nodes))
}
}
+
+func TestSelectAll(t *testing.T) {
+ if nodes := soup.SelectAll("title"); len(nodes) != 1 {
+ t.Errorf("expected nodes %d; got %d", 1, len(nodes))
+ } else if html := nodes[0].Readable(); html != "The Dormouse's story" {
+ t.Errorf("expected html %q; got %q", "The Dormouse's story", html)
+ }
+ if nodes := soup.SelectAll("p:nth-of-type(3)"); len(nodes) != 1 {
+ t.Errorf("expected nodes %d; got %d", 1, len(nodes))
+ } else if html := nodes[0].Readable(); html != `...` {
+ t.Errorf("expected html %q; got %q", `...`, html)
+ }
+ if nodes := soup.SelectAll("body a"); len(nodes) != 3 {
+ t.Errorf("expected nodes %d; got %d", 3, len(nodes))
+ } else {
+ expected := []string{elsie, lacie, tillie}
+ for i, node := range nodes {
+ if html := node.Readable(); html != expected[i] {
+ t.Errorf("expected html #%d %q; got %q", i, expected[i], html)
+ }
+ }
+ }
+ if nodes := soup.SelectAll("html head title"); len(nodes) != 1 {
+ t.Errorf("expected nodes %d; got %d", 1, len(nodes))
+ } else if html := nodes[0].Readable(); html != "The Dormouse's story" {
+ t.Errorf("expected html %q; got %q", "The Dormouse's story", html)
+ }
+ if nodes := soup.SelectAll("head > title"); len(nodes) != 1 {
+ t.Errorf("expected nodes %d; got %d", 1, len(nodes))
+ } else if html := nodes[0].Readable(); html != "The Dormouse's story" {
+ t.Errorf("expected html %q; got %q", "The Dormouse's story", html)
+ }
+ if nodes := soup.SelectAll("p > a"); len(nodes) != 3 {
+ t.Errorf("expected nodes %d; got %d", 3, len(nodes))
+ } else {
+ expected := []string{elsie, lacie, tillie}
+ for i, node := range nodes {
+ if html := node.Readable(); html != expected[i] {
+ t.Errorf("expected html #%d %q; got %q", i, expected[i], html)
+ }
+ }
+ }
+ if nodes := soup.SelectAll("p > a:nth-of-type(2)"); len(nodes) != 1 {
+ t.Errorf("expected nodes %d; got %d", 1, len(nodes))
+ } else if html := nodes[0].Readable(); html != lacie {
+ t.Errorf("expected html %q; got %q", lacie, html)
+ }
+ if nodes := soup.SelectAll("p > #link1"); len(nodes) != 1 {
+ t.Errorf("expected nodes %d; got %d", 1, len(nodes))
+ } else if html := nodes[0].Readable(); html != elsie {
+ t.Errorf("expected html %q; got %q", elsie, html)
+ }
+ if nodes := soup.SelectAll("body > a"); len(nodes) != 0 {
+ t.Errorf("expected nodes %d; got %d", 0, len(nodes))
+ }
+ if nodes := soup.SelectAll(".sister"); len(nodes) != 3 {
+ t.Errorf("expected nodes %d; got %d", 3, len(nodes))
+ } else {
+ expected := []string{elsie, lacie, tillie}
+ for i, node := range nodes {
+ if html := node.Readable(); html != expected[i] {
+ t.Errorf("expected html #%d %q; got %q", i, expected[i], html)
+ }
+ }
+ }
+ if nodes := soup.SelectAll("[class~=sister]"); len(nodes) != 3 {
+ t.Errorf("expected nodes %d; got %d", 3, len(nodes))
+ } else {
+ expected := []string{elsie, lacie, tillie}
+ for i, node := range nodes {
+ if html := node.Readable(); html != expected[i] {
+ t.Errorf("expected html #%d %q; got %q", i, expected[i], html)
+ }
+ }
+ }
+ if nodes := soup.SelectAll("#link1"); len(nodes) != 1 {
+ t.Errorf("expected nodes %d; got %d", 1, len(nodes))
+ } else if html := nodes[0].Readable(); html != elsie {
+ t.Errorf("expected html %q; got %q", elsie, html)
+ }
+ if nodes := soup.SelectAll("a#link2"); len(nodes) != 1 {
+ t.Errorf("expected nodes %d; got %d", 1, len(nodes))
+ } else if html := nodes[0].Readable(); html != lacie {
+ t.Errorf("expected html %q; got %q", lacie, html)
+ }
+ if nodes := soup.SelectAll("#link1,#link2"); len(nodes) != 2 {
+ t.Errorf("expected nodes %d; got %d", 2, len(nodes))
+ } else {
+ expected := []string{elsie, lacie}
+ for i, node := range nodes {
+ if html := node.Readable(); html != expected[i] {
+ t.Errorf("expected html #%d %q; got %q", i, expected[i], html)
+ }
+ }
+ }
+ if nodes := soup.SelectAll("a[href]"); len(nodes) != 3 {
+ t.Errorf("expected nodes %d; got %d", 3, len(nodes))
+ } else {
+ expected := []string{elsie, lacie, tillie}
+ for i, node := range nodes {
+ if html := node.Readable(); html != expected[i] {
+ t.Errorf("expected html #%d %q; got %q", i, expected[i], html)
+ }
+ }
+ }
+ if nodes := soup.SelectAll(`a[href="http://example.com/elsie"]`); len(nodes) != 1 {
+ t.Errorf("expected nodes %d; got %d", 1, len(nodes))
+ } else if html := nodes[0].Readable(); html != elsie {
+ t.Errorf("expected html %q; got %q", elsie, html)
+ }
+ if nodes := soup.SelectAll(`a[href^="http://example.com/"]`); len(nodes) != 3 {
+ t.Errorf("expected nodes %d; got %d", 3, len(nodes))
+ } else {
+ expected := []string{elsie, lacie, tillie}
+ for i, node := range nodes {
+ if html := node.Readable(); html != expected[i] {
+ t.Errorf("expected html #%d %q; got %q", i, expected[i], html)
+ }
+ }
+ }
+ if nodes := soup.SelectAll(`a[href$="tillie"]`); len(nodes) != 1 {
+ t.Errorf("expected nodes %d; got %d", 1, len(nodes))
+ } else if html := nodes[0].Readable(); html != tillie {
+ t.Errorf("expected html %q; got %q", tillie, html)
+ }
+ if nodes := soup.SelectAll(`a[href*=".com/el"]`); len(nodes) != 1 {
+ t.Errorf("expected nodes %d; got %d", 1, len(nodes))
+ } else if html := nodes[0].Readable(); html != elsie {
+ t.Errorf("expected html %q; got %q", elsie, html)
+ }
+}
+
+func TestSelect(t *testing.T) {
+ if html := soup.Select(".sister").Readable(); html != elsie {
+ t.Errorf("expected html %q; got %q", elsie, html)
+ }
+}
+
+func TestXPath(t *testing.T) {
+ if nodes := soup.XPath("//title[1]"); len(nodes) != 1 {
+ t.Errorf("expected nodes %d; got %d", 1, len(nodes))
+ } else if html := nodes[0].Readable(); html != "The Dormouse's story" {
+ t.Errorf("expected html %q; got %q", "The Dormouse's story", html)
+ }
+ if node := soup.XPath("//nosuchtag"); len(node) != 0 {
+ t.Errorf("expected node nil; got %q", node[0].Readable())
+ }
+ if nodes := soup.XPath("//head/title"); len(nodes) != 1 {
+ t.Errorf("expected nodes %d; got %d", 1, len(nodes))
+ } else if html := nodes[0].Readable(); html != "The Dormouse's story" {
+ t.Errorf("expected html %q; got %q", "The Dormouse's story", html)
+ }
+}
+
+func TestEvaluate(t *testing.T) {
+ if _, err := soup.Evaluate("$test"); err == nil {
+ t.Errorf("expected error; got nil")
+ }
+ if res, err := soup.Evaluate("count(//a)"); err != nil {
+ t.Error(err)
+ } else if v, ok := res.(float64); !ok {
+ t.Errorf("expect type float64; got %s", reflect.TypeOf(res))
+ } else if v != 3 {
+ t.Errorf("expected count 3; got %g", v)
+ }
+ if res, err := soup.Evaluate(`local-name(//a)`); err != nil {
+ t.Error(err)
+ } else if v, ok := res.(string); !ok {
+ t.Errorf("expect type string; got %s", reflect.TypeOf(res))
+ } else if v != "a" {
+ t.Errorf("expected %q; got %q", "a", v)
+ }
+ if res, err := soup.Evaluate(`boolean(//table)`); err != nil {
+ t.Error(err)
+ } else if v, ok := res.(bool); !ok {
+ t.Errorf("expect type bool; got %s", reflect.TypeOf(res))
+ } else if v {
+ t.Error("expected false; got true")
+ }
+}
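The tests above exercise the bool, float64, and string results of Evaluate. Below is a sketch of consuming the fourth documented result type, *xpath.NodeIterator, which node-set expressions return; `printHrefs` is a hypothetical helper, not part of this change.

```go
package node

import (
	"fmt"

	"github.com/antchfx/xpath"
)

// printHrefs is a hypothetical example (not part of this change): it evaluates a
// node-set expression and walks the returned *xpath.NodeIterator, printing the
// value of every matched href attribute.
func printHrefs(n Node) error {
	res, err := n.Evaluate("//a/@href")
	if err != nil {
		return err
	}
	iter, ok := res.(*xpath.NodeIterator)
	if !ok {
		return fmt.Errorf("expected *xpath.NodeIterator, got %T", res)
	}
	for iter.MoveNext() {
		fmt.Println(iter.Current().Value())
	}
	return nil
}
```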
diff --git a/go.mod b/go.mod
index 15e88d6..bbb8999 100644
--- a/go.mod
+++ b/go.mod
@@ -1,5 +1,15 @@
module github.com/sunshineplan/node
-go 1.21.0
+go 1.21
-require golang.org/x/net v0.18.0
+require (
+ github.com/antchfx/htmlquery v1.3.0
+ github.com/antchfx/xpath v1.2.3
+ github.com/ericchiang/css v1.3.0
+ golang.org/x/net v0.18.0
+)
+
+require (
+ github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect
+ golang.org/x/text v0.14.0 // indirect
+)
diff --git a/go.sum b/go.sum
index ee9cb73..b13cfe3 100644
--- a/go.sum
+++ b/go.sum
@@ -1,2 +1,45 @@
+github.com/antchfx/htmlquery v1.3.0 h1:5I5yNFOVI+egyia5F2s/5Do2nFWxJz41Tr3DyfKD25E=
+github.com/antchfx/htmlquery v1.3.0/go.mod h1:zKPDVTMhfOmcwxheXUsx4rKJy8KEY/PU6eXr/2SebQ8=
+github.com/antchfx/xpath v1.2.3 h1:CCZWOzv5bAqjVv0offZ2LVgVYFbeldKQVuLNbViZdes=
+github.com/antchfx/xpath v1.2.3/go.mod h1:i54GszH55fYfBmoZXapTHN8T8tkcHfRgLyVwwqzXNcs=
+github.com/ericchiang/css v1.3.0 h1:e0vS+vpujMjtT3/SYu7qTHn1LVzXWcLCCDjlfq3YlLY=
+github.com/ericchiang/css v1.3.0/go.mod h1:sVSdL+MFR9Q4cKJMQzpIkHIDOLiK+7Wmjjhq7D+MubA=
+github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da h1:oI5xCqsCo564l8iNU+DwB5epxmsaqB+rhGL0m5jtYqE=
+github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc=
+github.com/google/go-cmp v0.5.6 h1:BKbKCqvP6I+rmFHt06ZmyQtvB8xAkWdhFyr0ZUNZcxQ=
+github.com/google/go-cmp v0.5.6/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
+github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
+golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
+golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
+golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4=
+golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
+golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
+golang.org/x/net v0.0.0-20211216030914-fe4d6282115f/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
+golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c=
+golang.org/x/net v0.5.0/go.mod h1:DivGGAXEgPSlEBzxGzZI+ZLohi+xUj054jfeKui00ws=
golang.org/x/net v0.18.0 h1:mIYleuAkSbHh0tCv7RvjL3F6ZVbLjq4+R7zbOn3Kokg=
golang.org/x/net v0.18.0/go.mod h1:/czyP5RqHAH4odGYxBJ1qz0+CE5WZ+2j1YgoEo8F2jQ=
+golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
+golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
+golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
+golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.4.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
+golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
+golang.org/x/term v0.4.0/go.mod h1:9P2UbLfCdcvo3p/nzKvsmas4TnlujnuoV9hGgYzW1lQ=
+golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
+golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
+golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
+golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
+golang.org/x/text v0.6.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8=
+golang.org/x/text v0.14.0 h1:ScX5w1eTa3QqT8oi6+ziP7dTV1S2+ALU0bI+0zXKWiQ=
+golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU=
+golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
+golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
+golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc=
+golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
+golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
diff --git a/node_test.go b/node_test.go
index 98c8b72..318d694 100644
--- a/node_test.go
+++ b/node_test.go
@@ -2,7 +2,8 @@ package node
import "testing"
-var soup, _ = ParseHTML(`The Dormouse's story
+var (
+ soup, _ = ParseHTML(`The Dormouse's story
The Dormouse's story
@@ -15,6 +16,11 @@ and they lived at the bottom of a well.
...
`)
+ elsie = `Elsie`
+ lacie = `Lacie`
+ tillie = `Tillie`
+)
+
func TestSoup(t *testing.T) {
title := soup.Find(0, Title)
if html := title.Readable(); html != "The Dormouse's story" {
@@ -36,8 +42,8 @@ func TestSoup(t *testing.T) {
if class, _ := p.Attrs().Get("class"); class != "title" {
t.Errorf("expected class %q; got %q", "title", class)
}
- if a := soup.Find(0, A).Readable(); a != `Elsie` {
- t.Errorf("expected a %q; got %q", `Elsie`, a)
+ if a := soup.Find(0, A).Readable(); a != elsie {
+ t.Errorf("expected a %q; got %q", elsie, a)
}
if a := soup.FindAll(0, A); len(a) != 3 {
t.Errorf("expected a %d; got %d", 3, len(a))
@@ -49,8 +55,8 @@ func TestSoup(t *testing.T) {
}
}
}
- if a := soup.Find(0, nil, Id("link3")).Readable(); a != `Tillie` {
- t.Errorf("expected a %q; got %q", `Tillie`, a)
+ if a := soup.Find(0, nil, Id("link3")).Readable(); a != tillie {
+ t.Errorf("expected a %q; got %q", tillie, a)
}
s := `The Dormouse's story
@@ -176,17 +182,17 @@ func TestGoingUp(t *testing.T) {
func TestGoingSideways(t *testing.T) {
if node := soup.Find(0, A).NextSibling(); node.Readable() != ",\n" {
t.Errorf("expected string %q; got %q", ",\n ", node.GetText())
- } else if html := node.NextSibling().Readable(); html != `Lacie` {
- t.Errorf("expected html %q; got %q", `Lacie`, html)
+ } else if html := node.NextSibling().Readable(); html != lacie {
+ t.Errorf("expected html %q; got %q", lacie, html)
}
if nextSiblings := soup.Find(0, A).NextSiblings(); len(nextSiblings) != 5 {
t.Errorf("expected next_siblings %d; got %d", 5, len(nextSiblings))
} else {
expected := []string{
",\n",
- `Lacie`,
+ lacie,
" and\n",
- `Tillie`,
+ tillie,
";\nand they lived at the bottom of a well.",
}
for i, nextSibling := range nextSiblings {
@@ -200,9 +206,9 @@ func TestGoingSideways(t *testing.T) {
} else {
expected := []string{
" and\n",
- `Lacie`,
+ lacie,
",\n",
- `Elsie`,
+ elsie,
"Once upon a time there were three little sisters; and their names were\n",
}
for i, nextSibling := range prevSiblings {
@@ -215,8 +221,8 @@ func TestGoingSideways(t *testing.T) {
func TestGoingBackAndForth(t *testing.T) {
a := soup.Find(0, A, Id("link3"))
- if html := a.Readable(); html != `Tillie` {
- t.Errorf("expected html %q; got %q", `Tillie`, html)
+ if html := a.Readable(); html != tillie {
+ t.Errorf("expected html %q; got %q", tillie, html)
}
if html := a.NextSibling().Readable(); html != ";\nand they lived at the bottom of a well." {
t.Errorf("expected html %q; got %q", ";\nand they lived at the bottom of a well.", html)
@@ -227,8 +233,8 @@ func TestGoingBackAndForth(t *testing.T) {
if html := a.PrevNode().Readable(); html != " and\n" {
t.Errorf("expected html %q; got %q", " and\n", html)
}
- if html := a.PrevNode().NextNode().Readable(); html != `Tillie` {
- t.Errorf("expected html %q; got %q", `Tillie`, html)
+ if html := a.PrevNode().NextNode().Readable(); html != tillie {
+ t.Errorf("expected html %q; got %q", tillie, html)
}
if nextNodes := a.NextNodes(); len(nextNodes) != 6 {
t.Errorf("expected next_elements %d; got %d", 6, len(nextNodes))
diff --git a/string_test.go b/string_test.go
index c364f3f..26e2b55 100644
--- a/string_test.go
+++ b/string_test.go
@@ -8,10 +8,8 @@ import (
func TestString(t *testing.T) {
if nodes := soup.FindAllString(0, String("Elsie")); len(nodes) != 1 {
t.Errorf("expected nodes %d; got %d", 1, len(nodes))
- } else {
- if text := nodes[0].String(); text != "Elsie" {
- t.Errorf("expected string %q; got %q", "Elsie", text)
- }
+ } else if text := nodes[0].String(); text != "Elsie" {
+ t.Errorf("expected string %q; got %q", "Elsie", text)
}
if nodes := soup.FindAllString(0, String([]string{"Tillie", "Elsie", "Lacie"})); len(nodes) != 3 {
t.Errorf("expected nodes %d; got %d", 3, len(nodes))
@@ -49,9 +47,7 @@ func TestString(t *testing.T) {
}
if nodes := soup.FindAllString(0, Text("Elsie")); len(nodes) != 1 {
t.Errorf("expected nodes %d; got %d", 1, len(nodes))
- } else {
- if text := nodes[0].String(); text != "Elsie" {
- t.Errorf("expected string %q; got %q", "Elsie", text)
- }
+ } else if text := nodes[0].String(); text != "Elsie" {
+ t.Errorf("expected string %q; got %q", "Elsie", text)
}
}
diff --git a/tag.go b/tag.go
index 070a14c..b3e7211 100644
--- a/tag.go
+++ b/tag.go
@@ -16,6 +16,8 @@ var (
B = Tag("b")
Body = Tag("body")
Div = Tag("div")
+ Em = Tag("em")
+ Form = Tag("form")
H1 = Tag("h1")
H2 = Tag("h2")
Head = Tag("head")