Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 28 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,7 @@ type Attributes interface {

// Finder represents a set of methods for finding nodes.
type Finder interface {
// Find searches for a single node in the parse tree based on the specified find method and filters.
// Find searches for the first matched node in the parse tree based on the specified find method and filters.
Find(FindMethod, TagFilter, ...Filter) Node

// FindN searches for up to n nodes in the parse tree based on the specified find method and filters.
Expand All @@ -164,14 +164,33 @@ type Finder interface {
// FindAll searches for all nodes in the parse tree based on the specified find method and filters.
FindAll(FindMethod, TagFilter, ...Filter) []Node

// FindString searches for a single text node in the parse tree based on the specified find method and filters.
// FindString searches for the first matched text node in the parse tree based on the specified find method and filters.
FindString(FindMethod, StringFilter) TextNode

// FindStringN searches for up to n text nodes in the parse tree based on the specified find method and filters.
FindStringN(FindMethod, int, StringFilter) []TextNode

// FindAllString searches for all text nodes in the parse tree based on the specified find method and filters.
FindAllString(FindMethod, StringFilter) []TextNode

// CSS selectors support

// Select searches for the first matched node in the parse tree based on the css selector.
// Panics if the selector cannot be parsed.
Select(string) Node

// SelectAll searches for all nodes in the parse tree based on the css selector.
// Panics if the selector cannot be parsed.
SelectAll(string) []Node

// xpath support

// XPath searches for all nodes that match the specified XPath expression. Panics if the expression cannot be parsed.
XPath(string) []Node

// Evaluate returns the result of the xpath expression.
// The result type of the expression is one of the following: bool, float64, string, *xpath.NodeIterator.
Evaluate(string) (any, error)
}

// FindMethod represents the method used to search for nodes in the parse tree.
Expand Down Expand Up @@ -223,6 +242,13 @@ type StringFilter interface {
}
```

## Credits

This repo relies on the following third-party projects:

* [ericchiang/css](https://github.com/ericchiang/css)
* [antchfx/xpath](https://github.com/antchfx/xpath)

## License

[The MIT License (MIT)](https://github.com/sunshineplan/node/blob/main/LICENSE)
24 changes: 5 additions & 19 deletions class_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,7 @@ func TestClass(t *testing.T) {
if nodes := soup.FindAll(0, A, Class("sister")); len(nodes) != 3 {
t.Errorf("expected nodes %d; got %d", 3, len(nodes))
} else {
expected := []string{
`<a href="http://example.com/elsie" class="sister" id="link1">Elsie</a>`,
`<a href="http://example.com/lacie" class="sister" id="link2">Lacie</a>`,
`<a href="http://example.com/tillie" class="sister" id="link3">Tillie</a>`,
}
expected := []string{elsie, lacie, tillie}
for i, node := range nodes {
if html := node.Readable(); html != expected[i] {
t.Errorf("expected html #%d %q; got %q", i, expected[i], html)
Expand All @@ -22,21 +18,15 @@ func TestClass(t *testing.T) {
}
if nodes := soup.FindAll(0, nil, Class(regexp.MustCompile("itl"))); len(nodes) != 1 {
t.Errorf("expected nodes %d; got %d", 1, len(nodes))
} else {
if html := nodes[0].Readable(); html != `<p class="title"><b>The Dormouse's story</b></p>` {
t.Errorf("expected html %q; got %q", `<p class="title"><b>The Dormouse's story</b></p>`, html)
}
} else if html := nodes[0].Readable(); html != `<p class="title"><b>The Dormouse's story</b></p>` {
t.Errorf("expected html %q; got %q", `<p class="title"><b>The Dormouse's story</b></p>`, html)
}
if nodes := soup.FindAll(0, A, Class(func(class string, node Node) bool {
return node.HasAttr("class") && len(class) == 6
})); len(nodes) != 3 {
t.Errorf("expected nodes %d; got %d", 3, len(nodes))
} else {
expected := []string{
`<a href="http://example.com/elsie" class="sister" id="link1">Elsie</a>`,
`<a href="http://example.com/lacie" class="sister" id="link2">Lacie</a>`,
`<a href="http://example.com/tillie" class="sister" id="link3">Tillie</a>`,
}
expected := []string{elsie, lacie, tillie}
for i, node := range nodes {
if html := node.Readable(); html != expected[i] {
t.Errorf("expected html #%d %q; got %q", i, expected[i], html)
Expand All @@ -46,11 +36,7 @@ func TestClass(t *testing.T) {
if nodes := soup.FindAll(0, A, Attr("class", "sister")); len(nodes) != 3 {
t.Errorf("expected nodes %d; got %d", 3, len(nodes))
} else {
expected := []string{
`<a href="http://example.com/elsie" class="sister" id="link1">Elsie</a>`,
`<a href="http://example.com/lacie" class="sister" id="link2">Lacie</a>`,
`<a href="http://example.com/tillie" class="sister" id="link3">Tillie</a>`,
}
expected := []string{elsie, lacie, tillie}
for i, node := range nodes {
if html := node.Readable(); html != expected[i] {
t.Errorf("expected html #%d %q; got %q", i, expected[i], html)
Expand Down
8 changes: 7 additions & 1 deletion example_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,14 +19,20 @@ func ExampleAttr() {
if err != nil {
log.Fatal(err)
}
if nodes := node.FindAll(0, nil, Attr("name", "email")); len(nodes) != 1 {
if nodes := node.SelectAll(`[name="email"]`); len(nodes) != 1 {
log.Fatalf("expected nodes %d; got %d", 1, len(nodes))
} else {
fmt.Println(nodes[0].Readable())
}
if nodes := node.XPath(`//*[@name="email"]`); len(nodes) != 1 {
log.Fatalf("expected nodes %d; got %d", 1, len(nodes))
} else {
fmt.Println(nodes[0].Readable())
}
// Output:
// <div data-foo="value">foo!</div>
// <input name="email"/>
// <input name="email"/>
}

func ExampleClass() {
Expand Down
18 changes: 4 additions & 14 deletions filter_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,8 @@ import (
func TestFilter(t *testing.T) {
if nodes := soup.FindAll(0, B); len(nodes) != 1 {
t.Errorf("expected b %d; got %d", 1, len(nodes))
} else {
if html := nodes[0].Readable(); html != "<b>The Dormouse's story</b>" {
t.Errorf("expected html %q; got %q", "<b>The Dormouse's story</b>", html)
}
} else if html := nodes[0].Readable(); html != "<b>The Dormouse's story</b>" {
t.Errorf("expected html %q; got %q", "<b>The Dormouse's story</b>", html)
}
if nodes := soup.FindAll(0, Tag(regexp.MustCompile("^b"))); len(nodes) != 2 {
t.Errorf("expected ^b %d; got %d", 2, len(nodes))
Expand All @@ -39,12 +37,7 @@ func TestFilter(t *testing.T) {
if nodes := soup.FindAll(0, Tags("a", "b")); len(nodes) != 4 {
t.Errorf("expected nodes %d; got %d", 4, len(nodes))
} else {
expected := []string{
"<b>The Dormouse's story</b>",
`<a href="http://example.com/elsie" class="sister" id="link1">Elsie</a>`,
`<a href="http://example.com/lacie" class="sister" id="link2">Lacie</a>`,
`<a href="http://example.com/tillie" class="sister" id="link3">Tillie</a>`,
}
expected := []string{"<b>The Dormouse's story</b>", elsie, lacie, tillie}
for i, node := range nodes {
if html := node.Readable(); html != expected[i] {
t.Errorf("expected html #%d %q; got %q", i, expected[i], html)
Expand Down Expand Up @@ -82,10 +75,7 @@ func TestFilter(t *testing.T) {
})); len(nodes) != 2 {
t.Errorf("expected nodes %d; got %d", 2, len(nodes))
} else {
expected := []string{
`<a href="http://example.com/elsie" class="sister" id="link1">Elsie</a>`,
`<a href="http://example.com/tillie" class="sister" id="link3">Tillie</a>`,
}
expected := []string{elsie, tillie}
for i, node := range nodes {
if html := node.Readable(); !strings.HasPrefix(html, expected[i]) {
t.Errorf("expected html #%d %q; got %q", i, expected[i], html)
Expand Down
56 changes: 54 additions & 2 deletions finder.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,15 @@ package node
import (
"context"

"github.com/antchfx/htmlquery"
"github.com/antchfx/xpath"
"github.com/ericchiang/css"
"golang.org/x/net/html"
)

// Finder represents a set of methods for finding nodes.
type Finder interface {
// Find searches for a single node in the parse tree based on the specified find method and filters.
// Find searches for the first matched node in the parse tree based on the specified find method and filters.
Find(FindMethod, TagFilter, ...Filter) Node

// FindN searches for up to n nodes in the parse tree based on the specified find method and filters.
Expand All @@ -17,14 +20,33 @@ type Finder interface {
// FindAll searches for all nodes in the parse tree based on the specified find method and filters.
FindAll(FindMethod, TagFilter, ...Filter) []Node

// FindString searches for a single text node in the parse tree based on the specified find method and filters.
// FindString searches for the first matched text node in the parse tree based on the specified find method and filters.
FindString(FindMethod, StringFilter) TextNode

// FindStringN searches for up to n text nodes in the parse tree based on the specified find method and filters.
FindStringN(FindMethod, int, StringFilter) []TextNode

// FindAllString searches for all text nodes in the parse tree based on the specified find method and filters.
FindAllString(FindMethod, StringFilter) []TextNode

// CSS selectors support

// Select searches for the first matched node in the parse tree based on the css selector.
// Panics if the selector cannot be parsed.
Select(string) Node

// SelectAll searches for all nodes in the parse tree based on the css selector.
// Panics if the selector cannot be parsed.
SelectAll(string) []Node

// xpath support

// XPath searches for all nodes that match the specified XPath expression. Panics if the expression cannot be parsed.
XPath(string) []Node

// Evaluate returns the result of the xpath expression.
// The result type of the expression is one of the following: bool, float64, string, *xpath.NodeIterator.
Evaluate(string) (any, error)
}

// FindMethod represents the method used to search for nodes in the parse tree.
Expand Down Expand Up @@ -177,3 +199,33 @@ func (n *htmlNode) FindAllString(method FindMethod, filter StringFilter) (res []
}
return
}

// Select returns the first node matched by the CSS selector sel, or nil when
// nothing matches. It delegates to SelectAll, so it panics if the selector
// cannot be parsed.
func (n *htmlNode) Select(sel string) Node {
	if matches := n.SelectAll(sel); len(matches) > 0 {
		return matches[0]
	}
	return nil
}

// SelectAll returns every node matched by the CSS selector sel, each wrapped
// with NewNode. The result is nil when nothing matches. The selector is
// compiled with css.MustParse, so an invalid selector panics.
func (n *htmlNode) SelectAll(sel string) (res []Node) {
	selector := css.MustParse(sel)
	matched := selector.Select(n.Raw())
	for idx := range matched {
		res = append(res, NewNode(matched[idx]))
	}
	return res
}

// XPath returns every node matched by the XPath expression expr, each wrapped
// with NewNode. The result is nil when nothing matches. Matching is delegated
// to htmlquery.Find on the underlying raw node.
func (n *htmlNode) XPath(expr string) (res []Node) {
	found := htmlquery.Find(n.Raw(), expr)
	for idx := range found {
		res = append(res, NewNode(found[idx]))
	}
	return res
}

// Evaluate compiles the XPath expression expr and evaluates it against this
// node. It returns a nil result and the compile error when expr cannot be
// compiled; otherwise it returns the evaluation result with a nil error.
func (n *htmlNode) Evaluate(expr string) (any, error) {
	compiled, err := xpath.Compile(expr)
	if err != nil {
		return nil, err
	}
	navigator := htmlquery.CreateXPathNavigator(n.Raw())
	return compiled.Evaluate(navigator), nil
}
Loading