diff --git a/src-rust/rsc/html_filter_rules/itsfoss.com.toml b/src-rust/rsc/html_filter_rules/itsfoss.com.toml
deleted file mode 100644
index 5fff284..0000000
--- a/src-rust/rsc/html_filter_rules/itsfoss.com.toml
+++ /dev/null
@@ -1,10 +0,0 @@
-tags = [
- "script", "style", "link", "meta", "li", "desc", "title", "svg", "path", "dialog", "select", "head", "header",
- "foot", "footer", "ul", "nav", "button", "form", "input", "picture", "time", "h2", "h3", "h4", "i", "aside",
- "FreeStarVideoAdContainer", "freestar-video-parent", "reestar-video-child"
-]
-
-classes = [
- "progress-bar", "js-menu", "social-share", "post-info__readtime", "cta__description", "cta__inner", "cta__content",
- "hide-mobile", "js-toc", "author-card", "related-posts"
-]
diff --git a/src-rust/toolkit-core/src/models/html_filter_rule.rs b/src-rust/toolkit-core/src/models/html_filter_rule.rs
index 00927b0..3514b21 100644
--- a/src-rust/toolkit-core/src/models/html_filter_rule.rs
+++ b/src-rust/toolkit-core/src/models/html_filter_rule.rs
@@ -1,14 +1,34 @@
-#[derive(PartialEq, Eq, Debug, serde::Deserialize)]
-pub struct HTMLFilterRule {
- pub tags: Vec,
- pub classes: Vec,
+pub struct HTMLFilterRule<'a> { // per-site lists of names handed to the HTML filter
+ pub tags: Vec<&'a str>, // tag names for the filter, e.g. "script", "nav"
+ pub classes: Vec<&'a str>, // class names for the filter, e.g. "js-toc"
}
-impl HTMLFilterRule {
- pub fn new(tags: Vec, classes: Vec) -> Self {
+impl<'a> HTMLFilterRule<'a> {
+ fn new(tags: Vec<&'a str>, classes: Vec<&'a str>) -> Self { // private constructor: stores both lists as given
Self {
tags,
classes,
}
}
+
+    /// Returns the built-in filter rule for a site. Despite its name, `url` is compared against
+    /// bare host names (e.g. `itsfoss.com`); unmatched hosts get empty `tags` and `classes`.
+    pub fn get_filter_rule(url: &str) -> Self {
+        match url {
+            "itsfoss.com" | "news.itsfoss.com" => Self::new(
+                vec![
+                    "script", "style", "link", "meta", "li", "desc", "title", "svg", "path",
+                    "dialog", "select", "head", "header", "foot", "footer", "ul", "nav", "button",
+                    "form", "input", "picture", "time", "h2", "h3", "h4", "i", "aside",
+                    "FreeStarVideoAdContainer", "freestar-video-parent", "freestar-video-child",
+                ],
+                vec![
+                    "progress-bar", "js-menu", "social-share", "post-info__readtime",
+                    "cta__description", "cta__inner", "cta__content", "hide-mobile", "js-toc",
+                    "author-card", "related-posts",
+                ],
+            ),
+            _ => Self::new(vec![], vec![]),
+        }
+    }
}
diff --git a/src-rust/toolkit-core/src/workflow/translate/select.rs b/src-rust/toolkit-core/src/workflow/translate/select.rs
index fbd1506..b9eb796 100644
--- a/src-rust/toolkit-core/src/workflow/translate/select.rs
+++ b/src-rust/toolkit-core/src/workflow/translate/select.rs
@@ -23,32 +23,13 @@ pub fn get_content(url: &str) -> Result {
host.unwrap()
};
- let html_filter_rule_path = format!("rsc/html_filter_rules/{}.toml", host);
- let html_filter_rule_str = std::fs::read_to_string(html_filter_rule_path);
- let html_filter_rule = match html_filter_rule_str {
- Ok(html_filter_rule_str) => {
- let html_filter_rule: Result = toml::from_str(&html_filter_rule_str);
- match html_filter_rule {
- Ok(html_filter_rule) => html_filter_rule,
- Err(_) => {
- let error_msg = format!(
- "Failed to parse the HTML filter rule for the website: {}", host
- );
- return Err(error_msg);
- }
- }
- },
- Err(_) => {
- // Use the default HTML filter rule (no tags and classes to filter)
- HTMLFilterRule::new(Vec::new(), Vec::new())
- }
- };
+ let html_filter_rule = HTMLFilterRule::get_filter_rule(host);
// Filter the HTML content
let filtered_html = libhtmlfilter::get_filtered_html_fullurl_removeref(
url,
- html_filter_rule.tags.iter().map(|s| s.as_str()).collect::>().as_slice(),
- html_filter_rule.classes.iter().map(|s| s.as_str()).collect::>().as_slice()
+ &*html_filter_rule.tags,
+ &*html_filter_rule.classes
);
// Parse HTML to markdown