Skip to content

Commit

Permalink
test: ✅ add test case
Browse files Browse the repository at this point in the history
  • Loading branch information
holmofy committed Sep 1, 2024
1 parent fd1b553 commit 8c04850
Show file tree
Hide file tree
Showing 4 changed files with 163 additions and 11 deletions.
44 changes: 43 additions & 1 deletion src/css_selector.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,16 @@ use scraper::ElementRef;

/// Html Response
pub struct Html {
pub(crate) value: scraper::Html,
value: scraper::Html,
}

impl Html {
/// Builds an [`Html`] from raw markup.
///
/// `html_str` is handed to `scraper::Html::parse_fragment` and the parsed
/// tree is stored as the wrapped value.
pub fn new(html_str: &str) -> Self {
    let value = scraper::Html::parse_fragment(html_str);
    Self { value }
}
/// Select elements in HTML using CSS selector
pub fn select(&self, selector: &str) -> Result<Selectable<scraper::Html>> {
Selectable::wrap(selector, &self.value)
Expand Down Expand Up @@ -164,3 +170,39 @@ impl<'a> SelectItem<'a> {
Selectable::wrap(selector, &self.element)
}
}

// Unit tests for the CSS-selector wrapper; only compiled for test builds.
#[cfg(test)]
mod tests {

    /// Parses a small page, selects `#content`, then checks both `<p>` children
    /// through nested `nth-child` selectors.
    #[test]
    fn test_css_selector() {
        use super::*;

        let html_str = r#"
<html>
<body>
<div id="content">
<p>Hello, World!</p>
<p>This is a test.</p>
</div>
</body>
</html>
"#;

        let html = Html::new(html_str);
        let content = html.select("#content").expect("`#content` is a valid selector");
        let content = content.first().expect("`#content` should match an element");
        assert_eq!(content.attr("id").unwrap(), "content");

        // `expect` keeps the selector error in the panic message, unlike `.ok().unwrap()`.
        let p1 = content
            .select("p:nth-child(1)")
            .expect("`p:nth-child(1)` is a valid selector");
        let p1 = p1.first().expect("first paragraph should be found");
        assert_eq!(p1.text(), "Hello, World!");

        let p2 = content
            .select("p:nth-child(2)")
            .expect("`p:nth-child(2)` is a valid selector");
        let p2 = p2.first().expect("second paragraph should be found");
        assert_eq!(p2.text(), "This is a test.");
    }
}
73 changes: 72 additions & 1 deletion src/jsonpath.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,16 @@ use serde::de::DeserializeOwned;

/// Json Response
pub struct Json {
pub(crate) value: serde_json::Value,
value: serde_json::Value,
}

impl Json {
/// Builds a [`Json`] by parsing `json` with `serde_json::from_str`.
///
/// # Errors
///
/// Returns the converted `serde_json` error when `json` cannot be parsed.
pub fn new(json: &str) -> Result<Self> {
    Ok(Self {
        value: serde_json::from_str(json)?,
    })
}

/// Use jsonpath to select json fragments and convert them into structures
pub fn select<T: DeserializeOwned>(&self, path: &str) -> Result<Vec<T>> {
jsonpath::Selector::new()
Expand Down Expand Up @@ -45,4 +51,69 @@ impl Json {
.select_as_str()
.map_err(ScraperError::from)
}

/// Use jsonpath to select the first matching json value and render it as a string.
///
/// The match is rendered through its `to_string()` form, so a JSON string
/// value keeps its surrounding double quotes.
///
/// # Errors
///
/// Propagates any error raised while compiling `path` or running the
/// selection, and returns `ScraperError::JsonPathMatchError` when `path`
/// matches nothing.
pub fn select_one_as_str(&self, path: &str) -> Result<String> {
    let matches = jsonpath::Selector::new()
        .str_path(path)?
        .value(&self.value)
        .select()?;
    // Only the first hit is used; formatting works through the reference,
    // so no intermediate copy of the match is needed.
    matches
        .first()
        .map(|value| value.to_string())
        .ok_or_else(|| {
            ScraperError::JsonPathMatchError(format!(
                "The \"{}\" jsonpath did not find data in json",
                path
            ))
        })
}
}

// Unit tests for the jsonpath wrapper; only compiled for test builds.
#[cfg(test)]
mod tests {

    /// Checks single-value selection as a string and typed selection of an array.
    #[test]
    fn test_jsonpath() {
        use super::Json;
        use serde::Deserialize;
        let json = r#"
{
"time":"2020.10.12 21:22:34",
"data":[
{"a":"A1","B":"b1","c":1},
{"a":"A2","B":"b2","c":2}
]
}
"#;

        let json = Json::new(json).expect("fixture should be valid JSON");

        // String values come back in their JSON form, quotes included.
        assert_eq!(
            json.select_one_as_str("$.time").unwrap(),
            r#""2020.10.12 21:22:34""#
        );

        #[derive(Deserialize)]
        struct DataItem {
            a: String,
            #[serde(rename = "B")]
            b: String,
            c: i32,
        }

        let data: Vec<DataItem> = json.select("$.data[*]").unwrap();
        assert_eq!(data.len(), 2);

        // The length was asserted above, so direct indexing cannot go out of bounds.
        let d1 = &data[0];
        assert_eq!(d1.a, "A1");
        assert_eq!(d1.b, "b1");
        assert_eq!(d1.c, 1);

        let d2 = &data[1];
        assert_eq!(d2.a, "A2");
        assert_eq!(d2.b, "b2");
        assert_eq!(d2.c, 2);
    }
}
12 changes: 4 additions & 8 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -72,8 +72,8 @@ impl ScraperResponse for Response {
#[cfg(feature = "jsonpath")]
async fn jsonpath(self) -> Result<Json> {
if self.status().is_success() {
let json_value = self.json().await?;
Ok(Json { value: json_value })
let json = self.text().await?;
Ok(Json::new(json.as_str())?)
} else {
let url = self.url().to_string();
let status_code = self.status().as_u16();
Expand All @@ -86,9 +86,7 @@ impl ScraperResponse for Response {
async fn css_selector(self) -> Result<Html> {
if self.status().is_success() {
let html_str = self.html().await?;
Ok(Html {
value: scraper::Html::parse_fragment(html_str.as_str()),
})
Ok(Html::new(html_str.as_str()))
} else {
let url = self.url().to_string();
let status_code = self.status().as_u16();
Expand All @@ -101,9 +99,7 @@ impl ScraperResponse for Response {
async fn xpath(self) -> Result<XHtml> {
if self.status().is_success() {
let html_str = self.html().await?;
let parser = libxml::parser::Parser::default_html();
let doc = parser.parse_string(html_str)?;
Ok(XHtml { doc })
Ok(XHtml::new(html_str)?)
} else {
let url = self.url().to_string();
let status_code = self.status().as_u16();
Expand Down
45 changes: 44 additions & 1 deletion src/xpath.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ use std::collections::HashSet;
/// Html Response
#[derive(Clone)]
pub struct XHtml {
pub(crate) doc: Document,
doc: Document,
}

/// Wrap HTML document and compiled xpath
Expand All @@ -20,6 +20,12 @@ pub struct XPathResult {
}

impl XHtml {
/// Builds an [`XHtml`] by parsing `html_str` with libxml's default HTML parser.
///
/// # Errors
///
/// Returns the converted parser error when `parse_string` fails.
pub fn new<S: Into<String>>(html_str: S) -> Result<Self> {
    let source: String = html_str.into();
    let doc = libxml::parser::Parser::default_html().parse_string(source)?;
    Ok(Self { doc })
}
/// Using xpath to extract results from html
pub fn select(&self, xpath: &str) -> Result<XPathResult> {
let context = Context::new(&self.doc)
Expand Down Expand Up @@ -175,3 +181,40 @@ impl Node {
.map(|v| v.trim().into()))
}
}

// Unit tests for the xpath wrapper; only compiled for test builds.
#[cfg(test)]
mod tests {

    /// Selects the `content` div by xpath, then checks its attribute, its
    /// children and a relative `findvalue` lookup.
    #[test]
    fn test_select_xpath() {
        use super::*;
        let html_str = r#"
<html>
<body>
<div id="content">
<p>Hello, World!</p>
<p>This is a test.</p>
</div>
</body>
</html>
"#;

        let xhtml = XHtml::new(html_str).expect("parse xhtml failed");

        // `expect` surfaces the underlying error instead of discarding it via `.ok()`.
        let content = xhtml
            .select("//div[@id='content']")
            .expect("xpath should evaluate")
            .as_node()
            .expect("xpath should match a node");

        assert_eq!(content.attr("id").unwrap(), "content");
        let children = content.children();
        assert_eq!(children.len(), 2);
        assert_eq!(children[0].text(), "Hello, World!");
        assert_eq!(children[1].text(), "This is a test.");

        let p1 = content
            .findvalue("./p[position()=1]")
            .unwrap()
            .expect("first paragraph should have a value");
        assert_eq!(p1, "Hello, World!");
    }
}

0 comments on commit 8c04850

Please sign in to comment.