From edf657157a5d1a4b6b310b0b4cc418a354b666e4 Mon Sep 17 00:00:00 2001 From: Sapan Shah Date: Sun, 13 Oct 2024 12:18:03 +0530 Subject: [PATCH] fix: invalid url --- src/scrapper/scrape.rs | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/scrapper/scrape.rs b/src/scrapper/scrape.rs index 4e5dfda..ebc9df4 100644 --- a/src/scrapper/scrape.rs +++ b/src/scrapper/scrape.rs @@ -17,6 +17,7 @@ pub async fn handle(mut url: String, runtime: &TargetRuntime) -> anyhow::Result< let mut htmls = vec![]; let mut root = handle_inner(&url, runtime).await?; htmls.push(root.clone()); + let mut links = vec![]; loop { let parsed = Html::parse_document(&root); @@ -34,6 +35,8 @@ pub async fn handle(mut url: String, runtime: &TargetRuntime) -> anyhow::Result< match val { Some(val) => { let next = val.value().attr("href").unwrap_or("#"); + links.push(next.to_string()); + url = format!("{}{}", url, next); let next = handle_inner(&url, runtime).await?; @@ -51,5 +54,12 @@ pub async fn handle(mut url: String, runtime: &TargetRuntime) -> anyhow::Result< .flatten() .collect::>(); let text = mds.join("\n"); + let mut text = text.replace("../", "/"); + + links.into_iter().for_each(|v| { + let v = v.replace("../", "/"); + let x = format!("{}{}", url, v); + text = text.replace(&v, &x); + }); Ok(text) } -- 2.45.2