From 83136d715803d5fb8d8d5de1aa04505d2b5746b2 Mon Sep 17 00:00:00 2001 From: mykola2312 Date: Sat, 24 Feb 2024 15:20:38 +0200 Subject: [PATCH] write function to parse url via url crate --- Cargo.lock | 17 +++++++++-------- Cargo.toml | 1 + src/bot/sanitize.rs | 18 +++++++++++++++--- 3 files changed, 25 insertions(+), 11 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 1e11d37..d07e0f3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -294,9 +294,9 @@ checksum = "00b0228411908ca8685dba7fc2cdd70ec9990a6e753e89b6ac91a84c40fbaf4b" [[package]] name = "form_urlencoded" -version = "1.2.0" +version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a62bc1cf6f830c2ec14a513a9fb124d0a213a629668a4186f329db21fe045652" +checksum = "e13624c2627564efccf4934284bdd98cbaa14e79b0b5a141218e507b3a823456" dependencies = [ "percent-encoding", ] @@ -523,9 +523,9 @@ checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39" [[package]] name = "idna" -version = "0.4.0" +version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7d20d6b07bfbc108882d88ed8e37d39636dcc260e15e30c45e6ba089610b917c" +checksum = "634d9b1461af396cad843f47fdba5597a4f9e6ddd4bfb6ff5d85028c25cb12f6" dependencies = [ "unicode-bidi", "unicode-normalization", @@ -668,6 +668,7 @@ dependencies = [ "serde_json", "teloxide", "tokio", + "url", ] [[package]] @@ -777,9 +778,9 @@ dependencies = [ [[package]] name = "percent-encoding" -version = "2.3.0" +version = "2.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9b2a4787296e9989611394c33f193f676704af1686e70b8f8033ab5ba9a35a94" +checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" [[package]] name = "pin-project" @@ -1423,9 +1424,9 @@ dependencies = [ [[package]] name = "url" -version = "2.4.1" +version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"143b538f18257fac9cad154828a57c6bf5157e1aa604d4816b5995bf6de87ae5" +checksum = "31e6302e3bb753d46e83516cae55ae196fc0c309407cf11ab35cc51a4c2a4633" dependencies = [ "form_urlencoded", "idna", diff --git a/Cargo.toml b/Cargo.toml index 34f5b2f..a072f31 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -14,3 +14,4 @@ serde = { version = "1.0.196", features = ["derive"] } serde_json = "1.0.113" ordered-float = "4.2.0" regex = "1.10.3" +url = "2.5.0" diff --git a/src/bot/sanitize.rs b/src/bot/sanitize.rs index ca47400..7954af7 100644 --- a/src/bot/sanitize.rs +++ b/src/bot/sanitize.rs @@ -1,4 +1,5 @@ use regex::Regex; +use url::Url; // https://stackoverflow.com/questions/6038061/regular-expression-to-find-urls-within-a-string const RE_URL: &str = @@ -12,16 +13,27 @@ pub fn extract_url(text: &str) -> Option<&str> { } } +pub fn parse_url(url: &str) -> Option<Url> { + Url::parse(url).ok() +} + #[cfg(test)] mod tests { - use crate::bot::sanitize::extract_url; + use crate::bot::sanitize::{extract_url, parse_url}; #[test] fn test_extract_url() { - // https://www.youtube.com/watch?v=00000000000 - assert_eq!(extract_url("test http://www.test.com/id/1"), Some("http://www.test.com/id/1")); assert_eq!(extract_url("https://www.test.com 3"), Some("https://www.test.com")); assert_eq!(extract_url("there is no any url"), None); } + + #[test] + fn test_parse_url() { + let url = parse_url("https://www.youtube.com/watch?v=00000000000").unwrap(); + assert_eq!(url.host_str().unwrap(), "www.youtube.com"); + + let url = parse_url("https://youtu.be/00000000000").unwrap(); + assert_eq!(url.host_str().unwrap(), "youtu.be"); + } } \ No newline at end of file