write function to parse url via url crate

This commit is contained in:
mykola2312 2024-02-24 15:20:38 +02:00
parent 475afbc7ce
commit 9420ad681b
3 changed files with 25 additions and 11 deletions

17
Cargo.lock generated
View file

@ -294,9 +294,9 @@ checksum = "00b0228411908ca8685dba7fc2cdd70ec9990a6e753e89b6ac91a84c40fbaf4b"
[[package]]
name = "form_urlencoded"
version = "1.2.0"
version = "1.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a62bc1cf6f830c2ec14a513a9fb124d0a213a629668a4186f329db21fe045652"
checksum = "e13624c2627564efccf4934284bdd98cbaa14e79b0b5a141218e507b3a823456"
dependencies = [
"percent-encoding",
]
@ -523,9 +523,9 @@ checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39"
[[package]]
name = "idna"
version = "0.4.0"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7d20d6b07bfbc108882d88ed8e37d39636dcc260e15e30c45e6ba089610b917c"
checksum = "634d9b1461af396cad843f47fdba5597a4f9e6ddd4bfb6ff5d85028c25cb12f6"
dependencies = [
"unicode-bidi",
"unicode-normalization",
@ -668,6 +668,7 @@ dependencies = [
"serde_json",
"teloxide",
"tokio",
"url",
]
[[package]]
@ -777,9 +778,9 @@ dependencies = [
[[package]]
name = "percent-encoding"
version = "2.3.0"
version = "2.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9b2a4787296e9989611394c33f193f676704af1686e70b8f8033ab5ba9a35a94"
checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e"
[[package]]
name = "pin-project"
@ -1423,9 +1424,9 @@ dependencies = [
[[package]]
name = "url"
version = "2.4.1"
version = "2.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "143b538f18257fac9cad154828a57c6bf5157e1aa604d4816b5995bf6de87ae5"
checksum = "31e6302e3bb753d46e83516cae55ae196fc0c309407cf11ab35cc51a4c2a4633"
dependencies = [
"form_urlencoded",
"idna",

View file

@ -14,3 +14,4 @@ serde = { version = "1.0.196", features = ["derive"] }
serde_json = "1.0.113"
ordered-float = "4.2.0"
regex = "1.10.3"
url = "2.5.0"

View file

@ -1,4 +1,5 @@
use regex::Regex;
use url::Url;
// https://stackoverflow.com/questions/6038061/regular-expression-to-find-urls-within-a-string
const RE_URL: &str =
@ -12,16 +13,27 @@ pub fn extract_url(text: &str) -> Option<&str> {
}
}
pub fn parse_url(url: &str) -> Option<Url> {
Url::parse(url).ok()
}
#[cfg(test)]
mod tests {
use crate::bot::sanitize::extract_url;
use crate::bot::sanitize::{extract_url, parse_url};
#[test]
fn test_extract_url() {
    // https://www.youtube.com/watch?v=00000000000
    // Each case pairs an input text with the URL expected to be extracted from it.
    let cases = [
        ("test http://www.test.com/id/1", Some("http://www.test.com/id/1")),
        ("https://www.test.com 3", Some("https://www.test.com")),
        ("there is no any url", None),
    ];

    for (text, expected) in cases {
        assert_eq!(extract_url(text), expected);
    }
}
#[test]
fn test_parse_url() {
    // Success path: well-formed absolute URLs expose their host.
    let url = parse_url("https://www.youtube.com/watch?v=00000000000").unwrap();
    assert_eq!(url.host_str(), Some("www.youtube.com"));

    let url = parse_url("https://youtu.be/00000000000").unwrap();
    assert_eq!(url.host_str(), Some("youtu.be"));

    // Failure path: text without a scheme is not an absolute URL, so the
    // parse error must be mapped to `None` rather than panicking.
    assert!(parse_url("there is no any url").is_none());
    assert!(parse_url("").is_none());
}
}