Write a function to find all URLs in text

This commit is contained in:
mykola2312 2024-02-24 14:32:08 +02:00
parent 0aa70432d5
commit 95a341f77c
6 changed files with 54 additions and 2 deletions

39
Cargo.lock generated
View file

@ -30,6 +30,15 @@ dependencies = [
"zerocopy", "zerocopy",
] ]
[[package]]
name = "aho-corasick"
version = "1.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b2969dcb958b36655471fc61f7e416fa76033bdd4bfed0678d8fee1e2d07a1f0"
dependencies = [
"memchr",
]
[[package]] [[package]]
name = "anyhow" name = "anyhow"
version = "1.0.75" version = "1.0.75"
@ -654,6 +663,7 @@ dependencies = [
"anyhow", "anyhow",
"dotenv", "dotenv",
"ordered-float", "ordered-float",
"regex",
"serde", "serde",
"serde_json", "serde_json",
"teloxide", "teloxide",
@ -869,6 +879,35 @@ dependencies = [
"bitflags 1.3.2", "bitflags 1.3.2",
] ]
[[package]]
name = "regex"
version = "1.10.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b62dbe01f0b06f9d8dc7d49e05a0785f153b00b2c227856282f671e0318c9b15"
dependencies = [
"aho-corasick",
"memchr",
"regex-automata",
"regex-syntax",
]
[[package]]
name = "regex-automata"
version = "0.4.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5bb987efffd3c6d0d8f5f89510bb458559eab11e4f869acb20bf845e016259cd"
dependencies = [
"aho-corasick",
"memchr",
"regex-syntax",
]
[[package]]
name = "regex-syntax"
version = "0.8.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c08c74e62047bb2de4ff487b251e4a92e24f48745648451635cec7d591162d9f"
[[package]] [[package]]
name = "reqwest" name = "reqwest"
version = "0.11.22" version = "0.11.22"

View file

@ -13,3 +13,4 @@ teloxide = { version = "0.12.2", git ="https://github.com/teloxide/teloxide", fe
serde = { version = "1.0.196", features = ["derive"] } serde = { version = "1.0.196", features = ["derive"] }
serde_json = "1.0.113" serde_json = "1.0.113"
ordered-float = "4.2.0" ordered-float = "4.2.0"
regex = "1.10.3"

View file

@ -1 +1,2 @@
pub mod bot; pub mod bot;
pub mod sanitize;

View file

@ -104,6 +104,5 @@ async fn cmd_download(bot: Bot, msg: Message, url: String) -> HandlerResult {
} }
async fn handle_message(_bot: Bot, _dialogue: MyDialogue, msg: Message) -> HandlerResult { async fn handle_message(_bot: Bot, _dialogue: MyDialogue, msg: Message) -> HandlerResult {
Ok(()) Ok(())
} }

12
src/bot/sanitize.rs Normal file
View file

@ -0,0 +1,12 @@
use regex::Regex;
// https://stackoverflow.com/questions/6038061/regular-expression-to-find-urls-within-a-string
/// Regular-expression source used to locate URLs inside free-form text.
///
/// A match consists of, in order:
/// 1. a scheme: `http`, `ftp` or `https`, followed by `://`;
/// 2. a host made of word characters, `_` or `-`, containing at least one `.`;
/// 3. a non-empty tail of path/query characters whose final character is never
///    `.`, `,` or `:`, so such punctuation directly after a URL is left out of
///    the match.
const RE_URL: &str =
r"(http|ftp|https):\/\/([\w_-]+(?:(?:\.[\w_-]+)+))([\w.,@?^=%&:\/~+#-]*[\w@?^=%&\/~+#-])";
pub fn extract_urls(text: &str) -> Vec<&str> {
let re = Regex::new(RE_URL).unwrap();
re.find_iter(text)
.map(|m| m.as_str())
.collect::<Vec<&str>>()
}

View file

@ -8,7 +8,7 @@ mod dl;
#[tokio::main] #[tokio::main]
async fn main() -> anyhow::Result<()> { async fn main() -> anyhow::Result<()> {
dotenv().ok(); dotenv().ok();
bot_main().await?; bot_main().await?;
Ok(()) Ok(())
} }