Write function to find all URLs in text
This commit is contained in:
parent
0aa70432d5
commit
95a341f77c
6 changed files with 54 additions and 2 deletions
39
Cargo.lock
generated
39
Cargo.lock
generated
|
|
@ -30,6 +30,15 @@ dependencies = [
|
|||
"zerocopy",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "aho-corasick"
|
||||
version = "1.1.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b2969dcb958b36655471fc61f7e416fa76033bdd4bfed0678d8fee1e2d07a1f0"
|
||||
dependencies = [
|
||||
"memchr",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "anyhow"
|
||||
version = "1.0.75"
|
||||
|
|
@ -654,6 +663,7 @@ dependencies = [
|
|||
"anyhow",
|
||||
"dotenv",
|
||||
"ordered-float",
|
||||
"regex",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"teloxide",
|
||||
|
|
@ -869,6 +879,35 @@ dependencies = [
|
|||
"bitflags 1.3.2",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "regex"
|
||||
version = "1.10.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b62dbe01f0b06f9d8dc7d49e05a0785f153b00b2c227856282f671e0318c9b15"
|
||||
dependencies = [
|
||||
"aho-corasick",
|
||||
"memchr",
|
||||
"regex-automata",
|
||||
"regex-syntax",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "regex-automata"
|
||||
version = "0.4.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5bb987efffd3c6d0d8f5f89510bb458559eab11e4f869acb20bf845e016259cd"
|
||||
dependencies = [
|
||||
"aho-corasick",
|
||||
"memchr",
|
||||
"regex-syntax",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "regex-syntax"
|
||||
version = "0.8.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c08c74e62047bb2de4ff487b251e4a92e24f48745648451635cec7d591162d9f"
|
||||
|
||||
[[package]]
|
||||
name = "reqwest"
|
||||
version = "0.11.22"
|
||||
|
|
|
|||
|
|
@ -13,3 +13,4 @@ teloxide = { version = "0.12.2", git ="https://github.com/teloxide/teloxide", fe
|
|||
serde = { version = "1.0.196", features = ["derive"] }
|
||||
serde_json = "1.0.113"
|
||||
ordered-float = "4.2.0"
|
||||
regex = "1.10.3"
|
||||
|
|
|
|||
|
|
@ -1 +1,2 @@
|
|||
pub mod bot;
|
||||
pub mod sanitize;
|
||||
|
|
|
|||
|
|
@ -104,6 +104,5 @@ async fn cmd_download(bot: Bot, msg: Message, url: String) -> HandlerResult {
|
|||
}
|
||||
|
||||
/// Message handler stub: ignores its arguments and immediately reports success.
///
/// Every parameter is underscore-prefixed because none of them is read yet; this keeps
/// the build free of `unused_variables` warnings until the handler gets a real body.
async fn handle_message(_bot: Bot, _dialogue: MyDialogue, _msg: Message) -> HandlerResult {
    Ok(())
}
|
||||
|
|
|
|||
12
src/bot/sanitize.rs
Normal file
12
src/bot/sanitize.rs
Normal file
|
|
@ -0,0 +1,12 @@
|
|||
use regex::Regex;
|
||||
|
||||
// https://stackoverflow.com/questions/6038061/regular-expression-to-find-urls-within-a-string
|
||||
const RE_URL: &str =
|
||||
r"(http|ftp|https):\/\/([\w_-]+(?:(?:\.[\w_-]+)+))([\w.,@?^=%&:\/~+#-]*[\w@?^=%&\/~+#-])";
|
||||
|
||||
pub fn extract_urls(text: &str) -> Vec<&str> {
|
||||
let re = Regex::new(RE_URL).unwrap();
|
||||
re.find_iter(text)
|
||||
.map(|m| m.as_str())
|
||||
.collect::<Vec<&str>>()
|
||||
}
|
||||
|
|
@ -8,7 +8,7 @@ mod dl;
|
|||
#[tokio::main]
|
||||
async fn main() -> anyhow::Result<()> {
|
||||
dotenv().ok();
|
||||
|
||||
|
||||
bot_main().await?;
|
||||
Ok(())
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue