write function to find all urls in text
This commit is contained in:
parent
f72e31ab15
commit
df1d3fc05b
6 changed files with 54 additions and 2 deletions
39
Cargo.lock
generated
39
Cargo.lock
generated
|
|
@ -30,6 +30,15 @@ dependencies = [
|
||||||
"zerocopy",
|
"zerocopy",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "aho-corasick"
|
||||||
|
version = "1.1.2"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "b2969dcb958b36655471fc61f7e416fa76033bdd4bfed0678d8fee1e2d07a1f0"
|
||||||
|
dependencies = [
|
||||||
|
"memchr",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "anyhow"
|
name = "anyhow"
|
||||||
version = "1.0.75"
|
version = "1.0.75"
|
||||||
|
|
@ -654,6 +663,7 @@ dependencies = [
|
||||||
"anyhow",
|
"anyhow",
|
||||||
"dotenv",
|
"dotenv",
|
||||||
"ordered-float",
|
"ordered-float",
|
||||||
|
"regex",
|
||||||
"serde",
|
"serde",
|
||||||
"serde_json",
|
"serde_json",
|
||||||
"teloxide",
|
"teloxide",
|
||||||
|
|
@ -869,6 +879,35 @@ dependencies = [
|
||||||
"bitflags 1.3.2",
|
"bitflags 1.3.2",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "regex"
|
||||||
|
version = "1.10.3"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "b62dbe01f0b06f9d8dc7d49e05a0785f153b00b2c227856282f671e0318c9b15"
|
||||||
|
dependencies = [
|
||||||
|
"aho-corasick",
|
||||||
|
"memchr",
|
||||||
|
"regex-automata",
|
||||||
|
"regex-syntax",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "regex-automata"
|
||||||
|
version = "0.4.5"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "5bb987efffd3c6d0d8f5f89510bb458559eab11e4f869acb20bf845e016259cd"
|
||||||
|
dependencies = [
|
||||||
|
"aho-corasick",
|
||||||
|
"memchr",
|
||||||
|
"regex-syntax",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "regex-syntax"
|
||||||
|
version = "0.8.2"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "c08c74e62047bb2de4ff487b251e4a92e24f48745648451635cec7d591162d9f"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "reqwest"
|
name = "reqwest"
|
||||||
version = "0.11.22"
|
version = "0.11.22"
|
||||||
|
|
|
||||||
|
|
@ -13,3 +13,4 @@ teloxide = { version = "0.12.2", git ="https://github.com/teloxide/teloxide", fe
|
||||||
serde = { version = "1.0.196", features = ["derive"] }
|
serde = { version = "1.0.196", features = ["derive"] }
|
||||||
serde_json = "1.0.113"
|
serde_json = "1.0.113"
|
||||||
ordered-float = "4.2.0"
|
ordered-float = "4.2.0"
|
||||||
|
regex = "1.10.3"
|
||||||
|
|
|
||||||
|
|
@ -1 +1,2 @@
|
||||||
pub mod bot;
|
pub mod bot;
|
||||||
|
pub mod sanitize;
|
||||||
|
|
|
||||||
|
|
@ -104,6 +104,5 @@ async fn cmd_download(bot: Bot, msg: Message, url: String) -> HandlerResult {
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn handle_message(_bot: Bot, _dialogue: MyDialogue, msg: Message) -> HandlerResult {
|
async fn handle_message(_bot: Bot, _dialogue: MyDialogue, msg: Message) -> HandlerResult {
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
|
||||||
12
src/bot/sanitize.rs
Normal file
12
src/bot/sanitize.rs
Normal file
|
|
@ -0,0 +1,12 @@
|
||||||
|
use regex::Regex;
|
||||||
|
|
||||||
|
// https://stackoverflow.com/questions/6038061/regular-expression-to-find-urls-within-a-string
|
||||||
|
const RE_URL: &str =
|
||||||
|
r"(http|ftp|https):\/\/([\w_-]+(?:(?:\.[\w_-]+)+))([\w.,@?^=%&:\/~+#-]*[\w@?^=%&\/~+#-])";
|
||||||
|
|
||||||
|
pub fn extract_urls(text: &str) -> Vec<&str> {
|
||||||
|
let re = Regex::new(RE_URL).unwrap();
|
||||||
|
re.find_iter(text)
|
||||||
|
.map(|m| m.as_str())
|
||||||
|
.collect::<Vec<&str>>()
|
||||||
|
}
|
||||||
|
|
@ -8,7 +8,7 @@ mod dl;
|
||||||
#[tokio::main]
|
#[tokio::main]
|
||||||
async fn main() -> anyhow::Result<()> {
|
async fn main() -> anyhow::Result<()> {
|
||||||
dotenv().ok();
|
dotenv().ok();
|
||||||
|
|
||||||
bot_main().await?;
|
bot_main().await?;
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue