fetch feeds, post to zulip
This commit is contained in:
parent
d3aa796412
commit
9f8b6cb177
4 changed files with 266 additions and 19 deletions
1
.gitignore
vendored
1
.gitignore
vendored
|
|
@ -1,2 +1,3 @@
|
|||
/target
|
||||
blogdor.db
|
||||
secrets
|
||||
|
|
|
|||
175
Cargo.lock
generated
175
Cargo.lock
generated
|
|
@ -24,7 +24,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
|||
checksum = "17e913097e1a2124b46746c980134e8c954bc17a6a59bb3fde96f088d126dde6"
|
||||
dependencies = [
|
||||
"cssparser",
|
||||
"html5ever",
|
||||
"html5ever 0.35.0",
|
||||
"maplit",
|
||||
"tendril",
|
||||
"url",
|
||||
|
|
@ -207,15 +207,19 @@ dependencies = [
|
|||
"axum",
|
||||
"clap",
|
||||
"feed-rs",
|
||||
"html2md",
|
||||
"justerror",
|
||||
"rand 0.9.2",
|
||||
"reqwest",
|
||||
"serde",
|
||||
"serde_urlencoded",
|
||||
"sqlx",
|
||||
"thiserror",
|
||||
"thiserror 2.0.17",
|
||||
"tokio",
|
||||
"tokio-util",
|
||||
"tracing",
|
||||
"tracing-subscriber",
|
||||
"unicode-segmentation",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
|
@ -246,6 +250,12 @@ dependencies = [
|
|||
"shlex",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "cesu8"
|
||||
version = "1.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6d43a04d8753f35258c91f8ec639f792891f748a1edbd759cf1dcea3382ad83c"
|
||||
|
||||
[[package]]
|
||||
name = "cfg-if"
|
||||
version = "1.0.4"
|
||||
|
|
@ -312,6 +322,16 @@ version = "1.0.4"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75"
|
||||
|
||||
[[package]]
|
||||
name = "combine"
|
||||
version = "4.6.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ba5a308b75df32fe02788e748662718f03fde005016435c444eea572398219fd"
|
||||
dependencies = [
|
||||
"bytes",
|
||||
"memchr",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "concurrent-queue"
|
||||
version = "2.5.0"
|
||||
|
|
@ -802,6 +822,34 @@ dependencies = [
|
|||
"windows-sys 0.61.2",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "html2md"
|
||||
version = "0.2.15"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8cff9891f2e0d9048927fbdfc28b11bf378f6a93c7ba70b23d0fbee9af6071b4"
|
||||
dependencies = [
|
||||
"html5ever 0.27.0",
|
||||
"jni",
|
||||
"lazy_static",
|
||||
"markup5ever_rcdom",
|
||||
"percent-encoding",
|
||||
"regex",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "html5ever"
|
||||
version = "0.27.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c13771afe0e6e846f1e67d038d4cb29998a6779f93c809212e4e9c32efd244d4"
|
||||
dependencies = [
|
||||
"log",
|
||||
"mac",
|
||||
"markup5ever 0.12.1",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.111",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "html5ever"
|
||||
version = "0.35.0"
|
||||
|
|
@ -809,7 +857,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
|||
checksum = "55d958c2f74b664487a2035fe1dadb032c48718a03b63f3ab0b8537db8549ed4"
|
||||
dependencies = [
|
||||
"log",
|
||||
"markup5ever",
|
||||
"markup5ever 0.35.0",
|
||||
"match_token",
|
||||
]
|
||||
|
||||
|
|
@ -1103,6 +1151,26 @@ version = "1.0.15"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c"
|
||||
|
||||
[[package]]
|
||||
name = "jni"
|
||||
version = "0.19.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c6df18c2e3db7e453d3c6ac5b3e9d5182664d28788126d39b91f2d1e22b017ec"
|
||||
dependencies = [
|
||||
"cesu8",
|
||||
"combine",
|
||||
"jni-sys",
|
||||
"log",
|
||||
"thiserror 1.0.69",
|
||||
"walkdir",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "jni-sys"
|
||||
version = "0.3.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8eaf4bc02d17cbdd7ff4c7438cafcdf7fb9a4613313ad11b4f8fefe7d3fa0130"
|
||||
|
||||
[[package]]
|
||||
name = "js-sys"
|
||||
version = "0.3.83"
|
||||
|
|
@ -1206,6 +1274,20 @@ version = "1.0.2"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3e2e65a1a2e43cfcb47a895c4c8b10d1f4a61097f9f254f183aee60cad9c651d"
|
||||
|
||||
[[package]]
|
||||
name = "markup5ever"
|
||||
version = "0.12.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "16ce3abbeba692c8b8441d036ef91aea6df8da2c6b6e21c7e14d3c18e526be45"
|
||||
dependencies = [
|
||||
"log",
|
||||
"phf",
|
||||
"phf_codegen",
|
||||
"string_cache",
|
||||
"string_cache_codegen",
|
||||
"tendril",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "markup5ever"
|
||||
version = "0.35.0"
|
||||
|
|
@ -1217,6 +1299,18 @@ dependencies = [
|
|||
"web_atoms",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "markup5ever_rcdom"
|
||||
version = "0.3.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "edaa21ab3701bfee5099ade5f7e1f84553fd19228cf332f13cd6e964bf59be18"
|
||||
dependencies = [
|
||||
"html5ever 0.27.0",
|
||||
"markup5ever 0.12.1",
|
||||
"tendril",
|
||||
"xml5ever",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "match_token"
|
||||
version = "0.35.0"
|
||||
|
|
@ -1841,6 +1935,15 @@ version = "1.0.20"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f"
|
||||
|
||||
[[package]]
|
||||
name = "same-file"
|
||||
version = "1.0.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502"
|
||||
dependencies = [
|
||||
"winapi-util",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "schannel"
|
||||
version = "0.1.28"
|
||||
|
|
@ -2092,7 +2195,7 @@ dependencies = [
|
|||
"serde_json",
|
||||
"sha2",
|
||||
"smallvec",
|
||||
"thiserror",
|
||||
"thiserror 2.0.17",
|
||||
"tokio",
|
||||
"tokio-stream",
|
||||
"tracing",
|
||||
|
|
@ -2175,7 +2278,7 @@ dependencies = [
|
|||
"smallvec",
|
||||
"sqlx-core",
|
||||
"stringprep",
|
||||
"thiserror",
|
||||
"thiserror 2.0.17",
|
||||
"tracing",
|
||||
"whoami",
|
||||
]
|
||||
|
|
@ -2213,7 +2316,7 @@ dependencies = [
|
|||
"smallvec",
|
||||
"sqlx-core",
|
||||
"stringprep",
|
||||
"thiserror",
|
||||
"thiserror 2.0.17",
|
||||
"tracing",
|
||||
"whoami",
|
||||
]
|
||||
|
|
@ -2238,7 +2341,7 @@ dependencies = [
|
|||
"serde",
|
||||
"serde_urlencoded",
|
||||
"sqlx-core",
|
||||
"thiserror",
|
||||
"thiserror 2.0.17",
|
||||
"tracing",
|
||||
"url",
|
||||
]
|
||||
|
|
@ -2384,13 +2487,33 @@ dependencies = [
|
|||
"utf-8",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "thiserror"
|
||||
version = "1.0.69"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52"
|
||||
dependencies = [
|
||||
"thiserror-impl 1.0.69",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "thiserror"
|
||||
version = "2.0.17"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f63587ca0f12b72a0600bcba1d40081f830876000bb46dd2337a3051618f4fc8"
|
||||
dependencies = [
|
||||
"thiserror-impl",
|
||||
"thiserror-impl 2.0.17",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "thiserror-impl"
|
||||
version = "1.0.69"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.111",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
|
@ -2656,6 +2779,12 @@ version = "0.1.4"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7df058c713841ad818f1dc5d3fd88063241cc61f49f5fbea4b951e8cf5a8d71d"
|
||||
|
||||
[[package]]
|
||||
name = "unicode-segmentation"
|
||||
version = "1.12.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f6ccf251212114b54433ec949fd6a7841275f9ada20dddd2f29e9ceea4501493"
|
||||
|
||||
[[package]]
|
||||
name = "untrusted"
|
||||
version = "0.9.0"
|
||||
|
|
@ -2721,6 +2850,16 @@ version = "0.9.5"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a"
|
||||
|
||||
[[package]]
|
||||
name = "walkdir"
|
||||
version = "2.5.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b"
|
||||
dependencies = [
|
||||
"same-file",
|
||||
"winapi-util",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "want"
|
||||
version = "0.3.1"
|
||||
|
|
@ -2841,6 +2980,15 @@ dependencies = [
|
|||
"wasite",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "winapi-util"
|
||||
version = "0.1.11"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22"
|
||||
dependencies = [
|
||||
"windows-sys 0.61.2",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "windows-core"
|
||||
version = "0.62.2"
|
||||
|
|
@ -3169,6 +3317,17 @@ version = "0.6.2"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9edde0db4769d2dc68579893f2306b26c6ecfbe0ef499b013d731b7b9247e0b9"
|
||||
|
||||
[[package]]
|
||||
name = "xml5ever"
|
||||
version = "0.18.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9bbb26405d8e919bc1547a5aa9abc95cbfa438f04844f5fdd9dc7596b748bf69"
|
||||
dependencies = [
|
||||
"log",
|
||||
"mac",
|
||||
"markup5ever 0.12.1",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "yoke"
|
||||
version = "0.8.1"
|
||||
|
|
|
|||
|
|
@ -8,14 +8,18 @@ edition = "2024"
|
|||
axum = { version = "0.8.7", default-features = false, features = ["http1", "http2", "json", "macros", "tokio"] }
|
||||
clap = { version = "4.5.53", features = ["derive"] }
|
||||
feed-rs = { version = "2.3.1", features = ["sanitize"] }
|
||||
html2md = "0.2.15"
|
||||
justerror = "1.1.0"
|
||||
reqwest = "0.12.24"
|
||||
serde = { version = "1.0.228", features = ["derive"] }
|
||||
serde_urlencoded = "0.7.1"
|
||||
sqlx = { version = "0.8.6", default-features = false, features = ["chrono", "derive", "macros", "migrate", "runtime-tokio", "sqlite", "tls-none"] }
|
||||
thiserror = "2.0.17"
|
||||
tokio = { version = "1.48.0", features = ["full"] }
|
||||
tokio-util = "0.7.17"
|
||||
tracing = "0.1.43"
|
||||
tracing-subscriber = { version = "0.3.22", features = ["env-filter"] }
|
||||
unicode-segmentation = "1.12.0"
|
||||
|
||||
[dev-dependencies]
|
||||
rand = "0.9.2"
|
||||
|
|
|
|||
105
src/lib.rs
105
src/lib.rs
|
|
@ -1,10 +1,7 @@
|
|||
use std::time::Duration;
|
||||
|
||||
use feed_rs::parser::parse;
|
||||
use tracing_subscriber::{layer::SubscriberExt, util::SubscriberInitExt};
|
||||
|
||||
pub mod server;
|
||||
|
||||
use reqwest::StatusCode;
|
||||
use sqlx::{
|
||||
SqlitePool,
|
||||
sqlite::{SqliteConnectOptions, SqliteJournalMode, SqlitePoolOptions},
|
||||
|
|
@ -12,6 +9,10 @@ use sqlx::{
|
|||
};
|
||||
use tokio::task::{JoinHandle, JoinSet};
|
||||
use tokio_util::{bytes::Buf, sync::CancellationToken};
|
||||
use tracing_subscriber::{layer::SubscriberExt, util::SubscriberInitExt};
|
||||
use unicode_segmentation::UnicodeSegmentation;
|
||||
|
||||
pub mod server;
|
||||
|
||||
const MAX_CONNS: u32 = 200;
|
||||
const MIN_CONNS: u32 = 5;
|
||||
|
|
@ -37,6 +38,16 @@ pub struct FeedEntry {
|
|||
body: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Default, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
|
||||
struct ZulipMessage<'s> {
|
||||
to: u32,
|
||||
#[serde(rename = "type")]
|
||||
typ: &'s str,
|
||||
content: String,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
topic: Option<&'s str>,
|
||||
}
|
||||
|
||||
impl BlogdorTheAggregator {
|
||||
pub async fn new() -> Self {
|
||||
tracing_subscriber::registry()
|
||||
|
|
@ -96,6 +107,15 @@ async fn check_feeds(db: &SqlitePool, client: &reqwest::Client) {
|
|||
}
|
||||
};
|
||||
|
||||
let endpoint = std::env::var("ZULIP_URL").expect("ZULIP_URL must be set");
|
||||
let channel_id: u32 = std::env::var("ZULIP_CHANNEL")
|
||||
.expect("ZULIP_CHANNEL must be set")
|
||||
.parse()
|
||||
.expect("ZULIP_CHANNEL must be an integer");
|
||||
|
||||
let email = std::env::var("BLOGDOR_EMAIL").expect("BLOGDOR_EMAIL must be set");
|
||||
let password = std::env::var("ZULIP_TOKEN").expect("ZULIP_TOKEN must be set");
|
||||
|
||||
let mut handles = JoinSet::new();
|
||||
for feed in feeds {
|
||||
handles.spawn(check_feed(db.clone(), feed.id, client.clone(), feed.url));
|
||||
|
|
@ -110,10 +130,61 @@ async fn check_feeds(db: &SqlitePool, client: &reqwest::Client) {
|
|||
Err(s) => {
|
||||
tracing::warn!("could not fetch feed: {s}")
|
||||
}
|
||||
Ok(posts) => {
|
||||
// send to zulip
|
||||
for post in posts {
|
||||
tracing::debug!("{post:?}");
|
||||
Ok(None) => {}
|
||||
Ok(Some(posts)) => {
|
||||
let FeedEntry { feed_id, .. } = posts.last().unwrap();
|
||||
let mut success = true;
|
||||
for post in posts.iter() {
|
||||
let body = post
|
||||
.body
|
||||
.iter()
|
||||
.next()
|
||||
.cloned()
|
||||
.unwrap_or("Blogdor Says: NO BODY!".to_string());
|
||||
let content = format!(
|
||||
"{body}\n\n---\noriginally posted to {}, on {}",
|
||||
post.url, post.published
|
||||
);
|
||||
let msg = ZulipMessage {
|
||||
to: channel_id,
|
||||
typ: "stream",
|
||||
content,
|
||||
topic: Some(&post.title),
|
||||
};
|
||||
let msg = serde_urlencoded::to_string(msg).expect("serialize msg");
|
||||
dbg!(&msg);
|
||||
|
||||
match client
|
||||
.post(&endpoint)
|
||||
.basic_auth(&email, Some(&password))
|
||||
.body(msg)
|
||||
.header("Content-Type", "application/x-www-form-urlencoded")
|
||||
.send()
|
||||
.await
|
||||
{
|
||||
Err(e) => {
|
||||
tracing::error!("got error sending to zulip: {e}");
|
||||
success = false;
|
||||
}
|
||||
Ok(r) => {
|
||||
if r.status() == StatusCode::OK {
|
||||
success &= true;
|
||||
} else {
|
||||
success = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
tokio::time::sleep(Duration::from_millis(200)).await;
|
||||
}
|
||||
if success
|
||||
&& let Err(e) = sqlx::query!(
|
||||
"insert into runs (feed, succeeded) values (?, true)",
|
||||
feed_id
|
||||
)
|
||||
.execute(db)
|
||||
.await
|
||||
{
|
||||
tracing::error!("could not insert run for {feed_id}, got {e}");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -125,7 +196,7 @@ async fn check_feed(
|
|||
feed_id: i64,
|
||||
client: reqwest::Client,
|
||||
url: String,
|
||||
) -> Result<Vec<FeedEntry>, String> {
|
||||
) -> Result<Option<Vec<FeedEntry>>, String> {
|
||||
let rec = sqlx::query!(
|
||||
"select date_time from runs where succeeded = true and feed = ? order by id desc limit 1",
|
||||
feed_id
|
||||
|
|
@ -165,13 +236,25 @@ async fn check_feed(
|
|||
published: post.published.unwrap_or(now),
|
||||
received: now,
|
||||
feed_description: feed.description.to_owned().map(|d| d.content),
|
||||
body: post.content.and_then(|c| c.body),
|
||||
body: post.content.and_then(|c| {
|
||||
c.body.map(|f| {
|
||||
let s = html2md::parse_html(&f)
|
||||
.graphemes(false)
|
||||
.take(500)
|
||||
.collect::<String>();
|
||||
s.to_string()
|
||||
})
|
||||
}),
|
||||
};
|
||||
out.push(entry);
|
||||
}
|
||||
}
|
||||
|
||||
Ok(out)
|
||||
if out.is_empty() {
|
||||
Ok(None)
|
||||
} else {
|
||||
Ok(Some(out))
|
||||
}
|
||||
}
|
||||
|
||||
async fn get_db_pool() -> SqlitePool {
|
||||
|
|
|
|||
Loading…
Reference in a new issue