Files
Mangalord/backend/Cargo.toml
MechaCat02 7a6815661f feat(crawler): reliability fixes — heartbeat, streaming, jitter, timeout, breaker
A1 Lease heartbeat: jobs::renew keeps a long-but-healthy job's lease fresh
so it is never stolen mid-flight nor inflated toward max_attempts.
A2 Stream chapter pages straight to storage (peak memory = one image) and
persist rows + page_count in one short transaction off the network path
(S3-ready); roll back stored blobs on failure via Storage::delete.
A3 ±20% jitter on exponential backoff to avoid a retry thundering herd.
A4 Outer per-dispatch timeout (CRAWLER_JOB_TIMEOUT_SECS, default 600) so a
hung job is acked-failed instead of wedging a worker.
A5 Metadata circuit-breaker (CRAWLER_METADATA_MAX_CONSECUTIVE_FAILURES,
default 10): abort a pass on a source outage without marking a clean exit,
so the next tick recovery-sweeps.

Adds CRAWLER_BROWSER_RESTART_THRESHOLD config (used by the upcoming
coordinated browser restart). Bumps version 0.52.0 -> 0.53.0.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-03 20:13:17 +02:00

70 lines
2.1 KiB
TOML
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
[package]
name = "mangalord"
version = "0.53.0"
# This manifest declares more than one [[bin]] target, so a bare `cargo run`
# needs to be told which binary to launch.
default-run = "mangalord"
edition = "2021"
# Library target; its crate root is src/lib.rs.
[lib]
path = "src/lib.rs"
# Binary target "mangalord", built from src/main.rs. Its name matches the
# package's `default-run` value, so it is what a bare `cargo run` starts.
[[bin]]
name = "mangalord"
path = "src/main.rs"
# Second binary target, "crawler", built from src/bin/crawler.rs. It is not the
# `default-run` binary, so it has to be selected explicitly
# (e.g. `cargo run --bin crawler`).
[[bin]]
name = "crawler"
path = "src/bin/crawler.rs"
# Runtime dependencies, kept in alphabetical order.
[dependencies]
anyhow = "1"
argon2 = "0.5"
async-trait = "0.1"
axum = { version = "0.7", features = ["macros", "multipart"] }
axum-extra = { version = "0.9", features = ["cookie", "typed-header"] }
base64 = "0.22"
bytes = "1"
# Default features are off; only the two features listed here are enabled.
chromiumoxide = { version = "0.7", default-features = false, features = ["tokio-runtime", "_fetcher-rusttls-tokio"] }
chrono = { version = "0.4", features = ["serde"] }
chrono-tz = "0.9"
dotenvy = "0.15"
futures-core = "0.3"
futures-util = "0.3"
infer = "0.16"
nix = { version = "0.29", features = ["fs"] }
rand = "0.8"
# Default features are off: TLS comes from "rustls-tls", and SOCKS proxying,
# cookies and streaming are each opted into explicitly.
reqwest = { version = "0.12", default-features = false, features = ["rustls-tls", "socks", "cookies", "stream"] }
scraper = "0.20"
serde = { version = "1", features = ["derive"] }
serde_json = "1"
sha2 = "0.10"
sqlx = { version = "0.8", features = ["runtime-tokio", "postgres", "uuid", "chrono", "macros", "migrate"] }
subtle = "2"
# Default features are off; only "system" is enabled.
sysinfo = { version = "0.32", default-features = false, features = ["system"] }
thiserror = "1"
time = "0.3"
tokio = { version = "1", features = ["full"] }
tokio-util = { version = "0.7", features = ["io"] }
tower = { version = "0.5", features = ["util"] }
tower-http = { version = "0.6", features = ["trace", "cors"] }
tracing = "0.1"
tracing-subscriber = { version = "0.3", features = ["env-filter"] }
uuid = { version = "1", features = ["v4", "serde"] }
# Dependencies used only when building tests, examples and benches;
# kept in alphabetical order.
[dev-dependencies]
# NOTE(review): same requirement as the [dependencies] entry; looks redundant,
# confirm before removing.
futures-util = "0.3"
http-body-util = "0.1"
mime = "0.3"
tempfile = "3"
# Requests the "test-util" feature in addition to whatever [dependencies]
# enables for tokio.
tokio = { version = "1", features = ["test-util"] }
# NOTE(review): identical to the [dependencies] entry; looks redundant,
# confirm before removing.
tower = { version = "0.5", features = ["util"] }
# Trim debug builds: keep line numbers in panics / backtraces but drop the
# full DWARF info (variable-level inspection in gdb/lldb). With a sqlx +
# axum + tokio dep tree the default ("full") leaves backend/target on the
# order of tens of GiB; this typically cuts ~50-70% off that.
[profile.dev]
# Emit only line tables (file/line info for panics and backtraces) instead of
# full debug info.
debug = "line-tables-only"
[profile.test]
# Same reduced debug-info level as [profile.dev], stated explicitly for test builds.
debug = "line-tables-only"