- TargetSource: first concrete impl of the Source trait, modeled on
the old Puppeteer crawler's selectors (+ status normalization,
tag-count stripping, chapter list)
- DiscoverMode::Backfill walks pagination last->1, reverse within each
page (oldest-first); Incremental walks forward
- RateLimiter (tokio-time aware) plumbed through FetchContext so the
pagination walk honors the same per-host budget as the outer loop
- repo::crawler: ensure_source, upsert_manga_from_source (returns
New/Updated/Unchanged + current cover_image_path for backfill
decisions), sync_manga_chapters, mark_dropped_mangas — all
transactional, with case-insensitive lookups and source-insertable
genres
- Cover image download via reqwest+infer; stored under
mangas/{id}/cover.{ext} via the Storage trait
- Single CRAWLER_PROXY env wires both Chromium (--proxy-server) and
reqwest::Proxy::all (HTTP/HTTPS/SOCKS5)
- Crawler binary: positional start URL or $CRAWLER_START_URL,
$CRAWLER_LIMIT (cap fetches + skip drop pass on partial runs),
$CRAWLER_SKIP_CHAPTERS (disable selector AND sync), $CRAWLER_RATE_MS
- Silences chromiumoxide 0.7's known CDP deserialize log spam via
default tracing filter + CdpError::Serde downgrade
- 9 sqlx integration tests + 11 selector/rate-limit unit tests
57 lines
1.5 KiB
TOML
57 lines
1.5 KiB
TOML
[package]
|
|
name = "mangalord"
|
|
version = "0.23.0"
|
|
edition = "2021"
|
|
default-run = "mangalord"
|
|
|
|
[lib]
|
|
path = "src/lib.rs"
|
|
|
|
[[bin]]
|
|
name = "mangalord"
|
|
path = "src/main.rs"
|
|
|
|
[[bin]]
|
|
name = "crawler"
|
|
path = "src/bin/crawler.rs"
|
|
|
|
[dependencies]
|
|
axum = { version = "0.7", features = ["macros", "multipart"] }
|
|
tokio = { version = "1", features = ["full"] }
|
|
sqlx = { version = "0.8", features = ["runtime-tokio", "postgres", "uuid", "chrono", "macros", "migrate"] }
|
|
serde = { version = "1", features = ["derive"] }
|
|
serde_json = "1"
|
|
uuid = { version = "1", features = ["v4", "serde"] }
|
|
chrono = { version = "0.4", features = ["serde"] }
|
|
tracing = "0.1"
|
|
tracing-subscriber = { version = "0.3", features = ["env-filter"] }
|
|
tower = { version = "0.5", features = ["util"] }
|
|
tower-http = { version = "0.6", features = ["trace", "cors"] }
|
|
thiserror = "1"
|
|
anyhow = "1"
|
|
async-trait = "0.1"
|
|
dotenvy = "0.15"
|
|
argon2 = "0.5"
|
|
rand = "0.8"
|
|
sha2 = "0.10"
|
|
subtle = "2"
|
|
base64 = "0.22"
|
|
axum-extra = { version = "0.9", features = ["cookie", "typed-header"] }
|
|
time = "0.3"
|
|
infer = "0.16"
|
|
tokio-util = { version = "0.7", features = ["io"] }
|
|
futures-core = "0.3"
|
|
futures-util = "0.3"
|
|
bytes = "1"
|
|
chromiumoxide = { version = "0.7", features = ["tokio-runtime", "_fetcher-rusttls-tokio"], default-features = false }
|
|
scraper = "0.20"
|
|
reqwest = { version = "0.12", default-features = false, features = ["rustls-tls", "socks"] }
|
|
|
|
[dev-dependencies]
|
|
tempfile = "3"
|
|
tower = { version = "0.5", features = ["util"] }
|
|
http-body-util = "0.1"
|
|
mime = "0.3"
|
|
futures-util = "0.3"
|
|
tokio = { version = "1", features = ["test-util"] }
|