Compare commits

..

3 Commits

Author SHA1 Message Date
bd61a64c70 ci: build via host docker socket (plain build); fix missing daemon socket
Some checks failed
deploy / test-frontend (pull_request) Waiting to run
deploy / test-backend (pull_request) Failing after 1m44s
deploy / build-and-push (pull_request) Has been cancelled
deploy / deploy (pull_request) Has been cancelled
build-and-push failed at docker/setup-buildx-action: the job had no
/var/run/docker.sock, so buildx's docker-container driver couldn't reach
the daemon. Mount the host socket into the build-and-push and deploy jobs,
and replace setup-buildx + build-push-action (plus the unsupported gha
cache) with a plain `docker build` / `docker push` against the host daemon
(docker-outside-of-docker, "DooD"), which reuses the host's layer cache.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-05-31 19:40:12 +02:00
3b3d13a0f6 fix(crawler): walk list pages incrementally; stop on empty page (0.45.1) (#4)
Some checks failed
deploy / test-backend (push) Successful in 18m58s
deploy / test-frontend (push) Successful in 9m43s
deploy / build-and-push (push) Failing after 2m26s
deploy / deploy (push) Has been skipped
2026-05-31 16:37:14 +00:00
0f90af80cb ci(test-backend): ubuntu-latest + rustup (fix node-not-found) (#3)
Some checks failed
deploy / test-backend (push) Has been cancelled
deploy / test-frontend (push) Has been cancelled
deploy / build-and-push (push) Has been cancelled
deploy / deploy (push) Has been cancelled
2026-05-31 16:18:21 +00:00
5 changed files with 58 additions and 138 deletions

View File

@@ -72,9 +72,17 @@ jobs:
runs-on: ubuntu-latest runs-on: ubuntu-latest
needs: [test-backend, test-frontend] needs: [test-backend, test-frontend]
# PRs only run the test jobs; build + deploy are reserved for # PRs only run the test jobs; build + deploy are reserved for
# post-merge pushes to main. Without this gate every PR would push # post-merge pushes to main.
# a tagged image to the registry and SSH-deploy to prod.
if: github.event_name != 'pull_request' if: github.event_name != 'pull_request'
# Build on the host docker daemon directly (docker-outside-of-docker):
# the runner shares the deploy host's daemon, so a plain `docker build`
# reuses the host's layer cache and avoids buildx's docker-container
# driver + the gha cache exporter — neither works against this single-host
# act_runner, and there is no in-job daemon socket unless we mount it.
container:
image: docker.gitea.com/runner-images:ubuntu-latest
volumes:
- /var/run/docker.sock:/var/run/docker.sock
outputs: outputs:
image_tag: ${{ steps.meta.outputs.image_tag }} image_tag: ${{ steps.meta.outputs.image_tag }}
version: ${{ steps.meta.outputs.version }} version: ${{ steps.meta.outputs.version }}
@@ -93,48 +101,32 @@ jobs:
echo "image_tag=${GITHUB_SHA}" >> "$GITHUB_OUTPUT" echo "image_tag=${GITHUB_SHA}" >> "$GITHUB_OUTPUT"
echo "version=${version}" >> "$GITHUB_OUTPUT" echo "version=${version}" >> "$GITHUB_OUTPUT"
- uses: docker/setup-buildx-action@v3 - name: Build & push backend + frontend
env:
- name: docker login REGISTRY_URL: ${{ secrets.REGISTRY_URL }}
uses: docker/login-action@v3 REGISTRY_USERNAME: ${{ secrets.REGISTRY_USERNAME }}
with: REGISTRY_PASSWORD: ${{ secrets.REGISTRY_PASSWORD }}
registry: ${{ secrets.REGISTRY_URL }} IMAGE_TAG: ${{ steps.meta.outputs.image_tag }}
username: ${{ secrets.REGISTRY_USERNAME }} VERSION: ${{ steps.meta.outputs.version }}
password: ${{ secrets.REGISTRY_PASSWORD }} run: |
set -eu
- name: Build & push backend echo "$REGISTRY_PASSWORD" | docker login "$REGISTRY_URL" -u "$REGISTRY_USERNAME" --password-stdin
uses: docker/build-push-action@v5 for svc in backend frontend; do
with: img="$REGISTRY_URL/mangalord-$svc"
context: ./backend docker build -t "$img:$IMAGE_TAG" -t "$img:latest" -t "$img:$VERSION" "./$svc"
push: true for tag in "$IMAGE_TAG" latest "$VERSION"; do docker push "$img:$tag"; done
tags: | done
${{ secrets.REGISTRY_URL }}/mangalord-backend:latest docker logout "$REGISTRY_URL"
${{ secrets.REGISTRY_URL }}/mangalord-backend:${{ steps.meta.outputs.image_tag }}
${{ secrets.REGISTRY_URL }}/mangalord-backend:${{ steps.meta.outputs.version }}
cache-from: type=gha,scope=backend
cache-to: type=gha,mode=max,scope=backend
- name: Build & push frontend
uses: docker/build-push-action@v5
with:
context: ./frontend
push: true
tags: |
${{ secrets.REGISTRY_URL }}/mangalord-frontend:latest
${{ secrets.REGISTRY_URL }}/mangalord-frontend:${{ steps.meta.outputs.image_tag }}
${{ secrets.REGISTRY_URL }}/mangalord-frontend:${{ steps.meta.outputs.version }}
cache-from: type=gha,scope=frontend
cache-to: type=gha,mode=max,scope=frontend
deploy: deploy:
runs-on: ubuntu-latest runs-on: ubuntu-latest
needs: build-and-push needs: build-and-push
if: github.event_name != 'pull_request' if: github.event_name != 'pull_request'
# Single-host deploy: the runner lives on the same box as the stack, so we # Single-host deploy: the runner lives on the same box as the stack, so we
# drive the host docker daemon directly (act_runner shares its socket via # drive the host docker daemon directly (the job mounts the host docker
# `docker_host: "-"`) instead of SSHing out. The compose dir is bind-mounted # socket) instead of SSHing out. The compose dir is bind-mounted at its
# at its REAL host path so compose's relative bind-mounts (./mangalord/..., # REAL host path so compose's relative bind-mounts (./mangalord/...,
# ./Caddyfile) resolve; this requires `/mnt/ssd/docker-data` in the runner's # ./Caddyfile) resolve; both paths must be in the runner's
# container.valid_volumes. The central compose references the images as # container.valid_volumes. The central compose references the images as
# registry.mc02.dev/mangalord-*:${MANGALORD_TAG:-latest}, so we only pull # registry.mc02.dev/mangalord-*:${MANGALORD_TAG:-latest}, so we only pull
# and recreate the two mangalord services at the freshly built SHA. # and recreate the two mangalord services at the freshly built SHA.
@@ -142,6 +134,7 @@ jobs:
image: docker:cli image: docker:cli
volumes: volumes:
- /mnt/ssd/docker-data:/mnt/ssd/docker-data - /mnt/ssd/docker-data:/mnt/ssd/docker-data
- /var/run/docker.sock:/var/run/docker.sock
steps: steps:
- name: Deploy to the local stack - name: Deploy to the local stack
working-directory: /mnt/ssd/docker-data working-directory: /mnt/ssd/docker-data

2
backend/Cargo.lock generated
View File

@@ -1470,7 +1470,7 @@ checksum = "c41e0c4fef86961ac6d6f8a82609f55f31b05e4fce149ac5710e439df7619ba4"
[[package]] [[package]]
name = "mangalord" name = "mangalord"
version = "0.45.0" version = "0.45.1"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"argon2", "argon2",

View File

@@ -1,6 +1,6 @@
[package] [package]
name = "mangalord" name = "mangalord"
version = "0.45.0" version = "0.45.1"
edition = "2021" edition = "2021"
default-run = "mangalord" default-run = "mangalord"

View File

@@ -7,7 +7,6 @@
//! (`td:has(label:contains("Author:"))`) are implemented by walking //! (`td:has(label:contains("Author:"))`) are implemented by walking
//! the parsed tree. //! the parsed tree.
use std::collections::VecDeque;
use std::time::Duration; use std::time::Duration;
use anyhow::Context; use anyhow::Context;
@@ -75,10 +74,11 @@ impl Source for TargetSource {
&self, &self,
ctx: &FetchContext<'_>, ctx: &FetchContext<'_>,
) -> anyhow::Result<Box<dyn DiscoverWalk + Send>> { ) -> anyhow::Result<Box<dyn DiscoverWalk + Send>> {
// Always visit page 1 first because that's the only way to // Probe page 1 up front (with transient retry) for two reasons:
// discover `last_page`. Retry it on transient — a broken first // a broken first page should abort cleanly rather than mid-walk,
// page would otherwise abort the whole walk before we've even // and the HTML is handed straight to the first `next_batch` call
// started. // so the walker doesn't re-fetch it. Page count is discovered
// incrementally — see `TargetSourceWalker::next_batch`.
let first_html = retry_on_transient( let first_html = retry_on_transient(
|| async { || async {
navigate(ctx, self.base_url.as_str(), LIST_PAGE_MARKER).await navigate(ctx, self.base_url.as_str(), LIST_PAGE_MARKER).await
@@ -87,21 +87,10 @@ impl Source for TargetSource {
PAGE_TRANSIENT_RETRY_DELAY, PAGE_TRANSIENT_RETRY_DELAY,
) )
.await?; .await?;
let last_page = {
let doc = scraper::Html::parse_document(&first_html);
parse_last_page(&doc)
};
let order = build_page_order(last_page);
tracing::info!(
last_page = ?last_page,
page_count = order.len(),
"walking pagination"
);
Ok(Box::new(TargetSourceWalker { Ok(Box::new(TargetSourceWalker {
base_url: self.base_url.clone(), base_url: self.base_url.clone(),
pages_remaining: order, next_page: 1,
first_page_html: Some(first_html), first_page_html: Some(first_html),
})) }))
} }
@@ -147,24 +136,19 @@ impl Source for TargetSource {
} }
} }
/// Build the queue of page numbers `TargetSource::discover` will walk. /// Walker returned by [`TargetSource::discover`]. Walks pages `1..` in
/// The site orders by `update_date DESC`, so newest-first is just the /// order, terminating as soon as a page renders cleanly with zero entries
/// natural page order: `1..=last`. If `last_page` is unknown (source /// — that's the "we ran off the end of the index" signal. Page 1's HTML
/// surfaces no pagination) only page 1 is visited. /// is cached at construction time (discover already had to fetch it for
fn build_page_order(last_page: Option<i32>) -> VecDeque<i32> { /// the transient probe) so the first batch doesn't re-fetch.
match last_page { ///
None => VecDeque::from([1]), /// A genuinely empty `Ok(vec![])` from `parse_manga_list_from` is what
Some(last) => (1..=last).collect(), /// stops us: the parser's `#logo` sentinel converts unrendered pages
} /// into transient errors before they reach this loop, so an empty
} /// parse result reliably means "no more entries."
/// Walker returned by [`TargetSource::discover`]. Pops one source-index
/// page per `next_batch` call. Page 1's HTML is cached at construction
/// time (the discover call needed it to read `last_page` anyway) so the
/// batch covering page 1 doesn't re-fetch.
struct TargetSourceWalker { struct TargetSourceWalker {
base_url: String, base_url: String,
pages_remaining: VecDeque<i32>, next_page: i32,
first_page_html: Option<String>, first_page_html: Option<String>,
} }
@@ -174,13 +158,11 @@ impl DiscoverWalk for TargetSourceWalker {
&mut self, &mut self,
ctx: &FetchContext<'_>, ctx: &FetchContext<'_>,
) -> anyhow::Result<Option<Vec<SourceMangaRef>>> { ) -> anyhow::Result<Option<Vec<SourceMangaRef>>> {
let Some(page_num) = self.pages_remaining.pop_front() else { let page_num = self.next_page;
return Ok(None);
};
let page_refs = if page_num == 1 { let page_refs = if page_num == 1 {
// Reuse the cached page-1 HTML from the initial probe. Take // Reuse the cached page-1 HTML from the initial probe. Take
// it (rather than clone) so a malformed page-order queue // it (rather than clone) so a future re-entry that somehow
// that re-visits page 1 still falls back to a real fetch. // revisits page 1 still falls back to a real fetch.
match self.first_page_html.take() { match self.first_page_html.take() {
Some(html) => { Some(html) => {
let doc = scraper::Html::parse_document(&html); let doc = scraper::Html::parse_document(&html);
@@ -218,6 +200,10 @@ impl DiscoverWalk for TargetSourceWalker {
.await? .await?
}; };
tracing::info!(page_num, count = page_refs.len(), "page walked"); tracing::info!(page_num, count = page_refs.len(), "page walked");
if page_refs.is_empty() {
return Ok(None);
}
self.next_page += 1;
Ok(Some(page_refs)) Ok(Some(page_refs))
} }
} }
@@ -288,20 +274,6 @@ fn classify_navigate_html(html: String) -> Result<String, PageError> {
Ok(html) Ok(html)
} }
fn parse_last_page(doc: &scraper::Html) -> Option<i32> {
// Pagination links carry their page number as text. Take the
// numeric maximum so we don't depend on a specific layout (Prev,
// Next, ellipses, etc. all get filtered out by .parse).
let sel = scraper::Selector::parse("#left_side .pagination a").unwrap();
doc.select(&sel)
.filter_map(|a| {
collapse_whitespace(&a.text().collect::<String>())
.parse::<i32>()
.ok()
})
.max()
}
/// Substitutes the first `/N/` path segment with the target page /// Substitutes the first `/N/` path segment with the target page
/// number. Source impls that paginate via a different URL shape can /// number. Source impls that paginate via a different URL shape can
/// override this — for the modeled site the segment is always present. /// override this — for the modeled site the segment is always present.
@@ -853,29 +825,6 @@ mod tests {
assert_eq!(parse_chapter_number("Special"), None); assert_eq!(parse_chapter_number("Special"), None);
} }
#[test]
fn parse_last_page_picks_highest_pagination_link() {
let html = r#"
<div id="left_side"><div class="pagination">
<a href="/list/1/">Prev</a>
<ol>
<li><a href="/list/1/">1</a></li>
<li><a href="/list/2/">2</a></li>
<li><a href="/list/47/">47</a></li>
<li><a href="/list/2/">Next</a></li>
</ol>
</div></div>
"#;
let doc = scraper::Html::parse_document(html);
assert_eq!(parse_last_page(&doc), Some(47));
}
#[test]
fn parse_last_page_none_when_no_pagination() {
let doc = scraper::Html::parse_document("<html></html>");
assert!(parse_last_page(&doc).is_none());
}
#[test] #[test]
fn page_url_substitutes_numeric_path_segment() { fn page_url_substitutes_numeric_path_segment() {
assert_eq!( assert_eq!(
@@ -1024,28 +973,6 @@ mod tests {
assert!(err.is_transient(), "got non-transient: {err}"); assert!(err.is_transient(), "got non-transient: {err}");
} }
#[test]
fn build_page_order_is_natural_one_to_last() {
// Newest-first is just the source's natural pagination order:
// (update_date DESC) lives at page 1, oldest at the last page.
let order = build_page_order(Some(3));
assert_eq!(Vec::from(order), vec![1, 2, 3]);
}
#[test]
fn build_page_order_falls_back_to_page_one_only_without_pagination() {
// Source surfaced no pagination control — visit page 1 alone
// and let the walk end after one batch.
let order = build_page_order(None);
assert_eq!(Vec::from(order), vec![1]);
}
#[test]
fn build_page_order_single_page_index_yields_one_entry() {
let order = build_page_order(Some(1));
assert_eq!(Vec::from(order), vec![1]);
}
#[test] #[test]
fn parse_chapter_list_returns_transient_when_table_missing() { fn parse_chapter_list_returns_transient_when_table_missing() {
// Partial render (post-load JS hadn't injected the table, layout // Partial render (post-load JS hadn't injected the table, layout

View File

@@ -1,6 +1,6 @@
{ {
"name": "mangalord-frontend", "name": "mangalord-frontend",
"version": "0.45.0", "version": "0.45.1",
"private": true, "private": true,
"type": "module", "type": "module",
"scripts": { "scripts": {