feat(crawler): live status via SSE instead of polling
Replace the dashboard's 5s polling with a Server-Sent Events stream:

- StatusHandle gains a tokio `watch` version bumped on every mutation; GET /admin/crawler/stream subscribes and pushes a composed snapshot immediately on connect, then on every status change (instant, no lost-wakeup) plus a 5s backstop for DB queue counts / browser phase.
- Non-status signals poke the notifier so they push immediately too: session-expired (worker), session update / clear-expired / browser restart (endpoints).
- compose_status is shared by the one-shot GET and the stream; the stream tolerates transient DB errors with a keep-alive comment instead of tearing down.

Frontend: the crawler page opens an EventSource on mount and closes it on destroy, so the subscription is scoped to the active page (no global subscription). A one-shot fetch still paints initial state / serves as a fallback if SSE is blocked; a live/reconnecting indicator reflects the connection.

The existing reverse proxy already streams SSE (its abort timer is cleared once response headers arrive), so no proxy change needed.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@@ -6,9 +6,14 @@
|
||||
//! ([`crate::crawler::status`]) with DB-derived queue counts and the
|
||||
//! session/browser flags.
|
||||
|
||||
use std::convert::Infallible;
|
||||
use std::time::Duration;
|
||||
|
||||
use axum::extract::{Query, State};
|
||||
use axum::response::sse::{Event, KeepAlive, Sse};
|
||||
use axum::routing::{get, post};
|
||||
use axum::{Json, Router};
|
||||
use futures_util::stream::Stream;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use serde_json::json;
|
||||
use uuid::Uuid;
|
||||
@@ -21,9 +26,16 @@ use crate::error::{AppError, AppResult};
|
||||
use crate::repo;
|
||||
use crate::repo::crawler::{DeadJob, RequeueScope};
|
||||
|
||||
/// Maximum time an SSE subscriber waits between recomposed snapshots.
///
/// Status mutations wake the stream right away through the status `watch`,
/// so this timer is only a fallback: it caps how stale the DB-derived queue
/// counts and the browser phase can get when they change without anyone
/// poking the status notifier.
const SSE_BACKSTOP: Duration = Duration::from_secs(5);
|
||||
|
||||
pub fn routes() -> Router<AppState> {
|
||||
Router::new()
|
||||
.route("/admin/crawler", get(get_status))
|
||||
.route("/admin/crawler/stream", get(stream_status))
|
||||
.route("/admin/crawler/run", post(run_now))
|
||||
.route("/admin/crawler/browser/restart", post(restart_browser))
|
||||
.route("/admin/crawler/session", post(update_session))
|
||||
@@ -75,10 +87,10 @@ fn browser_phase_str(p: RestartPhase) -> &'static str {
|
||||
}
|
||||
}
|
||||
|
||||
async fn get_status(
|
||||
State(state): State<AppState>,
|
||||
_admin: RequireAdmin,
|
||||
) -> AppResult<Json<CrawlerStatusResponse>> {
|
||||
/// Compose a full status snapshot from the in-memory status, the
|
||||
/// browser/session flags, and a fresh DB queue-count query. Shared by the
|
||||
/// one-shot `get_status` and the SSE `stream_status`.
|
||||
async fn compose_status(state: &AppState) -> AppResult<CrawlerStatusResponse> {
|
||||
let (pending, running, dead) = repo::crawler::job_state_counts(&state.db).await?;
|
||||
let queue = QueueCounts {
|
||||
pending,
|
||||
@@ -86,7 +98,7 @@ async fn get_status(
|
||||
dead,
|
||||
};
|
||||
|
||||
let resp = match state.crawler.as_ref() {
|
||||
Ok(match state.crawler.as_ref() {
|
||||
None => CrawlerStatusResponse {
|
||||
daemon: "disabled",
|
||||
phase: None,
|
||||
@@ -114,8 +126,64 @@ async fn get_status(
|
||||
queue,
|
||||
}
|
||||
}
|
||||
};
|
||||
Ok(Json(resp))
|
||||
})
|
||||
}
|
||||
|
||||
async fn get_status(
|
||||
State(state): State<AppState>,
|
||||
_admin: RequireAdmin,
|
||||
) -> AppResult<Json<CrawlerStatusResponse>> {
|
||||
Ok(Json(compose_status(&state).await?))
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// GET /admin/crawler/stream — Server-Sent Events live status
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Push live status to the dashboard instead of polling. Emits a snapshot
|
||||
/// immediately on connect, then on every status change (instant, via the
|
||||
/// `watch` notifier) and on a [`SSE_BACKSTOP`] tick (to refresh DB queue
|
||||
/// counts / browser phase that change without a status poke). The browser
|
||||
/// opens this only while the crawler page is mounted and closes it on
|
||||
/// navigate-away, so the subscription is scoped to the active page.
|
||||
async fn stream_status(
|
||||
State(state): State<AppState>,
|
||||
_admin: RequireAdmin,
|
||||
) -> Sse<impl Stream<Item = Result<Event, Infallible>>> {
|
||||
// Subscribe before the first emit so no change between the initial
|
||||
// snapshot and the first await is lost.
|
||||
let rx = state.crawler.as_ref().map(|c| c.status.subscribe());
|
||||
|
||||
let stream = futures_util::stream::unfold(
|
||||
(state, rx, true),
|
||||
|(state, mut rx, first)| async move {
|
||||
// After the first immediate emit, wait for a change or the
|
||||
// backstop tick before recomposing.
|
||||
if !first {
|
||||
match rx.as_mut() {
|
||||
Some(rx) => {
|
||||
tokio::select! {
|
||||
_ = rx.changed() => {}
|
||||
_ = tokio::time::sleep(SSE_BACKSTOP) => {}
|
||||
}
|
||||
}
|
||||
None => tokio::time::sleep(SSE_BACKSTOP).await,
|
||||
}
|
||||
}
|
||||
// Compose; on a transient DB error, emit a keep-alive comment
|
||||
// rather than tearing down the stream.
|
||||
let event = match compose_status(&state).await {
|
||||
Ok(resp) => Event::default()
|
||||
.event("status")
|
||||
.json_data(&resp)
|
||||
.unwrap_or_else(|_| Event::default().comment("serialize error")),
|
||||
Err(_) => Event::default().comment("status unavailable"),
|
||||
};
|
||||
Some((Ok(event), (state, rx, false)))
|
||||
},
|
||||
);
|
||||
|
||||
Sse::new(stream).keep_alive(KeepAlive::default())
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
@@ -165,6 +233,8 @@ async fn restart_browser(
|
||||
) -> AppResult<Json<RestartResponse>> {
|
||||
let c = require_crawler(&state)?;
|
||||
let result = c.browser_manager.coordinated_restart(c.drain_deadline).await;
|
||||
// Push the post-restart browser phase to live subscribers immediately.
|
||||
c.status.poke();
|
||||
repo::admin_audit::insert(
|
||||
&state.db,
|
||||
admin.0.id,
|
||||
@@ -215,6 +285,8 @@ async fn update_session(
|
||||
// Relaunch the browser so on_launch re-injects the new cookie and
|
||||
// re-probes — the restart's success IS the session-validity signal.
|
||||
let probe = c.browser_manager.coordinated_restart(c.drain_deadline).await;
|
||||
// Session + browser state changed — push to live subscribers.
|
||||
c.status.poke();
|
||||
repo::admin_audit::insert(
|
||||
&state.db,
|
||||
admin.0.id,
|
||||
@@ -247,6 +319,8 @@ async fn clear_session_expired(
|
||||
) -> AppResult<Json<ClearExpiredResponse>> {
|
||||
let c = require_crawler(&state)?;
|
||||
c.session.clear_expired();
|
||||
// session.expired flipped — push to live subscribers.
|
||||
c.status.poke();
|
||||
repo::admin_audit::insert(
|
||||
&state.db,
|
||||
admin.0.id,
|
||||
|
||||
Reference in New Issue
Block a user