Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,5 @@ next-env.d.ts
.vercel

/data/
logs/
.locks/
77 changes: 76 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -81,8 +81,83 @@ If you wish to replicate Second Reading independently, you can follow the steps
```
Open [http://localhost:4321](http://localhost:4321) to view the app.

## Daily Automation (macOS launchd + caffeinate)

The repository includes:
- `scripts/daily_pipeline.sh` (daily ingest + dedupe + summaries + change-gated build/deploy)
- `launchd/com.secondreading.daily.plist` (LaunchAgent template scheduled for 13:00 Asia/Singapore)

### Script defaults
- Date window: latest ingested sitting date in DB + 1 day, through `today` (`DD-MM-YYYY`, Asia/Singapore)
- If no sittings exist in DB yet, fallback window is `today-2` to `today`
- Pipeline order: ingest -> dedupe (`--keep-newest`) -> sitting summaries (`--only-blank`)
- Deploy policy: only when semantic data digest changes, and only on clean `main` git state

### Script options
```bash
scripts/daily_pipeline.sh \
[--start-date DD-MM-YYYY] \
[--end-date DD-MM-YYYY] \
[--lookback-days N] \
[--skip-summaries] \
[--skip-deploy] \
[--force-deploy] \
[--dry-run]
```
`--lookback-days` overrides the incremental default and uses a recent window of `today-N` to `today`.

### Current limitations
- `batch_process_sqlite.py` is not fully idempotent for overlapping date re-ingestion. Re-running a date can create duplicate `sections` and `bills` rows.
- `scripts/daily_pipeline.sh` now defaults to incremental non-overlapping ingestion (`max(sittings.date)+1` to `today`) to reduce this risk.
- The dedupe step in the pipeline currently cleans duplicate sections only.

### Setup
1. Sync dependencies and ensure deploy auth
```bash
cd /Users/admin/scribe/python && uv sync
cd /Users/admin/scribe/astro && bun install
# If needed:
# cd /Users/admin/scribe/astro && wrangler login
```

1. Create log directories
```bash
mkdir -p /Users/admin/scribe/logs/pipeline
mkdir -p /Users/admin/Library/Logs/second-reading
```

1. Install LaunchAgent
```bash
mkdir -p /Users/admin/Library/LaunchAgents
cp /Users/admin/scribe/launchd/com.secondreading.daily.plist \
/Users/admin/Library/LaunchAgents/com.secondreading.daily.plist

launchctl bootstrap gui/$(id -u) /Users/admin/Library/LaunchAgents/com.secondreading.daily.plist
launchctl enable gui/$(id -u)/com.secondreading.daily
```

### Testing
```bash
# Smoke tests
/Users/admin/scribe/scripts/daily_pipeline.sh --dry-run
/Users/admin/scribe/scripts/daily_pipeline.sh --dry-run --start-date 08-03-2026 --end-date 10-03-2026

# Functional run without deployment
/Users/admin/scribe/scripts/daily_pipeline.sh --start-date 08-03-2026 --end-date 10-03-2026 --skip-deploy

# Idempotency / change-gate check
/Users/admin/scribe/scripts/daily_pipeline.sh --start-date 08-03-2026 --end-date 10-03-2026 --skip-deploy

# Force deployment path
/Users/admin/scribe/scripts/daily_pipeline.sh --start-date 08-03-2026 --end-date 10-03-2026 --force-deploy

# launchd checks
plutil -lint /Users/admin/scribe/launchd/com.secondreading.daily.plist
launchctl kickstart -k gui/$(id -u)/com.secondreading.daily
launchctl print gui/$(id -u)/com.secondreading.daily
```

## Acknowledgements
This project is inspired by the creators of [Telescope](https://telescope.gov.sg/) and [Pair Search](https://search.pair.gov.sg/).

The copyright to the Hansard is owned by the Singapore Government.
4 changes: 4 additions & 0 deletions astro/astro.config.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,10 @@ import { defineConfig } from "astro/config";
// https://astro.build/config
export default defineConfig({
output: "static",
prefetch: {
prefetchAll: true,
defaultStrategy: "tap",
},
build: {
format: "file",
},
Expand Down
268 changes: 118 additions & 150 deletions astro/bun.lock

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion astro/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,6 @@
"autoprefixer": "^10.4.24",
"postcss": "^8.5.6",
"tailwindcss": "3",
"wrangler": "^3.83.0"
"wrangler": "^4.71.0"
}
}
119 changes: 88 additions & 31 deletions astro/src/components/islands/PaginatedList.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,8 @@ export default function PaginatedList({
const pagefindResultsRef = useRef<PagefindResult[]>([]);
// Cache of resolved pagefind result index -> meta.id
const resolvedIdsRef = useRef<Map<number, string>>(new Map());
// Tracks fragment indices currently being resolved to avoid duplicate fetches
const resolvingIndicesRef = useRef<Set<number>>(new Set());

const hasDateFilter = dateFrom !== "" || dateTo !== "";
const isDateFilterMode = hasDateFilter && !isSearchMode;
Expand Down Expand Up @@ -169,9 +171,62 @@ export default function PaginatedList({
}
}, [dataUrl, allData]);

// Resolve pagefind fragments for a page window and return current page's ListItems.
// Fetches fragments for pageNum through pageNum + PREFETCH_PAGES in parallel,
// skipping any already-resolved indices. Returns items for pageNum only.
// Resolve a set of pagefind result indices and cache index -> id mappings.
const resolveFragmentIndices = useCallback(
async (indices: number[], seq: number): Promise<void> => {
const pfResults = pagefindResultsRef.current;
const pending: number[] = [];

for (const idx of indices) {
if (
idx >= 0 &&
idx < pfResults.length &&
!resolvedIdsRef.current.has(idx) &&
!resolvingIndicesRef.current.has(idx)
) {
pending.push(idx);
resolvingIndicesRef.current.add(idx);
}
}

if (!pending.length) return;

try {
const resolved = await Promise.all(
pending.map(async (idx) => {
try {
const resultData = await pfResults[idx].data();
return { idx, id: resultData.meta?.id };
} catch {
return { idx, id: undefined };
}
}),
);

// Ignore stale async work from a previous search/query.
if (
seq !== searchSeqRef.current ||
pagefindResultsRef.current !== pfResults
) {
return;
}

for (const { idx, id } of resolved) {
if (id) {
resolvedIdsRef.current.set(idx, id);
}
}
} finally {
for (const idx of pending) {
resolvingIndicesRef.current.delete(idx);
}
}
},
[],
);

// Resolve and return ListItems for the requested page.
// Prefetches additional pages in the background without blocking UI update.
const resolveSearchPage = useCallback(
async (
pageNum: number,
Expand All @@ -185,40 +240,20 @@ export default function PaginatedList({

// Determine the window of indices to resolve (current page + prefetch buffer)
const startIdx = (pageNum - 1) * pageSize;
const endIdx = Math.min(
(pageNum + PREFETCH_PAGES) * pageSize,
pfResults.length,
);
const pageEndIdx = Math.min(pageNum * pageSize, pfResults.length);

// Find indices that haven't been resolved yet
const unresolvedIndices: number[] = [];
for (let i = startIdx; i < endIdx; i++) {
// Resolve current page first so we can render quickly.
const unresolvedCurrentPage: number[] = [];
for (let i = startIdx; i < pageEndIdx; i++) {
if (!resolvedIdsRef.current.has(i)) {
unresolvedIndices.push(i);
unresolvedCurrentPage.push(i);
}
}

// Fetch unresolved fragments in parallel
if (unresolvedIndices.length > 0) {
const fragmentPromises = unresolvedIndices.map(async (idx) => {
const resultData = await pfResults[idx].data();
return { idx, id: resultData.meta?.id };
});
const resolved = await Promise.all(fragmentPromises);

// Check if search was superseded
if (seq !== searchSeqRef.current) return null;

// Cache resolved mappings
for (const { idx, id } of resolved) {
if (id) {
resolvedIdsRef.current.set(idx, id);
}
}
}
await resolveFragmentIndices(unresolvedCurrentPage, seq);
if (seq !== searchSeqRef.current) return null;

// Build ListItems for the current page only
const pageEndIdx = Math.min(pageNum * pageSize, pfResults.length);
const pageItems: ListItem[] = [];
for (let i = startIdx; i < pageEndIdx; i++) {
const id = resolvedIdsRef.current.get(i);
Expand All @@ -228,9 +263,26 @@ export default function PaginatedList({
}
}

// Prefetch subsequent pages in the background.
const prefetchEndIdx = Math.min(
(pageNum + PREFETCH_PAGES) * pageSize,
pfResults.length,
);
if (pageEndIdx < prefetchEndIdx) {
const prefetchIndices: number[] = [];
for (let i = pageEndIdx; i < prefetchEndIdx; i++) {
if (!resolvedIdsRef.current.has(i)) {
prefetchIndices.push(i);
}
}
if (prefetchIndices.length > 0) {
void resolveFragmentIndices(prefetchIndices, seq);
}
}

return pageItems;
},
[pageSize],
[pageSize, resolveFragmentIndices],
);

// Update URL parameters
Expand Down Expand Up @@ -303,6 +355,7 @@ export default function PaginatedList({
setSearchHasDateFilter(false);
pagefindResultsRef.current = [];
resolvedIdsRef.current = new Map();
resolvingIndicesRef.current.clear();
setPage(1);
setItems(initialItems);
// If date filter active, apply it
Expand Down Expand Up @@ -379,6 +432,7 @@ export default function PaginatedList({

pagefindResultsRef.current = [];
resolvedIdsRef.current = new Map();
resolvingIndicesRef.current.clear();
setIsSearchMode(true);
setSearchHasDateFilter(true);
setSearchResults(matched);
Expand All @@ -393,6 +447,7 @@ export default function PaginatedList({
// Store raw results and only resolve fragments for the first few pages.
pagefindResultsRef.current = results.results;
resolvedIdsRef.current = new Map();
resolvingIndicesRef.current.clear();

setIsSearchMode(true);
setSearchHasDateFilter(false);
Expand Down Expand Up @@ -602,6 +657,7 @@ export default function PaginatedList({
setSearchHasDateFilter(false);
pagefindResultsRef.current = [];
resolvedIdsRef.current = new Map();
resolvingIndicesRef.current.clear();
applyDateFilter(fromParam, toParam);
} else {
setQuery("");
Expand All @@ -611,6 +667,7 @@ export default function PaginatedList({
setSearchHasDateFilter(false);
pagefindResultsRef.current = [];
resolvedIdsRef.current = new Map();
resolvingIndicesRef.current.clear();
setFilteredResults([]);
const pageNum = pageParam ? parseInt(pageParam, 10) : 1;
const validPage =
Expand Down
Loading