diff --git a/src/clis/weread/book.ts b/src/clis/weread/book.ts index b41c5cdb..918ab36f 100644 --- a/src/clis/weread/book.ts +++ b/src/clis/weread/book.ts @@ -1,6 +1,82 @@ import { cli, Strategy } from '../../registry.js'; +import { CliError } from '../../errors.js'; import type { IPage } from '../../types.js'; -import { fetchPrivateApi } from './utils.js'; +import { fetchPrivateApi, resolveShelfReaderUrl } from './utils.js'; + +interface ReaderFallbackResult { + title: string; + author: string; + publisher: string; + intro: string; + category: string; + rating: string; + metadataReady: boolean; +} + +/** + * Read visible book metadata from the web reader cover/flyleaf page. + * This path is used as a fallback when the private API session has expired. + */ +async function loadReaderFallbackResult(page: IPage, readerUrl: string): Promise { + await page.goto(readerUrl); + await page.wait({ selector: '.horizontalReaderCoverPage_content_bookTitle, .wr_flyleaf_page_bookInfo_bookTitle', timeout: 10 }); + + const result = await page.evaluate(` + (() => { + const text = (node) => node?.textContent?.trim() || ''; + const bodyText = document.body?.innerText?.replace(/\\s+/g, ' ').trim() || ''; + const titleSelector = '.horizontalReaderCoverPage_content_bookTitle, .wr_flyleaf_page_bookInfo_bookTitle'; + const authorSelector = '.horizontalReaderCoverPage_content_author, .wr_flyleaf_page_bookInfo_author'; + const extractRating = () => { + const match = bodyText.match(/微信读书推荐值\\s*([0-9.]+%)/); + return match ? match[1] : ''; + }; + const extractPublisher = () => { + const direct = text(document.querySelector('.introDialog_content_pub_line')); + return direct.startsWith('出版社') ? direct.replace(/^出版社\\s*/, '').trim() : ''; + }; + const extractIntro = () => { + const selectors = [ + '.horizontalReaderCoverPage_content_bookInfo_intro', + '.wr_flyleaf_page_bookIntro_content', + '.introDialog_content_intro_para', + ]; + for (const selector of selectors) { + const value = text(document.querySelector(selector)); + if (value) return value; + } + return ''; + }; + + const categorySource = Array.from(document.scripts) + .map((script) => script.textContent || '') + .find((scriptText) => scriptText.includes('"category"')) || ''; + const categoryMatch = categorySource.match(/"category"\\s*:\\s*"([^"]+)"/); + const title = text(document.querySelector(titleSelector)); + const author = text(document.querySelector(authorSelector)); + + return { + title, + author, + publisher: extractPublisher(), + intro: extractIntro(), + category: categoryMatch ? categoryMatch[1].trim() : '', + rating: extractRating(), + metadataReady: Boolean(title || author), + }; + })() + `) as Partial; + + return { + title: String(result?.title || '').trim(), + author: String(result?.author || '').trim(), + publisher: String(result?.publisher || '').trim(), + intro: String(result?.intro || '').trim(), + category: String(result?.category || '').trim(), + rating: String(result?.rating || '').trim(), + metadataReady: result?.metadataReady === true, + }; +} cli({ site: 'weread', @@ -9,20 +85,47 @@ cli({ domain: 'weread.qq.com', strategy: Strategy.COOKIE, args: [ - { name: 'book-id', positional: true, required: true, help: 'Book ID (numeric, from search or shelf results)' }, + { name: 'book-id', positional: true, required: true, help: 'Book ID from search or shelf results' }, ], columns: ['title', 'author', 'publisher', 'intro', 'category', 'rating'], func: async (page: IPage, args) => { - const data = await fetchPrivateApi(page, '/book/info', { bookId: args['book-id'] }); - // newRating is 0-1000 scale per community docs; needs runtime verification - const rating = data.newRating ? `${(data.newRating / 10).toFixed(1)}%` : '-'; - return [{ - title: data.title ?? '', - author: data.author ?? '', - publisher: data.publisher ?? '', - intro: data.intro ?? '', - category: data.category ?? '', - rating, - }]; + const bookId = String(args['book-id'] || '').trim(); + + try { + const data = await fetchPrivateApi(page, '/book/info', { bookId }); + // newRating is 0-1000 scale per community docs; needs runtime verification + const rating = data.newRating ? `${(data.newRating / 10).toFixed(1)}%` : '-'; + return [{ + title: data.title ?? '', + author: data.author ?? '', + publisher: data.publisher ?? '', + intro: data.intro ?? '', + category: data.category ?? '', + rating, + }]; + } catch (error) { + if (!(error instanceof CliError) || error.code !== 'AUTH_REQUIRED') { + throw error; + } + + const readerUrl = await resolveShelfReaderUrl(page, bookId); + if (!readerUrl) { + throw error; + } + + const data = await loadReaderFallbackResult(page, readerUrl); + if (!data.metadataReady || !data.title) { + throw error; + } + + return [{ + title: data.title, + author: data.author, + publisher: data.publisher, + intro: data.intro, + category: data.category, + rating: data.rating, + }]; + } }, }); diff --git a/src/clis/weread/commands.test.ts b/src/clis/weread/commands.test.ts index 50085d69..1ef90158 100644 --- a/src/clis/weread/commands.test.ts +++ b/src/clis/weread/commands.test.ts @@ -1,4 +1,5 @@ import { beforeEach, describe, expect, it, vi } from 'vitest'; +import { CliError } from '../../errors.js'; const { mockFetchPrivateApi } = vi.hoisted(() => ({ mockFetchPrivateApi: vi.fn(), @@ -34,6 +35,254 @@ describe('weread book-id positional args', () => { expect(mockFetchPrivateApi).toHaveBeenCalledWith({}, '/book/info', { bookId: '12345' }); }); + it('falls back to the shelf reader page when private API auth has expired', async () => { + mockFetchPrivateApi.mockRejectedValue( + new CliError('AUTH_REQUIRED', 'Not logged in to WeRead', 'Please log in to weread.qq.com in Chrome first'), + ); + + const page = { + goto: vi.fn().mockResolvedValue(undefined), + evaluate: vi.fn() + .mockResolvedValueOnce({ + cacheFound: true, + rawBooks: [ + { bookId: 'MP_WXS_3634777637', title: '文明、现代化、价值投资与中国', author: '李录' }, + ], + shelfIndexes: [ + { bookId: 'MP_WXS_3634777637', idx: 0, role: 'book' }, + ], + }) + .mockResolvedValueOnce(['https://weread.qq.com/web/reader/6f5323f071bd7f7b6f521e8']) + .mockResolvedValueOnce({ + title: '文明、现代化、价值投资与中国', + author: '李录', + publisher: '中信出版集团', + intro: '对中国未来几十年的预测。', + category: '', + rating: '84.1%', + metadataReady: true, + }), + getCookies: vi.fn().mockResolvedValue([ + { name: 'wr_vid', value: '70486028', domain: '.weread.qq.com' }, + ]), + wait: vi.fn().mockResolvedValue(undefined), + } as any; + + const result = await book!.func!(page, { 'book-id': 'MP_WXS_3634777637' }); + + expect(page.goto).toHaveBeenNthCalledWith(1, 'https://weread.qq.com/web/shelf'); + expect(page.goto).toHaveBeenNthCalledWith(2, 'https://weread.qq.com/web/reader/6f5323f071bd7f7b6f521e8'); + expect(page.evaluate).toHaveBeenCalledTimes(3); + expect(result).toEqual([ + { + title: '文明、现代化、价值投资与中国', + author: '李录', + publisher: '中信出版集团', + intro: '对中国未来几十年的预测。', + category: '', + rating: '84.1%', + }, + ]); + }); + + it('keeps mixed shelf entries aligned when resolving MP_WXS reader urls', async () => { + mockFetchPrivateApi.mockRejectedValue( + new CliError('AUTH_REQUIRED', 'Not logged in to WeRead', 'Please log in to weread.qq.com in Chrome first'), + ); + + const page = { + goto: vi.fn().mockResolvedValue(undefined), + evaluate: vi.fn() + .mockResolvedValueOnce({ + cacheFound: true, + rawBooks: [ + { bookId: 'MP_WXS_1', title: '公众号文章一', author: '作者甲' }, + { bookId: 'BOOK_2', title: '普通书二', author: '作者乙' }, + { bookId: 'MP_WXS_3', title: '公众号文章三', author: '作者丙' }, + ], + shelfIndexes: [ + { bookId: 'MP_WXS_1', idx: 0, role: 'mp' }, + { bookId: 'BOOK_2', idx: 1, role: 'book' }, + { bookId: 'MP_WXS_3', idx: 2, role: 'mp' }, + ], + }) + .mockResolvedValueOnce([ + 'https://weread.qq.com/web/reader/mp1', + 'https://weread.qq.com/web/reader/book2', + 'https://weread.qq.com/web/reader/mp3', + ]) + .mockResolvedValueOnce({ + title: '公众号文章一', + author: '作者甲', + publisher: '微信读书', + intro: '第一篇文章。', + category: '', + rating: '', + metadataReady: true, + }), + getCookies: vi.fn().mockResolvedValue([ + { name: 'wr_vid', value: '70486028', domain: '.weread.qq.com' }, + ]), + wait: vi.fn().mockResolvedValue(undefined), + } as any; + + const result = await book!.func!(page, { 'book-id': 'MP_WXS_1' }); + + expect(page.goto).toHaveBeenNthCalledWith(1, 'https://weread.qq.com/web/shelf'); + expect(page.goto).toHaveBeenNthCalledWith(2, 'https://weread.qq.com/web/reader/mp1'); + expect(result).toEqual([ + { + title: '公众号文章一', + author: '作者甲', + publisher: '微信读书', + intro: '第一篇文章。', + category: '', + rating: '', + }, + ]); + }); + + it('rethrows AUTH_REQUIRED when shelf ordering is incomplete and reader urls cannot be trusted', async () => { + mockFetchPrivateApi.mockRejectedValue( + new CliError('AUTH_REQUIRED', 'Not logged in to WeRead', 'Please log in to weread.qq.com in Chrome first'), + ); + + const page = { + goto: vi.fn().mockResolvedValue(undefined), + evaluate: vi.fn() + .mockResolvedValueOnce({ + cacheFound: true, + rawBooks: [ + { bookId: 'BOOK_1', title: '第一本', author: '作者甲' }, + { bookId: 'BOOK_2', title: '第二本', author: '作者乙' }, + ], + shelfIndexes: [ + { bookId: 'BOOK_2', idx: 0, role: 'book' }, + ], + }) + .mockResolvedValueOnce([ + 'https://weread.qq.com/web/reader/book2', + 'https://weread.qq.com/web/reader/book1', + ]), + getCookies: vi.fn().mockResolvedValue([ + { name: 'wr_vid', value: '70486028', domain: '.weread.qq.com' }, + ]), + wait: vi.fn().mockResolvedValue(undefined), + } as any; + + await expect(book!.func!(page, { 'book-id': 'BOOK_1' })).rejects.toMatchObject({ + code: 'AUTH_REQUIRED', + message: 'Not logged in to WeRead', + }); + expect(page.goto).toHaveBeenCalledTimes(1); + expect(page.goto).toHaveBeenCalledWith('https://weread.qq.com/web/shelf'); + }); + + it('waits for shelf indexes to hydrate before resolving a trusted reader url', async () => { + mockFetchPrivateApi.mockRejectedValue( + new CliError('AUTH_REQUIRED', 'Not logged in to WeRead', 'Please log in to weread.qq.com in Chrome first'), + ); + + const page = { + goto: vi.fn().mockResolvedValue(undefined), + evaluate: vi.fn() + .mockResolvedValueOnce({ + cacheFound: true, + rawBooks: [ + { bookId: 'BOOK_1', title: '第一本', author: '作者甲' }, + { bookId: 'BOOK_2', title: '第二本', author: '作者乙' }, + ], + shelfIndexes: [ + { bookId: 'BOOK_2', idx: 0, role: 'book' }, + ], + }) + .mockResolvedValueOnce({ + cacheFound: true, + rawBooks: [ + { bookId: 'BOOK_1', title: '第一本', author: '作者甲' }, + { bookId: 'BOOK_2', title: '第二本', author: '作者乙' }, + ], + shelfIndexes: [ + { bookId: 'BOOK_2', idx: 0, role: 'book' }, + { bookId: 'BOOK_1', idx: 1, role: 'book' }, + ], + }) + .mockResolvedValueOnce([ + 'https://weread.qq.com/web/reader/book2', + 'https://weread.qq.com/web/reader/book1', + ]) + .mockResolvedValueOnce({ + title: '第一本', + author: '作者甲', + publisher: '出版社甲', + intro: '简介甲', + category: '', + rating: '', + metadataReady: true, + }), + getCookies: vi.fn().mockResolvedValue([ + { name: 'wr_vid', value: '70486028', domain: '.weread.qq.com' }, + ]), + wait: vi.fn().mockResolvedValue(undefined), + } as any; + + const result = await book!.func!(page, { 'book-id': 'BOOK_1' }); + + expect(page.goto).toHaveBeenNthCalledWith(1, 'https://weread.qq.com/web/shelf'); + expect(page.goto).toHaveBeenNthCalledWith(2, 'https://weread.qq.com/web/reader/book1'); + expect(result).toEqual([ + { + title: '第一本', + author: '作者甲', + publisher: '出版社甲', + intro: '简介甲', + category: '', + rating: '', + }, + ]); + }); + + it('rethrows AUTH_REQUIRED when the reader page lacks stable cover metadata', async () => { + mockFetchPrivateApi.mockRejectedValue( + new CliError('AUTH_REQUIRED', 'Not logged in to WeRead', 'Please log in to weread.qq.com in Chrome first'), + ); + + const page = { + goto: vi.fn().mockResolvedValue(undefined), + evaluate: vi.fn() + .mockResolvedValueOnce({ + cacheFound: true, + rawBooks: [ + { bookId: 'BOOK_1', title: '第一本', author: '作者甲' }, + ], + shelfIndexes: [ + { bookId: 'BOOK_1', idx: 0, role: 'book' }, + ], + }) + .mockResolvedValueOnce([ + 'https://weread.qq.com/web/reader/book1', + ]) + .mockResolvedValueOnce({ + title: '', + author: '', + publisher: '', + intro: '这是正文第一段,不应该被当成简介。', + category: '', + rating: '', + metadataReady: false, + }), + getCookies: vi.fn().mockResolvedValue([ + { name: 'wr_vid', value: '70486028', domain: '.weread.qq.com' }, + ]), + wait: vi.fn().mockResolvedValue(undefined), + } as any; + + await expect(book!.func!(page, { 'book-id': 'BOOK_1' })).rejects.toMatchObject({ + code: 'AUTH_REQUIRED', + message: 'Not logged in to WeRead', + }); + }); + it('passes the positional book-id to highlights', async () => { mockFetchPrivateApi.mockResolvedValue({ updated: [] }); diff --git a/src/weread-private-api-regression.test.ts b/src/clis/weread/private-api-regression.test.ts similarity index 68% rename from src/weread-private-api-regression.test.ts rename to src/clis/weread/private-api-regression.test.ts index dcbbef8b..f8af05f5 100644 --- a/src/weread-private-api-regression.test.ts +++ b/src/clis/weread/private-api-regression.test.ts @@ -1,7 +1,8 @@ import { beforeEach, describe, expect, it, vi } from 'vitest'; -import { getRegistry } from './registry.js'; -import { fetchPrivateApi } from './clis/weread/utils.js'; -import './clis/weread/shelf.js'; +import { getRegistry } from '../../registry.js'; +import { log } from '../../logger.js'; +import { fetchPrivateApi } from './utils.js'; +import './shelf.js'; describe('weread private API regression', () => { beforeEach(() => { @@ -12,8 +13,10 @@ describe('weread private API regression', () => { const mockPage = { getCookies: vi.fn() .mockResolvedValueOnce([ - { name: 'wr_name', value: 'alice', domain: 'weread.qq.com' }, { name: 'wr_vid', value: 'vid123', domain: 'i.weread.qq.com' }, + ]) + .mockResolvedValueOnce([ + { name: 'wr_name', value: 'alice', domain: 'weread.qq.com' }, ]), evaluate: vi.fn(), } as any; @@ -28,8 +31,9 @@ describe('weread private API regression', () => { const result = await fetchPrivateApi(mockPage, '/book/info', { bookId: '123' }); expect(result.title).toBe('Test Book'); - expect(mockPage.getCookies).toHaveBeenCalledTimes(1); + expect(mockPage.getCookies).toHaveBeenCalledTimes(2); expect(mockPage.getCookies).toHaveBeenCalledWith({ url: 'https://i.weread.qq.com/book/info?bookId=123' }); + expect(mockPage.getCookies).toHaveBeenCalledWith({ domain: 'weread.qq.com' }); expect(mockPage.evaluate).not.toHaveBeenCalled(); expect(fetchMock).toHaveBeenCalledWith( 'https://i.weread.qq.com/book/info?bookId=123', @@ -41,6 +45,72 @@ describe('weread private API regression', () => { ); }); + it('merges host-only main-domain cookies into private API requests', async () => { + // Simulates host-only cookies on weread.qq.com that don't match i.weread.qq.com by URL + const mockPage = { + getCookies: vi.fn() + .mockResolvedValueOnce([]) // URL lookup returns nothing for i.weread.qq.com + .mockResolvedValueOnce([ + { name: 'wr_skey', value: 'skey-host', domain: 'weread.qq.com' }, + { name: 'wr_vid', value: 'vid-host', domain: 'weread.qq.com' }, + ]), + evaluate: vi.fn(), + } as any; + + const fetchMock = vi.fn().mockResolvedValue({ + ok: true, + status: 200, + json: () => Promise.resolve({ title: 'Book', errcode: 0 }), + }); + vi.stubGlobal('fetch', fetchMock); + + await fetchPrivateApi(mockPage, '/book/info', { bookId: '42' }); + + expect(mockPage.getCookies).toHaveBeenCalledTimes(2); + expect(mockPage.getCookies).toHaveBeenCalledWith({ url: 'https://i.weread.qq.com/book/info?bookId=42' }); + expect(mockPage.getCookies).toHaveBeenCalledWith({ domain: 'weread.qq.com' }); + expect(fetchMock).toHaveBeenCalledWith( + 'https://i.weread.qq.com/book/info?bookId=42', + expect.objectContaining({ + headers: expect.objectContaining({ + Cookie: 'wr_skey=skey-host; wr_vid=vid-host', + }), + }), + ); + }); + + it('prefers API-subdomain cookies over main-domain cookies on name collision', async () => { + const mockPage = { + getCookies: vi.fn() + .mockResolvedValueOnce([ + { name: 'wr_skey', value: 'from-api', domain: 'i.weread.qq.com' }, + ]) + .mockResolvedValueOnce([ + { name: 'wr_skey', value: 'from-main', domain: 'weread.qq.com' }, + { name: 'wr_vid', value: 'vid-main', domain: 'weread.qq.com' }, + ]), + evaluate: vi.fn(), + } as any; + + const fetchMock = vi.fn().mockResolvedValue({ + ok: true, + status: 200, + json: () => Promise.resolve({ title: 'Book', errcode: 0 }), + }); + vi.stubGlobal('fetch', fetchMock); + + await fetchPrivateApi(mockPage, '/book/info', { bookId: '99' }); + + expect(fetchMock).toHaveBeenCalledWith( + 'https://i.weread.qq.com/book/info?bookId=99', + expect.objectContaining({ + headers: expect.objectContaining({ + Cookie: 'wr_skey=from-api; wr_vid=vid-main', + }), + }), + ); + }); + it('maps unauthenticated private API responses to AUTH_REQUIRED', async () => { const mockPage = { getCookies: vi.fn().mockResolvedValue([]), @@ -126,8 +196,10 @@ describe('weread private API regression', () => { const mockPage = { getCookies: vi.fn() .mockResolvedValueOnce([ - { name: 'wr_name', value: 'alice', domain: 'weread.qq.com' }, { name: 'wr_vid', value: 'vid123', domain: 'i.weread.qq.com' }, + ]) + .mockResolvedValueOnce([ + { name: 'wr_name', value: 'alice', domain: 'weread.qq.com' }, ]), evaluate: vi.fn(), } as any; @@ -153,9 +225,11 @@ describe('weread private API regression', () => { 'https://i.weread.qq.com/shelf/sync?synckey=0&lectureSynckey=0', expect.any(Object), ); + expect(mockPage.getCookies).toHaveBeenCalledTimes(2); expect(mockPage.getCookies).toHaveBeenCalledWith({ url: 'https://i.weread.qq.com/shelf/sync?synckey=0&lectureSynckey=0', }); + expect(mockPage.getCookies).toHaveBeenCalledWith({ domain: 'weread.qq.com' }); expect(result).toEqual([ { title: 'Deep Work', @@ -169,15 +243,16 @@ describe('weread private API regression', () => { it('falls back to structured shelf cache when the private API reports AUTH_REQUIRED', async () => { const command = getRegistry().get('weread/shelf'); expect(command?.func).toBeTypeOf('function'); + const warnSpy = vi.spyOn(log, 'warn').mockImplementation(() => {}); const mockPage = { getCookies: vi.fn() - .mockResolvedValueOnce([ - { name: 'wr_skey', value: 'skey123', domain: '.weread.qq.com' }, - ]) - .mockResolvedValueOnce([ - { name: 'wr_vid', value: 'vid-current', domain: '.weread.qq.com' }, - ]), + // fetchPrivateApi: URL lookup (i.weread.qq.com) + .mockResolvedValueOnce([{ name: 'wr_skey', value: 'skey123', domain: '.weread.qq.com' }]) + // fetchPrivateApi: domain lookup (weread.qq.com) + .mockResolvedValueOnce([{ name: 'wr_skey', value: 'skey123', domain: '.weread.qq.com' }]) + // loadWebShelfSnapshot: domain lookup for wr_vid + .mockResolvedValueOnce([{ name: 'wr_vid', value: 'vid-current', domain: '.weread.qq.com' }]), goto: vi.fn().mockResolvedValue(undefined), evaluate: vi.fn().mockImplementation(async (source: string) => { expect(source).toContain('shelf:rawBooks:vid-current'); @@ -219,6 +294,9 @@ describe('weread private API regression', () => { expect(mockPage.goto).toHaveBeenCalledWith('https://weread.qq.com/web/shelf'); expect(mockPage.getCookies).toHaveBeenCalledWith({ domain: 'weread.qq.com' }); expect(mockPage.evaluate).toHaveBeenCalledTimes(1); + expect(warnSpy).toHaveBeenCalledWith( + 'WeRead private API auth expired; showing cached shelf data from localStorage. Results may be stale, and detail commands may still require re-login.', + ); expect(result).toEqual([ { title: '文明、现代化、价值投资与中国', @@ -235,12 +313,12 @@ describe('weread private API regression', () => { const mockPage = { getCookies: vi.fn() - .mockResolvedValueOnce([ - { name: 'wr_skey', value: 'skey123', domain: '.weread.qq.com' }, - ]) - .mockResolvedValueOnce([ - { name: 'wr_vid', value: 'vid-current', domain: '.weread.qq.com' }, - ]), + // fetchPrivateApi: URL lookup (i.weread.qq.com) + .mockResolvedValueOnce([{ name: 'wr_skey', value: 'skey123', domain: '.weread.qq.com' }]) + // fetchPrivateApi: domain lookup (weread.qq.com) + .mockResolvedValueOnce([{ name: 'wr_skey', value: 'skey123', domain: '.weread.qq.com' }]) + // loadWebShelfSnapshot: domain lookup for wr_vid + .mockResolvedValueOnce([{ name: 'wr_vid', value: 'vid-current', domain: '.weread.qq.com' }]), goto: vi.fn().mockResolvedValue(undefined), evaluate: vi.fn().mockResolvedValue({ cacheFound: false, @@ -270,12 +348,12 @@ describe('weread private API regression', () => { const mockPage = { getCookies: vi.fn() - .mockResolvedValueOnce([ - { name: 'wr_skey', value: 'skey123', domain: '.weread.qq.com' }, - ]) - .mockResolvedValueOnce([ - { name: 'wr_vid', value: 'vid-current', domain: '.weread.qq.com' }, - ]), + // fetchPrivateApi: URL lookup (i.weread.qq.com) + .mockResolvedValueOnce([{ name: 'wr_skey', value: 'skey123', domain: '.weread.qq.com' }]) + // fetchPrivateApi: domain lookup (weread.qq.com) + .mockResolvedValueOnce([{ name: 'wr_skey', value: 'skey123', domain: '.weread.qq.com' }]) + // loadWebShelfSnapshot: domain lookup for wr_vid + .mockResolvedValueOnce([{ name: 'wr_vid', value: 'vid-current', domain: '.weread.qq.com' }]), goto: vi.fn().mockResolvedValue(undefined), evaluate: vi.fn().mockResolvedValue({ cacheFound: true, @@ -304,12 +382,12 @@ describe('weread private API regression', () => { const mockPage = { getCookies: vi.fn() - .mockResolvedValueOnce([ - { name: 'wr_skey', value: 'skey123', domain: '.weread.qq.com' }, - ]) - .mockResolvedValueOnce([ - { name: 'wr_vid', value: 'vid-current', domain: '.weread.qq.com' }, - ]), + // fetchPrivateApi: URL lookup (i.weread.qq.com) + .mockResolvedValueOnce([{ name: 'wr_skey', value: 'skey123', domain: '.weread.qq.com' }]) + // fetchPrivateApi: domain lookup (weread.qq.com) + .mockResolvedValueOnce([{ name: 'wr_skey', value: 'skey123', domain: '.weread.qq.com' }]) + // loadWebShelfSnapshot: domain lookup for wr_vid + .mockResolvedValueOnce([{ name: 'wr_vid', value: 'vid-current', domain: '.weread.qq.com' }]), goto: vi.fn().mockResolvedValue(undefined), evaluate: vi.fn().mockResolvedValue({ cacheFound: true, diff --git a/src/clis/weread/search-regression.test.ts b/src/clis/weread/search-regression.test.ts new file mode 100644 index 00000000..5bba8024 --- /dev/null +++ b/src/clis/weread/search-regression.test.ts @@ -0,0 +1,440 @@ +import { beforeEach, describe, expect, it, vi } from 'vitest'; +import { getRegistry } from '../../registry.js'; +import './search.js'; + +describe('weread/search regression', () => { + beforeEach(() => { + vi.restoreAllMocks(); + }); + + it('uses the query argument for the search API and returns reader urls from search html', async () => { + const command = getRegistry().get('weread/search'); + expect(command?.func).toBeTypeOf('function'); + + const fetchMock = vi.fn() + .mockResolvedValueOnce({ + ok: true, + json: () => Promise.resolve({ + books: [ + { + bookInfo: { + title: 'Deep Work', + author: 'Cal Newport', + bookId: 'abc123', + }, + }, + ], + }), + }) + .mockResolvedValueOnce({ + ok: true, + text: () => Promise.resolve(` + + `), + }); + vi.stubGlobal('fetch', fetchMock); + + const result = await command!.func!(null as any, { query: 'deep work', limit: 5 }); + + expect(fetchMock).toHaveBeenCalledTimes(2); + expect(String(fetchMock.mock.calls[0][0])).toContain('keyword=deep+work'); + expect(String(fetchMock.mock.calls[1][0])).toContain('/web/search/books?keyword=deep+work'); + expect(result).toEqual([ + { + rank: 1, + title: 'Deep Work', + author: 'Cal Newport', + bookId: 'abc123', + url: 'https://weread.qq.com/web/reader/reader123', + }, + ]); + }); + + it('does not emit stale bookDetail urls when the reader url is unavailable', async () => { + const command = getRegistry().get('weread/search'); + expect(command?.func).toBeTypeOf('function'); + + const fetchMock = vi.fn() + .mockResolvedValueOnce({ + ok: true, + json: () => Promise.resolve({ + books: [ + { + bookInfo: { + title: 'Deep Work', + author: 'Cal Newport', + bookId: 'abc123', + }, + }, + ], + }), + }) + .mockResolvedValueOnce({ + ok: true, + text: () => Promise.resolve('

no search cards

'), + }); + vi.stubGlobal('fetch', fetchMock); + + const result = await command!.func!(null as any, { query: 'deep work', limit: 5 }); + + expect(result).toEqual([ + { + rank: 1, + title: 'Deep Work', + author: 'Cal Newport', + bookId: 'abc123', + url: '', + }, + ]); + }); + + it('matches reader urls by title queue instead of assuming identical result order', async () => { + const command = getRegistry().get('weread/search'); + expect(command?.func).toBeTypeOf('function'); + + const fetchMock = vi.fn() + .mockResolvedValueOnce({ + ok: true, + json: () => Promise.resolve({ + books: [ + { + bookInfo: { + title: 'Deep Work', + author: 'Cal Newport', + bookId: 'abc123', + }, + }, + { + bookInfo: { + title: 'Digital Minimalism', + author: 'Cal Newport', + bookId: 'xyz789', + }, + }, + ], + }), + }) + .mockResolvedValueOnce({ + ok: true, + text: () => Promise.resolve(` + + `), + }); + vi.stubGlobal('fetch', fetchMock); + + const result = await command!.func!(null as any, { query: 'cal newport', limit: 5 }); + + expect(result).toEqual([ + { + rank: 1, + title: 'Deep Work', + author: 'Cal Newport', + bookId: 'abc123', + url: 'https://weread.qq.com/web/reader/deep333', + }, + { + rank: 2, + title: 'Digital Minimalism', + author: 'Cal Newport', + bookId: 'xyz789', + url: 'https://weread.qq.com/web/reader/digital222', + }, + ]); + }); + + it('falls back to empty urls when the search html request fails', async () => { + const command = getRegistry().get('weread/search'); + expect(command?.func).toBeTypeOf('function'); + + const fetchMock = vi.fn() + .mockResolvedValueOnce({ + ok: true, + json: () => Promise.resolve({ + books: [ + { + bookInfo: { + title: 'Deep Work', + author: 'Cal Newport', + bookId: 'abc123', + }, + }, + ], + }), + }) + .mockRejectedValueOnce(new Error('network timeout')); + vi.stubGlobal('fetch', fetchMock); + + const result = await command!.func!(null as any, { query: 'deep work', limit: 5 }); + + expect(result).toEqual([ + { + rank: 1, + title: 'Deep Work', + author: 'Cal Newport', + bookId: 'abc123', + url: '', + }, + ]); + }); + + it('binds reader urls with title and author instead of title alone', async () => { + const command = getRegistry().get('weread/search'); + expect(command?.func).toBeTypeOf('function'); + + const fetchMock = vi.fn() + .mockResolvedValueOnce({ + ok: true, + json: () => Promise.resolve({ + books: [ + { + bookInfo: { + title: '文明', + author: '作者甲', + bookId: 'book-a', + }, + }, + { + bookInfo: { + title: '文明', + author: '作者乙', + bookId: 'book-b', + }, + }, + ], + }), + }) + .mockResolvedValueOnce({ + ok: true, + text: () => Promise.resolve(` + + `), + }); + vi.stubGlobal('fetch', fetchMock); + + const result = await command!.func!(null as any, { query: '文明', limit: 5 }); + + expect(result).toEqual([ + { + rank: 1, + title: '文明', + author: '作者甲', + bookId: 'book-a', + url: 'https://weread.qq.com/web/reader/book-a-reader', + }, + { + rank: 2, + title: '文明', + author: '作者乙', + bookId: 'book-b', + url: 'https://weread.qq.com/web/reader/book-b-reader', + }, + ]); + }); + + it('leaves urls empty when same-title results are ambiguous and html cards have no author', async () => { + const command = getRegistry().get('weread/search'); + expect(command?.func).toBeTypeOf('function'); + + const fetchMock = vi.fn() + .mockResolvedValueOnce({ + ok: true, + json: () => Promise.resolve({ + books: [ + { + bookInfo: { + title: '文明', + author: '作者甲', + bookId: 'book-a', + }, + }, + { + bookInfo: { + title: '文明', + author: '作者乙', + bookId: 'book-b', + }, + }, + ], + }), + }) + .mockResolvedValueOnce({ + ok: true, + text: () => Promise.resolve(` + + `), + }); + vi.stubGlobal('fetch', fetchMock); + + const result = await command!.func!(null as any, { query: '文明', limit: 5 }); + + expect(result).toEqual([ + { + rank: 1, + title: '文明', + author: '作者甲', + bookId: 'book-a', + url: '', + }, + { + rank: 2, + title: '文明', + author: '作者乙', + bookId: 'book-b', + url: '', + }, + ]); + }); + + it('leaves urls empty when exact author matching fails and multiple html cards share the same title', async () => { + const command = getRegistry().get('weread/search'); + expect(command?.func).toBeTypeOf('function'); + + const fetchMock = vi.fn() + .mockResolvedValueOnce({ + ok: true, + json: () => Promise.resolve({ + books: [ + { + bookInfo: { + title: '文明', + author: '作者甲', + bookId: 'book-a', + }, + }, + ], + }), + }) + .mockResolvedValueOnce({ + ok: true, + text: () => Promise.resolve(` + + `), + }); + vi.stubGlobal('fetch', fetchMock); + + const result = await command!.func!(null as any, { query: '文明', limit: 5 }); + + expect(result).toEqual([ + { + rank: 1, + title: '文明', + author: '作者甲', + bookId: 'book-a', + url: '', + }, + ]); + }); + + it('leaves urls empty when multiple results share the same title and author identity', async () => { + const command = getRegistry().get('weread/search'); + expect(command?.func).toBeTypeOf('function'); + + const fetchMock = vi.fn() + .mockResolvedValueOnce({ + ok: true, + json: () => Promise.resolve({ + books: [ + { + bookInfo: { + title: '文明', + author: '作者甲', + bookId: 'book-a', + }, + }, + { + bookInfo: { + title: '文明', + author: '作者甲', + bookId: 'book-b', + }, + }, + ], + }), + }) + .mockResolvedValueOnce({ + ok: true, + text: () => Promise.resolve(` + + `), + }); + vi.stubGlobal('fetch', fetchMock); + + const result = await command!.func!(null as any, { query: '文明', limit: 5 }); + + expect(result).toEqual([ + { + rank: 1, + title: '文明', + author: '作者甲', + bookId: 'book-a', + url: '', + }, + { + rank: 2, + title: '文明', + author: '作者甲', + bookId: 'book-b', + url: '', + }, + ]); + }); +}); diff --git a/src/clis/weread/search.ts b/src/clis/weread/search.ts index 46472d79..5369a9ea 100644 --- a/src/clis/weread/search.ts +++ b/src/clis/weread/search.ts @@ -1,5 +1,155 @@ import { cli, Strategy } from '../../registry.js'; -import { fetchWebApi } from './utils.js'; +import { fetchWebApi, WEREAD_UA, WEREAD_WEB_ORIGIN } from './utils.js'; + +interface SearchHtmlEntry { + title: string; + author: string; + url: string; +} + +function decodeHtmlText(value: string): string { + return value + .replace(/<[^>]+>/g, '') + .replace(/&#x([0-9a-fA-F]+);/gi, (_, n) => String.fromCharCode(parseInt(n, 16))) + .replace(/&#(\d+);/g, (_, n) => String.fromCharCode(Number(n))) + .replace(/ /g, ' ') + .replace(/&/g, '&') + .replace(/"/g, '"') + .trim(); +} + +function normalizeSearchTitle(value: string): string { + return value.replace(/\s+/g, ' ').trim(); +} + +function buildSearchIdentity(title: string, author: string): string { + return `${normalizeSearchTitle(title)}\u0000${normalizeSearchTitle(author)}`; +} + +function countSearchTitles(entries: Array<{ title: string }>): Map { + const counts = new Map(); + for (const entry of entries) { + const key = normalizeSearchTitle(entry.title); + if (!key) continue; + counts.set(key, (counts.get(key) || 0) + 1); + } + return counts; +} + +function countSearchIdentities(entries: Array<{ title: string; author: string }>): Map { + const counts = new Map(); + for (const entry of entries) { + const key = buildSearchIdentity(entry.title, entry.author); + if (!normalizeSearchTitle(entry.title) || !normalizeSearchTitle(entry.author)) continue; + counts.set(key, (counts.get(key) || 0) + 1); + } + return counts; +} + +function isUniqueCount(counts: Map, key: string): boolean { + return (counts.get(key) || 0) <= 1; +} + +/** + * Build exact and title-only queues separately. + * Exact title+author matches are preferred; title-only matching is used only + * when the HTML card did not expose an author field. + */ +function buildSearchUrlQueues(entries: SearchHtmlEntry[]): { + exactQueues: Map; + titleOnlyQueues: Map; +} { + const exactQueues = new Map(); + const titleOnlyQueues = new Map(); + for (const entry of entries) { + const titleKey = normalizeSearchTitle(entry.title); + if (!titleKey || !entry.url) continue; + const queueMap = entry.author ? exactQueues : titleOnlyQueues; + const queueKey = entry.author ? buildSearchIdentity(entry.title, entry.author) : titleKey; + const current = queueMap.get(queueKey); + if (current) { + current.push(entry.url); + continue; + } + queueMap.set(queueKey, [entry.url]); + } + return { exactQueues, titleOnlyQueues }; +} + +function resolveSearchResultUrl(params: { + exactQueues: Map; + titleOnlyQueues: Map; + apiIdentityCounts: Map; + htmlIdentityCounts: Map; + apiTitleCounts: Map; + htmlTitleCounts: Map; + title: string; + author: string; +}): string { + const { + exactQueues, + titleOnlyQueues, + apiIdentityCounts, + htmlIdentityCounts, + apiTitleCounts, + htmlTitleCounts, + title, + author, + } = params; + const identityKey = buildSearchIdentity(title, author); + if (isUniqueCount(apiIdentityCounts, identityKey) && isUniqueCount(htmlIdentityCounts, identityKey)) { + const exactUrl = exactQueues.get(identityKey)?.shift(); + if (exactUrl) return exactUrl; + } + + const titleKey = normalizeSearchTitle(title); + if (!isUniqueCount(apiTitleCounts, titleKey) || !isUniqueCount(htmlTitleCounts, titleKey)) { + return ''; + } + + return titleOnlyQueues.get(titleKey)?.shift() ?? ''; +} + +/** + * Extract rendered search result reader URLs from the server-rendered search page. + * The public JSON API still returns bookId, but the current web app links results + * through /web/reader/ rather than /web/bookDetail/. + */ +async function loadSearchHtmlEntries(query: string): Promise { + const url = new URL('/web/search/books', WEREAD_WEB_ORIGIN); + url.searchParams.set('keyword', query); + + let html = ''; + try { + const resp = await fetch(url.toString(), { + headers: { 'User-Agent': WEREAD_UA }, + }); + if (!resp.ok) return []; + html = await resp.text(); + } catch { + return []; + } + const items = Array.from( + html.matchAll(/]*class="wr_bookList_item"[^>]*>([\s\S]*?)<\/li>/g), + ); + + return items.map((match) => { + const chunk = match[1]; + const hrefMatch = chunk.match(/]*href="([^"]+)"[^>]*class="wr_bookList_item_link"[^>]*>|]*class="wr_bookList_item_link"[^>]*href="([^"]+)"[^>]*>/); + const titleMatch = chunk.match(/]*class="wr_bookList_item_title"[^>]*>([\s\S]*?)<\/p>/); + const authorMatch = chunk.match(/]*class="wr_bookList_item_author"[^>]*>([\s\S]*?)<\/p>/); + + const href = hrefMatch?.[1] || hrefMatch?.[2] || ''; + const title = decodeHtmlText(titleMatch?.[1] || ''); + const author = decodeHtmlText(authorMatch?.[1] || ''); + + return { + author, + url: href ? new URL(href, WEREAD_WEB_ORIGIN).toString() : '', + title, + }; + }).filter((item) => item.url && item.title); +} cli({ site: 'weread', @@ -14,14 +164,44 @@ cli({ ], columns: ['rank', 'title', 'author', 'bookId', 'url'], func: async (_page, args) => { - const data = await fetchWebApi('/search/global', { keyword: args.query }); + const [data, htmlEntries] = await Promise.all([ + fetchWebApi('/search/global', { keyword: args.query }), + loadSearchHtmlEntries(String(args.query ?? '')), + ]); const books: any[] = data?.books ?? []; - return books.slice(0, Number(args.limit)).map((item: any, i: number) => ({ - rank: i + 1, - title: item.bookInfo?.title ?? '', - author: item.bookInfo?.author ?? '', - bookId: item.bookInfo?.bookId ?? '', - url: item.bookInfo?.bookId ? 'https://weread.qq.com/web/bookDetail/' + item.bookInfo.bookId : '', - })); + const { exactQueues, titleOnlyQueues } = buildSearchUrlQueues(htmlEntries); + const apiIdentityCounts = countSearchIdentities( + books.map((item: any) => ({ + title: item.bookInfo?.title ?? '', + author: item.bookInfo?.author ?? '', + })), + ); + const htmlIdentityCounts = countSearchIdentities( + htmlEntries.filter((entry) => entry.author), + ); + const apiTitleCounts = countSearchTitles( + books.map((item: any) => ({ title: item.bookInfo?.title ?? '' })), + ); + const htmlTitleCounts = countSearchTitles(htmlEntries); + return books.slice(0, Number(args.limit)).map((item: any, i: number) => { + const title = item.bookInfo?.title ?? ''; + const author = item.bookInfo?.author ?? ''; + return { + rank: i + 1, + title, + author, + bookId: item.bookInfo?.bookId ?? '', + url: resolveSearchResultUrl({ + exactQueues, + titleOnlyQueues, + apiIdentityCounts, + htmlIdentityCounts, + apiTitleCounts, + htmlTitleCounts, + title, + author, + }), + }; + }); }, }); diff --git a/src/clis/weread/shelf.ts b/src/clis/weread/shelf.ts index 0d9fcb5e..977c2103 100644 --- a/src/clis/weread/shelf.ts +++ b/src/clis/weread/shelf.ts @@ -1,10 +1,13 @@ import { cli, Strategy } from '../../registry.js'; import { CliError } from '../../errors.js'; +import { log } from '../../logger.js'; import type { IPage } from '../../types.js'; -import { fetchPrivateApi } from './utils.js'; - -const WEREAD_DOMAIN = 'weread.qq.com'; -const WEREAD_SHELF_URL = `https://${WEREAD_DOMAIN}/web/shelf`; +import { + buildWebShelfEntries, + fetchPrivateApi, + loadWebShelfSnapshot, + type WebShelfSnapshot, +} from './utils.js'; interface ShelfRow { title: string; @@ -13,24 +16,6 @@ interface ShelfRow { bookId: string; } -interface WebShelfRawBook { - bookId?: string; - title?: string; - author?: string; -} - -interface WebShelfIndexEntry { - bookId?: string; - idx?: number; - role?: string; -} - -interface WebShelfSnapshot { - cacheFound: boolean; - rawBooks: WebShelfRawBook[]; - shelfIndexes: WebShelfIndexEntry[]; -} - function normalizeShelfLimit(limit: number): number { if (!Number.isFinite(limit)) return 0; return Math.max(0, Math.trunc(limit)); @@ -50,110 +35,17 @@ function normalizePrivateApiRows(data: any, limit: number): ShelfRow[] { function normalizeWebShelfRows(snapshot: WebShelfSnapshot, limit: number): ShelfRow[] { if (limit <= 0) return []; - const bookById = new Map(); - for (const book of snapshot.rawBooks) { - const bookId = String(book?.bookId || '').trim(); - if (!bookId) continue; - bookById.set(bookId, book); - } - - const orderedBookIds = snapshot.shelfIndexes - .filter((entry) => String(entry?.role || 'book') === 'book') - .sort((left, right) => Number(left?.idx ?? Number.MAX_SAFE_INTEGER) - Number(right?.idx ?? Number.MAX_SAFE_INTEGER)) - .map((entry) => String(entry?.bookId || '').trim()) - .filter(Boolean); - - const fallbackOrder = snapshot.rawBooks - .map((book) => String(book?.bookId || '').trim()) - .filter(Boolean); - - const orderedUniqueBookIds = Array.from(new Set([ - ...orderedBookIds, - ...fallbackOrder, - ])); - - return orderedUniqueBookIds - .map((bookId) => { - const book = bookById.get(bookId); - if (!book) return null; - return { - title: String(book.title || '').trim(), - author: String(book.author || '').trim(), + return buildWebShelfEntries(snapshot) + .map((entry) => ({ + title: entry.title, + author: entry.author, progress: '-', - bookId, - } satisfies ShelfRow; - }) - .filter((item): item is ShelfRow => Boolean(item && (item.title || item.bookId))) + bookId: entry.bookId, + } satisfies ShelfRow)) + .filter((item): item is ShelfRow => Boolean(item.title || item.bookId)) .slice(0, limit); } -/** - * Read the structured shelf cache from the web shelf page. - * The page hydrates localStorage with raw book data plus shelf ordering. - */ -async function loadWebShelfSnapshot(page: IPage): Promise { - await page.goto(WEREAD_SHELF_URL); - - const cookies = await page.getCookies({ domain: WEREAD_DOMAIN }); - const currentVid = String(cookies.find((cookie) => cookie.name === 'wr_vid')?.value || '').trim(); - - if (!currentVid) { - return { cacheFound: false, rawBooks: [], shelfIndexes: [] }; - } - - const rawBooksKey = `shelf:rawBooks:${currentVid}`; - const shelfIndexesKey = `shelf:shelfIndexes:${currentVid}`; - - const result = await page.evaluate(` - (() => new Promise((resolve) => { - const deadline = Date.now() + 5000; - const rawBooksKey = ${JSON.stringify(rawBooksKey)}; - const shelfIndexesKey = ${JSON.stringify(shelfIndexesKey)}; - - const readJson = (raw) => { - if (typeof raw !== 'string') return null; - try { - return JSON.parse(raw); - } catch { - return null; - } - }; - - const poll = () => { - const rawBooksRaw = localStorage.getItem(rawBooksKey); - const shelfIndexesRaw = localStorage.getItem(shelfIndexesKey); - const rawBooks = readJson(rawBooksRaw); - const shelfIndexes = readJson(shelfIndexesRaw); - const cacheFound = Array.isArray(rawBooks); - - if (cacheFound || Date.now() >= deadline) { - resolve({ - cacheFound, - rawBooks: Array.isArray(rawBooks) ? rawBooks : [], - shelfIndexes: Array.isArray(shelfIndexes) ? shelfIndexes : [], - }); - return; - } - - setTimeout(poll, 100); - }; - - poll(); - })) - `); - - if (!result || typeof result !== 'object') { - return { cacheFound: false, rawBooks: [], shelfIndexes: [] }; - } - - const snapshot = result as Partial; - return { - cacheFound: snapshot.cacheFound === true, - rawBooks: Array.isArray(snapshot.rawBooks) ? snapshot.rawBooks : [], - shelfIndexes: Array.isArray(snapshot.shelfIndexes) ? snapshot.shelfIndexes : [], - }; -} - cli({ site: 'weread', name: 'shelf', @@ -180,6 +72,12 @@ cli({ if (!snapshot.cacheFound) { throw error; } + + // Make the fallback explicit so users do not mistake cached shelf data + // for a valid private API session. + log.warn( + 'WeRead private API auth expired; showing cached shelf data from localStorage. Results may be stale, and detail commands may still require re-login.', + ); return normalizeWebShelfRows(snapshot, limit); } }, diff --git a/src/clis/weread/utils.test.ts b/src/clis/weread/utils.test.ts index 3b1d3c15..88458ab7 100644 --- a/src/clis/weread/utils.test.ts +++ b/src/clis/weread/utils.test.ts @@ -1,5 +1,5 @@ import { describe, it, expect, vi, beforeEach } from 'vitest'; -import { formatDate, fetchWebApi } from './utils.js'; +import { buildWebShelfEntries, formatDate, fetchWebApi } from './utils.js'; describe('formatDate', () => { it('formats a typical Unix timestamp in UTC+8', () => { @@ -71,3 +71,83 @@ describe('fetchWebApi', () => { await expect(fetchWebApi('/search/global')).rejects.toThrow('Invalid JSON'); }); }); + +describe('buildWebShelfEntries', () => { + it('keeps mixed shelf item reader urls aligned when shelf indexes include non-book roles', () => { + const result = buildWebShelfEntries( + { + cacheFound: true, + rawBooks: [ + { bookId: 'MP_WXS_1', title: '公众号文章一', author: '作者甲' }, + { bookId: 'BOOK_2', title: '普通书二', author: '作者乙' }, + { bookId: 'MP_WXS_3', title: '公众号文章三', author: '作者丙' }, + ], + shelfIndexes: [ + { bookId: 'MP_WXS_1', idx: 0, role: 'mp' }, + { bookId: 'BOOK_2', idx: 1, role: 'book' }, + { bookId: 'MP_WXS_3', idx: 2, role: 'mp' }, + ], + }, + [ + 'https://weread.qq.com/web/reader/mp1', + 'https://weread.qq.com/web/reader/book2', + 'https://weread.qq.com/web/reader/mp3', + ], + ); + + expect(result).toEqual([ + { + bookId: 'MP_WXS_1', + title: '公众号文章一', + author: '作者甲', + readerUrl: 'https://weread.qq.com/web/reader/mp1', + }, + { + bookId: 'BOOK_2', + title: '普通书二', + author: '作者乙', + readerUrl: 'https://weread.qq.com/web/reader/book2', + }, + { + bookId: 'MP_WXS_3', + title: '公众号文章三', + author: '作者丙', + readerUrl: 'https://weread.qq.com/web/reader/mp3', + }, + ]); + }); + + it('falls back to raw cache order when shelf indexes are incomplete', () => { + const result = buildWebShelfEntries( + { + cacheFound: true, + rawBooks: [ + { bookId: 'BOOK_1', title: '第一本', author: '作者甲' }, + { bookId: 'BOOK_2', title: '第二本', author: '作者乙' }, + ], + shelfIndexes: [ + { bookId: 'BOOK_2', idx: 0, role: 'book' }, + ], + }, + [ + 'https://weread.qq.com/web/reader/book1', + 'https://weread.qq.com/web/reader/book2', + ], + ); + + expect(result).toEqual([ + { + bookId: 'BOOK_1', + title: '第一本', + author: '作者甲', + readerUrl: 'https://weread.qq.com/web/reader/book1', + }, + { + bookId: 'BOOK_2', + title: '第二本', + author: '作者乙', + readerUrl: 'https://weread.qq.com/web/reader/book2', + }, + ]); + }); +}); diff --git a/src/clis/weread/utils.ts b/src/clis/weread/utils.ts index 38949b75..e5e7f790 100644 --- a/src/clis/weread/utils.ts +++ b/src/clis/weread/utils.ts @@ -9,11 +9,44 @@ import { CliError } from '../../errors.js'; import type { BrowserCookie, IPage } from '../../types.js'; -const WEB_API = 'https://weread.qq.com/web'; -const API = 'https://i.weread.qq.com'; -const UA = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36'; +export const WEREAD_DOMAIN = 'weread.qq.com'; +export const WEREAD_WEB_ORIGIN = `https://${WEREAD_DOMAIN}`; +export const WEREAD_SHELF_URL = `${WEREAD_WEB_ORIGIN}/web/shelf`; +const WEB_API = `${WEREAD_WEB_ORIGIN}/web`; +const API = `https://i.${WEREAD_DOMAIN}`; +export const WEREAD_UA = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36'; const WEREAD_AUTH_ERRCODES = new Set([-2010, -2012]); +export interface WebShelfRawBook { + bookId?: string; + title?: string; + author?: string; +} + +export interface WebShelfIndexEntry { + bookId?: string; + idx?: number; + role?: string; +} + +export interface WebShelfSnapshot { + cacheFound: boolean; + rawBooks: WebShelfRawBook[]; + shelfIndexes: WebShelfIndexEntry[]; +} + +export interface WebShelfEntry { + bookId: string; + title: string; + author: string; + readerUrl: string; +} + +interface WebShelfStorageKeys { + rawBooksKey: string; + shelfIndexesKey: string; +} + function buildCookieHeader(cookies: BrowserCookie[]): string { return cookies.map((cookie) => `${cookie.name}=${cookie.value}`).join('; '); } @@ -22,6 +55,88 @@ function isAuthErrorResponse(resp: Response, data: any): boolean { return resp.status === 401 || WEREAD_AUTH_ERRCODES.has(Number(data?.errcode)); } +function getCurrentVid(cookies: BrowserCookie[]): string { + return String(cookies.find((cookie) => cookie.name === 'wr_vid')?.value || '').trim(); +} + +function getWebShelfStorageKeys(currentVid: string): WebShelfStorageKeys { + return { + rawBooksKey: `shelf:rawBooks:${currentVid}`, + shelfIndexesKey: `shelf:shelfIndexes:${currentVid}`, + }; +} + +function normalizeWebShelfSnapshot(value: Partial | null | undefined): WebShelfSnapshot { + return { + cacheFound: value?.cacheFound === true, + rawBooks: Array.isArray(value?.rawBooks) ? value.rawBooks : [], + shelfIndexes: Array.isArray(value?.shelfIndexes) ? value.shelfIndexes : [], + }; +} + +function buildShelfSnapshotPollScript(storageKeys: WebShelfStorageKeys, requireTrustedIndexes: boolean): string { + return ` + (() => new Promise((resolve) => { + const deadline = Date.now() + 5000; + const rawBooksKey = ${JSON.stringify(storageKeys.rawBooksKey)}; + const shelfIndexesKey = ${JSON.stringify(storageKeys.shelfIndexesKey)}; + const requireTrustedIndexes = ${JSON.stringify(requireTrustedIndexes)}; + + const readJson = (raw) => { + if (typeof raw !== 'string') return null; + try { + return JSON.parse(raw); + } catch { + return null; + } + }; + + const collectBookIds = (items) => Array.isArray(items) + ? Array.from(new Set(items.map((item) => String(item?.bookId || '').trim()).filter(Boolean))) + : []; + + // Mirror of getTrustedIndexedBookIds in Node.js — keep in sync + const hasTrustedIndexes = (rawBooks, shelfIndexes) => { + const rawBookIds = collectBookIds(rawBooks); + if (rawBookIds.length === 0) return false; + + const rawBookIdSet = new Set(rawBookIds); + const projectedIndexedBookIds = Array.isArray(shelfIndexes) + ? Array.from(new Set( + shelfIndexes + .filter((entry) => Number.isFinite(entry?.idx)) + .sort((left, right) => Number(left?.idx ?? Number.MAX_SAFE_INTEGER) - Number(right?.idx ?? Number.MAX_SAFE_INTEGER)) + .map((entry) => String(entry?.bookId || '').trim()) + .filter((bookId) => rawBookIdSet.has(bookId)), + )) + : []; + + return projectedIndexedBookIds.length === rawBookIds.length; + }; + + const poll = () => { + const rawBooks = readJson(localStorage.getItem(rawBooksKey)); + const shelfIndexes = readJson(localStorage.getItem(shelfIndexesKey)); + const cacheFound = Array.isArray(rawBooks); + const ready = cacheFound && (!requireTrustedIndexes || hasTrustedIndexes(rawBooks, shelfIndexes)); + + if (ready || Date.now() >= deadline) { + resolve({ + cacheFound, + rawBooks: Array.isArray(rawBooks) ? rawBooks : [], + shelfIndexes: Array.isArray(shelfIndexes) ? shelfIndexes : [], + }); + return; + } + + setTimeout(poll, 100); + }; + + poll(); + })) + `; +} + /** * Fetch a public WeRead web endpoint (Node.js direct fetch). * Used by search and ranking commands (browser: false). @@ -32,7 +147,7 @@ export async function fetchWebApi(path: string, params?: Record) for (const [k, v] of Object.entries(params)) url.searchParams.set(k, v); } const resp = await fetch(url.toString(), { - headers: { 'User-Agent': UA }, + headers: { 'User-Agent': WEREAD_UA }, }); if (!resp.ok) { throw new CliError('FETCH_ERROR', `HTTP ${resp.status} for ${path}`, 'WeRead API may be temporarily unavailable'); @@ -47,6 +162,10 @@ export async function fetchWebApi(path: string, params?: Record) /** * Fetch a private WeRead API endpoint with cookies extracted from the browser. * The HTTP request itself runs in Node.js to avoid page-context CORS failures. + * + * Cookies are collected from both the API subdomain (i.weread.qq.com) and the + * main domain (weread.qq.com). WeRead may set auth cookies as host-only on + * weread.qq.com, which won't match i.weread.qq.com in a URL-based lookup. */ export async function fetchPrivateApi(page: IPage, path: string, params?: Record): Promise { const url = new URL(`${API}${path}`); @@ -55,14 +174,21 @@ export async function fetchPrivateApi(page: IPage, path: string, params?: Record } const urlStr = url.toString(); - const cookies = await page.getCookies({ url: urlStr }); - const cookieHeader = buildCookieHeader(cookies); + // Merge cookies from both domains; API-domain cookies take precedence on name collision + const [apiCookies, domainCookies] = await Promise.all([ + page.getCookies({ url: urlStr }), + page.getCookies({ domain: WEREAD_DOMAIN }), + ]); + const merged = new Map(); + for (const c of domainCookies) merged.set(c.name, c); + for (const c of apiCookies) merged.set(c.name, c); + const cookieHeader = buildCookieHeader(Array.from(merged.values())); let resp: Response; try { resp = await fetch(urlStr, { headers: { - 'User-Agent': UA, + 'User-Agent': WEREAD_UA, 'Origin': 'https://weread.qq.com', 'Referer': 'https://weread.qq.com/', ...(cookieHeader ? { 'Cookie': cookieHeader } : {}), @@ -95,6 +221,137 @@ export async function fetchPrivateApi(page: IPage, path: string, params?: Record return data; } +function getUniqueRawBookIds(snapshot: WebShelfSnapshot): string[] { + return Array.from(new Set( + snapshot.rawBooks + .map((book) => String(book?.bookId || '').trim()) + .filter(Boolean), + )); +} + +/** Mirror of hasTrustedIndexes in buildShelfSnapshotPollScript — keep in sync */ +function getTrustedIndexedBookIds(snapshot: WebShelfSnapshot): string[] { + const rawBookIds = getUniqueRawBookIds(snapshot); + if (rawBookIds.length === 0) return []; + + const rawBookIdSet = new Set(rawBookIds); + const projectedIndexedBookIds = Array.from(new Set( + snapshot.shelfIndexes + .filter((entry) => Number.isFinite(entry?.idx)) + .sort((left, right) => Number(left?.idx ?? Number.MAX_SAFE_INTEGER) - Number(right?.idx ?? Number.MAX_SAFE_INTEGER)) + .map((entry) => String(entry?.bookId || '').trim()) + .filter((bookId) => rawBookIdSet.has(bookId)), + )); + + return projectedIndexedBookIds.length === rawBookIds.length ? projectedIndexedBookIds : []; +} + +/** + * Build stable shelf records from the web cache plus optional rendered reader URLs. + * We only trust shelfIndexes when it fully covers the same bookId set as rawBooks; + * otherwise we keep rawBooks order to avoid partial hydration reordering entries. + */ +export function buildWebShelfEntries(snapshot: WebShelfSnapshot, readerUrls: string[] = []): WebShelfEntry[] { + const rawBookIds = getUniqueRawBookIds(snapshot); + const trustedIndexedBookIds = getTrustedIndexedBookIds(snapshot); + const orderedBookIds = trustedIndexedBookIds.length > 0 ? trustedIndexedBookIds : rawBookIds; + + const rawBookById = new Map(); + for (const book of snapshot.rawBooks) { + const bookId = String(book?.bookId || '').trim(); + if (!bookId || rawBookById.has(bookId)) continue; + rawBookById.set(bookId, book); + } + + return orderedBookIds.map((bookId, index) => { + const book = rawBookById.get(bookId); + return { + bookId, + title: String(book?.title || '').trim(), + author: String(book?.author || '').trim(), + readerUrl: String(readerUrls[index] || '').trim(), + }; + }); +} + +/** + * Internal: load shelf snapshot and return the currentVid alongside it, + * so callers like resolveShelfReaderUrl can reuse it without a second getCookies. + */ +async function loadWebShelfSnapshotWithVid(page: IPage): Promise<{ snapshot: WebShelfSnapshot; currentVid: string }> { + await page.goto(WEREAD_SHELF_URL); + + const cookies = await page.getCookies({ domain: WEREAD_DOMAIN }); + const currentVid = getCurrentVid(cookies); + + if (!currentVid) { + return { snapshot: { cacheFound: false, rawBooks: [], shelfIndexes: [] }, currentVid: '' }; + } + + const result = await page.evaluate( + buildShelfSnapshotPollScript(getWebShelfStorageKeys(currentVid), false), + ); + return { + snapshot: normalizeWebShelfSnapshot(result as Partial | null | undefined), + currentVid, + }; +} + +/** + * Read the structured shelf cache from the WeRead shelf page. + * The page hydrates localStorage asynchronously, so we poll briefly before + * giving up and treating the cache as unavailable for the current session. + */ +export async function loadWebShelfSnapshot(page: IPage): Promise { + const { snapshot } = await loadWebShelfSnapshotWithVid(page); + return snapshot; +} + +/** + * `book` needs a trustworthy `bookId -> readerUrl` mapping, which may lag behind + * the first rawBooks cache hydration. Keep the fast shelf fallback path separate + * and only wait here, with a bounded poll, when resolving reader URLs. + */ +async function waitForTrustedWebShelfSnapshot(page: IPage, snapshot: WebShelfSnapshot, currentVid: string): Promise { + // Cache not available; nothing to wait for + if (!snapshot.cacheFound) return snapshot; + // Indexes already fully cover rawBooks; no need to re-poll + if (getTrustedIndexedBookIds(snapshot).length > 0) return snapshot; + + if (!currentVid) return snapshot; + + const result = await page.evaluate( + buildShelfSnapshotPollScript(getWebShelfStorageKeys(currentVid), true), + ); + return normalizeWebShelfSnapshot(result as Partial | null | undefined); +} + +/** + * Resolve a shelf bookId to the current web reader URL by pairing structured + * shelf cache order with the visible shelf links rendered on the page. + */ +export async function resolveShelfReaderUrl(page: IPage, bookId: string): Promise { + const { snapshot: initialSnapshot, currentVid } = await loadWebShelfSnapshotWithVid(page); + const snapshot = await waitForTrustedWebShelfSnapshot(page, initialSnapshot, currentVid); + if (!snapshot.cacheFound) return null; + const trustedIndexedBookIds = getTrustedIndexedBookIds(snapshot); + if (trustedIndexedBookIds.length === 0) return null; + + const readerUrls = await page.evaluate(` + (() => Array.from(document.querySelectorAll('a.shelfBook[href]')) + .map((anchor) => { + const href = anchor.getAttribute('href') || ''; + return href ? new URL(href, location.origin).toString() : ''; + }) + .filter(Boolean)) + `) as string[]; + if (readerUrls.length !== trustedIndexedBookIds.length) return null; + const entries = buildWebShelfEntries(snapshot, readerUrls); + + const entry = entries.find((candidate) => candidate.bookId === bookId); + return entry?.readerUrl || null; +} + /** Format a Unix timestamp (seconds) to YYYY-MM-DD in UTC+8. Returns '-' for invalid input. */ export function formatDate(ts: number | undefined | null): string { if (!Number.isFinite(ts) || (ts as number) <= 0) return '-'; diff --git a/src/weread-search-regression.test.ts b/src/weread-search-regression.test.ts deleted file mode 100644 index 4aa85569..00000000 --- a/src/weread-search-regression.test.ts +++ /dev/null @@ -1,44 +0,0 @@ -import { beforeEach, describe, expect, it, vi } from 'vitest'; -import { getRegistry } from './registry.js'; -import './clis/weread/search.js'; - -describe('weread/search regression', () => { - beforeEach(() => { - vi.restoreAllMocks(); - }); - - it('uses the query argument for the search API and returns urls', async () => { - const command = getRegistry().get('weread/search'); - expect(command?.func).toBeTypeOf('function'); - - const fetchMock = vi.fn().mockResolvedValue({ - ok: true, - json: () => Promise.resolve({ - books: [ - { - bookInfo: { - title: 'Deep Work', - author: 'Cal Newport', - bookId: 'abc123', - }, - }, - ], - }), - }); - vi.stubGlobal('fetch', fetchMock); - - const result = await command!.func!(null as any, { query: 'deep work', limit: 5 }); - - expect(fetchMock).toHaveBeenCalledTimes(1); - expect(String(fetchMock.mock.calls[0][0])).toContain('keyword=deep+work'); - expect(result).toEqual([ - { - rank: 1, - title: 'Deep Work', - author: 'Cal Newport', - bookId: 'abc123', - url: 'https://weread.qq.com/web/bookDetail/abc123', - }, - ]); - }); -});