Skip to content

Commit 0d8beea

Browse files
committed
feat(docs): add TODO list for completed and future improvements
feat(package): update version to 0.1.5 in package.json
feat(storage): implement bulk file fetching and processing in repo client
1 parent d8f3a48 commit 0d8beea

6 files changed

Lines changed: 192 additions & 172 deletions

File tree

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,7 @@ See [Configuration Guide](docs/CONFIG.md) for all options.
114114
| [Development Setup](docs/DEV-SETUP.md) | Local development environment |
115115
| [Publishing](docs/PUBLISH.md) | npm release process |
116116
| [LLM Installation](docs/LLM-INSTALL.md) | Instructions for AI coding agents |
117+
| [TODO](docs/TODO.md) | Future improvements |
117118

118119
## License
119120

docs/TODO.md

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
# TODO
2+
3+
## Completed
4+
5+
- [x] Push optimization: inline content in tree (O(n) → O(1))
6+
- [x] Pull optimization: raw.githubusercontent.com (0 API calls)
7+
- [x] Rate limit detection and backoff
8+
- [x] Skip push when no changes
9+
10+
## Future
11+
12+
- [ ] ETag caching for manifest fetch (saves bandwidth, not API calls)
13+
- [ ] GitHub webhook for push notifications (eliminate polling)
14+
- [ ] Compression improvements for large messages
15+
- [ ] Parallel category processing during sync

package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"name": "oc-sync",
3-
"version": "0.1.4",
3+
"version": "0.1.5",
44
"description": "Sync OpenCode data across machines using a private GitHub repository with vector clock conflict resolution",
55
"main": "dist/index.js",
66
"types": "dist/index.d.ts",

src/storage/interface.ts

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,9 @@ export interface StorageBackend {
3232
/** Get file content by path */
3333
getFile(path: string): Promise<string | null>;
3434

35+
/** Get multiple files in bulk (optimized for fewer API calls) */
36+
getFiles(paths: string[]): Promise<Record<string, string | null>>;
37+
3538
/** Update multiple files atomically (null value = delete) */
3639
updateFiles(files: Record<string, string | null>): Promise<void>;
3740

src/storage/repo/repo-client.ts

Lines changed: 103 additions & 113 deletions
Original file line numberDiff line numberDiff line change
@@ -31,17 +31,21 @@ interface GitRef {
3131
object: { sha: string; type: string };
3232
}
3333

34-
interface GitTree {
34+
interface GitTreeResponse {
3535
sha: string;
36-
tree: { path: string; mode: string; type: string; sha: string }[];
3736
}
3837

39-
interface GitBlob {
38+
interface GitCommit {
4039
sha: string;
4140
}
4241

43-
interface GitCommit {
44-
sha: string;
42+
/** Tree entry for GitHub API - can use either sha (existing blob) or content (inline) */
43+
interface TreeEntry {
44+
path: string;
45+
mode: string;
46+
type: string;
47+
sha?: string | null;
48+
content?: string;
4549
}
4650

4751
interface ContentFile {
@@ -132,17 +136,12 @@ export class RepoStorageBackend implements StorageBackend {
132136
// Get current HEAD commit SHA
133137
const headSha = await this.getHeadSha();
134138

135-
// Get current tree
136-
const currentTree = await this.getTree(headSha);
137-
138-
// Build new tree entries (includes blob creation)
139-
const blobStart = Date.now();
140-
const treeEntries = await this.buildTreeEntries(files, currentTree);
141-
const blobDuration = Date.now() - blobStart;
142-
this.logProgress(`Created ${String(treeEntries.length)} blobs in ${String(blobDuration)}ms`);
139+
// Build tree entries with inline content (no separate blob creation needed)
140+
const treeEntries = this.buildTreeEntries(files);
141+
this.logProgress(`Prepared ${String(treeEntries.length)} tree entries`);
143142

144-
// Create new tree
145-
const newTreeSha = await this.createTree(treeEntries, currentTree.sha);
143+
// Create new tree with base_tree for incremental update
144+
const newTreeSha = await this.createTree(treeEntries, headSha);
146145

147146
// Create commit
148147
const message = `Sync update: ${String(fileCount)} files`;
@@ -152,7 +151,9 @@ export class RepoStorageBackend implements StorageBackend {
152151
await this.updateRef(commitSha);
153152

154153
const totalDuration = Date.now() - startTime;
155-
this.logProgress(`Upload complete: ${String(fileCount)} files in ${String(totalDuration)}ms`);
154+
this.logProgress(
155+
`Upload complete: ${String(fileCount)} files in ${String(totalDuration)}ms (~5 API calls)`
156+
);
156157
}
157158

158159
/** Log progress for debugging */
@@ -185,6 +186,66 @@ export class RepoStorageBackend implements StorageBackend {
185186
}));
186187
}
187188

189+
/**
190+
* Bulk fetch multiple files using raw.githubusercontent.com.
191+
* No API rate limits - all files fetched in parallel.
192+
*/
193+
public async getFiles(paths: string[]): Promise<Record<string, string | null>> {
194+
if (paths.length === 0) return {};
195+
196+
const startTime = Date.now();
197+
const fetched = await this.fetchFilesViaRaw(paths);
198+
199+
const result: Record<string, string | null> = {};
200+
for (const { path, content } of fetched) {
201+
result[path] = content;
202+
}
203+
204+
const duration = Date.now() - startTime;
205+
const found = fetched.filter((f) => f.content !== null).length;
206+
this.logProgress(
207+
`Bulk fetch: ${String(found)}/${String(paths.length)} files in ${String(duration)}ms (0 API calls)`
208+
);
209+
210+
return result;
211+
}
212+
213+
/**
214+
* Fetch files via raw.githubusercontent.com (no API rate limits).
215+
* All fetches run in parallel - raw content endpoint has no rate limiting.
216+
*/
217+
private async fetchFilesViaRaw(
218+
paths: string[]
219+
): Promise<{ path: string; content: string | null }[]> {
220+
const branch = await this.getBranch();
221+
const baseUrl = `https://raw.githubusercontent.com/${this.owner}/${this.repo}/${branch}`;
222+
223+
const results = await Promise.all(
224+
paths.map(async (path) => {
225+
const content = await this.fetchRawFile(`${baseUrl}/${SYNC_DIR}/${path}`);
226+
return { path, content };
227+
})
228+
);
229+
230+
return results;
231+
}
232+
233+
/** Fetch a single file from raw.githubusercontent.com */
234+
private async fetchRawFile(url: string): Promise<string | null> {
235+
try {
236+
const res = await fetchWithRetry(
237+
url,
238+
{ headers: { Authorization: `Bearer ${this.token}` } },
239+
this.maxRetries,
240+
this.retryDelayMs
241+
);
242+
if (!res.ok) return null;
243+
return await res.text();
244+
} catch {
245+
return null;
246+
}
247+
}
248+
188249
// --- Private helpers ---
189250

190251
private async fetch(path: string, options?: RequestInit): Promise<Response> {
@@ -336,122 +397,51 @@ export class RepoStorageBackend implements StorageBackend {
336397
return data.object.sha;
337398
}
338399

339-
private async getTree(commitSha: string): Promise<GitTree> {
340-
const res = await this.fetch(`/git/trees/${commitSha}?recursive=1`);
341-
if (!res.ok) {
342-
throw new RepoApiError('Failed to get tree', res.status);
343-
}
344-
return (await res.json()) as GitTree;
345-
}
346-
347-
private async buildTreeEntries(
348-
files: Record<string, string | null>,
349-
currentTree: GitTree
350-
): Promise<{ path: string; mode: string; type: string; sha?: string | null }[]> {
351-
// Start with existing tree entries (excluding ones we're updating/deleting)
352-
const updatedPaths = new Set(Object.keys(files).map((p) => `${SYNC_DIR}/${p}`));
353-
354-
const entries = currentTree.tree
355-
.filter((e) => !updatedPaths.has(e.path))
356-
.map((e) => ({ path: e.path, mode: e.mode, type: e.type, sha: e.sha }));
400+
/**
401+
* Build tree entries with inline content.
402+
* Uses GitHub's ability to accept `content` directly instead of blob SHA,
403+
* reducing API calls from N+5 to just 5 (regardless of file count).
404+
*/
405+
private buildTreeEntries(files: Record<string, string | null>): TreeEntry[] {
406+
const entries: TreeEntry[] = [];
357407

358-
// Collect files that need blob creation
359-
const filesToUpload: { path: string; content: string }[] = [];
360408
for (const [path, content] of Object.entries(files)) {
361-
if (content !== null) {
362-
filesToUpload.push({ path: `${SYNC_DIR}/${path}`, content });
409+
const fullPath = `${SYNC_DIR}/${path}`;
410+
if (content === null) {
411+
// Delete file by setting sha to null
412+
entries.push({ path: fullPath, mode: '100644', type: 'blob', sha: null });
413+
} else {
414+
// Add/update file with inline content (no blob creation needed!)
415+
entries.push({ path: fullPath, mode: '100644', type: 'blob', content });
363416
}
364417
}
365418

366-
// Create blobs in small parallel batches with delays to avoid secondary rate limits
367-
const BATCH_SIZE = 5;
368-
const blobResults = await this.createBlobsInBatches(filesToUpload, BATCH_SIZE);
369-
370-
// Add new entries from batch results
371-
for (const result of blobResults) {
372-
entries.push({
373-
path: result.path,
374-
mode: '100644',
375-
type: 'blob',
376-
sha: result.sha,
377-
});
378-
}
379-
380419
return entries;
381420
}
382421

383422
/**
384-
* Create blobs in parallel batches with rate limit protection.
385-
* Uses small batches with delays to avoid GitHub's secondary rate limits.
423+
* Create a new tree using base_tree for incremental updates.
424+
* Entries can use either `sha` (for existing blobs) or `content` (inline).
386425
*/
387-
private async createBlobsInBatches(
388-
files: { path: string; content: string }[],
389-
batchSize: number
390-
): Promise<{ path: string; sha: string }[]> {
391-
const results: { path: string; sha: string }[] = [];
392-
const totalBatches = Math.ceil(files.length / batchSize);
393-
394-
for (let i = 0; i < files.length; i += batchSize) {
395-
const batchNum = Math.floor(i / batchSize) + 1;
396-
const batch = files.slice(i, i + batchSize);
397-
398-
if (files.length > batchSize) {
399-
this.logProgress(
400-
`Batch ${String(batchNum)}/${String(totalBatches)} (${String(batch.length)} files)`
401-
);
402-
}
403-
404-
const batchPromises = batch.map(async (file) => {
405-
const sha = await this.createBlob(file.content);
406-
return { path: file.path, sha };
407-
});
408-
const batchResults = await Promise.all(batchPromises);
409-
results.push(...batchResults);
410-
411-
// Add delay between batches to avoid secondary rate limits
412-
if (i + batchSize < files.length) {
413-
await new Promise((r) => setTimeout(r, 100));
414-
}
415-
}
416-
417-
return results;
418-
}
419-
420-
private async createBlob(content: string): Promise<string> {
421-
const body = JSON.stringify({
422-
content: Buffer.from(content).toString('base64'),
423-
encoding: 'base64',
424-
});
425-
426-
const res = await this.fetch('/git/blobs', { method: 'POST', body });
427-
if (!res.ok) {
428-
throw new RepoApiError('Failed to create blob', res.status);
429-
}
430-
431-
const data = (await res.json()) as GitBlob;
432-
return data.sha;
433-
}
434-
435-
private async createTree(
436-
entries: { path: string; mode: string; type: string; sha?: string | null }[],
437-
baseSha?: string
438-
): Promise<string> {
426+
private async createTree(entries: TreeEntry[], baseSha: string): Promise<string> {
439427
const body = JSON.stringify({
440428
base_tree: baseSha,
441-
tree: entries.map((e) => ({
442-
path: e.path,
443-
mode: e.mode,
444-
type: e.type,
445-
sha: e.sha,
446-
})),
429+
tree: entries.map((e) => {
430+
if (e.content !== undefined) {
431+
// Inline content - GitHub will create the blob automatically
432+
return { path: e.path, mode: e.mode, type: e.type, content: e.content };
433+
}
434+
// Delete (sha: null) or reference existing blob
435+
return { path: e.path, mode: e.mode, type: e.type, sha: e.sha };
436+
}),
447437
});
448438

449439
const res = await this.fetch('/git/trees', { method: 'POST', body });
450440
if (!res.ok) {
451441
throw new RepoApiError('Failed to create tree', res.status);
452442
}
453443

454-
const data = (await res.json()) as GitTree;
444+
const data = (await res.json()) as GitTreeResponse;
455445
return data.sha;
456446
}
457447

0 commit comments

Comments (0)