From a568f68e87d0cb05fe323b5aa8b8030d5cfeee20 Mon Sep 17 00:00:00 2001 From: Francisco Aranda Date: Fri, 6 Jun 2025 12:19:07 +0200 Subject: [PATCH 1/5] add required deps --- package.json | 3 + pnpm-lock.yaml | 219 +++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 222 insertions(+) diff --git a/package.json b/package.json index 45ff209b..c5385771 100644 --- a/package.json +++ b/package.json @@ -74,12 +74,15 @@ "@huggingface/tasks": "^0.19.5", "@huggingface/transformers": "^3.5.1", "@lancedb/lancedb": "^0.18.2", + "@types/papaparse": "^5.3.16", "apache-arrow": "^19.0.1", "consola": "^3.4.0", "date-fns": "^4.1.0", "express": "4.20.0", + "googleapis": "^149.0.0", "marked": "^15.0.7", "mustache": "^4.2.0", + "papaparse": "^5.5.3", "playwright": "^1.51.1", "playwright-core": "^1.51.1", "sbd": "^1.0.19", diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 68c7b8e2..67129c3d 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -26,6 +26,9 @@ importers: '@lancedb/lancedb': specifier: ^0.18.2 version: 0.18.2(apache-arrow@19.0.1) + '@types/papaparse': + specifier: ^5.3.16 + version: 5.3.16 apache-arrow: specifier: ^19.0.1 version: 19.0.1 @@ -38,12 +41,18 @@ importers: express: specifier: 4.20.0 version: 4.20.0 + googleapis: + specifier: ^149.0.0 + version: 149.0.0(encoding@0.1.13) marked: specifier: ^15.0.7 version: 15.0.7 mustache: specifier: ^4.2.0 version: 4.2.0 + papaparse: + specifier: ^5.5.3 + version: 5.5.3 playwright: specifier: ^1.51.1 version: 1.51.1 @@ -1242,6 +1251,9 @@ packages: '@types/node@20.14.11': resolution: {integrity: sha512-kprQpL8MMeszbz6ojB5/tU8PLN4kesnN8Gjzw349rDlNgsSzg90lAVj3llK99Dh7JON+t9AuscPPFW6mPbTnSA==} + '@types/papaparse@5.3.16': + resolution: {integrity: sha512-T3VuKMC2H0lgsjI9buTB3uuKj3EMD2eap1MOuEQuBQ44EnDx/IkGhU6EwiTf9zG3za4SKlmwKAImdDKdNnCsXg==} + '@types/qs@6.9.18': resolution: {integrity: sha512-kK7dgTYDyGqS+e2Q4aK9X3D7q234CIZ1Bv0q/7Z5IwRDoADNU81xXJK/YVyLbLTZCoIwUoDoffFeF+p/eIklAA==} @@ -1390,6 +1402,10 @@ packages: resolution: {integrity: sha512-RZNwNclF7+MS/8bDg70amg32dyeZGZxiDuQmZxKLAlQjr3jGyLx+4Kkk58UO7D2QdgFIQCovuSuZESne6RG6XQ==} engines: {node: '>= 6.0.0'} + agent-base@7.1.3: + resolution: {integrity: sha512-jRR5wdylq8CkOe6hei19GGZnxM6rBGwFl3Bg0YItGDimvjGtAvdZk4Pu6Cl4u4Igsws4a1fd1Vq3ezrhn4KmFw==} + engines: {node: '>= 14'} + agentkeepalive@4.6.0: resolution: {integrity: sha512-kja8j7PjmncONqaTsB8fQ+wE2mSU2DJ9D4XKoJ5PFWIdRMa6SLSN1ff4mOr4jCbfRSsxR4keIiySJU0N9T5hIQ==} engines: {node: '>= 8.0.0'} @@ -1501,6 +1517,9 @@ packages: base64-js@1.5.1: resolution: {integrity: sha512-AKpaYlHn8t4SVbOHCy+b5+KKgvR4vrsD8vbvrbiQJps7fKDTkjkDry6ji0rUJjC0kzbNePLwzxq8iypo41qeWA==} + bignumber.js@9.3.0: + resolution: {integrity: sha512-EM7aMFTXbptt/wZdMlBv2t8IViwQL+h6SLHosp8Yf0dqJMTnY6iL32opnAB6kAdL0SZPuvcAzFr31o0c/R3/RA==} + binary-extensions@2.3.0: resolution: {integrity: sha512-Ceh+7ox5qe7LJuLHoY0feh3pHuUDHAcRUeyL2VYghZwfpkNIy/+8Ocg0a3UuSoYzavmylwuLWQOf3hl0jjMMIw==} engines: {node: '>=8'} @@ -1540,6 +1559,9 @@ packages: engines: {node: ^6 || ^7 || ^8 || ^9 || ^10 || ^11 || ^12 || >=13.7} hasBin: true + buffer-equal-constant-time@1.0.1: + resolution: {integrity: sha512-zRpUiDwd/xk6ADqPMATG8vc9VPrkck7T07OIx0gnjmJAnHnTVXNQG3vfvWNuiZIkwu9KrKdA1iJKfsfTVxE6NA==} + buffer@5.7.1: resolution: {integrity: sha512-EHcyIPBQ4BSGlvjB16k5KgAJ27CIsHY/2JBmCRReo48y9rQ3MaUzWX3KVlBa4U7MyX02HdVj0K7C3WaB3ju7FQ==} @@ -1908,6 +1930,9 @@ packages: eastasianwidth@0.2.0: resolution: {integrity: sha512-I88TYZWc9XiYHRQ4/3c5rjjfgkjhLyW2luGIheGERbNQ6OY7yTybanSpDXZa8y7VUP9YmDcYa+eyq4ca7iLqWA==} + ecdsa-sig-formatter@1.0.11: + resolution: {integrity: sha512-nagl3RYrbNv6kQkeJIpt6NJZy8twLB/2vtz6yN9Z4vRKHN4/QZJIEbqohALSgwKdnksuY3k5Addp5lg8sVoVcQ==} + ee-first@1.1.1: resolution: {integrity: sha512-WMwm9LhRUo+WUaRN+vRuETqG89IgZphVSNkdFgeb6sS/E4OrDIN7t48CAewSHXc6C8lefD8KKfr5vY61brQlow==} @@ -2225,6 +2250,14 @@ packages: engines: {node: ^12.13.0 || ^14.15.0 || >=16.0.0} deprecated: This package is no longer supported. + gaxios@6.7.1: + resolution: {integrity: sha512-LDODD4TMYx7XXdpwxAVRAIAuB0bzv0s+ywFonY46k126qzQHT9ygyoa9tncmOiQmmDrik65UYsEkv3lbfqQ3yQ==} + engines: {node: '>=14'} + + gcp-metadata@6.1.1: + resolution: {integrity: sha512-a4tiq7E0/5fTjxPAaH4jpjkSv/uCaU2p5KC6HVGrvl0cDjA8iBZv4vv1gyzlmK0ZUKqwpOyQMKzZQe3lTit77A==} + engines: {node: '>=14'} + get-caller-file@2.0.5: resolution: {integrity: sha512-DyFP3BM/3YHTQOCUL/w0OZHR0lpKeGrxotcHWcqNEdnltqFwXVfhEBQ94eIo34AfQpo0rGki4cyIiftY06h2Fg==} engines: {node: 6.* || 8.* || >= 10.*} @@ -2275,6 +2308,22 @@ packages: globrex@0.1.2: resolution: {integrity: sha512-uHJgbwAMwNFf5mLst7IWLNg14x1CkeqglJb/K3doi4dw6q2IvAAmM/Y81kevy83wP+Sst+nutFTYOGg3d1lsxg==} + google-auth-library@9.15.1: + resolution: {integrity: sha512-Jb6Z0+nvECVz+2lzSMt9u98UsoakXxA2HGHMCxh+so3n90XgYWkq5dur19JAJV7ONiJY22yBTyJB1TSkvPq9Ng==} + engines: {node: '>=14'} + + google-logging-utils@0.0.2: + resolution: {integrity: sha512-NEgUnEcBiP5HrPzufUkBzJOD/Sxsco3rLNo1F1TNf7ieU8ryUzBhqba8r756CjLX7rn3fHl6iLEwPYuqpoKgQQ==} + engines: {node: '>=14'} + + googleapis-common@7.2.0: + resolution: {integrity: sha512-/fhDZEJZvOV3X5jmD+fKxMqma5q2Q9nZNSF3kn1F18tpxmA86BcTxAGBQdM0N89Z3bEaIs+HVznSmFJEAmMTjA==} + engines: {node: '>=14.0.0'} + + googleapis@149.0.0: + resolution: {integrity: sha512-LTMc/njwYy7KTeaUHDcQt7KxftHyghdzm2XzbL46PRLd1AXB09utT9Po2ZJn2X0EApz0pE2T5x5A9zM8iue6zw==} + engines: {node: '>=14.0.0'} + gopd@1.2.0: resolution: {integrity: sha512-ZUKRh6/kUFoAiTAtTYPZJ3hw9wNxx+BIBOijnlG9PnrJsCcSjs1wyyD6vJpaYtgnzDrKYRSqf3OO6Rfa93xsRg==} engines: {node: '>= 0.4'} @@ -2285,6 +2334,10 @@ packages: graphemer@1.4.0: resolution: {integrity: sha512-EtKwoO6kxCL9WO5xipiHTZlSzBm7WLT627TqC/uVRd0HKmq8NXyebnNYxDoBi7wt8eTWrUrKXCOVaFq9x1kgag==} + gtoken@7.1.0: + resolution: {integrity: sha512-pCcEwRi+TKpMlxAQObHDQ56KawURgyAf6jtIY046fJ5tIv3zDe/LEIubckAO8fj6JnAxLdmWkUfNyulQ2iKdEw==} + engines: {node: '>=14.0.0'} + guid-typescript@1.0.9: resolution: {integrity: sha512-Y8T4vYhEfwJOTbouREvG+3XDsjr8E3kIr7uf+JZ0BYloFsttiHU0WfvANVsR7TxNUJa/WpCnw/Ino/p+DeBhBQ==} @@ -2337,6 +2390,10 @@ packages: resolution: {integrity: sha512-dFcAjpTQFgoLMzC2VwU+C/CbS7uRL0lWmxDITmqm7C+7F0Odmj6s9l6alZc6AELXhrnggM2CeWSXHGOdX2YtwA==} engines: {node: '>= 6'} + https-proxy-agent@7.0.6: + resolution: {integrity: sha512-vK9P5/iUfdl95AI+JVyUuIcVtd4ofvtrOr3HNtM2yxC9bnMbEdp3x01OhQNnjb8IJYi38VlTE3mBXwcfvywuSw==} + engines: {node: '>= 14'} + humanize-ms@1.2.1: resolution: {integrity: sha512-Fl70vYtsAFb/C06PTS9dZBo7ihau+Tu/DNCk/OyHhea07S+aeMWpFFkUaXRa8fI+ScZbEI8dfSxwY7gxZ9SAVQ==} @@ -2467,6 +2524,10 @@ packages: resolution: {integrity: sha512-VRSzKkbMm5jMDoKLbltAkFQ5Qr7VDiTFGXxYFXXowVj387GeGNOCsOH6Msy00SGZ3Fp84b1Naa1psqgcCIEP5Q==} engines: {node: '>=0.10.0'} + is-stream@2.0.1: + resolution: {integrity: sha512-hFoiJiTl63nn+kstHGBtewWSKnQLpyb155KHheA1l39uvtO9nWIop1p3udqPcUd/xbF1VLMO4n7OI6p7RbngDg==} + engines: {node: '>=8'} + is-unicode-supported@0.1.0: resolution: {integrity: sha512-knxG2q4UC3u8stRGyAVJCOdxFmv5DZiRcdlIaAQXAbSfJya+OhopNotLQrstBhququ4ZpuKbDc/8S6mgXgPFPw==} engines: {node: '>=10'} @@ -2509,6 +2570,9 @@ packages: jsbn@1.1.0: resolution: {integrity: sha512-4bYVV3aAMtDTTu4+xsDYa6sy9GyJ69/amsu9sYF2zqjiEoZA5xJi3BrfX3uY+/IekIu7MwdObdbDWpoZdBv3/A==} + json-bigint@1.0.0: + resolution: {integrity: sha512-SiPv/8VpZuWbvLSMtTDU8hEfrZWg/mH/nV/b4o0CYbSxu1UIQPLdwKOCIyLQX+VIPO5vrLX3i8qtqFyhdPSUSQ==} + json-bignum@0.0.3: resolution: {integrity: sha512-2WHyXj3OfHSgNyuzDbSxI1w2jgw5gkWSWhS7Qg4bWXx1nLk3jnbwfUeS0PSba3IzpTUWdHxBieELUzXRjQB2zg==} engines: {node: '>=0.8'} @@ -2533,6 +2597,12 @@ packages: jsonc-parser@3.2.0: resolution: {integrity: sha512-gfFQZrcTc8CnKXp6Y4/CBT3fTc0OVuDofpre4aEeEpSBPV5X5v4+Vmx+8snU7RLPrNHPKSgLxGo9YuQzz20o+w==} + jwa@2.0.1: + resolution: {integrity: sha512-hRF04fqJIP8Abbkq5NKGN0Bbr3JxlQ+qhZufXVr0DvujKy93ZCbXZMHDL4EOtodSbCWxOqR8MS1tXA5hwqCXDg==} + + jws@4.0.0: + resolution: {integrity: sha512-KDncfTmOZoOMTFG4mBlG0qUIOlc03fmzH+ru6RgYVZhPkyiy/92Owlt/8UEN+a4TXR1FQetfIpJE8ApdvdVxTg==} + keyv@4.5.4: resolution: {integrity: sha512-oxVHkHR/EJf2CNXnWxRLW6mg7JyCCUcG0DtEGmL2ctUo1PNTin1PUil+r/+4r5MpVgC/fn1kjsx7mjSujKqIpw==} @@ -2889,6 +2959,15 @@ packages: node-addon-api@7.1.1: resolution: {integrity: sha512-5m3bsyrjFWE1xf7nz7YXdN4udnVtXK6/Yfgn5qnahL6bCkf2yKt4k3nuTKAtT4r3IG8JNR2ncsIMdZuAzJjHQQ==} + node-fetch@2.7.0: + resolution: {integrity: sha512-c4FRfUm/dbcWZ7U+1Wq0AwCyFL+3nt2bEw05wfxSz+DWpWsitgmSgYmy2dQdWyKC1694ELPqMs/YzUSNozLt8A==} + engines: {node: 4.x || >=6.0.0} + peerDependencies: + encoding: ^0.1.0 + peerDependenciesMeta: + encoding: + optional: true + node-gyp@8.4.1: resolution: {integrity: sha512-olTJRgUtAb/hOXG0E93wZDs5YiJlgbXxTwQAFHyNlRsXQnYzUaF2aGgujZbw+hR8aF4ZG/rST57bWMWD16jr9w==} engines: {node: '>= 10.12.0'} @@ -3008,6 +3087,9 @@ packages: package-json-from-dist@1.0.1: resolution: {integrity: sha512-UEZIS3/by4OC8vL3P2dTXRETpebLI2NiI5vIrjaD/5UtrkFX/tNbwjTSRAGC/+7CAo2pIcBaRgWmcBBHcsaCIw==} + papaparse@5.5.3: + resolution: {integrity: sha512-5QvjGxYVjxO59MGU2lHVYpRWBBtKHnlIAcSe1uNFCkkptUh63NFRj0FJQm7nR67puEruUci/ZkjmEFrjCAyP4A==} + parent-module@1.0.1: resolution: {integrity: sha512-GQ2EWRpQV8/o+Aw8YqtfZZPfNRWZYkbidE9k5rpl/hC3vtHHBfGm2Ifi6qWV+coDGkrUKZAxE3Lot5kcsRlh+g==} engines: {node: '>=6'} @@ -3639,6 +3721,9 @@ packages: resolution: {integrity: sha512-sf4i37nQ2LBx4m3wB74y+ubopq6W/dIzXg0FDGjsYnZHVa1Da8FH853wlL2gtUhg+xJXjfk3kUZS3BRoQeoQBQ==} engines: {node: '>=6'} + tr46@0.0.3: + resolution: {integrity: sha512-N3WMsuqV66lT30CrXNbEjx4GEwlow3v6rr4mCcv6prnfwhS01rkgyFdjPNBYd9br7LpXV1+Emh01fHnq2Gdgrw==} + trim-lines@3.0.1: resolution: {integrity: sha512-kRj8B+YHZCc9kQYdWfJB2/oUl9rA99qbowYYBtr4ui4mZyAQ2JpvVBd/6U2YloATfqBhBTSMhTpgBHtU0Mf3Rg==} @@ -3746,6 +3831,9 @@ packages: uri-js@4.4.1: resolution: {integrity: sha512-7rKUyy33Q1yc98pQ1DAmLtwX109F7TIfWlW1Ydo8Wl1ii1SeHieeh0HHfPeL2fMXK6z0s8ecKs9frCuLJvndBg==} + url-template@2.0.8: + resolution: {integrity: sha512-XdVKMF4SJ0nP/O7XIPB0JwAEuT9lDIYnNsK8yGVe43y0AWoKeJNdv3ZNWh7ksJ6KqQFjOO6ox/VEitLnaVNufw==} + util-deprecate@1.0.2: resolution: {integrity: sha512-EPD5q1uXyFxJpCrLnCc1nHnq3gOa6DZBocAIiI2TaSCA7VCJ1UJDMagCzIkXNsUYfD1daK//LTEQ8xiIbrHtcw==} @@ -3757,6 +3845,10 @@ packages: resolution: {integrity: sha512-+NYs2QeMWy+GWFOEm9xnn6HCDp0l7QBD7ml8zLUmJ+93Q5NF0NocErnwkTkXVFNiX3/fpC6afS8Dhb/gz7R7eg==} hasBin: true + uuid@9.0.1: + resolution: {integrity: sha512-b+1eJOlsR9K8HJpow9Ok3fiWOWSIcIzXodvv0rQjVoOVNpWMpxf1wZNpt4y9h10odCNrqnYp1OBzRktckBe3sA==} + hasBin: true + valibot@0.42.1: resolution: {integrity: sha512-3keXV29Ar5b//Hqi4MbSdV7lfVp6zuYLZuA9V1PvQUsXqogr+u5lvLPLk3A4f74VUXDnf/JfWMN6sB+koJ/FFw==} peerDependencies: @@ -3855,6 +3947,12 @@ packages: wcwidth@1.0.1: resolution: {integrity: sha512-XHPEwS0q6TaxcvG85+8EYkbiCux2XtWG2mkc47Ng2A77BQu9+DqIOJldST4HgPkuea7dvKSj5VgX3P1d4rW8Tg==} + webidl-conversions@3.0.1: + resolution: {integrity: sha512-2JAn3z8AR6rjK8Sm8orRC0h/bcl/DqL7tRPdGZ4I1CjdF+EaMLmYxBHyXuKL849eucPFhvBoxMsflfOb8kxaeQ==} + + whatwg-url@5.0.0: + resolution: {integrity: sha512-saE57nupxk6v3HY35+jzBwYa0rKSy0XR8JSxZPwgLr7ys0IBzhGviA1/TUGJLmSVqs8pb9AnvICXEuOHLprYTw==} + which@2.0.2: resolution: {integrity: sha512-BLI3Tl1TW3Pvl70l3yq3Y64i+awpwXqsGBYWkkqMtnbXgrMD+yj7rhW0kuEDxzJaYXGjEW5ogapKNMEKNMjibA==} engines: {node: '>= 8'} @@ -4803,6 +4901,10 @@ snapshots: dependencies: undici-types: 5.26.5 + '@types/papaparse@5.3.16': + dependencies: + '@types/node': 20.14.11 + '@types/qs@6.9.18': {} '@types/range-parser@1.2.7': {} @@ -4990,6 +5092,8 @@ snapshots: - supports-color optional: true + agent-base@7.1.3: {} + agentkeepalive@4.6.0: dependencies: humanize-ms: 1.2.1 @@ -5100,6 +5204,8 @@ snapshots: base64-js@1.5.1: {} + bignumber.js@9.3.0: {} + binary-extensions@2.3.0: {} bindings@1.5.0: @@ -5155,6 +5261,8 @@ snapshots: node-releases: 2.0.19 update-browserslist-db: 1.1.2(browserslist@4.24.4) + buffer-equal-constant-time@1.0.1: {} + buffer@5.7.1: dependencies: base64-js: 1.5.1 @@ -5503,6 +5611,10 @@ snapshots: eastasianwidth@0.2.0: {} + ecdsa-sig-formatter@1.0.11: + dependencies: + safe-buffer: 5.2.1 + ee-first@1.1.1: {} ejs@3.1.10: @@ -5887,6 +5999,26 @@ snapshots: wide-align: 1.1.5 optional: true + gaxios@6.7.1(encoding@0.1.13): + dependencies: + extend: 3.0.2 + https-proxy-agent: 7.0.6 + is-stream: 2.0.1 + node-fetch: 2.7.0(encoding@0.1.13) + uuid: 9.0.1 + transitivePeerDependencies: + - encoding + - supports-color + + gcp-metadata@6.1.1(encoding@0.1.13): + dependencies: + gaxios: 6.7.1(encoding@0.1.13) + google-logging-utils: 0.0.2 + json-bigint: 1.0.0 + transitivePeerDependencies: + - encoding + - supports-color + get-caller-file@2.0.5: {} get-intrinsic@1.2.7: @@ -5964,6 +6096,40 @@ snapshots: globrex@0.1.2: {} + google-auth-library@9.15.1(encoding@0.1.13): + dependencies: + base64-js: 1.5.1 + ecdsa-sig-formatter: 1.0.11 + gaxios: 6.7.1(encoding@0.1.13) + gcp-metadata: 6.1.1(encoding@0.1.13) + gtoken: 7.1.0(encoding@0.1.13) + jws: 4.0.0 + transitivePeerDependencies: + - encoding + - supports-color + + google-logging-utils@0.0.2: {} + + googleapis-common@7.2.0(encoding@0.1.13): + dependencies: + extend: 3.0.2 + gaxios: 6.7.1(encoding@0.1.13) + google-auth-library: 9.15.1(encoding@0.1.13) + qs: 6.13.0 + url-template: 2.0.8 + uuid: 9.0.1 + transitivePeerDependencies: + - encoding + - supports-color + + googleapis@149.0.0(encoding@0.1.13): + dependencies: + google-auth-library: 9.15.1(encoding@0.1.13) + googleapis-common: 7.2.0(encoding@0.1.13) + transitivePeerDependencies: + - encoding + - supports-color + gopd@1.2.0: {} graceful-fs@4.2.11: @@ -5971,6 +6137,14 @@ snapshots: graphemer@1.4.0: {} + gtoken@7.1.0(encoding@0.1.13): + dependencies: + gaxios: 6.7.1(encoding@0.1.13) + jws: 4.0.0 + transitivePeerDependencies: + - encoding + - supports-color + guid-typescript@1.0.9: {} has-flag@4.0.0: {} @@ -6072,6 +6246,13 @@ snapshots: - supports-color optional: true + https-proxy-agent@7.0.6: + dependencies: + agent-base: 7.1.3 + debug: 4.4.1 + transitivePeerDependencies: + - supports-color + humanize-ms@1.2.1: dependencies: ms: 2.1.3 @@ -6172,6 +6353,8 @@ snapshots: is-plain-object@5.0.0: {} + is-stream@2.0.1: {} + is-unicode-supported@0.1.0: {} is-wsl@2.2.0: @@ -6216,6 +6399,10 @@ snapshots: jsbn@1.1.0: optional: true + json-bigint@1.0.0: + dependencies: + bignumber.js: 9.3.0 + json-bignum@0.0.3: {} json-buffer@3.0.1: {} @@ -6230,6 +6417,17 @@ snapshots: jsonc-parser@3.2.0: {} + jwa@2.0.1: + dependencies: + buffer-equal-constant-time: 1.0.1 + ecdsa-sig-formatter: 1.0.11 + safe-buffer: 5.2.1 + + jws@4.0.0: + dependencies: + jwa: 2.0.1 + safe-buffer: 5.2.1 + keyv@4.5.4: dependencies: json-buffer: 3.0.1 @@ -6754,6 +6952,12 @@ snapshots: node-addon-api@7.1.1: {} + node-fetch@2.7.0(encoding@0.1.13): + dependencies: + whatwg-url: 5.0.0 + optionalDependencies: + encoding: 0.1.13 + node-gyp@8.4.1: dependencies: env-paths: 2.2.1 @@ -6932,6 +7136,8 @@ snapshots: package-json-from-dist@1.0.1: {} + papaparse@5.5.3: {} + parent-module@1.0.1: dependencies: callsites: 3.1.0 @@ -7751,6 +7957,8 @@ snapshots: totalist@3.0.1: {} + tr46@0.0.3: {} + trim-lines@3.0.1: {} trough@2.2.0: {} @@ -7857,12 +8065,16 @@ snapshots: dependencies: punycode: 2.3.1 + url-template@2.0.8: {} + util-deprecate@1.0.2: {} utils-merge@1.0.1: {} uuid@8.3.2: {} + uuid@9.0.1: {} + valibot@0.42.1(typescript@5.8.3): optionalDependencies: typescript: 5.8.3 @@ -7967,6 +8179,13 @@ snapshots: dependencies: defaults: 1.0.4 + webidl-conversions@3.0.1: {} + + whatwg-url@5.0.0: + dependencies: + tr46: 0.0.3 + webidl-conversions: 3.0.1 + which@2.0.2: dependencies: isexe: 2.0.0 From ae92e193a1217fe21db510e40b8fdef8e51cb4ff Mon Sep 17 00:00:00 2001 From: Francisco Aranda Date: Fri, 6 Jun 2025 12:22:38 +0200 Subject: [PATCH 2/5] feat: read csv and json files with ignore_errors=true --- .../tables/create-table-from-file.ts | 28 ++++--------------- src/services/repository/tables/utils.ts | 12 ++++++++ 2 files changed, 17 insertions(+), 23 deletions(-) diff --git a/src/services/repository/tables/create-table-from-file.ts b/src/services/repository/tables/create-table-from-file.ts index a4a08fe9..ae39ba17 100644 --- a/src/services/repository/tables/create-table-from-file.ts +++ b/src/services/repository/tables/create-table-from-file.ts @@ -5,6 +5,7 @@ import { getColumnName, getDatasetRowSequenceName, getDatasetTableName, + readFuncFromFile, } from './utils'; export const createDatasetTableFromFile = async ( @@ -30,26 +31,10 @@ export const createDatasetTableFromFile = async ( const tableName = getDatasetTableName(dataset); const sequenceName = getDatasetRowSequenceName(dataset); - let secretDropStatement = ''; - - await db.run(` - BEGIN TRANSACTION; - `); - - if (options?.secrets?.googleSheets) { - await db.run(` - CREATE OR REPLACE SECRET gsheet_secret( - TYPE gsheet, - PROVIDER access_token, - TOKEN '${options.secrets.googleSheets}' - ); - `); - - secretDropStatement = 'DROP SECRET gsheet_secret;'; - } + const fileSourceStatement = readFuncFromFile(file); const results = await db.run(` - DESCRIBE (SELECT * FROM '${file}'); + DESCRIBE (SELECT * FROM ${fileSourceStatement}); `); const columns = await results.getRowObjects(); @@ -67,7 +52,7 @@ export const createDatasetTableFromFile = async ( .map((column) => `"${column.name}" as ${getColumnName(column)}`) .join(', '); - let selectStatement = `SELECT ${selectColumnNames}, nextval('${sequenceName}') as rowIdx FROM '${file}'`; + let selectStatement = `SELECT ${selectColumnNames}, nextval('${sequenceName}') as rowIdx FROM ${fileSourceStatement}`; if (options?.limit) selectStatement += ` LIMIT ${options.limit}`; @@ -77,10 +62,7 @@ export const createDatasetTableFromFile = async ( CREATE TABLE ${tableName} AS (${selectStatement}); SHOW ${tableName}; - - ${secretDropStatement} - - COMMIT; + `); return dbColumns.map((column) => { diff --git a/src/services/repository/tables/utils.ts b/src/services/repository/tables/utils.ts index 6d7671cf..8ef244af 100644 --- a/src/services/repository/tables/utils.ts +++ b/src/services/repository/tables/utils.ts @@ -22,3 +22,15 @@ export const escapeValue = (value: any) => { return value; }; + +export const readFuncFromFile = (file: string) => { + switch (file.split('.').pop()?.toLowerCase()) { + case 'csv': + case 'tsv': + return `read_csv('${file}', ignore_errors=true, auto_detect=true)`; + case 'jsonl': + return `read_json('${file}', ignore_errors=true, auto_detect=true)`; + default: + return `'${file}'`; + } +}; From 9b87de69564ca49867df71beefa00bf5eea57de4 Mon Sep 17 00:00:00 2001 From: Francisco Aranda Date: Fri, 6 Jun 2025 12:22:59 +0200 Subject: [PATCH 3/5] remove gsheets extension --- src/services/db/duckdb.ts | 9 --------- 1 file changed, 9 deletions(-) diff --git a/src/services/db/duckdb.ts b/src/services/db/duckdb.ts index 1c356c90..b23626bf 100644 --- a/src/services/db/duckdb.ts +++ b/src/services/db/duckdb.ts @@ -25,12 +25,3 @@ export const connectAndClose = async ( db.disconnectSync(); } }; - -await connectAndClose(async (db) => { - // Install plugins and extensions - - await db.run(` - INSTALL gsheets FROM community; - LOAD gsheets; - `); -}); From 4c068898a93e451a01ad1fe74efd4b288b0cdb8a Mon Sep 17 00:00:00 2001 From: Francisco Aranda Date: Fri, 6 Jun 2025 12:26:45 +0200 Subject: [PATCH 4/5] feat: Read spreadsheet content manually --- .../import-from-google-sheets/index.tsx | 35 +----- src/usecases/import-from-google.usecase.tsx | 118 ++++++++++++++++++ 2 files changed, 123 insertions(+), 30 deletions(-) create mode 100644 src/usecases/import-from-google.usecase.tsx diff --git a/src/features/import-from-google-sheets/index.tsx b/src/features/import-from-google-sheets/index.tsx index edbe219c..9f363871 100644 --- a/src/features/import-from-google-sheets/index.tsx +++ b/src/features/import-from-google-sheets/index.tsx @@ -3,7 +3,7 @@ import { useNavigate } from '@builder.io/qwik-city'; import { LuChevronRightSquare, LuExternalLink } from '@qwikest/icons/lucide'; import { Button, Label } from '~/components'; import { useClientConfig } from '~/loaders'; -import { useImportFromURL } from '~/usecases/import-from-url.usecase'; +import { useImportFromGoogle } from '~/usecases/import-from-google.usecase'; export const ImportFromGoogleSheets = component$(() => { const config = useClientConfig(); @@ -13,7 +13,7 @@ export const ImportFromGoogleSheets = component$(() => { const googleSheetsToken = useSignal(''); const isImporting = useSignal(false); - const importFromURI = useImportFromURL(); + const importFromGoogle = useImportFromGoogle(); const googleOauthURL = useComputed$(() => { const { GOOGLE_CLIENT_ID, GOOGLE_REDIRECT_URI } = config.value; @@ -22,7 +22,7 @@ export const ImportFromGoogleSheets = component$(() => { client_id: GOOGLE_CLIENT_ID!, redirect_uri: GOOGLE_REDIRECT_URI!, response_type: 'token', - scope: 'https://www.googleapis.com/auth/spreadsheets', + scope: 'https://www.googleapis.com/auth/spreadsheets.readonly', }); return `https://accounts.google.com/o/oauth2/v2/auth?${params.toString()}`; @@ -45,40 +45,15 @@ export const ImportFromGoogleSheets = component$(() => { ); }); - const datasetName = $(async () => { - if (!url.value) return ''; - - try { - if (!isGoogleSheetsURL.value) return url.value.split('/').pop() || ''; - if (isGoogleSheetsURL.value && !googleSheetsToken.value) return ''; - - const response = await fetch(url.value, { - headers: { Authorization: `Bearer ${googleSheetsToken.value}` }, - }); - - const text = await response.text(); - const parser = new DOMParser(); - - const doc = parser.parseFromString(text, 'text/html'); - const title = doc.querySelector('title')?.innerText || 'Untitled'; - - return title; - } catch (error) { - console.error('Error getting dataset name:', error); - return url.value; - } - }); - const handleImport = $(async () => { isImporting.value = true; try { - const dataset = await importFromURI({ + const dataset = await importFromGoogle({ url: url.value, - name: await datasetName(), secretToken: googleSheetsToken.value, }); - nav(`/home/dataset/${dataset.id}`); + await nav(`/home/dataset/${dataset.id}`); } catch (error) { console.error('Error importing dataset:', error); } finally { diff --git a/src/usecases/import-from-google.usecase.tsx b/src/usecases/import-from-google.usecase.tsx new file mode 100644 index 00000000..1df107cf --- /dev/null +++ b/src/usecases/import-from-google.usecase.tsx @@ -0,0 +1,118 @@ +import { type RequestEventBase, server$ } from '@builder.io/qwik-city'; + +import { google } from 'googleapis'; +import { importDatasetFromFile } from '~/services/repository/datasets'; +import { type Dataset, useServerSession } from '~/state'; + +import fs from 'node:fs'; +import os from 'node:os'; +import path from 'node:path'; +import { GOOGLE_CLIENT_ID } from '~/config'; + +import { default as papa } from 'papaparse'; + +const readSpreadsheetContent = async ( + url: string, + token: string, +): Promise<{ + filePath: string; + name: string; +}> => { + const spreadsheetId = url.split('/d/')[1].split('/')[0]; + const sheetId = url.split('#gid=')[1] || '0'; // Default to gid 0 if not specified + + // Create an OAuth2 client with the provided token + const oauth2Client = new google.auth.OAuth2({ + clientId: GOOGLE_CLIENT_ID, + }); + + oauth2Client.setCredentials({ + access_token: token, + scope: 'https://www.googleapis.com/auth/spreadsheets.readonly', + }); + + const service = google.sheets({ + version: 'v4', + auth: oauth2Client, + }); + + try { + const res = await service.spreadsheets.get({ spreadsheetId }); + + const spreadSheetTitle = + res.data.properties?.title || 'Untitled Spreadsheet'; + + let requestedSheet: any | undefined; + for (const sheet of res.data.sheets || []) { + if (sheet.properties?.sheetId === Number(sheetId)) { + requestedSheet = sheet; + break; + } + } + if (requestedSheet === undefined) { + throw new Error(`Sheet with ID ${sheetId} not found in the spreadsheet.`); + } + + // Fetch the rows from the specified sheet + const response = await service.spreadsheets.values.get({ + spreadsheetId, + range: `${requestedSheet.properties.title}!A1:Z`, + }); + + const data = response.data.values; + + if (!data || data.length === 0) { + throw new Error('No data found in the sheet.'); + } + + const csv = papa.unparse(data, { + header: true, + quotes: true, + skipEmptyLines: true, + quoteChar: '"', + delimiter: ',', + escapeChar: '\\', + }); + + const filePath = path.join( + os.tmpdir(), + `sheet-${spreadsheetId}-${sheetId}.csv`, + ); + fs.writeFileSync(filePath, csv); + + return { + filePath, + name: `${spreadSheetTitle} - ${requestedSheet.properties.title}`, + }; + } catch (error) { + console.error('Error initializing Google Drive API:', error); + throw new Error('Failed to initialize Google Drive API'); + } +}; + +export const useImportFromGoogle = () => + server$(async function ( + this: RequestEventBase, + { + url, + secretToken, + }: { + url: string; + secretToken: string; + }, + ): Promise { + const session = useServerSession(this); + + const { filePath, name } = await readSpreadsheetContent(url, secretToken); + + return await importDatasetFromFile( + { + name: name, + createdBy: session.user.username, + file: filePath, + }, + { + limit: 1000, + }, + ); + }); From eb84ddf413ee37e6a3217cb9cbf2f68cdc6cf69b Mon Sep 17 00:00:00 2001 From: Francisco Aranda Date: Mon, 9 Jun 2025 08:11:16 +0200 Subject: [PATCH 5/5] chore: define google scope as constant --- src/config.ts | 3 +++ src/features/import-from-google-sheets/index.tsx | 5 +++-- src/loaders/config.ts | 2 ++ src/usecases/import-from-google.usecase.tsx | 4 ++-- 4 files changed, 10 insertions(+), 4 deletions(-) diff --git a/src/config.ts b/src/config.ts index 1e5909e4..e71a214d 100644 --- a/src/config.ts +++ b/src/config.ts @@ -223,6 +223,9 @@ export const GOOGLE_CLIENT_ID: string | undefined = export const GOOGLE_REDIRECT_URI: string | undefined = process.env.GOOGLE_REDIRECT_URI; +export const GOOGLE_OAUTH_SCOPE: string = + 'https://www.googleapis.com/auth/spreadsheets.readonly'; + /** * If defined, the inference endpoint bill with by applied to the organization billing. * diff --git a/src/features/import-from-google-sheets/index.tsx b/src/features/import-from-google-sheets/index.tsx index 9f363871..4487a23f 100644 --- a/src/features/import-from-google-sheets/index.tsx +++ b/src/features/import-from-google-sheets/index.tsx @@ -16,13 +16,14 @@ export const ImportFromGoogleSheets = component$(() => { const importFromGoogle = useImportFromGoogle(); const googleOauthURL = useComputed$(() => { - const { GOOGLE_CLIENT_ID, GOOGLE_REDIRECT_URI } = config.value; + const { GOOGLE_CLIENT_ID, GOOGLE_REDIRECT_URI, GOOGLE_OAUTH_SCOPE } = + config.value; const params = new URLSearchParams({ client_id: GOOGLE_CLIENT_ID!, redirect_uri: GOOGLE_REDIRECT_URI!, response_type: 'token', - scope: 'https://www.googleapis.com/auth/spreadsheets.readonly', + scope: GOOGLE_OAUTH_SCOPE, }); return `https://accounts.google.com/o/oauth2/v2/auth?${params.toString()}`; diff --git a/src/loaders/config.ts b/src/loaders/config.ts index aa42212d..1c02ab47 100644 --- a/src/loaders/config.ts +++ b/src/loaders/config.ts @@ -16,6 +16,7 @@ export const useClientConfig = routeLoader$(async function ( isGoogleAuthEnabled: boolean; GOOGLE_CLIENT_ID?: string; GOOGLE_REDIRECT_URI?: string; + GOOGLE_OAUTH_SCOPE: string; }> { useServerSession(this); @@ -29,5 +30,6 @@ export const useClientConfig = routeLoader$(async function ( ), GOOGLE_CLIENT_ID: config.GOOGLE_CLIENT_ID, GOOGLE_REDIRECT_URI: config.GOOGLE_REDIRECT_URI, + GOOGLE_OAUTH_SCOPE: config.GOOGLE_OAUTH_SCOPE, }; }); diff --git a/src/usecases/import-from-google.usecase.tsx b/src/usecases/import-from-google.usecase.tsx index 1df107cf..0da93b89 100644 --- a/src/usecases/import-from-google.usecase.tsx +++ b/src/usecases/import-from-google.usecase.tsx @@ -7,7 +7,7 @@ import { type Dataset, useServerSession } from '~/state'; import fs from 'node:fs'; import os from 'node:os'; import path from 'node:path'; -import { GOOGLE_CLIENT_ID } from '~/config'; +import { GOOGLE_CLIENT_ID, GOOGLE_OAUTH_SCOPE } from '~/config'; import { default as papa } from 'papaparse'; @@ -28,7 +28,7 @@ const readSpreadsheetContent = async ( oauth2Client.setCredentials({ access_token: token, - scope: 'https://www.googleapis.com/auth/spreadsheets.readonly', + scope: GOOGLE_OAUTH_SCOPE, }); const service = google.sheets({