diff --git a/Cargo.lock b/Cargo.lock index d0aaa8f..5c71789 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,6 +2,15 @@ # It is not intended for manual editing. version = 4 +[[package]] +name = "aho-corasick" +version = "1.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ddd31a130427c27518df266943a5308ed92d4b226cc639f5a8f1002816174301" +dependencies = [ + "memchr", +] + [[package]] name = "ansi-str" version = "0.8.0" @@ -83,12 +92,98 @@ version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "23b62fc65de8e4e7f52534fb52b0f3ed04746ae267519eef2a83941e8085068b" +[[package]] +name = "async-stream" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b5a71a6f37880a80d1d7f19efd781e4b5de42c88f0722cc13bcb6cc2cfe8476" +dependencies = [ + "async-stream-impl", + "futures-core", + "pin-project-lite", +] + +[[package]] +name = "async-stream-impl" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c7c24de15d275a1ecfd47a380fb4d5ec9bfe0933f309ed5e705b775596a3574d" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "async-trait" +version = "0.1.89" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9035ad2d096bed7955a320ee7e2230574d28fd3c3a0f186cbea1ff3c7eed5dbb" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + [[package]] name = "atomic-waker" version = "1.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1505bd5d3d116872e7271a6d4e16d81d0c8570876c8de68093a09ac269d8aac0" +[[package]] +name = "autocfg" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" + +[[package]] +name = "axum" +version = "0.7.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"edca88bc138befd0323b20752846e6587272d3b03b0343c8ea28a6f819e6e71f" +dependencies = [ + "async-trait", + "axum-core", + "bytes", + "futures-util", + "http", + "http-body", + "http-body-util", + "itoa", + "matchit", + "memchr", + "mime", + "percent-encoding", + "pin-project-lite", + "rustversion", + "serde", + "sync_wrapper", + "tower 0.5.3", + "tower-layer", + "tower-service", +] + +[[package]] +name = "axum-core" +version = "0.4.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09f2bd6146b97ae3359fa0cc6d6b376d9539582c7b4220f041a33ec24c226199" +dependencies = [ + "async-trait", + "bytes", + "futures-util", + "http", + "http-body", + "http-body-util", + "mime", + "pin-project-lite", + "rustversion", + "sync_wrapper", + "tower-layer", + "tower-service", +] + [[package]] name = "base64" version = "0.22.1" @@ -209,6 +304,22 @@ dependencies = [ "windows-sys 0.59.0", ] +[[package]] +name = "core-foundation" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b2a6cd9ae233e7f62ba4e9353e81a88df7fc8a5987b8d445b4d90c879bd156f6" +dependencies = [ + "core-foundation-sys", + "libc", +] + +[[package]] +name = "core-foundation-sys" +version = "0.8.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" + [[package]] name = "csv" version = "1.4.0" @@ -232,7 +343,7 @@ dependencies = [ [[package]] name = "dataverse-cli" -version = "0.1.2" +version = "0.1.4" dependencies = [ "anyhow", "clap", @@ -240,12 +351,16 @@ dependencies = [ "csv", "dialoguer", "dirs", + "prost", + "prost-types", "reqwest", "serde", "serde_json", "tabled", "tokio", "toml", + "tonic", + "tonic-build", ] [[package]] @@ -293,6 +408,12 @@ dependencies = [ "syn 2.0.117", ] +[[package]] +name = "either" +version = "1.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" + [[package]] name = "encode_unicode" version = "1.0.0" @@ -327,6 +448,12 @@ version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5baebc0774151f905a1a2cc41989300b1e6fbb29aff0ceffa1064fdd3088d582" +[[package]] +name = "fixedbitset" +version = "0.5.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d674e81391d1e1ab681a28d99df07927c6d4aa5b027d7da16ba32d1d21ecd99" + [[package]] name = "fnv" version = "1.0.7" @@ -439,13 +566,19 @@ dependencies = [ "futures-core", "futures-sink", "http", - "indexmap", + "indexmap 2.13.0", "slab", "tokio", "tokio-util", "tracing", ] +[[package]] +name = "hashbrown" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" + [[package]] name = "hashbrown" version = "0.15.5" @@ -512,6 +645,12 @@ version = "1.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6dbf3de79e51f3d586ab4cb9d5c3e2c14aa28ed23d180cf89b4df0454a69cc87" +[[package]] +name = "httpdate" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9" + [[package]] name = "hyper" version = "1.8.1" @@ -526,6 +665,7 @@ dependencies = [ "http", "http-body", "httparse", + "httpdate", "itoa", "pin-project-lite", "pin-utils", @@ -551,6 +691,19 @@ dependencies = [ "webpki-roots", ] +[[package]] +name = "hyper-timeout" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b90d566bffbce6a75bd8b09a05aa8c2cb1fabb6cb348f8840c9e4c90a0d83b0" +dependencies = [ + "hyper", + "hyper-util", + "pin-project-lite", + "tokio", + "tower-service", +] + [[package]] name = "hyper-util" version = "0.1.20" @@ -568,7 +721,7 @@ dependencies = [ "libc", "percent-encoding", 
"pin-project-lite", - "socket2", + "socket2 0.6.3", "tokio", "tower-service", "tracing", @@ -682,6 +835,16 @@ dependencies = [ "icu_properties", ] +[[package]] +name = "indexmap" +version = "1.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99" +dependencies = [ + "autocfg", + "hashbrown 0.12.3", +] + [[package]] name = "indexmap" version = "2.13.0" @@ -716,6 +879,15 @@ version = "1.70.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a6cb138bb79a146c1bd460005623e142ef0181e3d0219cb493e02f7d08a35695" +[[package]] +name = "itertools" +version = "0.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b192c782037fadd9cfa75548310488aabdbf3d2da73885b31bd0abd03351285" +dependencies = [ + "either", +] + [[package]] name = "itoa" version = "1.0.17" @@ -777,12 +949,24 @@ version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "112b39cec0b298b6c1999fee3e31427f74f676e4cb9879ed1a121b43661a4154" +[[package]] +name = "matchit" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0e7465ac9959cc2b1404e8e2367b43684a6d13790fe23056cc8c6c5a6b7bcb94" + [[package]] name = "memchr" version = "2.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79" +[[package]] +name = "mime" +version = "0.3.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a" + [[package]] name = "minimal-lexical" version = "0.2.1" @@ -800,6 +984,12 @@ dependencies = [ "windows-sys 0.61.2", ] +[[package]] +name = "multimap" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d87ecb2933e8aeadb3e3a02b828fed80a7528047e68b4f424523a0981a3a084" + 
[[package]] name = "nom" version = "7.1.3" @@ -822,6 +1012,12 @@ version = "1.70.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe" +[[package]] +name = "openssl-probe" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7c87def4c32ab89d880effc9e097653c8da5d6ef28e6b539d313baaacfbafcbe" + [[package]] name = "option-ext" version = "0.2.0" @@ -847,6 +1043,36 @@ version = "2.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9b4f627cb1b25917193a259e49bdad08f671f8d9708acfd5fe0a8c1455d87220" +[[package]] +name = "petgraph" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3672b37090dbd86368a4145bc067582552b29c27377cad4e0a306c97f9bd7772" +dependencies = [ + "fixedbitset", + "indexmap 2.13.0", +] + +[[package]] +name = "pin-project" +version = "1.1.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f1749c7ed4bcaf4c3d0a3efc28538844fb29bcdd7d2b67b2be7e20ba861ff517" +dependencies = [ + "pin-project-internal", +] + +[[package]] +name = "pin-project-internal" +version = "1.1.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9b20ed30f105399776b9c883e68e536ef602a16ae6f596d2c473591d6ad64c6" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + [[package]] name = "pin-project-lite" version = "0.2.17" @@ -918,6 +1144,58 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "prost" +version = "0.13.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2796faa41db3ec313a31f7624d9286acf277b52de526150b7e69f3debf891ee5" +dependencies = [ + "bytes", + "prost-derive", +] + +[[package]] +name = "prost-build" +version = "0.13.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"be769465445e8c1474e9c5dac2018218498557af32d9ed057325ec9a41ae81bf" +dependencies = [ + "heck 0.5.0", + "itertools", + "log", + "multimap", + "once_cell", + "petgraph", + "prettyplease", + "prost", + "prost-types", + "regex", + "syn 2.0.117", + "tempfile", +] + +[[package]] +name = "prost-derive" +version = "0.13.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a56d757972c98b346a9b766e3f02746cde6dd1cd1d1d563472929fdd74bec4d" +dependencies = [ + "anyhow", + "itertools", + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "prost-types" +version = "0.13.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "52c2c1bf36ddb1a1c396b3601a3cec27c2462e45f07c386894ec3ccf5332bd16" +dependencies = [ + "prost", +] + [[package]] name = "quinn" version = "0.11.9" @@ -931,7 +1209,7 @@ dependencies = [ "quinn-udp", "rustc-hash", "rustls", - "socket2", + "socket2 0.6.3", "thiserror 2.0.18", "tokio", "tracing", @@ -947,7 +1225,7 @@ dependencies = [ "bytes", "getrandom 0.3.4", "lru-slab", - "rand", + "rand 0.9.2", "ring", "rustc-hash", "rustls", @@ -968,7 +1246,7 @@ dependencies = [ "cfg_aliases", "libc", "once_cell", - "socket2", + "socket2 0.6.3", "tracing", "windows-sys 0.59.0", ] @@ -994,14 +1272,35 @@ version = "6.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f8dcc9c7d52a811697d2151c701e0d08956f92b0e24136cf4cf27b57a6a0d9bf" +[[package]] +name = "rand" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" +dependencies = [ + "libc", + "rand_chacha 0.3.1", + "rand_core 0.6.4", +] + [[package]] name = "rand" version = "0.9.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6db2770f06117d490610c7488547d543617b21bfa07796d7a12f6f1bd53850d1" dependencies = [ - "rand_chacha", - "rand_core", + "rand_chacha 0.9.0", + "rand_core 0.9.5", +] + 
+[[package]] +name = "rand_chacha" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" +dependencies = [ + "ppv-lite86", + "rand_core 0.6.4", ] [[package]] @@ -1011,7 +1310,16 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb" dependencies = [ "ppv-lite86", - "rand_core", + "rand_core 0.9.5", +] + +[[package]] +name = "rand_core" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" +dependencies = [ + "getrandom 0.2.17", ] [[package]] @@ -1034,6 +1342,35 @@ dependencies = [ "thiserror 2.0.18", ] +[[package]] +name = "regex" +version = "1.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e10754a14b9137dd7b1e3e5b0493cc9171fdd105e0ab477f51b72e7f3ac0e276" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.4.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e1dd4122fc1595e8162618945476892eefca7b88c52820e74af6262213cae8f" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.8.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc897dd8d9e8bd1ed8cdad82b5966c3e0ecae09fb1907d58efaa013543185d0a" + [[package]] name = "reqwest" version = "0.12.28" @@ -1063,7 +1400,7 @@ dependencies = [ "sync_wrapper", "tokio", "tokio-rustls", - "tower", + "tower 0.5.3", "tower-http", "tower-service", "url", @@ -1112,6 +1449,7 @@ version = "0.23.37" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "758025cb5fccfd3bc2fd74708fd4682be41d99e5dff73c377c0646c6012c73a4" dependencies = [ + "log", "once_cell", 
"ring", "rustls-pki-types", @@ -1120,6 +1458,27 @@ dependencies = [ "zeroize", ] +[[package]] +name = "rustls-native-certs" +version = "0.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "612460d5f7bea540c490b2b6395d8e34a953e52b491accd6c86c8164c5932a63" +dependencies = [ + "openssl-probe", + "rustls-pki-types", + "schannel", + "security-framework", +] + +[[package]] +name = "rustls-pemfile" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dce314e5fee3f39953d46bb63bb8a46d40c2f8fb7cc5a3b6cab2bde9721d6e50" +dependencies = [ + "rustls-pki-types", +] + [[package]] name = "rustls-pki-types" version = "1.14.0" @@ -1153,6 +1512,38 @@ version = "1.0.23" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9774ba4a74de5f7b1c1451ed6cd5285a32eddb5cccb8cc655a4e50009e06477f" +[[package]] +name = "schannel" +version = "0.1.29" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91c1b7e4904c873ef0710c1f407dde2e6287de2bebc1bbbf7d430bb7cbffd939" +dependencies = [ + "windows-sys 0.61.2", +] + +[[package]] +name = "security-framework" +version = "3.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b7f4bc775c73d9a02cde8bf7b2ec4c9d12743edf609006c7facc23998404cd1d" +dependencies = [ + "bitflags", + "core-foundation", + "core-foundation-sys", + "libc", + "security-framework-sys", +] + +[[package]] +name = "security-framework-sys" +version = "2.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ce2691df843ecc5d231c0b14ece2acc3efb62c0a398c7e1d875f3983ce020e3" +dependencies = [ + "core-foundation-sys", + "libc", +] + [[package]] name = "semver" version = "1.0.27" @@ -1247,6 +1638,16 @@ version = "1.15.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" +[[package]] +name = "socket2" +version = 
"0.5.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e22376abed350d73dd1cd119b57ffccad95b4e585a7cda43e286245ce23c0678" +dependencies = [ + "libc", + "windows-sys 0.52.0", +] + [[package]] name = "socket2" version = "0.6.3" @@ -1430,7 +1831,7 @@ dependencies = [ "libc", "mio", "pin-project-lite", - "socket2", + "socket2 0.6.3", "tokio-macros", "windows-sys 0.61.2", ] @@ -1456,6 +1857,17 @@ dependencies = [ "tokio", ] +[[package]] +name = "tokio-stream" +version = "0.1.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32da49809aab5c3bc678af03902d4ccddea2a87d028d86392a4b1560c6906c70" +dependencies = [ + "futures-core", + "pin-project-lite", + "tokio", +] + [[package]] name = "tokio-util" version = "0.7.18" @@ -1496,7 +1908,7 @@ version = "0.22.27" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "41fe8c660ae4257887cf66394862d21dbca4a6ddd26f04a3560410406a2f819a" dependencies = [ - "indexmap", + "indexmap 2.13.0", "serde", "serde_spanned", "toml_datetime", @@ -1510,6 +1922,73 @@ version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5d99f8c9a7727884afe522e9bd5edbfc91a3312b36a77b5fb8926e4c31a41801" +[[package]] +name = "tonic" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "877c5b330756d856ffcc4553ab34a5684481ade925ecc54bcd1bf02b1d0d4d52" +dependencies = [ + "async-stream", + "async-trait", + "axum", + "base64", + "bytes", + "h2", + "http", + "http-body", + "http-body-util", + "hyper", + "hyper-timeout", + "hyper-util", + "percent-encoding", + "pin-project", + "prost", + "rustls-native-certs", + "rustls-pemfile", + "socket2 0.5.10", + "tokio", + "tokio-rustls", + "tokio-stream", + "tower 0.4.13", + "tower-layer", + "tower-service", + "tracing", +] + +[[package]] +name = "tonic-build" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"9557ce109ea773b399c9b9e5dca39294110b74f1f342cb347a80d1fce8c26a11" +dependencies = [ + "prettyplease", + "proc-macro2", + "prost-build", + "prost-types", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "tower" +version = "0.4.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8fa9be0de6cf49e536ce1851f987bd21a43b771b09473c3549a6c853db37c1c" +dependencies = [ + "futures-core", + "futures-util", + "indexmap 1.9.3", + "pin-project", + "pin-project-lite", + "rand 0.8.5", + "slab", + "tokio", + "tokio-util", + "tower-layer", + "tower-service", + "tracing", +] + [[package]] name = "tower" version = "0.5.3" @@ -1538,7 +2017,7 @@ dependencies = [ "http-body", "iri-string", "pin-project-lite", - "tower", + "tower 0.5.3", "tower-layer", "tower-service", ] @@ -1562,9 +2041,21 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "63e71662fa4b2a2c3a26f570f037eb95bb1f85397f3cd8076caed2f026a6d100" dependencies = [ "pin-project-lite", + "tracing-attributes", "tracing-core", ] +[[package]] +name = "tracing-attributes" +version = "0.1.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7490cfa5ec963746568740651ac6781f701c9c5ea257c58e057f3ba8cf69e8da" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + [[package]] name = "tracing-core" version = "0.1.36" @@ -1758,7 +2249,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bb0e353e6a2fbdc176932bbaab493762eb1255a7900fe0fea1a2f96c296cc909" dependencies = [ "anyhow", - "indexmap", + "indexmap 2.13.0", "wasm-encoder", "wasmparser", ] @@ -1771,7 +2262,7 @@ checksum = "47b807c72e1bac69382b3a6fb3dbe8ea4c0ed87ff5629b8685ae6b9a611028fe" dependencies = [ "bitflags", "hashbrown 0.15.5", - "indexmap", + "indexmap 2.13.0", "semver", ] @@ -1938,7 +2429,7 @@ checksum = "b7c566e0f4b284dd6561c786d9cb0142da491f46a9fbed79ea69cdad5db17f21" dependencies = [ "anyhow", "heck 0.5.0", - "indexmap", + "indexmap 2.13.0", 
"prettyplease", "syn 2.0.117", "wasm-metadata", @@ -1969,7 +2460,7 @@ checksum = "9d66ea20e9553b30172b5e831994e35fbde2d165325bec84fc43dbf6f4eb9cb2" dependencies = [ "anyhow", "bitflags", - "indexmap", + "indexmap 2.13.0", "log", "serde", "serde_derive", @@ -1988,7 +2479,7 @@ checksum = "ecc8ac4bc1dc3381b7f59c34f00b67e18f910c2c0f50015669dde7def656a736" dependencies = [ "anyhow", "id-arena", - "indexmap", + "indexmap 2.13.0", "log", "semver", "serde", diff --git a/Cargo.toml b/Cargo.toml index 9fc321c..120a82b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,13 +1,13 @@ [package] name = "dataverse-cli" -version = "0.1.3" +version = "0.1.4" edition = "2021" description = "CLI for Macrocosmos Data Universe (Bittensor SN13) - query social data from X/Twitter and Reddit" license = "MIT" repository = "https://github.com/macrocosm-os/dataverse-cli" keywords = ["bittensor", "macrocosmos", "social-data", "cli", "sn13"] categories = ["command-line-utilities"] -include = ["src/**/*", "Cargo.toml", "LICENSE", "README.md", "AGENTS.md"] +include = ["src/**/*", "proto/**/*", "build.rs", "Cargo.toml", "LICENSE", "README.md", "AGENTS.md"] [[bin]] name = "dv" path = "src/main.rs" @@ -26,6 +26,12 @@ dirs = "6" toml = "0.8" csv = "1" dialoguer = "0.11" +tonic = { version = "0.12", features = ["tls", "tls-native-roots", "transport"] } +prost = "0.13" +prost-types = "0.13" + +[build-dependencies] +tonic-build = "0.12" [profile.release] opt-level = 3 diff --git a/build.rs b/build.rs new file mode 100644 index 0000000..3bbb669 --- /dev/null +++ b/build.rs @@ -0,0 +1,9 @@ +fn main() -> Result<(), Box<dyn std::error::Error>> { + tonic_build::configure() + .build_server(false) + .compile_protos( + &["proto/sn13/v1/sn13_validator.proto"], + &["proto"], + )?; + Ok(()) +} diff --git a/proto/gravity/v1/gravity.proto b/proto/gravity/v1/gravity.proto new file mode 100644 index 0000000..f032b99 --- /dev/null +++ b/proto/gravity/v1/gravity.proto @@ -0,0 +1,912 @@ +syntax = "proto3"; + +package gravity.v1; + +import 
"google/protobuf/timestamp.proto"; +import "google/protobuf/empty.proto"; + +option go_package = "macrocosm-os/rift/constellation_api/gen/gravity/v1"; + +service GravityService { + // Gets the most popular tags across data collection tasks + rpc GetPopularTags(google.protobuf.Empty) returns (GetPopularTagsResponse); + + // Lists all data collection tasks for a user + rpc GetGravityTasks(GetGravityTasksRequest) returns (GetGravityTasksResponse); + + // Get all marketplace crawlers + rpc GetMarketplaceCrawlers(google.protobuf.Empty) returns (GetMarketplaceCrawlersResponse); + + // Gets raw miner files for a specific crawler + rpc GetCrawlerRawMinerFiles(GetCrawlerRequest) returns (CrawlerRawMinerFilesResponse); + + // Get the parent workflow id (the id of the ui workflow) for this crawler + rpc GetCrawlerParentTaskId(GetCrawlerRequest) returns (CreateGravityTaskResponse); + + // Get a single crawler by its ID + rpc GetCrawler(GetCrawlerRequest) returns (GetCrawlerResponse); + + // Upsert marketplace task metadata + rpc UpsertMarketplaceTaskMetadata(UpsertMarketplaceTaskMetadataRequest) returns (UpsertResponse); + + // Upsert marketplace task suggestions + rpc UpsertMarketplaceTaskSuggestions(UpsertMarketplaceTaskSuggestionsRequest) returns (UpsertResponse); + + // Get marketplace task suggestions + rpc GetMarketplaceTaskSuggestions(GetMarketplaceTaskSuggestionsRequest) returns (GetMarketplaceDatasetsResponse); + + // Create a new gravity task + rpc CreateGravityTask(CreateGravityTaskRequest) + returns (CreateGravityTaskResponse); + + // Gets all dataset files for a given marketplace gravity task (no user_id check, validates against marketplace tasks table) + rpc GetGravityMarketplaceTaskDatasetFiles(GetGravityTaskDatasetFilesRequest) + returns (GetGravityTaskDatasetFilesResponse); + + // Build a dataset for a single crawler + rpc BuildDataset(BuildDatasetRequest) returns (BuildDatasetResponse); + + // Get the dataset build status and results + rpc GetDataset(GetDatasetRequest) 
returns (GetDatasetResponse); + + // Cancel a gravity task and any crawlers associated with it + rpc CancelGravityTask(CancelGravityTaskRequest) + returns (CancelGravityTaskResponse); + + // Cancel dataset build if it is in progress and purges the dataset + rpc CancelDataset(CancelDatasetRequest) returns (CancelDatasetResponse); + + // Refund user if fewer rows are returned + rpc DatasetBillingCorrection(DatasetBillingCorrectionRequest) + returns (DatasetBillingCorrectionResponse); + + // Gets the available datasets for use in Dataset Marketplace + rpc GetMarketplaceDatasets(GetMarketplaceDatasetsRequest) + returns (GetMarketplaceDatasetsResponse); + + // Gets all dataset files for a given gravity task + rpc GetGravityTaskDatasetFiles(GetGravityTaskDatasetFilesRequest) + returns (GetGravityTaskDatasetFilesResponse); + + // Publishes a dataset into the Marketplace + rpc PublishDataset(PublishDatasetRequest) returns (UpsertResponse); + + // Get crawler data for DD submission + rpc GetActiveUserTasks(google.protobuf.Empty) + returns (GetActiveUserTasksResponse); + + // Get crawler data for DD submission for the marketplace user + rpc GetMarketplaceCrawlerDataForDDSubmission(GetMarketplaceCrawlerDataForDDSubmissionRequest) + returns (GetMarketplaceCrawlerDataForDDSubmissionResponse); + + // Upserts a crawler into the Gravity state DB + rpc UpsertCrawler(UpsertCrawlerRequest) returns (UpsertResponse); + + // Upserts a crawler criteria into the Gravity state DB + rpc InsertCrawlerCriteria(InsertCrawlerCriteriaRequest) + returns (UpsertResponse); + + // Upserts a gravity task into the Gravity state DB + rpc UpsertGravityTask(UpsertGravityTaskRequest) + returns (UpsertGravityTaskResponse); + // Upserts a dataset into the Gravity state DB + rpc UpsertDataset(UpsertDatasetRequest) returns (UpsertResponse); + + // Inserts a dataset file row into the Gravity state DB + rpc InsertDatasetFile(InsertDatasetFileRequest) returns (UpsertResponse); + + // Upserts a nebula into the 
Gravity nebula DB + rpc UpsertNebula(UpsertNebulaRequest) returns (UpsertResponse); + + // Builds all datasets for a task (additionally cancels crawlers with no data) + rpc BuildAllDatasets(BuildAllDatasetsRequest) + returns (BuildAllDatasetsResponse); + + // Builds datasets for multiple crawlers within a single gravity task periodically + rpc BuildUserDatasetsPeriodically(BuildAllDatasetsRequest) + returns (BuildAllDatasetsResponse); + + // Charges a user for dataset rows + rpc ChargeForDatasetRows(ChargeForDatasetRowsRequest) + returns (UpsertResponse); + + // Gets crawler history for a gravity task + rpc GetCrawlerHistory(GetCrawlerHistoryRequest) + returns (GetCrawlerHistoryResponse); + + // Completes a crawler + rpc CompleteCrawler(CompleteCrawlerRequest) returns (UpsertResponse); + + // Upserts raw miner files (parquet paths) for a crawler + rpc UpsertRawMinerFiles(UpsertRawMinerFilesRequest) returns (UpsertResponse); + + // Upserts hotkeys into the Gravity state DB + rpc UpsertHotkeys(UpsertHotkeysRequest) returns (UpsertResponse); + + // Gets all hotkeys from the Gravity state DB + rpc GetHotkeys(google.protobuf.Empty) returns (GetHotkeysResponse); + + // Purchase a marketplace dataset + rpc BuyMarketplaceDataset(BuyMarketplaceDatasetRequest) + returns (BuyMarketplaceDatasetResponse); + + // Get all marketplace datasets owned by the authenticated user + rpc GetUserMarketplaceDatasets(google.protobuf.Empty) + returns (GetUserMarketplaceDatasetsResponse); + + // Upserts pre-built user dataset records + rpc UpsertPreBuiltUserDatasets(UpsertPreBuiltUserDatasetsRequest) returns (UpsertResponse); + + // Gets pre-built user dataset records for a gravity task + rpc GetPreBuiltUserDatasets(GetPreBuiltUserDatasetsRequest) returns (GetPreBuiltUserDatasetsResponse); +} + +// UpsertRawMinerFilesRequest is the request message for UpsertRawMinerFiles +message UpsertRawMinerFilesRequest { + // crawler_id: the ID of the crawler + string crawler_id = 1; + // 
parquet_paths: the paths to the raw miner files collected + repeated string parquet_paths = 2; + // path_sizes: the sizes of the raw miner files collected + repeated int64 path_sizes = 3; +} +// GetHotkeysResponse is the response message for getting hotkeys +message GetHotkeysResponse { + // hotkeys: the hotkeys + repeated string hotkeys = 1; +} + +// BuyMarketplaceDatasetRequest is the request to purchase a dataset +message BuyMarketplaceDatasetRequest { + // gravity_task_id: the marketplace dataset's gravity task id to purchase + string gravity_task_id = 1; +} + +// BuyMarketplaceDatasetResponse is the response to a dataset purchase +message BuyMarketplaceDatasetResponse { + // success: whether the purchase succeeded + bool success = 1; + // message: optional detail + string message = 2; + // purchase_transaction_id: billing transaction id + string purchase_transaction_id = 3; +} + +// UserMarketplaceDataset represents a single owned dataset record +message UserMarketplaceDataset { + string gravity_task_id = 1; + google.protobuf.Timestamp created_at = 2; + int64 purchase_price_cents = 3; + string purchase_transaction_id = 4; +} + +// GetUserMarketplaceDatasetsResponse lists owned datasets +message GetUserMarketplaceDatasetsResponse { + repeated UserMarketplaceDataset user_datasets = 1; +} + +// UpsertHotkeysRequest is the request message for upserting hotkeys +message UpsertHotkeysRequest { + // hotkeys: the hotkeys to upsert + repeated string hotkeys = 1; +} + +// UpsertMarketplaceTaskSuggestionsRequest is the request message for upserting marketplace task suggestions +message UpsertMarketplaceTaskSuggestionsRequest { + // gravity_task_id: the id of the gravity task + string gravity_task_id = 1; + // suggested_gravity_task_ids: the ids of the suggested gravity tasks + repeated string suggested_gravity_task_ids = 2; +} + +// GetMarketplaceTaskSuggestionsRequest is the request message for getting marketplace task suggestions +message 
GetMarketplaceTaskSuggestionsRequest { + // gravity_task_id: the id of the gravity task + string gravity_task_id = 1; +} + +// GetMarketplaceTaskSuggestionsResponse is the response message for getting marketplace task suggestions +message GetMarketplaceTaskSuggestionsResponse { + // suggested_gravity_task_ids: the ids of the suggested gravity tasks + repeated string suggested_gravity_task_ids = 1; +} + +// PopularTag is a single popular tag along with its count +message PopularTag { + // tag: the popular tag + string tag = 1; + // count: the count of the tag + uint64 count = 2; +} + + +// GetPopularTagsResponse is the response message for getting popular tags +message GetPopularTagsResponse { + // popular_tags: the popular tags + repeated PopularTag popular_tags = 1; +} + +// PublishDatasetRequest is the request message for publishing a dataset +message PublishDatasetRequest { + // dataset_id: the ID of the dataset + string dataset_id = 1; +} + +// UpsertMarketplaceTaskMetadataRequest +message UpsertMarketplaceTaskMetadataRequest { + // gravity_task_id: the id of the gravity task + string gravity_task_id = 1; + // description: a description of the curated gravity task + string description = 2; + // name: the name of the curated task + string name = 3; + // image_url: points to an image related to the task + string image_url = 4; + // tags: a set of tags for this task + repeated string tags = 5; +} +// GetMarketplaceDatasetsRequest is the request message for getting marketplace datasets +message GetMarketplaceDatasetsRequest { + // popular: whether to return popular datasets + bool popular = 1; +} + +// Crawler is a single crawler workflow that registers a single job +// (platform/topic) on SN13's dynamic desirability engine +message Crawler { + // crawler_id: the ID of the crawler + string crawler_id = 1; + // criteria: the contents of the job and the notification details + CrawlerCriteria criteria = 2; + // start_time: the time the crawler was created + 
google.protobuf.Timestamp start_time = 3; + // deregistration_time: the time the crawler was deregistered + google.protobuf.Timestamp deregistration_time = 4; + // archive_time: the time the crawler was archived + google.protobuf.Timestamp archive_time = 5; + // state: the current state of the crawler + CrawlerState state = 6; + // dataset_workflows: the IDs of the dataset workflows that are associated + // with the crawler + repeated string dataset_workflows = 7; + // parquet_paths: the paths to the raw miner files collected + repeated string parquet_paths = 8; +} + +// UpsertCrawlerRequest for upserting a crawler and its criteria +message UpsertCrawlerRequest { + // gravity_task_id: the parent workflow id -- in this case the multicrawler id + string gravity_task_id = 1; + // crawler: the crawler to upsert into the database + Crawler crawler = 2; +} + +// UpsertResponse is the response message for upserting a crawler +message UpsertResponse { + // message: the message of upserting a crawler (currently hardcoded to + // "success") + string message = 1; +} + +// UpsertGravityTaskRequest for upserting a gravity task +message UpsertGravityTaskRequest { + // gravity_task: the gravity task to upsert into the database + GravityTaskRequest gravity_task = 1; +} + +// UpsertGravityTaskResponse is the response message for upserting a gravity +// task +message UpsertGravityTaskResponse { + // message: the message of upserting a gravity task (currently hardcoded to + // "success") + string message = 1; +} + +// GravityTaskRequest represents the data needed to upsert a gravity task +message GravityTaskRequest { + // id: the ID of the gravity task + string id = 1; + // name: the name of the gravity task + string name = 2; + // status: the status of the gravity task + string status = 3; + // start_time: the start time of the gravity task + google.protobuf.Timestamp start_time = 4; + // notification_to: the notification email address + string notification_to = 5; + // 
notification_link: the notification redirect link + string notification_link = 6; +} + +// UpsertCrawlerCriteriaRequest for upserting a crawler and its criteria +message InsertCrawlerCriteriaRequest { + // crawler_id: the id of the crawler + string crawler_id = 1; + // crawler_criteria: the crawler criteria to upsert into the database + CrawlerCriteria crawler_criteria = 2; +} + +// CrawlerCriteria is the contents of the job and the notification details +message CrawlerCriteria { + // platform: the platform of the job ('x' or 'reddit') + string platform = 1; + // topic: the topic of the job (e.g. '#ai' for X, 'r/ai' for Reddit) + optional string topic = 2; + // notification: the details of the notification to be sent to the user + CrawlerNotification notification = 3; + // mock: Used for testing purposes (optional, defaults to false) + bool mock = 4; + // user_id: the ID of the user who created the gravity task + string user_id = 5; + // keyword: the keyword to search for in the job (optional) + optional string keyword = 6; + // post_start_datetime: the start date of the job (optional) + optional google.protobuf.Timestamp post_start_datetime = 7; + // post_end_datetime: the end date of the job (optional) + optional google.protobuf.Timestamp post_end_datetime = 8; +} + +// CrawlerNotification is the details of the notification to be sent to the user +message CrawlerNotification { + // to: the email address of the user + string to = 1; + // link: the redirect link in the email where the user can view the dataset + string link = 2; +} + +// HfRepo is a single Hugging Face repository that contains data for a crawler +message HfRepo { + // repo_name: the name of the Hugging Face repository + string repo_name = 1; + // row_count: the number of rows in the repository for the crawler criteria + uint64 row_count = 2; + // last_update: the last recorded time the repository was updated + string last_update = 3; +} + +// CrawlerState is the current state of the crawler 
+message CrawlerState { + // status: the current status of the crawler + // "Pending" -- Crawler is pending submission to the SN13 Validator + // "Submitted" -- Crawler is submitted to the SN13 Validator + // "Running" -- Crawler is running (we got the first update) + // "Completed" -- Crawler is completed (timer expired) + // "Cancelled" -- Crawler is cancelled by user via cancellation of workflow + // "Archived" -- Crawler is archived (now read-only i.e. no new dataset) + // "Failed" -- Crawler failed to run + string status = 1; + // bytes_collected: the estimated number of bytes collected by the crawler + uint64 bytes_collected = 2; + // records_collected: the estimated number of records collected by the crawler + uint64 records_collected = 3; + // repos: the Hugging Face repositories that contain data for a crawler + repeated HfRepo repos = 4; +} + +// GravityTaskState is the current state of a gravity task +message GravityTaskState { + // gravity_task_id: the ID of the gravity task + string gravity_task_id = 1; + // name: the name given by the user of the gravity task + string name = 2; + // status: the current status of the gravity task + string status = 3; + // start_time: the time the gravity task was created + google.protobuf.Timestamp start_time = 4; + // crawler_ids: the IDs of the crawler workflows that are associated with the + // gravity task + repeated string crawler_ids = 5; + // crawler_workflows: the crawler workflows that are associated with the + // gravity task + repeated Crawler crawler_workflows = 6; +} + +// GravityMarketplaceTaskState is the current state of a gravity task for marketplace display +message GravityMarketplaceTaskState { + // gravity_task_id: the ID of the gravity task + string gravity_task_id = 1; + // name: the name given by the user of the gravity task + string name = 2; + // status: the current status of the gravity task + string status = 3; + // start_time: the time the gravity task was created + google.protobuf.Timestamp 
start_time = 4; + // crawler_ids: the IDs of the crawler workflows that are associated with the + // gravity task + repeated string crawler_ids = 5; + // crawler_workflows: the crawler workflows that are associated with the + // gravity task + repeated Crawler crawler_workflows = 6; + // task_records_collected: the total number of records collected across all crawlers for this task + uint64 task_records_collected = 7; + // task_bytes_collected: the total number of bytes collected across all crawlers for this task + uint64 task_bytes_collected = 8; + // description: description from gravity_marketplace_task_metadata + string description = 9; + // image_url: image url from gravity_marketplace_task_metadata + string image_url = 10; + // view_count: number of views from gravity_marketplace_task_download_history + uint64 view_count = 11; + // download_count: number of downloads from gravity_marketplace_task_download_history + uint64 download_count = 12; + // tags: set of tags from gravity_marketplace_task_tags (accumulated) + repeated string tags = 13; +} + +// GetGravityTasksRequest is the request message for listing gravity tasks for a +// user +message GetGravityTasksRequest { + // gravity_task_id: the ID of the gravity task (optional, if not provided, all + // gravity tasks for the user will be returned) + optional string gravity_task_id = 1; + // include_crawlers: whether to include the crawler states in the response + optional bool include_crawlers = 2; +} + +// GetGravityTasksResponse is the response message for listing gravity tasks for +// a user +message GetGravityTasksResponse { + // gravity_task_states: the current states of the gravity tasks + repeated GravityTaskState gravity_task_states = 1; +} + +// GravityTask defines a crawler's criteria for a single job (platform/topic) +message GravityTask { + // topic: the topic of the job (e.g. 
'#ai' for X, 'r/ai' for Reddit) + optional string topic = 1; + // platform: the platform of the job ('x' or 'reddit') + string platform = 2; + // keyword: the keyword to search for in the job (optional) + optional string keyword = 3; + // post_start_datetime: the start date of the job (optional) + optional google.protobuf.Timestamp post_start_datetime = 4; + // post_end_datetime: the end date of the job (optional) + optional google.protobuf.Timestamp post_end_datetime = 5; +} + +// NotificationRequest is the request message for sending a notification to a +// user when a dataset is ready to download +message NotificationRequest { + // type: the type of notification to send ('email' is only supported + // currently) + string type = 1; + // address: the address to send the notification to (only email addresses are + // supported currently) + string address = 2; + // redirect_url: the URL to include in the notication message that redirects + // the user to any built datasets + optional string redirect_url = 3; +} + +// GetCrawlerRequest is the request message for getting a crawler +message GetCrawlerRequest { + // crawler_id: the ID of the crawler + string crawler_id = 1; +} + +// GetMarketplaceCrawlersResponse is the response message holding all marketplace crawlers +message GetMarketplaceCrawlersResponse { + // crawler_id: the ID of the crawler + repeated string crawler_id = 1; +} +// CompleteCrawlerRequest is the request message for cancelling a crawler +message CompleteCrawlerRequest { + // crawler_id: the ID of the crawler + string crawler_id = 1; + // status: ending status of the crawler + string status = 3; + // removed field + reserved 2; + reserved "gravity_task_id"; +} + +// GetCrawlerResponse is the response message for getting a crawler +message GetCrawlerResponse { + // crawler: the crawler + Crawler crawler = 1; +} + +// CreateGravityTaskRequest is the request message for creating a new gravity +// task +message CreateGravityTaskRequest { + // 
gravity_tasks: the criteria for the crawlers that will be created + repeated GravityTask gravity_tasks = 1; + // name: the name of the gravity task (optional, default will generate a + // random name) + string name = 2; + // notification_requests: the details of the notification to be sent to the + // user when a dataset + // that is automatically generated upon completion of the crawler is ready + // to download (optional) + repeated NotificationRequest notification_requests = 3; + // gravity_task_id: the ID of the gravity task (optional, default will + // generate a random ID) + optional string gravity_task_id = 4; +} + +// CreateGravityTaskResponse is the response message for creating a new gravity +// task +message CreateGravityTaskResponse { + // gravity_task_id: the ID of the gravity task + string gravity_task_id = 1; +} + +// BuildDatasetRequest is the request message for manually requesting the +// building of a dataset for a single crawler +message BuildDatasetRequest { + // crawler_id: the ID of the crawler that will be used to build the dataset + string crawler_id = 1; + // notification_requests: the details of the notification to be sent to the + // user when the dataset is ready to download (optional) + repeated NotificationRequest notification_requests = 2; + // max_rows: the maximum number of rows to include in the dataset (optional, + // defaults to 500) + int64 max_rows = 3; + // is_periodic: determines whether the datasets to build are for periodic build + optional bool is_periodic = 4; +} + +// BuildDatasetResponse is the response message for manually requesting the +// building of a dataset for a single crawler +// - dataset: the dataset that was built +message BuildDatasetResponse { + // dataset_id: the ID of the dataset + string dataset_id = 1; + // dataset: the dataset that was built + Dataset dataset = 2; +} + +// BuildAllDatasetsRequest is the request message for building all datasets +// belonging to a workflow +message 
BuildAllDatasetsRequest { + // gravityTaskId specifies which task to build + string gravity_task_id = 1; + // specifies how much of each crawler to build for workflow + repeated BuildDatasetRequest build_crawlers_config = 2; +} + +message BuildAllDatasetsResponse { + string gravity_task_id = 1; + repeated Dataset datasets = 2; +} + +// ChargeForDatasetRowsRequest is the request message for charging a user for dataset rows +message ChargeForDatasetRowsRequest { + // crawler_id: the ID of the crawler that was used to build the dataset + string crawler_id = 1; + // row_count: the number of rows to charge for + int64 row_count = 2; +} + +message Nebula { + // error: nebula build error message + string error = 1; + // file_size_bytes: the size of the file in bytes + int64 file_size_bytes = 2; + // url: the URL of the file + string url = 3; +} + +// Dataset contains the progress and results of a dataset build +message Dataset { + // crawler_workflow_id: the ID of the parent crawler for this dataset + string crawler_workflow_id = 1; + // create_date: the date the dataset was created + google.protobuf.Timestamp create_date = 2; + // expire_date: the date the dataset will expire (be deleted) + google.protobuf.Timestamp expire_date = 3; + // files: the details about the dataset files that are included in the dataset + repeated DatasetFile files = 4; + // status: the status of the dataset + string status = 5; + // status_message: the message of the status of the dataset + string status_message = 6; + // steps: the progress of the dataset build + repeated DatasetStep steps = 7; + // total_steps: the total number of steps in the dataset build + int64 total_steps = 8; + // nebula: the details about the nebula that was built + Nebula nebula = 9; +} + +// UpsertDatasetRequest contains the dataset id to insert and the dataset +// details +message UpsertDatasetRequest { + // dataset_id: a unique id for the dataset + string dataset_id = 1; + // dataset: the details of the dataset + 
Dataset dataset = 2; +} + +// UpsertNebulaRequest contains the dataset id and nebula details to upsert +message UpsertNebulaRequest { + // dataset_id: a unique id for the dataset + string dataset_id = 1; + // nebula_id: a unique id for the nebula + string nebula_id = 2; + // nebula: the details of the nebula + Nebula nebula = 3; +} + +// InsertDatasetFileRequest contains the dataset id to insert into and the +// dataset file details +message InsertDatasetFileRequest { + // dataset_id: the ID of the dataset to attach the file to + string dataset_id = 1; + // files: the dataset files to insert + repeated DatasetFile files = 2; +} + +// DatasetFile contains the details about a dataset file +message DatasetFile { + // file_name: the name of the file + string file_name = 1; + // file_size_bytes: the size of the file in bytes + uint64 file_size_bytes = 2; + // last_modified: the date the file was last modified + google.protobuf.Timestamp last_modified = 3; + // num_rows: the number of rows in the file + uint64 num_rows = 4; + // s3_key: the key of the file in S3 (internal use only) + string s3_key = 5; + // url: the URL of the file (public use) + string url = 6; +} + +// DatasetStep contains one step of the progress of a dataset build +// (NOTE: each step varies in time and complexity) +message DatasetStep { + // progress: the progress of this step in the dataset build (0.0 - 1.0) + double progress = 1; + // step: the step number of the dataset build (1-indexed) + int64 step = 2; + // step_name: description of what is happening in the step + string step_name = 3; +} + +// GetDatasetRequest is the request message for getting the status of a dataset +message GetDatasetRequest { + // dataset_id: the ID of the dataset + string dataset_id = 1; +} + +// GetDatasetResponse is the response message for getting the status of a +// dataset +message GetDatasetResponse { + // dataset: the dataset that is being built + Dataset dataset = 1; +} + +// CancelGravityTaskRequest is the 
request message for cancelling a gravity task +message CancelGravityTaskRequest { + // gravity_task_id: the ID of the gravity task + string gravity_task_id = 1; +} + +// CancelGravityTaskResponse is the response message for cancelling a gravity +// task +message CancelGravityTaskResponse { + // message: the message of the cancellation of the gravity task (currently + // hardcoded to "success") + string message = 1; +} + +// CancelDatasetRequest is the request message for cancelling a dataset build +message CancelDatasetRequest { + // dataset_id: the ID of the dataset + string dataset_id = 1; +} + +// CancelDatasetResponse is the response message for cancelling a dataset build +message CancelDatasetResponse { + // message: the message of the cancellation of the dataset build (currently + // hardcoded to "success") + string message = 1; +} + +// DatasetBillingCorrectionRequest is the request message for refunding a user +message DatasetBillingCorrectionRequest { + // requested_row_count: number of rows expected by the user + int64 requested_row_count = 1; + // actual_row_count: number of rows returned by gravity + int64 actual_row_count = 2; +} + +// DatasetBillingCorrectionResponse is the response message for refunding a user +message DatasetBillingCorrectionResponse { + // refund_amount + double refund_amount = 1; +} + +// GetMarketplaceDatasetsResponse returns the dataset metadata to be used in +// Marketplace +message GetMarketplaceDatasetsResponse { + // datasets: list of marketplace datasets + repeated GravityMarketplaceTaskState datasets = 1; +} + +// GetGravityTaskDatasetFilesRequest is the request message for getting dataset +// files for a gravity task +message GetGravityTaskDatasetFilesRequest { + // gravity_task_id: the ID of the gravity task (required) + string gravity_task_id = 1; +} + +// CrawlerDatasetFiles contains dataset files for a specific crawler +message CrawlerDatasetFiles { + // crawler_id: the ID of the crawler + string crawler_id = 1; + // 
dataset_files: the dataset files associated with this crawler + repeated DatasetFileWithId dataset_files = 2; +} + +// CrawlerRawMinerFiles contains raw miner files for a specific crawler +message CrawlerRawMinerFilesResponse { + // crawler_id: the ID of the crawler + string crawler_id = 1; + // s3_paths: the S3 paths associated with this crawler + repeated string s3_paths = 2; + // file_size_bytes: the sizes of the raw miner files collected + repeated int64 file_size_bytes = 3; +} + +// DatasetFileWithId extends DatasetFile to include the dataset ID +message DatasetFileWithId { + // dataset_id: the ID of the dataset this file belongs to + string dataset_id = 1; + // file_name: the name of the file + string file_name = 2; + // file_size_bytes: the size of the file in bytes + uint64 file_size_bytes = 3; + // last_modified: the date the file was last modified + google.protobuf.Timestamp last_modified = 4; + // num_rows: the number of rows in the file + uint64 num_rows = 5; + // s3_key: the key of the file in S3 (internal use only) + string s3_key = 6; + // url: the URL of the file (public use) + string url = 7; + // nebula_url: the url of a nebula + string nebula_url = 8; +} + +// GetGravityTaskDatasetFilesResponse is the response message for getting +// dataset files for a gravity task +message GetGravityTaskDatasetFilesResponse { + // gravity_task_id: the ID of the gravity task + string gravity_task_id = 1; + // crawler_dataset_files: dataset files grouped by crawler + repeated CrawlerDatasetFiles crawler_dataset_files = 2; +} + +// GetCrawlerHistoryRequest is the request message for getting crawler history +// associated to the provided gravity_task_id +message GetCrawlerHistoryRequest { + // gravity_task_id: the ID of the gravity task + string gravity_task_id = 1; +} + +// CrawlerHistoryEntry represents a single history entry for a crawler +message CrawlerHistoryEntry { + // ingest_dt: the timestamp when this entry was ingested + google.protobuf.Timestamp 
ingest_dt = 1; + // records_collected: the number of records collected + int64 records_collected = 2; + // bytes_collected: the number of bytes collected + int64 bytes_collected = 3; +} + +// CrawlerCriteriaAndHistory represents crawler information with criteria and +// history +message CrawlerCriteriaAndHistory { + // crawler_id: the ID of the crawler + string crawler_id = 1; + // platform: the platform from gravity_crawler_criteria + string platform = 2; + // topic: the topic from gravity_crawler_criteria + optional string topic = 3; + // keyword: the keyword from gravity_crawler_criteria + optional string keyword = 4; + // post_start_date: the start date for posts from gravity_crawler_criteria + optional google.protobuf.Timestamp post_start_date = 5; + // post_end_date: the end date for posts from gravity_crawler_criteria + optional google.protobuf.Timestamp post_end_date = 6; + // crawler_history: the history entries for this crawler + repeated CrawlerHistoryEntry crawler_history = 7; +} + +// GetCrawlerHistoryResponse is the response message for getting crawler history +message GetCrawlerHistoryResponse { + // gravity_task_id: the ID of the gravity task + string gravity_task_id = 1; + // crawlers: the crawlers with their criteria and history + repeated CrawlerCriteriaAndHistory crawlers = 2; +} + +// GetMarketplaceCrawlerDataForDDSubmissionRequest is the request message for getting crawler data for the marketplace user +message GetMarketplaceCrawlerDataForDDSubmissionRequest { + // marketplace_user_id: the ID of the marketplace user (required) + string marketplace_user_id = 1; +} + +// GetMarketplaceCrawlerDataForDDSubmissionResponse is the response message for marketplace crawler data +message GetMarketplaceCrawlerDataForDDSubmissionResponse { + // crawlers: list of marketplace crawler data for DD submission + repeated MarketplaceCrawlerDataForDDSubmission crawlers = 1; +} + +// MarketplaceCrawlerDataForDDSubmission contains crawler information for DD 
submission with all fields needed for UpsertDynamicDesirabilityEntry +message MarketplaceCrawlerDataForDDSubmission { + string crawler_id = 1; + string platform = 2; + optional string topic = 3; + optional string keyword = 4; + optional string post_start_datetime = 5; + optional string post_end_datetime = 6; + // Additional fields needed for UpsertDynamicDesirabilityEntry + google.protobuf.Timestamp start_time = 7; + google.protobuf.Timestamp deregistration_time = 8; + google.protobuf.Timestamp archive_time = 9; + string status = 10; + uint64 bytes_collected = 11; + uint64 records_collected = 12; + string notification_to = 13; + string notification_link = 14; + string user_id = 15; +} + +// GetActiveUserTasksResponse is the response message for active user tasks +message GetActiveUserTasksResponse { + // active_user_tasks: list of active user tasks + repeated ActiveUserTask active_user_tasks = 1; +} + +// ActiveUserCrawler contains active user crawler information +message ActiveUserCrawler { + // crawler_id: the id of the crawler + string crawler_id = 1; + // row_count: the number of rows collected by the crawler + uint64 row_count = 2; +} + +// ActiveUserTask contains active user task information +message ActiveUserTask { + // gravity_task_id: the id of the gravity_task + string gravity_task_id = 1; + // crawlers: list of active user crawlers + repeated ActiveUserCrawler crawlers = 2; +} + +// UpsertPreBuiltUserDatasetsRequest is the request message for upserting pre-built user datasets +message UpsertPreBuiltUserDatasetsRequest { + // gravity_task_id: the ID of the gravity task + string gravity_task_id = 1; + // crawler_id: the ID of the crawler + string crawler_id = 2; + // row_count: the number of rows in the pre-built dataset + int64 row_count = 3; +} + +// GetPreBuiltUserDatasetsRequest is the request message for getting pre-built user datasets +message GetPreBuiltUserDatasetsRequest { + // gravity_task_id: the ID of the gravity task + string gravity_task_id 
= 1; +} + +// PreBuiltUserDataset represents a single pre-built user dataset record +message PreBuiltUserDataset { + // gravity_task_id: the ID of the gravity task + string gravity_task_id = 1; + // crawler_id: the ID of the crawler + string crawler_id = 2; + // row_count: the number of rows in the pre-built dataset + int64 row_count = 3; +} + +// GetPreBuiltUserDatasetsResponse is the response message for getting pre-built user datasets +message GetPreBuiltUserDatasetsResponse { + // datasets: list of pre-built user datasets for the gravity task + repeated PreBuiltUserDataset datasets = 1; +} + + diff --git a/proto/sn13/v1/sn13_validator.proto b/proto/sn13/v1/sn13_validator.proto new file mode 100644 index 0000000..b226a6e --- /dev/null +++ b/proto/sn13/v1/sn13_validator.proto @@ -0,0 +1,89 @@ +syntax = "proto3"; + +package sn13.v1; +import "google/protobuf/struct.proto"; + +option go_package = "macrocosm-os/rift/constellation_api/gen/sn13/v1"; + + +service Sn13Service { + // ListTopics is the RPC method for getting the top topics + rpc ListTopics(ListTopicsRequest) returns (ListTopicsResponse); + rpc ValidateRedditTopic(ValidateRedditTopicRequest) returns (ValidateRedditTopicResponse); + + // Access the SN13 API endpoint on_demand_data_request via Constellation + rpc OnDemandData(OnDemandDataRequest) returns (OnDemandDataResponse); +} + +// ListTopicsRequest is the request message for getting the top topics +message ListTopicsRequest { + // source: the source to validate + string source = 1; +} + +// ListTopicsResponseDetail is the response message for getting the top topics +message ListTopicsResponseDetail { + // label_value: reddit or x topic + string label_value = 1; + // content_size_bytes: content size in bytes + uint64 content_size_bytes = 2; + // adj_content_size_bytes: adjacent content size in bytes + uint64 adj_content_size_bytes = 3; +} + +// ListTopicsResponse is a list of ListTopicsResponseDetail(s) with top topics +message ListTopicsResponse { + // 
message: the response message + repeated ListTopicsResponseDetail details = 1; +} + +// ValidateTopicRequest is the request message for validating a reddit topic +message ValidateRedditTopicRequest { + // topic: the topic to validate + string topic = 1; +} + +// ValidateTopicResponse is the response message for validating a topic +message ValidateRedditTopicResponse { + // platform: i.e. reddit + string platform = 1; + // topic: the topic to validate + string topic = 2; + // exists: whether the topic exists + bool exists = 3; + // over18: whether the topic is NSFW + bool over18 = 4; + // quarantine: whether the topic is quarantined + bool quarantine = 5; +} + +// OnDemandDataRequest is a request to SN13 to retrieve data +message OnDemandDataRequest { + // source: the data source (X, Reddit or Youtube) + string source = 1; + // usernames: list of usernames to fetch data from + repeated string usernames = 2; + // keywords: list of keywords to search for + repeated string keywords = 3; + // start_date: ISO 8601 formatted date string (e.g. "2024-01-01T00:00:00Z") + optional string start_date = 4; + // end_date: ISO 8601 formatted date string (e.g. 
"2024-01-31T23:59:59Z") + optional string end_date = 5; + // limit: maximum number of results to return + optional int64 limit = 6; + // keyword_mode: defines how keywords should be used in selecting response posts (optional): + // "all" (posts must include all keywords) or "any" (posts can include any combination of keywords) + optional string keyword_mode = 7; + // url: single URL for URL search mode (X) + optional string url = 8; +} + +// OnDemandDataResponse is the response from SN13 for an on-demand data request +message OnDemandDataResponse { + // status: the request status, either success/error + string status = 1; + // data: the data object returned + repeated google.protobuf.Struct data = 2; + // meta: additional metadata about the request + google.protobuf.Struct meta = 3; +} diff --git a/src/api/client.rs b/src/api/client.rs index 673fe16..4778904 100644 --- a/src/api/client.rs +++ b/src/api/client.rs @@ -1,39 +1,156 @@ -use anyhow::{bail, Context, Result}; -use reqwest::header::{HeaderMap, HeaderValue, AUTHORIZATION, CONTENT_TYPE}; +//! Native gRPC client for SN13 and Gravity APIs. 
+ +use anyhow::{Context, Result}; use std::time::Duration; +use tonic::metadata::MetadataValue; +use tonic::transport::{Channel, ClientTlsConfig}; use super::types::*; pub const DEFAULT_BASE_URL: &str = "https://constellation.api.cloud.macrocosmos.ai"; - const SN13_SERVICE: &str = "sn13.v1.Sn13Service"; const GRAVITY_SERVICE: &str = "gravity.v1.GravityService"; +const CLIENT_ID: &str = "dataverse-rust-cli"; + +// ─── Generated protobuf modules ───────────────────────────────────── + +pub mod sn13_proto { + tonic::include_proto!("sn13.v1"); +} + +// ─── Struct → JSON conversion ─────────────────────────────────────── + +fn struct_to_json(s: prost_types::Struct) -> serde_json::Value { + let map: serde_json::Map = s + .fields + .into_iter() + .map(|(k, v)| (k, prost_value_to_json(v))) + .collect(); + serde_json::Value::Object(map) +} + +fn prost_value_to_json(v: prost_types::Value) -> serde_json::Value { + match v.kind { + Some(prost_types::value::Kind::NullValue(_)) => serde_json::Value::Null, + Some(prost_types::value::Kind::NumberValue(n)) => { + if n == (n as i64) as f64 && n.is_finite() { + serde_json::Value::Number(serde_json::Number::from(n as i64)) + } else { + serde_json::Number::from_f64(n) + .map(serde_json::Value::Number) + .unwrap_or(serde_json::Value::Null) + } + } + Some(prost_types::value::Kind::StringValue(s)) => serde_json::Value::String(s), + Some(prost_types::value::Kind::BoolValue(b)) => serde_json::Value::Bool(b), + Some(prost_types::value::Kind::StructValue(s)) => struct_to_json(s), + Some(prost_types::value::Kind::ListValue(l)) => { + serde_json::Value::Array(l.values.into_iter().map(prost_value_to_json).collect()) + } + None => serde_json::Value::Null, + } +} + + +// ─── Auth interceptor ─────────────────────────────────────────────── + +#[derive(Clone)] +struct AuthInterceptor { + auth_header: MetadataValue, + client_id: MetadataValue, +} + +impl AuthInterceptor { + fn new(api_key: &str) -> Result { + let auth_header = format!("Bearer 
{api_key}") + .parse() + .map_err(|_| anyhow::anyhow!("invalid API key for header"))?; + let client_id = CLIENT_ID + .parse() + .map_err(|_| anyhow::anyhow!("invalid client ID"))?; + Ok(Self { auth_header, client_id }) + } +} + +impl tonic::service::Interceptor for AuthInterceptor { + fn call( + &mut self, + mut req: tonic::Request<()>, + ) -> std::result::Result, tonic::Status> { + req.metadata_mut().insert("authorization", self.auth_header.clone()); + req.metadata_mut().insert("x-client-id", self.client_id.clone()); + Ok(req) + } +} + +// ─── Error mapping ────────────────────────────────────────────────── + +fn map_grpc_error(status: tonic::Status) -> anyhow::Error { + match status.code() { + tonic::Code::Unauthenticated => { + anyhow::anyhow!( + "authentication failed: check your API key. {}", + status.message() + ) + } + tonic::Code::Unavailable => { + anyhow::anyhow!( + "service temporarily unavailable: {}\n Tip: the SN13 miner network may be busy. Retry in a few seconds.", + status.message() + ) + } + tonic::Code::Internal => { + anyhow::anyhow!( + "service temporarily unavailable (internal): {}\n Tip: the SN13 miner network may be busy. 
Retry in a few seconds.", + status.message() + ) + } + _ => anyhow::anyhow!("gRPC error ({}): {}", status.code(), status.message()), + } +} + +// ─── Type aliases for intercepted clients ─────────────────────────── + +type InterceptedChannel = + tonic::service::interceptor::InterceptedService; + +// ─── ApiClient ────────────────────────────────────────────────────── pub struct ApiClient { + sn13: sn13_proto::sn13_service_client::Sn13ServiceClient, http: reqwest::Client, - base_url: String, api_key: String, + base_url: String, } impl ApiClient { pub fn new(api_key: String, base_url: Option, timeout_secs: u64) -> Result { - let mut headers = HeaderMap::new(); - headers.insert(CONTENT_TYPE, HeaderValue::from_static("application/json")); - headers.insert( - "x-client-id", - HeaderValue::from_static("dataverse-rust-cli"), - ); - headers.insert( - reqwest::header::USER_AGENT, - HeaderValue::from_str(&format!("dataverse-cli/{}", env!("CARGO_PKG_VERSION"))) - .expect("valid header value"), + let url = base_url.unwrap_or_else(|| DEFAULT_BASE_URL.to_string()); + + // SN13: native gRPC (reliable, bypasses ALB transcoding) + let tls = ClientTlsConfig::new().with_native_roots(); + let channel = Channel::from_shared(url.clone()) + .context("invalid endpoint URL")? + .tls_config(tls) + .context("TLS config failed")? 
+ .timeout(Duration::from_secs(timeout_secs)) + .connect_lazy(); + + let interceptor = AuthInterceptor::new(&api_key)?; + let sn13 = sn13_proto::sn13_service_client::Sn13ServiceClient::with_interceptor( + channel, + interceptor, ); + + // Gravity: HTTP/JSON (their gRPC endpoint is broken — sends JSON over binary channel) + let mut headers = reqwest::header::HeaderMap::new(); + headers.insert(reqwest::header::CONTENT_TYPE, reqwest::header::HeaderValue::from_static("application/json")); + headers.insert("x-client-id", reqwest::header::HeaderValue::from_static(CLIENT_ID)); headers.insert( - AUTHORIZATION, - HeaderValue::from_str(&format!("Bearer {api_key}")) + reqwest::header::AUTHORIZATION, + reqwest::header::HeaderValue::from_str(&format!("Bearer {api_key}")) .context("invalid API key for header")?, ); - let http = reqwest::Client::builder() .default_headers(headers) .timeout(Duration::from_secs(timeout_secs)) @@ -41,17 +158,43 @@ impl ApiClient { .context("failed to build HTTP client")?; Ok(Self { + sn13, http, - base_url: base_url.unwrap_or_else(|| DEFAULT_BASE_URL.to_string()), api_key, + base_url: url, }) } - fn url(&self, service: &str, method: &str) -> String { - format!("{}/{}/{}", self.base_url, service, method) + /// HTTP POST for Gravity endpoints (their gRPC is broken server-side). + async fn gravity_post( + &self, + method: &str, + body: &impl serde::Serialize, + ) -> Result { + let url = format!("{}/{}/{}", self.base_url, GRAVITY_SERVICE, method); + let resp = self.http.post(&url).json(body).send().await + .with_context(|| format!("request to {url} failed"))?; + + let status = resp.status(); + if !status.is_success() { + let body_text = resp.text().await.unwrap_or_default(); + match status.as_u16() { + 401 => anyhow::bail!("authentication failed: check your API key. 
{body_text}"), + 500 | 502 | 503 | 504 => { + let msg = if body_text.is_empty() { "server error".to_string() } else { body_text }; + anyhow::bail!("service temporarily unavailable ({status}): {msg}\n Tip: the SN13 miner network may be busy. Retry in a few seconds."); + } + _ => anyhow::bail!("API error {status}: {body_text}"), + } + } + + resp.json::().await + .with_context(|| format!("failed to parse response from {url}")) } - pub fn dry_run( + // ─── Dry-run helpers ───────────────────────────────────────── + + fn dry_run( &self, service: &str, method: &str, @@ -62,70 +205,62 @@ impl ApiClient { "authorization".to_string(), format!("Bearer {}", crate::config::Config::mask_key(&self.api_key)), ); - headers.insert("content-type".to_string(), "application/json".to_string()); - headers.insert("x-client-id".to_string(), "dataverse-rust-cli".to_string()); + headers.insert( + "content-type".to_string(), + "application/grpc".to_string(), + ); + headers.insert("x-client-id".to_string(), CLIENT_ID.to_string()); DryRunOutput { - method: "POST".to_string(), - url: self.url(service, method), + method: "gRPC".to_string(), + url: format!("{}/{}/{}", self.base_url, service, method), headers, body: body.clone(), } } - async fn post( + // ─── SN13 ──────────────────────────────────────────────────── + + pub async fn on_demand_data( &self, - service: &str, - method: &str, - body: &impl serde::Serialize, - ) -> Result { - let url = self.url(service, method); - let resp = self - .http - .post(&url) - .json(body) - .send() + req: &OnDemandDataRequest, + ) -> Result { + let grpc_req = sn13_proto::OnDemandDataRequest { + source: req.source.clone(), + usernames: req.usernames.clone(), + keywords: req.keywords.clone(), + start_date: req.start_date.clone(), + end_date: req.end_date.clone(), + limit: req.limit, + keyword_mode: req.keyword_mode.clone(), + url: req.url.clone(), + }; + + let response = self + .sn13 + .clone() + .on_demand_data(tonic::Request::new(grpc_req)) .await - 
.with_context(|| format!("request to {url} failed"))?; + .map_err(map_grpc_error)?; - let status = resp.status(); - if !status.is_success() { - let body_text = resp.text().await.unwrap_or_default(); - match status.as_u16() { - 401 => bail!("authentication failed: check your API key. {body_text}"), - 464 => { - // Macrocosmos custom status: upstream SN13 miner network issue - let detail = if body_text.is_empty() { - "SN13 miner network temporarily unavailable. Try again in a few seconds.".to_string() - } else { - body_text - }; - bail!("service unavailable (464): {detail}"); - } - 500 | 502 | 503 | 504 => { - let msg = if body_text.is_empty() { - "server error".to_string() - } else { - body_text - }; - bail!("service temporarily unavailable ({status}): {msg}\n Tip: the SN13 miner network may be busy. Retry in a few seconds."); - } - _ => bail!("API error {status}: {body_text}"), - } - } + let inner = response.into_inner(); - resp.json::() - .await - .with_context(|| format!("failed to parse response from {url}")) - } + let data: Vec = inner.data.into_iter().map(struct_to_json).collect(); + let meta = inner.meta.map(struct_to_json); - // ─── SN13 ─────────────────────────────────────────────────────── + // The gRPC server may return an empty status string on success; + // normalize to "success" so downstream checks work. 
+ let status = if inner.status.is_empty() { + "success".to_string() + } else { + inner.status + }; - pub async fn on_demand_data( - &self, - req: &OnDemandDataRequest, - ) -> Result { - self.post(SN13_SERVICE, "OnDemandData", req).await + Ok(OnDemandDataResponse { + status: Some(status), + data: Some(data), + meta, + }) } pub fn on_demand_data_dry_run(&self, req: &OnDemandDataRequest) -> Result { @@ -133,43 +268,31 @@ impl ApiClient { Ok(self.dry_run(SN13_SERVICE, "OnDemandData", &body)) } - // ─── Gravity ──────────────────────────────────────────────────── + // ─── Gravity (HTTP/JSON — their gRPC endpoint is broken) ──── pub async fn create_gravity_task( &self, req: &CreateGravityTaskRequest, ) -> Result { - self.post(GRAVITY_SERVICE, "CreateGravityTask", req).await + self.gravity_post("CreateGravityTask", req).await } - pub fn create_gravity_task_dry_run( - &self, - req: &CreateGravityTaskRequest, - ) -> Result { + pub fn create_gravity_task_dry_run(&self, req: &CreateGravityTaskRequest) -> Result { let body = serde_json::to_value(req)?; Ok(self.dry_run(GRAVITY_SERVICE, "CreateGravityTask", &body)) } - pub async fn get_gravity_tasks( - &self, - req: &GetGravityTasksRequest, - ) -> Result { - self.post(GRAVITY_SERVICE, "GetGravityTasks", req).await + pub async fn get_gravity_tasks(&self, req: &GetGravityTasksRequest) -> Result { + self.gravity_post("GetGravityTasks", req).await } - pub fn get_gravity_tasks_dry_run( - &self, - req: &GetGravityTasksRequest, - ) -> Result { + pub fn get_gravity_tasks_dry_run(&self, req: &GetGravityTasksRequest) -> Result { let body = serde_json::to_value(req)?; Ok(self.dry_run(GRAVITY_SERVICE, "GetGravityTasks", &body)) } - pub async fn build_dataset( - &self, - req: &BuildDatasetRequest, - ) -> Result { - self.post(GRAVITY_SERVICE, "BuildDataset", req).await + pub async fn build_dataset(&self, req: &BuildDatasetRequest) -> Result { + self.gravity_post("BuildDataset", req).await } pub fn build_dataset_dry_run(&self, req: 
&BuildDatasetRequest) -> Result { @@ -178,7 +301,7 @@ impl ApiClient { } pub async fn get_dataset(&self, req: &GetDatasetRequest) -> Result { - self.post(GRAVITY_SERVICE, "GetDataset", req).await + self.gravity_post("GetDataset", req).await } pub fn get_dataset_dry_run(&self, req: &GetDatasetRequest) -> Result { @@ -187,18 +310,13 @@ impl ApiClient { } pub async fn cancel_gravity_task(&self, task_id: &str) -> Result { - let req = CancelRequest { - gravity_task_id: Some(task_id.to_string()), - dataset_id: None, - }; - self.post(GRAVITY_SERVICE, "CancelGravityTask", &req).await + let req = CancelRequest { gravity_task_id: Some(task_id.to_string()), dataset_id: None }; + self.gravity_post("CancelGravityTask", &req).await } pub async fn cancel_dataset(&self, dataset_id: &str) -> Result { - let req = CancelRequest { - gravity_task_id: None, - dataset_id: Some(dataset_id.to_string()), - }; - self.post(GRAVITY_SERVICE, "CancelDataset", &req).await + let req = CancelRequest { gravity_task_id: None, dataset_id: Some(dataset_id.to_string()) }; + self.gravity_post("CancelDataset", &req).await } } + diff --git a/src/display/mod.rs b/src/display/mod.rs index 6d0a7a5..21524ce 100644 --- a/src/display/mod.rs +++ b/src/display/mod.rs @@ -53,7 +53,7 @@ fn truncate(s: &str, max: usize) -> String { if chars.next().is_some() { format!("{collected}...") } else { - s.to_string() + collected } } @@ -104,62 +104,46 @@ fn is_reddit(post: &serde_json::Value) -> bool { ) } -/// Extract the display text for a post, handling both X (text) and Reddit (title + body). 
-fn post_text(post: &serde_json::Value) -> String { - if is_reddit(post) { - let title = extract_str(post, "title"); - let body = extract_str(post, "body"); - if title != "-" && body != "-" && !body.is_empty() { - format!("{title} | {body}") - } else if title != "-" { - title - } else { - body - } - } else { - extract_str(post, "text") - } -} - -/// Extract author for a post — Reddit uses top-level `username`, X uses `user.username`. -fn post_author(post: &serde_json::Value) -> String { - if is_reddit(post) { - extract_str(post, "username") - } else { - extract_str(post, "user.username") - } -} - -/// Extract engagement metrics based on source. -fn post_likes(post: &serde_json::Value) -> String { - if is_reddit(post) { - extract_num(post, "score") - } else { - extract_num(post, "tweet.like_count") - } -} - -fn post_reposts(post: &serde_json::Value) -> String { - if is_reddit(post) { - "-".to_string() - } else { - extract_num(post, "tweet.retweet_count") - } -} - -fn post_replies(post: &serde_json::Value) -> String { - if is_reddit(post) { - extract_num(post, "num_comments") - } else { - extract_num(post, "tweet.reply_count") - } +/// Extract all display fields for a post, checking source once. 
+struct PostFields { + text: String, + author: String, + likes: String, + reposts: String, + replies: String, + views: String, } -fn post_views(post: &serde_json::Value) -> String { - if is_reddit(post) { - "-".to_string() - } else { - extract_num(post, "tweet.view_count") +impl PostFields { + fn extract(post: &serde_json::Value) -> Self { + if is_reddit(post) { + let title = extract_str(post, "title"); + let body = extract_str(post, "body"); + let text = if title != "-" && body != "-" && !body.is_empty() { + format!("{title} | {body}") + } else if title != "-" { + title + } else { + body + }; + Self { + text, + author: extract_str(post, "username"), + likes: extract_num(post, "score"), + reposts: "-".to_string(), + replies: extract_num(post, "num_comments"), + views: "-".to_string(), + } + } else { + Self { + text: extract_str(post, "text"), + author: extract_str(post, "user.username"), + likes: extract_num(post, "tweet.like_count"), + reposts: extract_num(post, "tweet.retweet_count"), + replies: extract_num(post, "tweet.reply_count"), + views: extract_num(post, "tweet.view_count"), + } + } } } @@ -217,7 +201,8 @@ pub fn print_posts(data: &[serde_json::Value], format: OutputFormat) -> Result<( let rows: Vec = data .iter() .map(|post| { - let raw_text = post_text(post) + let fields = PostFields::extract(post); + let raw_text = fields.text .replace('\n', " ") .replace('\r', ""); PostRow { @@ -225,12 +210,12 @@ pub fn print_posts(data: &[serde_json::Value], format: OutputFormat) -> Result<( .chars() .take(16) .collect(), - author: truncate(&post_author(post), 20), + author: truncate(&fields.author, 20), text: truncate(&raw_text, 60), - likes: post_likes(post), - reposts: post_reposts(post), - replies: post_replies(post), - views: post_views(post), + likes: fields.likes, + reposts: fields.reposts, + replies: fields.replies, + views: fields.views, } }) .collect();