From b847b2772ffa59192ba64bc099b9b590610d1e1e Mon Sep 17 00:00:00 2001 From: Abhijit Pujare Date: Thu, 27 Mar 2025 09:56:28 -0700 Subject: [PATCH 1/5] Adding ingestion script and qna script for haystack --- Earnings_Call_Ingestion_Script.ipynb | 8224 ++++++++++++++++++++++++++ QuestionAnsweringNotebook.ipynb | 1005 ++++ sycamore | 2 +- uv.lock | 128 +- 4 files changed, 9310 insertions(+), 49 deletions(-) create mode 100644 Earnings_Call_Ingestion_Script.ipynb create mode 100644 QuestionAnsweringNotebook.ipynb diff --git a/Earnings_Call_Ingestion_Script.ipynb b/Earnings_Call_Ingestion_Script.ipynb new file mode 100644 index 0000000..f1e0065 --- /dev/null +++ b/Earnings_Call_Ingestion_Script.ipynb @@ -0,0 +1,8224 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 2, + "id": "8c351421-90b1-421a-8ba3-7abe33c7ce7a", + "metadata": {}, + "outputs": [], + "source": [ + "import sycamore\n", + "import os\n", + "from sycamore.transforms.partition import ArynPartitioner\n", + "from sycamore.transforms.extract_schema import (\n", + " OpenAIPropertyExtractor,\n", + ")\n", + "from sycamore.llms import OpenAI, OpenAIModels\n", + "from sycamore.data.element import Element\n", + "from sycamore.data.document import Document\n", + "from sycamore.functions import HuggingFaceTokenizer, OpenAITokenizer\n", + "from sycamore.llms import OpenAI, OpenAIModels\n", + "from sycamore.transforms.embed import SentenceTransformerEmbedder" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "558b6660-c646-47b7-9f0a-08ab39c42606", + "metadata": {}, + "outputs": [], + "source": [ + "context = sycamore.init()\n", + "# local file path to the SortBenchmark dataset\n", + "paths = \"\"\n", + "initial_docset = context.read.binary(paths, binary_format=\"pdf\")\n", + "#initial_docset = context.read.aryn(aryn_url=\"https://test-api.aryn.ai\", docset_id=\"aryn:f-trcw7rui6kg2t9os03owzjf\")" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "101c2417-2e9e-4e32-ad77-31f88c287b21", + "metadata": {}, + "outputs": [], + "source": [ + "## set your keys here" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "b497902d-6f93-40f1-b4e6-b24ef7d096d0", + "metadata": {}, + "outputs": [], + "source": [ + "tokenizer = OpenAITokenizer(OpenAIModels.GPT_4O.value.name)\n", + "llm = OpenAI(OpenAIModels.GPT_4O.value)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "4de771fb-da21-4855-beba-a4a5d49d338c", + "metadata": {}, + "outputs": [], + "source": [ + "schema_json = {\n", + " \"type\": \"object\",\n", + " \"properties\": {\n", + " \"company_name\": {\"type\": \"string\", \"description\": \"name of the company\"},\n", + " \"company_ticker\": {\"type\": \"string\", \"description\": \"Ticker of the company\"},\n", + " \"quarter\": {\n", + " \"type\": \"string\",\n", + " \"description\": \"Quarter of the earnings call, it should be in the format of Q1, Q2, Q3, Q4\",\n", + " },\n", + " \"date\":{\"type\": \"string\", \"description\": \"The date of the earnings call\"}\n", + " },\n", + " }" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "8b424f07-750b-488d-91cf-c2c23186ed00", + "metadata": {}, + "outputs": [], + "source": [ + "def add_property_to_schema(doc: Document) -> Document:\n", + " schema_json = {\n", + " \"type\": \"object\",\n", + " \"properties\": {\n", + " \"company_name\": {\"type\": \"string\", \"description\": \"name of the company\"},\n", + " \"company_ticker\": {\"type\": \"string\", \"description\": \"Ticker of the company\"},\n", + " \"quarter\": {\n", + " \"type\": \"string\",\n", + " \"description\": \"Quarter of the earnings call, it should be in the format of Q1, Q2, Q3, Q4\",\n", + " },\n", + " \"date\":{\"type\": \"string\", \"description\": \"The date of the earnings call\"}\n", + " },\n", + " }\n", + "\n", + " doc.properties.update({\"_schema\": schema_json, \"_schema_class\": \"earnings_call\"})\n", + " return doc" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "533f2ff7-0a6e-4032-91c0-7aa6b35bfc34", + "metadata": {}, + "outputs": [], + "source": [ + "from sycamore import MaterializeSourceMode\n", + "partitioned_docset = (initial_docset.partition(partitioner=ArynPartitioner())\n", + " .materialize(path=\"/Users/abhijitpujare/workspace/haystack-workshop-2025/materialize/partitioned_docset\", source_mode=MaterializeSourceMode.USE_STORED)\n", + " .split_elements(tokenizer=tokenizer, max_tokens=512)\n", + " .extract_properties(property_extractor=OpenAIPropertyExtractor(llm=llm, schema=schema_json, schema_name=\"earnings_calls\")))" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "cf5413c3-cb14-4bf5-917d-125784096c26", + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "def filterInSpeakers(elem: Element) -> bool:\n", + " if elem.type == 'Image' or elem.type == 'table' or elem.type == 'Page-footer' or elem.type == 'Footnote':\n", + " return False\n", + " return True\n", + "\n", + "def removeOriginalElements(doc: Document) -> Document:\n", + " if '_original_elements' in doc.properties:\n", + " del doc.properties['_original_elements']\n", + " return doc\n", + "\n", + "\n", + "filtered_Docset = partitioned_docset.filter_elements(filterInSpeakers)\n", + "docset_no_orig_elements = filtered_Docset.map(removeOriginalElements)" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "27170a2a-47f1-4f19-93b0-1c314757e284", + "metadata": {}, + "outputs": [], + "source": [ + "## Replace [^ ]*[^\\S\\n\\t][^ ]*[^\\S\\n\\t]--[^\\S\\n\\t][^ ]*\n", + "import re\n", + "from sycamore.data import Document\n", + "def markSpeakers(doc:Element) -> Element:\n", + " if 'text_representation' not in doc:\n", + " return doc\n", + "\n", + " external_speaker = re.match('[^ ]*[^\\S\\n\\t][^ ]*[^\\S\\n\\t]--[^\\S\\n\\t].*--', doc.data['text_representation'])\n", + " internal_speaker = re.match('[^ ]*[^\\S\\n\\t][^ ]*[^\\S\\n\\t]--.*', doc.data['text_representation'])\n", + " doc_properties = doc.properties\n", + " if doc.text_representation == 'Operator':\n", + " doc_properties['speaker'] = True\n", + " doc_properties['speaker_role'] = 'Operator'\n", + " doc_properties['speaker_name'] = 'Operator'\n", + " elif external_speaker:\n", + " location = doc.text_representation.find('--')\n", + " location2 = location + doc.text_representation[location+2:].find('--')\n", + " doc_properties['speaker_name'] = doc.text_representation[:location].lstrip()\n", + " doc_properties['speaker_external_org'] = doc.text_representation[location+2:location2+1].lstrip()\n", + " doc_properties['speaker_role'] = doc.text_representation[location2+4:].lstrip()\n", + " doc_properties['speaker'] = True\n", + " elif internal_speaker:\n", + " location = doc.text_representation.find('--')\n", + " doc_properties['speaker_name'] = doc.text_representation[:location].lstrip()\n", + " doc_properties['speaker_role'] = doc.text_representation[location+2:].lstrip()\n", + " doc_properties['speaker'] = True\n", + "\n", + " return doc\n", + "\n", + "#filtered_Docset.map_elements(markSpeakers).show()\n", + "speakersMarkedDocSet = docset_no_orig_elements.map_elements(markSpeakers)" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "71d5158d-9e31-432b-af55-c283905f3f8e", + "metadata": {}, + "outputs": [], + "source": [ + "def mergeDialogue(doc: Document) -> Document:\n", + " last_speaker_elem = None\n", + " curr_text_representation = ''\n", + " for elem in doc.elements:\n", + " \n", + " if 'speaker' in elem.properties and last_speaker_elem is not None:\n", + " last_speaker_elem.text_representation = curr_text_representation\n", + " last_speaker_elem = elem\n", + " curr_text_representation = ''\n", + " elif 'speaker' in elem.properties and elem.properties['speaker']:\n", + " last_speaker_elem = elem\n", + " curr_text_representation = ''\n", + " elif last_speaker_elem is not None and elem.text_representation:\n", + " curr_text_representation += elem.text_representation\n", + "\n", + " if last_speaker_elem is not None and 'text_representation' in last_speaker_elem.properties:\n", + " last_speaker_elem.text_representation = curr_text_representation\n", + " return doc\n", + "\n", + "\n", + "mergedDialogeSet = speakersMarkedDocSet.map(mergeDialogue)" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "2d7db173-2116-4d8f-b8ea-f266be975eff", + "metadata": {}, + "outputs": [], + "source": [ + "def filterOnlySpeakers(elem: Element):\n", + " return 'speaker' in elem.properties\n", + "\n", + "finalDocSet = mergedDialogeSet.filter_elements(filterOnlySpeakers)\n", + "#finalDocSet.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "id": "9cec59d1-f457-4509-98f2-ac8b62cdf19c", + "metadata": {}, + "outputs": [], + "source": [ + "from sycamore.transforms.embed import OpenAIEmbedder\n", + "model_name = \"text-embedding-3-small\"\n", + "from aryn_sdk.client.client import Client \n", + "myClient = Client(aryn_url=\"https://test-api.aryn.ai\", aryn_api_key=\"eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJzdWIiOnsiZW1sIjoiZG9jc3RvcmUtdGVhbUBhcnluLmFpIiwiYWN0IjoiMjI0NjYxMzg4MzA4In0sImlhdCI6MTczMzk1MDY3OH0.xqmVCfnu0RJ2RW-74-to4_hbeTIZflToj7YZs6vGtNxCkNroEE5Quzro8ztxTdI6Yt-9HzPepIXdA2QnKDtWBQ\")\n", + "docset = myClient.create_docset(name=\"haystack_workshop_target_correct\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f2871d26-680a-47c2-b6b1-1ccaafa1f0e2", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/var/folders/ty/flsmmnn17p314__gg336qjy00000gn/T/ipykernel_77841/3213490678.py:1: FutureWarning: Class aryn is experimental and may change in the future.\n", + " finalDocSet.embed(embedder=OpenAIEmbedder(model_name=model_name)).write.aryn(aryn_url=\"https://test-api.aryn.ai/v1/storage\", docset_id=docset.value.docset_id, aryn_api_key=\"eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJzdWIiOnsiZW1sIjoiZG9jc3RvcmUtdGVhbUBhcnluLmFpIiwiYWN0IjoiMjI0NjYxMzg4MzA4In0sImlhdCI6MTczMzk1MDY3OH0.xqmVCfnu0RJ2RW-74-to4_hbeTIZflToj7YZs6vGtNxCkNroEE5Quzro8ztxTdI6Yt-9HzPepIXdA2QnKDtWBQ\")\n", + "/Users/abhijitpujare/workspace/haystack-workshop-2025/.venv/lib/python3.11/site-packages/sycamore/writer.py:861: FutureWarning: Class ArynWriter is experimental and may change in the future.\n", + " writer: Node = ArynWriter(self.plan, client_params=client_params, target_params=target_params, **kwargs)\n", + "/Users/abhijitpujare/workspace/haystack-workshop-2025/.venv/lib/python3.11/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", + " from .autonotebook import tqdm as notebook_tqdm\n", + "2025-03-26 16:46:26,934\tINFO util.py:154 -- Missing packages: ['ipywidgets']. Run `pip install -U ipywidgets`, then restart the notebook server for rich notebook output.\n", + "2025-03-26 16:46:27,907\tINFO worker.py:1832 -- Started a local Ray instance. View the dashboard at \u001b[1m\u001b[32mhttp://127.0.0.1:8265 \u001b[39m\u001b[22m\n", + "2025-03-26 16:46:28,630\tINFO util.py:154 -- Missing packages: ['ipywidgets']. Run `pip install -U ipywidgets`, then restart the notebook server for rich notebook output.\n", + "2025-03-26 16:46:30,038\tWARNING util.py:576 -- The argument ``compute`` is deprecated in Ray 2.9. Please specify argument ``concurrency`` instead. For more information, see https://docs.ray.io/en/master/data/transforming-data.html#stateful-transforms.\n", + "(pid=77873)2025-03-26 16:46:30,046\tINFO streaming_executor.py:108 -- Starting execution of Dataset. Full logs are in /tmp/ray/session_2025-03-26_16-46-26_951347_77841/logs/ray-data\n", + "2025-03-26 16:46:30,046\tINFO streaming_executor.py:109 -- Execution plan of Dataset: InputDataBuffer[Input] -> ActorPoolMapOperator[ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)] -> TaskPoolMapOperator[MapBatches(materialize)->MapBatches(split_doc)->MapBatches(llm_map)->MapBatches(parse_json_and_cast)->MapBatches(process_doc)->MapBatches(removeOriginalElements)->MapBatches(process_doc)->MapBatches(mergeDialogue)->MapBatches(process_doc)->MapBatches(OpenAIEmbedder)->MapBatches(_write_docs_tt)]\n", + " INFO:root:Spurious log 1: Verifying that log messages are propagated\n", + "Running 0: 0.00 row [00:00, ? row/s]\n", + "- ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap) 1: 0.00 row [00:00, ? row/s]2025-03-26 16:46:31,361\tWARNING progress_bar.py:120 -- Truncating long operator name to 100 characters. To disable this behavior, set `ray.data.DataContext.get_current().DEFAULT_ENABLE_PROGRESS_BAR_NAME_TRUNCATION = False`.\n", + "\n", + "\n", + " 0 row [00:00, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:httpx:HTTP Request: POST https://api.aryn.cloud/v1/document/partition \"HTTP/1.1 200 OK\"\n", + "Running Dataset. Active & requested resources: 1/11 CPU, 0.0B/1.0GB object store: : 0.00 row [00:01, ? row/s]\n", + "- ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap) 1: 0.00 row [00:00, ? row/s]\n", + "\n", + "- MapBatches(materialize)->MapBatches(split_doc)->MapBatches(llm_map)->...->MapBatches(_write_docs_tt) 2: 0.00 row [00:00, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:01, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:01, ? row/s]\n", + "\n", + "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 0; Queued blocks: 0; Resources: 0.0 CPU, 0.0B object store: : 0.00 row [00:01, ? row/s]\n", + "\n", + " object store: : 0.00 row [00:01, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.00: Server version aryn-partitioner-0.20250323.124348 Model version 1.4\n", + "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:03, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:01, ? row/s]\n", + "\n", + " object store: : 0.00 row [00:01, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.00: Server version aryn-partitioner-0.20250323.124348 Model version 1.4\n", + "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:03, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:01, ? row/s]\n", + "\n", + " object store: : 0.00 row [00:01, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.00: Received request with aryn_call_id=aryn:call-4dvkd9uyoo8krhhs5bmkndg\n", + "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:03, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:01, ? row/s]\n", + "\n", + " object store: : 0.00 row [00:01, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.00: Received request with aryn_call_id=aryn:call-4dvkd9uyoo8krhhs5bmkndg\n", + "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:03, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:01, ? row/s]\n", + "\n", + " object store: : 0.00 row [00:01, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.00: Waiting for scheduling\n", + "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:03, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:01, ? row/s]\n", + "\n", + " object store: : 0.00 row [00:01, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.00: Waiting for scheduling\n", + "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:03, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:01, ? row/s]\n", + "\n", + " object store: : 0.00 row [00:01, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.01: Preprocessing document\n", + "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:03, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:02, ? row/s]\n", + "\n", + " object store: : 0.00 row [00:02, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.01: Preprocessing document\n", + "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:03, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:02, ? row/s]\n", + "\n", + " object store: : 0.00 row [00:02, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.01: Done preprocessing document\n", + "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:03, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:02, ? row/s]\n", + "\n", + " object store: : 0.00 row [00:02, ? row/s]\n", + " \n", + "\n", + " \n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:02, ? row/s](MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875)\n", + "\n", + "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 0; Queued blocks: 0; Resources: 0.0 CPU, 0.0B object store: : 0.00 row [00:02, ? row/s]\n", + "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:03, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:02, ? row/s]\n", + "\n", + " object store: : 0.00 row [00:02, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.75: Completed work on page 2\n", + "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:03, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:02, ? row/s]\n", + "\n", + " object store: : 0.00 row [00:02, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.75: Completed work on page 2\n", + "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:03, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:02, ? row/s]\n", + "\n", + " object store: : 0.00 row [00:02, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.75: Completed work on page 1\n", + "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:03, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:02, ? row/s]\n", + "\n", + " object store: : 0.00 row [00:02, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.75: Completed work on page 1\n", + "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:03, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:02, ? row/s]\n", + "\n", + " object store: : 0.00 row [00:02, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.85: Completed work on page 3\n", + "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:03, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:02, ? row/s]\n", + "\n", + " object store: : 0.00 row [00:02, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.85: Completed work on page 3\n", + "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:03, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:02, ? row/s]\n", + "\n", + " object store: : 0.00 row [00:02, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.18: Completed work on page 4\n", + "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:04, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:02, ? row/s]\n", + "\n", + " object store: : 0.00 row [00:02, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.18: Completed work on page 4\n", + "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:04, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:02, ? row/s]\n", + "\n", + " object store: : 0.00 row [00:02, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.31: Completed work on page 5\n", + "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:04, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:02, ? row/s]\n", + "\n", + " object store: : 0.00 row [00:02, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.31: Completed work on page 5\n", + "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:04, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:02, ? row/s]\n", + "\n", + " object store: : 0.00 row [00:02, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.57: Completed work on page 6\n", + "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:04, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:02, ? row/s]\n", + "\n", + " object store: : 0.00 row [00:02, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.57: Completed work on page 6\n", + "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:04, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:02, ? row/s]\n", + "\n", + " object store: : 0.00 row [00:02, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.76: Completed work on page 7\n", + "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:04, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:02, ? row/s]\n", + "\n", + " object store: : 0.00 row [00:03, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.76: Completed work on page 7\n", + "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:04, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:03, ? row/s]\n", + "\n", + " object store: : 0.00 row [00:03, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.92: Completed work on page 8\n", + "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:04, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:03, ? row/s]\n", + "\n", + " object store: : 0.00 row [00:03, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.92: Completed work on page 8\n", + "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:04, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:03, ? row/s]\n", + "\n", + "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 0; Queued blocks: 0; Resources: 0.0 CPU, 0.0B object store: : 0.00 row [00:03, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:03, ? row/s]\n", + "\n", + " object store: : 0.00 row [00:03, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.14: Completed work on page 9\n", + "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:05, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:03, ? row/s]\n", + "\n", + " object store: : 0.00 row [00:03, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.14: Completed work on page 9\n", + "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:05, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:03, ? row/s]\n", + "\n", + " object store: : 0.00 row [00:03, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.29: Completed work on page 10\n", + "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:05, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:03, ? row/s]\n", + "\n", + " object store: : 0.00 row [00:03, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.29: Completed work on page 10\n", + "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:05, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:03, ? row/s]\n", + "\n", + " object store: : 0.00 row [00:03, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.46: Completed work on page 11\n", + "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:05, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:03, ? row/s]\n", + "\n", + " object store: : 0.00 row [00:03, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.46: Completed work on page 11\n", + "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:05, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:03, ? row/s]\n", + "\n", + " object store: : 0.00 row [00:03, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.67: Completed work on page 12\n", + "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:05, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:03, ? row/s]\n", + "\n", + " object store: : 0.00 row [00:03, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.67: Completed work on page 12\n", + "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:05, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:04, ? row/s]\n", + "\n", + " object store: : 0.00 row [00:04, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.99: Completed work on page 13\n", + "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:05, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:04, ? row/s]\n", + "\n", + " object store: : 0.00 row [00:04, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.99: Completed work on page 13\n", + "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:05, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:04, ? row/s]\n", + "\n", + " object store: : 0.00 row [00:04, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 3.00: Completed work on page 14\n", + "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:05, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:04, ? row/s]\n", + "\n", + " object store: : 0.00 row [00:04, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 3.00: Completed work on page 14\n", + "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:05, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:04, ? row/s]\n", + "\n", + " object store: : 0.00 row [00:04, ? row/s]\n", + " \n", + "\n", + " \n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:04, ? row/s](pid=77876)\n", + "\n", + "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:05, ? row/s] object store: : 0.00 row [00:04, ? row/s] [repeated 2x across cluster] (Ray deduplicates logs by default. Set RAY_DEDUP_LOGS=0 to disable log deduplication, or see https://docs.ray.io/en/master/ray-observability/user-guides/configure-logging.html#log-deduplication for more options.)\n", + "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:05, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:04, ? row/s]\n", + "\n", + " object store: : 0.00 row [00:04, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 3.15: Completed work on page 15\n", + "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:06, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:04, ? row/s]\n", + "\n", + " object store: : 0.00 row [00:04, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 3.15: Completed work on page 15\n", + "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:06, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:04, ? row/s]\n", + "\n", + " object store: : 0.00 row [00:04, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 3.67: Completed work on page 18\n", + "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:06, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:04, ? row/s]\n", + "\n", + " object store: : 0.00 row [00:04, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 3.67: Completed work on page 18\n", + "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:06, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:04, ? row/s]\n", + "\n", + " object store: : 0.00 row [00:04, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 3.80: Completed work on page 16\n", + "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:06, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:04, ? row/s]\n", + "\n", + " object store: : 0.00 row [00:04, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 3.80: Completed work on page 16\n", + "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:06, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:04, ? row/s]\n", + "\n", + "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 0; Queued blocks: 0; Resources: 0.0 CPU, 0.0B object store: : 0.00 row [00:04, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:05, ? row/s]\n", + "\n", + " object store: : 0.00 row [00:05, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 4.21: Completed work on page 20\n", + "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:07, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:05, ? row/s]\n", + "\n", + " object store: : 0.00 row [00:05, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 4.21: Completed work on page 20\n", + "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:07, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:05, ? row/s]\n", + "\n", + " object store: : 0.00 row [00:05, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 4.35: Completed work on page 19\n", + "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:07, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:05, ? row/s]\n", + "\n", + " object store: : 0.00 row [00:05, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 4.35: Completed work on page 19\n", + "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:07, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:05, ? row/s]\n", + "\n", + " object store: : 0.00 row [00:05, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 4.37: Completed work on page 17\n", + "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:07, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:05, ? row/s]\n", + "\n", + " object store: : 0.00 row [00:05, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 4.37: Completed work on page 17\n", + "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:07, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:05, ? row/s]\n", + "\n", + " object store: : 0.00 row [00:05, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 4.51: Completed work on page 21\n", + "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:07, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:05, ? row/s]\n", + "\n", + " object store: : 0.00 row [00:05, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 4.51: Completed work on page 21\n", + "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:07, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:05, ? row/s]\n", + "\n", + "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 0; Queued blocks: 0; Resources: 0.0 CPU, 0.0B object store: : 0.00 row [00:05, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:06, ? row/s]\n", + "\n", + " object store: : 0.00 row [00:06, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 5.23: Completed work on page 22\n", + "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:08, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:06, ? row/s]\n", + "\n", + " object store: : 0.00 row [00:06, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 5.23: Completed work on page 22\n", + "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:08, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:06, ? row/s]\n", + "\n", + " object store: : 0.00 row [00:06, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 5.34: Completed work on page 23\n", + "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:08, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:06, ? row/s]\n", + "\n", + " object store: : 0.00 row [00:06, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 5.34: Completed work on page 23\n", + "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:08, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:06, ? row/s]\n", + "\n", + " object store: : 0.00 row [00:06, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 5.80: Completed work on page 24\n", + "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:08, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:06, ? row/s]\n", + "\n", + " object store: : 0.00 row [00:06, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 5.80: Completed work on page 24\n", + "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:08, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:06, ? row/s]\n", + "\n", + " object store: : 0.00 row [00:06, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 5.88: Completed work on page 25\n", + "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:08, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:06, ? row/s]\n", + "\n", + " object store: : 0.00 row [00:06, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 5.88: Completed work on page 25\n", + "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:08, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:06, ? row/s]\n", + "\n", + "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 0; Queued blocks: 0; Resources: 0.0 CPU, 0.0B object store: : 0.00 row [00:06, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:07, ? row/s]\n", + "\n", + " object store: : 0.00 row [00:07, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:httpx:HTTP Request: POST https://api.aryn.cloud/v1/document/partition \"HTTP/1.1 200 OK\"\n", + "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:08, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:07, ? row/s]\n", + "\n", + "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 0; Queued blocks: 0; Resources: 0.0 CPU, 0.0B object store: : 0.00 row [00:07, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:08, ? row/s]\n", + "\n", + " object store: : 0.00 row [00:08, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.00: Server version aryn-partitioner-0.20250323.124348 Model version 1.4\n", + "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:09, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:08, ? row/s]\n", + "\n", + " object store: : 0.00 row [00:08, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.00: Server version aryn-partitioner-0.20250323.124348 Model version 1.4\n", + "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:09, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:08, ? row/s]\n", + "\n", + " object store: : 0.00 row [00:08, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.00: Received request with aryn_call_id=aryn:call-aywcdsx681bfya8pwtutmww\n", + "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:09, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:08, ? row/s]\n", + "\n", + " object store: : 0.00 row [00:08, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.00: Received request with aryn_call_id=aryn:call-aywcdsx681bfya8pwtutmww\n", + "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:09, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:08, ? row/s]\n", + "\n", + " object store: : 0.00 row [00:08, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.00: Waiting for scheduling\n", + "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:09, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:08, ? row/s]\n", + "\n", + " object store: : 0.00 row [00:08, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.00: Waiting for scheduling\n", + "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:09, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:08, ? row/s]\n", + "\n", + " object store: : 0.00 row [00:08, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.01: Preprocessing document\n", + "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:10, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:08, ? row/s]\n", + "\n", + " object store: : 0.00 row [00:08, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.01: Preprocessing document\n", + "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:10, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:08, ? row/s]\n", + "\n", + " object store: : 0.00 row [00:08, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.01: Done preprocessing document\n", + "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:10, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:08, ? row/s]\n", + "\n", + " object store: : 0.00 row [00:08, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.01: Done preprocessing document\n", + "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:10, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:08, ? row/s]\n", + "\n", + " object store: : 0.00 row [00:08, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.73: Completed work on page 2\n", + "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:10, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:08, ? row/s]\n", + "\n", + " object store: : 0.00 row [00:08, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.73: Completed work on page 2\n", + "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:10, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:08, ? row/s]\n", + "\n", + " object store: : 0.00 row [00:08, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.84: Completed work on page 3\n", + "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:10, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:08, ? row/s]\n", + "\n", + " object store: : 0.00 row [00:08, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.84: Completed work on page 3\n", + "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:10, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:08, ? row/s]\n", + "\n", + " object store: : 0.00 row [00:08, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.94: Completed work on page 1\n", + "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:10, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:09, ? row/s]\n", + "\n", + " object store: : 0.00 row [00:09, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.94: Completed work on page 1\n", + "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:10, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:09, ? row/s]\n", + "\n", + "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 0; Queued blocks: 0; Resources: 0.0 CPU, 0.0B object store: : 0.00 row [00:09, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:09, ? row/s]\n", + "\n", + " object store: : 0.00 row [00:09, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.16: Completed work on page 4\n", + "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:10, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:09, ? row/s]\n", + "\n", + " object store: : 0.00 row [00:09, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.16: Completed work on page 4\n", + "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:10, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:09, ? row/s]\n", + "\n", + " object store: : 0.00 row [00:09, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.29: Completed work on page 5\n", + "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:10, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:09, ? row/s]\n", + "\n", + " object store: : 0.00 row [00:09, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.29: Completed work on page 5\n", + "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:10, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:09, ? row/s]\n", + "\n", + " object store: : 0.00 row [00:09, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.55: Completed work on page 6\n", + "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:10, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:09, ? row/s]\n", + "\n", + " object store: : 0.00 row [00:09, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.55: Completed work on page 6\n", + "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:11, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:09, ? row/s]\n", + "\n", + " object store: : 0.00 row [00:09, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.73: Completed work on page 7\n", + "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:11, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:09, ? row/s]\n", + "\n", + " object store: : 0.00 row [00:09, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.73: Completed work on page 7\n", + "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:11, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:09, ? row/s]\n", + "\n", + " object store: : 0.00 row [00:09, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.91: Completed work on page 8\n", + "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:11, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:09, ? row/s]\n", + "\n", + " object store: : 0.00 row [00:09, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.91: Completed work on page 8\n", + "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:11, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:09, ? row/s]\n", + "\n", + "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 0; Queued blocks: 0; Resources: 0.0 CPU, 0.0B object store: : 0.00 row [00:09, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:10, ? row/s]\n", + "\n", + " object store: : 0.00 row [00:10, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.08: Completed work on page 9\n", + "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:11, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:10, ? row/s]\n", + "\n", + " object store: : 0.00 row [00:10, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.08: Completed work on page 9\n", + "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:11, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:10, ? row/s]\n", + "\n", + " object store: : 0.00 row [00:10, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.24: Completed work on page 10\n", + "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:12, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:10, ? row/s]\n", + "\n", + " object store: : 0.00 row [00:10, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.24: Completed work on page 10\n", + "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:12, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:10, ? row/s]\n", + "\n", + " object store: : 0.00 row [00:10, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.42: Completed work on page 11\n", + "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:12, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:10, ? row/s]\n", + "\n", + " object store: : 0.00 row [00:10, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.42: Completed work on page 11\n", + "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:12, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:10, ? row/s]\n", + "\n", + " object store: : 0.00 row [00:10, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.59: Completed work on page 12\n", + "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:12, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:10, ? row/s]\n", + "\n", + " object store: : 0.00 row [00:10, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.59: Completed work on page 12\n", + "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:12, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:10, ? row/s]\n", + "\n", + " object store: : 0.00 row [00:10, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.76: Completed work on page 13\n", + "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:12, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:10, ? row/s]\n", + "\n", + " object store: : 0.00 row [00:10, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.76: Completed work on page 13\n", + "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:12, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:10, ? row/s]\n", + "\n", + " object store: : 0.00 row [00:10, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.93: Completed work on page 14\n", + "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:12, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:10, ? row/s]\n", + "\n", + " object store: : 0.00 row [00:10, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.93: Completed work on page 14\n", + "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:12, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:10, ? row/s]\n", + "\n", + " object store: : 0.00 row [00:10, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 3.10: Completed work on page 15\n", + "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:12, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:11, ? row/s]\n", + "\n", + " object store: : 0.00 row [00:11, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 3.10: Completed work on page 15\n", + "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:12, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:11, ? row/s]\n", + "\n", + " object store: : 0.00 row [00:11, ? row/s]\n", + " \n", + "\n", + " \n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:11, ? row/s](MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875)\n", + "\n", + "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 0; Queued blocks: 0; Resources: 0.0 CPU, 0.0B object store: : 0.00 row [00:11, ? row/s]\n", + "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:12, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:11, ? row/s]\n", + "\n", + " object store: : 0.00 row [00:11, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 3.38: Completed work on page 16\n", + "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:12, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:11, ? row/s]\n", + "\n", + " object store: : 0.00 row [00:11, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 3.45: Completed work on page 17\n", + "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:13, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:11, ? row/s]\n", + "\n", + " object store: : 0.00 row [00:11, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 3.45: Completed work on page 17\n", + "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:13, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:11, ? row/s]\n", + "\n", + " object store: : 0.00 row [00:11, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 3.63: Completed work on page 18\n", + "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:13, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:11, ? row/s]\n", + "\n", + " object store: : 0.00 row [00:11, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 3.63: Completed work on page 18\n", + "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:13, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:11, ? row/s]\n", + "\n", + " object store: : 0.00 row [00:11, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 3.76: Completed work on page 19\n", + "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:13, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:11, ? row/s]\n", + "\n", + " object store: : 0.00 row [00:11, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 3.76: Completed work on page 19\n", + "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:13, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:11, ? row/s]\n", + "\n", + " object store: : 0.00 row [00:11, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 4.21: Completed work on page 22\n", + "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:13, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:12, ? row/s]\n", + "\n", + " object store: : 0.00 row [00:12, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 4.21: Completed work on page 22\n", + "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:13, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:12, ? row/s]\n", + "\n", + " object store: : 0.00 row [00:12, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 4.34: Completed work on page 23\n", + "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:14, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:12, ? row/s]\n", + "\n", + " object store: : 0.00 row [00:12, ? row/s]\n", + " \n", + "\n", + " \n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:12, ? row/s](MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875)\n", + "\n", + "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 0; Queued blocks: 0; Resources: 0.0 CPU, 0.0B object store: : 0.00 row [00:12, ? row/s]\n", + "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:14, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:12, ? row/s]\n", + "\n", + " object store: : 0.00 row [00:12, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 4.96: Completed work on page 20\n", + "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:14, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:12, ? row/s]\n", + "\n", + " object store: : 0.00 row [00:12, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 4.96: Completed work on page 20\n", + "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:14, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:12, ? row/s]\n", + "\n", + " object store: : 0.00 row [00:12, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 5.04: Completed work on page 21\n", + "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:14, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:12, ? row/s]\n", + "\n", + " object store: : 0.00 row [00:12, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 5.04: Completed work on page 21\n", + "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:14, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:12, ? row/s]\n", + "\n", + "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 0; Queued blocks: 0; Resources: 0.0 CPU, 0.0B object store: : 0.00 row [00:12, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: 0%| | 0.00/177 [00:13...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: 2%|▉ | 3.00/177 [00:13<13:19, 4.59s/ row]\n", + "Running Dataset. Active & requested resources: 1/11 CPU, 554.0KB/1.0GB object store: : 0.00 row [00:15, ? row/s]ks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: 2%|▉ | 3.00/177 [00:13<13:19, 4.59s/ row]\n", + "\n", + "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 1; Queued blocks: 0; Resources: 1.0 CPU, 256.0MB object store: : 0.00 row [00:13, ? row/s]\n", + "\n", + " 0MB object store: : 0.00 row [00:13, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:httpx:HTTP Request: POST https://api.aryn.cloud/v1/document/partition \"HTTP/1.1 200 OK\"\n", + "Running Dataset. Active & requested resources: 1/11 CPU, 554.0KB/1.0GB object store: : 0.00 row [00:15, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: 2%|▉ | 3.00/177 [00:14<13:19, 4.59s/ row]\n", + "\n", + "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 1; Queued blocks: 0; Resources: 1.0 CPU, 256.0MB object store: : 0.00 row [00:14, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:14<13:19, 4.59s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:14<13:19, 4.59s/ row]\n", + "\n", + " 0MB object store: : 0.00 row [00:14, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.00: Server version aryn-partitioner-0.20250323.124348 Model version 1.4\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:16, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:15<13:19, 4.59s/ row]\n", + "\n", + " 0MB object store: : 0.00 row [00:15, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.00: Server version aryn-partitioner-0.20250323.124348 Model version 1.4\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:16, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:15<13:19, 4.59s/ row]\n", + "\n", + " 0MB object store: : 0.00 row [00:15, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.00: Received request with aryn_call_id=aryn:call-u9hgsqsldoi0j5e71uw3lw1\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:16, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:15<13:19, 4.59s/ row]\n", + "\n", + " 0MB object store: : 0.00 row [00:15, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.00: Received request with aryn_call_id=aryn:call-u9hgsqsldoi0j5e71uw3lw1\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:16, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:15<13:19, 4.59s/ row]\n", + "\n", + " 0MB object store: : 0.00 row [00:15, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.00: Waiting for scheduling\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:16, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:15<13:19, 4.59s/ row]\n", + "\n", + " 0MB object store: : 0.00 row [00:15, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.00: Waiting for scheduling\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:16, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:15<13:19, 4.59s/ row]\n", + "\n", + " 0MB object store: : 0.00 row [00:15, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.01: Preprocessing document\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:16, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:15<13:19, 4.59s/ row]\n", + "\n", + " 0MB object store: : 0.00 row [00:15, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.01: Preprocessing document\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:16, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:15<13:19, 4.59s/ row]\n", + "\n", + " 0MB object store: : 0.00 row [00:15, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.01: Done preprocessing document\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:16, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:15<13:19, 4.59s/ row]\n", + "\n", + " 0MB object store: : 0.00 row [00:15, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.01: Done preprocessing document\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:16, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:15<13:19, 4.59s/ row]\n", + "\n", + " 0MB object store: : 0.00 row [00:15, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.65: Completed work on page 2\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:16, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:15<13:19, 4.59s/ row]\n", + "\n", + " 0MB object store: : 0.00 row [00:15, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.65: Completed work on page 2\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:16, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:15<13:19, 4.59s/ row]\n", + "\n", + " 0MB object store: : 0.00 row [00:15, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.68: Completed work on page 1\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:16, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:15<13:19, 4.59s/ row]\n", + "\n", + " 0MB object store: : 0.00 row [00:15, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.68: Completed work on page 1\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:17, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:15<13:19, 4.59s/ row]\n", + "\n", + " 0MB object store: : 0.00 row [00:15, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.86: Completed work on page 3\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:17, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:15<13:19, 4.59s/ row]\n", + "\n", + " 0MB object store: : 0.00 row [00:15, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.86: Completed work on page 3\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:17, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:15<13:19, 4.59s/ row]\n", + "\n", + " 0MB object store: : 0.00 row [00:15, ? row/s]\n", + " \n", + "\n", + "(pid=77877) INFO:root:Spurious log 1: Verifying that log messages are propagated \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:17, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:15<13:19, 4.59s/ row]\n", + "\n", + "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 1; Queued blocks: 0; Resources: 1.0 CPU, 256.0MB object store: : 0.00 row [00:15, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:15<13:19, 4.59s/ row]\n", + "\n", + " 0MB object store: : 0.00 row [00:15, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.18: Completed work on page 4\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:17, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:16<13:19, 4.59s/ row]\n", + "\n", + " 0MB object store: : 0.00 row [00:16, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.18: Completed work on page 4\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:17, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:16<13:19, 4.59s/ row]\n", + "\n", + " 0MB object store: : 0.00 row [00:16, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.32: Completed work on page 5\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:17, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:16<13:19, 4.59s/ row]\n", + "\n", + " 0MB object store: : 0.00 row [00:16, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.32: Completed work on page 5\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:17, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:16<13:19, 4.59s/ row]\n", + "\n", + " 0MB object store: : 0.00 row [00:16, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.55: Completed work on page 6\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:17, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:16<13:19, 4.59s/ row]\n", + "\n", + " 0MB object store: : 0.00 row [00:16, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.55: Completed work on page 6\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:17, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:16<13:19, 4.59s/ row]\n", + "\n", + " 0MB object store: : 0.00 row [00:16, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.79: Completed work on page 7\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:17, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:16<13:19, 4.59s/ row]\n", + "\n", + " 0MB object store: : 0.00 row [00:16, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.79: Completed work on page 7\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:17, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:16<13:19, 4.59s/ row]\n", + "\n", + " 0MB object store: : 0.00 row [00:16, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.99: Completed work on page 8\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:17, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:16<13:19, 4.59s/ row]\n", + "\n", + " 0MB object store: : 0.00 row [00:16, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.99: Completed work on page 8\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:17, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:16<13:19, 4.59s/ row]\n", + "\n", + "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 1; Queued blocks: 0; Resources: 1.0 CPU, 256.0MB object store: : 0.00 row [00:16, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:16<13:19, 4.59s/ row]\n", + "\n", + " 0MB object store: : 0.00 row [00:16, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.17: Completed work on page 9\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:18, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:17<13:19, 4.59s/ row]\n", + "\n", + " 0MB object store: : 0.00 row [00:17, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.17: Completed work on page 9\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:18, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:17<13:19, 4.59s/ row]\n", + "\n", + " 0MB object store: : 0.00 row [00:17, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.33: Completed work on page 10\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:18, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:17<13:19, 4.59s/ row]\n", + "\n", + " 0MB object store: : 0.00 row [00:17, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.33: Completed work on page 10\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:18, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:17<13:19, 4.59s/ row]\n", + "\n", + " 0MB object store: : 0.00 row [00:17, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.62: Completed work on page 11\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:18, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:17<13:19, 4.59s/ row]\n", + "\n", + " 0MB object store: : 0.00 row [00:17, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.62: Completed work on page 11\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:18, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:17<13:19, 4.59s/ row]\n", + "\n", + " 0MB object store: : 0.00 row [00:17, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.69: Completed work on page 12\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:18, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:17<13:19, 4.59s/ row]\n", + "\n", + " 0MB object store: : 0.00 row [00:17, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.69: Completed work on page 12\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:18, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:17<13:19, 4.59s/ row]\n", + "\n", + " 0MB object store: : 0.00 row [00:17, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.87: Completed work on page 13\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:18, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:17<13:19, 4.59s/ row]\n", + "\n", + " 0MB object store: : 0.00 row [00:17, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.87: Completed work on page 13\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:18, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:17<13:19, 4.59s/ row]\n", + "\n", + "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 1; Queued blocks: 0; Resources: 1.0 CPU, 256.0MB object store: : 0.00 row [00:17, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:18<13:19, 4.59s/ row]\n", + "\n", + " 0MB object store: : 0.00 row [00:18, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 3.05: Completed work on page 14\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:19, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:18<13:19, 4.59s/ row]\n", + "\n", + " 0MB object store: : 0.00 row [00:18, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 3.05: Completed work on page 14\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:19, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:18<13:19, 4.59s/ row]\n", + "\n", + " 0MB object store: : 0.00 row [00:18, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 3.23: Completed work on page 15\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:19, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:18<13:19, 4.59s/ row]\n", + "\n", + " 0MB object store: : 0.00 row [00:18, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 3.23: Completed work on page 15\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:19, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:18<13:19, 4.59s/ row]\n", + "\n", + " 0MB object store: : 0.00 row [00:18, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 3.41: Completed work on page 16\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:19, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:18<13:19, 4.59s/ row]\n", + "\n", + " 0MB object store: : 0.00 row [00:18, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 3.41: Completed work on page 16\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:19, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:18<13:19, 4.59s/ row]\n", + "\n", + " 0MB object store: : 0.00 row [00:18, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 3.59: Completed work on page 17\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:19, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:18<13:19, 4.59s/ row]\n", + "\n", + " 0MB object store: : 0.00 row [00:18, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 3.59: Completed work on page 17\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:19, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:18<13:19, 4.59s/ row]\n", + "\n", + " 0MB object store: : 0.00 row [00:18, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 3.77: Completed work on page 18\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:19, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:18<13:19, 4.59s/ row]\n", + "\n", + " 0MB object store: : 0.00 row [00:18, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 3.77: Completed work on page 18\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:19, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:18<13:19, 4.59s/ row]\n", + "\n", + " 0MB object store: : 0.00 row [00:18, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 3.96: Completed work on page 19\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:19, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:18<13:19, 4.59s/ row]\n", + "\n", + " 0MB object store: : 0.00 row [00:18, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 3.96: Completed work on page 19\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:19, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:18<13:19, 4.59s/ row]\n", + "\n", + "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 1; Queued blocks: 0; Resources: 1.0 CPU, 256.0MB object store: : 0.00 row [00:18, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:19<13:19, 4.59s/ row]\n", + "\n", + " 0MB object store: : 0.00 row [00:19, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 4.12: Completed work on page 20\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:20, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:19<13:19, 4.59s/ row]\n", + "\n", + " 0MB object store: : 0.00 row [00:19, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 4.12: Completed work on page 20\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:20, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:19<13:19, 4.59s/ row]\n", + "\n", + " 0MB object store: : 0.00 row [00:19, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 4.34: Completed work on page 21\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:20, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:19<13:19, 4.59s/ row]\n", + "\n", + " 0MB object store: : 0.00 row [00:19, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 4.34: Completed work on page 21\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:20, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:19<13:19, 4.59s/ row]\n", + "\n", + " 0MB object store: : 0.00 row [00:19, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 4.64: Completed work on page 24\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:20, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:19<13:19, 4.59s/ row]\n", + "\n", + " 0MB object store: : 0.00 row [00:19, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 4.64: Completed work on page 24\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:20, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:19<13:19, 4.59s/ row]\n", + "\n", + " 0MB object store: : 0.00 row [00:19, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 4.78: Completed work on page 25\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:20, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:19<13:19, 4.59s/ row]\n", + "\n", + " 0MB object store: : 0.00 row [00:19, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 4.78: Completed work on page 25\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:20, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:19<13:19, 4.59s/ row]\n", + "\n", + " 0MB object store: : 0.00 row [00:19, ? row/s]\n", + " \n", + "\n", + "(MapBatches(materialize)->MapBatches(split_doc)->MapBatches(llm_map)->MapBatches(parse_json_and_cast)->MapBatches(process_doc)->MapBatches(removeOriginalElements)->MapBatches(process_doc)->MapBatches(mergeDialogue)->MapBatches(process_doc)->MapBatches(OpenAIEmbedder)->MapBatches(_write_docs_tt) pid=77877) INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\" [repeated 2x across cluster]\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:20, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:19<13:19, 4.59s/ row]\n", + "\n", + "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 1; Queued blocks: 0; Resources: 1.0 CPU, 256.0MB object store: : 0.00 row [00:19, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:20<13:19, 4.59s/ row]\n", + "\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:21, ? row/s]0MB object store: : 0.00 row [00:20, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:21<13:19, 4.59s/ row]\n", + "\n", + " 0MB object store: : 0.00 row [00:21, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 6.12: Completed work on page 23\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:22, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:21<13:19, 4.59s/ row]\n", + "\n", + " 0MB object store: : 0.00 row [00:21, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 6.12: Completed work on page 23\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:22, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:21<13:19, 4.59s/ row]\n", + "\n", + " 0MB object store: : 0.00 row [00:21, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 6.16: Completed work on page 22\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:22, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:21<13:19, 4.59s/ row]\n", + "\n", + " 0MB object store: : 0.00 row [00:21, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 6.16: Completed work on page 22\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:22, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:21<13:19, 4.59s/ row]\n", + "\n", + "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 1; Queued blocks: 0; Resources: 1.0 CPU, 256.0MB object store: : 0.00 row [00:21, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:22<13:19, 4.59s/ row]\n", + "\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:23, ? row/s]0MB object store: : 0.00 row [00:22, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:23<13:19, 4.59s/ row]\n", + "\n", + " 0MB object store: : 0.00 row [00:23, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.00: Server version aryn-partitioner-0.20250323.124348 Model version 1.4\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:24, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:23<13:19, 4.59s/ row]\n", + "\n", + " 0MB object store: : 0.00 row [00:23, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.00: Server version aryn-partitioner-0.20250323.124348 Model version 1.4\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:24, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:23<13:19, 4.59s/ row]\n", + "\n", + " 0MB object store: : 0.00 row [00:23, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.00: Received request with aryn_call_id=aryn:call-o4kfhvxsic918s6dmk0rkye\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:25, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:23<13:19, 4.59s/ row]\n", + "\n", + " 0MB object store: : 0.00 row [00:23, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.00: Received request with aryn_call_id=aryn:call-o4kfhvxsic918s6dmk0rkye\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:25, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:23<13:19, 4.59s/ row]\n", + "\n", + " 0MB object store: : 0.00 row [00:23, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.00: Waiting for scheduling\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:25, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:23<13:19, 4.59s/ row]\n", + "\n", + " 0MB object store: : 0.00 row [00:23, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.00: Waiting for scheduling\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:25, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:23<13:19, 4.59s/ row]\n", + "\n", + " 0MB object store: : 0.00 row [00:23, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.03: Preprocessing document\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:25, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:23<13:19, 4.59s/ row]\n", + "\n", + " 0MB object store: : 0.00 row [00:23, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.03: Preprocessing document\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:25, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:23<13:19, 4.59s/ row]\n", + "\n", + " 0MB object store: : 0.00 row [00:23, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.03: Done preprocessing document\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:25, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:23<13:19, 4.59s/ row]\n", + "\n", + " 0MB object store: : 0.00 row [00:23, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.03: Done preprocessing document\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:25, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:23<13:19, 4.59s/ row]\n", + "\n", + " 0MB object store: : 0.00 row [00:23, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.66: Completed work on page 1\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:25, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:23<13:19, 4.59s/ row]\n", + "\n", + " 0MB object store: : 0.00 row [00:23, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.66: Completed work on page 1\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:25, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:23<13:19, 4.59s/ row]\n", + "\n", + "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 1; Queued blocks: 0; Resources: 1.0 CPU, 256.0MB object store: : 0.00 row [00:23, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:24<13:19, 4.59s/ row]\n", + "\n", + " 0MB object store: : 0.00 row [00:24, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.02: Completed work on page 4\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:26, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:24<13:19, 4.59s/ row]\n", + "\n", + " 0MB object store: : 0.00 row [00:24, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.02: Completed work on page 4\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:26, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:24<13:19, 4.59s/ row]\n", + "\n", + " 0MB object store: : 0.00 row [00:24, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.23: Completed work on page 6\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:26, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:24<13:19, 4.59s/ row]\n", + "\n", + " 0MB object store: : 0.00 row [00:24, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.23: Completed work on page 6\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:26, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:24<13:19, 4.59s/ row]\n", + "\n", + " 0MB object store: : 0.00 row [00:24, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.34: Completed work on page 5\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:26, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:24<13:19, 4.59s/ row]\n", + "\n", + " 0MB object store: : 0.00 row [00:24, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.34: Completed work on page 5\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:26, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:24<13:19, 4.59s/ row]\n", + "\n", + " 0MB object store: : 0.00 row [00:24, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.34: Completed work on page 7\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:26, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:24<13:19, 4.59s/ row]\n", + "\n", + " 0MB object store: : 0.00 row [00:24, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.34: Completed work on page 7\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:26, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:24<13:19, 4.59s/ row]\n", + "\n", + " 0MB object store: : 0.00 row [00:24, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.49: Completed work on page 8\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:26, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:24<13:19, 4.59s/ row]\n", + "\n", + " 0MB object store: : 0.00 row [00:24, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.49: Completed work on page 8\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:26, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:24<13:19, 4.59s/ row]\n", + "\n", + " 0MB object store: : 0.00 row [00:24, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.49: Completed work on page 3\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:26, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:24<13:19, 4.59s/ row]\n", + "\n", + " 0MB object store: : 0.00 row [00:24, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.49: Completed work on page 3\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:26, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:24<13:19, 4.59s/ row]\n", + "\n", + " 0MB object store: : 0.00 row [00:24, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.49: Completed work on page 2\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:26, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:24<13:19, 4.59s/ row]\n", + "\n", + " 0MB object store: : 0.00 row [00:24, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.49: Completed work on page 2\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:26, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:25<13:19, 4.59s/ row]\n", + "\n", + " 0MB object store: : 0.00 row [00:25, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.60: Completed work on page 9\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:26, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:25<13:19, 4.59s/ row]\n", + "\n", + " 0MB object store: : 0.00 row [00:25, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.60: Completed work on page 9\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:26, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:25<13:19, 4.59s/ row]\n", + "\n", + " 0MB object store: : 0.00 row [00:25, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.73: Completed work on page 10\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:26, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:25<13:19, 4.59s/ row]\n", + "\n", + " 0MB object store: : 0.00 row [00:25, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.73: Completed work on page 10\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:26, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:25<13:19, 4.59s/ row]\n", + "\n", + " 0MB object store: : 0.00 row [00:25, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.83: Completed work on page 11\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:26, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:25<13:19, 4.59s/ row]\n", + "\n", + " 0MB object store: : 0.00 row [00:25, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.83: Completed work on page 11\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:26, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:25<13:19, 4.59s/ row]\n", + "\n", + " 0MB object store: : 0.00 row [00:25, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.93: Completed work on page 12\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:26, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:25<13:19, 4.59s/ row]\n", + "\n", + " 0MB object store: : 0.00 row [00:25, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.93: Completed work on page 12\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:26, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:25<13:19, 4.59s/ row]\n", + "\n", + " 0MB object store: : 0.00 row [00:25, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:httpx:HTTP Request: POST https://api.aryn.cloud/v1/document/partition \"HTTP/1.1 200 OK\"\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:26, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:25<13:19, 4.59s/ row]\n", + "\n", + "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 1; Queued blocks: 0; Resources: 1.0 CPU, 256.0MB object store: : 0.00 row [00:25, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:25<13:19, 4.59s/ row]\n", + "\n", + " 0MB object store: : 0.00 row [00:25, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.06: Completed work on page 13\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:26, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:25<13:19, 4.59s/ row]\n", + "\n", + " 0MB object store: : 0.00 row [00:25, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.06: Completed work on page 13\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:26, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:25<13:19, 4.59s/ row]\n", + "\n", + " 0MB object store: : 0.00 row [00:25, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.26: Completed work on page 14\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:26, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:25<13:19, 4.59s/ row]\n", + "\n", + " 0MB object store: : 0.00 row [00:25, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.26: Completed work on page 14\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:27, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:25<13:19, 4.59s/ row]\n", + "\n", + " 0MB object store: : 0.00 row [00:25, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.84: Completed work on page 15\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:27, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:25<13:19, 4.59s/ row]\n", + "\n", + " 0MB object store: : 0.00 row [00:25, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.84: Completed work on page 15\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:27, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:25<13:19, 4.59s/ row]\n", + "\n", + "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 1; Queued blocks: 0; Resources: 1.0 CPU, 256.0MB object store: : 0.00 row [00:25, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:26<13:19, 4.59s/ row]\n", + "\n", + " 0MB object store: : 0.00 row [00:26, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 3.05: Completed work on page 16\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:28, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:26<13:19, 4.59s/ row]\n", + "\n", + " 0MB object store: : 0.00 row [00:26, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 3.05: Completed work on page 16\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:28, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:26<13:19, 4.59s/ row]\n", + "\n", + " 0MB object store: : 0.00 row [00:26, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 3.17: Completed work on page 17\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:28, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:26<13:19, 4.59s/ row]\n", + "\n", + " 0MB object store: : 0.00 row [00:26, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 3.17: Completed work on page 17\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:28, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:26<13:19, 4.59s/ row]\n", + "\n", + " 0MB object store: : 0.00 row [00:26, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 3.52: Completed work on page 19\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:28, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:26<13:19, 4.59s/ row]\n", + "\n", + " 0MB object store: : 0.00 row [00:26, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 3.52: Completed work on page 19\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:28, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:26<13:19, 4.59s/ row]\n", + "\n", + " 0MB object store: : 0.00 row [00:26, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 3.58: Completed work on page 21\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:28, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:26<13:19, 4.59s/ row]\n", + "\n", + " 0MB object store: : 0.00 row [00:26, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 3.58: Completed work on page 21\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:28, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:26<13:19, 4.59s/ row]\n", + "\n", + " 0MB object store: : 0.00 row [00:26, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 3.68: Completed work on page 22\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:28, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:26<13:19, 4.59s/ row]\n", + "\n", + " 0MB object store: : 0.00 row [00:26, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 3.68: Completed work on page 22\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:28, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:26<13:19, 4.59s/ row]\n", + "\n", + " 0MB object store: : 0.00 row [00:26, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 3.68: Completed work on page 20\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:28, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:26<13:19, 4.59s/ row]\n", + "\n", + " 0MB object store: : 0.00 row [00:26, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 3.68: Completed work on page 20\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:28, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:26<13:19, 4.59s/ row]\n", + "\n", + " 0MB object store: : 0.00 row [00:26, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 3.87: Completed work on page 23\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:28, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:26<13:19, 4.59s/ row]\n", + "\n", + " 0MB object store: : 0.00 row [00:26, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 3.87: Completed work on page 23\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:28, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:26<13:19, 4.59s/ row]\n", + "\n", + " 0MB object store: : 0.00 row [00:26, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 4.03: Completed work on page 18\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:28, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:27<13:19, 4.59s/ row]\n", + "\n", + " 0MB object store: : 0.00 row [00:27, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 4.03: Completed work on page 18\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:28, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:27<13:19, 4.59s/ row]\n", + "\n", + " 0MB object store: : 0.00 row [00:27, ? row/s]\n", + " \n", + "\n", + "(MapBatches(materialize)->MapBatches(split_doc)->MapBatches(llm_map)->MapBatches(parse_json_and_cast)->MapBatches(process_doc)->MapBatches(removeOriginalElements)->MapBatches(process_doc)->MapBatches(mergeDialogue)->MapBatches(process_doc)->MapBatches(OpenAIEmbedder)->MapBatches(_write_docs_tt) pid=77877) INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:28, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:27<13:19, 4.59s/ row]\n", + "\n", + "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 1; Queued blocks: 0; Resources: 1.0 CPU, 256.0MB object store: : 0.00 row [00:27, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:27<13:19, 4.59s/ row]\n", + "\n", + " 0MB object store: : 0.00 row [00:27, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 4.19: Completed work on page 25\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:29, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:27<13:19, 4.59s/ row]\n", + "\n", + " 0MB object store: : 0.00 row [00:27, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 4.19: Completed work on page 25\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:29, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:27<13:19, 4.59s/ row]\n", + "\n", + " 0MB object store: : 0.00 row [00:27, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 4.28: Completed work on page 24\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:29, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:27<13:19, 4.59s/ row]\n", + "\n", + " 0MB object store: : 0.00 row [00:27, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 4.28: Completed work on page 24\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:29, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:27<13:19, 4.59s/ row]\n", + "\n", + " 0MB object store: : 0.00 row [00:27, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 4.35: Completed work on page 27\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:29, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:27<13:19, 4.59s/ row]\n", + "\n", + " 0MB object store: : 0.00 row [00:27, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 4.35: Completed work on page 27\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:29, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:27<13:19, 4.59s/ row]\n", + "\n", + " 0MB object store: : 0.00 row [00:27, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 4.44: Completed work on page 26\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:29, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:27<13:19, 4.59s/ row]\n", + "\n", + " 0MB object store: : 0.00 row [00:27, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 4.44: Completed work on page 26\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:29, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:27<13:19, 4.59s/ row]\n", + "\n", + " 0MB object store: : 0.00 row [00:27, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 4.63: Completed work on page 30\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:29, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:27<13:19, 4.59s/ row]\n", + "\n", + " 0MB object store: : 0.00 row [00:27, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 4.63: Completed work on page 30\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:29, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:27<13:19, 4.59s/ row]\n", + "\n", + " 0MB object store: : 0.00 row [00:27, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 4.82: Completed work on page 28\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:29, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:27<13:19, 4.59s/ row]\n", + "\n", + " 0MB object store: : 0.00 row [00:27, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 4.82: Completed work on page 28\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:29, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:27<13:19, 4.59s/ row]\n", + "\n", + " 0MB object store: : 0.00 row [00:27, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 4.88: Completed work on page 29\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:29, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:28<13:19, 4.59s/ row]\n", + "\n", + " 0MB object store: : 0.00 row [00:28, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 4.88: Completed work on page 29\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:29, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:28<13:19, 4.59s/ row]\n", + "\n", + " 0MB object store: : 0.00 row [00:28, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 4.97: Completed work on page 33\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:29, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:28<13:19, 4.59s/ row]\n", + "\n", + " 0MB object store: : 0.00 row [00:28, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 4.97: Completed work on page 33\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:29, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:28<13:19, 4.59s/ row]\n", + "\n", + "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 1; Queued blocks: 0; Resources: 1.0 CPU, 256.0MB object store: : 0.00 row [00:28, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:28<13:19, 4.59s/ row]\n", + "\n", + " 0MB object store: : 0.00 row [00:28, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 5.17: Completed work on page 32\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:30, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:28<13:19, 4.59s/ row]\n", + "\n", + " 0MB object store: : 0.00 row [00:28, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 5.17: Completed work on page 32\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:30, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:28<13:19, 4.59s/ row]\n", + "\n", + " 0MB object store: : 0.00 row [00:28, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 5.66: Completed work on page 35\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:30, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:28<13:19, 4.59s/ row]\n", + "\n", + " 0MB object store: : 0.00 row [00:28, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 5.66: Completed work on page 35\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:30, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:28<13:19, 4.59s/ row]\n", + "\n", + " 0MB object store: : 0.00 row [00:28, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 5.72: Completed work on page 34\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:30, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:28<13:19, 4.59s/ row]\n", + "\n", + " 0MB object store: : 0.00 row [00:28, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 5.72: Completed work on page 34\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:30, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:28<13:19, 4.59s/ row]\n", + "\n", + " 0MB object store: : 0.00 row [00:28, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 5.72: Completed work on page 36\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:30, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:28<13:19, 4.59s/ row]\n", + "\n", + " 0MB object store: : 0.00 row [00:28, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 5.72: Completed work on page 36\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:30, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:28<13:19, 4.59s/ row]\n", + "\n", + " 0MB object store: : 0.00 row [00:28, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 5.72: Completed work on page 37\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:30, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:28<13:19, 4.59s/ row]\n", + "\n", + " 0MB object store: : 0.00 row [00:28, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 5.72: Completed work on page 37\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:30, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:28<13:19, 4.59s/ row]\n", + "\n", + " 0MB object store: : 0.00 row [00:28, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 5.85: Completed work on page 31\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:30, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:28<13:19, 4.59s/ row]\n", + "\n", + " 0MB object store: : 0.00 row [00:28, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 5.85: Completed work on page 31\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:30, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:28<13:19, 4.59s/ row]\n", + "\n", + " 0MB object store: : 0.00 row [00:28, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 5.97: Completed work on page 38\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:30, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:28<13:19, 4.59s/ row]\n", + "\n", + " 0MB object store: : 0.00 row [00:28, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 5.97: Completed work on page 38\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:30, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:29<13:19, 4.59s/ row]\n", + "\n", + "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 1; Queued blocks: 0; Resources: 1.0 CPU, 256.0MB object store: : 0.00 row [00:29, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:29<13:19, 4.59s/ row]\n", + "\n", + " 0MB object store: : 0.00 row [00:29, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 6.10: Completed work on page 42\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:31, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:29<13:19, 4.59s/ row]\n", + "\n", + " 0MB object store: : 0.00 row [00:29, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 6.10: Completed work on page 42\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:31, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:29<13:19, 4.59s/ row]\n", + "\n", + "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 1; Queued blocks: 0; Resources: 1.0 CPU, 256.0MB object store: : 0.00 row [00:29, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:30<13:19, 4.59s/ row]\n", + "\n", + " 0MB object store: : 0.00 row [00:30, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 7.22: Completed work on page 40\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:32, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:30<13:19, 4.59s/ row]\n", + "\n", + " 0MB object store: : 0.00 row [00:30, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 7.22: Completed work on page 40\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:32, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:30<13:19, 4.59s/ row]\n", + "\n", + " 0MB object store: : 0.00 row [00:30, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 7.35: Completed work on page 39\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:32, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:30<13:19, 4.59s/ row]\n", + "\n", + " 0MB object store: : 0.00 row [00:30, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 7.35: Completed work on page 39\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:32, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:30<13:19, 4.59s/ row]\n", + "\n", + " 0MB object store: : 0.00 row [00:30, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 7.41: Completed work on page 41\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:32, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:30<13:19, 4.59s/ row]\n", + "\n", + " 0MB object store: : 0.00 row [00:30, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 7.41: Completed work on page 41\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:32, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:30<13:19, 4.59s/ row]\n", + "\n", + "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 1; Queued blocks: 0; Resources: 1.0 CPU, 256.0MB object store: : 0.00 row [00:30, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.9MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:31<13:19, 4.59s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.9MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:31<15:26, 5.42s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.9MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:31<15:26, 5.42s/ row]\n", + "\n", + "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 2 [backpressured]; Queued blocks: 0; Resources: 2.0 CPU, 512.0MB object store: : 0.00 row [00:31, ? row/s]\n", + "\n", + " s: 2.0 CPU, 512.0MB object store: : 0.00 row [00:31, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.00: Server version aryn-partitioner-0.20250323.124348 Model version 1.4\n", + "Running Dataset. Active & requested resources: 3/11 CPU, 515.9MB/1.0GB object store: : 0.00 row [00:33, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.9MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:32<15:26, 5.42s/ row]\n", + "\n", + " s: 2.0 CPU, 512.0MB object store: : 0.00 row [00:32, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.00: Server version aryn-partitioner-0.20250323.124348 Model version 1.4\n", + "Running Dataset. Active & requested resources: 3/11 CPU, 515.9MB/1.0GB object store: : 0.00 row [00:33, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.9MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:32<15:26, 5.42s/ row]\n", + "\n", + " s: 2.0 CPU, 512.0MB object store: : 0.00 row [00:32, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.00: Received request with aryn_call_id=aryn:call-r7450dne5dssfkfw1nz1sr5\n", + "Running Dataset. Active & requested resources: 3/11 CPU, 515.9MB/1.0GB object store: : 0.00 row [00:33, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.9MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:32<15:26, 5.42s/ row]\n", + "\n", + " s: 2.0 CPU, 512.0MB object store: : 0.00 row [00:32, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.00: Received request with aryn_call_id=aryn:call-r7450dne5dssfkfw1nz1sr5\n", + "Running Dataset. Active & requested resources: 3/11 CPU, 515.9MB/1.0GB object store: : 0.00 row [00:33, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.9MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:32<15:26, 5.42s/ row]\n", + "\n", + " s: 2.0 CPU, 512.0MB object store: : 0.00 row [00:32, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.00: Waiting for scheduling \n", + "Running Dataset. Active & requested resources: 3/11 CPU, 515.9MB/1.0GB object store: : 0.00 row [00:33, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.9MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:32<15:26, 5.42s/ row]\n", + "\n", + " s: 2.0 CPU, 512.0MB object store: : 0.00 row [00:32, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.00: Waiting for scheduling\n", + "Running Dataset. Active & requested resources: 3/11 CPU, 515.9MB/1.0GB object store: : 0.00 row [00:33, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.9MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:32<15:26, 5.42s/ row]\n", + "\n", + " s: 2.0 CPU, 512.0MB object store: : 0.00 row [00:32, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.01: Preprocessing document \n", + "Running Dataset. Active & requested resources: 3/11 CPU, 515.9MB/1.0GB object store: : 0.00 row [00:34, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.9MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:32<15:26, 5.42s/ row]\n", + "\n", + " s: 2.0 CPU, 512.0MB object store: : 0.00 row [00:32, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.01: Preprocessing document\n", + "Running Dataset. Active & requested resources: 3/11 CPU, 515.9MB/1.0GB object store: : 0.00 row [00:34, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.9MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:32<15:26, 5.42s/ row]\n", + "\n", + " s: 2.0 CPU, 512.0MB object store: : 0.00 row [00:32, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.01: Done preprocessing document \n", + "Running Dataset. Active & requested resources: 3/11 CPU, 515.9MB/1.0GB object store: : 0.00 row [00:34, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.9MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:32<15:26, 5.42s/ row]\n", + "\n", + " s: 2.0 CPU, 512.0MB object store: : 0.00 row [00:32, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.01: Done preprocessing document\n", + "Running Dataset. Active & requested resources: 3/11 CPU, 515.9MB/1.0GB object store: : 0.00 row [00:34, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.9MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:32<15:26, 5.42s/ row]\n", + "\n", + " s: 2.0 CPU, 512.0MB object store: : 0.00 row [00:32, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.90: Completed work on page 2 \n", + "Running Dataset. Active & requested resources: 3/11 CPU, 515.9MB/1.0GB object store: : 0.00 row [00:34, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.9MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:32<15:26, 5.42s/ row]\n", + "\n", + " s: 2.0 CPU, 512.0MB object store: : 0.00 row [00:32, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.90: Completed work on page 2\n", + "\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.9MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:32<15:26, 5.42s/ row]\n", + "\n", + "Running Dataset. Active & requested resources: 3/11 CPU, 515.9MB/1.0GB object store: : 0.00 row [00:34, ? row/s]s: 2.0 CPU, 512.0MB object store: : 0.00 row [00:32, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.9MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:32<15:26, 5.42s/ row]\n", + "\n", + " s: 2.0 CPU, 512.0MB object store: : 0.00 row [00:32, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.90: Completed work on page 3 \n", + "Running Dataset. Active & requested resources: 3/11 CPU, 515.9MB/1.0GB object store: : 0.00 row [00:34, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.9MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:32<15:26, 5.42s/ row]\n", + "\n", + " s: 2.0 CPU, 512.0MB object store: : 0.00 row [00:32, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.90: Completed work on page 3\n", + "Running Dataset. Active & requested resources: 3/11 CPU, 515.9MB/1.0GB object store: : 0.00 row [00:34, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.9MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:32<15:26, 5.42s/ row]\n", + "\n", + " s: 2.0 CPU, 512.0MB object store: : 0.00 row [00:32, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:httpx:HTTP Request: POST https://api.aryn.cloud/v1/document/partition \"HTTP/1.1 200 OK\"\n", + "Running Dataset. Active & requested resources: 3/11 CPU, 515.9MB/1.0GB object store: : 0.00 row [00:34, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.9MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:32<15:26, 5.42s/ row]\n", + "\n", + " s: 2.0 CPU, 512.0MB object store: : 0.00 row [00:32, ? row/s]\n", + " \n", + "\n", + "(pid=77886) INFO:root:Spurious log 1: Verifying that log messages are propagated \n", + "Running Dataset. Active & requested resources: 3/11 CPU, 515.9MB/1.0GB object store: : 0.00 row [00:34, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.9MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:32<15:26, 5.42s/ row]\n", + "\n", + " s: 2.0 CPU, 512.0MB object store: : 0.00 row [00:32, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.97: Completed work on page 1 \n", + "Running Dataset. Active & requested resources: 3/11 CPU, 515.9MB/1.0GB object store: : 0.00 row [00:34, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.9MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:33<15:26, 5.42s/ row]\n", + "\n", + " s: 2.0 CPU, 512.0MB object store: : 0.00 row [00:33, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.97: Completed work on page 1\n", + "Running Dataset. Active & requested resources: 3/11 CPU, 515.9MB/1.0GB object store: : 0.00 row [00:34, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.9MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:33<15:26, 5.42s/ row]\n", + "\n", + " s: 2.0 CPU, 512.0MB object store: : 0.00 row [00:33, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.10: Completed work on page 4 \n", + "Running Dataset. Active & requested resources: 3/11 CPU, 515.9MB/1.0GB object store: : 0.00 row [00:34, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.9MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:33<15:26, 5.42s/ row]\n", + "\n", + " s: 2.0 CPU, 512.0MB object store: : 0.00 row [00:33, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.10: Completed work on page 4\n", + "Running Dataset. Active & requested resources: 3/11 CPU, 515.9MB/1.0GB object store: : 0.00 row [00:34, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.9MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:33<15:26, 5.42s/ row]\n", + "\n", + " s: 2.0 CPU, 512.0MB object store: : 0.00 row [00:33, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.27: Completed work on page 5 \n", + "Running Dataset. Active & requested resources: 3/11 CPU, 515.9MB/1.0GB object store: : 0.00 row [00:34, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.9MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:33<15:26, 5.42s/ row]\n", + "\n", + " s: 2.0 CPU, 512.0MB object store: : 0.00 row [00:33, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.27: Completed work on page 5\n", + "Running Dataset. Active & requested resources: 3/11 CPU, 515.9MB/1.0GB object store: : 0.00 row [00:34, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.9MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:33<15:26, 5.42s/ row]\n", + "\n", + " s: 2.0 CPU, 512.0MB object store: : 0.00 row [00:33, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.45: Completed work on page 6 \n", + "Running Dataset. Active & requested resources: 3/11 CPU, 515.9MB/1.0GB object store: : 0.00 row [00:34, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.9MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:33<15:26, 5.42s/ row]\n", + "\n", + " s: 2.0 CPU, 512.0MB object store: : 0.00 row [00:33, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.45: Completed work on page 6\n", + "Running Dataset. Active & requested resources: 3/11 CPU, 515.9MB/1.0GB object store: : 0.00 row [00:34, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.9MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:33<15:26, 5.42s/ row]\n", + "\n", + " s: 2.0 CPU, 512.0MB object store: : 0.00 row [00:33, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.65: Completed work on page 7 \n", + "Running Dataset. Active & requested resources: 3/11 CPU, 515.9MB/1.0GB object store: : 0.00 row [00:34, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.9MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:33<15:26, 5.42s/ row]\n", + "\n", + " s: 2.0 CPU, 512.0MB object store: : 0.00 row [00:33, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.65: Completed work on page 7\n", + "Running Dataset. Active & requested resources: 3/11 CPU, 515.9MB/1.0GB object store: : 0.00 row [00:35, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.9MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:33<15:26, 5.42s/ row]\n", + "\n", + " s: 2.0 CPU, 512.0MB object store: : 0.00 row [00:33, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.83: Completed work on page 8 \n", + "Running Dataset. Active & requested resources: 3/11 CPU, 515.9MB/1.0GB object store: : 0.00 row [00:35, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.9MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:33<15:26, 5.42s/ row]\n", + "\n", + " s: 2.0 CPU, 512.0MB object store: : 0.00 row [00:33, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.83: Completed work on page 8\n", + "Running Dataset. Active & requested resources: 3/11 CPU, 515.9MB/1.0GB object store: : 0.00 row [00:35, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.9MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:33<15:26, 5.42s/ row]\n", + "\n", + "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 2 [backpressured]; Queued blocks: 0; Resources: 2.0 CPU, 512.0MB object store: : 0.00 row [00:33, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.9MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:33<15:26, 5.42s/ row]\n", + "\n", + " s: 2.0 CPU, 512.0MB object store: : 0.00 row [00:33, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.03: Completed work on page 9 \n", + "Running Dataset. Active & requested resources: 3/11 CPU, 515.9MB/1.0GB object store: : 0.00 row [00:35, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.9MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:34<15:26, 5.42s/ row]\n", + "\n", + " s: 2.0 CPU, 512.0MB object store: : 0.00 row [00:34, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.03: Completed work on page 9\n", + "Running Dataset. Active & requested resources: 3/11 CPU, 515.9MB/1.0GB object store: : 0.00 row [00:35, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.9MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:34<15:26, 5.42s/ row]\n", + "\n", + " s: 2.0 CPU, 512.0MB object store: : 0.00 row [00:34, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.21: Completed work on page 10 \n", + "Running Dataset. Active & requested resources: 3/11 CPU, 515.9MB/1.0GB object store: : 0.00 row [00:35, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.9MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:34<15:26, 5.42s/ row]\n", + "\n", + " s: 2.0 CPU, 512.0MB object store: : 0.00 row [00:34, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.21: Completed work on page 10\n", + "Running Dataset. Active & requested resources: 3/11 CPU, 515.9MB/1.0GB object store: : 0.00 row [00:35, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.9MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:34<15:26, 5.42s/ row]\n", + "\n", + " s: 2.0 CPU, 512.0MB object store: : 0.00 row [00:34, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.41: Completed work on page 11 \n", + "Running Dataset. Active & requested resources: 3/11 CPU, 515.9MB/1.0GB object store: : 0.00 row [00:35, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.9MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:34<15:26, 5.42s/ row]\n", + "\n", + " s: 2.0 CPU, 512.0MB object store: : 0.00 row [00:34, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.41: Completed work on page 11\n", + "Running Dataset. Active & requested resources: 3/11 CPU, 515.9MB/1.0GB object store: : 0.00 row [00:35, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.9MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:34<15:26, 5.42s/ row]\n", + "\n", + " s: 2.0 CPU, 512.0MB object store: : 0.00 row [00:34, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.58: Completed work on page 12 \n", + "Running Dataset. Active & requested resources: 3/11 CPU, 515.9MB/1.0GB object store: : 0.00 row [00:36, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.9MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:34<15:26, 5.42s/ row]\n", + "\n", + " s: 2.0 CPU, 512.0MB object store: : 0.00 row [00:34, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.58: Completed work on page 12\n", + "Running Dataset. Active & requested resources: 3/11 CPU, 515.9MB/1.0GB object store: : 0.00 row [00:36, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.9MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:34<15:26, 5.42s/ row]\n", + "\n", + " s: 2.0 CPU, 512.0MB object store: : 0.00 row [00:34, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.77: Completed work on page 13 \n", + "Running Dataset. Active & requested resources: 3/11 CPU, 515.9MB/1.0GB object store: : 0.00 row [00:36, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.9MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:34<15:26, 5.42s/ row]\n", + "\n", + " s: 2.0 CPU, 512.0MB object store: : 0.00 row [00:34, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.77: Completed work on page 13\n", + "Running Dataset. Active & requested resources: 3/11 CPU, 515.9MB/1.0GB object store: : 0.00 row [00:36, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.9MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:34<15:26, 5.42s/ row]\n", + "\n", + " s: 2.0 CPU, 512.0MB object store: : 0.00 row [00:34, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.94: Completed work on page 14 \n", + "Running Dataset. Active & requested resources: 3/11 CPU, 515.9MB/1.0GB object store: : 0.00 row [00:36, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.9MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:34<15:26, 5.42s/ row]\n", + "\n", + " s: 2.0 CPU, 512.0MB object store: : 0.00 row [00:34, ? row/s]\n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.94: Completed work on page 14\n", + "Running Dataset. Active & requested resources: 3/11 CPU, 515.9MB/1.0GB object store: : 0.00 row [00:36, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.9MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:34<15:26, 5.42s/ row]\n", + "\n", + " s: 2.0 CPU, 512.0MB object store: : 0.00 row [00:34, ? row/s]\n", + " \n", + "\n", + "(MapBatches(materialize)->MapBatches(split_doc)->MapBatches(llm_map)->MapBatches(parse_json_and_cast)->MapBatches(process_doc)->MapBatches(removeOriginalElements)->MapBatches(process_doc)->MapBatches(mergeDialogue)->MapBatches(process_doc)->MapBatches(OpenAIEmbedder)->MapBatches(_write_docs_tt) pid=77886) INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "Running Dataset. Active & requested resources: 3/11 CPU, 515.9MB/1.0GB object store: : 0.00 row [00:36, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.9MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:34<15:26, 5.42s/ row]\n", + "\n", + "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 2 [backpressured]; Queued blocks: 0; Resources: 2.0 CPU, 512.0MB object store: : 0.00 row [00:34, ? row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:34<15:26, 5.42s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:34<15:26, 5.42s/ row]\n", + "\n", + "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 1; Queued blocks: 0; Resources: 1.0 CPU, 1.9MB object store: : 0.00 row [00:34, ? row/s] \n", + "\n", + "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 1; Queued blocks: 0; Resources: 1.0 CPU, 1.9MB object store: 0%| | 0.00/885 [00:34...->MapBatches(_write_docs_tt): Tasks: 1; Queued blocks: 0; Resources: 1.0 CPU, 1.9MB object store: 2%|█▋ | 15.0/885 [00:34<33:47, 2.33s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 3.13: Completed work on page 15 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.3MB/1.0GB object store: 2%|██▌ | 15.0/885 [00:36<35:02, 2.42s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:35<15:26, 5.42s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 3.13: Completed work on page 15 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.3MB/1.0GB object store: 2%|██▌ | 15.0/885 [00:36<35:02, 2.42s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:35<15:26, 5.42s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 3.30: Completed work on page 16 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.3MB/1.0GB object store: 2%|██▌ | 15.0/885 [00:36<35:02, 2.42s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:35<15:26, 5.42s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 3.30: Completed work on page 16 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.3MB/1.0GB object store: 2%|██▌ | 15.0/885 [00:36<35:02, 2.42s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:35<15:26, 5.42s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 3.49: Completed work on page 17 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.3MB/1.0GB object store: 2%|██▌ | 15.0/885 [00:37<35:02, 2.42s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:35<15:26, 5.42s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 3.49: Completed work on page 17 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.3MB/1.0GB object store: 2%|██▌ | 15.0/885 [00:37<35:02, 2.42s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:35<15:26, 5.42s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 3.85: Completed work on page 18 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.3MB/1.0GB object store: 2%|██▌ | 15.0/885 [00:37<35:02, 2.42s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:35<15:26, 5.42s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 3.85: Completed work on page 18 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.3MB/1.0GB object store: 2%|██▌ | 15.0/885 [00:37<35:02, 2.42s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:35<15:26, 5.42s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 4.04: Completed work on page 19 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.3MB/1.0GB object store: 2%|██▌ | 15.0/885 [00:37<35:02, 2.42s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:35<15:26, 5.42s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 4.04: Completed work on page 19 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.3MB/1.0GB object store: 2%|██▌ | 15.0/885 [00:37<35:02, 2.42s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:35<15:26, 5.42s/ row]\n", + "\n", + "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 1; Queued blocks: 0; Resources: 1.0 CPU, 1.9MB object store: 2%|█▋ | 15.0/885 [00:35<33:47, 2.33s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:36<15:26, 5.42s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapBatches(materialize)->MapBatches(split_doc)->MapBatches(llm_map)->MapBatches(parse_json_and_cast)->MapBatches(process_doc)->MapBatches(removeOriginalElements)->MapBatches(process_doc)->MapBatches(mergeDialogue)->MapBatches(process_doc)->MapBatches(OpenAIEmbedder)->MapBatches(_write_docs_tt) pid=77886) INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.3MB/1.0GB object store: 2%|██▌ | 15.0/885 [00:37<35:02, 2.42s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:36<15:26, 5.42s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 4.21: Completed work on page 20 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.3MB/1.0GB object store: 2%|██▌ | 15.0/885 [00:37<35:02, 2.42s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:36<15:26, 5.42s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 4.21: Completed work on page 20 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.3MB/1.0GB object store: 2%|██▌ | 15.0/885 [00:38<35:02, 2.42s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:36<15:26, 5.42s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 4.38: Completed work on page 21 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.3MB/1.0GB object store: 2%|██▌ | 15.0/885 [00:38<35:02, 2.42s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:36<15:26, 5.42s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 4.38: Completed work on page 21 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.3MB/1.0GB object store: 2%|██▌ | 15.0/885 [00:38<35:02, 2.42s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:36<15:26, 5.42s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 4.89: Completed work on page 22 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.3MB/1.0GB object store: 2%|██▌ | 15.0/885 [00:38<35:02, 2.42s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:36<15:26, 5.42s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 4.89: Completed work on page 22 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.3MB/1.0GB object store: 2%|██▌ | 15.0/885 [00:38<35:02, 2.42s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:36<15:26, 5.42s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 4.97: Completed work on page 23 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.3MB/1.0GB object store: 2%|██▌ | 15.0/885 [00:38<35:02, 2.42s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:36<15:26, 5.42s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 4.97: Completed work on page 23 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.3MB/1.0GB object store: 2%|██▌ | 15.0/885 [00:38<35:02, 2.42s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:36<15:26, 5.42s/ row]\n", + "\n", + "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 1; Queued blocks: 0; Resources: 1.0 CPU, 1.9MB object store: 2%|█▋ | 15.0/885 [00:36<33:47, 2.33s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:37<15:26, 5.42s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapBatches(materialize)->MapBatches(split_doc)->MapBatches(llm_map)->MapBatches(parse_json_and_cast)->MapBatches(process_doc)->MapBatches(removeOriginalElements)->MapBatches(process_doc)->MapBatches(mergeDialogue)->MapBatches(process_doc)->MapBatches(OpenAIEmbedder)->MapBatches(_write_docs_tt) pid=77886) INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.3MB/1.0GB object store: 2%|██▌ | 15.0/885 [00:38<35:02, 2.42s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:37<15:26, 5.42s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 5.12: Completed work on page 27 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.3MB/1.0GB object store: 2%|██▌ | 15.0/885 [00:38<35:02, 2.42s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:37<15:26, 5.42s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 5.12: Completed work on page 27 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.3MB/1.0GB object store: 2%|██▌ | 15.0/885 [00:38<35:02, 2.42s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:37<15:26, 5.42s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 5.33: Completed work on page 26 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.3MB/1.0GB object store: 2%|██▌ | 15.0/885 [00:38<35:02, 2.42s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:37<15:26, 5.42s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 5.33: Completed work on page 26 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.3MB/1.0GB object store: 2%|██▌ | 15.0/885 [00:38<35:02, 2.42s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:37<15:26, 5.42s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 6.06: Completed work on page 24 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.3MB/1.0GB object store: 2%|██▌ | 15.0/885 [00:39<35:02, 2.42s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:37<15:26, 5.42s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 6.06: Completed work on page 24 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.3MB/1.0GB object store: 2%|██▌ | 15.0/885 [00:39<35:02, 2.42s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:37<15:26, 5.42s/ row]\n", + "\n", + "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 1; Queued blocks: 0; Resources: 1.0 CPU, 1.9MB object store: 2%|█▋ | 15.0/885 [00:37<33:47, 2.33s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:38<15:26, 5.42s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 6.26: Completed work on page 25 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.3MB/1.0GB object store: 2%|██▌ | 15.0/885 [00:39<35:02, 2.42s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:38<15:26, 5.42s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 6.26: Completed work on page 25 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.3MB/1.0GB object store: 2%|██▌ | 15.0/885 [00:40<35:02, 2.42s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:38<15:26, 5.42s/ row]\n", + "\n", + "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 1; Queued blocks: 0; Resources: 1.0 CPU, 1.9MB object store: 2%|█▋ | 15.0/885 [00:38<33:47, 2.33s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:39<15:26, 5.42s/ row]\n", + "\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.3MB/1.0GB object store: 2%|██▌ | 15.0/885 [00:40<35:02, 2.42s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:40<15:26, 5.42s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:httpx:HTTP Request: POST https://api.aryn.cloud/v1/document/partition \"HTTP/1.1 200 OK\" \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.3MB/1.0GB object store: 2%|██▌ | 15.0/885 [00:41<35:02, 2.42s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:40<15:26, 5.42s/ row]\n", + "\n", + "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 1; Queued blocks: 0; Resources: 1.0 CPU, 1.9MB object store: 2%|█▋ | 15.0/885 [00:40<33:47, 2.33s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:41<15:26, 5.42s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.00: Server version aryn-partitioner-0.20250323.124348 Model version 1.4 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.3MB/1.0GB object store: 2%|██▌ | 15.0/885 [00:43<35:02, 2.42s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:41<15:26, 5.42s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.00: Server version aryn-partitioner-0.20250323.124348 Model version 1.4 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.3MB/1.0GB object store: 2%|██▌ | 15.0/885 [00:43<35:02, 2.42s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:41<15:26, 5.42s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.00: Received request with aryn_call_id=aryn:call-pdk486xdb6z179odzq5ct11 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.3MB/1.0GB object store: 2%|██▌ | 15.0/885 [00:43<35:02, 2.42s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:41<15:26, 5.42s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.00: Received request with aryn_call_id=aryn:call-pdk486xdb6z179odzq5ct11 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.3MB/1.0GB object store: 2%|██▌ | 15.0/885 [00:43<35:02, 2.42s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:41<15:26, 5.42s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.00: Waiting for scheduling \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.3MB/1.0GB object store: 2%|██▌ | 15.0/885 [00:43<35:02, 2.42s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:41<15:26, 5.42s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.00: Waiting for scheduling \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.3MB/1.0GB object store: 2%|██▌ | 15.0/885 [00:43<35:02, 2.42s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:41<15:26, 5.42s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.04: Preprocessing document \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.3MB/1.0GB object store: 2%|██▌ | 15.0/885 [00:43<35:02, 2.42s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:41<15:26, 5.42s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.04: Preprocessing document \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.3MB/1.0GB object store: 2%|██▌ | 15.0/885 [00:43<35:02, 2.42s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:41<15:26, 5.42s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.04: Done preprocessing document \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.3MB/1.0GB object store: 2%|██▌ | 15.0/885 [00:43<35:02, 2.42s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:41<15:26, 5.42s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.04: Done preprocessing document \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.3MB/1.0GB object store: 2%|██▌ | 15.0/885 [00:43<35:02, 2.42s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:41<15:26, 5.42s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.78: Completed work on page 1 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.3MB/1.0GB object store: 2%|██▌ | 15.0/885 [00:43<35:02, 2.42s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:42<15:26, 5.42s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.78: Completed work on page 1 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.3MB/1.0GB object store: 2%|██▌ | 15.0/885 [00:43<35:02, 2.42s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:42<15:26, 5.42s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.92: Completed work on page 2 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.3MB/1.0GB object store: 2%|██▌ | 15.0/885 [00:43<35:02, 2.42s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:42<15:26, 5.42s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.92: Completed work on page 2 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.3MB/1.0GB object store: 2%|██▌ | 15.0/885 [00:43<35:02, 2.42s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:42<15:26, 5.42s/ row]\n", + "\n", + "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 1; Queued blocks: 0; Resources: 1.0 CPU, 1.9MB object store: 2%|█▋ | 15.0/885 [00:42<33:47, 2.33s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:42<15:26, 5.42s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.16: Completed work on page 4 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.3MB/1.0GB object store: 2%|██▌ | 15.0/885 [00:43<35:02, 2.42s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:42<15:26, 5.42s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.16: Completed work on page 4 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.3MB/1.0GB object store: 2%|██▌ | 15.0/885 [00:43<35:02, 2.42s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:42<15:26, 5.42s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.16: Completed work on page 5 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.3MB/1.0GB object store: 2%|██▌ | 15.0/885 [00:44<35:02, 2.42s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:42<15:26, 5.42s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.16: Completed work on page 5 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.3MB/1.0GB object store: 2%|██▌ | 15.0/885 [00:44<35:02, 2.42s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:42<15:26, 5.42s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.40: Completed work on page 3 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.3MB/1.0GB object store: 2%|██▌ | 15.0/885 [00:44<35:02, 2.42s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:42<15:26, 5.42s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.40: Completed work on page 3 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.3MB/1.0GB object store: 2%|██▌ | 15.0/885 [00:44<35:02, 2.42s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:42<15:26, 5.42s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.40: Completed work on page 6 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.3MB/1.0GB object store: 2%|██▌ | 15.0/885 [00:44<35:02, 2.42s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:42<15:26, 5.42s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.40: Completed work on page 6 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.3MB/1.0GB object store: 2%|██▌ | 15.0/885 [00:44<35:02, 2.42s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:42<15:26, 5.42s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.74: Completed work on page 7 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.3MB/1.0GB object store: 2%|██▌ | 15.0/885 [00:44<35:02, 2.42s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:42<15:26, 5.42s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.74: Completed work on page 7 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.3MB/1.0GB object store: 2%|██▌ | 15.0/885 [00:44<35:02, 2.42s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:42<15:26, 5.42s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.85: Completed work on page 8 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.3MB/1.0GB object store: 2%|██▌ | 15.0/885 [00:44<35:02, 2.42s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:42<15:26, 5.42s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.85: Completed work on page 8 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.3MB/1.0GB object store: 2%|██▌ | 15.0/885 [00:44<35:02, 2.42s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:42<15:26, 5.42s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.85: Completed work on page 9 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.3MB/1.0GB object store: 2%|██▌ | 15.0/885 [00:44<35:02, 2.42s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:42<15:26, 5.42s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.85: Completed work on page 9 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.3MB/1.0GB object store: 2%|██▌ | 15.0/885 [00:44<35:02, 2.42s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:42<15:26, 5.42s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.96: Completed work on page 12 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.3MB/1.0GB object store: 2%|██▌ | 15.0/885 [00:44<35:02, 2.42s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:42<15:26, 5.42s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.96: Completed work on page 12 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.3MB/1.0GB object store: 2%|██▌ | 15.0/885 [00:44<35:02, 2.42s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:43<15:26, 5.42s/ row]\n", + "\n", + "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 1; Queued blocks: 0; Resources: 1.0 CPU, 1.9MB object store: 2%|█▋ | 15.0/885 [00:43<33:47, 2.33s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:43<15:26, 5.42s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:43<15:26, 5.42s/ row]\n", + "\n", + "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 0; Queued blocks: 0; Resources: 0.0 CPU, 0.0B object store: 2%|█▊ | 15.0/885 [00:43<33:47, 2.33s/ row]\n", + "\n", + "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 0; Queued blocks: 0; Resources: 0.0 CPU, 0.0B object store: 3%|███▌ | 30.0/885 [00:43<18:22, 1.29s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.07: Completed work on page 13 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.3MB/1.0GB object store: 3%|█████ | 30.0/885 [00:45<18:53, 1.33s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:43<15:26, 5.42s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.07: Completed work on page 13 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.3MB/1.0GB object store: 3%|█████ | 30.0/885 [00:45<18:53, 1.33s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:43<15:26, 5.42s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.50: Completed work on page 10 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.3MB/1.0GB object store: 3%|█████ | 30.0/885 [00:45<18:53, 1.33s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:43<15:26, 5.42s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.50: Completed work on page 10 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.3MB/1.0GB object store: 3%|█████ | 30.0/885 [00:45<18:53, 1.33s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:43<15:26, 5.42s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.56: Completed work on page 11 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.3MB/1.0GB object store: 3%|█████ | 30.0/885 [00:45<18:53, 1.33s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:43<15:26, 5.42s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.56: Completed work on page 11 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.3MB/1.0GB object store: 3%|█████ | 30.0/885 [00:45<18:53, 1.33s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:43<15:26, 5.42s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.65: Completed work on page 15 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.3MB/1.0GB object store: 3%|█████ | 30.0/885 [00:45<18:53, 1.33s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:43<15:26, 5.42s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.65: Completed work on page 15 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.3MB/1.0GB object store: 3%|█████ | 30.0/885 [00:45<18:53, 1.33s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:43<15:26, 5.42s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.66: Completed work on page 14 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.3MB/1.0GB object store: 3%|█████ | 30.0/885 [00:45<18:53, 1.33s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:43<15:26, 5.42s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.66: Completed work on page 14 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.3MB/1.0GB object store: 3%|█████ | 30.0/885 [00:45<18:53, 1.33s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:43<15:26, 5.42s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.75: Completed work on page 16 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.3MB/1.0GB object store: 3%|█████ | 30.0/885 [00:45<18:53, 1.33s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:43<15:26, 5.42s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.75: Completed work on page 16 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.3MB/1.0GB object store: 3%|█████ | 30.0/885 [00:45<18:53, 1.33s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:43<15:26, 5.42s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.75: Completed work on page 17 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.3MB/1.0GB object store: 3%|█████ | 30.0/885 [00:45<18:53, 1.33s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:43<15:26, 5.42s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.75: Completed work on page 17 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.3MB/1.0GB object store: 3%|█████ | 30.0/885 [00:45<18:53, 1.33s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:43<15:26, 5.42s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.94: Completed work on page 18 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.3MB/1.0GB object store: 3%|█████ | 30.0/885 [00:45<18:53, 1.33s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:44<15:26, 5.42s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.94: Completed work on page 18 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.3MB/1.0GB object store: 3%|█████ | 30.0/885 [00:45<18:53, 1.33s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:44<15:26, 5.42s/ row]\n", + "\n", + "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 0; Queued blocks: 0; Resources: 0.0 CPU, 0.0B object store: 3%|███▌ | 30.0/885 [00:44<18:22, 1.29s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:44<15:26, 5.42s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 3.23: Completed work on page 20 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.3MB/1.0GB object store: 3%|█████ | 30.0/885 [00:46<18:53, 1.33s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:44<15:26, 5.42s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 3.23: Completed work on page 20 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.3MB/1.0GB object store: 3%|█████ | 30.0/885 [00:46<18:53, 1.33s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:44<15:26, 5.42s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 3.29: Completed work on page 19 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.3MB/1.0GB object store: 3%|█████ | 30.0/885 [00:46<18:53, 1.33s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:44<15:26, 5.42s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 3.29: Completed work on page 19 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.3MB/1.0GB object store: 3%|█████ | 30.0/885 [00:46<18:53, 1.33s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:44<15:26, 5.42s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 3.30: Completed work on page 22 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.3MB/1.0GB object store: 3%|█████ | 30.0/885 [00:46<18:53, 1.33s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:44<15:26, 5.42s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 3.30: Completed work on page 22 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.3MB/1.0GB object store: 3%|█████ | 30.0/885 [00:46<18:53, 1.33s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:44<15:26, 5.42s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 3.30: Completed work on page 23 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.3MB/1.0GB object store: 3%|█████ | 30.0/885 [00:46<18:53, 1.33s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:44<15:26, 5.42s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 3.30: Completed work on page 23 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.3MB/1.0GB object store: 3%|█████ | 30.0/885 [00:46<18:53, 1.33s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:44<15:26, 5.42s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 3.50: Completed work on page 21 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.3MB/1.0GB object store: 3%|█████ | 30.0/885 [00:46<18:53, 1.33s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:44<15:26, 5.42s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 3.50: Completed work on page 21 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.3MB/1.0GB object store: 3%|█████ | 30.0/885 [00:46<18:53, 1.33s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:44<15:26, 5.42s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 3.50: Completed work on page 24 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.3MB/1.0GB object store: 3%|█████ | 30.0/885 [00:46<18:53, 1.33s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:45<15:26, 5.42s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 3.50: Completed work on page 24 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.3MB/1.0GB object store: 3%|█████ | 30.0/885 [00:46<18:53, 1.33s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:45<15:26, 5.42s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 3.77: Completed work on page 25 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.3MB/1.0GB object store: 3%|█████ | 30.0/885 [00:46<18:53, 1.33s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:45<15:26, 5.42s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 3.77: Completed work on page 25 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.3MB/1.0GB object store: 3%|█████ | 30.0/885 [00:46<18:53, 1.33s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:45<15:26, 5.42s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 3.77: Completed work on page 26 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.3MB/1.0GB object store: 3%|█████ | 30.0/885 [00:46<18:53, 1.33s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:45<15:26, 5.42s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 3.77: Completed work on page 26 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.3MB/1.0GB object store: 3%|█████ | 30.0/885 [00:46<18:53, 1.33s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:45<15:26, 5.42s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 4.00: Completed work on page 27 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.3MB/1.0GB object store: 3%|█████ | 30.0/885 [00:46<18:53, 1.33s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:45<15:26, 5.42s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 4.00: Completed work on page 27 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.3MB/1.0GB object store: 3%|█████ | 30.0/885 [00:46<18:53, 1.33s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:45<15:26, 5.42s/ row]\n", + "\n", + "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 0; Queued blocks: 0; Resources: 0.0 CPU, 0.0B object store: 3%|███▌ | 30.0/885 [00:45<18:22, 1.29s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:45<15:26, 5.42s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 4.14: Completed work on page 28 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.3MB/1.0GB object store: 3%|█████ | 30.0/885 [00:47<18:53, 1.33s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:45<15:26, 5.42s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 4.14: Completed work on page 28 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.3MB/1.0GB object store: 3%|█████ | 30.0/885 [00:47<18:53, 1.33s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:45<15:26, 5.42s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 4.43: Completed work on page 32 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.3MB/1.0GB object store: 3%|█████ | 30.0/885 [00:47<18:53, 1.33s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:45<15:26, 5.42s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 4.43: Completed work on page 32 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.3MB/1.0GB object store: 3%|█████ | 30.0/885 [00:47<18:53, 1.33s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:45<15:26, 5.42s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 4.50: Completed work on page 30 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.3MB/1.0GB object store: 3%|█████ | 30.0/885 [00:47<18:53, 1.33s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:45<15:26, 5.42s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 4.50: Completed work on page 30 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.3MB/1.0GB object store: 3%|█████ | 30.0/885 [00:47<18:53, 1.33s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:45<15:26, 5.42s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 4.50: Completed work on page 31 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.3MB/1.0GB object store: 3%|█████ | 30.0/885 [00:47<18:53, 1.33s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:45<15:26, 5.42s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 4.50: Completed work on page 31 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.3MB/1.0GB object store: 3%|█████ | 30.0/885 [00:47<18:53, 1.33s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:45<15:26, 5.42s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 4.50: Completed work on page 29 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.3MB/1.0GB object store: 3%|█████ | 30.0/885 [00:47<18:53, 1.33s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:45<15:26, 5.42s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 4.50: Completed work on page 29 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.3MB/1.0GB object store: 3%|█████ | 30.0/885 [00:47<18:53, 1.33s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:45<15:26, 5.42s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 4.69: Completed work on page 34 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.3MB/1.0GB object store: 3%|█████ | 30.0/885 [00:47<18:53, 1.33s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:45<15:26, 5.42s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 4.69: Completed work on page 34 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.3MB/1.0GB object store: 3%|█████ | 30.0/885 [00:47<18:53, 1.33s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:45<15:26, 5.42s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 4.69: Completed work on page 33 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.3MB/1.0GB object store: 3%|█████ | 30.0/885 [00:47<18:53, 1.33s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:45<15:26, 5.42s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 4.69: Completed work on page 33 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.3MB/1.0GB object store: 3%|█████ | 30.0/885 [00:47<18:53, 1.33s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:45<15:26, 5.42s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 4.69: Completed work on page 35 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.3MB/1.0GB object store: 3%|█████ | 30.0/885 [00:47<18:53, 1.33s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:46<15:26, 5.42s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 4.69: Completed work on page 35 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.3MB/1.0GB object store: 3%|█████ | 30.0/885 [00:47<18:53, 1.33s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:46<15:26, 5.42s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 4.88: Completed work on page 36 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.3MB/1.0GB object store: 3%|█████ | 30.0/885 [00:47<18:53, 1.33s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:46<15:26, 5.42s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 4.88: Completed work on page 36 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.3MB/1.0GB object store: 3%|█████ | 30.0/885 [00:47<18:53, 1.33s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:46<15:26, 5.42s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 4.98: Completed work on page 37 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.3MB/1.0GB object store: 3%|█████ | 30.0/885 [00:47<18:53, 1.33s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:46<15:26, 5.42s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 4.98: Completed work on page 37 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.3MB/1.0GB object store: 3%|█████ | 30.0/885 [00:47<18:53, 1.33s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:46<15:26, 5.42s/ row]\n", + "\n", + "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 0; Queued blocks: 0; Resources: 0.0 CPU, 0.0B object store: 3%|███▌ | 30.0/885 [00:46<18:22, 1.29s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:46<15:26, 5.42s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 5.07: Completed work on page 38 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.3MB/1.0GB object store: 3%|█████ | 30.0/885 [00:48<18:53, 1.33s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:46<15:26, 5.42s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 5.07: Completed work on page 38 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.3MB/1.0GB object store: 3%|█████ | 30.0/885 [00:48<18:53, 1.33s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:46<15:26, 5.42s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 5.62: Completed work on page 41 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.3MB/1.0GB object store: 3%|█████ | 30.0/885 [00:48<18:53, 1.33s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:46<15:26, 5.42s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 5.62: Completed work on page 41 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.3MB/1.0GB object store: 3%|█████ | 30.0/885 [00:48<18:53, 1.33s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:46<15:26, 5.42s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 5.68: Completed work on page 39 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.3MB/1.0GB object store: 3%|█████ | 30.0/885 [00:48<18:53, 1.33s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:46<15:26, 5.42s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 5.68: Completed work on page 39 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.3MB/1.0GB object store: 3%|█████ | 30.0/885 [00:48<18:53, 1.33s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:46<15:26, 5.42s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 5.77: Completed work on page 44 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.3MB/1.0GB object store: 3%|█████ | 30.0/885 [00:48<18:53, 1.33s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:46<15:26, 5.42s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 5.77: Completed work on page 44 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.3MB/1.0GB object store: 3%|█████ | 30.0/885 [00:48<18:53, 1.33s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:46<15:26, 5.42s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 5.77: Completed work on page 40 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.3MB/1.0GB object store: 3%|█████ | 30.0/885 [00:48<18:53, 1.33s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:46<15:26, 5.42s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 5.77: Completed work on page 40 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.3MB/1.0GB object store: 3%|█████ | 30.0/885 [00:48<18:53, 1.33s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:46<15:26, 5.42s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 5.77: Completed work on page 43 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.3MB/1.0GB object store: 3%|█████ | 30.0/885 [00:48<18:53, 1.33s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:47<15:26, 5.42s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 5.77: Completed work on page 43 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.3MB/1.0GB object store: 3%|█████ | 30.0/885 [00:48<18:53, 1.33s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:47<15:26, 5.42s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 5.96: Completed work on page 42 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.3MB/1.0GB object store: 3%|█████ | 30.0/885 [00:48<18:53, 1.33s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:47<15:26, 5.42s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 5.96: Completed work on page 42 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.3MB/1.0GB object store: 3%|█████ | 30.0/885 [00:48<18:53, 1.33s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:47<15:26, 5.42s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 6.02: Completed work on page 45 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.3MB/1.0GB object store: 3%|█████ | 30.0/885 [00:48<18:53, 1.33s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:47<15:26, 5.42s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 6.02: Completed work on page 45 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.3MB/1.0GB object store: 3%|█████ | 30.0/885 [00:48<18:53, 1.33s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:47<15:26, 5.42s/ row]\n", + "\n", + "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 0; Queued blocks: 0; Resources: 0.0 CPU, 0.0B object store: 3%|███▌ | 30.0/885 [00:47<18:22, 1.29s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:47<15:26, 5.42s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 6.72: Completed work on page 50 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.3MB/1.0GB object store: 3%|█████ | 30.0/885 [00:49<18:53, 1.33s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:47<15:26, 5.42s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 6.72: Completed work on page 50 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.3MB/1.0GB object store: 3%|█████ | 30.0/885 [00:49<18:53, 1.33s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:47<15:26, 5.42s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 6.81: Completed work on page 49 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.3MB/1.0GB object store: 3%|█████ | 30.0/885 [00:49<18:53, 1.33s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:47<15:26, 5.42s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 6.81: Completed work on page 49 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.3MB/1.0GB object store: 3%|█████ | 30.0/885 [00:49<18:53, 1.33s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:47<15:26, 5.42s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 6.82: Completed work on page 48 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.3MB/1.0GB object store: 3%|█████ | 30.0/885 [00:49<18:53, 1.33s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:47<15:26, 5.42s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 6.82: Completed work on page 48 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.3MB/1.0GB object store: 3%|█████ | 30.0/885 [00:49<18:53, 1.33s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:47<15:26, 5.42s/ row]\n", + "\n", + "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 0; Queued blocks: 0; Resources: 0.0 CPU, 0.0B object store: 3%|███▌ | 30.0/885 [00:47<18:22, 1.29s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:48<15:26, 5.42s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 7.13: Completed work on page 47 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.3MB/1.0GB object store: 3%|█████ | 30.0/885 [00:50<18:53, 1.33s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:48<15:26, 5.42s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 7.13: Completed work on page 47 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.3MB/1.0GB object store: 3%|█████ | 30.0/885 [00:50<18:53, 1.33s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:48<15:26, 5.42s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 7.15: Completed work on page 46 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.3MB/1.0GB object store: 3%|█████ | 30.0/885 [00:50<18:53, 1.33s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:48<15:26, 5.42s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 7.15: Completed work on page 46 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.3MB/1.0GB object store: 3%|█████ | 30.0/885 [00:50<18:53, 1.33s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:48<15:26, 5.42s/ row]\n", + "\n", + "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 0; Queued blocks: 0; Resources: 0.0 CPU, 0.0B object store: 3%|███▌ | 30.0/885 [00:48<18:22, 1.29s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:49<15:26, 5.42s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:httpx:HTTP Request: POST https://api.aryn.cloud/v1/document/partition \"HTTP/1.1 200 OK\" \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.3MB/1.0GB object store: 3%|█████ | 30.0/885 [00:52<18:53, 1.33s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:50<15:26, 5.42s/ row]\n", + "\n", + "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 0; Queued blocks: 0; Resources: 0.0 CPU, 0.0B object store: 3%|███▌ | 30.0/885 [00:50<18:22, 1.29s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 52; Resources: 1.0 CPU, 4.8MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:50<15:26, 5.42s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 52; Resources: 1.0 CPU, 4.8MB object store; [locality off]: 5%|███▌ | 9.00/177 [00:50<16:21, 5.84s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 52; Resources: 1.0 CPU, 4.8MB object store; [locality off]: 5%|███▌ | 9.00/177 [00:50<16:21, 5.84s/ row]\n", + "\n", + "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 1; Queued blocks: 0; Resources: 1.0 CPU, 2.4MB object store: 3%|███▍ | 30.0/885 [00:50<18:22, 1.29s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.00: Server version aryn-partitioner-0.20250323.124348 Model version 1.4 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 7.2MB/1.0GB object store: 3%|█████ | 30.0/885 [00:53<18:53, 1.33s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 52; Resources: 1.0 CPU, 4.8MB object store; [locality off]: 5%|███▌ | 9.00/177 [00:51<16:21, 5.84s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.00: Server version aryn-partitioner-0.20250323.124348 Model version 1.4 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 7.2MB/1.0GB object store: 3%|█████ | 30.0/885 [00:53<18:53, 1.33s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 52; Resources: 1.0 CPU, 4.8MB object store; [locality off]: 5%|███▌ | 9.00/177 [00:51<16:21, 5.84s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.00: Received request with aryn_call_id=aryn:call-kvwdbwmjdpron5iwrbq6wez \n", + "\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 7.2MB/1.0GB object store: 3%|█████ | 30.0/885 [00:53<18:53, 1.33s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 52; Resources: 1.0 CPU, 4.8MB object store; [locality off]: 5%|███▌ | 9.00/177 [00:51<16:21, 5.84s/ row]\n", + "\n", + " \n", + " \n", + "\n", + " \n", + "\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 7.2MB/1.0GB object store: 3%|█████ | 30.0/885 [00:53<18:53, 1.33s/ row] INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.00: Received request with aryn_call_id=aryn:call-kvwdbwmjdpron5iwrbq6wez\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 7.2MB/1.0GB object store: 3%|█████ | 30.0/885 [00:53<18:53, 1.33s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 52; Resources: 1.0 CPU, 4.8MB object store; [locality off]: 5%|███▌ | 9.00/177 [00:51<16:21, 5.84s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.00: Waiting for scheduling \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 7.2MB/1.0GB object store: 3%|█████ | 30.0/885 [00:53<18:53, 1.33s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 52; Resources: 1.0 CPU, 4.8MB object store; [locality off]: 5%|███▌ | 9.00/177 [00:51<16:21, 5.84s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.00: Waiting for scheduling \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 7.2MB/1.0GB object store: 3%|█████ | 30.0/885 [00:53<18:53, 1.33s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 52; Resources: 1.0 CPU, 4.8MB object store; [locality off]: 5%|███▌ | 9.00/177 [00:51<16:21, 5.84s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.01: Preprocessing document \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 7.2MB/1.0GB object store: 3%|█████ | 30.0/885 [00:53<18:53, 1.33s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 52; Resources: 1.0 CPU, 4.8MB object store; [locality off]: 5%|███▌ | 9.00/177 [00:51<16:21, 5.84s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.01: Preprocessing document \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 7.2MB/1.0GB object store: 3%|█████ | 30.0/885 [00:53<18:53, 1.33s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 52; Resources: 1.0 CPU, 4.8MB object store; [locality off]: 5%|███▌ | 9.00/177 [00:51<16:21, 5.84s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.01: Done preprocessing document \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 7.2MB/1.0GB object store: 3%|█████ | 30.0/885 [00:53<18:53, 1.33s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 52; Resources: 1.0 CPU, 4.8MB object store; [locality off]: 5%|███▌ | 9.00/177 [00:51<16:21, 5.84s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.01: Done preprocessing document \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 7.2MB/1.0GB object store: 3%|█████ | 30.0/885 [00:53<18:53, 1.33s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 52; Resources: 1.0 CPU, 4.8MB object store; [locality off]: 5%|███▌ | 9.00/177 [00:51<16:21, 5.84s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.65: Completed work on page 1 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 7.2MB/1.0GB object store: 3%|█████ | 30.0/885 [00:53<18:53, 1.33s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 52; Resources: 1.0 CPU, 4.8MB object store; [locality off]: 5%|███▌ | 9.00/177 [00:52<16:21, 5.84s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.65: Completed work on page 1 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 7.2MB/1.0GB object store: 3%|█████ | 30.0/885 [00:53<18:53, 1.33s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 52; Resources: 1.0 CPU, 4.8MB object store; [locality off]: 5%|███▌ | 9.00/177 [00:52<16:21, 5.84s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.66: Completed work on page 2 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 7.2MB/1.0GB object store: 3%|█████ | 30.0/885 [00:53<18:53, 1.33s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 52; Resources: 1.0 CPU, 4.8MB object store; [locality off]: 5%|███▌ | 9.00/177 [00:52<16:21, 5.84s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.66: Completed work on page 2 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 7.2MB/1.0GB object store: 3%|█████ | 30.0/885 [00:53<18:53, 1.33s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 52; Resources: 1.0 CPU, 4.8MB object store; [locality off]: 5%|███▌ | 9.00/177 [00:52<16:21, 5.84s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.94: Completed work on page 3 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 7.2MB/1.0GB object store: 3%|█████ | 30.0/885 [00:53<18:53, 1.33s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 52; Resources: 1.0 CPU, 4.8MB object store; [locality off]: 5%|███▌ | 9.00/177 [00:52<16:21, 5.84s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.94: Completed work on page 3 \n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [01:54<13:21, 5.13s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [01:54<13:21, 5.13s/ row]\n", + "\n", + "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 1; Queued blocks: 0; Resources: 1.0 CPU, 2.9MB object store: 8%|████████▋ | 75.0/885 [01:54<15:28, 1.15s/ row]\n", + "\n", + "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 1; Queued blocks: 0; Resources: 1.0 CPU, 2.9MB object store: 10%|██████████▍ | 90.0/885 [01:54<16:05, 1.21s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.00: Server version aryn-partitioner-0.20250323.124348 Model version 1.4 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [01:55<16:09, 1.22s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [01:54<13:21, 5.13s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.00: Server version aryn-partitioner-0.20250323.124348 Model version 1.4 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [01:55<16:09, 1.22s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [01:54<13:21, 5.13s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.00: Received request with aryn_call_id=aryn:call-3nv1slvshtvt64wut3xxrio \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [01:55<16:09, 1.22s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [01:54<13:21, 5.13s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.00: Received request with aryn_call_id=aryn:call-3nv1slvshtvt64wut3xxrio \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [01:56<16:09, 1.22s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [01:54<13:21, 5.13s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.00: Waiting for scheduling \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [01:56<16:09, 1.22s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [01:54<13:21, 5.13s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.00: Waiting for scheduling \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [01:56<16:09, 1.22s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [01:54<13:21, 5.13s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.02: Preprocessing document \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [01:56<16:09, 1.22s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [01:54<13:21, 5.13s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.02: Preprocessing document \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [01:56<16:09, 1.22s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [01:54<13:21, 5.13s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.02: Done preprocessing document \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [01:56<16:09, 1.22s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [01:54<13:21, 5.13s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.02: Done preprocessing document \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [01:56<16:09, 1.22s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [01:54<13:21, 5.13s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.66: Completed work on page 1 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [01:56<16:09, 1.22s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [01:54<13:21, 5.13s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.66: Completed work on page 1 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [01:56<16:09, 1.22s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [01:54<13:21, 5.13s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:httpx:HTTP Request: POST https://api.aryn.cloud/v1/document/partition \"HTTP/1.1 200 OK\" \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [01:56<16:09, 1.22s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [01:54<13:21, 5.13s/ row]\n", + "\n", + "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 1; Queued blocks: 0; Resources: 1.0 CPU, 2.9MB object store: 10%|██████████▍ | 90.0/885 [01:54<16:05, 1.21s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [01:55<13:21, 5.13s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.00: Completed work on page 4 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [01:56<16:09, 1.22s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [01:55<13:21, 5.13s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.00: Completed work on page 4 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [01:56<16:09, 1.22s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [01:55<13:21, 5.13s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.25: Completed work on page 6 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [01:56<16:09, 1.22s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [01:55<13:21, 5.13s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.25: Completed work on page 6 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [01:56<16:09, 1.22s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [01:55<13:21, 5.13s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.25: Completed work on page 5 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [01:56<16:09, 1.22s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [01:55<13:21, 5.13s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.25: Completed work on page 5 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [01:56<16:09, 1.22s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [01:55<13:21, 5.13s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.25: Completed work on page 3 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [01:56<16:09, 1.22s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [01:55<13:21, 5.13s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.25: Completed work on page 3 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [01:56<16:09, 1.22s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [01:55<13:21, 5.13s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.25: Completed work on page 2 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [01:56<16:09, 1.22s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [01:55<13:21, 5.13s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.25: Completed work on page 2 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [01:57<16:09, 1.22s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [01:55<13:21, 5.13s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.36: Completed work on page 7 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [01:57<16:09, 1.22s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [01:55<13:21, 5.13s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.36: Completed work on page 7 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [01:57<16:09, 1.22s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [01:55<13:21, 5.13s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.51: Completed work on page 8 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [01:57<16:09, 1.22s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [01:55<13:21, 5.13s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.51: Completed work on page 8 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [01:57<16:09, 1.22s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [01:55<13:21, 5.13s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.51: Completed work on page 9 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [01:57<16:09, 1.22s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [01:55<13:21, 5.13s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.51: Completed work on page 9 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [01:57<16:09, 1.22s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [01:55<13:21, 5.13s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.75: Completed work on page 10 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [01:57<16:09, 1.22s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [01:55<13:21, 5.13s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.75: Completed work on page 10 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [01:57<16:09, 1.22s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [01:56<13:21, 5.13s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.86: Completed work on page 11 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [01:57<16:09, 1.22s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [01:56<13:21, 5.13s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.86: Completed work on page 11 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [01:57<16:09, 1.22s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [01:56<13:21, 5.13s/ row]\n", + "\n", + "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 1; Queued blocks: 0; Resources: 1.0 CPU, 2.9MB object store: 10%|██████████▍ | 90.0/885 [01:56<16:05, 1.21s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [01:56<13:21, 5.13s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.06: Completed work on page 12 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [01:57<16:09, 1.22s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [01:56<13:21, 5.13s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.06: Completed work on page 12 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [01:57<16:09, 1.22s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [01:56<13:21, 5.13s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.06: Completed work on page 13 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [01:57<16:09, 1.22s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [01:56<13:21, 5.13s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.06: Completed work on page 13 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [01:57<16:09, 1.22s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [01:56<13:21, 5.13s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.17: Completed work on page 14 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [01:57<16:09, 1.22s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [01:56<13:21, 5.13s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.17: Completed work on page 14 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [01:57<16:09, 1.22s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [01:56<13:21, 5.13s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.38: Completed work on page 15 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [01:57<16:09, 1.22s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [01:56<13:21, 5.13s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.38: Completed work on page 15 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [01:57<16:09, 1.22s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [01:56<13:21, 5.13s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.38: Completed work on page 16 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [01:58<16:09, 1.22s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [01:56<13:21, 5.13s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.38: Completed work on page 16 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [01:58<16:09, 1.22s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [01:56<13:21, 5.13s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.48: Completed work on page 17 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [01:58<16:09, 1.22s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [01:56<13:21, 5.13s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.48: Completed work on page 17 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [01:58<16:09, 1.22s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [01:56<13:21, 5.13s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.59: Completed work on page 18 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [01:58<16:09, 1.22s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [01:56<13:21, 5.13s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.59: Completed work on page 18 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [01:58<16:09, 1.22s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [01:56<13:21, 5.13s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.78: Completed work on page 19 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [01:58<16:09, 1.22s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [01:56<13:21, 5.13s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.78: Completed work on page 19 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [01:58<16:09, 1.22s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [01:56<13:21, 5.13s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.78: Completed work on page 20 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [01:58<16:09, 1.22s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [01:56<13:21, 5.13s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.78: Completed work on page 20 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [01:58<16:09, 1.22s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [01:56<13:21, 5.13s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.87: Completed work on page 21 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [01:58<16:09, 1.22s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [01:57<13:21, 5.13s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.87: Completed work on page 21 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [01:58<16:09, 1.22s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [01:57<13:21, 5.13s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.96: Completed work on page 22 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [01:58<16:09, 1.22s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [01:57<13:21, 5.13s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.96: Completed work on page 22 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [01:58<16:09, 1.22s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [01:57<13:21, 5.13s/ row]\n", + "\n", + "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 1; Queued blocks: 0; Resources: 1.0 CPU, 2.9MB object store: 10%|██████████▍ | 90.0/885 [01:57<16:05, 1.21s/ row]\n", + " \n", + " \n", + "\n", + " \n", + "\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [01:58<16:09, 1.22s/ row] ArynPartitioner: T+ 3.15: Completed work on page 23apBatches(BaseMapTransformCallable___wrap)) pid=77875)\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [01:58<16:09, 1.22s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [01:57<13:21, 5.13s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 3.15: Completed work on page 23 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [01:58<16:09, 1.22s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [01:57<13:21, 5.13s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 3.15: Completed work on page 24 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [01:58<16:09, 1.22s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [01:57<13:21, 5.13s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 3.15: Completed work on page 24 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [01:58<16:09, 1.22s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [01:57<13:21, 5.13s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 3.29: Completed work on page 25 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [01:58<16:09, 1.22s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [01:57<13:21, 5.13s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 3.29: Completed work on page 25 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [01:58<16:09, 1.22s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [01:57<13:21, 5.13s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 3.49: Completed work on page 26 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [01:59<16:09, 1.22s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [01:57<13:21, 5.13s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 3.49: Completed work on page 26 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [01:59<16:09, 1.22s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [01:57<13:21, 5.13s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 3.84: Completed work on page 27 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [01:59<16:09, 1.22s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [01:57<13:21, 5.13s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 3.84: Completed work on page 27 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [01:59<16:09, 1.22s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [01:57<13:21, 5.13s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 3.93: Completed work on page 31 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [01:59<16:09, 1.22s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [01:57<13:21, 5.13s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 3.93: Completed work on page 31 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [01:59<16:09, 1.22s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [01:57<13:21, 5.13s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapBatches(materialize)->MapBatches(split_doc)->MapBatches(llm_map)->MapBatches(parse_json_and_cast)->MapBatches(process_doc)->MapBatches(removeOriginalElements)->MapBatches(process_doc)->MapBatches(mergeDialogue)->MapBatches(process_doc)->MapBatches(OpenAIEmbedder)->MapBatches(_write_docs_tt) pid=77877) INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [01:59<16:09, 1.22s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [01:57<13:21, 5.13s/ row]\n", + "\n", + "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 1; Queued blocks: 0; Resources: 1.0 CPU, 2.9MB object store: 10%|██████████▍ | 90.0/885 [01:57<16:05, 1.21s/ row]\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 4.66: Completed work on page 29 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [01:59<16:09, 1.22s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [01:58<13:21, 5.13s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 4.66: Completed work on page 29 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [01:59<16:09, 1.22s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [01:58<13:21, 5.13s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 4.78: Completed work on page 28 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [01:59<16:09, 1.22s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [01:58<13:21, 5.13s/ row]\n", + "\n", + "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 1; Queued blocks: 0; Resources: 1.0 CPU, 2.9MB object store: 10%|██████████▍ | 90.0/885 [01:58<16:05, 1.21s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [01:59<16:09, 1.22s/ row](MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 4.78: Completed work on page 28\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [01:59<16:09, 1.22s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [01:58<13:21, 5.13s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 4.85: Completed work on page 30 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [01:59<16:09, 1.22s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [01:58<13:21, 5.13s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 4.85: Completed work on page 30 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [01:59<16:09, 1.22s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [01:58<13:21, 5.13s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapBatches(materialize)->MapBatches(split_doc)->MapBatches(llm_map)->MapBatches(parse_json_and_cast)->MapBatches(process_doc)->MapBatches(removeOriginalElements)->MapBatches(process_doc)->MapBatches(mergeDialogue)->MapBatches(process_doc)->MapBatches(OpenAIEmbedder)->MapBatches(_write_docs_tt) pid=77877) INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [02:00<16:09, 1.22s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [01:59<13:21, 5.13s/ row]\n", + "\n", + "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 1; Queued blocks: 0; Resources: 1.0 CPU, 2.9MB object store: 10%|██████████▍ | 90.0/885 [01:59<16:05, 1.21s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [01:59<13:21, 5.13s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.00: Server version aryn-partitioner-0.20250323.124348 Model version 1.4 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [02:01<16:09, 1.22s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [02:00<13:21, 5.13s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.00: Server version aryn-partitioner-0.20250323.124348 Model version 1.4 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [02:01<16:09, 1.22s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [02:00<13:21, 5.13s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.00: Received request with aryn_call_id=aryn:call-9q5zy9urvbu79cvw42dhgf4 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [02:01<16:09, 1.22s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [02:00<13:21, 5.13s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.00: Received request with aryn_call_id=aryn:call-9q5zy9urvbu79cvw42dhgf4 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [02:01<16:09, 1.22s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [02:00<13:21, 5.13s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.00: Waiting for scheduling \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [02:01<16:09, 1.22s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [02:00<13:21, 5.13s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.00: Waiting for scheduling \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [02:01<16:09, 1.22s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [02:00<13:21, 5.13s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.01: Preprocessing document \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [02:01<16:09, 1.22s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [02:00<13:21, 5.13s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.01: Preprocessing document \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [02:01<16:09, 1.22s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [02:00<13:21, 5.13s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.01: Done preprocessing document \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [02:01<16:09, 1.22s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [02:00<13:21, 5.13s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.01: Done preprocessing document \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [02:01<16:09, 1.22s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [02:00<13:21, 5.13s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.66: Completed work on page 2 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [02:01<16:09, 1.22s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [02:00<13:21, 5.13s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.66: Completed work on page 2 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [02:01<16:09, 1.22s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [02:00<13:21, 5.13s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.73: Completed work on page 1 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [02:01<16:09, 1.22s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [02:00<13:21, 5.13s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.73: Completed work on page 1 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [02:01<16:09, 1.22s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [02:00<13:21, 5.13s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.84: Completed work on page 3 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [02:01<16:09, 1.22s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [02:00<13:21, 5.13s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.84: Completed work on page 3 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [02:01<16:09, 1.22s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [02:00<13:21, 5.13s/ row]\n", + "\n", + "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 1; Queued blocks: 0; Resources: 1.0 CPU, 2.9MB object store: 10%|██████████▍ | 90.0/885 [02:00<16:05, 1.21s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [02:00<13:21, 5.13s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.05: Completed work on page 4 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [02:02<16:09, 1.22s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [02:01<13:21, 5.13s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.05: Completed work on page 4 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [02:02<16:09, 1.22s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [02:01<13:21, 5.13s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.27: Completed work on page 5 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [02:02<16:09, 1.22s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [02:01<13:21, 5.13s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.27: Completed work on page 5 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [02:02<16:09, 1.22s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [02:01<13:21, 5.13s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.50: Completed work on page 6 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [02:02<16:09, 1.22s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [02:01<13:21, 5.13s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.50: Completed work on page 6 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [02:02<16:09, 1.22s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [02:01<13:21, 5.13s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.66: Completed work on page 7 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [02:02<16:09, 1.22s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [02:01<13:21, 5.13s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.66: Completed work on page 7 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [02:02<16:09, 1.22s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [02:01<13:21, 5.13s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.90: Completed work on page 8 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [02:02<16:09, 1.22s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [02:01<13:21, 5.13s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.90: Completed work on page 8 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [02:02<16:09, 1.22s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [02:01<13:21, 5.13s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.00: Completed work on page 9 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [02:02<16:09, 1.22s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [02:01<13:21, 5.13s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.00: Completed work on page 9 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [02:02<16:09, 1.22s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [02:01<13:21, 5.13s/ row]\n", + "\n", + "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 1; Queued blocks: 0; Resources: 1.0 CPU, 2.9MB object store: 10%|██████████▍ | 90.0/885 [02:01<16:05, 1.21s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [02:01<13:21, 5.13s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.16: Completed work on page 10 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [02:03<16:09, 1.22s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [02:02<13:21, 5.13s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.16: Completed work on page 10 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [02:03<16:09, 1.22s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [02:02<13:21, 5.13s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.35: Completed work on page 11 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [02:03<16:09, 1.22s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [02:02<13:21, 5.13s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.35: Completed work on page 11 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [02:03<16:09, 1.22s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [02:02<13:21, 5.13s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.50: Completed work on page 12 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [02:03<16:09, 1.22s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [02:02<13:21, 5.13s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.50: Completed work on page 12 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [02:03<16:09, 1.22s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [02:02<13:21, 5.13s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.77: Completed work on page 13 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [02:03<16:09, 1.22s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [02:02<13:21, 5.13s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.77: Completed work on page 13 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [02:03<16:09, 1.22s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [02:02<13:21, 5.13s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.85: Completed work on page 14 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [02:03<16:09, 1.22s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [02:02<13:21, 5.13s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.85: Completed work on page 14 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [02:03<16:09, 1.22s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [02:02<13:21, 5.13s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 3.02: Completed work on page 15 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [02:04<16:09, 1.22s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [02:02<13:21, 5.13s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 3.02: Completed work on page 15 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [02:04<16:09, 1.22s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [02:02<13:21, 5.13s/ row]\n", + "\n", + "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 1; Queued blocks: 0; Resources: 1.0 CPU, 2.9MB object store: 10%|██████████▍ | 90.0/885 [02:02<16:05, 1.21s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [02:02<13:21, 5.13s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 3.20: Completed work on page 16 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [02:04<16:09, 1.22s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [02:03<13:21, 5.13s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 3.20: Completed work on page 16 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [02:04<16:09, 1.22s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [02:03<13:21, 5.13s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 3.37: Completed work on page 17 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [02:04<16:09, 1.22s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [02:03<13:21, 5.13s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 3.37: Completed work on page 17 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [02:04<16:09, 1.22s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [02:03<13:21, 5.13s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 3.53: Completed work on page 18 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [02:04<16:09, 1.22s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [02:03<13:21, 5.13s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 3.53: Completed work on page 18 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [02:04<16:09, 1.22s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [02:03<13:21, 5.13s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 3.80: Completed work on page 19 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [02:04<16:09, 1.22s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [02:03<13:21, 5.13s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 3.80: Completed work on page 19 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [02:04<16:09, 1.22s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [02:03<13:21, 5.13s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 3.89: Completed work on page 20 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [02:05<16:09, 1.22s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [02:03<13:21, 5.13s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 3.89: Completed work on page 20 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [02:05<16:09, 1.22s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [02:03<13:21, 5.13s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 4.05: Completed work on page 21 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [02:05<16:09, 1.22s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [02:03<13:21, 5.13s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 4.05: Completed work on page 21 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [02:05<16:09, 1.22s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [02:03<13:21, 5.13s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapBatches(materialize)->MapBatches(split_doc)->MapBatches(llm_map)->MapBatches(parse_json_and_cast)->MapBatches(process_doc)->MapBatches(removeOriginalElements)->MapBatches(process_doc)->MapBatches(mergeDialogue)->MapBatches(process_doc)->MapBatches(OpenAIEmbedder)->MapBatches(_write_docs_tt) pid=77877) INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\" [repeated 2x across cluster]\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [02:05<16:09, 1.22s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [02:03<13:21, 5.13s/ row]\n", + "\n", + "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 1; Queued blocks: 0; Resources: 1.0 CPU, 2.9MB object store: 10%|██████████▍ | 90.0/885 [02:03<16:05, 1.21s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [02:03<13:21, 5.13s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 4.36: Completed work on page 22 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [02:05<16:09, 1.22s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [02:04<13:21, 5.13s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 4.36: Completed work on page 22 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [02:05<16:09, 1.22s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [02:04<13:21, 5.13s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 4.65: Completed work on page 23 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [02:05<16:09, 1.22s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [02:04<13:21, 5.13s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 4.65: Completed work on page 23 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [02:05<16:09, 1.22s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [02:04<13:21, 5.13s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 4.89: Completed work on page 25 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [02:05<16:09, 1.22s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [02:04<13:21, 5.13s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 4.89: Completed work on page 25 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [02:05<16:09, 1.22s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [02:04<13:21, 5.13s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 4.96: Completed work on page 24 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [02:05<16:09, 1.22s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [02:04<13:21, 5.13s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 4.96: Completed work on page 24 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [02:06<16:09, 1.22s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [02:04<13:21, 5.13s/ row]\n", + "\n", + "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 1; Queued blocks: 0; Resources: 1.0 CPU, 2.9MB object store: 10%|██████████▍ | 90.0/885 [02:04<16:05, 1.21s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 1.8MB object store; [locality off]: 12%|████████▍ | 21.0/177 [02:04<13:21, 5.13s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 1.8MB object store; [locality off]: 12%|████████▍ | 21.0/177 [02:04<13:21, 5.13s/ row]\n", + "\n", + "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 0; Queued blocks: 0; Resources: 0.0 CPU, 0.0B object store: 10%|██████████▌ | 90.0/885 [02:04<16:05, 1.21s/ row]\n", + "\n", + "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 0; Queued blocks: 0; Resources: 0.0 CPU, 0.0B object store: 12%|████████████▍ | 105/885 [02:04<13:37, 1.05s/ row]\n", + "\n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.8MB/1.0GB object store: 12%|█████████████████▋ | 105/885 [02:06<13:39, 1.05s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 47; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 12%|████████▍ | 21.0/177 [02:05<13:21, 5.13s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 47; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 14%|█████████▋ | 24.0/177 [02:05<12:38, 4.96s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 47; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 14%|█████████▋ | 24.0/177 [02:05<12:38, 4.96s/ row]\n", + "\n", + "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 1; Queued blocks: 0; Resources: 1.0 CPU, 2.8MB object store: 12%|████████████▎ | 105/885 [02:06<13:37, 1.05s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.00: Server version aryn-partitioner-0.20250323.124348 Model version 1.4 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 6.2MB/1.0GB object store: 12%|█████████████████▋ | 105/885 [02:08<13:39, 1.05s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 47; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 14%|█████████▋ | 24.0/177 [02:06<12:38, 4.96s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.00: Server version aryn-partitioner-0.20250323.124348 Model version 1.4 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 6.2MB/1.0GB object store: 12%|█████████████████▋ | 105/885 [02:08<13:39, 1.05s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 47; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 14%|█████████▋ | 24.0/177 [02:06<12:38, 4.96s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.00: Received request with aryn_call_id=aryn:call-fcefmenrarq8e5bb1yyb3rg \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 6.2MB/1.0GB object store: 12%|█████████████████▋ | 105/885 [02:08<13:39, 1.05s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 47; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 14%|█████████▋ | 24.0/177 [02:06<12:38, 4.96s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.00: Received request with aryn_call_id=aryn:call-fcefmenrarq8e5bb1yyb3rg \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 6.2MB/1.0GB object store: 12%|█████████████████▋ | 105/885 [02:08<13:39, 1.05s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 47; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 14%|█████████▋ | 24.0/177 [02:06<12:38, 4.96s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.00: Waiting for scheduling \n", + "\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 6.2MB/1.0GB object store: 12%|█████████████████▋ | 105/885 [02:08<13:39, 1.05s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 47; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 14%|█████████▋ | 24.0/177 [02:07<12:38, 4.96s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.00: Waiting for scheduling \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 6.2MB/1.0GB object store: 12%|█████████████████▋ | 105/885 [02:08<13:39, 1.05s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 47; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 14%|█████████▋ | 24.0/177 [02:07<12:38, 4.96s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.01: Preprocessing document \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 6.2MB/1.0GB object store: 12%|█████████████████▋ | 105/885 [02:08<13:39, 1.05s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 47; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 14%|█████████▋ | 24.0/177 [02:07<12:38, 4.96s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.01: Preprocessing document \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 6.2MB/1.0GB object store: 12%|█████████████████▋ | 105/885 [02:08<13:39, 1.05s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 47; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 14%|█████████▋ | 24.0/177 [02:07<12:38, 4.96s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.01: Done preprocessing document \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 6.2MB/1.0GB object store: 12%|█████████████████▋ | 105/885 [02:08<13:39, 1.05s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 47; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 14%|█████████▋ | 24.0/177 [02:07<12:38, 4.96s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.01: Done preprocessing document \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 6.2MB/1.0GB object store: 12%|█████████████████▋ | 105/885 [02:08<13:39, 1.05s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 47; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 14%|█████████▋ | 24.0/177 [02:07<12:38, 4.96s/ row]\n", + "\n", + "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 1; Queued blocks: 0; Resources: 1.0 CPU, 2.8MB object store: 12%|████████████▎ | 105/885 [02:07<13:37, 1.05s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 6.2MB/1.0GB object store: 12%|█████████████████▋ | 105/885 [02:08<13:39, 1.05s/ row](MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.64: Completed work on page 2\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 6.2MB/1.0GB object store: 12%|█████████████████▋ | 105/885 [02:08<13:39, 1.05s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 47; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 14%|█████████▋ | 24.0/177 [02:07<12:38, 4.96s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.64: Completed work on page 2 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 6.2MB/1.0GB object store: 12%|█████████████████▋ | 105/885 [02:08<13:39, 1.05s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 47; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 14%|█████████▋ | 24.0/177 [02:07<12:38, 4.96s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.73: Completed work on page 1 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 6.2MB/1.0GB object store: 12%|█████████████████▋ | 105/885 [02:08<13:39, 1.05s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 47; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 14%|█████████▋ | 24.0/177 [02:07<12:38, 4.96s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.73: Completed work on page 1 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 6.2MB/1.0GB object store: 12%|█████████████████▋ | 105/885 [02:08<13:39, 1.05s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 47; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 14%|█████████▋ | 24.0/177 [02:07<12:38, 4.96s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.84: Completed work on page 3 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 6.2MB/1.0GB object store: 12%|█████████████████▋ | 105/885 [02:08<13:39, 1.05s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 47; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 14%|█████████▋ | 24.0/177 [02:07<12:38, 4.96s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.84: Completed work on page 3 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 6.2MB/1.0GB object store: 12%|█████████████████▋ | 105/885 [02:08<13:39, 1.05s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 47; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 14%|█████████▋ | 24.0/177 [02:07<12:38, 4.96s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.06: Completed work on page 4 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 6.2MB/1.0GB object store: 12%|█████████████████▋ | 105/885 [02:09<13:39, 1.05s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 47; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 14%|█████████▋ | 24.0/177 [02:07<12:38, 4.96s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.06: Completed work on page 4 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 6.2MB/1.0GB object store: 12%|█████████████████▋ | 105/885 [02:09<13:39, 1.05s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 47; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 14%|█████████▋ | 24.0/177 [02:08<12:38, 4.96s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.28: Completed work on page 5 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 6.2MB/1.0GB object store: 12%|█████████████████▋ | 105/885 [02:09<13:39, 1.05s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 47; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 14%|█████████▋ | 24.0/177 [02:08<12:38, 4.96s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.28: Completed work on page 5 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 6.2MB/1.0GB object store: 12%|█████████████████▋ | 105/885 [02:09<13:39, 1.05s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 47; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 14%|█████████▋ | 24.0/177 [02:08<12:38, 4.96s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.52: Completed work on page 6 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 6.2MB/1.0GB object store: 12%|█████████████████▋ | 105/885 [02:09<13:39, 1.05s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 47; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 14%|█████████▋ | 24.0/177 [02:08<12:38, 4.96s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.52: Completed work on page 6 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 6.2MB/1.0GB object store: 12%|█████████████████▋ | 105/885 [02:09<13:39, 1.05s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 47; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 14%|█████████▋ | 24.0/177 [02:08<12:38, 4.96s/ row]\n", + "\n", + " \n", + " \n", + "\n", + " \n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 47; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 14%|█████████▋ | 24.0/177 [02:08<12:38, 4.96s/ row](MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.71: Completed work on page 7\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 6.2MB/1.0GB object store: 12%|█████████████████▋ | 105/885 [02:09<13:39, 1.05s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 47; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 14%|█████████▋ | 24.0/177 [02:08<12:38, 4.96s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.71: Completed work on page 7 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 6.2MB/1.0GB object store: 12%|█████████████████▋ | 105/885 [02:09<13:39, 1.05s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 47; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 14%|█████████▋ | 24.0/177 [02:08<12:38, 4.96s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.88: Completed work on page 8 \n", + "\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.07: Completed work on page 4 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.6MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:36<09:27, 1.11 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 38; Resources: 1.0 CPU, 1.6MB object store; [locality off]: 29%|████████████████████▍ | 51.0/177 [04:35<09:45, 4.65s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.07: Completed work on page 4 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.6MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:36<09:27, 1.11 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 38; Resources: 1.0 CPU, 1.6MB object store; [locality off]: 29%|████████████████████▍ | 51.0/177 [04:35<09:45, 4.65s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.29: Completed work on page 5 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.6MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:36<09:27, 1.11 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 38; Resources: 1.0 CPU, 1.6MB object store; [locality off]: 29%|████████████████████▍ | 51.0/177 [04:35<09:45, 4.65s/ row]\n", + "\n", + " \n", + " \n", + "\n", + " \n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 38; Resources: 1.0 CPU, 1.6MB object store; [locality off]: 29%|████████████████████▍ | 51.0/177 [04:35<09:45, 4.65s/ row](MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.29: Completed work on page 5\n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.6MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:36<09:27, 1.11 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 38; Resources: 1.0 CPU, 1.6MB object store; [locality off]: 29%|████████████████████▍ | 51.0/177 [04:35<09:45, 4.65s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.54: Completed work on page 6 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.6MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:36<09:27, 1.11 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 38; Resources: 1.0 CPU, 1.6MB object store; [locality off]: 29%|████████████████████▍ | 51.0/177 [04:35<09:45, 4.65s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.54: Completed work on page 6 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.6MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:36<09:27, 1.11 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 38; Resources: 1.0 CPU, 1.6MB object store; [locality off]: 29%|████████████████████▍ | 51.0/177 [04:35<09:45, 4.65s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.72: Completed work on page 7 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.6MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:36<09:27, 1.11 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 38; Resources: 1.0 CPU, 1.6MB object store; [locality off]: 29%|████████████████████▍ | 51.0/177 [04:35<09:45, 4.65s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.72: Completed work on page 7 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.6MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:36<09:27, 1.11 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 38; Resources: 1.0 CPU, 1.6MB object store; [locality off]: 29%|████████████████████▍ | 51.0/177 [04:35<09:45, 4.65s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.90: Completed work on page 8 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.6MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:36<09:27, 1.11 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 38; Resources: 1.0 CPU, 1.6MB object store; [locality off]: 29%|████████████████████▍ | 51.0/177 [04:35<09:45, 4.65s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.90: Completed work on page 8 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.6MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:37<09:27, 1.11 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 38; Resources: 1.0 CPU, 1.6MB object store; [locality off]: 29%|████████████████████▍ | 51.0/177 [04:35<09:45, 4.65s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:httpx:HTTP Request: POST https://api.aryn.cloud/v1/document/partition \"HTTP/1.1 200 OK\" \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.6MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:37<09:27, 1.11 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 38; Resources: 1.0 CPU, 1.6MB object store; [locality off]: 29%|████████████████████▍ | 51.0/177 [04:35<09:45, 4.65s/ row]\n", + "\n", + "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 0; Queued blocks: 0; Resources: 0.0 CPU, 0.0B object store: 29%|██████████████████████████████▎ | 255/885 [04:35<09:25, 1.11 row/s]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.09: Completed work on page 9 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.6MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:37<09:27, 1.11 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 38; Resources: 1.0 CPU, 1.6MB object store; [locality off]: 29%|████████████████████▍ | 51.0/177 [04:36<09:45, 4.65s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.09: Completed work on page 9 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.6MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:37<09:27, 1.11 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 38; Resources: 1.0 CPU, 1.6MB object store; [locality off]: 29%|████████████████████▍ | 51.0/177 [04:36<09:45, 4.65s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.26: Completed work on page 10 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.6MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:37<09:27, 1.11 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 38; Resources: 1.0 CPU, 1.6MB object store; [locality off]: 29%|████████████████████▍ | 51.0/177 [04:36<09:45, 4.65s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.26: Completed work on page 10 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.6MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:37<09:27, 1.11 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 38; Resources: 1.0 CPU, 1.6MB object store; [locality off]: 29%|████████████████████▍ | 51.0/177 [04:36<09:45, 4.65s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.45: Completed work on page 11 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.6MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:37<09:27, 1.11 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 38; Resources: 1.0 CPU, 1.6MB object store; [locality off]: 29%|████████████████████▍ | 51.0/177 [04:36<09:45, 4.65s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.45: Completed work on page 11 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.6MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:37<09:27, 1.11 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 38; Resources: 1.0 CPU, 1.6MB object store; [locality off]: 29%|████████████████████▍ | 51.0/177 [04:36<09:45, 4.65s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.62: Completed work on page 12 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.6MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:37<09:27, 1.11 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 38; Resources: 1.0 CPU, 1.6MB object store; [locality off]: 29%|████████████████████▍ | 51.0/177 [04:36<09:45, 4.65s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.62: Completed work on page 12 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.6MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:37<09:27, 1.11 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 38; Resources: 1.0 CPU, 1.6MB object store; [locality off]: 29%|████████████████████▍ | 51.0/177 [04:36<09:45, 4.65s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.79: Completed work on page 13 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.6MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:37<09:27, 1.11 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 38; Resources: 1.0 CPU, 1.6MB object store; [locality off]: 29%|████████████████████▍ | 51.0/177 [04:36<09:45, 4.65s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.79: Completed work on page 13 \n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 38; Resources: 1.0 CPU, 1.6MB object store; [locality off]: 29%|████████████████████▍ | 51.0/177 [04:36<09:45, 4.65s/ row]\n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.6MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:37<09:27, 1.11 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 38; Resources: 1.0 CPU, 1.6MB object store; [locality off]: 29%|████████████████████▍ | 51.0/177 [04:36<09:45, 4.65s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.96: Completed work on page 14 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.6MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:37<09:27, 1.11 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 38; Resources: 1.0 CPU, 1.6MB object store; [locality off]: 29%|████████████████████▍ | 51.0/177 [04:36<09:45, 4.65s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.96: Completed work on page 14 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.6MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:37<09:27, 1.11 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 38; Resources: 1.0 CPU, 1.6MB object store; [locality off]: 29%|████████████████████▍ | 51.0/177 [04:36<09:45, 4.65s/ row]\n", + "\n", + "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 0; Queued blocks: 0; Resources: 0.0 CPU, 0.0B object store: 29%|██████████████████████████████▎ | 255/885 [04:36<09:25, 1.11 row/s]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 3.11: Completed work on page 15 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.6MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:38<09:27, 1.11 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 38; Resources: 1.0 CPU, 1.6MB object store; [locality off]: 29%|████████████████████▍ | 51.0/177 [04:37<09:45, 4.65s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 3.11: Completed work on page 15 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.6MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:38<09:27, 1.11 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 38; Resources: 1.0 CPU, 1.6MB object store; [locality off]: 29%|████████████████████▍ | 51.0/177 [04:37<09:45, 4.65s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 3.44: Completed work on page 16 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.6MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:38<09:27, 1.11 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 38; Resources: 1.0 CPU, 1.6MB object store; [locality off]: 29%|████████████████████▍ | 51.0/177 [04:37<09:45, 4.65s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 3.44: Completed work on page 16 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.6MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:38<09:27, 1.11 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 38; Resources: 1.0 CPU, 1.6MB object store; [locality off]: 29%|████████████████████▍ | 51.0/177 [04:37<09:45, 4.65s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 3.62: Completed work on page 17 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.6MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:38<09:27, 1.11 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 38; Resources: 1.0 CPU, 1.6MB object store; [locality off]: 29%|████████████████████▍ | 51.0/177 [04:37<09:45, 4.65s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 3.62: Completed work on page 17 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.6MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:38<09:27, 1.11 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 38; Resources: 1.0 CPU, 1.6MB object store; [locality off]: 29%|████████████████████▍ | 51.0/177 [04:37<09:45, 4.65s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 3.80: Completed work on page 18 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.6MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:38<09:27, 1.11 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 38; Resources: 1.0 CPU, 1.6MB object store; [locality off]: 29%|████████████████████▍ | 51.0/177 [04:37<09:45, 4.65s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 3.80: Completed work on page 18 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.6MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:38<09:27, 1.11 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 38; Resources: 1.0 CPU, 1.6MB object store; [locality off]: 29%|████████████████████▍ | 51.0/177 [04:37<09:45, 4.65s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 3.95: Completed work on page 19 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.6MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:38<09:27, 1.11 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 38; Resources: 1.0 CPU, 1.6MB object store; [locality off]: 29%|████████████████████▍ | 51.0/177 [04:37<09:45, 4.65s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 3.95: Completed work on page 19 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.6MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:38<09:27, 1.11 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 38; Resources: 1.0 CPU, 1.6MB object store; [locality off]: 29%|████████████████████▍ | 51.0/177 [04:37<09:45, 4.65s/ row]\n", + "\n", + "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 0; Queued blocks: 0; Resources: 0.0 CPU, 0.0B object store: 29%|██████████████████████████████▎ | 255/885 [04:37<09:25, 1.11 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 38; Resources: 1.0 CPU, 1.6MB object store; [locality off]: 29%|████████████████████▍ | 51.0/177 [04:37<09:45, 4.65s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 4.28: Completed work on page 20 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.6MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:39<09:27, 1.11 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 38; Resources: 1.0 CPU, 1.6MB object store; [locality off]: 29%|████████████████████▍ | 51.0/177 [04:38<09:45, 4.65s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 4.28: Completed work on page 20 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.6MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:39<09:27, 1.11 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 38; Resources: 1.0 CPU, 1.6MB object store; [locality off]: 29%|████████████████████▍ | 51.0/177 [04:38<09:45, 4.65s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 4.42: Completed work on page 23 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.6MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:39<09:27, 1.11 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 38; Resources: 1.0 CPU, 1.6MB object store; [locality off]: 29%|████████████████████▍ | 51.0/177 [04:38<09:45, 4.65s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 4.42: Completed work on page 23 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.6MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:39<09:27, 1.11 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 38; Resources: 1.0 CPU, 1.6MB object store; [locality off]: 29%|████████████████████▍ | 51.0/177 [04:38<09:45, 4.65s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 4.44: Completed work on page 21 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.6MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:39<09:27, 1.11 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 38; Resources: 1.0 CPU, 1.6MB object store; [locality off]: 29%|████████████████████▍ | 51.0/177 [04:38<09:45, 4.65s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 4.44: Completed work on page 21 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.6MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:39<09:27, 1.11 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 38; Resources: 1.0 CPU, 1.6MB object store; [locality off]: 29%|████████████████████▍ | 51.0/177 [04:38<09:45, 4.65s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 4.47: Completed work on page 22 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.6MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:39<09:27, 1.11 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 38; Resources: 1.0 CPU, 1.6MB object store; [locality off]: 29%|████████████████████▍ | 51.0/177 [04:38<09:45, 4.65s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 4.47: Completed work on page 22 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.6MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:39<09:27, 1.11 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 38; Resources: 1.0 CPU, 1.6MB object store; [locality off]: 29%|████████████████████▍ | 51.0/177 [04:38<09:45, 4.65s/ row]\n", + "\n", + "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 0; Queued blocks: 0; Resources: 0.0 CPU, 0.0B object store: 29%|██████████████████████████████▎ | 255/885 [04:38<09:25, 1.11 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 38; Resources: 1.0 CPU, 1.6MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:38<09:42, 4.74s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 38; Resources: 1.0 CPU, 1.6MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:38<09:42, 4.74s/ row]\n", + "\n", + "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 1; Queued blocks: 0; Resources: 1.0 CPU, 2.8MB object store: 29%|█████████████████████████████▉ | 255/885 [04:39<09:25, 1.11 row/s]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:httpx:HTTP Request: POST https://api.aryn.cloud/v1/document/partition \"HTTP/1.1 200 OK\" \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 7.0MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:40<09:27, 1.11 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 38; Resources: 1.0 CPU, 1.6MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:39<09:42, 4.74s/ row]\n", + "\n", + "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 1; Queued blocks: 0; Resources: 1.0 CPU, 2.8MB object store: 29%|█████████████████████████████▉ | 255/885 [04:39<09:25, 1.11 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 4.2MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:39<09:42, 4.74s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 4.2MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:39<09:42, 4.74s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.00: Server version aryn-partitioner-0.20250323.124348 Model version 1.4 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 7.0MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:41<09:27, 1.11 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 4.2MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:40<09:42, 4.74s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.00: Server version aryn-partitioner-0.20250323.124348 Model version 1.4 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 7.0MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:41<09:27, 1.11 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 4.2MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:40<09:42, 4.74s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.00: Received request with aryn_call_id=aryn:call-1vdx2lct3782x1vprr1ij71 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 7.0MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:41<09:27, 1.11 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 4.2MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:40<09:42, 4.74s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.00: Received request with aryn_call_id=aryn:call-1vdx2lct3782x1vprr1ij71 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 7.0MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:41<09:27, 1.11 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 4.2MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:40<09:42, 4.74s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.00: Waiting for scheduling \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 7.0MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:41<09:27, 1.11 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 4.2MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:40<09:42, 4.74s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.00: Waiting for scheduling \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 7.0MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:41<09:27, 1.11 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 4.2MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:40<09:42, 4.74s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 7.0MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:41<09:27, 1.11 row/s](MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.01: Preprocessing document\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 7.0MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:41<09:27, 1.11 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 4.2MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:40<09:42, 4.74s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.01: Preprocessing document \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 7.0MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:41<09:27, 1.11 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 4.2MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:40<09:42, 4.74s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.01: Done preprocessing document \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 7.0MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:41<09:27, 1.11 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 4.2MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:40<09:42, 4.74s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.01: Done preprocessing document \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 7.0MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:41<09:27, 1.11 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 4.2MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:40<09:42, 4.74s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.64: Completed work on page 1 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 7.0MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:41<09:27, 1.11 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 4.2MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:40<09:42, 4.74s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.64: Completed work on page 1 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 7.0MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:41<09:27, 1.11 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 4.2MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:40<09:42, 4.74s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.74: Completed work on page 2 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 7.0MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:41<09:27, 1.11 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 4.2MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:40<09:42, 4.74s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.74: Completed work on page 2 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 7.0MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:41<09:27, 1.11 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 4.2MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:40<09:42, 4.74s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.86: Completed work on page 3 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 7.0MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:41<09:27, 1.11 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 4.2MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:40<09:42, 4.74s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.86: Completed work on page 3 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 7.0MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:41<09:27, 1.11 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 4.2MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:40<09:42, 4.74s/ row]\n", + "\n", + "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 1; Queued blocks: 0; Resources: 1.0 CPU, 2.8MB object store: 29%|█████████████████████████████▉ | 255/885 [04:40<09:25, 1.11 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 4.2MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:40<09:42, 4.74s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.17: Completed work on page 4 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 7.0MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:42<09:27, 1.11 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 4.2MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:41<09:42, 4.74s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.17: Completed work on page 4 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 7.0MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:42<09:27, 1.11 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 4.2MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:41<09:42, 4.74s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.36: Completed work on page 5 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 7.0MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:42<09:27, 1.11 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 4.2MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:41<09:42, 4.74s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.36: Completed work on page 5 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 7.0MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:42<09:27, 1.11 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 4.2MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:41<09:42, 4.74s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.55: Completed work on page 6 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 7.0MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:42<09:27, 1.11 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 4.2MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:41<09:42, 4.74s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.55: Completed work on page 6 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 7.0MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:42<09:27, 1.11 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 4.2MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:41<09:42, 4.74s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 7.0MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:42<09:27, 1.11 row/s](MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.72: Completed work on page 7\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 7.0MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:42<09:27, 1.11 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 4.2MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:41<09:42, 4.74s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.72: Completed work on page 7 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 7.0MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:42<09:27, 1.11 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 4.2MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:41<09:42, 4.74s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.91: Completed work on page 8 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 7.0MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:42<09:27, 1.11 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 4.2MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:41<09:42, 4.74s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.91: Completed work on page 8 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 7.0MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:42<09:27, 1.11 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 4.2MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:41<09:42, 4.74s/ row]\n", + "\n", + "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 1; Queued blocks: 0; Resources: 1.0 CPU, 2.8MB object store: 29%|█████████████████████████████▉ | 255/885 [04:41<09:25, 1.11 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 4.2MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:41<09:42, 4.74s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.10: Completed work on page 9 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 7.0MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:43<09:27, 1.11 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 4.2MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:42<09:42, 4.74s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.10: Completed work on page 9 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 7.0MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:43<09:27, 1.11 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 4.2MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:42<09:42, 4.74s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.29: Completed work on page 10 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 7.0MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:43<09:27, 1.11 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 4.2MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:42<09:42, 4.74s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.29: Completed work on page 10 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 7.0MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:43<09:27, 1.11 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 4.2MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:42<09:42, 4.74s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.47: Completed work on page 11 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 7.0MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:43<09:27, 1.11 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 4.2MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:42<09:42, 4.74s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.47: Completed work on page 11 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 7.0MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:43<09:27, 1.11 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 4.2MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:42<09:42, 4.74s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.65: Completed work on page 12 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 7.0MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:43<09:27, 1.11 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 4.2MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:42<09:42, 4.74s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 7.0MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:43<09:27, 1.11 row/s] INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.65: Completed work on page 12\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 7.0MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:43<09:27, 1.11 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 4.2MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:42<09:42, 4.74s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.83: Completed work on page 13 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 7.0MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:43<09:27, 1.11 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 4.2MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:42<09:42, 4.74s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.83: Completed work on page 13 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 7.0MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:43<09:27, 1.11 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 4.2MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:42<09:42, 4.74s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 3.01: Completed work on page 14 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 7.0MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:43<09:27, 1.11 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 4.2MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:42<09:42, 4.74s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 3.01: Completed work on page 14 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 7.0MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:43<09:27, 1.11 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 4.2MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:42<09:42, 4.74s/ row]\n", + "\n", + "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 1; Queued blocks: 0; Resources: 1.0 CPU, 2.8MB object store: 29%|█████████████████████████████▉ | 255/885 [04:42<09:25, 1.11 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 4.2MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:42<09:42, 4.74s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 3.18: Completed work on page 15 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 7.0MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:44<09:27, 1.11 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 4.2MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:43<09:42, 4.74s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 3.18: Completed work on page 15 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 7.0MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:44<09:27, 1.11 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 4.2MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:43<09:42, 4.74s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 3.36: Completed work on page 16 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 7.0MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:44<09:27, 1.11 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 4.2MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:43<09:42, 4.74s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 3.36: Completed work on page 16 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 7.0MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:44<09:27, 1.11 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 4.2MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:43<09:42, 4.74s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 3.54: Completed work on page 17 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 7.0MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:44<09:27, 1.11 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 4.2MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:43<09:42, 4.74s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 3.54: Completed work on page 17 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 7.0MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:44<09:27, 1.11 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 4.2MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:43<09:42, 4.74s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 3.72: Completed work on page 18 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 7.0MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:44<09:27, 1.11 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 4.2MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:43<09:42, 4.74s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 3.72: Completed work on page 18 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 7.0MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:44<09:27, 1.11 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 4.2MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:43<09:42, 4.74s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 4.05: Completed work on page 19 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 7.0MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:44<09:27, 1.11 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 4.2MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:43<09:42, 4.74s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 4.05: Completed work on page 19 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 7.0MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:44<09:27, 1.11 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 4.2MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:43<09:42, 4.74s/ row]\n", + "\n", + "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 1; Queued blocks: 0; Resources: 1.0 CPU, 2.8MB object store: 29%|█████████████████████████████▉ | 255/885 [04:43<09:25, 1.11 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 4.2MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:44<09:42, 4.74s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 4.23: Completed work on page 20 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 7.0MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:45<09:27, 1.11 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 4.2MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:44<09:42, 4.74s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 4.23: Completed work on page 20 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 7.0MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:45<09:27, 1.11 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 4.2MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:44<09:42, 4.74s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 4.40: Completed work on page 21 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 7.0MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:45<09:27, 1.11 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 4.2MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:44<09:42, 4.74s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 4.40: Completed work on page 21 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 7.0MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:45<09:27, 1.11 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 4.2MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:44<09:42, 4.74s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 4.57: Completed work on page 22 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 7.0MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:45<09:27, 1.11 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 4.2MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:44<09:42, 4.74s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 4.57: Completed work on page 22 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 7.0MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:45<09:27, 1.11 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 4.2MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:44<09:42, 4.74s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 4.96: Completed work on page 26 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 7.0MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:45<09:27, 1.11 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 4.2MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:44<09:42, 4.74s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 4.96: Completed work on page 26 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 7.0MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:45<09:27, 1.11 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 4.2MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:44<09:42, 4.74s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 7.0MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:45<09:27, 1.11 row/s](MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 5.08: Completed work on page 24\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 7.0MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:45<09:27, 1.11 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 4.2MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:44<09:42, 4.74s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 5.08: Completed work on page 24 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 7.0MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:45<09:27, 1.11 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 4.2MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:44<09:42, 4.74s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapBatches(materialize)->MapBatches(split_doc)->MapBatches(llm_map)->MapBatches(parse_json_and_cast)->MapBatches(process_doc)->MapBatches(removeOriginalElements)->MapBatches(process_doc)->MapBatches(mergeDialogue)->MapBatches(process_doc)->MapBatches(OpenAIEmbedder)->MapBatches(_write_docs_tt) pid=77877) INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\" [repeated 4x across cluster]\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 7.0MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:45<09:27, 1.11 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 4.2MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:44<09:42, 4.74s/ row]\n", + "\n", + "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 1; Queued blocks: 0; Resources: 1.0 CPU, 2.8MB object store: 29%|█████████████████████████████▉ | 255/885 [04:44<09:25, 1.11 row/s]\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 5.23: Completed work on page 25 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 7.0MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:46<09:27, 1.11 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 4.2MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:45<09:42, 4.74s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 5.23: Completed work on page 25 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 7.0MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:46<09:27, 1.11 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 4.2MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:45<09:42, 4.74s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 5.25: Completed work on page 23 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 7.0MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:46<09:27, 1.11 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 4.2MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:45<09:42, 4.74s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) \n", + "\n", + "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 1; Queued blocks: 0; Resources: 1.0 CPU, 2.8MB object store: 29%|█████████████████████████████▉ | 255/885 [04:45<09:25, 1.11 row/s]\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 7.0MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:46<09:27, 1.11 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 4.2MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:45<09:42, 4.74s/ row]\n", + "\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 7.0MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:46<09:27, 1.11 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 4.2MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:46<09:42, 4.74s/ row]\n", + "\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 7.0MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:47<09:27, 1.11 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 4.2MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:47<09:42, 4.74s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.00: Server version aryn-partitioner-0.20250323.124348 Model version 1.4 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 7.0MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:48<09:27, 1.11 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 4.2MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:47<09:42, 4.74s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.00: Server version aryn-partitioner-0.20250323.124348 Model version 1.4 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 7.0MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:49<09:27, 1.11 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 4.2MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:47<09:42, 4.74s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.00: Received request with aryn_call_id=aryn:call-wxl681ch50401yw82tpxw8e \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 7.0MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:49<09:27, 1.11 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 4.2MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:47<09:42, 4.74s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.00: Received request with aryn_call_id=aryn:call-wxl681ch50401yw82tpxw8e \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 7.0MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:49<09:27, 1.11 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 4.2MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:47<09:42, 4.74s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 7.0MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:49<09:27, 1.11 row/s](MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.00: Waiting for scheduling\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 7.0MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:49<09:27, 1.11 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 4.2MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:47<09:42, 4.74s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.00: Waiting for scheduling \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 7.0MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:49<09:27, 1.11 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 4.2MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:47<09:42, 4.74s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.03: Preprocessing document \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 7.0MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:49<09:27, 1.11 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 4.2MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:47<09:42, 4.74s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.03: Preprocessing document \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 7.0MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:49<09:27, 1.11 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 4.2MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:47<09:42, 4.74s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.03: Done preprocessing document \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 7.0MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:49<09:27, 1.11 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 4.2MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:47<09:42, 4.74s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.03: Done preprocessing document \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 7.0MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:49<09:27, 1.11 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 4.2MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:47<09:42, 4.74s/ row]\n", + "\n", + "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 1; Queued blocks: 0; Resources: 1.0 CPU, 2.8MB object store: 29%|█████████████████████████████▉ | 255/885 [04:47<09:25, 1.11 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 4.2MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:48<09:42, 4.74s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.18: Completed work on page 1 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 7.0MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:49<09:27, 1.11 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 4.2MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:48<09:42, 4.74s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.18: Completed work on page 1 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 7.0MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:49<09:27, 1.11 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 4.2MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:48<09:42, 4.74s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.74: Completed work on page 2 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 7.0MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:49<09:27, 1.11 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 4.2MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:48<09:42, 4.74s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.74: Completed work on page 2 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 7.0MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:49<09:27, 1.11 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 4.2MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:48<09:42, 4.74s/ row]\n", + "\n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.6MB/1.0GB object store: 31%|█████████████████████████████████████████████▍ | 270/885 [04:50<09:43, 1.05 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 1.6MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:49<09:42, 4.74s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 1.6MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:49<09:42, 4.74s/ row]\n", + "\n", + "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 0; Queued blocks: 0; Resources: 0.0 CPU, 0.0B object store: 29%|██████████████████████████████▎ | 255/885 [04:49<09:25, 1.11 row/s]\n", + "\n", + "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 0; Queued blocks: 0; Resources: 0.0 CPU, 0.0B object store: 31%|████████████████████████████████ | 270/885 [04:49<09:55, 1.03 row/s]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.32: Completed work on page 3 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.6MB/1.0GB object store: 31%|█████████████████████████████████████████████▍ | 270/885 [04:50<09:43, 1.05 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 1.6MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:49<09:42, 4.74s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.32: Completed work on page 3 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.6MB/1.0GB object store: 31%|█████████████████████████████████████████████▍ | 270/885 [04:51<09:43, 1.05 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 1.6MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:49<09:42, 4.74s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.95: Completed work on page 4 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.6MB/1.0GB object store: 31%|█████████████████████████████████████████████▍ | 270/885 [04:51<09:43, 1.05 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 1.6MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:49<09:42, 4.74s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.95: Completed work on page 4 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.6MB/1.0GB object store: 31%|█████████████████████████████████████████████▍ | 270/885 [04:51<09:43, 1.05 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 1.6MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:49<09:42, 4.74s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:httpx:HTTP Request: POST https://api.aryn.cloud/v1/document/partition \"HTTP/1.1 200 OK\" \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.6MB/1.0GB object store: 31%|█████████████████████████████████████████████▍ | 270/885 [04:51<09:43, 1.05 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 1.6MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:49<09:42, 4.74s/ row]\n", + "\n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.6MB/1.0GB object store: 31%|█████████████████████████████████████████████▍ | 270/885 [04:51<09:43, 1.05 row/s]\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 3.30: Completed work on page 5 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.6MB/1.0GB object store: 31%|█████████████████████████████████████████████▍ | 270/885 [04:51<09:43, 1.05 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 1.6MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:50<09:42, 4.74s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 3.30: Completed work on page 5 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.6MB/1.0GB object store: 31%|█████████████████████████████████████████████▍ | 270/885 [04:51<09:43, 1.05 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 1.6MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:50<09:42, 4.74s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 3.54: Completed work on page 6 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.6MB/1.0GB object store: 31%|█████████████████████████████████████████████▍ | 270/885 [04:51<09:43, 1.05 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 1.6MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:50<09:42, 4.74s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 3.54: Completed work on page 6 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.6MB/1.0GB object store: 31%|█████████████████████████████████████████████▍ | 270/885 [04:51<09:43, 1.05 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 1.6MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:50<09:42, 4.74s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 3.64: Completed work on page 7 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.6MB/1.0GB object store: 31%|█████████████████████████████████████████████▍ | 270/885 [04:51<09:43, 1.05 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 1.6MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:50<09:42, 4.74s/ row]\n", + "\n", + " \n", + " \n", + "\n", + " \n", + " \n", + "\n", + "(MapBatches(materialize)->MapBatches(split_doc)->MapBatches(llm_map)->MapBatches(parse_json_and_cast)->MapBatches(process_doc)->MapBatches(removeOriginalElements)->MapBatches(process_doc)->MapBatches(mergeDialogue)->MapBatches(process_doc)->MapBatches(OpenAIEmbedder)->MapBatches(_write_docs_tt) pid=77886) INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\" [repeated 2x across cluster]\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 46%|████████████████████████████████████████████████████████████████████▏ | 405/885 [07:35<08:10, 1.02s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:34<07:40, 4.95s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.12: Completed work on page 3 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 46%|████████████████████████████████████████████████████████████████████▏ | 405/885 [07:36<08:10, 1.02s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:34<07:40, 4.95s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.12: Completed work on page 3 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 46%|████████████████████████████████████████████████████████████████████▏ | 405/885 [07:36<08:10, 1.02s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:34<07:40, 4.95s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.12: Completed work on page 1 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 46%|████████████████████████████████████████████████████████████████████▏ | 405/885 [07:36<08:10, 1.02s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:34<07:40, 4.95s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.12: Completed work on page 1 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 46%|████████████████████████████████████████████████████████████████████▏ | 405/885 [07:36<08:10, 1.02s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:34<07:40, 4.95s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.23: Completed work on page 4 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 46%|████████████████████████████████████████████████████████████████████▏ | 405/885 [07:36<08:10, 1.02s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:34<07:40, 4.95s/ row]\n", + "\n", + " \n", + " \n", + "\n", + " \n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:34<07:40, 4.95s/ row](MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.23: Completed work on page 4\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 46%|████████████████████████████████████████████████████████████████████▏ | 405/885 [07:36<08:10, 1.02s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:35<07:40, 4.95s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.45: Completed work on page 5 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 46%|████████████████████████████████████████████████████████████████████▏ | 405/885 [07:36<08:10, 1.02s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:35<07:40, 4.95s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.45: Completed work on page 5 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 46%|████████████████████████████████████████████████████████████████████▏ | 405/885 [07:36<08:10, 1.02s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:35<07:40, 4.95s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.69: Completed work on page 6 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 46%|████████████████████████████████████████████████████████████████████▏ | 405/885 [07:36<08:10, 1.02s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:35<07:40, 4.95s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.69: Completed work on page 6 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 46%|████████████████████████████████████████████████████████████████████▏ | 405/885 [07:36<08:10, 1.02s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:35<07:40, 4.95s/ row]\n", + "\n", + "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 1; Queued blocks: 0; Resources: 1.0 CPU, 3.0MB object store: 46%|███████████████████████████████████████████████▌ | 405/885 [07:35<08:09, 1.02s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.94: Completed work on page 7 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 46%|████████████████████████████████████████████████████████████████████▏ | 405/885 [07:36<08:10, 1.02s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:35<07:40, 4.95s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.94: Completed work on page 7 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 46%|████████████████████████████████████████████████████████████████████▏ | 405/885 [07:36<08:10, 1.02s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:35<07:40, 4.95s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.11: Completed work on page 8 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 46%|████████████████████████████████████████████████████████████████████▏ | 405/885 [07:37<08:10, 1.02s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:35<07:40, 4.95s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.11: Completed work on page 8 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 46%|████████████████████████████████████████████████████████████████████▏ | 405/885 [07:37<08:10, 1.02s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:35<07:40, 4.95s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.28: Completed work on page 9 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 46%|████████████████████████████████████████████████████████████████████▏ | 405/885 [07:37<08:10, 1.02s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:35<07:40, 4.95s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.28: Completed work on page 9 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 46%|████████████████████████████████████████████████████████████████████▏ | 405/885 [07:37<08:10, 1.02s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:35<07:40, 4.95s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.47: Completed work on page 10 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 46%|████████████████████████████████████████████████████████████████████▏ | 405/885 [07:37<08:10, 1.02s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:35<07:40, 4.95s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.47: Completed work on page 10 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 46%|████████████████████████████████████████████████████████████████████▏ | 405/885 [07:37<08:10, 1.02s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:35<07:40, 4.95s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.63: Completed work on page 11 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 46%|████████████████████████████████████████████████████████████████████▏ | 405/885 [07:37<08:10, 1.02s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:35<07:40, 4.95s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.63: Completed work on page 11 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 46%|████████████████████████████████████████████████████████████████████▏ | 405/885 [07:37<08:10, 1.02s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:35<07:40, 4.95s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.80: Completed work on page 12 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 46%|████████████████████████████████████████████████████████████████████▏ | 405/885 [07:37<08:10, 1.02s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:36<07:40, 4.95s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.80: Completed work on page 12 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 46%|████████████████████████████████████████████████████████████████████▏ | 405/885 [07:37<08:10, 1.02s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:36<07:40, 4.95s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 3.00: Completed work on page 13 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 46%|████████████████████████████████████████████████████████████████████▏ | 405/885 [07:37<08:10, 1.02s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:36<07:40, 4.95s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 3.00: Completed work on page 13 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 46%|████████████████████████████████████████████████████████████████████▏ | 405/885 [07:37<08:10, 1.02s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:36<07:40, 4.95s/ row]\n", + "\n", + "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 1; Queued blocks: 0; Resources: 1.0 CPU, 3.0MB object store: 46%|███████████████████████████████████████████████▌ | 405/885 [07:36<08:09, 1.02s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:36<07:40, 4.95s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 3.17: Completed work on page 14 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 46%|████████████████████████████████████████████████████████████████████▏ | 405/885 [07:38<08:10, 1.02s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:36<07:40, 4.95s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 3.17: Completed work on page 14 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 46%|████████████████████████████████████████████████████████████████████▏ | 405/885 [07:38<08:10, 1.02s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:36<07:40, 4.95s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 3.34: Completed work on page 15 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 46%|████████████████████████████████████████████████████████████████████▏ | 405/885 [07:38<08:10, 1.02s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:36<07:40, 4.95s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 3.34: Completed work on page 15 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 46%|████████████████████████████████████████████████████████████████████▏ | 405/885 [07:38<08:10, 1.02s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:36<07:40, 4.95s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 3.52: Completed work on page 16 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 46%|████████████████████████████████████████████████████████████████████▏ | 405/885 [07:38<08:10, 1.02s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:36<07:40, 4.95s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 3.52: Completed work on page 16 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 46%|████████████████████████████████████████████████████████████████████▏ | 405/885 [07:38<08:10, 1.02s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:36<07:40, 4.95s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 3.71: Completed work on page 17 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 46%|████████████████████████████████████████████████████████████████████▏ | 405/885 [07:38<08:10, 1.02s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:36<07:40, 4.95s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 3.71: Completed work on page 17 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 46%|████████████████████████████████████████████████████████████████████▏ | 405/885 [07:38<08:10, 1.02s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:36<07:40, 4.95s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 3.86: Completed work on page 18 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 46%|████████████████████████████████████████████████████████████████████▏ | 405/885 [07:38<08:10, 1.02s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:37<07:40, 4.95s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 3.86: Completed work on page 18 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 46%|████████████████████████████████████████████████████████████████████▏ | 405/885 [07:38<08:10, 1.02s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:37<07:40, 4.95s/ row]\n", + "\n", + "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 1; Queued blocks: 0; Resources: 1.0 CPU, 3.0MB object store: 46%|███████████████████████████████████████████████▌ | 405/885 [07:37<08:09, 1.02s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:37<07:40, 4.95s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 4.12: Completed work on page 19 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 46%|████████████████████████████████████████████████████████████████████▏ | 405/885 [07:39<08:10, 1.02s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:37<07:40, 4.95s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 4.12: Completed work on page 19 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 46%|████████████████████████████████████████████████████████████████████▏ | 405/885 [07:39<08:10, 1.02s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:37<07:40, 4.95s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 4.21: Completed work on page 20 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 46%|████████████████████████████████████████████████████████████████████▏ | 405/885 [07:39<08:10, 1.02s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:37<07:40, 4.95s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 4.21: Completed work on page 20 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 46%|████████████████████████████████████████████████████████████████████▏ | 405/885 [07:39<08:10, 1.02s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:38<07:40, 4.95s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 4.40: Completed work on page 21 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 46%|████████████████████████████████████████████████████████████████████▏ | 405/885 [07:39<08:10, 1.02s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:38<07:40, 4.95s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 4.40: Completed work on page 21 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 46%|████████████████████████████████████████████████████████████████████▏ | 405/885 [07:39<08:10, 1.02s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:38<07:40, 4.95s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 4.58: Completed work on page 22 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 46%|████████████████████████████████████████████████████████████████████▏ | 405/885 [07:39<08:10, 1.02s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:38<07:40, 4.95s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 4.58: Completed work on page 22 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 46%|████████████████████████████████████████████████████████████████████▏ | 405/885 [07:39<08:10, 1.02s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:38<07:40, 4.95s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 4.75: Completed work on page 23 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 46%|████████████████████████████████████████████████████████████████████▏ | 405/885 [07:39<08:10, 1.02s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:38<07:40, 4.95s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 4.75: Completed work on page 23 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 46%|████████████████████████████████████████████████████████████████████▏ | 405/885 [07:39<08:10, 1.02s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:38<07:40, 4.95s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 4.89: Completed work on page 24 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 46%|████████████████████████████████████████████████████████████████████▏ | 405/885 [07:39<08:10, 1.02s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:38<07:40, 4.95s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 4.89: Completed work on page 24 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 46%|████████████████████████████████████████████████████████████████████▏ | 405/885 [07:39<08:10, 1.02s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:38<07:40, 4.95s/ row]\n", + "\n", + "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 1; Queued blocks: 0; Resources: 1.0 CPU, 3.0MB object store: 46%|███████████████████████████████████████████████▌ | 405/885 [07:38<08:09, 1.02s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:38<07:40, 4.95s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 5.18: Completed work on page 25 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 46%|████████████████████████████████████████████████████████████████████▏ | 405/885 [07:40<08:10, 1.02s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:38<07:40, 4.95s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 5.18: Completed work on page 25 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 46%|████████████████████████████████████████████████████████████████████▏ | 405/885 [07:40<08:10, 1.02s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:38<07:40, 4.95s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 5.40: Completed work on page 28 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 46%|████████████████████████████████████████████████████████████████████▏ | 405/885 [07:40<08:10, 1.02s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:38<07:40, 4.95s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 5.40: Completed work on page 28 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 46%|████████████████████████████████████████████████████████████████████▏ | 405/885 [07:40<08:10, 1.02s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:38<07:40, 4.95s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 5.42: Completed work on page 27 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 46%|████████████████████████████████████████████████████████████████████▏ | 405/885 [07:40<08:10, 1.02s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:38<07:40, 4.95s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 5.42: Completed work on page 27 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 46%|████████████████████████████████████████████████████████████████████▏ | 405/885 [07:40<08:10, 1.02s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:38<07:40, 4.95s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 5.43: Completed work on page 26 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 46%|████████████████████████████████████████████████████████████████████▏ | 405/885 [07:40<08:10, 1.02s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:39<07:40, 4.95s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 5.43: Completed work on page 26 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 46%|████████████████████████████████████████████████████████████████████▏ | 405/885 [07:40<08:10, 1.02s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:39<07:40, 4.95s/ row]\n", + "\n", + "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 1; Queued blocks: 0; Resources: 1.0 CPU, 3.0MB object store: 46%|███████████████████████████████████████████████▌ | 405/885 [07:39<08:09, 1.02s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:39<07:40, 4.95s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.00: Server version aryn-partitioner-0.20250323.124348 Model version 1.4 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 46%|████████████████████████████████████████████████████████████████████▏ | 405/885 [07:41<08:10, 1.02s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:39<07:40, 4.95s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.00: Server version aryn-partitioner-0.20250323.124348 Model version 1.4 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 46%|████████████████████████████████████████████████████████████████████▏ | 405/885 [07:41<08:10, 1.02s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:39<07:40, 4.95s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.00: Received request with aryn_call_id=aryn:call-atacj8ysncrz3as573yyo1f \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 46%|████████████████████████████████████████████████████████████████████▏ | 405/885 [07:41<08:10, 1.02s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:39<07:40, 4.95s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.00: Received request with aryn_call_id=aryn:call-atacj8ysncrz3as573yyo1f \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 46%|████████████████████████████████████████████████████████████████████▏ | 405/885 [07:41<08:10, 1.02s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:39<07:40, 4.95s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.00: Waiting for scheduling \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 46%|████████████████████████████████████████████████████████████████████▏ | 405/885 [07:41<08:10, 1.02s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:39<07:40, 4.95s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.00: Waiting for scheduling \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 46%|████████████████████████████████████████████████████████████████████▏ | 405/885 [07:41<08:10, 1.02s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:39<07:40, 4.95s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:httpx:HTTP Request: POST https://api.aryn.cloud/v1/document/partition \"HTTP/1.1 200 OK\" [repeated 3x across cluster] \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 46%|████████████████████████████████████████████████████████████████████▏ | 405/885 [07:41<08:10, 1.02s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:39<07:40, 4.95s/ row]\n", + "\n", + "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 1; Queued blocks: 0; Resources: 1.0 CPU, 3.0MB object store: 46%|███████████████████████████████████████████████▌ | 405/885 [07:39<08:09, 1.02s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:40<07:40, 4.95s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.01: Preprocessing document \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 46%|████████████████████████████████████████████████████████████████████▏ | 405/885 [07:42<08:10, 1.02s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:40<07:40, 4.95s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.01: Preprocessing document \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 46%|████████████████████████████████████████████████████████████████████▏ | 405/885 [07:42<08:10, 1.02s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:40<07:40, 4.95s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.01: Done preprocessing document \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 46%|████████████████████████████████████████████████████████████████████▏ | 405/885 [07:42<08:10, 1.02s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:40<07:40, 4.95s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.01: Done preprocessing document \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 46%|████████████████████████████████████████████████████████████████████▏ | 405/885 [07:42<08:10, 1.02s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:40<07:40, 4.95s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.65: Completed work on page 2 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 46%|████████████████████████████████████████████████████████████████████▏ | 405/885 [07:42<08:10, 1.02s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:40<07:40, 4.95s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.65: Completed work on page 2 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 46%|████████████████████████████████████████████████████████████████████▏ | 405/885 [07:42<08:10, 1.02s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:40<07:40, 4.95s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.74: Completed work on page 1 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 46%|████████████████████████████████████████████████████████████████████▏ | 405/885 [07:42<08:10, 1.02s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:40<07:40, 4.95s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.74: Completed work on page 1 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 46%|████████████████████████████████████████████████████████████████████▏ | 405/885 [07:42<08:10, 1.02s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:40<07:40, 4.95s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.89: Completed work on page 3 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 46%|████████████████████████████████████████████████████████████████████▏ | 405/885 [07:42<08:10, 1.02s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:40<07:40, 4.95s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.89: Completed work on page 3 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 46%|████████████████████████████████████████████████████████████████████▏ | 405/885 [07:42<08:10, 1.02s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:40<07:40, 4.95s/ row]\n", + "\n", + "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 1; Queued blocks: 0; Resources: 1.0 CPU, 3.0MB object store: 46%|███████████████████████████████████████████████▌ | 405/885 [07:40<08:09, 1.02s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 1.7MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:41<07:40, 4.95s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 1.7MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:41<07:40, 4.95s/ row]\n", + "\n", + "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 0; Queued blocks: 0; Resources: 0.0 CPU, 0.0B object store: 46%|████████████████████████████████████████████████ | 405/885 [07:41<08:09, 1.02s/ row]\n", + "\n", + "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 0; Queued blocks: 0; Resources: 0.0 CPU, 0.0B object store: 47%|█████████████████████████████████████████████████▊ | 420/885 [07:41<07:11, 1.08 row/s]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.08: Completed work on page 4 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.7MB/1.0GB object store: 47%|██████████████████████████████████████████████████████████████████████▋ | 420/885 [07:43<07:10, 1.08 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 1.7MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:41<07:40, 4.95s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.08: Completed work on page 4 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.7MB/1.0GB object store: 47%|██████████████████████████████████████████████████████████████████████▋ | 420/885 [07:43<07:10, 1.08 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 1.7MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:41<07:40, 4.95s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.28: Completed work on page 5 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.7MB/1.0GB object store: 47%|██████████████████████████████████████████████████████████████████████▋ | 420/885 [07:43<07:10, 1.08 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 1.7MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:41<07:40, 4.95s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.28: Completed work on page 5 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.7MB/1.0GB object store: 47%|██████████████████████████████████████████████████████████████████████▋ | 420/885 [07:43<07:10, 1.08 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 1.7MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:41<07:40, 4.95s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.50: Completed work on page 6 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.7MB/1.0GB object store: 47%|██████████████████████████████████████████████████████████████████████▋ | 420/885 [07:43<07:10, 1.08 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 1.7MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:41<07:40, 4.95s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.50: Completed work on page 6 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.7MB/1.0GB object store: 47%|██████████████████████████████████████████████████████████████████████▋ | 420/885 [07:43<07:10, 1.08 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 1.7MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:41<07:40, 4.95s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.79: Completed work on page 7 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.7MB/1.0GB object store: 47%|██████████████████████████████████████████████████████████████████████▋ | 420/885 [07:43<07:10, 1.08 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 1.7MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:41<07:40, 4.95s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.79: Completed work on page 7 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.7MB/1.0GB object store: 47%|██████████████████████████████████████████████████████████████████████▋ | 420/885 [07:43<07:10, 1.08 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 1.7MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:41<07:40, 4.95s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.97: Completed work on page 8 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.7MB/1.0GB object store: 47%|██████████████████████████████████████████████████████████████████████▋ | 420/885 [07:43<07:10, 1.08 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 1.7MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:42<07:40, 4.95s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.97: Completed work on page 8 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.7MB/1.0GB object store: 47%|██████████████████████████████████████████████████████████████████████▋ | 420/885 [07:43<07:10, 1.08 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 1.7MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:42<07:40, 4.95s/ row]\n", + "\n", + "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 0; Queued blocks: 0; Resources: 0.0 CPU, 0.0B object store: 47%|█████████████████████████████████████████████████▊ | 420/885 [07:42<07:11, 1.08 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 1.7MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:42<07:40, 4.95s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.7MB/1.0GB object store: 47%|██████████████████████████████████████████████████████████████████████▋ | 420/885 [07:44<07:10, 1.08 row/s](MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.14: Completed work on page 9\n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.7MB/1.0GB object store: 47%|██████████████████████████████████████████████████████████████████████▋ | 420/885 [07:44<07:10, 1.08 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 1.7MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:42<07:40, 4.95s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.14: Completed work on page 9 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.7MB/1.0GB object store: 47%|██████████████████████████████████████████████████████████████████████▋ | 420/885 [07:44<07:10, 1.08 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 1.7MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:42<07:40, 4.95s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.33: Completed work on page 10 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.7MB/1.0GB object store: 47%|██████████████████████████████████████████████████████████████████████▋ | 420/885 [07:44<07:10, 1.08 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 1.7MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:42<07:40, 4.95s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.33: Completed work on page 10 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.7MB/1.0GB object store: 47%|██████████████████████████████████████████████████████████████████████▋ | 420/885 [07:44<07:10, 1.08 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 1.7MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:42<07:40, 4.95s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.53: Completed work on page 11 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.7MB/1.0GB object store: 47%|██████████████████████████████████████████████████████████████████████▋ | 420/885 [07:44<07:10, 1.08 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 1.7MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:42<07:40, 4.95s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.53: Completed work on page 11 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.7MB/1.0GB object store: 47%|██████████████████████████████████████████████████████████████████████▋ | 420/885 [07:44<07:10, 1.08 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 1.7MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:42<07:40, 4.95s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.71: Completed work on page 12 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.7MB/1.0GB object store: 47%|██████████████████████████████████████████████████████████████████████▋ | 420/885 [07:44<07:10, 1.08 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 1.7MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:42<07:40, 4.95s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.71: Completed work on page 12 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.7MB/1.0GB object store: 47%|██████████████████████████████████████████████████████████████████████▋ | 420/885 [07:44<07:10, 1.08 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 1.7MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:42<07:40, 4.95s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.93: Completed work on page 13 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.7MB/1.0GB object store: 47%|██████████████████████████████████████████████████████████████████████▋ | 420/885 [07:44<07:10, 1.08 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 1.7MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:42<07:40, 4.95s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.93: Completed work on page 13 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.7MB/1.0GB object store: 47%|██████████████████████████████████████████████████████████████████████▋ | 420/885 [07:44<07:10, 1.08 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 1.7MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:42<07:40, 4.95s/ row]\n", + "\n", + "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 0; Queued blocks: 0; Resources: 0.0 CPU, 0.0B object store: 47%|█████████████████████████████████████████████████▊ | 420/885 [07:42<07:11, 1.08 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 1.7MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:43<07:40, 4.95s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.7MB/1.0GB object store: 47%|██████████████████████████████████████████████████████████████████████▋ | 420/885 [07:45<07:10, 1.08 row/s] ArynPartitioner: T+ 3.24: Completed work on page 14\n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.7MB/1.0GB object store: 47%|██████████████████████████████████████████████████████████████████████▋ | 420/885 [07:45<07:10, 1.08 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 1.7MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:43<07:40, 4.95s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 3.24: Completed work on page 14 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.7MB/1.0GB object store: 47%|██████████████████████████████████████████████████████████████████████▋ | 420/885 [07:45<07:10, 1.08 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 1.7MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:43<07:40, 4.95s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 3.41: Completed work on page 15 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.7MB/1.0GB object store: 47%|██████████████████████████████████████████████████████████████████████▋ | 420/885 [07:45<07:10, 1.08 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 1.7MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:43<07:40, 4.95s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 3.41: Completed work on page 15 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.7MB/1.0GB object store: 47%|██████████████████████████████████████████████████████████████████████▋ | 420/885 [07:45<07:10, 1.08 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 1.7MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:43<07:40, 4.95s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 3.59: Completed work on page 16 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.7MB/1.0GB object store: 47%|██████████████████████████████████████████████████████████████████████▋ | 420/885 [07:45<07:10, 1.08 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 1.7MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:43<07:40, 4.95s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 3.59: Completed work on page 16 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.7MB/1.0GB object store: 47%|██████████████████████████████████████████████████████████████████████▋ | 420/885 [07:45<07:10, 1.08 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 1.7MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:43<07:40, 4.95s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 3.76: Completed work on page 17 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.7MB/1.0GB object store: 47%|██████████████████████████████████████████████████████████████████████▋ | 420/885 [07:45<07:10, 1.08 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 1.7MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:43<07:40, 4.95s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 3.76: Completed work on page 17 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.7MB/1.0GB object store: 47%|██████████████████████████████████████████████████████████████████████▋ | 420/885 [07:45<07:10, 1.08 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 1.7MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:43<07:40, 4.95s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 3.95: Completed work on page 18 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.7MB/1.0GB object store: 47%|██████████████████████████████████████████████████████████████████████▋ | 420/885 [07:45<07:10, 1.08 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 1.7MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:43<07:40, 4.95s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 3.95: Completed work on page 18 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.7MB/1.0GB object store: 47%|██████████████████████████████████████████████████████████████████████▋ | 420/885 [07:45<07:10, 1.08 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 1.7MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:44<07:40, 4.95s/ row]\n", + "\n", + "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 0; Queued blocks: 0; Resources: 0.0 CPU, 0.0B object store: 47%|█████████████████████████████████████████████████▊ | 420/885 [07:44<07:11, 1.08 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 1.7MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:44<07:40, 4.95s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 4.13: Completed work on page 19 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.7MB/1.0GB object store: 47%|██████████████████████████████████████████████████████████████████████▋ | 420/885 [07:46<07:10, 1.08 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 1.7MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:44<07:40, 4.95s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.7MB/1.0GB object store: 47%|██████████████████████████████████████████████████████████████████████▋ | 420/885 [07:46<07:10, 1.08 row/s](MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 4.13: Completed work on page 19\n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.7MB/1.0GB object store: 47%|██████████████████████████████████████████████████████████████████████▋ | 420/885 [07:46<07:10, 1.08 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 1.7MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:44<07:40, 4.95s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 4.32: Completed work on page 20 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.7MB/1.0GB object store: 47%|██████████████████████████████████████████████████████████████████████▋ | 420/885 [07:46<07:10, 1.08 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 1.7MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:44<07:40, 4.95s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 4.32: Completed work on page 20 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.7MB/1.0GB object store: 47%|██████████████████████████████████████████████████████████████████████▋ | 420/885 [07:46<07:10, 1.08 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 1.7MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:44<07:40, 4.95s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 4.48: Completed work on page 21 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.7MB/1.0GB object store: 47%|██████████████████████████████████████████████████████████████████████▋ | 420/885 [07:46<07:10, 1.08 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 1.7MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:44<07:40, 4.95s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 4.48: Completed work on page 21 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.7MB/1.0GB object store: 47%|██████████████████████████████████████████████████████████████████████▋ | 420/885 [07:46<07:10, 1.08 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 1.7MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:44<07:40, 4.95s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 4.68: Completed work on page 22 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.7MB/1.0GB object store: 47%|██████████████████████████████████████████████████████████████████████▋ | 420/885 [07:46<07:10, 1.08 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 1.7MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:45<07:40, 4.95s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 4.68: Completed work on page 22 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.7MB/1.0GB object store: 47%|██████████████████████████████████████████████████████████████████████▋ | 420/885 [07:46<07:10, 1.08 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 1.7MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:45<07:40, 4.95s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 4.84: Completed work on page 23 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.7MB/1.0GB object store: 47%|██████████████████████████████████████████████████████████████████████▋ | 420/885 [07:46<07:10, 1.08 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 1.7MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:45<07:40, 4.95s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 4.84: Completed work on page 23 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.7MB/1.0GB object store: 47%|██████████████████████████████████████████████████████████████████████▋ | 420/885 [07:46<07:10, 1.08 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 1.7MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:45<07:40, 4.95s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 5.05: Completed work on page 24 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.7MB/1.0GB object store: 47%|██████████████████████████████████████████████████████████████████████▋ | 420/885 [07:46<07:10, 1.08 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 1.7MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:45<07:40, 4.95s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 5.05: Completed work on page 24 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.7MB/1.0GB object store: 47%|██████████████████████████████████████████████████████████████████████▋ | 420/885 [07:46<07:10, 1.08 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 1.7MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:45<07:40, 4.95s/ row]\n", + "\n", + "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 0; Queued blocks: 0; Resources: 0.0 CPU, 0.0B object store: 47%|█████████████████████████████████████████████████▊ | 420/885 [07:45<07:11, 1.08 row/s]\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 5.32: Completed work on page 25 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.7MB/1.0GB object store: 47%|██████████████████████████████████████████████████████████████████████▋ | 420/885 [07:47<07:10, 1.08 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 1.7MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:45<07:40, 4.95s/ row]\n", + "\n", + " \n", + " \n", + "\n", + " \n", + "\n", + "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 0; Queued blocks: 0; Resources: 0.0 CPU, 0.0B object store: 47%|█████████████████████████████████████████████████▊ | 420/885 [07:45<07:11, 1.08 row/s](MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 5.32: Completed work on page 25\n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.7MB/1.0GB object store: 47%|██████████████████████████████████████████████████████████████████████▋ | 420/885 [07:47<07:10, 1.08 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 1.7MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:45<07:40, 4.95s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 5.40: Completed work on page 27 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.7MB/1.0GB object store: 47%|██████████████████████████████████████████████████████████████████████▋ | 420/885 [07:47<07:10, 1.08 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 1.7MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:45<07:40, 4.95s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 5.40: Completed work on page 27 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.7MB/1.0GB object store: 47%|██████████████████████████████████████████████████████████████████████▋ | 420/885 [07:47<07:10, 1.08 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 1.7MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:45<07:40, 4.95s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 5.46: Completed work on page 26 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.7MB/1.0GB object store: 47%|██████████████████████████████████████████████████████████████████████▋ | 420/885 [07:47<07:10, 1.08 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 1.7MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:45<07:40, 4.95s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 5.46: Completed work on page 26 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.7MB/1.0GB object store: 47%|██████████████████████████████████████████████████████████████████████▋ | 420/885 [07:47<07:10, 1.08 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 1.7MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:45<07:40, 4.95s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:httpx:HTTP Request: POST https://api.aryn.cloud/v1/document/partition \"HTTP/1.1 200 OK\" \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.7MB/1.0GB object store: 47%|██████████████████████████████████████████████████████████████████████▋ | 420/885 [07:47<07:10, 1.08 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 1.7MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:46<07:40, 4.95s/ row]\n", + "\n", + "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 0; Queued blocks: 0; Resources: 0.0 CPU, 0.0B object store: 47%|█████████████████████████████████████████████████▊ | 420/885 [07:46<07:11, 1.08 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 26; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:46<07:40, 4.95s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 26; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 49%|██████████████████████████████████▉ | 87.0/177 [07:46<07:17, 4.86s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 26; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 49%|██████████████████████████████████▉ | 87.0/177 [07:46<07:17, 4.86s/ row]\n", + "\n", + "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 1; Queued blocks: 0; Resources: 1.0 CPU, 2.9MB object store: 47%|█████████████████████████████████████████████████▎ | 420/885 [07:46<07:11, 1.08 row/s]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.00: Server version aryn-partitioner-0.20250323.124348 Model version 1.4 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 47%|██████████████████████████████████████████████████████████████████████▋ | 420/885 [07:49<07:10, 1.08 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 26; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 49%|██████████████████████████████████▉ | 87.0/177 [07:47<07:17, 4.86s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.00: Server version aryn-partitioner-0.20250323.124348 Model version 1.4 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 47%|██████████████████████████████████████████████████████████████████████▋ | 420/885 [07:49<07:10, 1.08 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 26; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 49%|██████████████████████████████████▉ | 87.0/177 [07:47<07:17, 4.86s/ row]\n", + "\n", + " \n", + " \n", + "\n", + " \n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 26; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 49%|██████████████████████████████████▉ | 87.0/177 [07:47<07:17, 4.86s/ row](MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.00: Received request with aryn_call_id=aryn:call-frgsvlh90286wc0wkj0nske\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 47%|██████████████████████████████████████████████████████████████████████▋ | 420/885 [07:49<07:10, 1.08 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 26; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 49%|██████████████████████████████████▉ | 87.0/177 [07:47<07:17, 4.86s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.00: Received request with aryn_call_id=aryn:call-frgsvlh90286wc0wkj0nske \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 47%|██████████████████████████████████████████████████████████████████████▋ | 420/885 [07:49<07:10, 1.08 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 26; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 49%|██████████████████████████████████▉ | 87.0/177 [07:47<07:17, 4.86s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.00: Waiting for scheduling \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 47%|██████████████████████████████████████████████████████████████████████▋ | 420/885 [07:49<07:10, 1.08 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 26; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 49%|██████████████████████████████████▉ | 87.0/177 [07:47<07:17, 4.86s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.00: Waiting for scheduling \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 47%|██████████████████████████████████████████████████████████████████████▋ | 420/885 [07:49<07:10, 1.08 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 26; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 49%|██████████████████████████████████▉ | 87.0/177 [07:47<07:17, 4.86s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.01: Preprocessing document \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 47%|██████████████████████████████████████████████████████████████████████▋ | 420/885 [07:49<07:10, 1.08 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 26; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 49%|██████████████████████████████████▉ | 87.0/177 [07:47<07:17, 4.86s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.01: Preprocessing document \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 47%|██████████████████████████████████████████████████████████████████████▋ | 420/885 [07:49<07:10, 1.08 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 26; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 49%|██████████████████████████████████▉ | 87.0/177 [07:47<07:17, 4.86s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.01: Done preprocessing document \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 47%|██████████████████████████████████████████████████████████████████████▋ | 420/885 [07:49<07:10, 1.08 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 26; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 49%|██████████████████████████████████▉ | 87.0/177 [07:47<07:17, 4.86s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.01: Done preprocessing document \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 47%|██████████████████████████████████████████████████████████████████████▋ | 420/885 [07:49<07:10, 1.08 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 26; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 49%|██████████████████████████████████▉ | 87.0/177 [07:47<07:17, 4.86s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.73: Completed work on page 1 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 47%|██████████████████████████████████████████████████████████████████████▋ | 420/885 [07:49<07:10, 1.08 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 26; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 49%|██████████████████████████████████▉ | 87.0/177 [07:47<07:17, 4.86s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.73: Completed work on page 1 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 47%|██████████████████████████████████████████████████████████████████████▋ | 420/885 [07:49<07:10, 1.08 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 26; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 49%|██████████████████████████████████▉ | 87.0/177 [07:47<07:17, 4.86s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.75: Completed work on page 2 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 47%|██████████████████████████████████████████████████████████████████████▋ | 420/885 [07:49<07:10, 1.08 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 26; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 49%|██████████████████████████████████▉ | 87.0/177 [07:47<07:17, 4.86s/ row]\n", + "\n", + " \n", + " \n", + "\n", + " \n", + "\n", + "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 1; Queued blocks: 0; Resources: 1.0 CPU, 2.9MB object store: 47%|█████████████████████████████████████████████████▎ | 420/885 [07:47<07:11, 1.08 row/s](MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.75: Completed work on page 2\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 47%|██████████████████████████████████████████████████████████████████████▋ | 420/885 [07:49<07:10, 1.08 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 26; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 49%|██████████████████████████████████▉ | 87.0/177 [07:47<07:17, 4.86s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.06: Completed work on page 3 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 47%|██████████████████████████████████████████████████████████████████████▋ | 420/885 [07:49<07:10, 1.08 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 26; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 49%|██████████████████████████████████▉ | 87.0/177 [07:48<07:17, 4.86s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.06: Completed work on page 3 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 47%|██████████████████████████████████████████████████████████████████████▋ | 420/885 [07:49<07:10, 1.08 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 26; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 49%|██████████████████████████████████▉ | 87.0/177 [07:48<07:17, 4.86s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.22: Completed work on page 4 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 47%|██████████████████████████████████████████████████████████████████████▋ | 420/885 [07:50<07:10, 1.08 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 26; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 49%|██████████████████████████████████▉ | 87.0/177 [07:48<07:17, 4.86s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.22: Completed work on page 4 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 47%|██████████████████████████████████████████████████████████████████████▋ | 420/885 [07:50<07:10, 1.08 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 26; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 49%|██████████████████████████████████▉ | 87.0/177 [07:48<07:17, 4.86s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.42: Completed work on page 5 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 47%|██████████████████████████████████████████████████████████████████████▋ | 420/885 [07:50<07:10, 1.08 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 26; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 49%|██████████████████████████████████▉ | 87.0/177 [07:48<07:17, 4.86s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.42: Completed work on page 5 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 47%|██████████████████████████████████████████████████████████████████████▋ | 420/885 [07:50<07:10, 1.08 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 26; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 49%|██████████████████████████████████▉ | 87.0/177 [07:48<07:17, 4.86s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.52: Completed work on page 6 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 47%|██████████████████████████████████████████████████████████████████████▋ | 420/885 [07:50<07:10, 1.08 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 26; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 49%|██████████████████████████████████▉ | 87.0/177 [07:48<07:17, 4.86s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.52: Completed work on page 6 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 47%|██████████████████████████████████████████████████████████████████████▋ | 420/885 [07:50<07:10, 1.08 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 26; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 49%|██████████████████████████████████▉ | 87.0/177 [07:48<07:17, 4.86s/ row]\n", + "\n", + " \n", + " \n", + "\n", + " \n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 26; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 49%|██████████████████████████████████▉ | 87.0/177 [07:48<07:17, 4.86s/ row](MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.81: Completed work on page 7\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 47%|██████████████████████████████████████████████████████████████████████▋ | 420/885 [07:50<07:10, 1.08 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 26; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 49%|██████████████████████████████████▉ | 87.0/177 [07:48<07:17, 4.86s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.81: Completed work on page 7 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 47%|██████████████████████████████████████████████████████████████████████▋ | 420/885 [07:50<07:10, 1.08 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 26; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 49%|██████████████████████████████████▉ | 87.0/177 [07:48<07:17, 4.86s/ row]\n", + "\n", + "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 1; Queued blocks: 0; Resources: 1.0 CPU, 2.9MB object store: 47%|█████████████████████████████████████████████████▎ | 420/885 [07:48<07:11, 1.08 row/s]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.01: Completed work on page 8 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 47%|██████████████████████████████████████████████████████████████████████▋ | 420/885 [07:51<07:10, 1.08 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 26; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 49%|██████████████████████████████████▉ | 87.0/177 [07:49<07:17, 4.86s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.01: Completed work on page 8 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 47%|██████████████████████████████████████████████████████████████████████▋ | 420/885 [07:51<07:10, 1.08 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 26; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 49%|██████████████████████████████████▉ | 87.0/177 [07:49<07:17, 4.86s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.21: Completed work on page 9 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 47%|██████████████████████████████████████████████████████████████████████▋ | 420/885 [07:51<07:10, 1.08 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 26; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 49%|██████████████████████████████████▉ | 87.0/177 [07:49<07:17, 4.86s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.21: Completed work on page 9 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 47%|██████████████████████████████████████████████████████████████████████▋ | 420/885 [07:51<07:10, 1.08 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 26; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 49%|██████████████████████████████████▉ | 87.0/177 [07:49<07:17, 4.86s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.38: Completed work on page 10 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 47%|██████████████████████████████████████████████████████████████████████▋ | 420/885 [07:51<07:10, 1.08 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 26; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 49%|██████████████████████████████████▉ | 87.0/177 [07:49<07:17, 4.86s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.38: Completed work on page 10 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 47%|██████████████████████████████████████████████████████████████████████▋ | 420/885 [07:51<07:10, 1.08 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 26; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 49%|██████████████████████████████████▉ | 87.0/177 [07:49<07:17, 4.86s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.56: Completed work on page 11 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 47%|██████████████████████████████████████████████████████████████████████▋ | 420/885 [07:51<07:10, 1.08 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 26; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 49%|██████████████████████████████████▉ | 87.0/177 [07:49<07:17, 4.86s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.56: Completed work on page 11 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 47%|██████████████████████████████████████████████████████████████████████▋ | 420/885 [07:51<07:10, 1.08 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 26; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 49%|██████████████████████████████████▉ | 87.0/177 [07:49<07:17, 4.86s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.76: Completed work on page 12 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 47%|██████████████████████████████████████████████████████████████████████▋ | 420/885 [07:51<07:10, 1.08 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 26; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 49%|██████████████████████████████████▉ | 87.0/177 [07:49<07:17, 4.86s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.76: Completed work on page 12 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 47%|██████████████████████████████████████████████████████████████████████▋ | 420/885 [07:51<07:10, 1.08 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 26; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 49%|██████████████████████████████████▉ | 87.0/177 [07:49<07:17, 4.86s/ row]\n", + "\n", + " \n", + " \n", + "\n", + " \n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 26; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 49%|██████████████████████████████████▉ | 87.0/177 [07:49<07:17, 4.86s/ row](MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.94: Completed work on page 13\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 47%|██████████████████████████████████████████████████████████████████████▋ | 420/885 [07:51<07:10, 1.08 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 26; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 49%|██████████████████████████████████▉ | 87.0/177 [07:49<07:17, 4.86s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) \n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.5MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:49<01:03, 3.54s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.00: Server version aryn-partitioner-0.20250323.124348 Model version 1.4 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 3.7MB/1.0GB object store: 86%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 729/843 [10:51<00:59, 1.91 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.5MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:50<01:03, 3.54s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.00: Server version aryn-partitioner-0.20250323.124348 Model version 1.4 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 3.7MB/1.0GB object store: 86%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 729/843 [10:51<00:59, 1.91 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.5MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:50<01:03, 3.54s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.00: Received request with aryn_call_id=aryn:call-dy062ooso38xhekxp2ljolf \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 3.7MB/1.0GB object store: 86%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 729/843 [10:51<00:59, 1.91 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.5MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:50<01:03, 3.54s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.00: Received request with aryn_call_id=aryn:call-dy062ooso38xhekxp2ljolf \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 3.7MB/1.0GB object store: 86%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 729/843 [10:51<00:59, 1.91 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.5MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:50<01:03, 3.54s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.00: Waiting for scheduling \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 3.7MB/1.0GB object store: 86%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 729/843 [10:51<00:59, 1.91 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.5MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:50<01:03, 3.54s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.00: Waiting for scheduling \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 3.7MB/1.0GB object store: 86%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 729/843 [10:51<00:59, 1.91 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.5MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:50<01:03, 3.54s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.03: Preprocessing document \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 3.7MB/1.0GB object store: 86%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 729/843 [10:51<00:59, 1.91 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.5MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:50<01:03, 3.54s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.03: Preprocessing document \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 3.7MB/1.0GB object store: 86%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 729/843 [10:51<00:59, 1.91 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.5MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:50<01:03, 3.54s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.03: Done preprocessing document \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 3.7MB/1.0GB object store: 86%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 729/843 [10:51<00:59, 1.91 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.5MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:50<01:03, 3.54s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.03: Done preprocessing document \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 3.7MB/1.0GB object store: 86%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 729/843 [10:51<00:59, 1.91 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.5MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:50<01:03, 3.54s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.89: Completed work on page 1 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 3.7MB/1.0GB object store: 86%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 729/843 [10:51<00:59, 1.91 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.5MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:50<01:03, 3.54s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.89: Completed work on page 1 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 3.7MB/1.0GB object store: 86%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 729/843 [10:51<00:59, 1.91 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.5MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:50<01:03, 3.54s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.90: Completed work on page 4 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 3.7MB/1.0GB object store: 86%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 729/843 [10:51<00:59, 1.91 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.5MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:50<01:03, 3.54s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.90: Completed work on page 4 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 3.7MB/1.0GB object store: 86%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 729/843 [10:51<00:59, 1.91 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.5MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:50<01:03, 3.54s/ row]\n", + "\n", + "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 1; Queued blocks: 0; Resources: 1.0 CPU, 2.2MB object store: 86%|█████████████████████████████████████████████████████████████████████████████████████████▉ | 729/843 [10:50<00:59, 1.91 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.5MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:50<01:03, 3.54s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.00: Completed work on page 2 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 3.7MB/1.0GB object store: 86%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 729/843 [10:52<00:59, 1.91 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.5MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:51<01:03, 3.54s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.00: Completed work on page 2 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 3.7MB/1.0GB object store: 86%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 729/843 [10:52<00:59, 1.91 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.5MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:51<01:03, 3.54s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.00: Completed work on page 3 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 3.7MB/1.0GB object store: 86%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 729/843 [10:52<00:59, 1.91 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.5MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:51<01:03, 3.54s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.00: Completed work on page 3 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 3.7MB/1.0GB object store: 86%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 729/843 [10:52<00:59, 1.91 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.5MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:51<01:03, 3.54s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.14: Completed work on page 5 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 3.7MB/1.0GB object store: 86%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 729/843 [10:52<00:59, 1.91 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.5MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:51<01:03, 3.54s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.14: Completed work on page 5 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 3.7MB/1.0GB object store: 86%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 729/843 [10:52<00:59, 1.91 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.5MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:51<01:03, 3.54s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.25: Completed work on page 6 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 3.7MB/1.0GB object store: 86%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 729/843 [10:52<00:59, 1.91 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.5MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:51<01:03, 3.54s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.25: Completed work on page 6 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 3.7MB/1.0GB object store: 86%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 729/843 [10:52<00:59, 1.91 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.5MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:51<01:03, 3.54s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.40: Completed work on page 7 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 3.7MB/1.0GB object store: 86%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 729/843 [10:52<00:59, 1.91 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.5MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:51<01:03, 3.54s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.40: Completed work on page 7 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 3.7MB/1.0GB object store: 86%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 729/843 [10:52<00:59, 1.91 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.5MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:51<01:03, 3.54s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.50: Completed work on page 8 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 3.7MB/1.0GB object store: 86%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 729/843 [10:52<00:59, 1.91 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.5MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:51<01:03, 3.54s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.50: Completed work on page 8 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 3.7MB/1.0GB object store: 86%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 729/843 [10:52<00:59, 1.91 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.5MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:51<01:03, 3.54s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.61: Completed work on page 9 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 3.7MB/1.0GB object store: 86%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 729/843 [10:52<00:59, 1.91 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.5MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:51<01:03, 3.54s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.61: Completed work on page 9 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 3.7MB/1.0GB object store: 86%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 729/843 [10:52<00:59, 1.91 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.5MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:51<01:03, 3.54s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.75: Completed work on page 10 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 3.7MB/1.0GB object store: 86%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 729/843 [10:52<00:59, 1.91 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.5MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:51<01:03, 3.54s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.75: Completed work on page 10 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 3.7MB/1.0GB object store: 86%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 729/843 [10:52<00:59, 1.91 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.5MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:51<01:03, 3.54s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.85: Completed work on page 11 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 3.7MB/1.0GB object store: 86%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 729/843 [10:52<00:59, 1.91 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.5MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:51<01:03, 3.54s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.85: Completed work on page 11 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 3.7MB/1.0GB object store: 86%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 729/843 [10:52<00:59, 1.91 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.5MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:51<01:03, 3.54s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapBatches(materialize)->MapBatches(split_doc)->MapBatches(llm_map)->MapBatches(parse_json_and_cast)->MapBatches(process_doc)->MapBatches(removeOriginalElements)->MapBatches(process_doc)->MapBatches(mergeDialogue)->MapBatches(process_doc)->MapBatches(OpenAIEmbedder)->MapBatches(_write_docs_tt) pid=77877) INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\" [repeated 3x across cluster]\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 3.7MB/1.0GB object store: 86%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 729/843 [10:52<00:59, 1.91 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.5MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:51<01:03, 3.54s/ row]\n", + "\n", + "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 1; Queued blocks: 0; Resources: 1.0 CPU, 2.2MB object store: 86%|█████████████████████████████████████████████████████████████████████████████████████████▉ | 729/843 [10:51<00:59, 1.91 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.5MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:51<01:03, 3.54s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.05: Completed work on page 12 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 3.7MB/1.0GB object store: 86%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 729/843 [10:53<00:59, 1.91 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.5MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:52<01:03, 3.54s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.05: Completed work on page 12 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 3.7MB/1.0GB object store: 86%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 729/843 [10:53<00:59, 1.91 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.5MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:52<01:03, 3.54s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.05: Completed work on page 13 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 3.7MB/1.0GB object store: 86%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 729/843 [10:53<00:59, 1.91 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.5MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:52<01:03, 3.54s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.05: Completed work on page 13 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 3.7MB/1.0GB object store: 86%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 729/843 [10:53<00:59, 1.91 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.5MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:52<01:03, 3.54s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.23: Completed work on page 14 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 3.7MB/1.0GB object store: 86%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 729/843 [10:53<00:59, 1.91 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.5MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:52<01:03, 3.54s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.23: Completed work on page 14 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 3.7MB/1.0GB object store: 86%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 729/843 [10:53<00:59, 1.91 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.5MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:52<01:03, 3.54s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.68: Completed work on page 15 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 3.7MB/1.0GB object store: 86%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 729/843 [10:53<00:59, 1.91 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.5MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:52<01:03, 3.54s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.68: Completed work on page 15 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 3.7MB/1.0GB object store: 86%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 729/843 [10:53<00:59, 1.91 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.5MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:52<01:03, 3.54s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.85: Completed work on page 17 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 3.7MB/1.0GB object store: 86%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 729/843 [10:53<00:59, 1.91 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.5MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:52<01:03, 3.54s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.85: Completed work on page 17 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 3.7MB/1.0GB object store: 86%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 729/843 [10:53<00:59, 1.91 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.5MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:52<01:03, 3.54s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.94: Completed work on page 16 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 3.7MB/1.0GB object store: 86%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 729/843 [10:53<00:59, 1.91 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.5MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:52<01:03, 3.54s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.94: Completed work on page 16 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 3.7MB/1.0GB object store: 86%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 729/843 [10:53<00:59, 1.91 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.5MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:52<01:03, 3.54s/ row]\n", + "\n", + "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 1; Queued blocks: 0; Resources: 1.0 CPU, 2.2MB object store: 86%|█████████████████████████████████████████████████████████████████████████████████████████▉ | 729/843 [10:52<00:59, 1.91 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.5MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:52<01:03, 3.54s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 3.13: Completed work on page 18 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 3.7MB/1.0GB object store: 86%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 729/843 [10:54<00:59, 1.91 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.5MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:53<01:03, 3.54s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 3.13: Completed work on page 18 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 3.7MB/1.0GB object store: 86%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 729/843 [10:54<00:59, 1.91 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.5MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:53<01:03, 3.54s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 3.62: Completed work on page 20 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 3.7MB/1.0GB object store: 86%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 729/843 [10:54<00:59, 1.91 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.5MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:53<01:03, 3.54s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 3.62: Completed work on page 20 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 3.7MB/1.0GB object store: 86%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 729/843 [10:54<00:59, 1.91 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.5MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:53<01:03, 3.54s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 3.68: Completed work on page 19 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 3.7MB/1.0GB object store: 86%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 729/843 [10:54<00:59, 1.91 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.5MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:53<01:03, 3.54s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 3.68: Completed work on page 19 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 3.7MB/1.0GB object store: 86%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 729/843 [10:54<00:59, 1.91 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.5MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:53<01:03, 3.54s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 3.77: Completed work on page 22 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 3.7MB/1.0GB object store: 86%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 729/843 [10:54<00:59, 1.91 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.5MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:53<01:03, 3.54s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 3.77: Completed work on page 22 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 3.7MB/1.0GB object store: 86%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 729/843 [10:54<00:59, 1.91 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.5MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:53<01:03, 3.54s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 3.77: Completed work on page 24 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 3.7MB/1.0GB object store: 86%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 729/843 [10:54<00:59, 1.91 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.5MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:53<01:03, 3.54s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 3.77: Completed work on page 24 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 3.7MB/1.0GB object store: 86%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 729/843 [10:54<00:59, 1.91 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.5MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:53<01:03, 3.54s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 3.96: Completed work on page 23 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 3.7MB/1.0GB object store: 86%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 729/843 [10:54<00:59, 1.91 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.5MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:53<01:03, 3.54s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 3.96: Completed work on page 23 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 3.7MB/1.0GB object store: 86%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 729/843 [10:54<00:59, 1.91 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.5MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:53<01:03, 3.54s/ row]\n", + "\n", + "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 1; Queued blocks: 0; Resources: 1.0 CPU, 2.2MB object store: 86%|█████████████████████████████████████████████████████████████████████████████████████████▉ | 729/843 [10:53<00:59, 1.91 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.2MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:54<01:03, 3.54s/ row]\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 4.18: Completed work on page 21 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 3.7MB/1.0GB object store: 86%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 729/843 [10:55<00:59, 1.91 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.2MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:54<01:03, 3.54s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 4.18: Completed work on page 21 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 3.7MB/1.0GB object store: 86%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 729/843 [10:55<00:59, 1.91 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.2MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:54<01:03, 3.54s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 4.40: Completed work on page 28 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 3.7MB/1.0GB object store: 86%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 729/843 [10:55<00:59, 1.91 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.2MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:54<01:03, 3.54s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 4.40: Completed work on page 28 \n", + "\n", + "\n", + "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 0; Queued blocks: 0; Resources: 0.0 CPU, 0.0B object store: 86%|██████████████████████████████████████████████████████████████████████████████████████████▊ | 729/843 [10:54<00:59, 1.91 row/s]\n", + "\n", + "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 0; Queued blocks: 0; Resources: 0.0 CPU, 0.0B object store: 87%|██████████████████████████████████████████████████████████████████████████████████████████▉ | 729/842 [10:54<00:59, 1.91 row/s]\n", + "\n", + "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 0; Queued blocks: 0; Resources: 0.0 CPU, 0.0B object store: 88%|████████████████████████████████████████████████████████████████████████████████████████████▌ | 742/842 [10:54<00:51, 1.95 row/s]\n", + "\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 3.7MB/1.0GB object store: 86%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 729/843 [10:55<00:59, 1.91 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.2MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:54<01:03, 3.54s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.2MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [10:55<00:51, 1.95 row/s](MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 4.46: Completed work on page 27\n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.2MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [10:55<00:51, 1.95 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.2MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:54<01:03, 3.54s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 4.46: Completed work on page 27 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.2MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [10:55<00:51, 1.95 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.2MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:54<01:03, 3.54s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 4.46: Completed work on page 26 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.2MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [10:55<00:51, 1.95 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.2MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:54<01:03, 3.54s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 4.46: Completed work on page 26 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.2MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [10:55<00:51, 1.95 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.2MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:54<01:03, 3.54s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 4.46: Completed work on page 25 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.2MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [10:55<00:51, 1.95 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.2MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:54<01:03, 3.54s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 4.46: Completed work on page 25 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.2MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [10:55<00:51, 1.95 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.2MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:54<01:03, 3.54s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 4.65: Completed work on page 29 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.2MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [10:55<00:51, 1.95 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.2MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:54<01:03, 3.54s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 4.65: Completed work on page 29 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.2MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [10:55<00:51, 1.95 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.2MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:54<01:03, 3.54s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 4.74: Completed work on page 31 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.2MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [10:55<00:51, 1.95 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.2MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:54<01:03, 3.54s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 4.74: Completed work on page 31 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.2MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [10:55<00:51, 1.95 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.2MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:54<01:03, 3.54s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 4.93: Completed work on page 33 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.2MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [10:55<00:51, 1.95 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.2MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:54<01:03, 3.54s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 4.93: Completed work on page 33 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.2MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [10:55<00:51, 1.95 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.2MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:54<01:03, 3.54s/ row]\n", + "\n", + "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 0; Queued blocks: 0; Resources: 0.0 CPU, 0.0B object store: 88%|████████████████████████████████████████████████████████████████████████████████████████████▌ | 742/842 [10:54<00:51, 1.95 row/s]\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 5.02: Completed work on page 30 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.2MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [10:56<00:51, 1.95 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.2MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:55<01:03, 3.54s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 5.02: Completed work on page 30 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.2MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [10:56<00:51, 1.95 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.2MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:55<01:03, 3.54s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 5.32: Completed work on page 32 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.2MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [10:56<00:51, 1.95 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.2MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:55<01:03, 3.54s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 5.32: Completed work on page 32 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.2MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [10:56<00:51, 1.95 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.2MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:55<01:03, 3.54s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 5.56: Completed work on page 35 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.2MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [10:56<00:51, 1.95 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.2MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:55<01:03, 3.54s/ row]\n", + "\n", + " \n", + " \n", + "\n", + " \n", + "\n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.2MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [10:56<00:51, 1.95 row/s] INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 5.56: Completed work on page 35___wrap)) pid=77875)\n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.2MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [10:56<00:51, 1.95 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.2MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:55<01:03, 3.54s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 5.62: Completed work on page 37 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.2MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [10:56<00:51, 1.95 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.2MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:55<01:03, 3.54s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 5.62: Completed work on page 37 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.2MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [10:56<00:51, 1.95 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.2MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:55<01:03, 3.54s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 5.62: Completed work on page 34 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.2MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [10:56<00:51, 1.95 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.2MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:55<01:03, 3.54s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 5.62: Completed work on page 34 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.2MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [10:56<00:51, 1.95 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.2MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:55<01:03, 3.54s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 5.62: Completed work on page 36 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.2MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [10:56<00:51, 1.95 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.2MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:55<01:03, 3.54s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 5.62: Completed work on page 36 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.2MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [10:56<00:51, 1.95 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.2MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:55<01:03, 3.54s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 6.18: Completed work on page 38 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.2MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [10:57<00:51, 1.95 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.2MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:56<01:03, 3.54s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 6.18: Completed work on page 38 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.2MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [10:57<00:51, 1.95 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.2MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:56<01:03, 3.54s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 6.24: Completed work on page 39 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.2MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [10:57<00:51, 1.95 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.2MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:56<01:03, 3.54s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 6.24: Completed work on page 39 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.2MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [10:57<00:51, 1.95 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.2MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:56<01:03, 3.54s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 6.32: Completed work on page 42 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.2MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [10:57<00:51, 1.95 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.2MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:56<01:03, 3.54s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 6.32: Completed work on page 42 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.2MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [10:57<00:51, 1.95 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.2MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:56<01:03, 3.54s/ row]\n", + "\n", + " \n", + " \n", + "\n", + " \n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.2MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:56<01:03, 3.54s/ row](MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 6.62: Completed work on page 41\n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.2MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [10:57<00:51, 1.95 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.2MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:56<01:03, 3.54s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 6.62: Completed work on page 41 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.2MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [10:57<00:51, 1.95 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.2MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:56<01:03, 3.54s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 6.94: Completed work on page 40 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.2MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [10:57<00:51, 1.95 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.2MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:56<01:03, 3.54s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 6.94: Completed work on page 40 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.2MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [10:57<00:51, 1.95 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.2MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:56<01:03, 3.54s/ row]\n", + "\n", + "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 0; Queued blocks: 0; Resources: 0.0 CPU, 0.0B object store: 88%|████████████████████████████████████████████████████████████████████████████████████████████▌ | 742/842 [10:56<00:51, 1.95 row/s]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 7.02: Completed work on page 43 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.2MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [10:58<00:51, 1.95 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.2MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:57<01:03, 3.54s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 7.02: Completed work on page 43 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.2MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [10:58<00:51, 1.95 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.2MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:57<01:03, 3.54s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 7.28: Completed work on page 44 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.2MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [10:58<00:51, 1.95 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.2MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:57<01:03, 3.54s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 7.28: Completed work on page 44 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.2MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [10:58<00:51, 1.95 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.2MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:57<01:03, 3.54s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 7.39: Completed work on page 49 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.2MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [10:58<00:51, 1.95 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.2MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:57<01:03, 3.54s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 7.39: Completed work on page 49 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.2MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [10:58<00:51, 1.95 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.2MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:57<01:03, 3.54s/ row]\n", + "\n", + " \n", + " \n", + "\n", + " \n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.2MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:57<01:03, 3.54s/ row](MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 7.50: Completed work on page 45\n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.2MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [10:58<00:51, 1.95 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.2MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:57<01:03, 3.54s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 7.50: Completed work on page 45 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.2MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [10:58<00:51, 1.95 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.2MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:57<01:03, 3.54s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 7.50: Completed work on page 46 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.2MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [10:58<00:51, 1.95 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.2MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:57<01:03, 3.54s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 7.50: Completed work on page 46 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.2MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [10:58<00:51, 1.95 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.2MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:57<01:03, 3.54s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 7.76: Completed work on page 47 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.2MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [10:58<00:51, 1.95 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.2MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:57<01:03, 3.54s/ row]\n", + "\n", + " \n", + " \n", + "\n", + " \n", + "\n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.2MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [10:58<00:51, 1.95 row/s] INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 7.76: Completed work on page 47___wrap)) pid=77875)\n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.2MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [10:58<00:51, 1.95 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.2MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:57<01:03, 3.54s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 8.00: Completed work on page 48 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.2MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [10:58<00:51, 1.95 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.2MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:57<01:03, 3.54s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 8.00: Completed work on page 48 \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 1.2MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [10:58<00:51, 1.95 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.2MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:57<01:03, 3.54s/ row]\n", + "\n", + "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 0; Queued blocks: 0; Resources: 0.0 CPU, 0.0B object store: 88%|████████████████████████████████████████████████████████████████████████████████████████████▌ | 742/842 [10:57<00:51, 1.95 row/s]\n", + "Running Dataset. Active & requested resources: 1/11 CPU, 3.2MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [10:59<00:51, 1.95 row/s]\n", + "\n", + "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 1; Queued blocks: 0; Resources: 1.0 CPU, 2.2MB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████▋ | 742/842 [10:58<00:51, 1.95 row/s]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:httpx:HTTP Request: POST https://api.aryn.cloud/v1/document/partition \"HTTP/1.1 200 OK\" \n", + "Running Dataset. Active & requested resources: 1/11 CPU, 3.2MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [11:00<00:51, 1.95 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.2MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:59<01:03, 3.54s/ row]\n", + "\n", + "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 1; Queued blocks: 0; Resources: 1.0 CPU, 2.2MB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████▋ | 742/842 [10:59<00:51, 1.95 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 2; Resources: 1.0 CPU, 3.2MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:59<01:03, 3.54s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 2; Resources: 1.0 CPU, 3.2MB object store; [locality off]: 90%|█████████████████████████████████████████████████████████████████▍ | 139/155 [10:59<01:05, 4.08s/ row]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 2; Resources: 1.0 CPU, 3.2MB object store; [locality off]: 90%|█████████████████████████████████████████████████████████████████▍ | 139/155 [10:59<01:05, 4.08s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.00: Server version aryn-partitioner-0.20250323.124348 Model version 1.4 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.4MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [11:01<00:51, 1.95 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 2; Resources: 1.0 CPU, 3.2MB object store; [locality off]: 90%|█████████████████████████████████████████████████████████████████▍ | 139/155 [11:00<01:05, 4.08s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.00: Server version aryn-partitioner-0.20250323.124348 Model version 1.4 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.4MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [11:01<00:51, 1.95 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 2; Resources: 1.0 CPU, 3.2MB object store; [locality off]: 90%|█████████████████████████████████████████████████████████████████▍ | 139/155 [11:00<01:05, 4.08s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.00: Received request with aryn_call_id=aryn:call-nbapmpe347y51lkvqxyx1ea \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.4MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [11:01<00:51, 1.95 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 2; Resources: 1.0 CPU, 3.2MB object store; [locality off]: 90%|█████████████████████████████████████████████████████████████████▍ | 139/155 [11:00<01:05, 4.08s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.00: Received request with aryn_call_id=aryn:call-nbapmpe347y51lkvqxyx1ea \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.4MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [11:01<00:51, 1.95 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 2; Resources: 1.0 CPU, 3.2MB object store; [locality off]: 90%|█████████████████████████████████████████████████████████████████▍ | 139/155 [11:00<01:05, 4.08s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.00: Waiting for scheduling \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.4MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [11:01<00:51, 1.95 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 2; Resources: 1.0 CPU, 3.2MB object store; [locality off]: 90%|█████████████████████████████████████████████████████████████████▍ | 139/155 [11:00<01:05, 4.08s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.00: Waiting for scheduling \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.4MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [11:01<00:51, 1.95 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 2; Resources: 1.0 CPU, 3.2MB object store; [locality off]: 90%|█████████████████████████████████████████████████████████████████▍ | 139/155 [11:00<01:05, 4.08s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.01: Preprocessing document \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.4MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [11:01<00:51, 1.95 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 2; Resources: 1.0 CPU, 3.2MB object store; [locality off]: 90%|█████████████████████████████████████████████████████████████████▍ | 139/155 [11:00<01:05, 4.08s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.01: Preprocessing document \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.4MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [11:01<00:51, 1.95 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 2; Resources: 1.0 CPU, 3.2MB object store; [locality off]: 90%|█████████████████████████████████████████████████████████████████▍ | 139/155 [11:00<01:05, 4.08s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.01: Done preprocessing document \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.4MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [11:01<00:51, 1.95 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 2; Resources: 1.0 CPU, 3.2MB object store; [locality off]: 90%|█████████████████████████████████████████████████████████████████▍ | 139/155 [11:00<01:05, 4.08s/ row]\n", + "\n", + " \n", + " \n", + "\n", + " \n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 2; Resources: 1.0 CPU, 3.2MB object store; [locality off]: 90%|█████████████████████████████████████████████████████████████████▍ | 139/155 [11:00<01:05, 4.08s/ row](MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.01: Done preprocessing document\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.4MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [11:01<00:51, 1.95 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 2; Resources: 1.0 CPU, 3.2MB object store; [locality off]: 90%|█████████████████████████████████████████████████████████████████▍ | 139/155 [11:00<01:05, 4.08s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.69: Completed work on page 1 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.4MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [11:01<00:51, 1.95 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 2; Resources: 1.0 CPU, 3.2MB object store; [locality off]: 90%|█████████████████████████████████████████████████████████████████▍ | 139/155 [11:00<01:05, 4.08s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.69: Completed work on page 1 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.4MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [11:01<00:51, 1.95 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 2; Resources: 1.0 CPU, 3.2MB object store; [locality off]: 90%|█████████████████████████████████████████████████████████████████▍ | 139/155 [11:00<01:05, 4.08s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.69: Completed work on page 2 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.4MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [11:01<00:51, 1.95 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 2; Resources: 1.0 CPU, 3.2MB object store; [locality off]: 90%|█████████████████████████████████████████████████████████████████▍ | 139/155 [11:00<01:05, 4.08s/ row]\n", + "\n", + " \n", + " \n", + "\n", + " \n", + "\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.4MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [11:01<00:51, 1.95 row/s] INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.69: Completed work on page 2e___wrap)) pid=77875)\n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.4MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [11:01<00:51, 1.95 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 2; Resources: 1.0 CPU, 3.2MB object store; [locality off]: 90%|█████████████████████████████████████████████████████████████████▍ | 139/155 [11:00<01:05, 4.08s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.84: Completed work on page 3 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.4MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [11:01<00:51, 1.95 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 2; Resources: 1.0 CPU, 3.2MB object store; [locality off]: 90%|█████████████████████████████████████████████████████████████████▍ | 139/155 [11:00<01:05, 4.08s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.84: Completed work on page 3 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.4MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [11:01<00:51, 1.95 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 2; Resources: 1.0 CPU, 3.2MB object store; [locality off]: 90%|█████████████████████████████████████████████████████████████████▍ | 139/155 [11:00<01:05, 4.08s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.01: Completed work on page 4 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.4MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [11:01<00:51, 1.95 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 2; Resources: 1.0 CPU, 3.2MB object store; [locality off]: 90%|█████████████████████████████████████████████████████████████████▍ | 139/155 [11:00<01:05, 4.08s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.01: Completed work on page 4 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.4MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [11:01<00:51, 1.95 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 2; Resources: 1.0 CPU, 3.2MB object store; [locality off]: 90%|█████████████████████████████████████████████████████████████████▍ | 139/155 [11:00<01:05, 4.08s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.23: Completed work on page 5 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.4MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [11:02<00:51, 1.95 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 2; Resources: 1.0 CPU, 3.2MB object store; [locality off]: 90%|█████████████████████████████████████████████████████████████████▍ | 139/155 [11:01<01:05, 4.08s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.23: Completed work on page 5 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.4MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [11:02<00:51, 1.95 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 2; Resources: 1.0 CPU, 3.2MB object store; [locality off]: 90%|█████████████████████████████████████████████████████████████████▍ | 139/155 [11:01<01:05, 4.08s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.53: Completed work on page 6 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.4MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [11:02<00:51, 1.95 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 2; Resources: 1.0 CPU, 3.2MB object store; [locality off]: 90%|█████████████████████████████████████████████████████████████████▍ | 139/155 [11:01<01:05, 4.08s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.53: Completed work on page 6 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.4MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [11:02<00:51, 1.95 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 2; Resources: 1.0 CPU, 3.2MB object store; [locality off]: 90%|█████████████████████████████████████████████████████████████████▍ | 139/155 [11:01<01:05, 4.08s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.65: Completed work on page 7 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.4MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [11:02<00:51, 1.95 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 2; Resources: 1.0 CPU, 3.2MB object store; [locality off]: 90%|█████████████████████████████████████████████████████████████████▍ | 139/155 [11:01<01:05, 4.08s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.65: Completed work on page 7 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.4MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [11:02<00:51, 1.95 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 2; Resources: 1.0 CPU, 3.2MB object store; [locality off]: 90%|█████████████████████████████████████████████████████████████████▍ | 139/155 [11:01<01:05, 4.08s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.92: Completed work on page 8 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.4MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [11:02<00:51, 1.95 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 2; Resources: 1.0 CPU, 3.2MB object store; [locality off]: 90%|█████████████████████████████████████████████████████████████████▍ | 139/155 [11:01<01:05, 4.08s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.92: Completed work on page 8 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.4MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [11:02<00:51, 1.95 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 2; Resources: 1.0 CPU, 3.2MB object store; [locality off]: 90%|█████████████████████████████████████████████████████████████████▍ | 139/155 [11:01<01:05, 4.08s/ row]\n", + "\n", + "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 1; Queued blocks: 0; Resources: 1.0 CPU, 2.2MB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████▋ | 742/842 [11:01<00:51, 1.95 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 2; Resources: 1.0 CPU, 3.2MB object store; [locality off]: 90%|█████████████████████████████████████████████████████████████████▍ | 139/155 [11:01<01:05, 4.08s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.10: Completed work on page 9 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.4MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [11:03<00:51, 1.95 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 2; Resources: 1.0 CPU, 3.2MB object store; [locality off]: 90%|█████████████████████████████████████████████████████████████████▍ | 139/155 [11:02<01:05, 4.08s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.10: Completed work on page 9 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.4MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [11:03<00:51, 1.95 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 2; Resources: 1.0 CPU, 3.2MB object store; [locality off]: 90%|█████████████████████████████████████████████████████████████████▍ | 139/155 [11:02<01:05, 4.08s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.28: Completed work on page 10 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.4MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [11:03<00:51, 1.95 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 2; Resources: 1.0 CPU, 3.2MB object store; [locality off]: 90%|█████████████████████████████████████████████████████████████████▍ | 139/155 [11:02<01:05, 4.08s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.28: Completed work on page 10 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.4MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [11:03<00:51, 1.95 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 2; Resources: 1.0 CPU, 3.2MB object store; [locality off]: 90%|█████████████████████████████████████████████████████████████████▍ | 139/155 [11:02<01:05, 4.08s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.46: Completed work on page 11 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.4MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [11:03<00:51, 1.95 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 2; Resources: 1.0 CPU, 3.2MB object store; [locality off]: 90%|█████████████████████████████████████████████████████████████████▍ | 139/155 [11:02<01:05, 4.08s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.46: Completed work on page 11 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.4MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [11:03<00:51, 1.95 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 2; Resources: 1.0 CPU, 3.2MB object store; [locality off]: 90%|█████████████████████████████████████████████████████████████████▍ | 139/155 [11:02<01:05, 4.08s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.68: Completed work on page 12 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.4MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [11:03<00:51, 1.95 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 2; Resources: 1.0 CPU, 3.2MB object store; [locality off]: 90%|█████████████████████████████████████████████████████████████████▍ | 139/155 [11:02<01:05, 4.08s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.68: Completed work on page 12 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.4MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [11:03<00:51, 1.95 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 2; Resources: 1.0 CPU, 3.2MB object store; [locality off]: 90%|█████████████████████████████████████████████████████████████████▍ | 139/155 [11:02<01:05, 4.08s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.95: Completed work on page 13 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.4MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [11:03<00:51, 1.95 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 2; Resources: 1.0 CPU, 3.2MB object store; [locality off]: 90%|█████████████████████████████████████████████████████████████████▍ | 139/155 [11:02<01:05, 4.08s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.95: Completed work on page 13 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.4MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [11:03<00:51, 1.95 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 2; Resources: 1.0 CPU, 3.2MB object store; [locality off]: 90%|█████████████████████████████████████████████████████████████████▍ | 139/155 [11:02<01:05, 4.08s/ row]\n", + "\n", + "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 1; Queued blocks: 0; Resources: 1.0 CPU, 2.2MB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████▋ | 742/842 [11:02<00:51, 1.95 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 2; Resources: 1.0 CPU, 3.2MB object store; [locality off]: 90%|█████████████████████████████████████████████████████████████████▍ | 139/155 [11:02<01:05, 4.08s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 3.13: Completed work on page 14 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.4MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [11:04<00:51, 1.95 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 2; Resources: 1.0 CPU, 3.2MB object store; [locality off]: 90%|█████████████████████████████████████████████████████████████████▍ | 139/155 [11:03<01:05, 4.08s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 3.13: Completed work on page 14 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.4MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [11:04<00:51, 1.95 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 2; Resources: 1.0 CPU, 3.2MB object store; [locality off]: 90%|█████████████████████████████████████████████████████████████████▍ | 139/155 [11:03<01:05, 4.08s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 3.32: Completed work on page 15 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.4MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [11:04<00:51, 1.95 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 2; Resources: 1.0 CPU, 3.2MB object store; [locality off]: 90%|█████████████████████████████████████████████████████████████████▍ | 139/155 [11:03<01:05, 4.08s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 3.32: Completed work on page 15 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.4MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [11:04<00:51, 1.95 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 2; Resources: 1.0 CPU, 3.2MB object store; [locality off]: 90%|█████████████████████████████████████████████████████████████████▍ | 139/155 [11:03<01:05, 4.08s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 3.50: Completed work on page 16 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.4MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [11:04<00:51, 1.95 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 2; Resources: 1.0 CPU, 3.2MB object store; [locality off]: 90%|█████████████████████████████████████████████████████████████████▍ | 139/155 [11:03<01:05, 4.08s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 3.50: Completed work on page 16 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.4MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [11:04<00:51, 1.95 row/s]\n", + "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 2; Resources: 1.0 CPU, 3.2MB object store; [locality off]: 90%|█████████████████████████████████████████████████████████████████▍ | 139/155 [11:03<01:05, 4.08s/ row]\n", + "\n", + " \n", + " \n", + "\n", + "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 3.67: Completed work on page 17 \n", + "Running Dataset. Active & requested resources: 2/11 CPU, 5.4MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [11:04<00:51, 1.95 row/s]" + ] + } + ], + "source": [ + "finalDocSet.embed(embedder=OpenAIEmbedder(model_name=model_name)).write.aryn(aryn_url=\"https://test-api.aryn.ai/v1/storage\", docset_id=docset.value.docset_id, aryn_api_key=\"eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJzdWIiOnsiZW1sIjoiZG9jc3RvcmUtdGVhbUBhcnluLmFpIiwiYWN0IjoiMjI0NjYxMzg4MzA4In0sImlhdCI6MTczMzk1MDY3OH0.xqmVCfnu0RJ2RW-74-to4_hbeTIZflToj7YZs6vGtNxCkNroEE5Quzro8ztxTdI6Yt-9HzPepIXdA2QnKDtWBQ\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "649a0eaa-4165-49ef-b4f2-c587530fe413", + "metadata": {}, + "outputs": [], + "source": [ + "docset = myClient.get_docset(docset_id=docset.value.docset_id)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "add93321-626e-4477-a2d5-15ab2be74852", + "metadata": {}, + "outputs": [], + "source": [ + "docset.value" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "27d62142-1ef2-47b4-855d-68b477896c77", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.11" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/QuestionAnsweringNotebook.ipynb b/QuestionAnsweringNotebook.ipynb new file mode 100644 index 0000000..ef6d1e8 --- /dev/null +++ b/QuestionAnsweringNotebook.ipynb @@ -0,0 +1,1005 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 47, + "id": "dada065d-47fc-4b2c-9934-ef6fa9b93dd8", + "metadata": {}, + "outputs": [], + "source": [ + "import sycamore\n", + "import os\n", + "from sycamore.transforms.partition import ArynPartitioner\n", + "from sycamore.transforms.extract_schema import (\n", + " OpenAIPropertyExtractor,\n", + ")\n", + "from sycamore.llms import OpenAI, OpenAIModels\n", + "from sycamore.data.element import Element\n", + "from sycamore.data.document import Document\n", + "from sycamore.functions import HuggingFaceTokenizer, OpenAITokenizer\n", + "from sycamore.llms import OpenAI, OpenAIModels\n", + "from sycamore.transforms.embed import SentenceTransformerEmbedder" + ] + }, + { + "cell_type": "code", + "execution_count": 48, + "id": "b0716549-20ce-45db-8d3d-8c3085e32ef9", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/var/folders/ty/flsmmnn17p314__gg336qjy00000gn/T/ipykernel_77419/2683495837.py:2: FutureWarning: Class aryn is experimental and may change in the future.\n", + " initial_docset = context.read.aryn(aryn_url=\"https://test-api.aryn.ai/v1/storage\", docset_id=\"aryn:ds-v9tfacka0xifljqaj0l1rbh\", aryn_api_key=\"eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJzdWIiOnsiZW1sIjoiZG9jc3RvcmUtdGVhbUBhcnluLmFpIiwiYWN0IjoiMjI0NjYxMzg4MzA4In0sImlhdCI6MTczMzk1MDY3OH0.xqmVCfnu0RJ2RW-74-to4_hbeTIZflToj7YZs6vGtNxCkNroEE5Quzro8ztxTdI6Yt-9HzPepIXdA2QnKDtWBQ\")\n" + ] + } + ], + "source": [ + "context = sycamore.init()\n", + "initial_docset = context.read.aryn(aryn_url=\"https://test-api.aryn.ai/v1/storage\", docset_id=\"aryn:ds-v9tfacka0xifljqaj0l1rbh\", aryn_api_key=\"eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJzdWIiOnsiZW1sIjoiZG9jc3RvcmUtdGVhbUBhcnluLmFpIiwiYWN0IjoiMjI0NjYxMzg4MzA4In0sImlhdCI6MTczMzk1MDY3OH0.xqmVCfnu0RJ2RW-74-to4_hbeTIZflToj7YZs6vGtNxCkNroEE5Quzro8ztxTdI6Yt-9HzPepIXdA2QnKDtWBQ\")" + ] + }, + { + "cell_type": "code", + "execution_count": 50, + "id": "91c37ade-4294-49b4-8228-e137aa65f038", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2025-03-26 16:58:51,357\tINFO streaming_executor.py:108 -- Starting execution of Dataset. Full logs are in /tmp/ray/session_2025-03-26_16-00-08_504381_77419/logs/ray-data\n", + "2025-03-26 16:58:51,358\tINFO streaming_executor.py:109 -- Execution plan of Dataset: InputDataBuffer[Input] -> TaskPoolMapOperator[Map(ArynReader._to_doc)]\n", + "Running 0: 0.00 row [00:00, ? row/s]\n", + "- Map(ArynReader._to_doc) 1: 0.00 row [00:00, ? row/s]\n", + "- Map(ArynReader._to_doc): Tasks: 3 [backpressured]; Queued blocks: 89; Resources: 3.0 CPU, 768.0MB object store: : 0.00 row [00:01, ? row/s]\n", + " \n", + "✔️ Dataset execution finished in 2.07 seconds: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2.00/2.00 [00:02<00:00, 1.05s/ row]\n", + "\n", + "- Map(ArynReader._to_doc): Tasks: 3 [backpressured]; Queued blocks: 89; Resources: 3.0 CPU, 768.0MB object store: : 0.00 row [00:02, ? row/s]\n", + "- Map(ArynReader._to_doc): Tasks: 11 [backpressured]; Queued blocks: 79; Resources: 11.0 CPU, 8.6MB object store: : 0.00 row [00:02, ? row/s]\n", + "- Map(ArynReader._to_doc): Tasks: 11 [backpressured]; Queued blocks: 79; Resources: 11.0 CPU, 8.6MB object store: 0%| | 0.00/2.00 [00:02\")" + ] + }, + { + "cell_type": "code", + "execution_count": 74, + "id": "74fb23b6-5703-4d10-94fb-432729c1c68f", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'\\nllm_filtered_docset = exploded_docset2.llm_filter(new_field=\"_autogen_LLMFilterOutput\",\\n prompt=LlmFilterMessagesJinjaPrompt.set(filter_question=\"Did Brian Chesky speak?\", use_elements=False),\\n field = \"text_represenation\",\\n llm=oai,\\n keep_none=True)\\n\\n\\nexploded_docset.llm_filter(new_field=\"_autogen_LLMFilterOutput\",\\n #prompt=LlmFilterMessagesJinjaPrompt.fork(filter_question=\"Did Brian Chesky speak?\", use_elements=False),\\n prompt = prompt,\\n field = \"text_representation\",\\n llm=oai )\\n '" + ] + }, + "execution_count": 74, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from sycamore.llms.prompts.default_prompts import LlmFilterMessagesJinjaPrompt\n", + "from sycamore.llms.prompts.prompts import JinjaPrompt\n", + "from sycamore.transforms.extract_entity import OpenAIEntityExtractor\n", + "\n", + "prompt = JinjaPrompt(\n", + " system=\"You are a helpful classifier that generously filters database entries based on questions.\",\n", + " user=(\"Did Brian Chesky speak?\" )\n", + ")\n", + "\n", + "entity_extractor = OpenAIEntityExtractor(entity_name=\"num_customers\", llm=oai, num_of_elements=10,\n", + " #prompt = LlmFilterMessagesJinjaPrompt.set(filter_question=\"How many customers did MongoDB have at the end of Q1 in 2024?\", use_elements=False),\n", + " field = \"text_representation\", use_elements=True)\n", + " \n", + "\n", + "mdb_docset = removed_orig_docset.filter( lambda doc: doc.properties['earnings_calls']['company_ticker'] == 'MDB' and doc.properties['earnings_calls']['quarter']=='Q1').extract_entity(entity_extractor)\n", + "\n", + "'''\n", + "llm_filtered_docset = exploded_docset2.llm_filter(new_field=\"_autogen_LLMFilterOutput\",\n", + " prompt=LlmFilterMessagesJinjaPrompt.set(filter_question=\"Did Brian Chesky speak?\", use_elements=False),\n", + " field = \"text_represenation\",\n", + " llm=oai,\n", + " keep_none=True)\n", + "\n", + "\n", + "exploded_docset.llm_filter(new_field=\"_autogen_LLMFilterOutput\",\n", + " #prompt=LlmFilterMessagesJinjaPrompt.fork(filter_question=\"Did Brian Chesky speak?\", use_elements=False),\n", + " prompt = prompt,\n", + " field = \"text_representation\",\n", + " llm=oai )\n", + " '''\n", + "#logical_node = LlmFilter(node_id=0, question=\"Filter all the records where the Brian Chesky spoke\", field=\"Brian Chesky\")\n", + "#sycamore_operator = SycamoreLlmFilter(context, logical_node, query_id=\"test\", inputs=[exploded_docset])" + ] + }, + { + "cell_type": "code", + "execution_count": 77, + "id": "7814bb6e-63ea-48ed-b637-543a516b4175", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2025-03-26 17:30:04,428\tINFO streaming_executor.py:108 -- Starting execution of Dataset. Full logs are in /tmp/ray/session_2025-03-26_16-00-08_504381_77419/logs/ray-data\n", + "2025-03-26 17:30:04,429\tINFO streaming_executor.py:109 -- Execution plan of Dataset: InputDataBuffer[Input] -> TaskPoolMapOperator[Map(ArynReader._to_doc)->MapBatches(remove_original_elements)->MapBatches(spread_properties)->MapBatches()->MapBatches(sort_and_batch_elements)->MapBatches(llm_map)->MapBatches(postprocess)]\n", + "Running 0: 0.00 row [00:00, ? row/s]\n", + "- Map(ArynReader._to_doc)->MapBatches(remove_original_elements)->...->MapBatches(postprocess) 1: 0.00 row [00:00, ? row/s]\n", + "- Map(ArynReader._to_doc)->...->MapBatches(postprocess): Tasks: 3 [backpressured]; Queued blocks: 89; Resources: 3.0 CPU, 768.0MB object store: : 0.00 row [00:01, ? row/s]\n", + "Running Dataset. Active & requested resources: 3/11 CPU, 768.0MB/1.0GB object store: : 0.00 row [00:01, ? row/s] 3.0 CPU, 768.0MB object store: : 0.00 row [00:01, ? row/s]\n", + "- Map(ArynReader._to_doc)->...->MapBatches(postprocess): Tasks: 11 [backpressured]; Queued blocks: 80; Resources: 11.0 CPU, 5.3KB object store: : 0.00 row [00:02, ? row/s]\n", + "- Map(ArynReader._to_doc)->...->MapBatches(postprocess): Tasks: 11 [backpressured]; Queued blocks: 80; Resources: 11.0 CPU, 5.3KB object store: 0%| | 0.00/276 [00:02...->MapBatches(postprocess): Tasks: 11 [backpressured]; Queued blocks: 80; Resources: 11.0 CPU, 5.3KB object store: 1%|▉ | 3.00/276 [00:02<03:26, 1.32 row/s]\n", + "Running Dataset. Active & requested resources: 11/11 CPU, 5.3KB/1.0GB object store: 1%|█▌ | 3.00/276 [00:02<03:38, 1.25 row/s]\n", + "- Map(ArynReader._to_doc)->...->MapBatches(postprocess): Tasks: 11 [backpressured]; Queued blocks: 79; Resources: 11.0 CPU, 5.3KB object store: 1%|▉ | 3.00/276 [00:03<03:26, 1.32 row/s]\n", + "- Map(ArynReader._to_doc)->...->MapBatches(postprocess): Tasks: 11 [backpressured]; Queued blocks: 79; Resources: 11.0 CPU, 5.3KB object store: 2%|█▉ | 6.00/276 [00:03<02:13, 2.02 row/s]\n", + "Running Dataset. Active & requested resources: 10/11 CPU, 5.3KB/1.0GB object store: 2%|███▏ | 6.00/276 [00:03<02:23, 1.88 row/s]\n", + "- Map(ArynReader._to_doc)->...->MapBatches(postprocess): Tasks: 11 [backpressured]; Queued blocks: 77; Resources: 11.0 CPU, 5.3KB object store: 2%|█▉ | 6.00/276 [00:04<02:13, 2.02 row/s]\n", + "- Map(ArynReader._to_doc)->...->MapBatches(postprocess): Tasks: 11 [backpressured]; Queued blocks: 77; Resources: 11.0 CPU, 5.3KB object store: 4%|███▊ | 12.0/276 [00:04<01:16, 3.44 row/s]\n", + "Running Dataset. Active & requested resources: 9/11 CPU, 5.3KB/1.0GB object store: 4%|██████▍ | 12.0/276 [00:04<01:20, 3.28 row/s]\n", + "- Map(ArynReader._to_doc)->...->MapBatches(postprocess): Tasks: 11 [backpressured]; Queued blocks: 73; Resources: 11.0 CPU, 5.3KB object store: 4%|███▊ | 12.0/276 [00:05<01:16, 3.44 row/s]\n", + "- Map(ArynReader._to_doc)->...->MapBatches(postprocess): Tasks: 11 [backpressured]; Queued blocks: 73; Resources: 11.0 CPU, 5.3KB object store: 9%|███████▌ | 24.0/276 [00:05<00:44, 5.63 row/s]\n", + "Running Dataset. Active & requested resources: 11/11 CPU, 5.3KB/1.0GB object store: 9%|████████████▊ | 24.0/276 [00:05<00:44, 5.69 row/s]\n", + "- Map(ArynReader._to_doc)->...->MapBatches(postprocess): Tasks: 11 [backpressured]; Queued blocks: 67; Resources: 11.0 CPU, 5.3KB object store: 9%|███████▌ | 24.0/276 [00:06<00:44, 5.63 row/s]\n", + "- Map(ArynReader._to_doc)->...->MapBatches(postprocess): Tasks: 11 [backpressured]; Queued blocks: 67; Resources: 11.0 CPU, 5.3KB object store: 15%|█████████████▏ | 42.0/276 [00:06<00:25, 9.08 row/s]\n", + "Running Dataset. Active & requested resources: 11/11 CPU, 5.3KB/1.0GB object store: 15%|██████████████████████▎ | 42.0/276 [00:06<00:25, 9.17 row/s]\n", + "- Map(ArynReader._to_doc)->...->MapBatches(postprocess): Tasks: 11 [backpressured]; Queued blocks: 65; Resources: 11.0 CPU, 5.3KB object store: 15%|█████████████▏ | 42.0/276 [00:07<00:25, 9.08 row/s]\n", + "- Map(ArynReader._to_doc)->...->MapBatches(postprocess): Tasks: 11 [backpressured]; Queued blocks: 65; Resources: 11.0 CPU, 5.3KB object store: 17%|███████████████▏ | 48.0/276 [00:07<00:28, 8.04 row/s]\n", + "Running Dataset. Active & requested resources: 11/11 CPU, 5.3KB/1.0GB object store: 17%|█████████████████████████▌ | 48.0/276 [00:07<00:28, 8.10 row/s]\n", + "- Map(ArynReader._to_doc)->...->MapBatches(postprocess): Tasks: 11 [backpressured]; Queued blocks: 62; Resources: 11.0 CPU, 5.3KB object store: 17%|███████████████▏ | 48.0/276 [00:08<00:28, 8.04 row/s]\n", + "- Map(ArynReader._to_doc)->...->MapBatches(postprocess): Tasks: 11 [backpressured]; Queued blocks: 62; Resources: 11.0 CPU, 5.3KB object store: 21%|█████████████████▉ | 57.0/276 [00:08<00:26, 8.18 row/s]\n", + "Running Dataset. Active & requested resources: 11/11 CPU, 5.3KB/1.0GB object store: 21%|██████████████████████████████▎ | 57.0/276 [00:08<00:26, 8.21 row/s]\n", + "- Map(ArynReader._to_doc)->...->MapBatches(postprocess): Tasks: 11 [backpressured]; Queued blocks: 56; Resources: 11.0 CPU, 5.3KB object store: 21%|█████████████████▉ | 57.0/276 [00:09<00:26, 8.18 row/s]\n", + "- Map(ArynReader._to_doc)->...->MapBatches(postprocess): Tasks: 11 [backpressured]; Queued blocks: 56; Resources: 11.0 CPU, 5.3KB object store: 27%|███████████████████████▋ | 75.0/276 [00:09<00:18, 10.8 row/s]\n", + "Running Dataset. Active & requested resources: 11/11 CPU, 5.3KB/1.0GB object store: 27%|███████████████████████████████████████▉ | 75.0/276 [00:09<00:18, 10.9 row/s]\n", + "- Map(ArynReader._to_doc)->...->MapBatches(postprocess): Tasks: 11 [backpressured]; Queued blocks: 53; Resources: 11.0 CPU, 5.3KB object store: 27%|███████████████████████▋ | 75.0/276 [00:10<00:18, 10.8 row/s]\n", + "- Map(ArynReader._to_doc)->...->MapBatches(postprocess): Tasks: 11 [backpressured]; Queued blocks: 53; Resources: 11.0 CPU, 5.3KB object store: 30%|██████████████████████████▍ | 84.0/276 [00:10<00:18, 10.2 row/s]\n", + "Running Dataset. Active & requested resources: 11/11 CPU, 5.3KB/1.0GB object store: 30%|████████████████████████████████████████████▋ | 84.0/276 [00:11<00:19, 10.1 row/s]\n", + "- Map(ArynReader._to_doc)->...->MapBatches(postprocess): Tasks: 11 [backpressured]; Queued blocks: 52; Resources: 11.0 CPU, 5.3KB object store: 30%|██████████████████████████▍ | 84.0/276 [00:12<00:18, 10.2 row/s]\n", + "- Map(ArynReader._to_doc)->...->MapBatches(postprocess): Tasks: 11 [backpressured]; Queued blocks: 52; Resources: 11.0 CPU, 5.3KB object store: 33%|████████████████████████████▎ | 90.0/276 [00:12<00:21, 8.68 row/s]\n", + "Running Dataset. Active & requested resources: 10/11 CPU, 4.8KB/1.0GB object store: 33%|███████████████████████████████████████████████▉ | 90.0/276 [00:12<00:21, 8.67 row/s]\n", + "- Map(ArynReader._to_doc)->...->MapBatches(postprocess): Tasks: 11 [backpressured]; Queued blocks: 44; Resources: 11.0 CPU, 5.3KB object store: 33%|████████████████████████████▎ | 90.0/276 [00:13<00:21, 8.68 row/s]\n", + "- Map(ArynReader._to_doc)->...->MapBatches(postprocess): Tasks: 11 [backpressured]; Queued blocks: 44; Resources: 11.0 CPU, 5.3KB object store: 40%|███████████████████████████████████▍ | 111/276 [00:13<00:13, 11.9 row/s]\n", + "Running Dataset. Active & requested resources: 11/11 CPU, 5.3KB/1.0GB object store: 40%|███████████████████████████████████████████████████████████▌ | 111/276 [00:13<00:13, 11.9 row/s]\n", + "- Map(ArynReader._to_doc)->...->MapBatches(postprocess): Tasks: 11 [backpressured]; Queued blocks: 39; Resources: 11.0 CPU, 5.3KB object store: 40%|███████████████████████████████████▍ | 111/276 [00:14<00:13, 11.9 row/s]\n", + "- Map(ArynReader._to_doc)->...->MapBatches(postprocess): Tasks: 11 [backpressured]; Queued blocks: 39; Resources: 11.0 CPU, 5.3KB object store: 46%|████████████████████████████████████████▏ | 126/276 [00:14<00:11, 12.5 row/s]\n", + "Running Dataset. Active & requested resources: 11/11 CPU, 5.3KB/1.0GB object store: 46%|███████████████████████████████████████████████████████████████████▌ | 126/276 [00:14<00:11, 12.6 row/s]\n", + "- Map(ArynReader._to_doc)->...->MapBatches(postprocess): Tasks: 11 [backpressured]; Queued blocks: 36; Resources: 11.0 CPU, 5.3KB object store: 46%|████████████████████████████████████████▏ | 126/276 [00:15<00:11, 12.5 row/s]\n", + "- Map(ArynReader._to_doc)->...->MapBatches(postprocess): Tasks: 11 [backpressured]; Queued blocks: 36; Resources: 11.0 CPU, 5.3KB object store: 49%|███████████████████████████████████████████ | 135/276 [00:15<00:12, 11.4 row/s]\n", + "Running Dataset. Active & requested resources: 11/11 CPU, 5.3KB/1.0GB object store: 49%|████████████████████████████████████████████████████████████████████████▍ | 135/276 [00:15<00:12, 11.4 row/s]\n", + "- Map(ArynReader._to_doc)->...->MapBatches(postprocess): Tasks: 11 [backpressured]; Queued blocks: 34; Resources: 11.0 CPU, 5.3KB object store: 49%|███████████████████████████████████████████ | 135/276 [00:16<00:12, 11.4 row/s]\n", + "- Map(ArynReader._to_doc)->...->MapBatches(postprocess): Tasks: 11 [backpressured]; Queued blocks: 34; Resources: 11.0 CPU, 5.3KB object store: 52%|█████████████████████████████████████████████▉ | 144/276 [00:16<00:12, 10.5 row/s]\n", + "Running Dataset. Active & requested resources: 10/11 CPU, 4.8KB/1.0GB object store: 52%|█████████████████████████████████████████████████████████████████████████████▏ | 144/276 [00:16<00:12, 10.5 row/s]\n", + "- Map(ArynReader._to_doc)->...->MapBatches(postprocess): Tasks: 11 [backpressured]; Queued blocks: 29; Resources: 11.0 CPU, 5.3KB object store: 52%|█████████████████████████████████████████████▉ | 144/276 [00:17<00:12, 10.5 row/s]\n", + "- Map(ArynReader._to_doc)->...->MapBatches(postprocess): Tasks: 11 [backpressured]; Queued blocks: 29; Resources: 11.0 CPU, 5.3KB object store: 57%|█████████████████████████████████████████████████▋ | 156/276 [00:17<00:11, 10.7 row/s]\n", + "Running Dataset. Active & requested resources: 11/11 CPU, 5.3KB/1.0GB object store: 57%|███████████████████████████████████████████████████████████████████████████████████▋ | 156/276 [00:17<00:11, 10.6 row/s]\n", + "- Map(ArynReader._to_doc)->...->MapBatches(postprocess): Tasks: 11 [backpressured]; Queued blocks: 24; Resources: 11.0 CPU, 5.3KB object store: 57%|█████████████████████████████████████████████████▋ | 156/276 [00:18<00:11, 10.7 row/s]\n", + "- Map(ArynReader._to_doc)->...->MapBatches(postprocess): Tasks: 11 [backpressured]; Queued blocks: 24; Resources: 11.0 CPU, 5.3KB object store: 62%|██████████████████████████████████████████████████████▌ | 171/276 [00:18<00:09, 11.6 row/s]\n", + "Running Dataset. Active & requested resources: 11/11 CPU, 5.3KB/1.0GB object store: 62%|███████████████████████████████████████████████████████████████████████████████████████████▋ | 171/276 [00:18<00:09, 11.6 row/s]\n", + "- Map(ArynReader._to_doc)->...->MapBatches(postprocess): Tasks: 11 [backpressured]; Queued blocks: 22; Resources: 11.0 CPU, 5.3KB object store: 62%|██████████████████████████████████████████████████████▌ | 171/276 [00:19<00:09, 11.6 row/s]\n", + "- Map(ArynReader._to_doc)->...->MapBatches(postprocess): Tasks: 11 [backpressured]; Queued blocks: 22; Resources: 11.0 CPU, 5.3KB object store: 64%|████████████████████████████████████████████████████████▍ | 177/276 [00:19<00:10, 9.85 row/s]\n", + "Running Dataset. Active & requested resources: 11/11 CPU, 5.3KB/1.0GB object store: 64%|██████████████████████████████████████████████████████████████████████████████████████████████▉ | 177/276 [00:19<00:10, 9.87 row/s]\n", + "- Map(ArynReader._to_doc)->...->MapBatches(postprocess): Tasks: 11 [backpressured]; Queued blocks: 19; Resources: 11.0 CPU, 5.8KB object store: 64%|████████████████████████████████████████████████████████▍ | 177/276 [00:20<00:10, 9.85 row/s]\n", + "- Map(ArynReader._to_doc)->...->MapBatches(postprocess): Tasks: 11 [backpressured]; Queued blocks: 19; Resources: 11.0 CPU, 5.8KB object store: 68%|████████████████████████████████████████████████████████████▎ | 189/276 [00:20<00:08, 10.3 row/s]\n", + "Running Dataset. Active & requested resources: 10/11 CPU, 4.8KB/1.0GB object store: 68%|█████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 189/276 [00:20<00:08, 10.3 row/s]\n", + "- Map(ArynReader._to_doc)->...->MapBatches(postprocess): Tasks: 11 [backpressured]; Queued blocks: 16; Resources: 11.0 CPU, 5.3KB object store: 68%|████████████████████████████████████████████████████████████▎ | 189/276 [00:21<00:08, 10.3 row/s]\n", + "- Map(ArynReader._to_doc)->...->MapBatches(postprocess): Tasks: 11 [backpressured]; Queued blocks: 16; Resources: 11.0 CPU, 5.3KB object store: 71%|██████████████████████████████████████████████████████████████▏ | 195/276 [00:21<00:09, 8.83 row/s]\n", + "Running Dataset. Active & requested resources: 11/11 CPU, 5.3KB/1.0GB object store: 71%|████████████████████████████████████████████████████████████████████████████████████████████████████████▌ | 195/276 [00:21<00:09, 8.77 row/s]\n", + "- Map(ArynReader._to_doc)->...->MapBatches(postprocess): Tasks: 11 [backpressured]; Queued blocks: 13; Resources: 11.0 CPU, 5.3KB object store: 71%|██████████████████████████████████████████████████████████████▏ | 195/276 [00:22<00:09, 8.83 row/s]\n", + "- Map(ArynReader._to_doc)->...->MapBatches(postprocess): Tasks: 11 [backpressured]; Queued blocks: 13; Resources: 11.0 CPU, 5.3KB object store: 74%|█████████████████████████████████████████████████████████████████ | 204/276 [00:22<00:08, 8.78 row/s]\n", + "Running Dataset. Active & requested resources: 11/11 CPU, 5.3KB/1.0GB object store: 74%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████▍ | 204/276 [00:22<00:08, 8.80 row/s]\n", + "- Map(ArynReader._to_doc)->...->MapBatches(postprocess): Tasks: 11 [backpressured]; Queued blocks: 11; Resources: 11.0 CPU, 5.3KB object store: 74%|█████████████████████████████████████████████████████████████████ | 204/276 [00:23<00:08, 8.78 row/s]\n", + "- Map(ArynReader._to_doc)->...->MapBatches(postprocess): Tasks: 11 [backpressured]; Queued blocks: 11; Resources: 11.0 CPU, 5.3KB object store: 76%|██████████████████████████████████████████████████████████████████▉ | 210/276 [00:23<00:08, 8.05 row/s]\n", + "Running Dataset. Active & requested resources: 11/11 CPU, 5.3KB/1.0GB object store: 76%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌ | 210/276 [00:23<00:08, 7.83 row/s]\n", + "- Map(ArynReader._to_doc)->...->MapBatches(postprocess): Tasks: 11 [backpressured]; Queued blocks: 10; Resources: 11.0 CPU, 5.8KB object store: 76%|██████████████████████████████████████████████████████████████████▉ | 210/276 [00:24<00:08, 8.05 row/s]\n", + "- Map(ArynReader._to_doc)->...->MapBatches(postprocess): Tasks: 11 [backpressured]; Queued blocks: 10; Resources: 11.0 CPU, 5.8KB object store: 78%|████████████████████████████████████████████████████████████████████▊ | 216/276 [00:24<00:08, 7.31 row/s]\n", + "Running Dataset. Active & requested resources: 11/11 CPU, 5.3KB/1.0GB object store: 79%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍ | 219/276 [00:24<00:07, 8.03 row/s]\n", + "- Map(ArynReader._to_doc)->...->MapBatches(postprocess): Tasks: 11 [backpressured]; Queued blocks: 7; Resources: 11.0 CPU, 5.3KB object store: 78%|█████████████████████████████████████████████████████████████████████▋ | 216/276 [00:25<00:08, 7.31 row/s]\n", + "- Map(ArynReader._to_doc)->...->MapBatches(postprocess): Tasks: 11 [backpressured]; Queued blocks: 7; Resources: 11.0 CPU, 5.3KB object store: 82%|████████████████████████████████████████████████████████████████████████▌ | 225/276 [00:25<00:06, 7.66 row/s]\n", + "Running Dataset. Active & requested resources: 9/11 CPU, 5.3KB/1.0GB object store: 82%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍ | 225/276 [00:26<00:07, 7.21 row/s]\n", + "- Map(ArynReader._to_doc)->...->MapBatches(postprocess): Tasks: 11 [backpressured]; Queued blocks: 3; Resources: 11.0 CPU, 5.3KB object store: 82%|████████████████████████████████████████████████████████████████████████▌ | 225/276 [00:26<00:06, 7.66 row/s]\n", + "- Map(ArynReader._to_doc)->...->MapBatches(postprocess): Tasks: 11 [backpressured]; Queued blocks: 3; Resources: 11.0 CPU, 5.3KB object store: 85%|███████████████████████████████████████████████████████████████████████████▍ | 234/276 [00:26<00:05, 8.03 row/s]\n", + "✔️ Dataset execution finished in 27.13 seconds: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 248/248 [00:27<00:00, 9.12 row/s]\n", + "\n", + "- Map(ArynReader._to_doc)->...->MapBatches(postprocess): Tasks: 11 [backpressured]; Queued blocks: 3; Resources: 11.0 CPU, 5.3KB object store: 85%|███████████████████████████████████████████████████████████████████████████▍ | 234/276 [00:27<00:05, 8.03 row/s]\n", + "- Map(ArynReader._to_doc)->...->MapBatches(postprocess): Tasks: 11 [backpressured]; Queued blocks: 0; Resources: 11.0 CPU, 120.6KB object store: 85%|█████████████████████████████████████████████████████████████████████████▊ | 234/276 [00:27<00:05, 8.03 row/s]\n", + "- Map(ArynReader._to_doc)->...->MapBatches(postprocess): Tasks: 11 [backpressured]; Queued blocks: 0; Resources: 11.0 CPU, 120.6KB object store: 94%|██████████████████████████████████████████████████████████████████████████████████ | 234/248 [00:27<00:01, 8.03 row/s]\n", + "- Map(ArynReader._to_doc)->...->MapBatches(postprocess): Tasks: 11 [backpressured]; Queued blocks: 0; Resources: 11.0 CPU, 120.6KB object store: 100%|███████████████████████████████████████████████████████████████████████████████████████| 248/248 [00:27<00:00, 11.1 row/s]\n", + "- Map(ArynReader._to_doc)->...->MapBatches(postprocess): Tasks: 11 [backpressured]; Queued blocks: 0; Resources: 11.0 CPU, 120.6KB object store: 100%|███████████████████████████████████████████████████████████████████████████████████████| 248/248 [00:27<00:00, 9.01 row/s]\n" + ] + }, + { + "data": { + "text/plain": [ + "'The num_customers of the document is:\\n\\n\"over 49,200 customers\" (from ELEMENT 24).'" + ] + }, + "execution_count": 77, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mdb_docset.take(1)[0].properties.get(\"num_customers\")" + ] + }, + { + "cell_type": "code", + "execution_count": 55, + "id": "192dc7b1-1598-479f-ab99-709d0c5c81d0", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2025-03-26 16:58:58,636\tINFO streaming_executor.py:108 -- Starting execution of Dataset. Full logs are in /tmp/ray/session_2025-03-26_16-00-08_504381_77419/logs/ray-data\n", + "2025-03-26 16:58:58,637\tINFO streaming_executor.py:109 -- Execution plan of Dataset: InputDataBuffer[Input] -> TaskPoolMapOperator[Map(ArynReader._to_doc)->MapBatches(remove_original_elements)->MapBatches(spread_properties)->MapBatches(explode)->MapBatches()->MapBatches(sort_and_batch_elements)->MapBatches(llm_map)->MapBatches(postprocess)->MapBatches()]\n", + "Running 0: 0.00 row [00:00, ? row/s]\n", + "- Map(ArynReader._to_doc)->MapBatches(remove_original_elements)->...->MapBatches() 1: 0.00 row [00:00, ? row/s]\n", + "- Map(ArynReader._to_doc)->...->MapBatches(): Tasks: 3 [backpressured]; Queued blocks: 89; Resources: 3.0 CPU, 768.0MB object store: : 0.00 row [00:01, ? row/s]\n", + "Running Dataset. Active & requested resources: 3/11 CPU, 768.0MB/1.0GB object store: : 0.00 row [00:01, ? row/s]0 CPU, 768.0MB object store: : 0.00 row [00:01, ? row/s]\n", + "- Map(ArynReader._to_doc)->...->MapBatches(): Tasks: 11 [backpressured]; Queued blocks: 80; Resources: 11.0 CPU, 12.5MB object store: : 0.00 row [00:02, ? row/s]\n", + "- Map(ArynReader._to_doc)->...->MapBatches(): Tasks: 11 [backpressured]; Queued blocks: 80; Resources: 11.0 CPU, 12.5MB object store: 0%| | 0.00/8.10k [00:02...->MapBatches(): Tasks: 11 [backpressured]; Queued blocks: 80; Resources: 11.0 CPU, 12.5MB object store: 1%|▉ | 88.0/8.10k [00:02<03:24, 39.2 row/s]\n", + " \n", + "✔️ Dataset execution finished in 2.55 seconds: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 175/175 [00:02<00:00, 67.4 row/s]\n", + "\n", + "- Map(ArynReader._to_doc)->...->MapBatches(): Tasks: 11 [backpressured]; Queued blocks: 80; Resources: 11.0 CPU, 12.5MB object store: 1%|▉ | 88.0/8.10k [00:02<03:24, 39.2 row/s]\n", + "- Map(ArynReader._to_doc)->...->MapBatches(): Tasks: 11 [backpressured]; Queued blocks: 78; Resources: 11.0 CPU, 7.9MB object store: 1%|▉ | 88.0/8.10k [00:02<03:24, 39.2 row/s]\n", + "- Map(ArynReader._to_doc)->...->MapBatches(): Tasks: 11 [backpressured]; Queued blocks: 78; Resources: 11.0 CPU, 7.9MB object store: 50%|█████████████████████████████████████████████▎ | 88.0/175 [00:02<00:02, 39.2 row/s]\n", + "- Map(ArynReader._to_doc)->...->MapBatches(): Tasks: 11 [backpressured]; Queued blocks: 78; Resources: 11.0 CPU, 7.9MB object store: 100%|███████████████████████████████████████████████████████████████████████████████████████████| 175/175 [00:02<00:00, 75.3 row/s]\n", + "- Map(ArynReader._to_doc)->...->MapBatches(): Tasks: 11 [backpressured]; Queued blocks: 78; Resources: 11.0 CPU, 7.9MB object store: 100%|███████████████████████████████████████████████████████████████████████████████████████████| 175/175 [00:02<00:00, 75.3 row/s]\n", + "- Map(ArynReader._to_doc)->...->MapBatches(): Tasks: 11 [backpressured]; Queued blocks: 78; Resources: 11.0 CPU, 7.9MB object store: 100%|███████████████████████████████████████████████████████████████████████████████████████████| 175/175 [00:02<00:00, 64.4 row/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'id': 'aryn:c-tqdcnzjzos49g1475m01p7w',\n", + " 'type': 'Section-header',\n", + " 'text_representation': \"Thank you, Andy. Good day, everybody. I'm pleased to \"\n", + " 'report that we have made a very strong start in <2899 '\n", + " 'chars>',\n", + " 'embedding': '<1536 floats>',\n", + " 'properties': {'score': 0.5760092735290527,\n", + " 'page_number': 2,\n", + " '_element_index': 25,\n", + " 'speaker_name': 'Pascal Soriot ',\n", + " 'speaker_role': 'Chief Executive Officer\\n',\n", + " 'speaker': True,\n", + " 'earnings_calls': {'company_name': 'AstraZeneca Plc',\n", + " 'company_ticker': 'AZN',\n", + " 'quarter': 'Q1',\n", + " 'date': 'Apr 25, 2024'},\n", + " '_autogen_LLMFilterOutput_batches': [],\n", + " '_autogen_LLMFilterOutput_i': 0},\n", + " 'bbox': [0.0725143395693318,\n", + " 0.6332171221174915,\n", + " 0.3964668361619971,\n", + " 0.6457289377304671],\n", + " 'doc_id': 'aryn:c-9lduhzgt43hvisvg5sxv9jg',\n", + " 'elements': [],\n", + " 'lineage_id': 'rwnduqxhs1nl554uar2vwoa',\n", + " 'parent_id': 'aryn:f-0pqfe6b6hlxkbfaweauqhyh'}\n", + "{'id': 'aryn:c-76aek0t4pozqq83m012yso6',\n", + " 'type': 'Text',\n", + " 'text_representation': \"Thank you, Dave. We've had an exciting start to the \"\n", + " 'year with a number of key presentations, includi '\n", + " '<2234 chars>',\n", + " 'embedding': '<1536 floats>',\n", + " 'properties': {'score': 0.5652783513069153,\n", + " 'page_number': 5,\n", + " '_element_index': 51,\n", + " 'speaker_name': 'Susan Galbraith ',\n", + " 'speaker_role': 'Executive Vice President, Oncology Research '\n", + " 'and Development\\n',\n", + " 'speaker': True,\n", + " 'earnings_calls': {'company_name': 'AstraZeneca Plc',\n", + " 'company_ticker': 'AZN',\n", + " 'quarter': 'Q1',\n", + " 'date': 'Apr 25, 2024'},\n", + " '_autogen_LLMFilterOutput_batches': [],\n", + " '_autogen_LLMFilterOutput_i': 0},\n", + " 'bbox': [0.07309046270482408,\n", + " 0.5213716465369816,\n", + " 0.7546507090564504,\n", + " 0.534808461815947],\n", + " 'doc_id': 'aryn:c-b2jhbkg6h2ptxv7dzv76lol',\n", + " 'elements': [],\n", + " 'lineage_id': 'b7gopor1hxqh6q8barujase',\n", + " 'parent_id': 'aryn:f-0pqfe6b6hlxkbfaweauqhyh'}\n", + "{'id': 'aryn:c-n3icf67tmj2m8dlih3pi1iz',\n", + " 'type': 'Section-header',\n", + " 'text_representation': 'Thank you, Susan. Next slide, please. '\n", + " 'BioPharmaceuticals delivered total revenue of $5.2 '\n", + " 'billion in <2371 chars>',\n", + " 'embedding': '<1536 floats>',\n", + " 'properties': {'score': 0.41509145498275757,\n", + " 'page_number': 6,\n", + " '_element_index': 57,\n", + " 'speaker_name': 'Ruud Dobber ',\n", + " 'speaker_role': 'Executive Vice President and President, '\n", + " 'BioPharmaceuticals\\n',\n", + " 'speaker': True,\n", + " 'earnings_calls': {'company_name': 'AstraZeneca Plc',\n", + " 'company_ticker': 'AZN',\n", + " 'quarter': 'Q1',\n", + " 'date': 'Apr 25, 2024'},\n", + " '_autogen_LLMFilterOutput_batches': [],\n", + " '_autogen_LLMFilterOutput_i': 0},\n", + " 'bbox': [0.07204341715347096,\n", + " 0.25080590413237497,\n", + " 0.6988436773338249,\n", + " 0.263547299610544],\n", + " 'doc_id': 'aryn:c-7lh5rymc86bzhfe93bgrflz',\n", + " 'elements': [],\n", + " 'lineage_id': 'mvfivp7mmm5shjejfwdfvxn',\n", + " 'parent_id': 'aryn:f-0pqfe6b6hlxkbfaweauqhyh'}\n", + "{'id': 'aryn:c-itp40ja3z1stroficoem2rs',\n", + " 'type': 'Text',\n", + " 'text_representation': 'Thanks, Ruud. I wanted to take the opportunity to '\n", + " 'highlight results from a 66-week analysis of explo '\n", + " '<1432 chars>',\n", + " 'embedding': '<1536 floats>',\n", + " 'properties': {'score': 0.47726330161094666,\n", + " 'page_number': 7,\n", + " '_element_index': 65,\n", + " 'speaker_name': 'Sharon Barr ',\n", + " 'speaker_role': 'Executive Vice President, BioPharmaceuticals '\n", + " 'Research and Development\\n',\n", + " 'speaker': True,\n", + " 'earnings_calls': {'company_name': 'AstraZeneca Plc',\n", + " 'company_ticker': 'AZN',\n", + " 'quarter': 'Q1',\n", + " 'date': 'Apr 25, 2024'},\n", + " '_autogen_LLMFilterOutput_batches': [],\n", + " '_autogen_LLMFilterOutput_i': 0},\n", + " 'bbox': [0.07267752461049748,\n", + " 0.07626251442506814,\n", + " 0.8047300362976406,\n", + " 0.08979756264362442],\n", + " 'doc_id': 'aryn:c-3hftoktpj9ho3tcehku8z7x',\n", + " 'elements': [],\n", + " 'lineage_id': 'fkd93gt4cinojsaln54awou',\n", + " 'parent_id': 'aryn:f-0pqfe6b6hlxkbfaweauqhyh'}\n", + "{'id': 'aryn:c-3ano24lrtwwhhsyvsde84de',\n", + " 'type': 'Section-header',\n", + " 'text_representation': 'Thank you, Pascal. And as usual, I will start with '\n", + " 'our reported P&L. Please turn to the next slide. '\n", + " '<3254 chars>',\n", + " 'embedding': '<1536 floats>',\n", + " 'properties': {'score': 0.650137722492218,\n", + " 'page_number': 3,\n", + " '_element_index': 34,\n", + " 'speaker_name': 'Aradhana Sarin ',\n", + " 'speaker_role': 'Chief Financial Officer\\n',\n", + " 'speaker': True,\n", + " 'earnings_calls': {'company_name': 'AstraZeneca Plc',\n", + " 'company_ticker': 'AZN',\n", + " 'quarter': 'Q1',\n", + " 'date': 'Apr 25, 2024'},\n", + " '_autogen_LLMFilterOutput_batches': [],\n", + " '_autogen_LLMFilterOutput_i': 0},\n", + " 'bbox': [0.0726197064463759,\n", + " 0.5763961837657653,\n", + " 0.41028275117683755,\n", + " 0.5888303941822093],\n", + " 'doc_id': 'aryn:c-zpoz72b7evqlx9wmgknzwef',\n", + " 'elements': [],\n", + " 'lineage_id': '02zgotaioy8wio6zr0venz3',\n", + " 'parent_id': 'aryn:f-0pqfe6b6hlxkbfaweauqhyh'}\n", + "{'id': 'aryn:c-uomkep2xgoph35kzas5sega',\n", + " 'type': 'Section-header',\n", + " 'text_representation': 'Thank you, Aradhana. Next slide, please. Oncology '\n", + " 'revenues grew 26% to $5.1 billion in the first quar '\n", + " '<2633 chars>',\n", + " 'embedding': '<1536 floats>',\n", + " 'properties': {'score': 0.5771273970603943,\n", + " 'page_number': 4,\n", + " '_element_index': 43,\n", + " 'speaker_name': 'Dave Fredrickson ',\n", + " 'speaker_role': 'Executive Vice President, Oncology Business\\n',\n", + " 'speaker': True,\n", + " 'earnings_calls': {'company_name': 'AstraZeneca Plc',\n", + " 'company_ticker': 'AZN',\n", + " 'quarter': 'Q1',\n", + " 'date': 'Apr 25, 2024'},\n", + " '_autogen_LLMFilterOutput_batches': [],\n", + " '_autogen_LLMFilterOutput_i': 0},\n", + " 'bbox': [0.07230571047580393,\n", + " 0.649608362531397,\n", + " 0.6152173161723004,\n", + " 0.663033383073028],\n", + " 'doc_id': 'aryn:c-vl4w143sgalz7jv718k8a9h',\n", + " 'elements': [],\n", + " 'lineage_id': 'eryy4dzr0vkigkohskynm17',\n", + " 'parent_id': 'aryn:f-0pqfe6b6hlxkbfaweauqhyh'}\n", + "{'id': 'aryn:c-bgm4z5elxh64476u9qyc1n8',\n", + " 'type': 'Text',\n", + " 'text_representation': 'Thank you, Sharon. Can I get the next slide, please. '\n", + " \"I'm delighted to report Rare Disease delivered <2318 \"\n", + " 'chars>',\n", + " 'embedding': '<1536 floats>',\n", + " 'properties': {'score': 0.4892081022262573,\n", + " 'page_number': 7,\n", + " '_element_index': 69,\n", + " 'speaker_name': 'Marc Dunoyer ',\n", + " 'speaker_role': 'Chief Executive Officer, Alexion, and Chief '\n", + " 'Strategy Officer, AstraZeneca\\n',\n", + " 'speaker': True,\n", + " 'earnings_calls': {'company_name': 'AstraZeneca Plc',\n", + " 'company_ticker': 'AZN',\n", + " 'quarter': 'Q1',\n", + " 'date': 'Apr 25, 2024'},\n", + " '_autogen_LLMFilterOutput_batches': [],\n", + " '_autogen_LLMFilterOutput_i': 0},\n", + " 'bbox': [0.07219824355368316,\n", + " 0.5335392228249252,\n", + " 0.8001898773064126,\n", + " 0.5462835408928762],\n", + " 'doc_id': 'aryn:c-vcceb78cgzfwdfms9lw21x5',\n", + " 'elements': [],\n", + " 'lineage_id': '42jaugk11ttaigm2j3lfdy2',\n", + " 'parent_id': 'aryn:f-0pqfe6b6hlxkbfaweauqhyh'}\n", + "{'id': 'aryn:c-vol06ocr4gsnwx5bww8krpd',\n", + " 'type': 'Section-header',\n", + " 'text_representation': 'Thanks, Marc. Can I have the next slide, please? As '\n", + " 'you have heard, our company has made a very\\n'\n", + " ' str <1354 chars>',\n", + " 'embedding': '<1536 floats>',\n", + " 'properties': {'score': 0.6381353735923767,\n", + " 'page_number': 8,\n", + " '_element_index': 78,\n", + " 'speaker_name': 'Pascal Soriot ',\n", + " 'speaker_role': 'Chief Executive Officer\\n',\n", + " 'speaker': True,\n", + " 'earnings_calls': {'company_name': 'AstraZeneca Plc',\n", + " 'company_ticker': 'AZN',\n", + " 'quarter': 'Q1',\n", + " 'date': 'Apr 25, 2024'},\n", + " '_autogen_LLMFilterOutput_batches': [],\n", + " '_autogen_LLMFilterOutput_i': 0},\n", + " 'bbox': [0.07248473124149274,\n", + " 0.35918581450072146,\n", + " 0.3963169621757789,\n", + " 0.371825754362174],\n", + " 'doc_id': 'aryn:c-hu9thepubaxsjvj6gtpvous',\n", + " 'elements': [],\n", + " 'lineage_id': 'u2rtj8scun611k6o5zqilxf',\n", + " 'parent_id': 'aryn:f-0pqfe6b6hlxkbfaweauqhyh'}\n", + "{'id': 'aryn:c-o7ydlpfitzgwmz8c5dt88xi',\n", + " 'type': 'Section-header',\n", + " 'text_representation': 'Hello. James Gordon, J.P. Morgan. First question is '\n", + " \"on '24 guidance.\\n\"\n", + " \"So it's a very strong revenue g <1275 chars>\",\n", + " 'embedding': '<1536 floats>',\n", + " 'properties': {'score': 0.5881184339523315,\n", + " 'page_number': 8,\n", + " '_element_index': 83,\n", + " 'speaker_name': 'James Gordon ',\n", + " 'speaker_external_org': 'JPMorgan Chase and Company',\n", + " 'speaker_role': 'Analyst\\n',\n", + " 'speaker': True,\n", + " 'earnings_calls': {'company_name': 'AstraZeneca Plc',\n", + " 'company_ticker': 'AZN',\n", + " 'quarter': 'Q1',\n", + " 'date': 'Apr 25, 2024'},\n", + " '_autogen_LLMFilterOutput_batches': [],\n", + " '_autogen_LLMFilterOutput_i': 0},\n", + " 'bbox': [0.07105229741792858,\n", + " 0.8330905844177533,\n", + " 0.5645136062982078,\n", + " 0.8459853636703719],\n", + " 'doc_id': 'aryn:c-9d5layn7gm5w98ye8iflv9c',\n", + " 'elements': [],\n", + " 'lineage_id': 'zpqdey94fhauazilubwug80',\n", + " 'parent_id': 'aryn:f-0pqfe6b6hlxkbfaweauqhyh'}\n", + "{'id': 'aryn:c-3flzrcf4utec3sygovb9h7k',\n", + " 'type': 'Text',\n", + " 'text_representation': 'Sure. So thank you for the question about AZD5004 '\n", + " \"oral GLP-1 receptor agonist. We're really excited\\n\"\n", + " ' <1600 chars>',\n", + " 'embedding': '<1536 floats>',\n", + " 'properties': {'score': 0.46687883138656616,\n", + " 'page_number': 9,\n", + " '_element_index': 95,\n", + " 'speaker_name': 'Sharon Barr ',\n", + " 'speaker_role': 'Executive Vice President, BioPharmaceuticals '\n", + " 'Research and Development\\n',\n", + " 'speaker': True,\n", + " 'earnings_calls': {'company_name': 'AstraZeneca Plc',\n", + " 'company_ticker': 'AZN',\n", + " 'quarter': 'Q1',\n", + " 'date': 'Apr 25, 2024'},\n", + " '_autogen_LLMFilterOutput_batches': [],\n", + " '_autogen_LLMFilterOutput_i': 0},\n", + " 'bbox': [0.07184836219746059,\n", + " 0.8500147799540402,\n", + " 0.8043362063057699,\n", + " 0.8629591388547456],\n", + " 'doc_id': 'aryn:c-ity4qdwenp35ldkkfwxn5mm',\n", + " 'elements': [],\n", + " 'lineage_id': 'u6b90cbv7tkad7263njyf1e',\n", + " 'parent_id': 'aryn:f-0pqfe6b6hlxkbfaweauqhyh'}\n", + "{'id': 'aryn:c-4mdwbuas47h87mm72k3fdz3',\n", + " 'type': 'Section-header',\n", + " 'text_representation': 'Thank you, Sharon. And James, you asked the question '\n", + " 'of the doors, but you got to remember that not\\n'\n", + " ' <404 chars>',\n", + " 'embedding': '<1536 floats>',\n", + " 'properties': {'score': 0.6443524360656738,\n", + " 'page_number': 10,\n", + " '_element_index': 100,\n", + " 'speaker_name': 'Pascal Soriot ',\n", + " 'speaker_role': 'Chief Executive Officer\\n',\n", + " 'speaker': True,\n", + " 'earnings_calls': {'company_name': 'AstraZeneca Plc',\n", + " 'company_ticker': 'AZN',\n", + " 'quarter': 'Q1',\n", + " 'date': 'Apr 25, 2024'},\n", + " '_autogen_LLMFilterOutput_batches': [],\n", + " '_autogen_LLMFilterOutput_i': 0},\n", + " 'bbox': [0.07268579094177727,\n", + " 0.4330463645905702,\n", + " 0.3967575008625416,\n", + " 0.44555925008016245],\n", + " 'doc_id': 'aryn:c-nyeo6v24funjmk1fbpfvx3c',\n", + " 'elements': [],\n", + " 'lineage_id': '3ks8nwcw12gxndtgq2a88it',\n", + " 'parent_id': 'aryn:f-0pqfe6b6hlxkbfaweauqhyh'}\n", + "{'id': 'aryn:c-0u80gdjejlqvcnzbk2854wg',\n", + " 'type': 'Section-header',\n", + " 'text_representation': 'Great. Thanks for the question. So I guess as, '\n", + " 'Pascal, a little bit of a preview of the upcoming ana '\n", + " '<654 chars>',\n", + " 'embedding': '<1536 floats>',\n", + " 'properties': {'score': 0.5857893824577332,\n", + " 'page_number': 10,\n", + " '_element_index': 103,\n", + " 'speaker_name': 'Seamus Fernandez ',\n", + " 'speaker_external_org': 'Guggenheim Partners',\n", + " 'speaker_role': 'Analyst\\n',\n", + " 'speaker': True,\n", + " 'earnings_calls': {'company_name': 'AstraZeneca Plc',\n", + " 'company_ticker': 'AZN',\n", + " 'quarter': 'Q1',\n", + " 'date': 'Apr 25, 2024'},\n", + " '_autogen_LLMFilterOutput_batches': [],\n", + " '_autogen_LLMFilterOutput_i': 0},\n", + " 'bbox': [0.07241827289191291,\n", + " 0.6438110925642636,\n", + " 0.5217128229804711,\n", + " 0.6568917172135528],\n", + " 'doc_id': 'aryn:c-vmsbd3pyphjvj9zl49i9hvk',\n", + " 'elements': [],\n", + " 'lineage_id': 'qo88t5asp5z54mkqz5wfdhm',\n", + " 'parent_id': 'aryn:f-0pqfe6b6hlxkbfaweauqhyh'}\n", + "{'id': 'aryn:c-wsvyy8d39obxo6v69powi77',\n", + " 'type': 'Text',\n", + " 'text_representation': \"Sure. So I'll jump in on both. People often ask about \"\n", + " \"which molecule I'm more excited about. And I fi <2113 \"\n", + " 'chars>',\n", + " 'embedding': '<1536 floats>',\n", + " 'properties': {'score': 0.45202550292015076,\n", + " 'page_number': 11,\n", + " '_element_index': 113,\n", + " 'speaker_name': 'Sharon Barr ',\n", + " 'speaker_role': 'Executive Vice President, BioPharmaceuticals '\n", + " 'Research and Development\\n',\n", + " 'speaker': True,\n", + " 'earnings_calls': {'company_name': 'AstraZeneca Plc',\n", + " 'company_ticker': 'AZN',\n", + " 'quarter': 'Q1',\n", + " 'date': 'Apr 25, 2024'},\n", + " '_autogen_LLMFilterOutput_batches': [],\n", + " '_autogen_LLMFilterOutput_i': 0},\n", + " 'bbox': [0.07235127914622562,\n", + " 0.6672766703018117,\n", + " 0.8036774844742514,\n", + " 0.6798483990053175],\n", + " 'doc_id': 'aryn:c-bqov66vgesuktevmk1y9lyk',\n", + " 'elements': [],\n", + " 'lineage_id': '4i8442e85hecndxxtx4ntov',\n", + " 'parent_id': 'aryn:f-0pqfe6b6hlxkbfaweauqhyh'}\n", + "{'id': 'aryn:c-lo2wegf7qrtwofbfd0sdavj',\n", + " 'type': 'Text',\n", + " 'text_representation': 'No.\\n',\n", + " 'embedding': '<1536 floats>',\n", + " 'properties': {'score': 0.5130598545074463,\n", + " 'page_number': 12,\n", + " '_element_index': 122,\n", + " 'speaker_name': 'Ruud Dobber ',\n", + " 'speaker_role': 'Executive Vice President and President, '\n", + " 'BioPharmaceuticals\\n',\n", + " 'speaker': True,\n", + " 'earnings_calls': {'company_name': 'AstraZeneca Plc',\n", + " 'company_ticker': 'AZN',\n", + " 'quarter': 'Q1',\n", + " 'date': 'Apr 25, 2024'},\n", + " '_autogen_LLMFilterOutput_batches': [],\n", + " '_autogen_LLMFilterOutput_i': 0},\n", + " 'bbox': [0.07184797449683104,\n", + " 0.46429195461134565,\n", + " 0.6999568581508999,\n", + " 0.4779964999899797],\n", + " 'doc_id': 'aryn:c-zoz6immogc95vvaygx8jco4',\n", + " 'elements': [],\n", + " 'lineage_id': 'geebxb06fuiu07pn6xzn4jm',\n", + " 'parent_id': 'aryn:f-0pqfe6b6hlxkbfaweauqhyh'}\n", + "{'id': 'aryn:c-cx8a4eq56nnovxianz7kttn',\n", + " 'type': 'Section-header',\n", + " 'text_representation': 'Thank you, James. Just sorry, Aradhana, maybe you can '\n", + " 'take the first one. And Sharon, would you take\\n'\n", + " ' <17 chars>',\n", + " 'embedding': '<1536 floats>',\n", + " 'properties': {'score': 0.5694078803062439,\n", + " 'page_number': 9,\n", + " '_element_index': 88,\n", + " 'speaker_name': 'Pascal Soriot ',\n", + " 'speaker_role': 'Chief Executive Officer\\n',\n", + " 'speaker': True,\n", + " 'earnings_calls': {'company_name': 'AstraZeneca Plc',\n", + " 'company_ticker': 'AZN',\n", + " 'quarter': 'Q1',\n", + " 'date': 'Apr 25, 2024'},\n", + " '_autogen_LLMFilterOutput_batches': [],\n", + " '_autogen_LLMFilterOutput_i': 0},\n", + " 'bbox': [0.07264135768034209,\n", + " 0.31944271640524796,\n", + " 0.39724637289445325,\n", + " 0.3321466662018357],\n", + " 'doc_id': 'aryn:c-4low1i3lwzblh3j9863myri',\n", + " 'elements': [],\n", + " 'lineage_id': 'pvi4payeaktsjv4l8y75j2x',\n", + " 'parent_id': 'aryn:f-0pqfe6b6hlxkbfaweauqhyh'}\n", + "{'id': 'aryn:c-0v88y8r627847dgrs9hzt9s',\n", + " 'type': 'Section-header',\n", + " 'text_representation': 'Great. Thank you, James, for the question. it is '\n", + " 'obviously early in the year. And as you know, gener '\n", + " '<1140 chars>',\n", + " 'embedding': '<1536 floats>',\n", + " 'properties': {'score': 0.6427060961723328,\n", + " 'page_number': 9,\n", + " '_element_index': 90,\n", + " 'speaker_name': 'Aradhana Sarin ',\n", + " 'speaker_role': 'Chief Financial Officer\\n',\n", + " 'speaker': True,\n", + " 'earnings_calls': {'company_name': 'AstraZeneca Plc',\n", + " 'company_ticker': 'AZN',\n", + " 'quarter': 'Q1',\n", + " 'date': 'Apr 25, 2024'},\n", + " '_autogen_LLMFilterOutput_batches': [],\n", + " '_autogen_LLMFilterOutput_i': 0},\n", + " 'bbox': [0.07239368621032309,\n", + " 0.42219562375581177,\n", + " 0.4105753728424645,\n", + " 0.43487817602006734],\n", + " 'doc_id': 'aryn:c-0brxr7ns4knz4utx8m4rbe4',\n", + " 'elements': [],\n", + " 'lineage_id': 'apxg4a3sm8tp33v46lep9m5',\n", + " 'parent_id': 'aryn:f-0pqfe6b6hlxkbfaweauqhyh'}\n", + "{'id': 'aryn:c-x4tb7hhp2w7h3uuo2ityd8k',\n", + " 'type': 'Section-header',\n", + " 'text_representation': \"Cool. So let's move to the next question, Sachin Jain \"\n", + " 'at Bank of America. Sachin over to you.\\n',\n", + " 'embedding': '<1536 floats>',\n", + " 'properties': {'score': 0.41843941807746887,\n", + " 'page_number': 12,\n", + " '_element_index': 124,\n", + " 'speaker_name': 'Pascal Soriot ',\n", + " 'speaker_role': 'Chief Executive Officer\\n',\n", + " 'speaker': True,\n", + " 'earnings_calls': {'company_name': 'AstraZeneca Plc',\n", + " 'company_ticker': 'AZN',\n", + " 'quarter': 'Q1',\n", + " 'date': 'Apr 25, 2024'},\n", + " '_autogen_LLMFilterOutput_batches': [],\n", + " '_autogen_LLMFilterOutput_i': 0},\n", + " 'bbox': [0.07183900661203116,\n", + " 0.5442762436037035,\n", + " 0.39754575162819494,\n", + " 0.5576024179837804],\n", + " 'doc_id': 'aryn:c-4rjogd1gay7yngjctuir3wq',\n", + " 'elements': [],\n", + " 'lineage_id': 'lsld8yjah9bwfpkojvaiymo',\n", + " 'parent_id': 'aryn:f-0pqfe6b6hlxkbfaweauqhyh'}\n", + "{'id': 'aryn:c-28n8qdc7b7m4e5kumfsdm5t',\n", + " 'type': 'Section-header',\n", + " 'text_representation': 'Hi there. Thanks for taking my questions. Sachin '\n", + " 'Jain, Bank of America. First one today for Truqap.\\n'\n", + " ' <738 chars>',\n", + " 'embedding': '<1536 floats>',\n", + " 'properties': {'score': 0.4497426152229309,\n", + " 'page_number': 12,\n", + " '_element_index': 126,\n", + " 'speaker_name': 'Sachin Jain ',\n", + " 'speaker_external_org': 'Bank of America Merrill Lynch',\n", + " 'speaker_role': 'Analyst\\n',\n", + " 'speaker': True,\n", + " 'earnings_calls': {'company_name': 'AstraZeneca Plc',\n", + " 'company_ticker': 'AZN',\n", + " 'quarter': 'Q1',\n", + " 'date': 'Apr 25, 2024'},\n", + " '_autogen_LLMFilterOutput_batches': [],\n", + " '_autogen_LLMFilterOutput_i': 0},\n", + " 'bbox': [0.07173691672840338,\n", + " 0.6239503727554511,\n", + " 0.5197493592970168,\n", + " 0.6373367211715744],\n", + " 'doc_id': 'aryn:c-iv6mz9dw8hearqbhkktfqwo',\n", + " 'elements': [],\n", + " 'lineage_id': 'gda8tf0bsfs01rsfccfm2ga',\n", + " 'parent_id': 'aryn:f-0pqfe6b6hlxkbfaweauqhyh'}\n", + "{'id': 'aryn:c-32u0dgy3bo7qbqck2i4k5l8',\n", + " 'type': 'Section-header',\n", + " 'text_representation': 'Thanks, Susan.\\n',\n", + " 'embedding': '<1536 floats>',\n", + " 'properties': {'score': 0.4778229296207428,\n", + " 'page_number': 14,\n", + " '_element_index': 143,\n", + " 'speaker_name': 'Pascal Soriot ',\n", + " 'speaker_role': 'Chief Executive Officer\\n',\n", + " 'speaker': True,\n", + " 'earnings_calls': {'company_name': 'AstraZeneca Plc',\n", + " 'company_ticker': 'AZN',\n", + " 'quarter': 'Q1',\n", + " 'date': 'Apr 25, 2024'},\n", + " '_autogen_LLMFilterOutput_batches': [],\n", + " '_autogen_LLMFilterOutput_i': 0},\n", + " 'bbox': [0.072463527709444,\n", + " 0.0728359279493941,\n", + " 0.39807427989592786,\n", + " 0.08655828938314986],\n", + " 'doc_id': 'aryn:c-1tfy0tip2yzz4ivrxmn27rr',\n", + " 'elements': [],\n", + " 'lineage_id': 'kjmudo0x8z009iwlfktd312',\n", + " 'parent_id': 'aryn:f-0pqfe6b6hlxkbfaweauqhyh'}\n", + "{'id': 'aryn:c-fu5r56876twfh7egfvs7aol',\n", + " 'type': 'Section-header',\n", + " 'text_representation': 'Thanks, James. So the first question about the '\n", + " 'Investor Day, I guess I would like to invite you to '\n", + " 'jo <2224 chars>',\n", + " 'embedding': '<1536 floats>',\n", + " 'properties': {'score': 0.6408979296684265,\n", + " 'page_number': 10,\n", + " '_element_index': 106,\n", + " 'speaker_name': 'Pascal Soriot ',\n", + " 'speaker_role': 'Chief Executive Officer\\n',\n", + " 'speaker': True,\n", + " 'earnings_calls': {'company_name': 'AstraZeneca Plc',\n", + " 'company_ticker': 'AZN',\n", + " 'quarter': 'Q1',\n", + " 'date': 'Apr 25, 2024'},\n", + " '_autogen_LLMFilterOutput_batches': [],\n", + " '_autogen_LLMFilterOutput_i': 0},\n", + " 'bbox': [0.07216195662095196,\n", + " 0.9010030119375267,\n", + " 0.39688171275947143,\n", + " 0.9135709830255986],\n", + " 'doc_id': 'aryn:c-jfs34e6huyz6in4dn9kqu24',\n", + " 'elements': [],\n", + " 'lineage_id': '7bsewlcfl666lv1g3apu5sc',\n", + " 'parent_id': 'aryn:f-0pqfe6b6hlxkbfaweauqhyh'}\n" + ] + } + ], + "source": [ + "llm_filtered_docset.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "id": "2f144ffe-eeb3-4770-a1da-3143efbe3ed1", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2025-03-26 16:13:49,144\tINFO streaming_executor.py:108 -- Starting execution of Dataset. Full logs are in /tmp/ray/session_2025-03-26_16-00-08_504381_77419/logs/ray-data\n", + "2025-03-26 16:13:49,145\tINFO streaming_executor.py:109 -- Execution plan of Dataset: InputDataBuffer[Input] -> TaskPoolMapOperator[Map(ArynReader._to_doc)->MapBatches(remove_original_elements)->MapBatches()->MapBatches(spread_properties)->MapBatches(explode)]\n", + "Running 0: 0.00 row [00:00, ? row/s]\n", + "- Map(ArynReader._to_doc)->MapBatches(remove_original_elements)->...->MapBatches(explode) 1: 0.00 row [00:00, ? row/s]\n", + "- Map(ArynReader._to_doc)->...->MapBatches(explode): Tasks: 3 [backpressured]; Queued blocks: 89; Resources: 3.0 CPU, 768.0MB object store: : 0.00 row [00:01, ? row/s]\n", + "Running Dataset. Active & requested resources: 3/11 CPU, 768.0MB/1.0GB object store: : 0.00 row [00:01, ? row/s] CPU, 768.0MB object store: : 0.00 row [00:01, ? row/s]\n", + "- Map(ArynReader._to_doc)->...->MapBatches(explode): Tasks: 11 [backpressured]; Queued blocks: 78; Resources: 11.0 CPU, 3.4KB object store: : 0.00 row [00:02, ? row/s]\n", + "- Map(ArynReader._to_doc)->...->MapBatches(explode): Tasks: 11 [backpressured]; Queued blocks: 78; Resources: 11.0 CPU, 3.4KB object store: 0%| | 0.00/184 [00:02...->MapBatches(explode): Tasks: 11 [backpressured]; Queued blocks: 78; Resources: 11.0 CPU, 3.4KB object store: 3%|██▉ | 6.00/184 [00:02<01:04, 2.76 row/s]\n", + "Running Dataset. Active & requested resources: 11/11 CPU, 3.4KB/1.0GB object store: 3%|████▊ | 6.00/184 [00:02<01:05, 2.70 row/s]\n", + "- Map(ArynReader._to_doc)->...->MapBatches(explode): Tasks: 11 [backpressured]; Queued blocks: 77; Resources: 11.0 CPU, 3.4KB object store: 3%|██▉ | 6.00/184 [00:03<01:04, 2.76 row/s]\n", + "- Map(ArynReader._to_doc)->...->MapBatches(explode): Tasks: 11 [backpressured]; Queued blocks: 77; Resources: 11.0 CPU, 3.4KB object store: 4%|███▉ | 8.00/184 [00:03<01:11, 2.45 row/s]\n", + "Running Dataset. Active & requested resources: 11/11 CPU, 3.4KB/1.0GB object store: 4%|██████▍ | 8.00/184 [00:03<01:12, 2.43 row/s]\n", + "- Map(ArynReader._to_doc)->...->MapBatches(explode): Tasks: 11 [backpressured]; Queued blocks: 75; Resources: 11.0 CPU, 3.4KB object store: 4%|███▉ | 8.00/184 [00:04<01:11, 2.45 row/s]\n", + "- Map(ArynReader._to_doc)->...->MapBatches(explode): Tasks: 11 [backpressured]; Queued blocks: 75; Resources: 11.0 CPU, 3.4KB object store: 7%|█████▉ | 12.0/184 [00:04<00:56, 3.02 row/s]\n", + "Running Dataset. Active & requested resources: 11/11 CPU, 3.4KB/1.0GB object store: 7%|█████████▌ | 12.0/184 [00:04<00:59, 2.91 row/s]\n", + "- Map(ArynReader._to_doc)->...->MapBatches(explode): Tasks: 11 [backpressured]; Queued blocks: 71; Resources: 11.0 CPU, 3.4KB object store: 7%|█████▉ | 12.0/184 [00:05<00:56, 3.02 row/s]\n", + "- Map(ArynReader._to_doc)->...->MapBatches(explode): Tasks: 11 [backpressured]; Queued blocks: 71; Resources: 11.0 CPU, 3.4KB object store: 11%|█████████▉ | 20.0/184 [00:05<00:35, 4.60 row/s]\n", + "Running Dataset. Active & requested resources: 11/11 CPU, 3.4KB/1.0GB object store: 11%|███████████████▉ | 20.0/184 [00:05<00:36, 4.48 row/s]\n", + "- Map(ArynReader._to_doc)->...->MapBatches(explode): Tasks: 11 [backpressured]; Queued blocks: 69; Resources: 11.0 CPU, 3.4KB object store: 11%|█████████▉ | 20.0/184 [00:06<00:35, 4.60 row/s]\n", + "- Map(ArynReader._to_doc)->...->MapBatches(explode): Tasks: 11 [backpressured]; Queued blocks: 69; Resources: 11.0 CPU, 3.4KB object store: 13%|███████████▊ | 24.0/184 [00:06<00:37, 4.28 row/s]\n", + "Running Dataset. Active & requested resources: 11/11 CPU, 3.4KB/1.0GB object store: 13%|███████████████████▏ | 24.0/184 [00:06<00:37, 4.32 row/s]\n", + "- Map(ArynReader._to_doc)->...->MapBatches(explode): Tasks: 11 [backpressured]; Queued blocks: 65; Resources: 11.0 CPU, 3.8KB object store: 13%|███████████▊ | 24.0/184 [00:07<00:37, 4.28 row/s]\n", + "- Map(ArynReader._to_doc)->...->MapBatches(explode): Tasks: 11 [backpressured]; Queued blocks: 65; Resources: 11.0 CPU, 3.8KB object store: 18%|████████████████▊ | 34.0/184 [00:07<00:24, 6.04 row/s]\n", + "Running Dataset. Active & requested resources: 10/11 CPU, 3.4KB/1.0GB object store: 18%|███████████████████████████▏ | 34.0/184 [00:07<00:25, 5.90 row/s]\n", + "- Map(ArynReader._to_doc)->...->MapBatches(explode): Tasks: 11 [backpressured]; Queued blocks: 56; Resources: 11.0 CPU, 3.4KB object store: 18%|████████████████▊ | 34.0/184 [00:08<00:24, 6.04 row/s]\n", + "- Map(ArynReader._to_doc)->...->MapBatches(explode): Tasks: 11 [backpressured]; Queued blocks: 56; Resources: 11.0 CPU, 3.4KB object store: 27%|████████████████████████▋ | 50.0/184 [00:08<00:15, 8.87 row/s]\n", + "Running Dataset. Active & requested resources: 11/11 CPU, 3.4KB/1.0GB object store: 27%|███████████████████████████████████████▉ | 50.0/184 [00:08<00:15, 8.75 row/s]\n", + "- Map(ArynReader._to_doc)->...->MapBatches(explode): Tasks: 11 [backpressured]; Queued blocks: 55; Resources: 11.0 CPU, 3.4KB object store: 27%|████████████████████████▋ | 50.0/184 [00:09<00:15, 8.87 row/s]\n", + "- Map(ArynReader._to_doc)->...->MapBatches(explode): Tasks: 11 [backpressured]; Queued blocks: 55; Resources: 11.0 CPU, 3.4KB object store: 28%|█████████████████████████▋ | 52.0/184 [00:09<00:19, 6.66 row/s]\n", + "Running Dataset. Active & requested resources: 8/11 CPU, 3.4KB/1.0GB object store: 28%|█████████████████████████████████████████▊ | 52.0/184 [00:09<00:20, 6.60 row/s]\n", + "- Map(ArynReader._to_doc)->...->MapBatches(explode): Tasks: 11 [backpressured]; Queued blocks: 50; Resources: 11.0 CPU, 3.4KB object store: 28%|█████████████████████████▋ | 52.0/184 [00:10<00:19, 6.66 row/s]\n", + "- Map(ArynReader._to_doc)->...->MapBatches(explode): Tasks: 11 [backpressured]; Queued blocks: 50; Resources: 11.0 CPU, 3.4KB object store: 34%|██████████████████████████████▋ | 62.0/184 [00:10<00:16, 7.45 row/s]\n", + "Running Dataset. Active & requested resources: 11/11 CPU, 3.4KB/1.0GB object store: 34%|█████████████████████████████████████████████████▌ | 62.0/184 [00:10<00:16, 7.37 row/s]\n", + "- Map(ArynReader._to_doc)->...->MapBatches(explode): Tasks: 11 [backpressured]; Queued blocks: 49; Resources: 11.0 CPU, 3.8KB object store: 34%|██████████████████████████████▋ | 62.0/184 [00:11<00:16, 7.45 row/s]\n", + "- Map(ArynReader._to_doc)->...->MapBatches(explode): Tasks: 11 [backpressured]; Queued blocks: 49; Resources: 11.0 CPU, 3.8KB object store: 36%|████████████████████████████████▋ | 66.0/184 [00:11<00:18, 6.28 row/s]\n", + "Running Dataset. Active & requested resources: 10/11 CPU, 3.4KB/1.0GB object store: 36%|████████████████████████████████████████████████████▋ | 66.0/184 [00:11<00:18, 6.26 row/s]\n", + "- Map(ArynReader._to_doc)->...->MapBatches(explode): Tasks: 11 [backpressured]; Queued blocks: 44; Resources: 11.0 CPU, 3.4KB object store: 36%|████████████████████████████████▋ | 66.0/184 [00:12<00:18, 6.28 row/s]\n", + "- Map(ArynReader._to_doc)->...->MapBatches(explode): Tasks: 11 [backpressured]; Queued blocks: 44; Resources: 11.0 CPU, 3.4KB object store: 40%|████████████████████████████████████▌ | 74.0/184 [00:12<00:16, 6.60 row/s]\n", + "Running Dataset. Active & requested resources: 11/11 CPU, 3.4KB/1.0GB object store: 40%|███████████████████████████████████████████████████████████ | 74.0/184 [00:12<00:16, 6.66 row/s]\n", + "- Map(ArynReader._to_doc)->...->MapBatches(explode): Tasks: 11 [backpressured]; Queued blocks: 39; Resources: 11.0 CPU, 3.4KB object store: 40%|████████████████████████████████████▌ | 74.0/184 [00:13<00:16, 6.60 row/s]\n", + "- Map(ArynReader._to_doc)->...->MapBatches(explode): Tasks: 11 [backpressured]; Queued blocks: 39; Resources: 11.0 CPU, 3.4KB object store: 46%|█████████████████████████████████████████▌ | 84.0/184 [00:13<00:13, 7.48 row/s]\n", + "Running Dataset. Active & requested resources: 10/11 CPU, 3.4KB/1.0GB object store: 46%|███████████████████████████████████████████████████████████████████ | 84.0/184 [00:13<00:13, 7.43 row/s]\n", + "- Map(ArynReader._to_doc)->...->MapBatches(explode): Tasks: 11 [backpressured]; Queued blocks: 33; Resources: 11.0 CPU, 3.4KB object store: 46%|█████████████████████████████████████████▌ | 84.0/184 [00:14<00:13, 7.48 row/s]\n", + "- Map(ArynReader._to_doc)->...->MapBatches(explode): Tasks: 11 [backpressured]; Queued blocks: 33; Resources: 11.0 CPU, 3.4KB object store: 52%|███████████████████████████████████████████████▍ | 96.0/184 [00:14<00:10, 8.54 row/s]\n", + "Running Dataset. Active & requested resources: 11/11 CPU, 3.4KB/1.0GB object store: 52%|████████████████████████████████████████████████████████████████████████████▋ | 96.0/184 [00:15<00:10, 8.50 row/s]\n", + "- Map(ArynReader._to_doc)->...->MapBatches(explode): Tasks: 11 [backpressured]; Queued blocks: 30; Resources: 11.0 CPU, 3.4KB object store: 52%|███████████████████████████████████████████████▍ | 96.0/184 [00:16<00:10, 8.54 row/s]\n", + "- Map(ArynReader._to_doc)->...->MapBatches(explode): Tasks: 11 [backpressured]; Queued blocks: 30; Resources: 11.0 CPU, 3.4KB object store: 55%|███████████████████████████████████████████████████ | 102/184 [00:16<00:10, 7.60 row/s]\n", + "Running Dataset. Active & requested resources: 11/11 CPU, 3.4KB/1.0GB object store: 55%|██████████████████████████████████████████████████████████████████████████████████ | 102/184 [00:16<00:10, 7.58 row/s]\n", + "- Map(ArynReader._to_doc)->...->MapBatches(explode): Tasks: 11 [backpressured]; Queued blocks: 26; Resources: 11.0 CPU, 3.4KB object store: 55%|███████████████████████████████████████████████████ | 102/184 [00:17<00:10, 7.60 row/s]\n", + "- Map(ArynReader._to_doc)->...->MapBatches(explode): Tasks: 11 [backpressured]; Queued blocks: 26; Resources: 11.0 CPU, 3.4KB object store: 60%|███████████████████████████████████████████████████████ | 110/184 [00:17<00:09, 7.51 row/s]\n", + "Running Dataset. Active & requested resources: 10/11 CPU, 3.4KB/1.0GB object store: 60%|████████████████████████████████████████████████████████████████████████████████████████▍ | 110/184 [00:17<00:09, 7.48 row/s]\n", + "- Map(ArynReader._to_doc)->...->MapBatches(explode): Tasks: 11 [backpressured]; Queued blocks: 23; Resources: 11.0 CPU, 3.4KB object store: 60%|███████████████████████████████████████████████████████ | 110/184 [00:18<00:09, 7.51 row/s]\n", + "- Map(ArynReader._to_doc)->...->MapBatches(explode): Tasks: 11 [backpressured]; Queued blocks: 23; Resources: 11.0 CPU, 3.4KB object store: 63%|██████████████████████████████████████████████████████████ | 116/184 [00:18<00:09, 6.88 row/s]\n", + "Running Dataset. Active & requested resources: 11/11 CPU, 3.4KB/1.0GB object store: 63%|█████████████████████████████████████████████████████████████████████████████████████████████▎ | 116/184 [00:18<00:09, 6.86 row/s]\n", + "- Map(ArynReader._to_doc)->...->MapBatches(explode): Tasks: 11 [backpressured]; Queued blocks: 20; Resources: 11.0 CPU, 4.1KB object store: 63%|██████████████████████████████████████████████████████████ | 116/184 [00:19<00:09, 6.88 row/s]\n", + "- Map(ArynReader._to_doc)->...->MapBatches(explode): Tasks: 11 [backpressured]; Queued blocks: 20; Resources: 11.0 CPU, 4.1KB object store: 67%|██████████████████████████████████████████████████████████████ | 124/184 [00:19<00:08, 7.21 row/s]\n", + "Running Dataset. Active & requested resources: 11/11 CPU, 3.4KB/1.0GB object store: 68%|█████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 126/184 [00:19<00:07, 7.57 row/s]\n", + "- Map(ArynReader._to_doc)->...->MapBatches(explode): Tasks: 11 [backpressured]; Queued blocks: 15; Resources: 11.0 CPU, 3.4KB object store: 67%|██████████████████████████████████████████████████████████████ | 124/184 [00:20<00:08, 7.21 row/s]\n", + "- Map(ArynReader._to_doc)->...->MapBatches(explode): Tasks: 11 [backpressured]; Queued blocks: 15; Resources: 11.0 CPU, 3.4KB object store: 72%|██████████████████████████████████████████████████████████████████ | 132/184 [00:20<00:07, 7.27 row/s]\n", + "Running Dataset. Active & requested resources: 10/11 CPU, 3.4KB/1.0GB object store: 73%|███████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 134/184 [00:20<00:06, 7.52 row/s]\n", + "- Map(ArynReader._to_doc)->...->MapBatches(explode): Tasks: 11 [backpressured]; Queued blocks: 11; Resources: 11.0 CPU, 3.4KB object store: 72%|██████████████████████████████████████████████████████████████████ | 132/184 [00:21<00:07, 7.27 row/s]\n", + "- Map(ArynReader._to_doc)->...->MapBatches(explode): Tasks: 11 [backpressured]; Queued blocks: 11; Resources: 11.0 CPU, 3.4KB object store: 76%|██████████████████████████████████████████████████████████████████████ | 140/184 [00:21<00:06, 7.30 row/s]\n", + "Running Dataset. Active & requested resources: 10/11 CPU, 3.4KB/1.0GB object store: 76%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌ | 140/184 [00:21<00:06, 6.92 row/s]\n", + "- Map(ArynReader._to_doc)->...->MapBatches(explode): Tasks: 11 [backpressured]; Queued blocks: 7; Resources: 11.0 CPU, 3.4KB object store: 76%|██████████████████████████████████████████████████████████████████████▊ | 140/184 [00:22<00:06, 7.30 row/s]\n", + "- Map(ArynReader._to_doc)->...->MapBatches(explode): Tasks: 11 [backpressured]; Queued blocks: 7; Resources: 11.0 CPU, 3.4KB object store: 80%|██████████████████████████████████████████████████████████████████████████▊ | 148/184 [00:22<00:04, 7.32 row/s]\n", + "Running Dataset. Active & requested resources: 10/11 CPU, 3.4KB/1.0GB object store: 80%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████ | 148/184 [00:22<00:05, 7.06 row/s]\n", + "- Map(ArynReader._to_doc)->...->MapBatches(explode): Tasks: 11 [backpressured]; Queued blocks: 4; Resources: 11.0 CPU, 3.4KB object store: 80%|██████████████████████████████████████████████████████████████████████████▊ | 148/184 [00:23<00:04, 7.32 row/s]\n", + "- Map(ArynReader._to_doc)->...->MapBatches(explode): Tasks: 11 [backpressured]; Queued blocks: 4; Resources: 11.0 CPU, 3.4KB object store: 84%|█████████████████████████████████████████████████████████████████████████████▊ | 154/184 [00:23<00:04, 6.78 row/s]\n", + "Running Dataset. Active & requested resources: 11/11 CPU, 3.4KB/1.0GB object store: 84%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 154/184 [00:23<00:04, 6.70 row/s]\n", + "- Map(ArynReader._to_doc)->...->MapBatches(explode): Tasks: 11 [backpressured]; Queued blocks: 2; Resources: 11.0 CPU, 3.4KB object store: 84%|█████████████████████████████████████████████████████████████████████████████▊ | 154/184 [00:24<00:04, 6.78 row/s]\n", + "- Map(ArynReader._to_doc)->...->MapBatches(explode): Tasks: 11 [backpressured]; Queued blocks: 2; Resources: 11.0 CPU, 3.4KB object store: 86%|███████████████████████████████████████████████████████████████████████████████▊ | 158/184 [00:24<00:04, 5.95 row/s]\n", + "Running Dataset. Active & requested resources: 10/11 CPU, 3.4KB/1.0GB object store: 86%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████ | 158/184 [00:24<00:04, 5.78 row/s]\n", + "- Map(ArynReader._to_doc)->...->MapBatches(explode): Tasks: 10; Queued blocks: 0; Resources: 10.0 CPU, 3.1KB object store: 86%|█████████████████████████████████████████████████████████████████████████████████████████████▌ | 158/184 [00:25<00:04, 5.95 row/s]\n", + "- Map(ArynReader._to_doc)->...->MapBatches(explode): Tasks: 10; Queued blocks: 0; Resources: 10.0 CPU, 3.1KB object store: 89%|█████████████████████████████████████████████████████████████████████████████████████████████████▏ | 164/184 [00:25<00:03, 5.81 row/s]\n", + "Running Dataset. Active & requested resources: 9/11 CPU, 2.8KB/1.0GB object store: 90%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍ | 166/184 [00:25<00:02, 6.25 row/s]\n", + "- Map(ArynReader._to_doc)->...->MapBatches(explode): Tasks: 2; Queued blocks: 0; Resources: 2.0 CPU, 642.0B object store: 89%|██████████████████████████████████████████████████████████████████████████████████████████████████ | 164/184 [00:26<00:03, 5.81 row/s]\n", + "- Map(ArynReader._to_doc)->...->MapBatches(explode): Tasks: 2; Queued blocks: 0; Resources: 2.0 CPU, 642.0B object store: 98%|███████████████████████████████████████████████████████████████████████████████████████████████████████████▌ | 180/184 [00:26<00:00, 8.52 row/s]\n", + " \n", + "✔️ Dataset execution finished in 27.68 seconds: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 184/184 [00:27<00:00, 6.64 row/s]\n", + "\n", + "- Map(ArynReader._to_doc)->...->MapBatches(explode): Tasks: 2; Queued blocks: 0; Resources: 2.0 CPU, 642.0B object store: 98%|███████████████████████████████████████████████████████████████████████████████████████████████████████████▌ | 180/184 [00:27<00:00, 8.52 row/s]\n", + "- Map(ArynReader._to_doc)->...->MapBatches(explode): Tasks: 0; Queued blocks: 0; Resources: 0.0 CPU, 321.0B object store: 98%|███████████████████████████████████████████████████████████████████████████████████████████████████████████▌ | 180/184 [00:27<00:00, 8.52 row/s]\n", + "- Map(ArynReader._to_doc)->...->MapBatches(explode): Tasks: 0; Queued blocks: 0; Resources: 0.0 CPU, 321.0B object store: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████| 184/184 [00:27<00:00, 7.31 row/s]\n", + "- Map(ArynReader._to_doc)->...->MapBatches(explode): Tasks: 0; Queued blocks: 0; Resources: 0.0 CPU, 321.0B object store: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████| 184/184 [00:27<00:00, 7.31 row/s]\n", + "- Map(ArynReader._to_doc)->...->MapBatches(explode): Tasks: 0; Queued blocks: 0; Resources: 0.0 CPU, 321.0B object store: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████| 184/184 [00:27<00:00, 6.64 row/s]\n" + ] + }, + { + "data": { + "text/plain": [ + "0" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "exploded_docset2.count()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fddbb1c1-4fe4-4aa1-a959-4c9e1e624e7b", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.11" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/sycamore b/sycamore index 7265779..ea6be3f 160000 --- a/sycamore +++ b/sycamore @@ -1 +1 @@ -Subproject commit 726577912c2c14aee1579508e83e86d55e4ef785 +Subproject commit ea6be3f4e86c840fa89ffbbf8263fd67bf736d47 diff --git a/uv.lock b/uv.lock index 9b63e16..ce3ca9b 100644 --- a/uv.lock +++ b/uv.lock @@ -1,4 +1,5 @@ version = 1 +revision = 1 requires-python = ">=3.9.2, <3.13" resolution-markers = [ "python_full_version >= '3.12' and sys_platform == 'darwin'", @@ -1275,6 +1276,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/79/9d/0fb148dc4d6fa4a7dd1d8378168d9b4cd8d4560a6fbf6f0121c5fc34eb68/importlib_metadata-8.6.1-py3-none-any.whl", hash = "sha256:02a89390c1e15fdfdc0d7c6b25cb3e62650d0494005c97d6f148bf5b9787525e", size = 26971 }, ] +[[package]] +name = "intel-openmp" +version = "2021.4.0" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/45/18/527f247d673ff84c38e0b353b6901539b99e83066cd505be42ad341ab16d/intel_openmp-2021.4.0-py2.py3-none-win32.whl", hash = "sha256:6e863d8fd3d7e8ef389d52cf97a50fe2afe1a19247e8c0d168ce021546f96fc9", size = 1860605 }, + { url = "https://files.pythonhosted.org/packages/6f/21/b590c0cc3888b24f2ac9898c41d852d7454a1695fbad34bee85dba6dc408/intel_openmp-2021.4.0-py2.py3-none-win_amd64.whl", hash = "sha256:eef4c8bcc8acefd7f5cd3b9384dbf73d59e2c99fc56545712ded913f43c4a94f", size = 3516906 }, +] + [[package]] name = "ipykernel" version = "6.29.5" @@ -1955,6 +1965,19 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/12/92/30b4e54c4d7c48c06db61595cffbbf4f19588ea177896f9b78f0fbe021fd/mistune-3.1.2-py3-none-any.whl", hash = "sha256:4b47731332315cdca99e0ded46fc0004001c1299ff773dfb48fbe1fd226de319", size = 53696 }, ] +[[package]] +name = "mkl" +version = "2021.4.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "intel-openmp", marker = "(python_full_version < '3.10' and platform_machine != 'arm64' and sys_platform == 'darwin') or (platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')" }, + { name = "tbb", marker = "(python_full_version < '3.10' and platform_machine != 'arm64' and sys_platform == 'darwin') or (platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')" }, +] +wheels = [ + { url = "https://files.pythonhosted.org/packages/ce/c6/892fe3bc91e811b78e4f85653864f2d92541d5e5c306b0cb3c2311e9ca64/mkl-2021.4.0-py2.py3-none-win32.whl", hash = "sha256:439c640b269a5668134e3dcbcea4350459c4a8bc46469669b2d67e07e3d330e8", size = 129048357 }, + { url = "https://files.pythonhosted.org/packages/fe/1c/5f6dbf18e8b73e0a5472466f0ea8d48ce9efae39bd2ff38cebf8dce61259/mkl-2021.4.0-py2.py3-none-win_amd64.whl", hash = "sha256:ceef3cafce4c009dd25f65d7ad0d833a0fbadc3d8903991ec92351fe5de1e718", size = 228499609 }, +] + [[package]] name = "mpmath" version = "1.3.0" @@ -2393,13 +2416,13 @@ wheels = [ [[package]] name = "nvidia-cudnn-cu12" -version = "9.1.0.70" +version = "8.9.2.26" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "nvidia-cublas-cu12", marker = "(python_full_version < '3.10' and platform_machine != 'arm64' and sys_platform == 'darwin') or (platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')" }, ] wheels = [ - { url = "https://files.pythonhosted.org/packages/9f/fd/713452cd72343f682b1c7b9321e23829f00b842ceaedcda96e742ea0b0b3/nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl", hash = "sha256:165764f44ef8c61fcdfdfdbe769d687e06374059fbb388b6c89ecb0e28793a6f", size = 664752741 }, + { url = "https://files.pythonhosted.org/packages/ff/74/a2e2be7fb83aaedec84f391f082cf765dfb635e7caa9b49065f73e4835d8/nvidia_cudnn_cu12-8.9.2.26-py3-none-manylinux1_x86_64.whl", hash = "sha256:5ccb288774fdfb07a7e7025ffec286971c06d8d7b4fb162525334616d7629ff9", size = 731725872 }, ] [[package]] @@ -2447,7 +2470,6 @@ name = "nvidia-nccl-cu12" version = "2.20.5" source = { registry = "https://pypi.org/simple" } wheels = [ - { url = "https://files.pythonhosted.org/packages/c1/bb/d09dda47c881f9ff504afd6f9ca4f502ded6d8fc2f572cacc5e39da91c28/nvidia_nccl_cu12-2.20.5-py3-none-manylinux2014_aarch64.whl", hash = "sha256:1fc150d5c3250b170b29410ba682384b14581db722b2531b0d8d33c595f33d01", size = 176238458 }, { url = "https://files.pythonhosted.org/packages/4b/2a/0a131f572aa09f741c30ccd45a8e56316e8be8dfc7bc19bf0ab7cfef7b19/nvidia_nccl_cu12-2.20.5-py3-none-manylinux2014_x86_64.whl", hash = "sha256:057f6bf9685f75215d0c53bf3ac4a10b3e6578351de307abad9e18a99182af56", size = 176249402 }, ] @@ -2457,7 +2479,6 @@ version = "12.8.93" source = { registry = "https://pypi.org/simple" } wheels = [ { url = "https://files.pythonhosted.org/packages/f6/74/86a07f1d0f42998ca31312f998bd3b9a7eff7f52378f4f270c8679c77fb9/nvidia_nvjitlink_cu12-12.8.93-py3-none-manylinux2010_x86_64.manylinux_2_12_x86_64.whl", hash = "sha256:81ff63371a7ebd6e6451970684f916be2eab07321b73c9d244dc2b4da7f73b88", size = 39254836 }, - { url = "https://files.pythonhosted.org/packages/2a/a2/8cee5da30d13430e87bf99bb33455d2724d0a4a9cb5d7926d80ccb96d008/nvidia_nvjitlink_cu12-12.8.93-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:adccd7161ace7261e01bb91e44e88da350895c270d23f744f0820c818b7229e7", size = 38386204 }, ] [[package]] @@ -4360,7 +4381,7 @@ wheels = [ [[package]] name = "sycamore-ai" -version = "0.1.31" +version = "0.1.30" source = { directory = "sycamore/lib/sycamore" } dependencies = [ { name = "aiohttp" }, @@ -4462,14 +4483,15 @@ requires-dist = [ { name = "tenacity", specifier = ">=8.4.1,<9.0.0" }, { name = "tiktoken", specifier = ">=0.8.0,<0.9.0" }, { name = "timm", marker = "extra == 'local-inference'", specifier = ">=0.9.12,<0.10.0" }, - { name = "torch", marker = "extra == 'local-inference'", specifier = ">=2.4.0,<3.0.0" }, - { name = "torchvision", marker = "extra == 'local-inference'", specifier = ">=0.19.1,<0.20.0" }, + { name = "torch", marker = "extra == 'local-inference'", specifier = ">=2.3.0,<3.0.0" }, + { name = "torchvision", marker = "extra == 'local-inference'", specifier = ">=0.18.1,<0.19.0" }, { name = "transformers", marker = "extra == 'local-inference'", specifier = ">=4.43.1,<5.0.0" }, { name = "typing-extensions", specifier = ">=4.12.2,<5.0.0" }, { name = "unstructured", extras = ["pdf"], marker = "extra == 'legacy-partitioners'", specifier = ">=0.16.2,<0.17.0" }, { name = "urllib3", specifier = ">=1.26.0,<3" }, { name = "weaviate-client", marker = "extra == 'weaviate'", specifier = ">=4.6.1,<4.7" }, ] +provides-extras = ["anthropic", "docs", "duckdb", "elasticsearch", "eval", "google-genai", "legacy-partitioners", "local-inference", "neo4j", "opensearch", "pinecone", "qdrant", "weaviate"] [[package]] name = "sympy" @@ -4492,6 +4514,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/40/44/4a5f08c96eb108af5cb50b41f76142f0afa346dfa99d5296fe7202a11854/tabulate-0.9.0-py3-none-any.whl", hash = "sha256:024ca478df22e9340661486f85298cff5f6dcdba14f3813e8830015b9ed1948f", size = 35252 }, ] +[[package]] +name = "tbb" +version = "2021.13.1" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/fb/8a/5062b00c378c051e26507e5eca8d3b5c91ed63f8a2139f6f0f422be84b02/tbb-2021.13.1-py3-none-win32.whl", hash = "sha256:00f5e5a70051650ddd0ab6247c0549521968339ec21002e475cd23b1cbf46d66", size = 248994 }, + { url = "https://files.pythonhosted.org/packages/9b/24/84ce997e8ae6296168a74d0d9c4dde572d90fb23fd7c0b219c30ff71e00e/tbb-2021.13.1-py3-none-win_amd64.whl", hash = "sha256:cbf024b2463fdab3ebe3fa6ff453026358e6b903839c80d647e08ad6d0796ee9", size = 286908 }, +] + [[package]] name = "tenacity" version = "8.5.0" @@ -4693,12 +4724,13 @@ wheels = [ [[package]] name = "torch" -version = "2.4.1" +version = "2.3.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "filelock" }, { name = "fsspec" }, { name = "jinja2" }, + { name = "mkl", marker = "sys_platform == 'win32'" }, { name = "networkx", version = "3.2.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" }, { name = "networkx", version = "3.4.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" }, { name = "nvidia-cublas-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, @@ -4713,31 +4745,31 @@ dependencies = [ { name = "nvidia-nccl-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, { name = "nvidia-nvtx-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, { name = "sympy" }, - { name = "triton", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "triton", marker = "python_full_version < '3.12' and platform_machine == 'x86_64' and sys_platform == 'linux'" }, { name = "typing-extensions" }, ] wheels = [ - { url = "https://files.pythonhosted.org/packages/41/05/d540049b1832d1062510efc6829634b7fbef5394c757d8312414fb65a3cb/torch-2.4.1-cp310-cp310-manylinux1_x86_64.whl", hash = "sha256:362f82e23a4cd46341daabb76fba08f04cd646df9bfaf5da50af97cb60ca4971", size = 797072810 }, - { url = "https://files.pythonhosted.org/packages/a0/12/2162df9c47386ae7cedbc938f9703fee4792d93504fab8608d541e71ece3/torch-2.4.1-cp310-cp310-manylinux2014_aarch64.whl", hash = "sha256:e8ac1985c3ff0f60d85b991954cfc2cc25f79c84545aead422763148ed2759e3", size = 89699259 }, - { url = "https://files.pythonhosted.org/packages/5d/4c/b2a59ff0e265f5ee154f0d81e948b1518b94f545357731e1a3245ee5d45b/torch-2.4.1-cp310-cp310-win_amd64.whl", hash = "sha256:91e326e2ccfb1496e3bee58f70ef605aeb27bd26be07ba64f37dcaac3d070ada", size = 199433813 }, - { url = "https://files.pythonhosted.org/packages/dc/fb/1333ba666bbd53846638dd75a7a1d4eaf964aff1c482fc046e2311a1b499/torch-2.4.1-cp310-none-macosx_11_0_arm64.whl", hash = "sha256:d36a8ef100f5bff3e9c3cea934b9e0d7ea277cb8210c7152d34a9a6c5830eadd", size = 62139309 }, - { url = "https://files.pythonhosted.org/packages/ea/ea/4ab009e953bca6ff35ad75b8ab58c0923308636c182c145dc63084f7d136/torch-2.4.1-cp311-cp311-manylinux1_x86_64.whl", hash = "sha256:0b5f88afdfa05a335d80351e3cea57d38e578c8689f751d35e0ff36bce872113", size = 797111232 }, - { url = "https://files.pythonhosted.org/packages/8f/a1/b31f94b4631c1731261db9fdc9a749ef58facc3b76094a6fe974f611f239/torch-2.4.1-cp311-cp311-manylinux2014_aarch64.whl", hash = "sha256:ef503165f2341942bfdf2bd520152f19540d0c0e34961232f134dc59ad435be8", size = 89719574 }, - { url = "https://files.pythonhosted.org/packages/5a/6a/775b93d6888c31f1f1fc457e4f5cc89f0984412d5dcdef792b8f2aa6e812/torch-2.4.1-cp311-cp311-win_amd64.whl", hash = "sha256:092e7c2280c860eff762ac08c4bdcd53d701677851670695e0c22d6d345b269c", size = 199436128 }, - { url = "https://files.pythonhosted.org/packages/1f/34/c93873c37f93154d982172755f7e504fdbae6c760499303a3111ce6ce327/torch-2.4.1-cp311-none-macosx_11_0_arm64.whl", hash = "sha256:ddddbd8b066e743934a4200b3d54267a46db02106876d21cf31f7da7a96f98ea", size = 62145176 }, - { url = "https://files.pythonhosted.org/packages/cc/df/5204a13a7a973c23c7ade615bafb1a3112b5d0ec258d8390f078fa4ab0f7/torch-2.4.1-cp312-cp312-manylinux1_x86_64.whl", hash = "sha256:fdc4fe11db3eb93c1115d3e973a27ac7c1a8318af8934ffa36b0370efe28e042", size = 797019590 }, - { url = "https://files.pythonhosted.org/packages/4f/16/d23a689e5ef8001ed2ace1a3a59f2fda842889b0c3f3877799089925282a/torch-2.4.1-cp312-cp312-manylinux2014_aarch64.whl", hash = "sha256:18835374f599207a9e82c262153c20ddf42ea49bc76b6eadad8e5f49729f6e4d", size = 89613802 }, - { url = "https://files.pythonhosted.org/packages/a8/e0/ca8354dfb8d834a76da51b06e8248b70fc182bc163540507919124974bdf/torch-2.4.1-cp312-cp312-win_amd64.whl", hash = "sha256:ebea70ff30544fc021d441ce6b219a88b67524f01170b1c538d7d3ebb5e7f56c", size = 199387694 }, - { url = "https://files.pythonhosted.org/packages/ac/30/8b6f77ea4ce84f015ee024b8dfef0dac289396254e8bfd493906d4cbb848/torch-2.4.1-cp312-none-macosx_11_0_arm64.whl", hash = "sha256:72b484d5b6cec1a735bf3fa5a1c4883d01748698c5e9cfdbeb4ffab7c7987e0d", size = 62123443 }, - { url = "https://files.pythonhosted.org/packages/14/d6/caa3ccde685a3bfedeed1454d82b2eb520e611d1b36bf748f54475de333f/torch-2.4.1-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:40f6d3fe3bae74efcf08cb7f8295eaddd8a838ce89e9d26929d4edd6d5e4329d", size = 797088350 }, - { url = "https://files.pythonhosted.org/packages/3d/5d/4e9a7e5b7f11710519c38fe6a9f588a91fd23e6e9722e79f90f03823222d/torch-2.4.1-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:c9299c16c9743001ecef515536ac45900247f4338ecdf70746f2461f9e4831db", size = 89706796 }, - { url = "https://files.pythonhosted.org/packages/ef/44/238ef95daf345bab21afa0ca37b2896dfc20cd93b6b75722717685fdeb10/torch-2.4.1-cp39-cp39-win_amd64.whl", hash = "sha256:6bce130f2cd2d52ba4e2c6ada461808de7e5eccbac692525337cfb4c19421846", size = 199332260 }, - { url = "https://files.pythonhosted.org/packages/e7/81/c05013695bfb3762f3c657a557407f152a0a0452b3ccec437a4a59848fb5/torch-2.4.1-cp39-none-macosx_11_0_arm64.whl", hash = "sha256:a38de2803ee6050309aac032676536c3d3b6a9804248537e38e098d0e14817ec", size = 62139344 }, + { url = "https://files.pythonhosted.org/packages/cb/e2/1bd899d3eb60c6495cf5d0d2885edacac08bde7a1407eadeb2ab36eca3c7/torch-2.3.1-cp310-cp310-manylinux1_x86_64.whl", hash = "sha256:605a25b23944be5ab7c3467e843580e1d888b8066e5aaf17ff7bf9cc30001cc3", size = 779135478 }, + { url = "https://files.pythonhosted.org/packages/d5/67/93143534e1c1293a08fcb96cced205c199c6ae9306707b1a29f533e359f0/torch-2.3.1-cp310-cp310-manylinux2014_aarch64.whl", hash = "sha256:f2357eb0965583a0954d6f9ad005bba0091f956aef879822274b1bcdb11bd308", size = 86932717 }, + { url = "https://files.pythonhosted.org/packages/85/fc/ee5bb50eff313149657f173b003649677e27fa3aaae1ecc806add37f017c/torch-2.3.1-cp310-cp310-win_amd64.whl", hash = "sha256:32b05fe0d1ada7f69c9f86c14ff69b0ef1957a5a54199bacba63d22d8fab720b", size = 159777142 }, + { url = "https://files.pythonhosted.org/packages/2c/52/7ab0a00b54aa1651e79a9ebc721d45fba86d8c8ab65c4ec6e0a49f09527a/torch-2.3.1-cp310-none-macosx_11_0_arm64.whl", hash = "sha256:7c09a94362778428484bcf995f6004b04952106aee0ef45ff0b4bab484f5498d", size = 61002907 }, + { url = "https://files.pythonhosted.org/packages/07/9a/4c5e74264439837814656201da13a898056a5201c976ef042544bceb840f/torch-2.3.1-cp311-cp311-manylinux1_x86_64.whl", hash = "sha256:b2ec81b61bb094ea4a9dee1cd3f7b76a44555375719ad29f05c0ca8ef596ad39", size = 779156414 }, + { url = "https://files.pythonhosted.org/packages/5c/dc/82b5314ffcffa071440108fdccf59159abcd937b8e4d53f3237914089e60/torch-2.3.1-cp311-cp311-manylinux2014_aarch64.whl", hash = "sha256:490cc3d917d1fe0bd027057dfe9941dc1d6d8e3cae76140f5dd9a7e5bc7130ab", size = 86949326 }, + { url = "https://files.pythonhosted.org/packages/d3/1d/a257913c89572de61316461db91867f87519146e58132cdeace3d9ffbe1f/torch-2.3.1-cp311-cp311-win_amd64.whl", hash = "sha256:5802530783bd465fe66c2df99123c9a54be06da118fbd785a25ab0a88123758a", size = 159781829 }, + { url = "https://files.pythonhosted.org/packages/d0/5f/f41b14a398d484bf218d5167ec9061c1e76f500d9e25166117818c8bacda/torch-2.3.1-cp311-none-macosx_11_0_arm64.whl", hash = "sha256:a7dd4ed388ad1f3d502bf09453d5fe596c7b121de7e0cfaca1e2017782e9bbac", size = 61007595 }, + { url = "https://files.pythonhosted.org/packages/f3/82/68ccd49add4d21937f087871350905ffc709f32c92bf95334e7abf442147/torch-2.3.1-cp312-cp312-manylinux1_x86_64.whl", hash = "sha256:a486c0b1976a118805fc7c9641d02df7afbb0c21e6b555d3bb985c9f9601b61a", size = 779079866 }, + { url = "https://files.pythonhosted.org/packages/1b/a1/e8b286b85f19dd701a4b853c0554898b1fa69cea552c7d1ec39bc86f59aa/torch-2.3.1-cp312-cp312-manylinux2014_aarch64.whl", hash = "sha256:224259821fe3e4c6f7edf1528e4fe4ac779c77addaa74215eb0b63a5c474d66c", size = 86853451 }, + { url = "https://files.pythonhosted.org/packages/af/77/cf6ceb000f8a064c7b373fb3471d85bcc39917d175af82fead4a2857c669/torch-2.3.1-cp312-cp312-win_amd64.whl", hash = "sha256:e5fdccbf6f1334b2203a61a0e03821d5845f1421defe311dabeae2fc8fbeac2d", size = 159727172 }, + { url = "https://files.pythonhosted.org/packages/49/b6/1a2e3d43d4bc4ad7a4575b3745d707a68d5ed00ba263b205b6281bdd0921/torch-2.3.1-cp312-none-macosx_11_0_arm64.whl", hash = "sha256:3c333dc2ebc189561514eda06e81df22bf8fb64e2384746b2cb9f04f96d1d4c8", size = 60978559 }, + { url = "https://files.pythonhosted.org/packages/74/b3/1febb6be57a4f68cb55ea178f5ffca6a10b01b47e182f7b76eddd9168632/torch-2.3.1-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:aaa872abde9a3d4f91580f6396d54888620f4a0b92e3976a6034759df4b961ad", size = 779131284 }, + { url = "https://files.pythonhosted.org/packages/2c/63/1654275881bb550a6e6beebef4590aee702fce1cf3d12880b7da7091e248/torch-2.3.1-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:3d7a7f7ef21a7520510553dc3938b0c57c116a7daee20736a9e25cbc0e832bdc", size = 86933061 }, + { url = "https://files.pythonhosted.org/packages/19/b8/9f9f6b40d6b485f42ef560990e27722046d3bcd0ebcde47d54adc2d74432/torch-2.3.1-cp39-cp39-win_amd64.whl", hash = "sha256:4777f6cefa0c2b5fa87223c213e7b6f417cf254a45e5829be4ccd1b2a4ee1011", size = 159723492 }, + { url = "https://files.pythonhosted.org/packages/3e/17/d605f9b95078fb9a4a5d931480b5d35755dc8018349bf70c859f0be47c6d/torch-2.3.1-cp39-none-macosx_11_0_arm64.whl", hash = "sha256:2bb5af780c55be68fe100feb0528d2edebace1d55cb2e351de735809ba7391eb", size = 61003330 }, ] [[package]] name = "torchvision" -version = "0.19.1" +version = "0.18.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "numpy" }, @@ -4745,22 +4777,22 @@ dependencies = [ { name = "torch" }, ] wheels = [ - { url = "https://files.pythonhosted.org/packages/d4/90/cab820b96d4d1a36b088774209d2379cf49eda8210c8fee13552383860b7/torchvision-0.19.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:54e8513099e6f586356c70f809d34f391af71ad182fe071cc328a28af2c40608", size = 1660236 }, - { url = "https://files.pythonhosted.org/packages/72/55/e0b3821c5595a9a2c8ec98d234b4a0d1142d91daac61f007503d3158f857/torchvision-0.19.1-cp310-cp310-manylinux1_x86_64.whl", hash = "sha256:20a1f5e02bfdad7714e55fa3fa698347c11d829fa65e11e5a84df07d93350eed", size = 7026373 }, - { url = "https://files.pythonhosted.org/packages/db/71/da0f71c2765feee125b1dc280a6432aa88c510aedf9a36987f3fe7ed05ea/torchvision-0.19.1-cp310-cp310-manylinux2014_aarch64.whl", hash = "sha256:7b063116164be52fc6deb4762de7f8c90bfa3a65f8d5caf17f8e2d5aadc75a04", size = 14072253 }, - { url = "https://files.pythonhosted.org/packages/f7/8e/cbae11f8046d433881b478afc9e7589a76158124779cbc3a40163ec716bf/torchvision-0.19.1-cp310-cp310-win_amd64.whl", hash = "sha256:f40b6acabfa886da1bc3768f47679c61feee6bde90deb979d9f300df8c8a0145", size = 1288329 }, - { url = "https://files.pythonhosted.org/packages/66/f6/a2f07a3f5385b37c45b8e14448b8610a8618dfad18ea437cb23b4edc50c5/torchvision-0.19.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:40514282b4896d62765b8e26d7091c32e17c35817d00ec4be2362ea3ba3d1787", size = 1660235 }, - { url = "https://files.pythonhosted.org/packages/28/9d/40d1b943bbbd02a30d6b4f691d6de37a7e4c92f90bed0f8f47379e90eec6/torchvision-0.19.1-cp311-cp311-manylinux1_x86_64.whl", hash = "sha256:5a91be061ae5d6d5b95e833b93e57ca4d3c56c5a57444dd15da2e3e7fba96050", size = 7026152 }, - { url = "https://files.pythonhosted.org/packages/36/04/36e1d35b864f4a7c8f3056a427542b14b3bcdbc66edd36faadee109b86c5/torchvision-0.19.1-cp311-cp311-manylinux2014_aarch64.whl", hash = "sha256:d71a6a6fe3a5281ca3487d4c56ad4aad20ff70f82f1d7c79bcb6e7b0c2af00c8", size = 14072255 }, - { url = "https://files.pythonhosted.org/packages/f8/69/dc769cf54df8e828c0b8957b4521f35178f5bd4cc5b8fbe8a37ffd89a27c/torchvision-0.19.1-cp311-cp311-win_amd64.whl", hash = "sha256:70dea324174f5e9981b68e4b7cd524512c106ba64aedef560a86a0bbf2fbf62c", size = 1288330 }, - { url = "https://files.pythonhosted.org/packages/a4/d0/b1029ab95d9219cac2dfc0d835e9ab4cebb01f5cb6b48e736778020fb995/torchvision-0.19.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:27ece277ff0f6cdc7fed0627279c632dcb2e58187da771eca24b0fbcf3f8590d", size = 1660230 }, - { url = "https://files.pythonhosted.org/packages/8b/34/fdd2d9e01228a069b28473a7c020bf1812c8ecab8565666feb247659ed30/torchvision-0.19.1-cp312-cp312-manylinux1_x86_64.whl", hash = "sha256:c659ff92a61f188a1a7baef2850f3c0b6c85685447453c03d0e645ba8f1dcc1c", size = 7026404 }, - { url = "https://files.pythonhosted.org/packages/da/b2/9da42d67dfc30d9e3b161f7a37f6c7eca86a80e6caef4a9aa11727faa4f5/torchvision-0.19.1-cp312-cp312-manylinux2014_aarch64.whl", hash = "sha256:c07bf43c2a145d792ecd9d0503d6c73577147ece508d45600d8aac77e4cdfcf9", size = 14072022 }, - { url = "https://files.pythonhosted.org/packages/6b/b2/fd577e1622b43cdeb74782a60cea4909f88f471813c215ea7b4e7ea84a74/torchvision-0.19.1-cp312-cp312-win_amd64.whl", hash = "sha256:b4283d283675556bb0eae31d29996f53861b17cbdcdf3509e6bc050414ac9289", size = 1288328 }, - { url = "https://files.pythonhosted.org/packages/61/37/3aff3b9d89b8676f11702840fba7d7ef1d8e91d750426214cc55f6c3fee1/torchvision-0.19.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:731f434d91586769e255b5d70ed1a4457e0a1394a95f4aacf0e1e7e21f80c098", size = 1660246 }, - { url = "https://files.pythonhosted.org/packages/c3/4f/67b40e50d5dd1f9200421ab31b17d337162742b4f6676a0f4a917b3acdf1/torchvision-0.19.1-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:febe4f14d4afcb47cc861d8be7760ab6a123cd0817f97faf5771488cb6aa90f4", size = 7027693 }, - { url = "https://files.pythonhosted.org/packages/6d/11/16d63ede75bd1433aa84f1d9156b058b3ed4976749972220a90c13a1df64/torchvision-0.19.1-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:e328309b8670a2e889b2fe76a1c2744a099c11c984da9a822357bd9debd699a5", size = 1702641 }, - { url = "https://files.pythonhosted.org/packages/b0/7b/2e55c0c613af4df93ef5b9ab8f652226c07b3fb9b0c742ceedcdd6cec2e5/torchvision-0.19.1-cp39-cp39-win_amd64.whl", hash = "sha256:6616f12e00a22e7f3fedbd0fccb0804c05e8fe22871668f10eae65cf3f283614", size = 1288330 }, + { url = "https://files.pythonhosted.org/packages/f2/31/867be50508348030afea933e859bd7bbeb86924a6c2e35faf7777fbd6f55/torchvision-0.18.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:3e694e54b0548dad99c12af6bf0c8e4f3350137d391dcd19af22a1c5f89322b3", size = 1555036 }, + { url = "https://files.pythonhosted.org/packages/08/04/17425bf3c0620465ee182cea5c674db4debab87ed0627145d38039cb2a9e/torchvision-0.18.1-cp310-cp310-manylinux1_x86_64.whl", hash = "sha256:0b3bda0aa5b416eeb547143b8eeaf17720bdba9cf516dc991aacb81811aa96a5", size = 6955008 }, + { url = "https://files.pythonhosted.org/packages/9d/7e/38b7d6689e988f23a2c07782e045abaf2d54c7b63086f164c4dbd41228b5/torchvision-0.18.1-cp310-cp310-manylinux2014_aarch64.whl", hash = "sha256:573ff523c739405edb085f65cb592f482d28a30e29b0be4c4ba08040b3ae785f", size = 13995860 }, + { url = "https://files.pythonhosted.org/packages/4e/62/3816637079b77875077678bd7087285a5b5589664f94f5ceb2d080cc024c/torchvision-0.18.1-cp310-cp310-win_amd64.whl", hash = "sha256:ef7bbbc60b38e831a75e547c66ca1784f2ac27100f9e4ddbe9614cef6cbcd942", size = 1183257 }, + { url = "https://files.pythonhosted.org/packages/0f/02/8d3f83e01cf7fd72884fe84d4ff737ce774c5a7653bb826cf7acd39179f3/torchvision-0.18.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:80b5d794dd0fdba787adc22f1a367a5ead452327686473cb260dd94364bc56a6", size = 1555041 }, + { url = "https://files.pythonhosted.org/packages/82/d8/fad23c368781b6e6df254287511683b4d151132de64c47a6fea5c3280ba6/torchvision-0.18.1-cp311-cp311-manylinux1_x86_64.whl", hash = "sha256:9077cf590cdb3a5e8fdf5cdb71797f8c67713f974cf0228ecb17fcd670ab42f9", size = 6955165 }, + { url = "https://files.pythonhosted.org/packages/04/d8/13287fc08ed60553033233f47478b8af9f56e2432333e9ad1400a28084db/torchvision-0.18.1-cp311-cp311-manylinux2014_aarch64.whl", hash = "sha256:ceb993a882f1ae7ae373ed39c28d7e3e802205b0e59a7ed84ef4028f0bba8d7f", size = 13995865 }, + { url = "https://files.pythonhosted.org/packages/e4/c3/a21a75dd2de8114a6876f16a36b033e3e62f8ade68085a711b24f4b57c17/torchvision-0.18.1-cp311-cp311-win_amd64.whl", hash = "sha256:52f7436140045dc2239cdc502aa76b2bd8bd676d64244ff154d304aa69852046", size = 1183279 }, + { url = "https://files.pythonhosted.org/packages/4e/90/92c927d4ca82934c3a1cb0ee3374067bd79f7a465395c1261d3eb17a511f/torchvision-0.18.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:2be6f0bf7c455c89a51a1dbb6f668d36c6edc479f49ac912d745d10df5715657", size = 1555035 }, + { url = "https://files.pythonhosted.org/packages/cf/ea/02b9fdc72aac151313c95f214fdbb50f95152f33e7cb7ae4e14d717bfa01/torchvision-0.18.1-cp312-cp312-manylinux1_x86_64.whl", hash = "sha256:f118d887bfde3a948a41d56587525401e5cac1b7db2eaca203324d6ed2b1caca", size = 6955220 }, + { url = "https://files.pythonhosted.org/packages/a0/55/21e2849ecdab10a04392e2f892f5c70eff72adf71507e852e091e8c5f88f/torchvision-0.18.1-cp312-cp312-manylinux2014_aarch64.whl", hash = "sha256:13d24d904f65e62d66a1e0c41faec630bc193867b8a4a01166769e8a8e8df8e9", size = 13996690 }, + { url = "https://files.pythonhosted.org/packages/0f/05/30ed3e81a610a236426eed6ac57ae0f1bbc4d526008e331f750e34e88c06/torchvision-0.18.1-cp312-cp312-win_amd64.whl", hash = "sha256:ed6340b69a63a625e512a66127210d412551d9c5f2ad2978130c6a45bf56cd4a", size = 1183281 }, + { url = "https://files.pythonhosted.org/packages/e8/26/e414c50dd13a19f5a4a68e32503e86066ce1b2cdf84b6ec49d25e3a85465/torchvision-0.18.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:975b8594c0f5288875408acbb74946eea786c5b008d129c0d045d0ead23742bc", size = 1555078 }, + { url = "https://files.pythonhosted.org/packages/77/b7/76e7c97aea1df00aabef8413d0048c0f693bb44027e57362768199f4e8e8/torchvision-0.18.1-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:da83c8bbd34d8bee48bfa1d1b40e0844bc3cba10ed825a5a8cbe3ce7b62264cd", size = 6956234 }, + { url = "https://files.pythonhosted.org/packages/09/93/29e01f8b4cd81d16b3020f3dab0c7837efc1c099b8998fd581e36e4dbdd7/torchvision-0.18.1-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:54bfcd352abb396d5c9c237d200167c178bd136051b138e1e8ef46ce367c2773", size = 1626207 }, + { url = "https://files.pythonhosted.org/packages/3f/73/5dadc116a8e6115f0f3c015ed0c415d301330ddb385bc6c501686e019443/torchvision-0.18.1-cp39-cp39-win_amd64.whl", hash = "sha256:5c8366a1aeee49e9ea9e64b30d199debdf06b1bd7610a76165eb5d7869c3bde5", size = 1183257 }, ] [[package]] @@ -4825,16 +4857,16 @@ wheels = [ [[package]] name = "triton" -version = "3.0.0" +version = "2.3.1" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "filelock", marker = "(python_full_version < '3.10' and platform_machine != 'arm64' and sys_platform == 'darwin') or (platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')" }, + { name = "filelock", marker = "(python_full_version < '3.10' and platform_machine != 'arm64' and sys_platform == 'darwin') or (python_full_version < '3.12' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version < '3.12' and sys_platform != 'darwin' and sys_platform != 'linux')" }, ] wheels = [ - { url = "https://files.pythonhosted.org/packages/45/27/14cc3101409b9b4b9241d2ba7deaa93535a217a211c86c4cc7151fb12181/triton-3.0.0-1-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:e1efef76935b2febc365bfadf74bcb65a6f959a9872e5bddf44cc9e0adce1e1a", size = 209376304 }, - { url = "https://files.pythonhosted.org/packages/33/3e/a2f59384587eff6aeb7d37b6780de7fedd2214935e27520430ca9f5b7975/triton-3.0.0-1-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:5ce8520437c602fb633f1324cc3871c47bee3b67acf9756c1a66309b60e3216c", size = 209438883 }, - { url = "https://files.pythonhosted.org/packages/fe/7b/7757205dee3628f75e7991021d15cd1bd0c9b044ca9affe99b50879fc0e1/triton-3.0.0-1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:34e509deb77f1c067d8640725ef00c5cbfcb2052a1a3cb6a6d343841f92624eb", size = 209464695 }, - { url = "https://files.pythonhosted.org/packages/6c/bf/55cccf57c14787ad81ee827526ddd48fd0aff0291fcc7b8c2e2bdf28da0a/triton-3.0.0-1-cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:6e5727202f7078c56f91ff13ad0c1abab14a0e7f2c87e91b12b6f64f3e8ae609", size = 209377082 }, + { url = "https://files.pythonhosted.org/packages/d7/69/8a9fde07d2d27a90e16488cdfe9878e985a247b2496a4b5b1a2126042528/triton-2.3.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3c84595cbe5e546b1b290d2a58b1494df5a2ef066dd890655e5b8a8a92205c33", size = 168055249 }, + { url = "https://files.pythonhosted.org/packages/64/16/956b7b9d2ed3a437a1a06792b2ae2e3c49147296ba2f4d59fcee376ded8f/triton-2.3.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c9d64ae33bcb3a7a18081e3a746e8cf87ca8623ca13d2c362413ce7a486f893e", size = 168079264 }, + { url = "https://files.pythonhosted.org/packages/ea/a4/e66cbd7befaf44a84cfb367b00a0331735cd56d4b2076533dec9b0b255fe/triton-2.3.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:eaf80e8761a9e3498aa92e7bf83a085b31959c61f5e8ac14eedd018df6fccd10", size = 168090656 }, + { url = "https://files.pythonhosted.org/packages/fe/31/a3783aaab3a75d8b622b0fa822eb3ae95063dec8e866a18d574ae64f33bd/triton-2.3.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1d968264523c7a07911c8fb51b4e0d1b920204dae71491b1fe7b01b62a31e124", size = 168051040 }, ] [[package]] From 6be6179e197ddf0ae21131e2e27b8e2c903ca636 Mon Sep 17 00:00:00 2001 From: Abhijit Pujare Date: Thu, 27 Mar 2025 10:00:24 -0700 Subject: [PATCH 2/5] Fixing output of cells for earnings_calls ingestion --- Earnings_Call_Ingestion_Script.ipynb | 7950 +------------------------- 1 file changed, 13 insertions(+), 7937 deletions(-) diff --git a/Earnings_Call_Ingestion_Script.ipynb b/Earnings_Call_Ingestion_Script.ipynb index f1e0065..228c265 100644 --- a/Earnings_Call_Ingestion_Script.ipynb +++ b/Earnings_Call_Ingestion_Script.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "id": "8c351421-90b1-421a-8ba3-7abe33c7ce7a", "metadata": {}, "outputs": [], @@ -23,7 +23,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "id": "558b6660-c646-47b7-9f0a-08ab39c42606", "metadata": {}, "outputs": [], @@ -37,7 +37,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "id": "101c2417-2e9e-4e32-ad77-31f88c287b21", "metadata": {}, "outputs": [], @@ -47,7 +47,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "id": "b497902d-6f93-40f1-b4e6-b24ef7d096d0", "metadata": {}, "outputs": [], @@ -58,7 +58,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "id": "4de771fb-da21-4855-beba-a4a5d49d338c", "metadata": {}, "outputs": [], @@ -79,7 +79,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "id": "8b424f07-750b-488d-91cf-c2c23186ed00", "metadata": {}, "outputs": [], @@ -104,7 +104,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": null, "id": "533f2ff7-0a6e-4032-91c0-7aa6b35bfc34", "metadata": {}, "outputs": [], @@ -118,7 +118,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": null, "id": "cf5413c3-cb14-4bf5-917d-125784096c26", "metadata": {}, "outputs": [], @@ -141,7 +141,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": null, "id": "27170a2a-47f1-4f19-93b0-1c314757e284", "metadata": {}, "outputs": [], @@ -181,7 +181,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": null, "id": "71d5158d-9e31-432b-af55-c283905f3f8e", "metadata": {}, "outputs": [], @@ -211,7 +211,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": null, "id": "2d7db173-2116-4d8f-b8ea-f266be975eff", "metadata": {}, "outputs": [], @@ -225,7 +225,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": null, "id": "9cec59d1-f457-4509-98f2-ac8b62cdf19c", "metadata": {}, "outputs": [], @@ -242,7931 +242,7 @@ "execution_count": null, "id": "f2871d26-680a-47c2-b6b1-1ccaafa1f0e2", "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/var/folders/ty/flsmmnn17p314__gg336qjy00000gn/T/ipykernel_77841/3213490678.py:1: FutureWarning: Class aryn is experimental and may change in the future.\n", - " finalDocSet.embed(embedder=OpenAIEmbedder(model_name=model_name)).write.aryn(aryn_url=\"https://test-api.aryn.ai/v1/storage\", docset_id=docset.value.docset_id, aryn_api_key=\"eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJzdWIiOnsiZW1sIjoiZG9jc3RvcmUtdGVhbUBhcnluLmFpIiwiYWN0IjoiMjI0NjYxMzg4MzA4In0sImlhdCI6MTczMzk1MDY3OH0.xqmVCfnu0RJ2RW-74-to4_hbeTIZflToj7YZs6vGtNxCkNroEE5Quzro8ztxTdI6Yt-9HzPepIXdA2QnKDtWBQ\")\n", - "/Users/abhijitpujare/workspace/haystack-workshop-2025/.venv/lib/python3.11/site-packages/sycamore/writer.py:861: FutureWarning: Class ArynWriter is experimental and may change in the future.\n", - " writer: Node = ArynWriter(self.plan, client_params=client_params, target_params=target_params, **kwargs)\n", - "/Users/abhijitpujare/workspace/haystack-workshop-2025/.venv/lib/python3.11/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", - " from .autonotebook import tqdm as notebook_tqdm\n", - "2025-03-26 16:46:26,934\tINFO util.py:154 -- Missing packages: ['ipywidgets']. Run `pip install -U ipywidgets`, then restart the notebook server for rich notebook output.\n", - "2025-03-26 16:46:27,907\tINFO worker.py:1832 -- Started a local Ray instance. View the dashboard at \u001b[1m\u001b[32mhttp://127.0.0.1:8265 \u001b[39m\u001b[22m\n", - "2025-03-26 16:46:28,630\tINFO util.py:154 -- Missing packages: ['ipywidgets']. Run `pip install -U ipywidgets`, then restart the notebook server for rich notebook output.\n", - "2025-03-26 16:46:30,038\tWARNING util.py:576 -- The argument ``compute`` is deprecated in Ray 2.9. Please specify argument ``concurrency`` instead. For more information, see https://docs.ray.io/en/master/data/transforming-data.html#stateful-transforms.\n", - "(pid=77873)2025-03-26 16:46:30,046\tINFO streaming_executor.py:108 -- Starting execution of Dataset. Full logs are in /tmp/ray/session_2025-03-26_16-46-26_951347_77841/logs/ray-data\n", - "2025-03-26 16:46:30,046\tINFO streaming_executor.py:109 -- Execution plan of Dataset: InputDataBuffer[Input] -> ActorPoolMapOperator[ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)] -> TaskPoolMapOperator[MapBatches(materialize)->MapBatches(split_doc)->MapBatches(llm_map)->MapBatches(parse_json_and_cast)->MapBatches(process_doc)->MapBatches(removeOriginalElements)->MapBatches(process_doc)->MapBatches(mergeDialogue)->MapBatches(process_doc)->MapBatches(OpenAIEmbedder)->MapBatches(_write_docs_tt)]\n", - " INFO:root:Spurious log 1: Verifying that log messages are propagated\n", - "Running 0: 0.00 row [00:00, ? row/s]\n", - "- ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap) 1: 0.00 row [00:00, ? row/s]2025-03-26 16:46:31,361\tWARNING progress_bar.py:120 -- Truncating long operator name to 100 characters. To disable this behavior, set `ray.data.DataContext.get_current().DEFAULT_ENABLE_PROGRESS_BAR_NAME_TRUNCATION = False`.\n", - "\n", - "\n", - " 0 row [00:00, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:httpx:HTTP Request: POST https://api.aryn.cloud/v1/document/partition \"HTTP/1.1 200 OK\"\n", - "Running Dataset. Active & requested resources: 1/11 CPU, 0.0B/1.0GB object store: : 0.00 row [00:01, ? row/s]\n", - "- ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap) 1: 0.00 row [00:00, ? row/s]\n", - "\n", - "- MapBatches(materialize)->MapBatches(split_doc)->MapBatches(llm_map)->...->MapBatches(_write_docs_tt) 2: 0.00 row [00:00, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:01, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:01, ? row/s]\n", - "\n", - "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 0; Queued blocks: 0; Resources: 0.0 CPU, 0.0B object store: : 0.00 row [00:01, ? row/s]\n", - "\n", - " object store: : 0.00 row [00:01, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.00: Server version aryn-partitioner-0.20250323.124348 Model version 1.4\n", - "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:03, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:01, ? row/s]\n", - "\n", - " object store: : 0.00 row [00:01, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.00: Server version aryn-partitioner-0.20250323.124348 Model version 1.4\n", - "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:03, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:01, ? row/s]\n", - "\n", - " object store: : 0.00 row [00:01, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.00: Received request with aryn_call_id=aryn:call-4dvkd9uyoo8krhhs5bmkndg\n", - "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:03, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:01, ? row/s]\n", - "\n", - " object store: : 0.00 row [00:01, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.00: Received request with aryn_call_id=aryn:call-4dvkd9uyoo8krhhs5bmkndg\n", - "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:03, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:01, ? row/s]\n", - "\n", - " object store: : 0.00 row [00:01, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.00: Waiting for scheduling\n", - "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:03, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:01, ? row/s]\n", - "\n", - " object store: : 0.00 row [00:01, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.00: Waiting for scheduling\n", - "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:03, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:01, ? row/s]\n", - "\n", - " object store: : 0.00 row [00:01, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.01: Preprocessing document\n", - "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:03, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:02, ? row/s]\n", - "\n", - " object store: : 0.00 row [00:02, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.01: Preprocessing document\n", - "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:03, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:02, ? row/s]\n", - "\n", - " object store: : 0.00 row [00:02, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.01: Done preprocessing document\n", - "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:03, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:02, ? row/s]\n", - "\n", - " object store: : 0.00 row [00:02, ? row/s]\n", - " \n", - "\n", - " \n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:02, ? row/s](MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875)\n", - "\n", - "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 0; Queued blocks: 0; Resources: 0.0 CPU, 0.0B object store: : 0.00 row [00:02, ? row/s]\n", - "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:03, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:02, ? row/s]\n", - "\n", - " object store: : 0.00 row [00:02, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.75: Completed work on page 2\n", - "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:03, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:02, ? row/s]\n", - "\n", - " object store: : 0.00 row [00:02, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.75: Completed work on page 2\n", - "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:03, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:02, ? row/s]\n", - "\n", - " object store: : 0.00 row [00:02, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.75: Completed work on page 1\n", - "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:03, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:02, ? row/s]\n", - "\n", - " object store: : 0.00 row [00:02, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.75: Completed work on page 1\n", - "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:03, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:02, ? row/s]\n", - "\n", - " object store: : 0.00 row [00:02, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.85: Completed work on page 3\n", - "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:03, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:02, ? row/s]\n", - "\n", - " object store: : 0.00 row [00:02, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.85: Completed work on page 3\n", - "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:03, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:02, ? row/s]\n", - "\n", - " object store: : 0.00 row [00:02, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.18: Completed work on page 4\n", - "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:04, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:02, ? row/s]\n", - "\n", - " object store: : 0.00 row [00:02, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.18: Completed work on page 4\n", - "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:04, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:02, ? row/s]\n", - "\n", - " object store: : 0.00 row [00:02, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.31: Completed work on page 5\n", - "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:04, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:02, ? row/s]\n", - "\n", - " object store: : 0.00 row [00:02, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.31: Completed work on page 5\n", - "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:04, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:02, ? row/s]\n", - "\n", - " object store: : 0.00 row [00:02, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.57: Completed work on page 6\n", - "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:04, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:02, ? row/s]\n", - "\n", - " object store: : 0.00 row [00:02, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.57: Completed work on page 6\n", - "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:04, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:02, ? row/s]\n", - "\n", - " object store: : 0.00 row [00:02, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.76: Completed work on page 7\n", - "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:04, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:02, ? row/s]\n", - "\n", - " object store: : 0.00 row [00:03, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.76: Completed work on page 7\n", - "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:04, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:03, ? row/s]\n", - "\n", - " object store: : 0.00 row [00:03, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.92: Completed work on page 8\n", - "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:04, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:03, ? row/s]\n", - "\n", - " object store: : 0.00 row [00:03, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.92: Completed work on page 8\n", - "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:04, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:03, ? row/s]\n", - "\n", - "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 0; Queued blocks: 0; Resources: 0.0 CPU, 0.0B object store: : 0.00 row [00:03, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:03, ? row/s]\n", - "\n", - " object store: : 0.00 row [00:03, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.14: Completed work on page 9\n", - "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:05, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:03, ? row/s]\n", - "\n", - " object store: : 0.00 row [00:03, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.14: Completed work on page 9\n", - "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:05, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:03, ? row/s]\n", - "\n", - " object store: : 0.00 row [00:03, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.29: Completed work on page 10\n", - "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:05, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:03, ? row/s]\n", - "\n", - " object store: : 0.00 row [00:03, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.29: Completed work on page 10\n", - "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:05, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:03, ? row/s]\n", - "\n", - " object store: : 0.00 row [00:03, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.46: Completed work on page 11\n", - "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:05, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:03, ? row/s]\n", - "\n", - " object store: : 0.00 row [00:03, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.46: Completed work on page 11\n", - "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:05, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:03, ? row/s]\n", - "\n", - " object store: : 0.00 row [00:03, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.67: Completed work on page 12\n", - "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:05, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:03, ? row/s]\n", - "\n", - " object store: : 0.00 row [00:03, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.67: Completed work on page 12\n", - "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:05, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:04, ? row/s]\n", - "\n", - " object store: : 0.00 row [00:04, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.99: Completed work on page 13\n", - "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:05, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:04, ? row/s]\n", - "\n", - " object store: : 0.00 row [00:04, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.99: Completed work on page 13\n", - "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:05, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:04, ? row/s]\n", - "\n", - " object store: : 0.00 row [00:04, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 3.00: Completed work on page 14\n", - "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:05, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:04, ? row/s]\n", - "\n", - " object store: : 0.00 row [00:04, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 3.00: Completed work on page 14\n", - "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:05, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:04, ? row/s]\n", - "\n", - " object store: : 0.00 row [00:04, ? row/s]\n", - " \n", - "\n", - " \n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:04, ? row/s](pid=77876)\n", - "\n", - "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:05, ? row/s] object store: : 0.00 row [00:04, ? row/s] [repeated 2x across cluster] (Ray deduplicates logs by default. Set RAY_DEDUP_LOGS=0 to disable log deduplication, or see https://docs.ray.io/en/master/ray-observability/user-guides/configure-logging.html#log-deduplication for more options.)\n", - "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:05, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:04, ? row/s]\n", - "\n", - " object store: : 0.00 row [00:04, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 3.15: Completed work on page 15\n", - "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:06, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:04, ? row/s]\n", - "\n", - " object store: : 0.00 row [00:04, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 3.15: Completed work on page 15\n", - "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:06, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:04, ? row/s]\n", - "\n", - " object store: : 0.00 row [00:04, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 3.67: Completed work on page 18\n", - "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:06, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:04, ? row/s]\n", - "\n", - " object store: : 0.00 row [00:04, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 3.67: Completed work on page 18\n", - "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:06, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:04, ? row/s]\n", - "\n", - " object store: : 0.00 row [00:04, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 3.80: Completed work on page 16\n", - "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:06, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:04, ? row/s]\n", - "\n", - " object store: : 0.00 row [00:04, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 3.80: Completed work on page 16\n", - "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:06, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:04, ? row/s]\n", - "\n", - "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 0; Queued blocks: 0; Resources: 0.0 CPU, 0.0B object store: : 0.00 row [00:04, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:05, ? row/s]\n", - "\n", - " object store: : 0.00 row [00:05, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 4.21: Completed work on page 20\n", - "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:07, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:05, ? row/s]\n", - "\n", - " object store: : 0.00 row [00:05, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 4.21: Completed work on page 20\n", - "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:07, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:05, ? row/s]\n", - "\n", - " object store: : 0.00 row [00:05, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 4.35: Completed work on page 19\n", - "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:07, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:05, ? row/s]\n", - "\n", - " object store: : 0.00 row [00:05, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 4.35: Completed work on page 19\n", - "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:07, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:05, ? row/s]\n", - "\n", - " object store: : 0.00 row [00:05, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 4.37: Completed work on page 17\n", - "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:07, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:05, ? row/s]\n", - "\n", - " object store: : 0.00 row [00:05, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 4.37: Completed work on page 17\n", - "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:07, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:05, ? row/s]\n", - "\n", - " object store: : 0.00 row [00:05, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 4.51: Completed work on page 21\n", - "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:07, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:05, ? row/s]\n", - "\n", - " object store: : 0.00 row [00:05, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 4.51: Completed work on page 21\n", - "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:07, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:05, ? row/s]\n", - "\n", - "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 0; Queued blocks: 0; Resources: 0.0 CPU, 0.0B object store: : 0.00 row [00:05, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:06, ? row/s]\n", - "\n", - " object store: : 0.00 row [00:06, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 5.23: Completed work on page 22\n", - "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:08, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:06, ? row/s]\n", - "\n", - " object store: : 0.00 row [00:06, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 5.23: Completed work on page 22\n", - "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:08, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:06, ? row/s]\n", - "\n", - " object store: : 0.00 row [00:06, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 5.34: Completed work on page 23\n", - "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:08, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:06, ? row/s]\n", - "\n", - " object store: : 0.00 row [00:06, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 5.34: Completed work on page 23\n", - "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:08, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:06, ? row/s]\n", - "\n", - " object store: : 0.00 row [00:06, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 5.80: Completed work on page 24\n", - "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:08, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:06, ? row/s]\n", - "\n", - " object store: : 0.00 row [00:06, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 5.80: Completed work on page 24\n", - "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:08, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:06, ? row/s]\n", - "\n", - " object store: : 0.00 row [00:06, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 5.88: Completed work on page 25\n", - "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:08, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:06, ? row/s]\n", - "\n", - " object store: : 0.00 row [00:06, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 5.88: Completed work on page 25\n", - "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:08, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:06, ? row/s]\n", - "\n", - "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 0; Queued blocks: 0; Resources: 0.0 CPU, 0.0B object store: : 0.00 row [00:06, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:07, ? row/s]\n", - "\n", - " object store: : 0.00 row [00:07, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:httpx:HTTP Request: POST https://api.aryn.cloud/v1/document/partition \"HTTP/1.1 200 OK\"\n", - "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:08, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:07, ? row/s]\n", - "\n", - "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 0; Queued blocks: 0; Resources: 0.0 CPU, 0.0B object store: : 0.00 row [00:07, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:08, ? row/s]\n", - "\n", - " object store: : 0.00 row [00:08, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.00: Server version aryn-partitioner-0.20250323.124348 Model version 1.4\n", - "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:09, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:08, ? row/s]\n", - "\n", - " object store: : 0.00 row [00:08, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.00: Server version aryn-partitioner-0.20250323.124348 Model version 1.4\n", - "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:09, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:08, ? row/s]\n", - "\n", - " object store: : 0.00 row [00:08, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.00: Received request with aryn_call_id=aryn:call-aywcdsx681bfya8pwtutmww\n", - "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:09, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:08, ? row/s]\n", - "\n", - " object store: : 0.00 row [00:08, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.00: Received request with aryn_call_id=aryn:call-aywcdsx681bfya8pwtutmww\n", - "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:09, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:08, ? row/s]\n", - "\n", - " object store: : 0.00 row [00:08, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.00: Waiting for scheduling\n", - "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:09, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:08, ? row/s]\n", - "\n", - " object store: : 0.00 row [00:08, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.00: Waiting for scheduling\n", - "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:09, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:08, ? row/s]\n", - "\n", - " object store: : 0.00 row [00:08, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.01: Preprocessing document\n", - "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:10, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:08, ? row/s]\n", - "\n", - " object store: : 0.00 row [00:08, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.01: Preprocessing document\n", - "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:10, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:08, ? row/s]\n", - "\n", - " object store: : 0.00 row [00:08, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.01: Done preprocessing document\n", - "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:10, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:08, ? row/s]\n", - "\n", - " object store: : 0.00 row [00:08, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.01: Done preprocessing document\n", - "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:10, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:08, ? row/s]\n", - "\n", - " object store: : 0.00 row [00:08, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.73: Completed work on page 2\n", - "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:10, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:08, ? row/s]\n", - "\n", - " object store: : 0.00 row [00:08, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.73: Completed work on page 2\n", - "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:10, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:08, ? row/s]\n", - "\n", - " object store: : 0.00 row [00:08, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.84: Completed work on page 3\n", - "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:10, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:08, ? row/s]\n", - "\n", - " object store: : 0.00 row [00:08, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.84: Completed work on page 3\n", - "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:10, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:08, ? row/s]\n", - "\n", - " object store: : 0.00 row [00:08, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.94: Completed work on page 1\n", - "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:10, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:09, ? row/s]\n", - "\n", - " object store: : 0.00 row [00:09, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.94: Completed work on page 1\n", - "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:10, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:09, ? row/s]\n", - "\n", - "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 0; Queued blocks: 0; Resources: 0.0 CPU, 0.0B object store: : 0.00 row [00:09, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:09, ? row/s]\n", - "\n", - " object store: : 0.00 row [00:09, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.16: Completed work on page 4\n", - "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:10, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:09, ? row/s]\n", - "\n", - " object store: : 0.00 row [00:09, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.16: Completed work on page 4\n", - "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:10, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:09, ? row/s]\n", - "\n", - " object store: : 0.00 row [00:09, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.29: Completed work on page 5\n", - "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:10, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:09, ? row/s]\n", - "\n", - " object store: : 0.00 row [00:09, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.29: Completed work on page 5\n", - "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:10, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:09, ? row/s]\n", - "\n", - " object store: : 0.00 row [00:09, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.55: Completed work on page 6\n", - "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:10, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:09, ? row/s]\n", - "\n", - " object store: : 0.00 row [00:09, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.55: Completed work on page 6\n", - "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:11, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:09, ? row/s]\n", - "\n", - " object store: : 0.00 row [00:09, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.73: Completed work on page 7\n", - "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:11, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:09, ? row/s]\n", - "\n", - " object store: : 0.00 row [00:09, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.73: Completed work on page 7\n", - "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:11, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:09, ? row/s]\n", - "\n", - " object store: : 0.00 row [00:09, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.91: Completed work on page 8\n", - "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:11, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:09, ? row/s]\n", - "\n", - " object store: : 0.00 row [00:09, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.91: Completed work on page 8\n", - "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:11, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:09, ? row/s]\n", - "\n", - "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 0; Queued blocks: 0; Resources: 0.0 CPU, 0.0B object store: : 0.00 row [00:09, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:10, ? row/s]\n", - "\n", - " object store: : 0.00 row [00:10, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.08: Completed work on page 9\n", - "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:11, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:10, ? row/s]\n", - "\n", - " object store: : 0.00 row [00:10, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.08: Completed work on page 9\n", - "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:11, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:10, ? row/s]\n", - "\n", - " object store: : 0.00 row [00:10, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.24: Completed work on page 10\n", - "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:12, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:10, ? row/s]\n", - "\n", - " object store: : 0.00 row [00:10, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.24: Completed work on page 10\n", - "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:12, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:10, ? row/s]\n", - "\n", - " object store: : 0.00 row [00:10, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.42: Completed work on page 11\n", - "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:12, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:10, ? row/s]\n", - "\n", - " object store: : 0.00 row [00:10, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.42: Completed work on page 11\n", - "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:12, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:10, ? row/s]\n", - "\n", - " object store: : 0.00 row [00:10, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.59: Completed work on page 12\n", - "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:12, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:10, ? row/s]\n", - "\n", - " object store: : 0.00 row [00:10, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.59: Completed work on page 12\n", - "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:12, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:10, ? row/s]\n", - "\n", - " object store: : 0.00 row [00:10, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.76: Completed work on page 13\n", - "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:12, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:10, ? row/s]\n", - "\n", - " object store: : 0.00 row [00:10, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.76: Completed work on page 13\n", - "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:12, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:10, ? row/s]\n", - "\n", - " object store: : 0.00 row [00:10, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.93: Completed work on page 14\n", - "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:12, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:10, ? row/s]\n", - "\n", - " object store: : 0.00 row [00:10, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.93: Completed work on page 14\n", - "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:12, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:10, ? row/s]\n", - "\n", - " object store: : 0.00 row [00:10, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 3.10: Completed work on page 15\n", - "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:12, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:11, ? row/s]\n", - "\n", - " object store: : 0.00 row [00:11, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 3.10: Completed work on page 15\n", - "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:12, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:11, ? row/s]\n", - "\n", - " object store: : 0.00 row [00:11, ? row/s]\n", - " \n", - "\n", - " \n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:11, ? row/s](MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875)\n", - "\n", - "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 0; Queued blocks: 0; Resources: 0.0 CPU, 0.0B object store: : 0.00 row [00:11, ? row/s]\n", - "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:12, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:11, ? row/s]\n", - "\n", - " object store: : 0.00 row [00:11, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 3.38: Completed work on page 16\n", - "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:12, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:11, ? row/s]\n", - "\n", - " object store: : 0.00 row [00:11, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 3.45: Completed work on page 17\n", - "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:13, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:11, ? row/s]\n", - "\n", - " object store: : 0.00 row [00:11, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 3.45: Completed work on page 17\n", - "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:13, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:11, ? row/s]\n", - "\n", - " object store: : 0.00 row [00:11, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 3.63: Completed work on page 18\n", - "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:13, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:11, ? row/s]\n", - "\n", - " object store: : 0.00 row [00:11, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 3.63: Completed work on page 18\n", - "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:13, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:11, ? row/s]\n", - "\n", - " object store: : 0.00 row [00:11, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 3.76: Completed work on page 19\n", - "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:13, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:11, ? row/s]\n", - "\n", - " object store: : 0.00 row [00:11, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 3.76: Completed work on page 19\n", - "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:13, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:11, ? row/s]\n", - "\n", - " object store: : 0.00 row [00:11, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 4.21: Completed work on page 22\n", - "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:13, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:12, ? row/s]\n", - "\n", - " object store: : 0.00 row [00:12, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 4.21: Completed work on page 22\n", - "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:13, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:12, ? row/s]\n", - "\n", - " object store: : 0.00 row [00:12, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 4.34: Completed work on page 23\n", - "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:14, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:12, ? row/s]\n", - "\n", - " object store: : 0.00 row [00:12, ? row/s]\n", - " \n", - "\n", - " \n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:12, ? row/s](MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875)\n", - "\n", - "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 0; Queued blocks: 0; Resources: 0.0 CPU, 0.0B object store: : 0.00 row [00:12, ? row/s]\n", - "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:14, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:12, ? row/s]\n", - "\n", - " object store: : 0.00 row [00:12, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 4.96: Completed work on page 20\n", - "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:14, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:12, ? row/s]\n", - "\n", - " object store: : 0.00 row [00:12, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 4.96: Completed work on page 20\n", - "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:14, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:12, ? row/s]\n", - "\n", - " object store: : 0.00 row [00:12, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 5.04: Completed work on page 21\n", - "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:14, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:12, ? row/s]\n", - "\n", - " object store: : 0.00 row [00:12, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 5.04: Completed work on page 21\n", - "Running Dataset. Active & requested resources: 1/11 CPU, 256.0MB/1.0GB object store: : 0.00 row [00:14, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: : 0.00 row [00:12, ? row/s]\n", - "\n", - "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 0; Queued blocks: 0; Resources: 0.0 CPU, 0.0B object store: : 0.00 row [00:12, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: 0%| | 0.00/177 [00:13...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: 2%|▉ | 3.00/177 [00:13<13:19, 4.59s/ row]\n", - "Running Dataset. Active & requested resources: 1/11 CPU, 554.0KB/1.0GB object store: : 0.00 row [00:15, ? row/s]ks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: 2%|▉ | 3.00/177 [00:13<13:19, 4.59s/ row]\n", - "\n", - "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 1; Queued blocks: 0; Resources: 1.0 CPU, 256.0MB object store: : 0.00 row [00:13, ? row/s]\n", - "\n", - " 0MB object store: : 0.00 row [00:13, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:httpx:HTTP Request: POST https://api.aryn.cloud/v1/document/partition \"HTTP/1.1 200 OK\"\n", - "Running Dataset. Active & requested resources: 1/11 CPU, 554.0KB/1.0GB object store: : 0.00 row [00:15, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 1 [backpressured]; Actors: 1; Queued blocks: 58; Resources: 1.0 CPU, 256.0MB object store; [locality off]: 2%|▉ | 3.00/177 [00:14<13:19, 4.59s/ row]\n", - "\n", - "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 1; Queued blocks: 0; Resources: 1.0 CPU, 256.0MB object store: : 0.00 row [00:14, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:14<13:19, 4.59s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:14<13:19, 4.59s/ row]\n", - "\n", - " 0MB object store: : 0.00 row [00:14, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.00: Server version aryn-partitioner-0.20250323.124348 Model version 1.4\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:16, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:15<13:19, 4.59s/ row]\n", - "\n", - " 0MB object store: : 0.00 row [00:15, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.00: Server version aryn-partitioner-0.20250323.124348 Model version 1.4\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:16, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:15<13:19, 4.59s/ row]\n", - "\n", - " 0MB object store: : 0.00 row [00:15, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.00: Received request with aryn_call_id=aryn:call-u9hgsqsldoi0j5e71uw3lw1\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:16, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:15<13:19, 4.59s/ row]\n", - "\n", - " 0MB object store: : 0.00 row [00:15, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.00: Received request with aryn_call_id=aryn:call-u9hgsqsldoi0j5e71uw3lw1\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:16, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:15<13:19, 4.59s/ row]\n", - "\n", - " 0MB object store: : 0.00 row [00:15, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.00: Waiting for scheduling\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:16, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:15<13:19, 4.59s/ row]\n", - "\n", - " 0MB object store: : 0.00 row [00:15, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.00: Waiting for scheduling\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:16, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:15<13:19, 4.59s/ row]\n", - "\n", - " 0MB object store: : 0.00 row [00:15, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.01: Preprocessing document\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:16, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:15<13:19, 4.59s/ row]\n", - "\n", - " 0MB object store: : 0.00 row [00:15, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.01: Preprocessing document\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:16, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:15<13:19, 4.59s/ row]\n", - "\n", - " 0MB object store: : 0.00 row [00:15, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.01: Done preprocessing document\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:16, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:15<13:19, 4.59s/ row]\n", - "\n", - " 0MB object store: : 0.00 row [00:15, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.01: Done preprocessing document\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:16, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:15<13:19, 4.59s/ row]\n", - "\n", - " 0MB object store: : 0.00 row [00:15, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.65: Completed work on page 2\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:16, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:15<13:19, 4.59s/ row]\n", - "\n", - " 0MB object store: : 0.00 row [00:15, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.65: Completed work on page 2\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:16, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:15<13:19, 4.59s/ row]\n", - "\n", - " 0MB object store: : 0.00 row [00:15, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.68: Completed work on page 1\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:16, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:15<13:19, 4.59s/ row]\n", - "\n", - " 0MB object store: : 0.00 row [00:15, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.68: Completed work on page 1\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:17, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:15<13:19, 4.59s/ row]\n", - "\n", - " 0MB object store: : 0.00 row [00:15, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.86: Completed work on page 3\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:17, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:15<13:19, 4.59s/ row]\n", - "\n", - " 0MB object store: : 0.00 row [00:15, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.86: Completed work on page 3\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:17, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:15<13:19, 4.59s/ row]\n", - "\n", - " 0MB object store: : 0.00 row [00:15, ? row/s]\n", - " \n", - "\n", - "(pid=77877) INFO:root:Spurious log 1: Verifying that log messages are propagated \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:17, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:15<13:19, 4.59s/ row]\n", - "\n", - "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 1; Queued blocks: 0; Resources: 1.0 CPU, 256.0MB object store: : 0.00 row [00:15, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:15<13:19, 4.59s/ row]\n", - "\n", - " 0MB object store: : 0.00 row [00:15, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.18: Completed work on page 4\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:17, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:16<13:19, 4.59s/ row]\n", - "\n", - " 0MB object store: : 0.00 row [00:16, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.18: Completed work on page 4\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:17, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:16<13:19, 4.59s/ row]\n", - "\n", - " 0MB object store: : 0.00 row [00:16, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.32: Completed work on page 5\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:17, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:16<13:19, 4.59s/ row]\n", - "\n", - " 0MB object store: : 0.00 row [00:16, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.32: Completed work on page 5\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:17, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:16<13:19, 4.59s/ row]\n", - "\n", - " 0MB object store: : 0.00 row [00:16, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.55: Completed work on page 6\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:17, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:16<13:19, 4.59s/ row]\n", - "\n", - " 0MB object store: : 0.00 row [00:16, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.55: Completed work on page 6\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:17, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:16<13:19, 4.59s/ row]\n", - "\n", - " 0MB object store: : 0.00 row [00:16, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.79: Completed work on page 7\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:17, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:16<13:19, 4.59s/ row]\n", - "\n", - " 0MB object store: : 0.00 row [00:16, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.79: Completed work on page 7\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:17, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:16<13:19, 4.59s/ row]\n", - "\n", - " 0MB object store: : 0.00 row [00:16, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.99: Completed work on page 8\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:17, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:16<13:19, 4.59s/ row]\n", - "\n", - " 0MB object store: : 0.00 row [00:16, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.99: Completed work on page 8\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:17, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:16<13:19, 4.59s/ row]\n", - "\n", - "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 1; Queued blocks: 0; Resources: 1.0 CPU, 256.0MB object store: : 0.00 row [00:16, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:16<13:19, 4.59s/ row]\n", - "\n", - " 0MB object store: : 0.00 row [00:16, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.17: Completed work on page 9\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:18, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:17<13:19, 4.59s/ row]\n", - "\n", - " 0MB object store: : 0.00 row [00:17, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.17: Completed work on page 9\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:18, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:17<13:19, 4.59s/ row]\n", - "\n", - " 0MB object store: : 0.00 row [00:17, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.33: Completed work on page 10\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:18, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:17<13:19, 4.59s/ row]\n", - "\n", - " 0MB object store: : 0.00 row [00:17, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.33: Completed work on page 10\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:18, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:17<13:19, 4.59s/ row]\n", - "\n", - " 0MB object store: : 0.00 row [00:17, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.62: Completed work on page 11\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:18, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:17<13:19, 4.59s/ row]\n", - "\n", - " 0MB object store: : 0.00 row [00:17, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.62: Completed work on page 11\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:18, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:17<13:19, 4.59s/ row]\n", - "\n", - " 0MB object store: : 0.00 row [00:17, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.69: Completed work on page 12\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:18, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:17<13:19, 4.59s/ row]\n", - "\n", - " 0MB object store: : 0.00 row [00:17, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.69: Completed work on page 12\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:18, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:17<13:19, 4.59s/ row]\n", - "\n", - " 0MB object store: : 0.00 row [00:17, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.87: Completed work on page 13\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:18, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:17<13:19, 4.59s/ row]\n", - "\n", - " 0MB object store: : 0.00 row [00:17, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.87: Completed work on page 13\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:18, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:17<13:19, 4.59s/ row]\n", - "\n", - "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 1; Queued blocks: 0; Resources: 1.0 CPU, 256.0MB object store: : 0.00 row [00:17, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:18<13:19, 4.59s/ row]\n", - "\n", - " 0MB object store: : 0.00 row [00:18, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 3.05: Completed work on page 14\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:19, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:18<13:19, 4.59s/ row]\n", - "\n", - " 0MB object store: : 0.00 row [00:18, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 3.05: Completed work on page 14\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:19, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:18<13:19, 4.59s/ row]\n", - "\n", - " 0MB object store: : 0.00 row [00:18, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 3.23: Completed work on page 15\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:19, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:18<13:19, 4.59s/ row]\n", - "\n", - " 0MB object store: : 0.00 row [00:18, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 3.23: Completed work on page 15\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:19, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:18<13:19, 4.59s/ row]\n", - "\n", - " 0MB object store: : 0.00 row [00:18, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 3.41: Completed work on page 16\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:19, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:18<13:19, 4.59s/ row]\n", - "\n", - " 0MB object store: : 0.00 row [00:18, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 3.41: Completed work on page 16\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:19, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:18<13:19, 4.59s/ row]\n", - "\n", - " 0MB object store: : 0.00 row [00:18, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 3.59: Completed work on page 17\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:19, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:18<13:19, 4.59s/ row]\n", - "\n", - " 0MB object store: : 0.00 row [00:18, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 3.59: Completed work on page 17\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:19, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:18<13:19, 4.59s/ row]\n", - "\n", - " 0MB object store: : 0.00 row [00:18, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 3.77: Completed work on page 18\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:19, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:18<13:19, 4.59s/ row]\n", - "\n", - " 0MB object store: : 0.00 row [00:18, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 3.77: Completed work on page 18\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:19, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:18<13:19, 4.59s/ row]\n", - "\n", - " 0MB object store: : 0.00 row [00:18, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 3.96: Completed work on page 19\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:19, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:18<13:19, 4.59s/ row]\n", - "\n", - " 0MB object store: : 0.00 row [00:18, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 3.96: Completed work on page 19\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:19, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:18<13:19, 4.59s/ row]\n", - "\n", - "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 1; Queued blocks: 0; Resources: 1.0 CPU, 256.0MB object store: : 0.00 row [00:18, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:19<13:19, 4.59s/ row]\n", - "\n", - " 0MB object store: : 0.00 row [00:19, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 4.12: Completed work on page 20\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:20, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:19<13:19, 4.59s/ row]\n", - "\n", - " 0MB object store: : 0.00 row [00:19, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 4.12: Completed work on page 20\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:20, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:19<13:19, 4.59s/ row]\n", - "\n", - " 0MB object store: : 0.00 row [00:19, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 4.34: Completed work on page 21\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:20, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:19<13:19, 4.59s/ row]\n", - "\n", - " 0MB object store: : 0.00 row [00:19, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 4.34: Completed work on page 21\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:20, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:19<13:19, 4.59s/ row]\n", - "\n", - " 0MB object store: : 0.00 row [00:19, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 4.64: Completed work on page 24\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:20, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:19<13:19, 4.59s/ row]\n", - "\n", - " 0MB object store: : 0.00 row [00:19, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 4.64: Completed work on page 24\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:20, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:19<13:19, 4.59s/ row]\n", - "\n", - " 0MB object store: : 0.00 row [00:19, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 4.78: Completed work on page 25\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:20, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:19<13:19, 4.59s/ row]\n", - "\n", - " 0MB object store: : 0.00 row [00:19, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 4.78: Completed work on page 25\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:20, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:19<13:19, 4.59s/ row]\n", - "\n", - " 0MB object store: : 0.00 row [00:19, ? row/s]\n", - " \n", - "\n", - "(MapBatches(materialize)->MapBatches(split_doc)->MapBatches(llm_map)->MapBatches(parse_json_and_cast)->MapBatches(process_doc)->MapBatches(removeOriginalElements)->MapBatches(process_doc)->MapBatches(mergeDialogue)->MapBatches(process_doc)->MapBatches(OpenAIEmbedder)->MapBatches(_write_docs_tt) pid=77877) INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\" [repeated 2x across cluster]\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:20, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:19<13:19, 4.59s/ row]\n", - "\n", - "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 1; Queued blocks: 0; Resources: 1.0 CPU, 256.0MB object store: : 0.00 row [00:19, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:20<13:19, 4.59s/ row]\n", - "\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:21, ? row/s]0MB object store: : 0.00 row [00:20, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:21<13:19, 4.59s/ row]\n", - "\n", - " 0MB object store: : 0.00 row [00:21, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 6.12: Completed work on page 23\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:22, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:21<13:19, 4.59s/ row]\n", - "\n", - " 0MB object store: : 0.00 row [00:21, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 6.12: Completed work on page 23\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:22, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:21<13:19, 4.59s/ row]\n", - "\n", - " 0MB object store: : 0.00 row [00:21, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 6.16: Completed work on page 22\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:22, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:21<13:19, 4.59s/ row]\n", - "\n", - " 0MB object store: : 0.00 row [00:21, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 6.16: Completed work on page 22\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:22, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:21<13:19, 4.59s/ row]\n", - "\n", - "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 1; Queued blocks: 0; Resources: 1.0 CPU, 256.0MB object store: : 0.00 row [00:21, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:22<13:19, 4.59s/ row]\n", - "\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:23, ? row/s]0MB object store: : 0.00 row [00:22, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:23<13:19, 4.59s/ row]\n", - "\n", - " 0MB object store: : 0.00 row [00:23, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.00: Server version aryn-partitioner-0.20250323.124348 Model version 1.4\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:24, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:23<13:19, 4.59s/ row]\n", - "\n", - " 0MB object store: : 0.00 row [00:23, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.00: Server version aryn-partitioner-0.20250323.124348 Model version 1.4\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:24, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:23<13:19, 4.59s/ row]\n", - "\n", - " 0MB object store: : 0.00 row [00:23, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.00: Received request with aryn_call_id=aryn:call-o4kfhvxsic918s6dmk0rkye\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:25, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:23<13:19, 4.59s/ row]\n", - "\n", - " 0MB object store: : 0.00 row [00:23, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.00: Received request with aryn_call_id=aryn:call-o4kfhvxsic918s6dmk0rkye\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:25, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:23<13:19, 4.59s/ row]\n", - "\n", - " 0MB object store: : 0.00 row [00:23, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.00: Waiting for scheduling\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:25, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:23<13:19, 4.59s/ row]\n", - "\n", - " 0MB object store: : 0.00 row [00:23, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.00: Waiting for scheduling\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:25, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:23<13:19, 4.59s/ row]\n", - "\n", - " 0MB object store: : 0.00 row [00:23, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.03: Preprocessing document\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:25, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:23<13:19, 4.59s/ row]\n", - "\n", - " 0MB object store: : 0.00 row [00:23, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.03: Preprocessing document\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:25, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:23<13:19, 4.59s/ row]\n", - "\n", - " 0MB object store: : 0.00 row [00:23, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.03: Done preprocessing document\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:25, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:23<13:19, 4.59s/ row]\n", - "\n", - " 0MB object store: : 0.00 row [00:23, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.03: Done preprocessing document\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:25, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:23<13:19, 4.59s/ row]\n", - "\n", - " 0MB object store: : 0.00 row [00:23, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.66: Completed work on page 1\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:25, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:23<13:19, 4.59s/ row]\n", - "\n", - " 0MB object store: : 0.00 row [00:23, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.66: Completed work on page 1\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:25, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:23<13:19, 4.59s/ row]\n", - "\n", - "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 1; Queued blocks: 0; Resources: 1.0 CPU, 256.0MB object store: : 0.00 row [00:23, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:24<13:19, 4.59s/ row]\n", - "\n", - " 0MB object store: : 0.00 row [00:24, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.02: Completed work on page 4\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:26, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:24<13:19, 4.59s/ row]\n", - "\n", - " 0MB object store: : 0.00 row [00:24, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.02: Completed work on page 4\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:26, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:24<13:19, 4.59s/ row]\n", - "\n", - " 0MB object store: : 0.00 row [00:24, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.23: Completed work on page 6\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:26, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:24<13:19, 4.59s/ row]\n", - "\n", - " 0MB object store: : 0.00 row [00:24, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.23: Completed work on page 6\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:26, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:24<13:19, 4.59s/ row]\n", - "\n", - " 0MB object store: : 0.00 row [00:24, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.34: Completed work on page 5\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:26, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:24<13:19, 4.59s/ row]\n", - "\n", - " 0MB object store: : 0.00 row [00:24, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.34: Completed work on page 5\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:26, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:24<13:19, 4.59s/ row]\n", - "\n", - " 0MB object store: : 0.00 row [00:24, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.34: Completed work on page 7\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:26, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:24<13:19, 4.59s/ row]\n", - "\n", - " 0MB object store: : 0.00 row [00:24, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.34: Completed work on page 7\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:26, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:24<13:19, 4.59s/ row]\n", - "\n", - " 0MB object store: : 0.00 row [00:24, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.49: Completed work on page 8\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:26, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:24<13:19, 4.59s/ row]\n", - "\n", - " 0MB object store: : 0.00 row [00:24, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.49: Completed work on page 8\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:26, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:24<13:19, 4.59s/ row]\n", - "\n", - " 0MB object store: : 0.00 row [00:24, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.49: Completed work on page 3\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:26, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:24<13:19, 4.59s/ row]\n", - "\n", - " 0MB object store: : 0.00 row [00:24, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.49: Completed work on page 3\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:26, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:24<13:19, 4.59s/ row]\n", - "\n", - " 0MB object store: : 0.00 row [00:24, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.49: Completed work on page 2\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:26, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:24<13:19, 4.59s/ row]\n", - "\n", - " 0MB object store: : 0.00 row [00:24, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.49: Completed work on page 2\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:26, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:25<13:19, 4.59s/ row]\n", - "\n", - " 0MB object store: : 0.00 row [00:25, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.60: Completed work on page 9\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:26, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:25<13:19, 4.59s/ row]\n", - "\n", - " 0MB object store: : 0.00 row [00:25, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.60: Completed work on page 9\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:26, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:25<13:19, 4.59s/ row]\n", - "\n", - " 0MB object store: : 0.00 row [00:25, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.73: Completed work on page 10\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:26, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:25<13:19, 4.59s/ row]\n", - "\n", - " 0MB object store: : 0.00 row [00:25, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.73: Completed work on page 10\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:26, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:25<13:19, 4.59s/ row]\n", - "\n", - " 0MB object store: : 0.00 row [00:25, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.83: Completed work on page 11\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:26, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:25<13:19, 4.59s/ row]\n", - "\n", - " 0MB object store: : 0.00 row [00:25, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.83: Completed work on page 11\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:26, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:25<13:19, 4.59s/ row]\n", - "\n", - " 0MB object store: : 0.00 row [00:25, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.93: Completed work on page 12\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:26, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:25<13:19, 4.59s/ row]\n", - "\n", - " 0MB object store: : 0.00 row [00:25, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.93: Completed work on page 12\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:26, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:25<13:19, 4.59s/ row]\n", - "\n", - " 0MB object store: : 0.00 row [00:25, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:httpx:HTTP Request: POST https://api.aryn.cloud/v1/document/partition \"HTTP/1.1 200 OK\"\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:26, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:25<13:19, 4.59s/ row]\n", - "\n", - "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 1; Queued blocks: 0; Resources: 1.0 CPU, 256.0MB object store: : 0.00 row [00:25, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:25<13:19, 4.59s/ row]\n", - "\n", - " 0MB object store: : 0.00 row [00:25, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.06: Completed work on page 13\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:26, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:25<13:19, 4.59s/ row]\n", - "\n", - " 0MB object store: : 0.00 row [00:25, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.06: Completed work on page 13\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:26, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:25<13:19, 4.59s/ row]\n", - "\n", - " 0MB object store: : 0.00 row [00:25, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.26: Completed work on page 14\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:26, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:25<13:19, 4.59s/ row]\n", - "\n", - " 0MB object store: : 0.00 row [00:25, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.26: Completed work on page 14\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:27, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:25<13:19, 4.59s/ row]\n", - "\n", - " 0MB object store: : 0.00 row [00:25, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.84: Completed work on page 15\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:27, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:25<13:19, 4.59s/ row]\n", - "\n", - " 0MB object store: : 0.00 row [00:25, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.84: Completed work on page 15\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:27, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:25<13:19, 4.59s/ row]\n", - "\n", - "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 1; Queued blocks: 0; Resources: 1.0 CPU, 256.0MB object store: : 0.00 row [00:25, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:26<13:19, 4.59s/ row]\n", - "\n", - " 0MB object store: : 0.00 row [00:26, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 3.05: Completed work on page 16\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:28, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:26<13:19, 4.59s/ row]\n", - "\n", - " 0MB object store: : 0.00 row [00:26, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 3.05: Completed work on page 16\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:28, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:26<13:19, 4.59s/ row]\n", - "\n", - " 0MB object store: : 0.00 row [00:26, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 3.17: Completed work on page 17\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:28, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:26<13:19, 4.59s/ row]\n", - "\n", - " 0MB object store: : 0.00 row [00:26, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 3.17: Completed work on page 17\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:28, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:26<13:19, 4.59s/ row]\n", - "\n", - " 0MB object store: : 0.00 row [00:26, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 3.52: Completed work on page 19\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:28, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:26<13:19, 4.59s/ row]\n", - "\n", - " 0MB object store: : 0.00 row [00:26, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 3.52: Completed work on page 19\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:28, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:26<13:19, 4.59s/ row]\n", - "\n", - " 0MB object store: : 0.00 row [00:26, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 3.58: Completed work on page 21\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:28, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:26<13:19, 4.59s/ row]\n", - "\n", - " 0MB object store: : 0.00 row [00:26, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 3.58: Completed work on page 21\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:28, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:26<13:19, 4.59s/ row]\n", - "\n", - " 0MB object store: : 0.00 row [00:26, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 3.68: Completed work on page 22\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:28, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:26<13:19, 4.59s/ row]\n", - "\n", - " 0MB object store: : 0.00 row [00:26, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 3.68: Completed work on page 22\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:28, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:26<13:19, 4.59s/ row]\n", - "\n", - " 0MB object store: : 0.00 row [00:26, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 3.68: Completed work on page 20\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:28, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:26<13:19, 4.59s/ row]\n", - "\n", - " 0MB object store: : 0.00 row [00:26, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 3.68: Completed work on page 20\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:28, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:26<13:19, 4.59s/ row]\n", - "\n", - " 0MB object store: : 0.00 row [00:26, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 3.87: Completed work on page 23\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:28, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:26<13:19, 4.59s/ row]\n", - "\n", - " 0MB object store: : 0.00 row [00:26, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 3.87: Completed work on page 23\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:28, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:26<13:19, 4.59s/ row]\n", - "\n", - " 0MB object store: : 0.00 row [00:26, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 4.03: Completed work on page 18\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:28, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:27<13:19, 4.59s/ row]\n", - "\n", - " 0MB object store: : 0.00 row [00:27, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 4.03: Completed work on page 18\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:28, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:27<13:19, 4.59s/ row]\n", - "\n", - " 0MB object store: : 0.00 row [00:27, ? row/s]\n", - " \n", - "\n", - "(MapBatches(materialize)->MapBatches(split_doc)->MapBatches(llm_map)->MapBatches(parse_json_and_cast)->MapBatches(process_doc)->MapBatches(removeOriginalElements)->MapBatches(process_doc)->MapBatches(mergeDialogue)->MapBatches(process_doc)->MapBatches(OpenAIEmbedder)->MapBatches(_write_docs_tt) pid=77877) INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:28, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:27<13:19, 4.59s/ row]\n", - "\n", - "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 1; Queued blocks: 0; Resources: 1.0 CPU, 256.0MB object store: : 0.00 row [00:27, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:27<13:19, 4.59s/ row]\n", - "\n", - " 0MB object store: : 0.00 row [00:27, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 4.19: Completed work on page 25\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:29, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:27<13:19, 4.59s/ row]\n", - "\n", - " 0MB object store: : 0.00 row [00:27, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 4.19: Completed work on page 25\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:29, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:27<13:19, 4.59s/ row]\n", - "\n", - " 0MB object store: : 0.00 row [00:27, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 4.28: Completed work on page 24\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:29, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:27<13:19, 4.59s/ row]\n", - "\n", - " 0MB object store: : 0.00 row [00:27, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 4.28: Completed work on page 24\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:29, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:27<13:19, 4.59s/ row]\n", - "\n", - " 0MB object store: : 0.00 row [00:27, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 4.35: Completed work on page 27\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:29, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:27<13:19, 4.59s/ row]\n", - "\n", - " 0MB object store: : 0.00 row [00:27, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 4.35: Completed work on page 27\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:29, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:27<13:19, 4.59s/ row]\n", - "\n", - " 0MB object store: : 0.00 row [00:27, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 4.44: Completed work on page 26\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:29, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:27<13:19, 4.59s/ row]\n", - "\n", - " 0MB object store: : 0.00 row [00:27, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 4.44: Completed work on page 26\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:29, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:27<13:19, 4.59s/ row]\n", - "\n", - " 0MB object store: : 0.00 row [00:27, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 4.63: Completed work on page 30\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:29, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:27<13:19, 4.59s/ row]\n", - "\n", - " 0MB object store: : 0.00 row [00:27, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 4.63: Completed work on page 30\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:29, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:27<13:19, 4.59s/ row]\n", - "\n", - " 0MB object store: : 0.00 row [00:27, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 4.82: Completed work on page 28\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:29, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:27<13:19, 4.59s/ row]\n", - "\n", - " 0MB object store: : 0.00 row [00:27, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 4.82: Completed work on page 28\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:29, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:27<13:19, 4.59s/ row]\n", - "\n", - " 0MB object store: : 0.00 row [00:27, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 4.88: Completed work on page 29\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:29, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:28<13:19, 4.59s/ row]\n", - "\n", - " 0MB object store: : 0.00 row [00:28, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 4.88: Completed work on page 29\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:29, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:28<13:19, 4.59s/ row]\n", - "\n", - " 0MB object store: : 0.00 row [00:28, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 4.97: Completed work on page 33\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:29, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:28<13:19, 4.59s/ row]\n", - "\n", - " 0MB object store: : 0.00 row [00:28, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 4.97: Completed work on page 33\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:29, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:28<13:19, 4.59s/ row]\n", - "\n", - "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 1; Queued blocks: 0; Resources: 1.0 CPU, 256.0MB object store: : 0.00 row [00:28, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:28<13:19, 4.59s/ row]\n", - "\n", - " 0MB object store: : 0.00 row [00:28, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 5.17: Completed work on page 32\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:30, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:28<13:19, 4.59s/ row]\n", - "\n", - " 0MB object store: : 0.00 row [00:28, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 5.17: Completed work on page 32\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:30, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:28<13:19, 4.59s/ row]\n", - "\n", - " 0MB object store: : 0.00 row [00:28, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 5.66: Completed work on page 35\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:30, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:28<13:19, 4.59s/ row]\n", - "\n", - " 0MB object store: : 0.00 row [00:28, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 5.66: Completed work on page 35\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:30, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:28<13:19, 4.59s/ row]\n", - "\n", - " 0MB object store: : 0.00 row [00:28, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 5.72: Completed work on page 34\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:30, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:28<13:19, 4.59s/ row]\n", - "\n", - " 0MB object store: : 0.00 row [00:28, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 5.72: Completed work on page 34\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:30, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:28<13:19, 4.59s/ row]\n", - "\n", - " 0MB object store: : 0.00 row [00:28, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 5.72: Completed work on page 36\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:30, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:28<13:19, 4.59s/ row]\n", - "\n", - " 0MB object store: : 0.00 row [00:28, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 5.72: Completed work on page 36\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:30, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:28<13:19, 4.59s/ row]\n", - "\n", - " 0MB object store: : 0.00 row [00:28, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 5.72: Completed work on page 37\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:30, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:28<13:19, 4.59s/ row]\n", - "\n", - " 0MB object store: : 0.00 row [00:28, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 5.72: Completed work on page 37\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:30, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:28<13:19, 4.59s/ row]\n", - "\n", - " 0MB object store: : 0.00 row [00:28, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 5.85: Completed work on page 31\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:30, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:28<13:19, 4.59s/ row]\n", - "\n", - " 0MB object store: : 0.00 row [00:28, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 5.85: Completed work on page 31\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:30, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:28<13:19, 4.59s/ row]\n", - "\n", - " 0MB object store: : 0.00 row [00:28, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 5.97: Completed work on page 38\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:30, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:28<13:19, 4.59s/ row]\n", - "\n", - " 0MB object store: : 0.00 row [00:28, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 5.97: Completed work on page 38\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:30, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:29<13:19, 4.59s/ row]\n", - "\n", - "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 1; Queued blocks: 0; Resources: 1.0 CPU, 256.0MB object store: : 0.00 row [00:29, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:29<13:19, 4.59s/ row]\n", - "\n", - " 0MB object store: : 0.00 row [00:29, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 6.10: Completed work on page 42\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:31, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:29<13:19, 4.59s/ row]\n", - "\n", - " 0MB object store: : 0.00 row [00:29, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 6.10: Completed work on page 42\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:31, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:29<13:19, 4.59s/ row]\n", - "\n", - "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 1; Queued blocks: 0; Resources: 1.0 CPU, 256.0MB object store: : 0.00 row [00:29, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:30<13:19, 4.59s/ row]\n", - "\n", - " 0MB object store: : 0.00 row [00:30, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 7.22: Completed work on page 40\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:32, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:30<13:19, 4.59s/ row]\n", - "\n", - " 0MB object store: : 0.00 row [00:30, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 7.22: Completed work on page 40\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:32, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:30<13:19, 4.59s/ row]\n", - "\n", - " 0MB object store: : 0.00 row [00:30, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 7.35: Completed work on page 39\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:32, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:30<13:19, 4.59s/ row]\n", - "\n", - " 0MB object store: : 0.00 row [00:30, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 7.35: Completed work on page 39\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:32, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:30<13:19, 4.59s/ row]\n", - "\n", - " 0MB object store: : 0.00 row [00:30, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 7.41: Completed work on page 41\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:32, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:30<13:19, 4.59s/ row]\n", - "\n", - " 0MB object store: : 0.00 row [00:30, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 7.41: Completed work on page 41\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 257.1MB/1.0GB object store: : 0.00 row [00:32, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 54; Resources: 1.0 CPU, 1.1MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:30<13:19, 4.59s/ row]\n", - "\n", - "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 1; Queued blocks: 0; Resources: 1.0 CPU, 256.0MB object store: : 0.00 row [00:30, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.9MB object store; [locality off]: 2%|█▏ | 3.00/177 [00:31<13:19, 4.59s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.9MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:31<15:26, 5.42s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.9MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:31<15:26, 5.42s/ row]\n", - "\n", - "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 2 [backpressured]; Queued blocks: 0; Resources: 2.0 CPU, 512.0MB object store: : 0.00 row [00:31, ? row/s]\n", - "\n", - " s: 2.0 CPU, 512.0MB object store: : 0.00 row [00:31, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.00: Server version aryn-partitioner-0.20250323.124348 Model version 1.4\n", - "Running Dataset. Active & requested resources: 3/11 CPU, 515.9MB/1.0GB object store: : 0.00 row [00:33, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.9MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:32<15:26, 5.42s/ row]\n", - "\n", - " s: 2.0 CPU, 512.0MB object store: : 0.00 row [00:32, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.00: Server version aryn-partitioner-0.20250323.124348 Model version 1.4\n", - "Running Dataset. Active & requested resources: 3/11 CPU, 515.9MB/1.0GB object store: : 0.00 row [00:33, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.9MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:32<15:26, 5.42s/ row]\n", - "\n", - " s: 2.0 CPU, 512.0MB object store: : 0.00 row [00:32, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.00: Received request with aryn_call_id=aryn:call-r7450dne5dssfkfw1nz1sr5\n", - "Running Dataset. Active & requested resources: 3/11 CPU, 515.9MB/1.0GB object store: : 0.00 row [00:33, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.9MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:32<15:26, 5.42s/ row]\n", - "\n", - " s: 2.0 CPU, 512.0MB object store: : 0.00 row [00:32, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.00: Received request with aryn_call_id=aryn:call-r7450dne5dssfkfw1nz1sr5\n", - "Running Dataset. Active & requested resources: 3/11 CPU, 515.9MB/1.0GB object store: : 0.00 row [00:33, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.9MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:32<15:26, 5.42s/ row]\n", - "\n", - " s: 2.0 CPU, 512.0MB object store: : 0.00 row [00:32, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.00: Waiting for scheduling \n", - "Running Dataset. Active & requested resources: 3/11 CPU, 515.9MB/1.0GB object store: : 0.00 row [00:33, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.9MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:32<15:26, 5.42s/ row]\n", - "\n", - " s: 2.0 CPU, 512.0MB object store: : 0.00 row [00:32, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.00: Waiting for scheduling\n", - "Running Dataset. Active & requested resources: 3/11 CPU, 515.9MB/1.0GB object store: : 0.00 row [00:33, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.9MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:32<15:26, 5.42s/ row]\n", - "\n", - " s: 2.0 CPU, 512.0MB object store: : 0.00 row [00:32, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.01: Preprocessing document \n", - "Running Dataset. Active & requested resources: 3/11 CPU, 515.9MB/1.0GB object store: : 0.00 row [00:34, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.9MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:32<15:26, 5.42s/ row]\n", - "\n", - " s: 2.0 CPU, 512.0MB object store: : 0.00 row [00:32, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.01: Preprocessing document\n", - "Running Dataset. Active & requested resources: 3/11 CPU, 515.9MB/1.0GB object store: : 0.00 row [00:34, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.9MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:32<15:26, 5.42s/ row]\n", - "\n", - " s: 2.0 CPU, 512.0MB object store: : 0.00 row [00:32, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.01: Done preprocessing document \n", - "Running Dataset. Active & requested resources: 3/11 CPU, 515.9MB/1.0GB object store: : 0.00 row [00:34, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.9MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:32<15:26, 5.42s/ row]\n", - "\n", - " s: 2.0 CPU, 512.0MB object store: : 0.00 row [00:32, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.01: Done preprocessing document\n", - "Running Dataset. Active & requested resources: 3/11 CPU, 515.9MB/1.0GB object store: : 0.00 row [00:34, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.9MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:32<15:26, 5.42s/ row]\n", - "\n", - " s: 2.0 CPU, 512.0MB object store: : 0.00 row [00:32, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.90: Completed work on page 2 \n", - "Running Dataset. Active & requested resources: 3/11 CPU, 515.9MB/1.0GB object store: : 0.00 row [00:34, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.9MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:32<15:26, 5.42s/ row]\n", - "\n", - " s: 2.0 CPU, 512.0MB object store: : 0.00 row [00:32, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.90: Completed work on page 2\n", - "\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.9MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:32<15:26, 5.42s/ row]\n", - "\n", - "Running Dataset. Active & requested resources: 3/11 CPU, 515.9MB/1.0GB object store: : 0.00 row [00:34, ? row/s]s: 2.0 CPU, 512.0MB object store: : 0.00 row [00:32, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.9MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:32<15:26, 5.42s/ row]\n", - "\n", - " s: 2.0 CPU, 512.0MB object store: : 0.00 row [00:32, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.90: Completed work on page 3 \n", - "Running Dataset. Active & requested resources: 3/11 CPU, 515.9MB/1.0GB object store: : 0.00 row [00:34, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.9MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:32<15:26, 5.42s/ row]\n", - "\n", - " s: 2.0 CPU, 512.0MB object store: : 0.00 row [00:32, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.90: Completed work on page 3\n", - "Running Dataset. Active & requested resources: 3/11 CPU, 515.9MB/1.0GB object store: : 0.00 row [00:34, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.9MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:32<15:26, 5.42s/ row]\n", - "\n", - " s: 2.0 CPU, 512.0MB object store: : 0.00 row [00:32, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:httpx:HTTP Request: POST https://api.aryn.cloud/v1/document/partition \"HTTP/1.1 200 OK\"\n", - "Running Dataset. Active & requested resources: 3/11 CPU, 515.9MB/1.0GB object store: : 0.00 row [00:34, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.9MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:32<15:26, 5.42s/ row]\n", - "\n", - " s: 2.0 CPU, 512.0MB object store: : 0.00 row [00:32, ? row/s]\n", - " \n", - "\n", - "(pid=77886) INFO:root:Spurious log 1: Verifying that log messages are propagated \n", - "Running Dataset. Active & requested resources: 3/11 CPU, 515.9MB/1.0GB object store: : 0.00 row [00:34, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.9MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:32<15:26, 5.42s/ row]\n", - "\n", - " s: 2.0 CPU, 512.0MB object store: : 0.00 row [00:32, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.97: Completed work on page 1 \n", - "Running Dataset. Active & requested resources: 3/11 CPU, 515.9MB/1.0GB object store: : 0.00 row [00:34, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.9MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:33<15:26, 5.42s/ row]\n", - "\n", - " s: 2.0 CPU, 512.0MB object store: : 0.00 row [00:33, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.97: Completed work on page 1\n", - "Running Dataset. Active & requested resources: 3/11 CPU, 515.9MB/1.0GB object store: : 0.00 row [00:34, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.9MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:33<15:26, 5.42s/ row]\n", - "\n", - " s: 2.0 CPU, 512.0MB object store: : 0.00 row [00:33, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.10: Completed work on page 4 \n", - "Running Dataset. Active & requested resources: 3/11 CPU, 515.9MB/1.0GB object store: : 0.00 row [00:34, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.9MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:33<15:26, 5.42s/ row]\n", - "\n", - " s: 2.0 CPU, 512.0MB object store: : 0.00 row [00:33, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.10: Completed work on page 4\n", - "Running Dataset. Active & requested resources: 3/11 CPU, 515.9MB/1.0GB object store: : 0.00 row [00:34, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.9MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:33<15:26, 5.42s/ row]\n", - "\n", - " s: 2.0 CPU, 512.0MB object store: : 0.00 row [00:33, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.27: Completed work on page 5 \n", - "Running Dataset. Active & requested resources: 3/11 CPU, 515.9MB/1.0GB object store: : 0.00 row [00:34, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.9MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:33<15:26, 5.42s/ row]\n", - "\n", - " s: 2.0 CPU, 512.0MB object store: : 0.00 row [00:33, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.27: Completed work on page 5\n", - "Running Dataset. Active & requested resources: 3/11 CPU, 515.9MB/1.0GB object store: : 0.00 row [00:34, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.9MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:33<15:26, 5.42s/ row]\n", - "\n", - " s: 2.0 CPU, 512.0MB object store: : 0.00 row [00:33, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.45: Completed work on page 6 \n", - "Running Dataset. Active & requested resources: 3/11 CPU, 515.9MB/1.0GB object store: : 0.00 row [00:34, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.9MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:33<15:26, 5.42s/ row]\n", - "\n", - " s: 2.0 CPU, 512.0MB object store: : 0.00 row [00:33, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.45: Completed work on page 6\n", - "Running Dataset. Active & requested resources: 3/11 CPU, 515.9MB/1.0GB object store: : 0.00 row [00:34, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.9MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:33<15:26, 5.42s/ row]\n", - "\n", - " s: 2.0 CPU, 512.0MB object store: : 0.00 row [00:33, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.65: Completed work on page 7 \n", - "Running Dataset. Active & requested resources: 3/11 CPU, 515.9MB/1.0GB object store: : 0.00 row [00:34, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.9MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:33<15:26, 5.42s/ row]\n", - "\n", - " s: 2.0 CPU, 512.0MB object store: : 0.00 row [00:33, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.65: Completed work on page 7\n", - "Running Dataset. Active & requested resources: 3/11 CPU, 515.9MB/1.0GB object store: : 0.00 row [00:35, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.9MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:33<15:26, 5.42s/ row]\n", - "\n", - " s: 2.0 CPU, 512.0MB object store: : 0.00 row [00:33, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.83: Completed work on page 8 \n", - "Running Dataset. Active & requested resources: 3/11 CPU, 515.9MB/1.0GB object store: : 0.00 row [00:35, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.9MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:33<15:26, 5.42s/ row]\n", - "\n", - " s: 2.0 CPU, 512.0MB object store: : 0.00 row [00:33, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.83: Completed work on page 8\n", - "Running Dataset. Active & requested resources: 3/11 CPU, 515.9MB/1.0GB object store: : 0.00 row [00:35, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.9MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:33<15:26, 5.42s/ row]\n", - "\n", - "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 2 [backpressured]; Queued blocks: 0; Resources: 2.0 CPU, 512.0MB object store: : 0.00 row [00:33, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.9MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:33<15:26, 5.42s/ row]\n", - "\n", - " s: 2.0 CPU, 512.0MB object store: : 0.00 row [00:33, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.03: Completed work on page 9 \n", - "Running Dataset. Active & requested resources: 3/11 CPU, 515.9MB/1.0GB object store: : 0.00 row [00:35, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.9MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:34<15:26, 5.42s/ row]\n", - "\n", - " s: 2.0 CPU, 512.0MB object store: : 0.00 row [00:34, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.03: Completed work on page 9\n", - "Running Dataset. Active & requested resources: 3/11 CPU, 515.9MB/1.0GB object store: : 0.00 row [00:35, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.9MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:34<15:26, 5.42s/ row]\n", - "\n", - " s: 2.0 CPU, 512.0MB object store: : 0.00 row [00:34, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.21: Completed work on page 10 \n", - "Running Dataset. Active & requested resources: 3/11 CPU, 515.9MB/1.0GB object store: : 0.00 row [00:35, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.9MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:34<15:26, 5.42s/ row]\n", - "\n", - " s: 2.0 CPU, 512.0MB object store: : 0.00 row [00:34, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.21: Completed work on page 10\n", - "Running Dataset. Active & requested resources: 3/11 CPU, 515.9MB/1.0GB object store: : 0.00 row [00:35, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.9MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:34<15:26, 5.42s/ row]\n", - "\n", - " s: 2.0 CPU, 512.0MB object store: : 0.00 row [00:34, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.41: Completed work on page 11 \n", - "Running Dataset. Active & requested resources: 3/11 CPU, 515.9MB/1.0GB object store: : 0.00 row [00:35, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.9MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:34<15:26, 5.42s/ row]\n", - "\n", - " s: 2.0 CPU, 512.0MB object store: : 0.00 row [00:34, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.41: Completed work on page 11\n", - "Running Dataset. Active & requested resources: 3/11 CPU, 515.9MB/1.0GB object store: : 0.00 row [00:35, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.9MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:34<15:26, 5.42s/ row]\n", - "\n", - " s: 2.0 CPU, 512.0MB object store: : 0.00 row [00:34, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.58: Completed work on page 12 \n", - "Running Dataset. Active & requested resources: 3/11 CPU, 515.9MB/1.0GB object store: : 0.00 row [00:36, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.9MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:34<15:26, 5.42s/ row]\n", - "\n", - " s: 2.0 CPU, 512.0MB object store: : 0.00 row [00:34, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.58: Completed work on page 12\n", - "Running Dataset. Active & requested resources: 3/11 CPU, 515.9MB/1.0GB object store: : 0.00 row [00:36, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.9MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:34<15:26, 5.42s/ row]\n", - "\n", - " s: 2.0 CPU, 512.0MB object store: : 0.00 row [00:34, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.77: Completed work on page 13 \n", - "Running Dataset. Active & requested resources: 3/11 CPU, 515.9MB/1.0GB object store: : 0.00 row [00:36, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.9MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:34<15:26, 5.42s/ row]\n", - "\n", - " s: 2.0 CPU, 512.0MB object store: : 0.00 row [00:34, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.77: Completed work on page 13\n", - "Running Dataset. Active & requested resources: 3/11 CPU, 515.9MB/1.0GB object store: : 0.00 row [00:36, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.9MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:34<15:26, 5.42s/ row]\n", - "\n", - " s: 2.0 CPU, 512.0MB object store: : 0.00 row [00:34, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.94: Completed work on page 14 \n", - "Running Dataset. Active & requested resources: 3/11 CPU, 515.9MB/1.0GB object store: : 0.00 row [00:36, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.9MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:34<15:26, 5.42s/ row]\n", - "\n", - " s: 2.0 CPU, 512.0MB object store: : 0.00 row [00:34, ? row/s]\n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.94: Completed work on page 14\n", - "Running Dataset. Active & requested resources: 3/11 CPU, 515.9MB/1.0GB object store: : 0.00 row [00:36, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.9MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:34<15:26, 5.42s/ row]\n", - "\n", - " s: 2.0 CPU, 512.0MB object store: : 0.00 row [00:34, ? row/s]\n", - " \n", - "\n", - "(MapBatches(materialize)->MapBatches(split_doc)->MapBatches(llm_map)->MapBatches(parse_json_and_cast)->MapBatches(process_doc)->MapBatches(removeOriginalElements)->MapBatches(process_doc)->MapBatches(mergeDialogue)->MapBatches(process_doc)->MapBatches(OpenAIEmbedder)->MapBatches(_write_docs_tt) pid=77886) INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", - "Running Dataset. Active & requested resources: 3/11 CPU, 515.9MB/1.0GB object store: : 0.00 row [00:36, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.9MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:34<15:26, 5.42s/ row]\n", - "\n", - "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 2 [backpressured]; Queued blocks: 0; Resources: 2.0 CPU, 512.0MB object store: : 0.00 row [00:34, ? row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:34<15:26, 5.42s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:34<15:26, 5.42s/ row]\n", - "\n", - "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 1; Queued blocks: 0; Resources: 1.0 CPU, 1.9MB object store: : 0.00 row [00:34, ? row/s] \n", - "\n", - "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 1; Queued blocks: 0; Resources: 1.0 CPU, 1.9MB object store: 0%| | 0.00/885 [00:34...->MapBatches(_write_docs_tt): Tasks: 1; Queued blocks: 0; Resources: 1.0 CPU, 1.9MB object store: 2%|█▋ | 15.0/885 [00:34<33:47, 2.33s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 3.13: Completed work on page 15 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.3MB/1.0GB object store: 2%|██▌ | 15.0/885 [00:36<35:02, 2.42s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:35<15:26, 5.42s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 3.13: Completed work on page 15 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.3MB/1.0GB object store: 2%|██▌ | 15.0/885 [00:36<35:02, 2.42s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:35<15:26, 5.42s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 3.30: Completed work on page 16 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.3MB/1.0GB object store: 2%|██▌ | 15.0/885 [00:36<35:02, 2.42s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:35<15:26, 5.42s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 3.30: Completed work on page 16 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.3MB/1.0GB object store: 2%|██▌ | 15.0/885 [00:36<35:02, 2.42s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:35<15:26, 5.42s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 3.49: Completed work on page 17 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.3MB/1.0GB object store: 2%|██▌ | 15.0/885 [00:37<35:02, 2.42s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:35<15:26, 5.42s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 3.49: Completed work on page 17 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.3MB/1.0GB object store: 2%|██▌ | 15.0/885 [00:37<35:02, 2.42s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:35<15:26, 5.42s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 3.85: Completed work on page 18 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.3MB/1.0GB object store: 2%|██▌ | 15.0/885 [00:37<35:02, 2.42s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:35<15:26, 5.42s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 3.85: Completed work on page 18 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.3MB/1.0GB object store: 2%|██▌ | 15.0/885 [00:37<35:02, 2.42s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:35<15:26, 5.42s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 4.04: Completed work on page 19 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.3MB/1.0GB object store: 2%|██▌ | 15.0/885 [00:37<35:02, 2.42s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:35<15:26, 5.42s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 4.04: Completed work on page 19 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.3MB/1.0GB object store: 2%|██▌ | 15.0/885 [00:37<35:02, 2.42s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:35<15:26, 5.42s/ row]\n", - "\n", - "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 1; Queued blocks: 0; Resources: 1.0 CPU, 1.9MB object store: 2%|█▋ | 15.0/885 [00:35<33:47, 2.33s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:36<15:26, 5.42s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapBatches(materialize)->MapBatches(split_doc)->MapBatches(llm_map)->MapBatches(parse_json_and_cast)->MapBatches(process_doc)->MapBatches(removeOriginalElements)->MapBatches(process_doc)->MapBatches(mergeDialogue)->MapBatches(process_doc)->MapBatches(OpenAIEmbedder)->MapBatches(_write_docs_tt) pid=77886) INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.3MB/1.0GB object store: 2%|██▌ | 15.0/885 [00:37<35:02, 2.42s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:36<15:26, 5.42s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 4.21: Completed work on page 20 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.3MB/1.0GB object store: 2%|██▌ | 15.0/885 [00:37<35:02, 2.42s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:36<15:26, 5.42s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 4.21: Completed work on page 20 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.3MB/1.0GB object store: 2%|██▌ | 15.0/885 [00:38<35:02, 2.42s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:36<15:26, 5.42s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 4.38: Completed work on page 21 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.3MB/1.0GB object store: 2%|██▌ | 15.0/885 [00:38<35:02, 2.42s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:36<15:26, 5.42s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 4.38: Completed work on page 21 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.3MB/1.0GB object store: 2%|██▌ | 15.0/885 [00:38<35:02, 2.42s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:36<15:26, 5.42s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 4.89: Completed work on page 22 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.3MB/1.0GB object store: 2%|██▌ | 15.0/885 [00:38<35:02, 2.42s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:36<15:26, 5.42s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 4.89: Completed work on page 22 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.3MB/1.0GB object store: 2%|██▌ | 15.0/885 [00:38<35:02, 2.42s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:36<15:26, 5.42s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 4.97: Completed work on page 23 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.3MB/1.0GB object store: 2%|██▌ | 15.0/885 [00:38<35:02, 2.42s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:36<15:26, 5.42s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 4.97: Completed work on page 23 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.3MB/1.0GB object store: 2%|██▌ | 15.0/885 [00:38<35:02, 2.42s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:36<15:26, 5.42s/ row]\n", - "\n", - "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 1; Queued blocks: 0; Resources: 1.0 CPU, 1.9MB object store: 2%|█▋ | 15.0/885 [00:36<33:47, 2.33s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:37<15:26, 5.42s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapBatches(materialize)->MapBatches(split_doc)->MapBatches(llm_map)->MapBatches(parse_json_and_cast)->MapBatches(process_doc)->MapBatches(removeOriginalElements)->MapBatches(process_doc)->MapBatches(mergeDialogue)->MapBatches(process_doc)->MapBatches(OpenAIEmbedder)->MapBatches(_write_docs_tt) pid=77886) INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.3MB/1.0GB object store: 2%|██▌ | 15.0/885 [00:38<35:02, 2.42s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:37<15:26, 5.42s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 5.12: Completed work on page 27 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.3MB/1.0GB object store: 2%|██▌ | 15.0/885 [00:38<35:02, 2.42s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:37<15:26, 5.42s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 5.12: Completed work on page 27 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.3MB/1.0GB object store: 2%|██▌ | 15.0/885 [00:38<35:02, 2.42s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:37<15:26, 5.42s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 5.33: Completed work on page 26 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.3MB/1.0GB object store: 2%|██▌ | 15.0/885 [00:38<35:02, 2.42s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:37<15:26, 5.42s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 5.33: Completed work on page 26 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.3MB/1.0GB object store: 2%|██▌ | 15.0/885 [00:38<35:02, 2.42s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:37<15:26, 5.42s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 6.06: Completed work on page 24 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.3MB/1.0GB object store: 2%|██▌ | 15.0/885 [00:39<35:02, 2.42s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:37<15:26, 5.42s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 6.06: Completed work on page 24 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.3MB/1.0GB object store: 2%|██▌ | 15.0/885 [00:39<35:02, 2.42s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:37<15:26, 5.42s/ row]\n", - "\n", - "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 1; Queued blocks: 0; Resources: 1.0 CPU, 1.9MB object store: 2%|█▋ | 15.0/885 [00:37<33:47, 2.33s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:38<15:26, 5.42s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 6.26: Completed work on page 25 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.3MB/1.0GB object store: 2%|██▌ | 15.0/885 [00:39<35:02, 2.42s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:38<15:26, 5.42s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 6.26: Completed work on page 25 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.3MB/1.0GB object store: 2%|██▌ | 15.0/885 [00:40<35:02, 2.42s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:38<15:26, 5.42s/ row]\n", - "\n", - "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 1; Queued blocks: 0; Resources: 1.0 CPU, 1.9MB object store: 2%|█▋ | 15.0/885 [00:38<33:47, 2.33s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:39<15:26, 5.42s/ row]\n", - "\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.3MB/1.0GB object store: 2%|██▌ | 15.0/885 [00:40<35:02, 2.42s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:40<15:26, 5.42s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:httpx:HTTP Request: POST https://api.aryn.cloud/v1/document/partition \"HTTP/1.1 200 OK\" \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.3MB/1.0GB object store: 2%|██▌ | 15.0/885 [00:41<35:02, 2.42s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:40<15:26, 5.42s/ row]\n", - "\n", - "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 1; Queued blocks: 0; Resources: 1.0 CPU, 1.9MB object store: 2%|█▋ | 15.0/885 [00:40<33:47, 2.33s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:41<15:26, 5.42s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.00: Server version aryn-partitioner-0.20250323.124348 Model version 1.4 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.3MB/1.0GB object store: 2%|██▌ | 15.0/885 [00:43<35:02, 2.42s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:41<15:26, 5.42s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.00: Server version aryn-partitioner-0.20250323.124348 Model version 1.4 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.3MB/1.0GB object store: 2%|██▌ | 15.0/885 [00:43<35:02, 2.42s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:41<15:26, 5.42s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.00: Received request with aryn_call_id=aryn:call-pdk486xdb6z179odzq5ct11 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.3MB/1.0GB object store: 2%|██▌ | 15.0/885 [00:43<35:02, 2.42s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:41<15:26, 5.42s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.00: Received request with aryn_call_id=aryn:call-pdk486xdb6z179odzq5ct11 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.3MB/1.0GB object store: 2%|██▌ | 15.0/885 [00:43<35:02, 2.42s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:41<15:26, 5.42s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.00: Waiting for scheduling \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.3MB/1.0GB object store: 2%|██▌ | 15.0/885 [00:43<35:02, 2.42s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:41<15:26, 5.42s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.00: Waiting for scheduling \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.3MB/1.0GB object store: 2%|██▌ | 15.0/885 [00:43<35:02, 2.42s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:41<15:26, 5.42s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.04: Preprocessing document \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.3MB/1.0GB object store: 2%|██▌ | 15.0/885 [00:43<35:02, 2.42s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:41<15:26, 5.42s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.04: Preprocessing document \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.3MB/1.0GB object store: 2%|██▌ | 15.0/885 [00:43<35:02, 2.42s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:41<15:26, 5.42s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.04: Done preprocessing document \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.3MB/1.0GB object store: 2%|██▌ | 15.0/885 [00:43<35:02, 2.42s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:41<15:26, 5.42s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.04: Done preprocessing document \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.3MB/1.0GB object store: 2%|██▌ | 15.0/885 [00:43<35:02, 2.42s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:41<15:26, 5.42s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.78: Completed work on page 1 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.3MB/1.0GB object store: 2%|██▌ | 15.0/885 [00:43<35:02, 2.42s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:42<15:26, 5.42s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.78: Completed work on page 1 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.3MB/1.0GB object store: 2%|██▌ | 15.0/885 [00:43<35:02, 2.42s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:42<15:26, 5.42s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.92: Completed work on page 2 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.3MB/1.0GB object store: 2%|██▌ | 15.0/885 [00:43<35:02, 2.42s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:42<15:26, 5.42s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.92: Completed work on page 2 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.3MB/1.0GB object store: 2%|██▌ | 15.0/885 [00:43<35:02, 2.42s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:42<15:26, 5.42s/ row]\n", - "\n", - "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 1; Queued blocks: 0; Resources: 1.0 CPU, 1.9MB object store: 2%|█▋ | 15.0/885 [00:42<33:47, 2.33s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:42<15:26, 5.42s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.16: Completed work on page 4 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.3MB/1.0GB object store: 2%|██▌ | 15.0/885 [00:43<35:02, 2.42s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:42<15:26, 5.42s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.16: Completed work on page 4 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.3MB/1.0GB object store: 2%|██▌ | 15.0/885 [00:43<35:02, 2.42s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:42<15:26, 5.42s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.16: Completed work on page 5 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.3MB/1.0GB object store: 2%|██▌ | 15.0/885 [00:44<35:02, 2.42s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:42<15:26, 5.42s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.16: Completed work on page 5 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.3MB/1.0GB object store: 2%|██▌ | 15.0/885 [00:44<35:02, 2.42s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:42<15:26, 5.42s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.40: Completed work on page 3 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.3MB/1.0GB object store: 2%|██▌ | 15.0/885 [00:44<35:02, 2.42s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:42<15:26, 5.42s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.40: Completed work on page 3 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.3MB/1.0GB object store: 2%|██▌ | 15.0/885 [00:44<35:02, 2.42s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:42<15:26, 5.42s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.40: Completed work on page 6 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.3MB/1.0GB object store: 2%|██▌ | 15.0/885 [00:44<35:02, 2.42s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:42<15:26, 5.42s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.40: Completed work on page 6 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.3MB/1.0GB object store: 2%|██▌ | 15.0/885 [00:44<35:02, 2.42s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:42<15:26, 5.42s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.74: Completed work on page 7 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.3MB/1.0GB object store: 2%|██▌ | 15.0/885 [00:44<35:02, 2.42s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:42<15:26, 5.42s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.74: Completed work on page 7 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.3MB/1.0GB object store: 2%|██▌ | 15.0/885 [00:44<35:02, 2.42s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:42<15:26, 5.42s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.85: Completed work on page 8 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.3MB/1.0GB object store: 2%|██▌ | 15.0/885 [00:44<35:02, 2.42s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:42<15:26, 5.42s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.85: Completed work on page 8 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.3MB/1.0GB object store: 2%|██▌ | 15.0/885 [00:44<35:02, 2.42s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:42<15:26, 5.42s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.85: Completed work on page 9 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.3MB/1.0GB object store: 2%|██▌ | 15.0/885 [00:44<35:02, 2.42s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:42<15:26, 5.42s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.85: Completed work on page 9 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.3MB/1.0GB object store: 2%|██▌ | 15.0/885 [00:44<35:02, 2.42s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:42<15:26, 5.42s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.96: Completed work on page 12 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.3MB/1.0GB object store: 2%|██▌ | 15.0/885 [00:44<35:02, 2.42s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:42<15:26, 5.42s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.96: Completed work on page 12 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.3MB/1.0GB object store: 2%|██▌ | 15.0/885 [00:44<35:02, 2.42s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:43<15:26, 5.42s/ row]\n", - "\n", - "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 1; Queued blocks: 0; Resources: 1.0 CPU, 1.9MB object store: 2%|█▋ | 15.0/885 [00:43<33:47, 2.33s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:43<15:26, 5.42s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:43<15:26, 5.42s/ row]\n", - "\n", - "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 0; Queued blocks: 0; Resources: 0.0 CPU, 0.0B object store: 2%|█▊ | 15.0/885 [00:43<33:47, 2.33s/ row]\n", - "\n", - "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 0; Queued blocks: 0; Resources: 0.0 CPU, 0.0B object store: 3%|███▌ | 30.0/885 [00:43<18:22, 1.29s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.07: Completed work on page 13 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.3MB/1.0GB object store: 3%|█████ | 30.0/885 [00:45<18:53, 1.33s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:43<15:26, 5.42s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.07: Completed work on page 13 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.3MB/1.0GB object store: 3%|█████ | 30.0/885 [00:45<18:53, 1.33s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:43<15:26, 5.42s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.50: Completed work on page 10 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.3MB/1.0GB object store: 3%|█████ | 30.0/885 [00:45<18:53, 1.33s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:43<15:26, 5.42s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.50: Completed work on page 10 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.3MB/1.0GB object store: 3%|█████ | 30.0/885 [00:45<18:53, 1.33s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:43<15:26, 5.42s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.56: Completed work on page 11 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.3MB/1.0GB object store: 3%|█████ | 30.0/885 [00:45<18:53, 1.33s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:43<15:26, 5.42s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.56: Completed work on page 11 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.3MB/1.0GB object store: 3%|█████ | 30.0/885 [00:45<18:53, 1.33s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:43<15:26, 5.42s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.65: Completed work on page 15 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.3MB/1.0GB object store: 3%|█████ | 30.0/885 [00:45<18:53, 1.33s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:43<15:26, 5.42s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.65: Completed work on page 15 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.3MB/1.0GB object store: 3%|█████ | 30.0/885 [00:45<18:53, 1.33s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:43<15:26, 5.42s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.66: Completed work on page 14 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.3MB/1.0GB object store: 3%|█████ | 30.0/885 [00:45<18:53, 1.33s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:43<15:26, 5.42s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.66: Completed work on page 14 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.3MB/1.0GB object store: 3%|█████ | 30.0/885 [00:45<18:53, 1.33s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:43<15:26, 5.42s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.75: Completed work on page 16 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.3MB/1.0GB object store: 3%|█████ | 30.0/885 [00:45<18:53, 1.33s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:43<15:26, 5.42s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.75: Completed work on page 16 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.3MB/1.0GB object store: 3%|█████ | 30.0/885 [00:45<18:53, 1.33s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:43<15:26, 5.42s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.75: Completed work on page 17 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.3MB/1.0GB object store: 3%|█████ | 30.0/885 [00:45<18:53, 1.33s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:43<15:26, 5.42s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.75: Completed work on page 17 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.3MB/1.0GB object store: 3%|█████ | 30.0/885 [00:45<18:53, 1.33s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:43<15:26, 5.42s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.94: Completed work on page 18 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.3MB/1.0GB object store: 3%|█████ | 30.0/885 [00:45<18:53, 1.33s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:44<15:26, 5.42s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.94: Completed work on page 18 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.3MB/1.0GB object store: 3%|█████ | 30.0/885 [00:45<18:53, 1.33s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:44<15:26, 5.42s/ row]\n", - "\n", - "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 0; Queued blocks: 0; Resources: 0.0 CPU, 0.0B object store: 3%|███▌ | 30.0/885 [00:44<18:22, 1.29s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:44<15:26, 5.42s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 3.23: Completed work on page 20 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.3MB/1.0GB object store: 3%|█████ | 30.0/885 [00:46<18:53, 1.33s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:44<15:26, 5.42s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 3.23: Completed work on page 20 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.3MB/1.0GB object store: 3%|█████ | 30.0/885 [00:46<18:53, 1.33s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:44<15:26, 5.42s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 3.29: Completed work on page 19 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.3MB/1.0GB object store: 3%|█████ | 30.0/885 [00:46<18:53, 1.33s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:44<15:26, 5.42s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 3.29: Completed work on page 19 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.3MB/1.0GB object store: 3%|█████ | 30.0/885 [00:46<18:53, 1.33s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:44<15:26, 5.42s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 3.30: Completed work on page 22 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.3MB/1.0GB object store: 3%|█████ | 30.0/885 [00:46<18:53, 1.33s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:44<15:26, 5.42s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 3.30: Completed work on page 22 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.3MB/1.0GB object store: 3%|█████ | 30.0/885 [00:46<18:53, 1.33s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:44<15:26, 5.42s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 3.30: Completed work on page 23 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.3MB/1.0GB object store: 3%|█████ | 30.0/885 [00:46<18:53, 1.33s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:44<15:26, 5.42s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 3.30: Completed work on page 23 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.3MB/1.0GB object store: 3%|█████ | 30.0/885 [00:46<18:53, 1.33s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:44<15:26, 5.42s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 3.50: Completed work on page 21 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.3MB/1.0GB object store: 3%|█████ | 30.0/885 [00:46<18:53, 1.33s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:44<15:26, 5.42s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 3.50: Completed work on page 21 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.3MB/1.0GB object store: 3%|█████ | 30.0/885 [00:46<18:53, 1.33s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:44<15:26, 5.42s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 3.50: Completed work on page 24 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.3MB/1.0GB object store: 3%|█████ | 30.0/885 [00:46<18:53, 1.33s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:45<15:26, 5.42s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 3.50: Completed work on page 24 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.3MB/1.0GB object store: 3%|█████ | 30.0/885 [00:46<18:53, 1.33s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:45<15:26, 5.42s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 3.77: Completed work on page 25 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.3MB/1.0GB object store: 3%|█████ | 30.0/885 [00:46<18:53, 1.33s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:45<15:26, 5.42s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 3.77: Completed work on page 25 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.3MB/1.0GB object store: 3%|█████ | 30.0/885 [00:46<18:53, 1.33s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:45<15:26, 5.42s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 3.77: Completed work on page 26 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.3MB/1.0GB object store: 3%|█████ | 30.0/885 [00:46<18:53, 1.33s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:45<15:26, 5.42s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 3.77: Completed work on page 26 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.3MB/1.0GB object store: 3%|█████ | 30.0/885 [00:46<18:53, 1.33s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:45<15:26, 5.42s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 4.00: Completed work on page 27 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.3MB/1.0GB object store: 3%|█████ | 30.0/885 [00:46<18:53, 1.33s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:45<15:26, 5.42s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 4.00: Completed work on page 27 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.3MB/1.0GB object store: 3%|█████ | 30.0/885 [00:46<18:53, 1.33s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:45<15:26, 5.42s/ row]\n", - "\n", - "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 0; Queued blocks: 0; Resources: 0.0 CPU, 0.0B object store: 3%|███▌ | 30.0/885 [00:45<18:22, 1.29s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:45<15:26, 5.42s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 4.14: Completed work on page 28 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.3MB/1.0GB object store: 3%|█████ | 30.0/885 [00:47<18:53, 1.33s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:45<15:26, 5.42s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 4.14: Completed work on page 28 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.3MB/1.0GB object store: 3%|█████ | 30.0/885 [00:47<18:53, 1.33s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:45<15:26, 5.42s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 4.43: Completed work on page 32 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.3MB/1.0GB object store: 3%|█████ | 30.0/885 [00:47<18:53, 1.33s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:45<15:26, 5.42s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 4.43: Completed work on page 32 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.3MB/1.0GB object store: 3%|█████ | 30.0/885 [00:47<18:53, 1.33s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:45<15:26, 5.42s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 4.50: Completed work on page 30 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.3MB/1.0GB object store: 3%|█████ | 30.0/885 [00:47<18:53, 1.33s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:45<15:26, 5.42s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 4.50: Completed work on page 30 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.3MB/1.0GB object store: 3%|█████ | 30.0/885 [00:47<18:53, 1.33s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:45<15:26, 5.42s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 4.50: Completed work on page 31 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.3MB/1.0GB object store: 3%|█████ | 30.0/885 [00:47<18:53, 1.33s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:45<15:26, 5.42s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 4.50: Completed work on page 31 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.3MB/1.0GB object store: 3%|█████ | 30.0/885 [00:47<18:53, 1.33s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:45<15:26, 5.42s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 4.50: Completed work on page 29 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.3MB/1.0GB object store: 3%|█████ | 30.0/885 [00:47<18:53, 1.33s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:45<15:26, 5.42s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 4.50: Completed work on page 29 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.3MB/1.0GB object store: 3%|█████ | 30.0/885 [00:47<18:53, 1.33s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:45<15:26, 5.42s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 4.69: Completed work on page 34 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.3MB/1.0GB object store: 3%|█████ | 30.0/885 [00:47<18:53, 1.33s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:45<15:26, 5.42s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 4.69: Completed work on page 34 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.3MB/1.0GB object store: 3%|█████ | 30.0/885 [00:47<18:53, 1.33s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:45<15:26, 5.42s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 4.69: Completed work on page 33 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.3MB/1.0GB object store: 3%|█████ | 30.0/885 [00:47<18:53, 1.33s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:45<15:26, 5.42s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 4.69: Completed work on page 33 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.3MB/1.0GB object store: 3%|█████ | 30.0/885 [00:47<18:53, 1.33s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:45<15:26, 5.42s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 4.69: Completed work on page 35 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.3MB/1.0GB object store: 3%|█████ | 30.0/885 [00:47<18:53, 1.33s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:46<15:26, 5.42s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 4.69: Completed work on page 35 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.3MB/1.0GB object store: 3%|█████ | 30.0/885 [00:47<18:53, 1.33s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:46<15:26, 5.42s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 4.88: Completed work on page 36 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.3MB/1.0GB object store: 3%|█████ | 30.0/885 [00:47<18:53, 1.33s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:46<15:26, 5.42s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 4.88: Completed work on page 36 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.3MB/1.0GB object store: 3%|█████ | 30.0/885 [00:47<18:53, 1.33s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:46<15:26, 5.42s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 4.98: Completed work on page 37 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.3MB/1.0GB object store: 3%|█████ | 30.0/885 [00:47<18:53, 1.33s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:46<15:26, 5.42s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 4.98: Completed work on page 37 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.3MB/1.0GB object store: 3%|█████ | 30.0/885 [00:47<18:53, 1.33s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:46<15:26, 5.42s/ row]\n", - "\n", - "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 0; Queued blocks: 0; Resources: 0.0 CPU, 0.0B object store: 3%|███▌ | 30.0/885 [00:46<18:22, 1.29s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:46<15:26, 5.42s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 5.07: Completed work on page 38 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.3MB/1.0GB object store: 3%|█████ | 30.0/885 [00:48<18:53, 1.33s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:46<15:26, 5.42s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 5.07: Completed work on page 38 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.3MB/1.0GB object store: 3%|█████ | 30.0/885 [00:48<18:53, 1.33s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:46<15:26, 5.42s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 5.62: Completed work on page 41 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.3MB/1.0GB object store: 3%|█████ | 30.0/885 [00:48<18:53, 1.33s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:46<15:26, 5.42s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 5.62: Completed work on page 41 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.3MB/1.0GB object store: 3%|█████ | 30.0/885 [00:48<18:53, 1.33s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:46<15:26, 5.42s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 5.68: Completed work on page 39 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.3MB/1.0GB object store: 3%|█████ | 30.0/885 [00:48<18:53, 1.33s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:46<15:26, 5.42s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 5.68: Completed work on page 39 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.3MB/1.0GB object store: 3%|█████ | 30.0/885 [00:48<18:53, 1.33s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:46<15:26, 5.42s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 5.77: Completed work on page 44 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.3MB/1.0GB object store: 3%|█████ | 30.0/885 [00:48<18:53, 1.33s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:46<15:26, 5.42s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 5.77: Completed work on page 44 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.3MB/1.0GB object store: 3%|█████ | 30.0/885 [00:48<18:53, 1.33s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:46<15:26, 5.42s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 5.77: Completed work on page 40 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.3MB/1.0GB object store: 3%|█████ | 30.0/885 [00:48<18:53, 1.33s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:46<15:26, 5.42s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 5.77: Completed work on page 40 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.3MB/1.0GB object store: 3%|█████ | 30.0/885 [00:48<18:53, 1.33s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:46<15:26, 5.42s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 5.77: Completed work on page 43 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.3MB/1.0GB object store: 3%|█████ | 30.0/885 [00:48<18:53, 1.33s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:47<15:26, 5.42s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 5.77: Completed work on page 43 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.3MB/1.0GB object store: 3%|█████ | 30.0/885 [00:48<18:53, 1.33s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:47<15:26, 5.42s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 5.96: Completed work on page 42 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.3MB/1.0GB object store: 3%|█████ | 30.0/885 [00:48<18:53, 1.33s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:47<15:26, 5.42s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 5.96: Completed work on page 42 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.3MB/1.0GB object store: 3%|█████ | 30.0/885 [00:48<18:53, 1.33s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:47<15:26, 5.42s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 6.02: Completed work on page 45 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.3MB/1.0GB object store: 3%|█████ | 30.0/885 [00:48<18:53, 1.33s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:47<15:26, 5.42s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 6.02: Completed work on page 45 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.3MB/1.0GB object store: 3%|█████ | 30.0/885 [00:48<18:53, 1.33s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:47<15:26, 5.42s/ row]\n", - "\n", - "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 0; Queued blocks: 0; Resources: 0.0 CPU, 0.0B object store: 3%|███▌ | 30.0/885 [00:47<18:22, 1.29s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:47<15:26, 5.42s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 6.72: Completed work on page 50 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.3MB/1.0GB object store: 3%|█████ | 30.0/885 [00:49<18:53, 1.33s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:47<15:26, 5.42s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 6.72: Completed work on page 50 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.3MB/1.0GB object store: 3%|█████ | 30.0/885 [00:49<18:53, 1.33s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:47<15:26, 5.42s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 6.81: Completed work on page 49 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.3MB/1.0GB object store: 3%|█████ | 30.0/885 [00:49<18:53, 1.33s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:47<15:26, 5.42s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 6.81: Completed work on page 49 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.3MB/1.0GB object store: 3%|█████ | 30.0/885 [00:49<18:53, 1.33s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:47<15:26, 5.42s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 6.82: Completed work on page 48 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.3MB/1.0GB object store: 3%|█████ | 30.0/885 [00:49<18:53, 1.33s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:47<15:26, 5.42s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 6.82: Completed work on page 48 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.3MB/1.0GB object store: 3%|█████ | 30.0/885 [00:49<18:53, 1.33s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:47<15:26, 5.42s/ row]\n", - "\n", - "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 0; Queued blocks: 0; Resources: 0.0 CPU, 0.0B object store: 3%|███▌ | 30.0/885 [00:47<18:22, 1.29s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:48<15:26, 5.42s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 7.13: Completed work on page 47 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.3MB/1.0GB object store: 3%|█████ | 30.0/885 [00:50<18:53, 1.33s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:48<15:26, 5.42s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 7.13: Completed work on page 47 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.3MB/1.0GB object store: 3%|█████ | 30.0/885 [00:50<18:53, 1.33s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:48<15:26, 5.42s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 7.15: Completed work on page 46 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.3MB/1.0GB object store: 3%|█████ | 30.0/885 [00:50<18:53, 1.33s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:48<15:26, 5.42s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 7.15: Completed work on page 46 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.3MB/1.0GB object store: 3%|█████ | 30.0/885 [00:50<18:53, 1.33s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:48<15:26, 5.42s/ row]\n", - "\n", - "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 0; Queued blocks: 0; Resources: 0.0 CPU, 0.0B object store: 3%|███▌ | 30.0/885 [00:48<18:22, 1.29s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:49<15:26, 5.42s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:httpx:HTTP Request: POST https://api.aryn.cloud/v1/document/partition \"HTTP/1.1 200 OK\" \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.3MB/1.0GB object store: 3%|█████ | 30.0/885 [00:52<18:53, 1.33s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 53; Resources: 1.0 CPU, 1.3MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:50<15:26, 5.42s/ row]\n", - "\n", - "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 0; Queued blocks: 0; Resources: 0.0 CPU, 0.0B object store: 3%|███▌ | 30.0/885 [00:50<18:22, 1.29s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 52; Resources: 1.0 CPU, 4.8MB object store; [locality off]: 3%|██▍ | 6.00/177 [00:50<15:26, 5.42s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 52; Resources: 1.0 CPU, 4.8MB object store; [locality off]: 5%|███▌ | 9.00/177 [00:50<16:21, 5.84s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 52; Resources: 1.0 CPU, 4.8MB object store; [locality off]: 5%|███▌ | 9.00/177 [00:50<16:21, 5.84s/ row]\n", - "\n", - "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 1; Queued blocks: 0; Resources: 1.0 CPU, 2.4MB object store: 3%|███▍ | 30.0/885 [00:50<18:22, 1.29s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.00: Server version aryn-partitioner-0.20250323.124348 Model version 1.4 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 7.2MB/1.0GB object store: 3%|█████ | 30.0/885 [00:53<18:53, 1.33s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 52; Resources: 1.0 CPU, 4.8MB object store; [locality off]: 5%|███▌ | 9.00/177 [00:51<16:21, 5.84s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.00: Server version aryn-partitioner-0.20250323.124348 Model version 1.4 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 7.2MB/1.0GB object store: 3%|█████ | 30.0/885 [00:53<18:53, 1.33s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 52; Resources: 1.0 CPU, 4.8MB object store; [locality off]: 5%|███▌ | 9.00/177 [00:51<16:21, 5.84s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.00: Received request with aryn_call_id=aryn:call-kvwdbwmjdpron5iwrbq6wez \n", - "\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 7.2MB/1.0GB object store: 3%|█████ | 30.0/885 [00:53<18:53, 1.33s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 52; Resources: 1.0 CPU, 4.8MB object store; [locality off]: 5%|███▌ | 9.00/177 [00:51<16:21, 5.84s/ row]\n", - "\n", - " \n", - " \n", - "\n", - " \n", - "\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 7.2MB/1.0GB object store: 3%|█████ | 30.0/885 [00:53<18:53, 1.33s/ row] INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.00: Received request with aryn_call_id=aryn:call-kvwdbwmjdpron5iwrbq6wez\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 7.2MB/1.0GB object store: 3%|█████ | 30.0/885 [00:53<18:53, 1.33s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 52; Resources: 1.0 CPU, 4.8MB object store; [locality off]: 5%|███▌ | 9.00/177 [00:51<16:21, 5.84s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.00: Waiting for scheduling \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 7.2MB/1.0GB object store: 3%|█████ | 30.0/885 [00:53<18:53, 1.33s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 52; Resources: 1.0 CPU, 4.8MB object store; [locality off]: 5%|███▌ | 9.00/177 [00:51<16:21, 5.84s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.00: Waiting for scheduling \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 7.2MB/1.0GB object store: 3%|█████ | 30.0/885 [00:53<18:53, 1.33s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 52; Resources: 1.0 CPU, 4.8MB object store; [locality off]: 5%|███▌ | 9.00/177 [00:51<16:21, 5.84s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.01: Preprocessing document \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 7.2MB/1.0GB object store: 3%|█████ | 30.0/885 [00:53<18:53, 1.33s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 52; Resources: 1.0 CPU, 4.8MB object store; [locality off]: 5%|███▌ | 9.00/177 [00:51<16:21, 5.84s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.01: Preprocessing document \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 7.2MB/1.0GB object store: 3%|█████ | 30.0/885 [00:53<18:53, 1.33s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 52; Resources: 1.0 CPU, 4.8MB object store; [locality off]: 5%|███▌ | 9.00/177 [00:51<16:21, 5.84s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.01: Done preprocessing document \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 7.2MB/1.0GB object store: 3%|█████ | 30.0/885 [00:53<18:53, 1.33s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 52; Resources: 1.0 CPU, 4.8MB object store; [locality off]: 5%|███▌ | 9.00/177 [00:51<16:21, 5.84s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.01: Done preprocessing document \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 7.2MB/1.0GB object store: 3%|█████ | 30.0/885 [00:53<18:53, 1.33s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 52; Resources: 1.0 CPU, 4.8MB object store; [locality off]: 5%|███▌ | 9.00/177 [00:51<16:21, 5.84s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.65: Completed work on page 1 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 7.2MB/1.0GB object store: 3%|█████ | 30.0/885 [00:53<18:53, 1.33s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 52; Resources: 1.0 CPU, 4.8MB object store; [locality off]: 5%|███▌ | 9.00/177 [00:52<16:21, 5.84s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.65: Completed work on page 1 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 7.2MB/1.0GB object store: 3%|█████ | 30.0/885 [00:53<18:53, 1.33s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 52; Resources: 1.0 CPU, 4.8MB object store; [locality off]: 5%|███▌ | 9.00/177 [00:52<16:21, 5.84s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.66: Completed work on page 2 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 7.2MB/1.0GB object store: 3%|█████ | 30.0/885 [00:53<18:53, 1.33s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 52; Resources: 1.0 CPU, 4.8MB object store; [locality off]: 5%|███▌ | 9.00/177 [00:52<16:21, 5.84s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.66: Completed work on page 2 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 7.2MB/1.0GB object store: 3%|█████ | 30.0/885 [00:53<18:53, 1.33s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 52; Resources: 1.0 CPU, 4.8MB object store; [locality off]: 5%|███▌ | 9.00/177 [00:52<16:21, 5.84s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.94: Completed work on page 3 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 7.2MB/1.0GB object store: 3%|█████ | 30.0/885 [00:53<18:53, 1.33s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 52; Resources: 1.0 CPU, 4.8MB object store; [locality off]: 5%|███▌ | 9.00/177 [00:52<16:21, 5.84s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.94: Completed work on page 3 \n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [01:54<13:21, 5.13s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [01:54<13:21, 5.13s/ row]\n", - "\n", - "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 1; Queued blocks: 0; Resources: 1.0 CPU, 2.9MB object store: 8%|████████▋ | 75.0/885 [01:54<15:28, 1.15s/ row]\n", - "\n", - "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 1; Queued blocks: 0; Resources: 1.0 CPU, 2.9MB object store: 10%|██████████▍ | 90.0/885 [01:54<16:05, 1.21s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.00: Server version aryn-partitioner-0.20250323.124348 Model version 1.4 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [01:55<16:09, 1.22s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [01:54<13:21, 5.13s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.00: Server version aryn-partitioner-0.20250323.124348 Model version 1.4 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [01:55<16:09, 1.22s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [01:54<13:21, 5.13s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.00: Received request with aryn_call_id=aryn:call-3nv1slvshtvt64wut3xxrio \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [01:55<16:09, 1.22s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [01:54<13:21, 5.13s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.00: Received request with aryn_call_id=aryn:call-3nv1slvshtvt64wut3xxrio \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [01:56<16:09, 1.22s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [01:54<13:21, 5.13s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.00: Waiting for scheduling \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [01:56<16:09, 1.22s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [01:54<13:21, 5.13s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.00: Waiting for scheduling \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [01:56<16:09, 1.22s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [01:54<13:21, 5.13s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.02: Preprocessing document \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [01:56<16:09, 1.22s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [01:54<13:21, 5.13s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.02: Preprocessing document \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [01:56<16:09, 1.22s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [01:54<13:21, 5.13s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.02: Done preprocessing document \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [01:56<16:09, 1.22s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [01:54<13:21, 5.13s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.02: Done preprocessing document \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [01:56<16:09, 1.22s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [01:54<13:21, 5.13s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.66: Completed work on page 1 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [01:56<16:09, 1.22s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [01:54<13:21, 5.13s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.66: Completed work on page 1 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [01:56<16:09, 1.22s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [01:54<13:21, 5.13s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:httpx:HTTP Request: POST https://api.aryn.cloud/v1/document/partition \"HTTP/1.1 200 OK\" \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [01:56<16:09, 1.22s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [01:54<13:21, 5.13s/ row]\n", - "\n", - "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 1; Queued blocks: 0; Resources: 1.0 CPU, 2.9MB object store: 10%|██████████▍ | 90.0/885 [01:54<16:05, 1.21s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [01:55<13:21, 5.13s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.00: Completed work on page 4 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [01:56<16:09, 1.22s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [01:55<13:21, 5.13s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.00: Completed work on page 4 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [01:56<16:09, 1.22s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [01:55<13:21, 5.13s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.25: Completed work on page 6 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [01:56<16:09, 1.22s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [01:55<13:21, 5.13s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.25: Completed work on page 6 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [01:56<16:09, 1.22s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [01:55<13:21, 5.13s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.25: Completed work on page 5 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [01:56<16:09, 1.22s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [01:55<13:21, 5.13s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.25: Completed work on page 5 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [01:56<16:09, 1.22s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [01:55<13:21, 5.13s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.25: Completed work on page 3 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [01:56<16:09, 1.22s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [01:55<13:21, 5.13s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.25: Completed work on page 3 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [01:56<16:09, 1.22s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [01:55<13:21, 5.13s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.25: Completed work on page 2 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [01:56<16:09, 1.22s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [01:55<13:21, 5.13s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.25: Completed work on page 2 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [01:57<16:09, 1.22s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [01:55<13:21, 5.13s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.36: Completed work on page 7 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [01:57<16:09, 1.22s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [01:55<13:21, 5.13s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.36: Completed work on page 7 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [01:57<16:09, 1.22s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [01:55<13:21, 5.13s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.51: Completed work on page 8 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [01:57<16:09, 1.22s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [01:55<13:21, 5.13s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.51: Completed work on page 8 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [01:57<16:09, 1.22s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [01:55<13:21, 5.13s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.51: Completed work on page 9 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [01:57<16:09, 1.22s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [01:55<13:21, 5.13s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.51: Completed work on page 9 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [01:57<16:09, 1.22s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [01:55<13:21, 5.13s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.75: Completed work on page 10 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [01:57<16:09, 1.22s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [01:55<13:21, 5.13s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.75: Completed work on page 10 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [01:57<16:09, 1.22s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [01:56<13:21, 5.13s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.86: Completed work on page 11 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [01:57<16:09, 1.22s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [01:56<13:21, 5.13s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.86: Completed work on page 11 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [01:57<16:09, 1.22s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [01:56<13:21, 5.13s/ row]\n", - "\n", - "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 1; Queued blocks: 0; Resources: 1.0 CPU, 2.9MB object store: 10%|██████████▍ | 90.0/885 [01:56<16:05, 1.21s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [01:56<13:21, 5.13s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.06: Completed work on page 12 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [01:57<16:09, 1.22s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [01:56<13:21, 5.13s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.06: Completed work on page 12 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [01:57<16:09, 1.22s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [01:56<13:21, 5.13s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.06: Completed work on page 13 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [01:57<16:09, 1.22s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [01:56<13:21, 5.13s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.06: Completed work on page 13 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [01:57<16:09, 1.22s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [01:56<13:21, 5.13s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.17: Completed work on page 14 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [01:57<16:09, 1.22s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [01:56<13:21, 5.13s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.17: Completed work on page 14 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [01:57<16:09, 1.22s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [01:56<13:21, 5.13s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.38: Completed work on page 15 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [01:57<16:09, 1.22s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [01:56<13:21, 5.13s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.38: Completed work on page 15 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [01:57<16:09, 1.22s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [01:56<13:21, 5.13s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.38: Completed work on page 16 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [01:58<16:09, 1.22s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [01:56<13:21, 5.13s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.38: Completed work on page 16 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [01:58<16:09, 1.22s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [01:56<13:21, 5.13s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.48: Completed work on page 17 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [01:58<16:09, 1.22s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [01:56<13:21, 5.13s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.48: Completed work on page 17 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [01:58<16:09, 1.22s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [01:56<13:21, 5.13s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.59: Completed work on page 18 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [01:58<16:09, 1.22s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [01:56<13:21, 5.13s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.59: Completed work on page 18 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [01:58<16:09, 1.22s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [01:56<13:21, 5.13s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.78: Completed work on page 19 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [01:58<16:09, 1.22s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [01:56<13:21, 5.13s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.78: Completed work on page 19 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [01:58<16:09, 1.22s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [01:56<13:21, 5.13s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.78: Completed work on page 20 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [01:58<16:09, 1.22s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [01:56<13:21, 5.13s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.78: Completed work on page 20 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [01:58<16:09, 1.22s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [01:56<13:21, 5.13s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.87: Completed work on page 21 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [01:58<16:09, 1.22s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [01:57<13:21, 5.13s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.87: Completed work on page 21 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [01:58<16:09, 1.22s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [01:57<13:21, 5.13s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.96: Completed work on page 22 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [01:58<16:09, 1.22s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [01:57<13:21, 5.13s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.96: Completed work on page 22 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [01:58<16:09, 1.22s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [01:57<13:21, 5.13s/ row]\n", - "\n", - "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 1; Queued blocks: 0; Resources: 1.0 CPU, 2.9MB object store: 10%|██████████▍ | 90.0/885 [01:57<16:05, 1.21s/ row]\n", - " \n", - " \n", - "\n", - " \n", - "\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [01:58<16:09, 1.22s/ row] ArynPartitioner: T+ 3.15: Completed work on page 23apBatches(BaseMapTransformCallable___wrap)) pid=77875)\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [01:58<16:09, 1.22s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [01:57<13:21, 5.13s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 3.15: Completed work on page 23 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [01:58<16:09, 1.22s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [01:57<13:21, 5.13s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 3.15: Completed work on page 24 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [01:58<16:09, 1.22s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [01:57<13:21, 5.13s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 3.15: Completed work on page 24 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [01:58<16:09, 1.22s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [01:57<13:21, 5.13s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 3.29: Completed work on page 25 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [01:58<16:09, 1.22s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [01:57<13:21, 5.13s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 3.29: Completed work on page 25 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [01:58<16:09, 1.22s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [01:57<13:21, 5.13s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 3.49: Completed work on page 26 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [01:59<16:09, 1.22s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [01:57<13:21, 5.13s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 3.49: Completed work on page 26 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [01:59<16:09, 1.22s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [01:57<13:21, 5.13s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 3.84: Completed work on page 27 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [01:59<16:09, 1.22s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [01:57<13:21, 5.13s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 3.84: Completed work on page 27 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [01:59<16:09, 1.22s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [01:57<13:21, 5.13s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 3.93: Completed work on page 31 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [01:59<16:09, 1.22s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [01:57<13:21, 5.13s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 3.93: Completed work on page 31 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [01:59<16:09, 1.22s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [01:57<13:21, 5.13s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapBatches(materialize)->MapBatches(split_doc)->MapBatches(llm_map)->MapBatches(parse_json_and_cast)->MapBatches(process_doc)->MapBatches(removeOriginalElements)->MapBatches(process_doc)->MapBatches(mergeDialogue)->MapBatches(process_doc)->MapBatches(OpenAIEmbedder)->MapBatches(_write_docs_tt) pid=77877) INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [01:59<16:09, 1.22s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [01:57<13:21, 5.13s/ row]\n", - "\n", - "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 1; Queued blocks: 0; Resources: 1.0 CPU, 2.9MB object store: 10%|██████████▍ | 90.0/885 [01:57<16:05, 1.21s/ row]\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 4.66: Completed work on page 29 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [01:59<16:09, 1.22s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [01:58<13:21, 5.13s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 4.66: Completed work on page 29 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [01:59<16:09, 1.22s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [01:58<13:21, 5.13s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 4.78: Completed work on page 28 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [01:59<16:09, 1.22s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [01:58<13:21, 5.13s/ row]\n", - "\n", - "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 1; Queued blocks: 0; Resources: 1.0 CPU, 2.9MB object store: 10%|██████████▍ | 90.0/885 [01:58<16:05, 1.21s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [01:59<16:09, 1.22s/ row](MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 4.78: Completed work on page 28\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [01:59<16:09, 1.22s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [01:58<13:21, 5.13s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 4.85: Completed work on page 30 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [01:59<16:09, 1.22s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [01:58<13:21, 5.13s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 4.85: Completed work on page 30 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [01:59<16:09, 1.22s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [01:58<13:21, 5.13s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapBatches(materialize)->MapBatches(split_doc)->MapBatches(llm_map)->MapBatches(parse_json_and_cast)->MapBatches(process_doc)->MapBatches(removeOriginalElements)->MapBatches(process_doc)->MapBatches(mergeDialogue)->MapBatches(process_doc)->MapBatches(OpenAIEmbedder)->MapBatches(_write_docs_tt) pid=77877) INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [02:00<16:09, 1.22s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [01:59<13:21, 5.13s/ row]\n", - "\n", - "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 1; Queued blocks: 0; Resources: 1.0 CPU, 2.9MB object store: 10%|██████████▍ | 90.0/885 [01:59<16:05, 1.21s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [01:59<13:21, 5.13s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.00: Server version aryn-partitioner-0.20250323.124348 Model version 1.4 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [02:01<16:09, 1.22s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [02:00<13:21, 5.13s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.00: Server version aryn-partitioner-0.20250323.124348 Model version 1.4 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [02:01<16:09, 1.22s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [02:00<13:21, 5.13s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.00: Received request with aryn_call_id=aryn:call-9q5zy9urvbu79cvw42dhgf4 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [02:01<16:09, 1.22s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [02:00<13:21, 5.13s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.00: Received request with aryn_call_id=aryn:call-9q5zy9urvbu79cvw42dhgf4 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [02:01<16:09, 1.22s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [02:00<13:21, 5.13s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.00: Waiting for scheduling \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [02:01<16:09, 1.22s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [02:00<13:21, 5.13s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.00: Waiting for scheduling \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [02:01<16:09, 1.22s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [02:00<13:21, 5.13s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.01: Preprocessing document \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [02:01<16:09, 1.22s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [02:00<13:21, 5.13s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.01: Preprocessing document \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [02:01<16:09, 1.22s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [02:00<13:21, 5.13s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.01: Done preprocessing document \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [02:01<16:09, 1.22s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [02:00<13:21, 5.13s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.01: Done preprocessing document \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [02:01<16:09, 1.22s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [02:00<13:21, 5.13s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.66: Completed work on page 2 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [02:01<16:09, 1.22s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [02:00<13:21, 5.13s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.66: Completed work on page 2 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [02:01<16:09, 1.22s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [02:00<13:21, 5.13s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.73: Completed work on page 1 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [02:01<16:09, 1.22s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [02:00<13:21, 5.13s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.73: Completed work on page 1 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [02:01<16:09, 1.22s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [02:00<13:21, 5.13s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.84: Completed work on page 3 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [02:01<16:09, 1.22s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [02:00<13:21, 5.13s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.84: Completed work on page 3 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [02:01<16:09, 1.22s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [02:00<13:21, 5.13s/ row]\n", - "\n", - "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 1; Queued blocks: 0; Resources: 1.0 CPU, 2.9MB object store: 10%|██████████▍ | 90.0/885 [02:00<16:05, 1.21s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [02:00<13:21, 5.13s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.05: Completed work on page 4 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [02:02<16:09, 1.22s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [02:01<13:21, 5.13s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.05: Completed work on page 4 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [02:02<16:09, 1.22s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [02:01<13:21, 5.13s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.27: Completed work on page 5 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [02:02<16:09, 1.22s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [02:01<13:21, 5.13s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.27: Completed work on page 5 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [02:02<16:09, 1.22s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [02:01<13:21, 5.13s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.50: Completed work on page 6 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [02:02<16:09, 1.22s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [02:01<13:21, 5.13s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.50: Completed work on page 6 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [02:02<16:09, 1.22s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [02:01<13:21, 5.13s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.66: Completed work on page 7 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [02:02<16:09, 1.22s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [02:01<13:21, 5.13s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.66: Completed work on page 7 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [02:02<16:09, 1.22s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [02:01<13:21, 5.13s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.90: Completed work on page 8 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [02:02<16:09, 1.22s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [02:01<13:21, 5.13s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.90: Completed work on page 8 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [02:02<16:09, 1.22s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [02:01<13:21, 5.13s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.00: Completed work on page 9 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [02:02<16:09, 1.22s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [02:01<13:21, 5.13s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.00: Completed work on page 9 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [02:02<16:09, 1.22s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [02:01<13:21, 5.13s/ row]\n", - "\n", - "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 1; Queued blocks: 0; Resources: 1.0 CPU, 2.9MB object store: 10%|██████████▍ | 90.0/885 [02:01<16:05, 1.21s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [02:01<13:21, 5.13s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.16: Completed work on page 10 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [02:03<16:09, 1.22s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [02:02<13:21, 5.13s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.16: Completed work on page 10 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [02:03<16:09, 1.22s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [02:02<13:21, 5.13s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.35: Completed work on page 11 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [02:03<16:09, 1.22s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [02:02<13:21, 5.13s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.35: Completed work on page 11 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [02:03<16:09, 1.22s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [02:02<13:21, 5.13s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.50: Completed work on page 12 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [02:03<16:09, 1.22s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [02:02<13:21, 5.13s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.50: Completed work on page 12 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [02:03<16:09, 1.22s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [02:02<13:21, 5.13s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.77: Completed work on page 13 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [02:03<16:09, 1.22s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [02:02<13:21, 5.13s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.77: Completed work on page 13 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [02:03<16:09, 1.22s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [02:02<13:21, 5.13s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.85: Completed work on page 14 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [02:03<16:09, 1.22s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [02:02<13:21, 5.13s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.85: Completed work on page 14 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [02:03<16:09, 1.22s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [02:02<13:21, 5.13s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 3.02: Completed work on page 15 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [02:04<16:09, 1.22s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [02:02<13:21, 5.13s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 3.02: Completed work on page 15 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [02:04<16:09, 1.22s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [02:02<13:21, 5.13s/ row]\n", - "\n", - "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 1; Queued blocks: 0; Resources: 1.0 CPU, 2.9MB object store: 10%|██████████▍ | 90.0/885 [02:02<16:05, 1.21s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [02:02<13:21, 5.13s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 3.20: Completed work on page 16 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [02:04<16:09, 1.22s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [02:03<13:21, 5.13s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 3.20: Completed work on page 16 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [02:04<16:09, 1.22s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [02:03<13:21, 5.13s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 3.37: Completed work on page 17 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [02:04<16:09, 1.22s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [02:03<13:21, 5.13s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 3.37: Completed work on page 17 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [02:04<16:09, 1.22s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [02:03<13:21, 5.13s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 3.53: Completed work on page 18 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [02:04<16:09, 1.22s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [02:03<13:21, 5.13s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 3.53: Completed work on page 18 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [02:04<16:09, 1.22s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [02:03<13:21, 5.13s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 3.80: Completed work on page 19 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [02:04<16:09, 1.22s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [02:03<13:21, 5.13s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 3.80: Completed work on page 19 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [02:04<16:09, 1.22s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [02:03<13:21, 5.13s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 3.89: Completed work on page 20 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [02:05<16:09, 1.22s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [02:03<13:21, 5.13s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 3.89: Completed work on page 20 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [02:05<16:09, 1.22s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [02:03<13:21, 5.13s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 4.05: Completed work on page 21 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [02:05<16:09, 1.22s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [02:03<13:21, 5.13s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 4.05: Completed work on page 21 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [02:05<16:09, 1.22s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [02:03<13:21, 5.13s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapBatches(materialize)->MapBatches(split_doc)->MapBatches(llm_map)->MapBatches(parse_json_and_cast)->MapBatches(process_doc)->MapBatches(removeOriginalElements)->MapBatches(process_doc)->MapBatches(mergeDialogue)->MapBatches(process_doc)->MapBatches(OpenAIEmbedder)->MapBatches(_write_docs_tt) pid=77877) INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\" [repeated 2x across cluster]\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [02:05<16:09, 1.22s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [02:03<13:21, 5.13s/ row]\n", - "\n", - "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 1; Queued blocks: 0; Resources: 1.0 CPU, 2.9MB object store: 10%|██████████▍ | 90.0/885 [02:03<16:05, 1.21s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [02:03<13:21, 5.13s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 4.36: Completed work on page 22 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [02:05<16:09, 1.22s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [02:04<13:21, 5.13s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 4.36: Completed work on page 22 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [02:05<16:09, 1.22s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [02:04<13:21, 5.13s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 4.65: Completed work on page 23 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [02:05<16:09, 1.22s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [02:04<13:21, 5.13s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 4.65: Completed work on page 23 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [02:05<16:09, 1.22s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [02:04<13:21, 5.13s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 4.89: Completed work on page 25 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [02:05<16:09, 1.22s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [02:04<13:21, 5.13s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 4.89: Completed work on page 25 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [02:05<16:09, 1.22s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [02:04<13:21, 5.13s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 4.96: Completed work on page 24 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [02:05<16:09, 1.22s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [02:04<13:21, 5.13s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 4.96: Completed work on page 24 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 6.4MB/1.0GB object store: 10%|███████████████ | 90.0/885 [02:06<16:09, 1.22s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 3.5MB object store; [locality off]: 12%|████████▍ | 21.0/177 [02:04<13:21, 5.13s/ row]\n", - "\n", - "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 1; Queued blocks: 0; Resources: 1.0 CPU, 2.9MB object store: 10%|██████████▍ | 90.0/885 [02:04<16:05, 1.21s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 1.8MB object store; [locality off]: 12%|████████▍ | 21.0/177 [02:04<13:21, 5.13s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 48; Resources: 1.0 CPU, 1.8MB object store; [locality off]: 12%|████████▍ | 21.0/177 [02:04<13:21, 5.13s/ row]\n", - "\n", - "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 0; Queued blocks: 0; Resources: 0.0 CPU, 0.0B object store: 10%|██████████▌ | 90.0/885 [02:04<16:05, 1.21s/ row]\n", - "\n", - "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 0; Queued blocks: 0; Resources: 0.0 CPU, 0.0B object store: 12%|████████████▍ | 105/885 [02:04<13:37, 1.05s/ row]\n", - "\n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.8MB/1.0GB object store: 12%|█████████████████▋ | 105/885 [02:06<13:39, 1.05s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 47; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 12%|████████▍ | 21.0/177 [02:05<13:21, 5.13s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 47; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 14%|█████████▋ | 24.0/177 [02:05<12:38, 4.96s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 47; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 14%|█████████▋ | 24.0/177 [02:05<12:38, 4.96s/ row]\n", - "\n", - "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 1; Queued blocks: 0; Resources: 1.0 CPU, 2.8MB object store: 12%|████████████▎ | 105/885 [02:06<13:37, 1.05s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.00: Server version aryn-partitioner-0.20250323.124348 Model version 1.4 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 6.2MB/1.0GB object store: 12%|█████████████████▋ | 105/885 [02:08<13:39, 1.05s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 47; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 14%|█████████▋ | 24.0/177 [02:06<12:38, 4.96s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.00: Server version aryn-partitioner-0.20250323.124348 Model version 1.4 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 6.2MB/1.0GB object store: 12%|█████████████████▋ | 105/885 [02:08<13:39, 1.05s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 47; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 14%|█████████▋ | 24.0/177 [02:06<12:38, 4.96s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.00: Received request with aryn_call_id=aryn:call-fcefmenrarq8e5bb1yyb3rg \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 6.2MB/1.0GB object store: 12%|█████████████████▋ | 105/885 [02:08<13:39, 1.05s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 47; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 14%|█████████▋ | 24.0/177 [02:06<12:38, 4.96s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.00: Received request with aryn_call_id=aryn:call-fcefmenrarq8e5bb1yyb3rg \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 6.2MB/1.0GB object store: 12%|█████████████████▋ | 105/885 [02:08<13:39, 1.05s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 47; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 14%|█████████▋ | 24.0/177 [02:06<12:38, 4.96s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.00: Waiting for scheduling \n", - "\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 6.2MB/1.0GB object store: 12%|█████████████████▋ | 105/885 [02:08<13:39, 1.05s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 47; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 14%|█████████▋ | 24.0/177 [02:07<12:38, 4.96s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.00: Waiting for scheduling \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 6.2MB/1.0GB object store: 12%|█████████████████▋ | 105/885 [02:08<13:39, 1.05s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 47; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 14%|█████████▋ | 24.0/177 [02:07<12:38, 4.96s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.01: Preprocessing document \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 6.2MB/1.0GB object store: 12%|█████████████████▋ | 105/885 [02:08<13:39, 1.05s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 47; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 14%|█████████▋ | 24.0/177 [02:07<12:38, 4.96s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.01: Preprocessing document \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 6.2MB/1.0GB object store: 12%|█████████████████▋ | 105/885 [02:08<13:39, 1.05s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 47; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 14%|█████████▋ | 24.0/177 [02:07<12:38, 4.96s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.01: Done preprocessing document \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 6.2MB/1.0GB object store: 12%|█████████████████▋ | 105/885 [02:08<13:39, 1.05s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 47; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 14%|█████████▋ | 24.0/177 [02:07<12:38, 4.96s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.01: Done preprocessing document \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 6.2MB/1.0GB object store: 12%|█████████████████▋ | 105/885 [02:08<13:39, 1.05s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 47; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 14%|█████████▋ | 24.0/177 [02:07<12:38, 4.96s/ row]\n", - "\n", - "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 1; Queued blocks: 0; Resources: 1.0 CPU, 2.8MB object store: 12%|████████████▎ | 105/885 [02:07<13:37, 1.05s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 6.2MB/1.0GB object store: 12%|█████████████████▋ | 105/885 [02:08<13:39, 1.05s/ row](MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.64: Completed work on page 2\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 6.2MB/1.0GB object store: 12%|█████████████████▋ | 105/885 [02:08<13:39, 1.05s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 47; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 14%|█████████▋ | 24.0/177 [02:07<12:38, 4.96s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.64: Completed work on page 2 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 6.2MB/1.0GB object store: 12%|█████████████████▋ | 105/885 [02:08<13:39, 1.05s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 47; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 14%|█████████▋ | 24.0/177 [02:07<12:38, 4.96s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.73: Completed work on page 1 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 6.2MB/1.0GB object store: 12%|█████████████████▋ | 105/885 [02:08<13:39, 1.05s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 47; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 14%|█████████▋ | 24.0/177 [02:07<12:38, 4.96s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.73: Completed work on page 1 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 6.2MB/1.0GB object store: 12%|█████████████████▋ | 105/885 [02:08<13:39, 1.05s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 47; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 14%|█████████▋ | 24.0/177 [02:07<12:38, 4.96s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.84: Completed work on page 3 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 6.2MB/1.0GB object store: 12%|█████████████████▋ | 105/885 [02:08<13:39, 1.05s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 47; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 14%|█████████▋ | 24.0/177 [02:07<12:38, 4.96s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.84: Completed work on page 3 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 6.2MB/1.0GB object store: 12%|█████████████████▋ | 105/885 [02:08<13:39, 1.05s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 47; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 14%|█████████▋ | 24.0/177 [02:07<12:38, 4.96s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.06: Completed work on page 4 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 6.2MB/1.0GB object store: 12%|█████████████████▋ | 105/885 [02:09<13:39, 1.05s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 47; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 14%|█████████▋ | 24.0/177 [02:07<12:38, 4.96s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.06: Completed work on page 4 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 6.2MB/1.0GB object store: 12%|█████████████████▋ | 105/885 [02:09<13:39, 1.05s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 47; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 14%|█████████▋ | 24.0/177 [02:08<12:38, 4.96s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.28: Completed work on page 5 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 6.2MB/1.0GB object store: 12%|█████████████████▋ | 105/885 [02:09<13:39, 1.05s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 47; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 14%|█████████▋ | 24.0/177 [02:08<12:38, 4.96s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.28: Completed work on page 5 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 6.2MB/1.0GB object store: 12%|█████████████████▋ | 105/885 [02:09<13:39, 1.05s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 47; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 14%|█████████▋ | 24.0/177 [02:08<12:38, 4.96s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.52: Completed work on page 6 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 6.2MB/1.0GB object store: 12%|█████████████████▋ | 105/885 [02:09<13:39, 1.05s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 47; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 14%|█████████▋ | 24.0/177 [02:08<12:38, 4.96s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.52: Completed work on page 6 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 6.2MB/1.0GB object store: 12%|█████████████████▋ | 105/885 [02:09<13:39, 1.05s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 47; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 14%|█████████▋ | 24.0/177 [02:08<12:38, 4.96s/ row]\n", - "\n", - " \n", - " \n", - "\n", - " \n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 47; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 14%|█████████▋ | 24.0/177 [02:08<12:38, 4.96s/ row](MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.71: Completed work on page 7\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 6.2MB/1.0GB object store: 12%|█████████████████▋ | 105/885 [02:09<13:39, 1.05s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 47; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 14%|█████████▋ | 24.0/177 [02:08<12:38, 4.96s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.71: Completed work on page 7 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 6.2MB/1.0GB object store: 12%|█████████████████▋ | 105/885 [02:09<13:39, 1.05s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 47; Resources: 1.0 CPU, 3.4MB object store; [locality off]: 14%|█████████▋ | 24.0/177 [02:08<12:38, 4.96s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.88: Completed work on page 8 \n", - "\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.07: Completed work on page 4 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.6MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:36<09:27, 1.11 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 38; Resources: 1.0 CPU, 1.6MB object store; [locality off]: 29%|████████████████████▍ | 51.0/177 [04:35<09:45, 4.65s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.07: Completed work on page 4 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.6MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:36<09:27, 1.11 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 38; Resources: 1.0 CPU, 1.6MB object store; [locality off]: 29%|████████████████████▍ | 51.0/177 [04:35<09:45, 4.65s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.29: Completed work on page 5 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.6MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:36<09:27, 1.11 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 38; Resources: 1.0 CPU, 1.6MB object store; [locality off]: 29%|████████████████████▍ | 51.0/177 [04:35<09:45, 4.65s/ row]\n", - "\n", - " \n", - " \n", - "\n", - " \n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 38; Resources: 1.0 CPU, 1.6MB object store; [locality off]: 29%|████████████████████▍ | 51.0/177 [04:35<09:45, 4.65s/ row](MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.29: Completed work on page 5\n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.6MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:36<09:27, 1.11 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 38; Resources: 1.0 CPU, 1.6MB object store; [locality off]: 29%|████████████████████▍ | 51.0/177 [04:35<09:45, 4.65s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.54: Completed work on page 6 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.6MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:36<09:27, 1.11 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 38; Resources: 1.0 CPU, 1.6MB object store; [locality off]: 29%|████████████████████▍ | 51.0/177 [04:35<09:45, 4.65s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.54: Completed work on page 6 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.6MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:36<09:27, 1.11 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 38; Resources: 1.0 CPU, 1.6MB object store; [locality off]: 29%|████████████████████▍ | 51.0/177 [04:35<09:45, 4.65s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.72: Completed work on page 7 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.6MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:36<09:27, 1.11 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 38; Resources: 1.0 CPU, 1.6MB object store; [locality off]: 29%|████████████████████▍ | 51.0/177 [04:35<09:45, 4.65s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.72: Completed work on page 7 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.6MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:36<09:27, 1.11 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 38; Resources: 1.0 CPU, 1.6MB object store; [locality off]: 29%|████████████████████▍ | 51.0/177 [04:35<09:45, 4.65s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.90: Completed work on page 8 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.6MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:36<09:27, 1.11 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 38; Resources: 1.0 CPU, 1.6MB object store; [locality off]: 29%|████████████████████▍ | 51.0/177 [04:35<09:45, 4.65s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.90: Completed work on page 8 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.6MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:37<09:27, 1.11 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 38; Resources: 1.0 CPU, 1.6MB object store; [locality off]: 29%|████████████████████▍ | 51.0/177 [04:35<09:45, 4.65s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:httpx:HTTP Request: POST https://api.aryn.cloud/v1/document/partition \"HTTP/1.1 200 OK\" \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.6MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:37<09:27, 1.11 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 38; Resources: 1.0 CPU, 1.6MB object store; [locality off]: 29%|████████████████████▍ | 51.0/177 [04:35<09:45, 4.65s/ row]\n", - "\n", - "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 0; Queued blocks: 0; Resources: 0.0 CPU, 0.0B object store: 29%|██████████████████████████████▎ | 255/885 [04:35<09:25, 1.11 row/s]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.09: Completed work on page 9 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.6MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:37<09:27, 1.11 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 38; Resources: 1.0 CPU, 1.6MB object store; [locality off]: 29%|████████████████████▍ | 51.0/177 [04:36<09:45, 4.65s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.09: Completed work on page 9 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.6MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:37<09:27, 1.11 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 38; Resources: 1.0 CPU, 1.6MB object store; [locality off]: 29%|████████████████████▍ | 51.0/177 [04:36<09:45, 4.65s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.26: Completed work on page 10 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.6MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:37<09:27, 1.11 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 38; Resources: 1.0 CPU, 1.6MB object store; [locality off]: 29%|████████████████████▍ | 51.0/177 [04:36<09:45, 4.65s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.26: Completed work on page 10 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.6MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:37<09:27, 1.11 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 38; Resources: 1.0 CPU, 1.6MB object store; [locality off]: 29%|████████████████████▍ | 51.0/177 [04:36<09:45, 4.65s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.45: Completed work on page 11 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.6MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:37<09:27, 1.11 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 38; Resources: 1.0 CPU, 1.6MB object store; [locality off]: 29%|████████████████████▍ | 51.0/177 [04:36<09:45, 4.65s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.45: Completed work on page 11 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.6MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:37<09:27, 1.11 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 38; Resources: 1.0 CPU, 1.6MB object store; [locality off]: 29%|████████████████████▍ | 51.0/177 [04:36<09:45, 4.65s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.62: Completed work on page 12 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.6MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:37<09:27, 1.11 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 38; Resources: 1.0 CPU, 1.6MB object store; [locality off]: 29%|████████████████████▍ | 51.0/177 [04:36<09:45, 4.65s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.62: Completed work on page 12 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.6MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:37<09:27, 1.11 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 38; Resources: 1.0 CPU, 1.6MB object store; [locality off]: 29%|████████████████████▍ | 51.0/177 [04:36<09:45, 4.65s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.79: Completed work on page 13 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.6MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:37<09:27, 1.11 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 38; Resources: 1.0 CPU, 1.6MB object store; [locality off]: 29%|████████████████████▍ | 51.0/177 [04:36<09:45, 4.65s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.79: Completed work on page 13 \n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 38; Resources: 1.0 CPU, 1.6MB object store; [locality off]: 29%|████████████████████▍ | 51.0/177 [04:36<09:45, 4.65s/ row]\n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.6MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:37<09:27, 1.11 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 38; Resources: 1.0 CPU, 1.6MB object store; [locality off]: 29%|████████████████████▍ | 51.0/177 [04:36<09:45, 4.65s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.96: Completed work on page 14 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.6MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:37<09:27, 1.11 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 38; Resources: 1.0 CPU, 1.6MB object store; [locality off]: 29%|████████████████████▍ | 51.0/177 [04:36<09:45, 4.65s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.96: Completed work on page 14 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.6MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:37<09:27, 1.11 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 38; Resources: 1.0 CPU, 1.6MB object store; [locality off]: 29%|████████████████████▍ | 51.0/177 [04:36<09:45, 4.65s/ row]\n", - "\n", - "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 0; Queued blocks: 0; Resources: 0.0 CPU, 0.0B object store: 29%|██████████████████████████████▎ | 255/885 [04:36<09:25, 1.11 row/s]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 3.11: Completed work on page 15 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.6MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:38<09:27, 1.11 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 38; Resources: 1.0 CPU, 1.6MB object store; [locality off]: 29%|████████████████████▍ | 51.0/177 [04:37<09:45, 4.65s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 3.11: Completed work on page 15 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.6MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:38<09:27, 1.11 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 38; Resources: 1.0 CPU, 1.6MB object store; [locality off]: 29%|████████████████████▍ | 51.0/177 [04:37<09:45, 4.65s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 3.44: Completed work on page 16 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.6MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:38<09:27, 1.11 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 38; Resources: 1.0 CPU, 1.6MB object store; [locality off]: 29%|████████████████████▍ | 51.0/177 [04:37<09:45, 4.65s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 3.44: Completed work on page 16 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.6MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:38<09:27, 1.11 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 38; Resources: 1.0 CPU, 1.6MB object store; [locality off]: 29%|████████████████████▍ | 51.0/177 [04:37<09:45, 4.65s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 3.62: Completed work on page 17 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.6MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:38<09:27, 1.11 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 38; Resources: 1.0 CPU, 1.6MB object store; [locality off]: 29%|████████████████████▍ | 51.0/177 [04:37<09:45, 4.65s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 3.62: Completed work on page 17 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.6MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:38<09:27, 1.11 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 38; Resources: 1.0 CPU, 1.6MB object store; [locality off]: 29%|████████████████████▍ | 51.0/177 [04:37<09:45, 4.65s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 3.80: Completed work on page 18 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.6MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:38<09:27, 1.11 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 38; Resources: 1.0 CPU, 1.6MB object store; [locality off]: 29%|████████████████████▍ | 51.0/177 [04:37<09:45, 4.65s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 3.80: Completed work on page 18 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.6MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:38<09:27, 1.11 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 38; Resources: 1.0 CPU, 1.6MB object store; [locality off]: 29%|████████████████████▍ | 51.0/177 [04:37<09:45, 4.65s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 3.95: Completed work on page 19 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.6MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:38<09:27, 1.11 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 38; Resources: 1.0 CPU, 1.6MB object store; [locality off]: 29%|████████████████████▍ | 51.0/177 [04:37<09:45, 4.65s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 3.95: Completed work on page 19 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.6MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:38<09:27, 1.11 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 38; Resources: 1.0 CPU, 1.6MB object store; [locality off]: 29%|████████████████████▍ | 51.0/177 [04:37<09:45, 4.65s/ row]\n", - "\n", - "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 0; Queued blocks: 0; Resources: 0.0 CPU, 0.0B object store: 29%|██████████████████████████████▎ | 255/885 [04:37<09:25, 1.11 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 38; Resources: 1.0 CPU, 1.6MB object store; [locality off]: 29%|████████████████████▍ | 51.0/177 [04:37<09:45, 4.65s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 4.28: Completed work on page 20 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.6MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:39<09:27, 1.11 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 38; Resources: 1.0 CPU, 1.6MB object store; [locality off]: 29%|████████████████████▍ | 51.0/177 [04:38<09:45, 4.65s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 4.28: Completed work on page 20 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.6MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:39<09:27, 1.11 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 38; Resources: 1.0 CPU, 1.6MB object store; [locality off]: 29%|████████████████████▍ | 51.0/177 [04:38<09:45, 4.65s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 4.42: Completed work on page 23 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.6MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:39<09:27, 1.11 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 38; Resources: 1.0 CPU, 1.6MB object store; [locality off]: 29%|████████████████████▍ | 51.0/177 [04:38<09:45, 4.65s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 4.42: Completed work on page 23 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.6MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:39<09:27, 1.11 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 38; Resources: 1.0 CPU, 1.6MB object store; [locality off]: 29%|████████████████████▍ | 51.0/177 [04:38<09:45, 4.65s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 4.44: Completed work on page 21 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.6MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:39<09:27, 1.11 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 38; Resources: 1.0 CPU, 1.6MB object store; [locality off]: 29%|████████████████████▍ | 51.0/177 [04:38<09:45, 4.65s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 4.44: Completed work on page 21 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.6MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:39<09:27, 1.11 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 38; Resources: 1.0 CPU, 1.6MB object store; [locality off]: 29%|████████████████████▍ | 51.0/177 [04:38<09:45, 4.65s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 4.47: Completed work on page 22 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.6MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:39<09:27, 1.11 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 38; Resources: 1.0 CPU, 1.6MB object store; [locality off]: 29%|████████████████████▍ | 51.0/177 [04:38<09:45, 4.65s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 4.47: Completed work on page 22 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.6MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:39<09:27, 1.11 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 38; Resources: 1.0 CPU, 1.6MB object store; [locality off]: 29%|████████████████████▍ | 51.0/177 [04:38<09:45, 4.65s/ row]\n", - "\n", - "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 0; Queued blocks: 0; Resources: 0.0 CPU, 0.0B object store: 29%|██████████████████████████████▎ | 255/885 [04:38<09:25, 1.11 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 38; Resources: 1.0 CPU, 1.6MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:38<09:42, 4.74s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 38; Resources: 1.0 CPU, 1.6MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:38<09:42, 4.74s/ row]\n", - "\n", - "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 1; Queued blocks: 0; Resources: 1.0 CPU, 2.8MB object store: 29%|█████████████████████████████▉ | 255/885 [04:39<09:25, 1.11 row/s]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:httpx:HTTP Request: POST https://api.aryn.cloud/v1/document/partition \"HTTP/1.1 200 OK\" \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 7.0MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:40<09:27, 1.11 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 38; Resources: 1.0 CPU, 1.6MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:39<09:42, 4.74s/ row]\n", - "\n", - "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 1; Queued blocks: 0; Resources: 1.0 CPU, 2.8MB object store: 29%|█████████████████████████████▉ | 255/885 [04:39<09:25, 1.11 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 4.2MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:39<09:42, 4.74s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 4.2MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:39<09:42, 4.74s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.00: Server version aryn-partitioner-0.20250323.124348 Model version 1.4 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 7.0MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:41<09:27, 1.11 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 4.2MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:40<09:42, 4.74s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.00: Server version aryn-partitioner-0.20250323.124348 Model version 1.4 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 7.0MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:41<09:27, 1.11 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 4.2MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:40<09:42, 4.74s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.00: Received request with aryn_call_id=aryn:call-1vdx2lct3782x1vprr1ij71 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 7.0MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:41<09:27, 1.11 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 4.2MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:40<09:42, 4.74s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.00: Received request with aryn_call_id=aryn:call-1vdx2lct3782x1vprr1ij71 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 7.0MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:41<09:27, 1.11 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 4.2MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:40<09:42, 4.74s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.00: Waiting for scheduling \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 7.0MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:41<09:27, 1.11 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 4.2MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:40<09:42, 4.74s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.00: Waiting for scheduling \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 7.0MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:41<09:27, 1.11 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 4.2MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:40<09:42, 4.74s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 7.0MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:41<09:27, 1.11 row/s](MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.01: Preprocessing document\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 7.0MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:41<09:27, 1.11 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 4.2MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:40<09:42, 4.74s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.01: Preprocessing document \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 7.0MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:41<09:27, 1.11 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 4.2MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:40<09:42, 4.74s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.01: Done preprocessing document \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 7.0MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:41<09:27, 1.11 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 4.2MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:40<09:42, 4.74s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.01: Done preprocessing document \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 7.0MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:41<09:27, 1.11 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 4.2MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:40<09:42, 4.74s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.64: Completed work on page 1 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 7.0MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:41<09:27, 1.11 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 4.2MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:40<09:42, 4.74s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.64: Completed work on page 1 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 7.0MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:41<09:27, 1.11 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 4.2MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:40<09:42, 4.74s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.74: Completed work on page 2 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 7.0MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:41<09:27, 1.11 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 4.2MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:40<09:42, 4.74s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.74: Completed work on page 2 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 7.0MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:41<09:27, 1.11 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 4.2MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:40<09:42, 4.74s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.86: Completed work on page 3 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 7.0MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:41<09:27, 1.11 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 4.2MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:40<09:42, 4.74s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.86: Completed work on page 3 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 7.0MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:41<09:27, 1.11 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 4.2MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:40<09:42, 4.74s/ row]\n", - "\n", - "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 1; Queued blocks: 0; Resources: 1.0 CPU, 2.8MB object store: 29%|█████████████████████████████▉ | 255/885 [04:40<09:25, 1.11 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 4.2MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:40<09:42, 4.74s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.17: Completed work on page 4 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 7.0MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:42<09:27, 1.11 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 4.2MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:41<09:42, 4.74s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.17: Completed work on page 4 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 7.0MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:42<09:27, 1.11 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 4.2MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:41<09:42, 4.74s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.36: Completed work on page 5 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 7.0MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:42<09:27, 1.11 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 4.2MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:41<09:42, 4.74s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.36: Completed work on page 5 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 7.0MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:42<09:27, 1.11 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 4.2MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:41<09:42, 4.74s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.55: Completed work on page 6 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 7.0MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:42<09:27, 1.11 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 4.2MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:41<09:42, 4.74s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.55: Completed work on page 6 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 7.0MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:42<09:27, 1.11 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 4.2MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:41<09:42, 4.74s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 7.0MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:42<09:27, 1.11 row/s](MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.72: Completed work on page 7\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 7.0MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:42<09:27, 1.11 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 4.2MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:41<09:42, 4.74s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.72: Completed work on page 7 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 7.0MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:42<09:27, 1.11 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 4.2MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:41<09:42, 4.74s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.91: Completed work on page 8 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 7.0MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:42<09:27, 1.11 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 4.2MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:41<09:42, 4.74s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.91: Completed work on page 8 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 7.0MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:42<09:27, 1.11 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 4.2MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:41<09:42, 4.74s/ row]\n", - "\n", - "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 1; Queued blocks: 0; Resources: 1.0 CPU, 2.8MB object store: 29%|█████████████████████████████▉ | 255/885 [04:41<09:25, 1.11 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 4.2MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:41<09:42, 4.74s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.10: Completed work on page 9 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 7.0MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:43<09:27, 1.11 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 4.2MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:42<09:42, 4.74s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.10: Completed work on page 9 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 7.0MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:43<09:27, 1.11 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 4.2MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:42<09:42, 4.74s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.29: Completed work on page 10 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 7.0MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:43<09:27, 1.11 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 4.2MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:42<09:42, 4.74s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.29: Completed work on page 10 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 7.0MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:43<09:27, 1.11 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 4.2MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:42<09:42, 4.74s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.47: Completed work on page 11 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 7.0MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:43<09:27, 1.11 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 4.2MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:42<09:42, 4.74s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.47: Completed work on page 11 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 7.0MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:43<09:27, 1.11 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 4.2MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:42<09:42, 4.74s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.65: Completed work on page 12 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 7.0MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:43<09:27, 1.11 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 4.2MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:42<09:42, 4.74s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 7.0MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:43<09:27, 1.11 row/s] INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.65: Completed work on page 12\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 7.0MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:43<09:27, 1.11 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 4.2MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:42<09:42, 4.74s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.83: Completed work on page 13 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 7.0MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:43<09:27, 1.11 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 4.2MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:42<09:42, 4.74s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.83: Completed work on page 13 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 7.0MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:43<09:27, 1.11 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 4.2MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:42<09:42, 4.74s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 3.01: Completed work on page 14 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 7.0MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:43<09:27, 1.11 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 4.2MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:42<09:42, 4.74s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 3.01: Completed work on page 14 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 7.0MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:43<09:27, 1.11 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 4.2MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:42<09:42, 4.74s/ row]\n", - "\n", - "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 1; Queued blocks: 0; Resources: 1.0 CPU, 2.8MB object store: 29%|█████████████████████████████▉ | 255/885 [04:42<09:25, 1.11 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 4.2MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:42<09:42, 4.74s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 3.18: Completed work on page 15 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 7.0MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:44<09:27, 1.11 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 4.2MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:43<09:42, 4.74s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 3.18: Completed work on page 15 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 7.0MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:44<09:27, 1.11 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 4.2MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:43<09:42, 4.74s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 3.36: Completed work on page 16 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 7.0MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:44<09:27, 1.11 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 4.2MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:43<09:42, 4.74s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 3.36: Completed work on page 16 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 7.0MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:44<09:27, 1.11 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 4.2MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:43<09:42, 4.74s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 3.54: Completed work on page 17 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 7.0MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:44<09:27, 1.11 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 4.2MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:43<09:42, 4.74s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 3.54: Completed work on page 17 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 7.0MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:44<09:27, 1.11 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 4.2MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:43<09:42, 4.74s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 3.72: Completed work on page 18 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 7.0MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:44<09:27, 1.11 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 4.2MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:43<09:42, 4.74s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 3.72: Completed work on page 18 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 7.0MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:44<09:27, 1.11 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 4.2MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:43<09:42, 4.74s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 4.05: Completed work on page 19 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 7.0MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:44<09:27, 1.11 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 4.2MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:43<09:42, 4.74s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 4.05: Completed work on page 19 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 7.0MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:44<09:27, 1.11 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 4.2MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:43<09:42, 4.74s/ row]\n", - "\n", - "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 1; Queued blocks: 0; Resources: 1.0 CPU, 2.8MB object store: 29%|█████████████████████████████▉ | 255/885 [04:43<09:25, 1.11 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 4.2MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:44<09:42, 4.74s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 4.23: Completed work on page 20 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 7.0MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:45<09:27, 1.11 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 4.2MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:44<09:42, 4.74s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 4.23: Completed work on page 20 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 7.0MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:45<09:27, 1.11 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 4.2MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:44<09:42, 4.74s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 4.40: Completed work on page 21 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 7.0MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:45<09:27, 1.11 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 4.2MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:44<09:42, 4.74s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 4.40: Completed work on page 21 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 7.0MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:45<09:27, 1.11 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 4.2MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:44<09:42, 4.74s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 4.57: Completed work on page 22 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 7.0MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:45<09:27, 1.11 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 4.2MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:44<09:42, 4.74s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 4.57: Completed work on page 22 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 7.0MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:45<09:27, 1.11 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 4.2MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:44<09:42, 4.74s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 4.96: Completed work on page 26 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 7.0MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:45<09:27, 1.11 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 4.2MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:44<09:42, 4.74s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 4.96: Completed work on page 26 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 7.0MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:45<09:27, 1.11 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 4.2MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:44<09:42, 4.74s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 7.0MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:45<09:27, 1.11 row/s](MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 5.08: Completed work on page 24\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 7.0MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:45<09:27, 1.11 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 4.2MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:44<09:42, 4.74s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 5.08: Completed work on page 24 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 7.0MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:45<09:27, 1.11 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 4.2MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:44<09:42, 4.74s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapBatches(materialize)->MapBatches(split_doc)->MapBatches(llm_map)->MapBatches(parse_json_and_cast)->MapBatches(process_doc)->MapBatches(removeOriginalElements)->MapBatches(process_doc)->MapBatches(mergeDialogue)->MapBatches(process_doc)->MapBatches(OpenAIEmbedder)->MapBatches(_write_docs_tt) pid=77877) INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\" [repeated 4x across cluster]\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 7.0MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:45<09:27, 1.11 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 4.2MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:44<09:42, 4.74s/ row]\n", - "\n", - "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 1; Queued blocks: 0; Resources: 1.0 CPU, 2.8MB object store: 29%|█████████████████████████████▉ | 255/885 [04:44<09:25, 1.11 row/s]\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 5.23: Completed work on page 25 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 7.0MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:46<09:27, 1.11 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 4.2MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:45<09:42, 4.74s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 5.23: Completed work on page 25 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 7.0MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:46<09:27, 1.11 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 4.2MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:45<09:42, 4.74s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 5.25: Completed work on page 23 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 7.0MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:46<09:27, 1.11 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 4.2MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:45<09:42, 4.74s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) \n", - "\n", - "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 1; Queued blocks: 0; Resources: 1.0 CPU, 2.8MB object store: 29%|█████████████████████████████▉ | 255/885 [04:45<09:25, 1.11 row/s]\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 7.0MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:46<09:27, 1.11 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 4.2MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:45<09:42, 4.74s/ row]\n", - "\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 7.0MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:46<09:27, 1.11 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 4.2MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:46<09:42, 4.74s/ row]\n", - "\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 7.0MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:47<09:27, 1.11 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 4.2MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:47<09:42, 4.74s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.00: Server version aryn-partitioner-0.20250323.124348 Model version 1.4 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 7.0MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:48<09:27, 1.11 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 4.2MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:47<09:42, 4.74s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.00: Server version aryn-partitioner-0.20250323.124348 Model version 1.4 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 7.0MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:49<09:27, 1.11 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 4.2MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:47<09:42, 4.74s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.00: Received request with aryn_call_id=aryn:call-wxl681ch50401yw82tpxw8e \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 7.0MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:49<09:27, 1.11 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 4.2MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:47<09:42, 4.74s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.00: Received request with aryn_call_id=aryn:call-wxl681ch50401yw82tpxw8e \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 7.0MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:49<09:27, 1.11 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 4.2MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:47<09:42, 4.74s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 7.0MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:49<09:27, 1.11 row/s](MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.00: Waiting for scheduling\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 7.0MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:49<09:27, 1.11 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 4.2MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:47<09:42, 4.74s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.00: Waiting for scheduling \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 7.0MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:49<09:27, 1.11 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 4.2MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:47<09:42, 4.74s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.03: Preprocessing document \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 7.0MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:49<09:27, 1.11 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 4.2MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:47<09:42, 4.74s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.03: Preprocessing document \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 7.0MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:49<09:27, 1.11 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 4.2MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:47<09:42, 4.74s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.03: Done preprocessing document \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 7.0MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:49<09:27, 1.11 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 4.2MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:47<09:42, 4.74s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.03: Done preprocessing document \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 7.0MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:49<09:27, 1.11 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 4.2MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:47<09:42, 4.74s/ row]\n", - "\n", - "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 1; Queued blocks: 0; Resources: 1.0 CPU, 2.8MB object store: 29%|█████████████████████████████▉ | 255/885 [04:47<09:25, 1.11 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 4.2MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:48<09:42, 4.74s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.18: Completed work on page 1 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 7.0MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:49<09:27, 1.11 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 4.2MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:48<09:42, 4.74s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.18: Completed work on page 1 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 7.0MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:49<09:27, 1.11 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 4.2MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:48<09:42, 4.74s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.74: Completed work on page 2 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 7.0MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:49<09:27, 1.11 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 4.2MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:48<09:42, 4.74s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.74: Completed work on page 2 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 7.0MB/1.0GB object store: 29%|██████████████████████████████████████████▉ | 255/885 [04:49<09:27, 1.11 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 4.2MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:48<09:42, 4.74s/ row]\n", - "\n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.6MB/1.0GB object store: 31%|█████████████████████████████████████████████▍ | 270/885 [04:50<09:43, 1.05 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 1.6MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:49<09:42, 4.74s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 1.6MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:49<09:42, 4.74s/ row]\n", - "\n", - "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 0; Queued blocks: 0; Resources: 0.0 CPU, 0.0B object store: 29%|██████████████████████████████▎ | 255/885 [04:49<09:25, 1.11 row/s]\n", - "\n", - "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 0; Queued blocks: 0; Resources: 0.0 CPU, 0.0B object store: 31%|████████████████████████████████ | 270/885 [04:49<09:55, 1.03 row/s]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.32: Completed work on page 3 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.6MB/1.0GB object store: 31%|█████████████████████████████████████████████▍ | 270/885 [04:50<09:43, 1.05 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 1.6MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:49<09:42, 4.74s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.32: Completed work on page 3 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.6MB/1.0GB object store: 31%|█████████████████████████████████████████████▍ | 270/885 [04:51<09:43, 1.05 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 1.6MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:49<09:42, 4.74s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.95: Completed work on page 4 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.6MB/1.0GB object store: 31%|█████████████████████████████████████████████▍ | 270/885 [04:51<09:43, 1.05 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 1.6MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:49<09:42, 4.74s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.95: Completed work on page 4 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.6MB/1.0GB object store: 31%|█████████████████████████████████████████████▍ | 270/885 [04:51<09:43, 1.05 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 1.6MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:49<09:42, 4.74s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:httpx:HTTP Request: POST https://api.aryn.cloud/v1/document/partition \"HTTP/1.1 200 OK\" \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.6MB/1.0GB object store: 31%|█████████████████████████████████████████████▍ | 270/885 [04:51<09:43, 1.05 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 1.6MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:49<09:42, 4.74s/ row]\n", - "\n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.6MB/1.0GB object store: 31%|█████████████████████████████████████████████▍ | 270/885 [04:51<09:43, 1.05 row/s]\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 3.30: Completed work on page 5 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.6MB/1.0GB object store: 31%|█████████████████████████████████████████████▍ | 270/885 [04:51<09:43, 1.05 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 1.6MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:50<09:42, 4.74s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 3.30: Completed work on page 5 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.6MB/1.0GB object store: 31%|█████████████████████████████████████████████▍ | 270/885 [04:51<09:43, 1.05 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 1.6MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:50<09:42, 4.74s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 3.54: Completed work on page 6 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.6MB/1.0GB object store: 31%|█████████████████████████████████████████████▍ | 270/885 [04:51<09:43, 1.05 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 1.6MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:50<09:42, 4.74s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 3.54: Completed work on page 6 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.6MB/1.0GB object store: 31%|█████████████████████████████████████████████▍ | 270/885 [04:51<09:43, 1.05 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 1.6MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:50<09:42, 4.74s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 3.64: Completed work on page 7 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.6MB/1.0GB object store: 31%|█████████████████████████████████████████████▍ | 270/885 [04:51<09:43, 1.05 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 37; Resources: 1.0 CPU, 1.6MB object store; [locality off]: 31%|█████████████████████▋ | 54.0/177 [04:50<09:42, 4.74s/ row]\n", - "\n", - " \n", - " \n", - "\n", - " \n", - " \n", - "\n", - "(MapBatches(materialize)->MapBatches(split_doc)->MapBatches(llm_map)->MapBatches(parse_json_and_cast)->MapBatches(process_doc)->MapBatches(removeOriginalElements)->MapBatches(process_doc)->MapBatches(mergeDialogue)->MapBatches(process_doc)->MapBatches(OpenAIEmbedder)->MapBatches(_write_docs_tt) pid=77886) INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\" [repeated 2x across cluster]\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 46%|████████████████████████████████████████████████████████████████████▏ | 405/885 [07:35<08:10, 1.02s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:34<07:40, 4.95s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.12: Completed work on page 3 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 46%|████████████████████████████████████████████████████████████████████▏ | 405/885 [07:36<08:10, 1.02s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:34<07:40, 4.95s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.12: Completed work on page 3 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 46%|████████████████████████████████████████████████████████████████████▏ | 405/885 [07:36<08:10, 1.02s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:34<07:40, 4.95s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.12: Completed work on page 1 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 46%|████████████████████████████████████████████████████████████████████▏ | 405/885 [07:36<08:10, 1.02s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:34<07:40, 4.95s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.12: Completed work on page 1 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 46%|████████████████████████████████████████████████████████████████████▏ | 405/885 [07:36<08:10, 1.02s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:34<07:40, 4.95s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.23: Completed work on page 4 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 46%|████████████████████████████████████████████████████████████████████▏ | 405/885 [07:36<08:10, 1.02s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:34<07:40, 4.95s/ row]\n", - "\n", - " \n", - " \n", - "\n", - " \n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:34<07:40, 4.95s/ row](MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.23: Completed work on page 4\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 46%|████████████████████████████████████████████████████████████████████▏ | 405/885 [07:36<08:10, 1.02s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:35<07:40, 4.95s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.45: Completed work on page 5 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 46%|████████████████████████████████████████████████████████████████████▏ | 405/885 [07:36<08:10, 1.02s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:35<07:40, 4.95s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.45: Completed work on page 5 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 46%|████████████████████████████████████████████████████████████████████▏ | 405/885 [07:36<08:10, 1.02s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:35<07:40, 4.95s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.69: Completed work on page 6 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 46%|████████████████████████████████████████████████████████████████████▏ | 405/885 [07:36<08:10, 1.02s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:35<07:40, 4.95s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.69: Completed work on page 6 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 46%|████████████████████████████████████████████████████████████████████▏ | 405/885 [07:36<08:10, 1.02s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:35<07:40, 4.95s/ row]\n", - "\n", - "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 1; Queued blocks: 0; Resources: 1.0 CPU, 3.0MB object store: 46%|███████████████████████████████████████████████▌ | 405/885 [07:35<08:09, 1.02s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.94: Completed work on page 7 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 46%|████████████████████████████████████████████████████████████████████▏ | 405/885 [07:36<08:10, 1.02s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:35<07:40, 4.95s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.94: Completed work on page 7 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 46%|████████████████████████████████████████████████████████████████████▏ | 405/885 [07:36<08:10, 1.02s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:35<07:40, 4.95s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.11: Completed work on page 8 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 46%|████████████████████████████████████████████████████████████████████▏ | 405/885 [07:37<08:10, 1.02s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:35<07:40, 4.95s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.11: Completed work on page 8 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 46%|████████████████████████████████████████████████████████████████████▏ | 405/885 [07:37<08:10, 1.02s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:35<07:40, 4.95s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.28: Completed work on page 9 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 46%|████████████████████████████████████████████████████████████████████▏ | 405/885 [07:37<08:10, 1.02s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:35<07:40, 4.95s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.28: Completed work on page 9 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 46%|████████████████████████████████████████████████████████████████████▏ | 405/885 [07:37<08:10, 1.02s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:35<07:40, 4.95s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.47: Completed work on page 10 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 46%|████████████████████████████████████████████████████████████████████▏ | 405/885 [07:37<08:10, 1.02s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:35<07:40, 4.95s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.47: Completed work on page 10 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 46%|████████████████████████████████████████████████████████████████████▏ | 405/885 [07:37<08:10, 1.02s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:35<07:40, 4.95s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.63: Completed work on page 11 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 46%|████████████████████████████████████████████████████████████████████▏ | 405/885 [07:37<08:10, 1.02s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:35<07:40, 4.95s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.63: Completed work on page 11 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 46%|████████████████████████████████████████████████████████████████████▏ | 405/885 [07:37<08:10, 1.02s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:35<07:40, 4.95s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.80: Completed work on page 12 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 46%|████████████████████████████████████████████████████████████████████▏ | 405/885 [07:37<08:10, 1.02s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:36<07:40, 4.95s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.80: Completed work on page 12 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 46%|████████████████████████████████████████████████████████████████████▏ | 405/885 [07:37<08:10, 1.02s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:36<07:40, 4.95s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 3.00: Completed work on page 13 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 46%|████████████████████████████████████████████████████████████████████▏ | 405/885 [07:37<08:10, 1.02s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:36<07:40, 4.95s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 3.00: Completed work on page 13 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 46%|████████████████████████████████████████████████████████████████████▏ | 405/885 [07:37<08:10, 1.02s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:36<07:40, 4.95s/ row]\n", - "\n", - "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 1; Queued blocks: 0; Resources: 1.0 CPU, 3.0MB object store: 46%|███████████████████████████████████████████████▌ | 405/885 [07:36<08:09, 1.02s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:36<07:40, 4.95s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 3.17: Completed work on page 14 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 46%|████████████████████████████████████████████████████████████████████▏ | 405/885 [07:38<08:10, 1.02s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:36<07:40, 4.95s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 3.17: Completed work on page 14 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 46%|████████████████████████████████████████████████████████████████████▏ | 405/885 [07:38<08:10, 1.02s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:36<07:40, 4.95s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 3.34: Completed work on page 15 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 46%|████████████████████████████████████████████████████████████████████▏ | 405/885 [07:38<08:10, 1.02s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:36<07:40, 4.95s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 3.34: Completed work on page 15 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 46%|████████████████████████████████████████████████████████████████████▏ | 405/885 [07:38<08:10, 1.02s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:36<07:40, 4.95s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 3.52: Completed work on page 16 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 46%|████████████████████████████████████████████████████████████████████▏ | 405/885 [07:38<08:10, 1.02s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:36<07:40, 4.95s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 3.52: Completed work on page 16 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 46%|████████████████████████████████████████████████████████████████████▏ | 405/885 [07:38<08:10, 1.02s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:36<07:40, 4.95s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 3.71: Completed work on page 17 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 46%|████████████████████████████████████████████████████████████████████▏ | 405/885 [07:38<08:10, 1.02s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:36<07:40, 4.95s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 3.71: Completed work on page 17 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 46%|████████████████████████████████████████████████████████████████████▏ | 405/885 [07:38<08:10, 1.02s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:36<07:40, 4.95s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 3.86: Completed work on page 18 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 46%|████████████████████████████████████████████████████████████████████▏ | 405/885 [07:38<08:10, 1.02s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:37<07:40, 4.95s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 3.86: Completed work on page 18 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 46%|████████████████████████████████████████████████████████████████████▏ | 405/885 [07:38<08:10, 1.02s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:37<07:40, 4.95s/ row]\n", - "\n", - "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 1; Queued blocks: 0; Resources: 1.0 CPU, 3.0MB object store: 46%|███████████████████████████████████████████████▌ | 405/885 [07:37<08:09, 1.02s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:37<07:40, 4.95s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 4.12: Completed work on page 19 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 46%|████████████████████████████████████████████████████████████████████▏ | 405/885 [07:39<08:10, 1.02s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:37<07:40, 4.95s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 4.12: Completed work on page 19 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 46%|████████████████████████████████████████████████████████████████████▏ | 405/885 [07:39<08:10, 1.02s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:37<07:40, 4.95s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 4.21: Completed work on page 20 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 46%|████████████████████████████████████████████████████████████████████▏ | 405/885 [07:39<08:10, 1.02s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:37<07:40, 4.95s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 4.21: Completed work on page 20 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 46%|████████████████████████████████████████████████████████████████████▏ | 405/885 [07:39<08:10, 1.02s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:38<07:40, 4.95s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 4.40: Completed work on page 21 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 46%|████████████████████████████████████████████████████████████████████▏ | 405/885 [07:39<08:10, 1.02s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:38<07:40, 4.95s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 4.40: Completed work on page 21 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 46%|████████████████████████████████████████████████████████████████████▏ | 405/885 [07:39<08:10, 1.02s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:38<07:40, 4.95s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 4.58: Completed work on page 22 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 46%|████████████████████████████████████████████████████████████████████▏ | 405/885 [07:39<08:10, 1.02s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:38<07:40, 4.95s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 4.58: Completed work on page 22 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 46%|████████████████████████████████████████████████████████████████████▏ | 405/885 [07:39<08:10, 1.02s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:38<07:40, 4.95s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 4.75: Completed work on page 23 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 46%|████████████████████████████████████████████████████████████████████▏ | 405/885 [07:39<08:10, 1.02s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:38<07:40, 4.95s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 4.75: Completed work on page 23 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 46%|████████████████████████████████████████████████████████████████████▏ | 405/885 [07:39<08:10, 1.02s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:38<07:40, 4.95s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 4.89: Completed work on page 24 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 46%|████████████████████████████████████████████████████████████████████▏ | 405/885 [07:39<08:10, 1.02s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:38<07:40, 4.95s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 4.89: Completed work on page 24 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 46%|████████████████████████████████████████████████████████████████████▏ | 405/885 [07:39<08:10, 1.02s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:38<07:40, 4.95s/ row]\n", - "\n", - "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 1; Queued blocks: 0; Resources: 1.0 CPU, 3.0MB object store: 46%|███████████████████████████████████████████████▌ | 405/885 [07:38<08:09, 1.02s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:38<07:40, 4.95s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 5.18: Completed work on page 25 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 46%|████████████████████████████████████████████████████████████████████▏ | 405/885 [07:40<08:10, 1.02s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:38<07:40, 4.95s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 5.18: Completed work on page 25 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 46%|████████████████████████████████████████████████████████████████████▏ | 405/885 [07:40<08:10, 1.02s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:38<07:40, 4.95s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 5.40: Completed work on page 28 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 46%|████████████████████████████████████████████████████████████████████▏ | 405/885 [07:40<08:10, 1.02s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:38<07:40, 4.95s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 5.40: Completed work on page 28 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 46%|████████████████████████████████████████████████████████████████████▏ | 405/885 [07:40<08:10, 1.02s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:38<07:40, 4.95s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 5.42: Completed work on page 27 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 46%|████████████████████████████████████████████████████████████████████▏ | 405/885 [07:40<08:10, 1.02s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:38<07:40, 4.95s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 5.42: Completed work on page 27 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 46%|████████████████████████████████████████████████████████████████████▏ | 405/885 [07:40<08:10, 1.02s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:38<07:40, 4.95s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 5.43: Completed work on page 26 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 46%|████████████████████████████████████████████████████████████████████▏ | 405/885 [07:40<08:10, 1.02s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:39<07:40, 4.95s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 5.43: Completed work on page 26 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 46%|████████████████████████████████████████████████████████████████████▏ | 405/885 [07:40<08:10, 1.02s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:39<07:40, 4.95s/ row]\n", - "\n", - "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 1; Queued blocks: 0; Resources: 1.0 CPU, 3.0MB object store: 46%|███████████████████████████████████████████████▌ | 405/885 [07:39<08:09, 1.02s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:39<07:40, 4.95s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.00: Server version aryn-partitioner-0.20250323.124348 Model version 1.4 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 46%|████████████████████████████████████████████████████████████████████▏ | 405/885 [07:41<08:10, 1.02s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:39<07:40, 4.95s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.00: Server version aryn-partitioner-0.20250323.124348 Model version 1.4 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 46%|████████████████████████████████████████████████████████████████████▏ | 405/885 [07:41<08:10, 1.02s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:39<07:40, 4.95s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.00: Received request with aryn_call_id=aryn:call-atacj8ysncrz3as573yyo1f \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 46%|████████████████████████████████████████████████████████████████████▏ | 405/885 [07:41<08:10, 1.02s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:39<07:40, 4.95s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.00: Received request with aryn_call_id=aryn:call-atacj8ysncrz3as573yyo1f \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 46%|████████████████████████████████████████████████████████████████████▏ | 405/885 [07:41<08:10, 1.02s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:39<07:40, 4.95s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.00: Waiting for scheduling \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 46%|████████████████████████████████████████████████████████████████████▏ | 405/885 [07:41<08:10, 1.02s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:39<07:40, 4.95s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.00: Waiting for scheduling \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 46%|████████████████████████████████████████████████████████████████████▏ | 405/885 [07:41<08:10, 1.02s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:39<07:40, 4.95s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:httpx:HTTP Request: POST https://api.aryn.cloud/v1/document/partition \"HTTP/1.1 200 OK\" [repeated 3x across cluster] \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 46%|████████████████████████████████████████████████████████████████████▏ | 405/885 [07:41<08:10, 1.02s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:39<07:40, 4.95s/ row]\n", - "\n", - "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 1; Queued blocks: 0; Resources: 1.0 CPU, 3.0MB object store: 46%|███████████████████████████████████████████████▌ | 405/885 [07:39<08:09, 1.02s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:40<07:40, 4.95s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.01: Preprocessing document \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 46%|████████████████████████████████████████████████████████████████████▏ | 405/885 [07:42<08:10, 1.02s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:40<07:40, 4.95s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.01: Preprocessing document \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 46%|████████████████████████████████████████████████████████████████████▏ | 405/885 [07:42<08:10, 1.02s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:40<07:40, 4.95s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.01: Done preprocessing document \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 46%|████████████████████████████████████████████████████████████████████▏ | 405/885 [07:42<08:10, 1.02s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:40<07:40, 4.95s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.01: Done preprocessing document \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 46%|████████████████████████████████████████████████████████████████████▏ | 405/885 [07:42<08:10, 1.02s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:40<07:40, 4.95s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.65: Completed work on page 2 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 46%|████████████████████████████████████████████████████████████████████▏ | 405/885 [07:42<08:10, 1.02s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:40<07:40, 4.95s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.65: Completed work on page 2 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 46%|████████████████████████████████████████████████████████████████████▏ | 405/885 [07:42<08:10, 1.02s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:40<07:40, 4.95s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.74: Completed work on page 1 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 46%|████████████████████████████████████████████████████████████████████▏ | 405/885 [07:42<08:10, 1.02s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:40<07:40, 4.95s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.74: Completed work on page 1 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 46%|████████████████████████████████████████████████████████████████████▏ | 405/885 [07:42<08:10, 1.02s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:40<07:40, 4.95s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.89: Completed work on page 3 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 46%|████████████████████████████████████████████████████████████████████▏ | 405/885 [07:42<08:10, 1.02s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:40<07:40, 4.95s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.89: Completed work on page 3 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 46%|████████████████████████████████████████████████████████████████████▏ | 405/885 [07:42<08:10, 1.02s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:40<07:40, 4.95s/ row]\n", - "\n", - "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 1; Queued blocks: 0; Resources: 1.0 CPU, 3.0MB object store: 46%|███████████████████████████████████████████████▌ | 405/885 [07:40<08:09, 1.02s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 1.7MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:41<07:40, 4.95s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 1.7MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:41<07:40, 4.95s/ row]\n", - "\n", - "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 0; Queued blocks: 0; Resources: 0.0 CPU, 0.0B object store: 46%|████████████████████████████████████████████████ | 405/885 [07:41<08:09, 1.02s/ row]\n", - "\n", - "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 0; Queued blocks: 0; Resources: 0.0 CPU, 0.0B object store: 47%|█████████████████████████████████████████████████▊ | 420/885 [07:41<07:11, 1.08 row/s]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.08: Completed work on page 4 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.7MB/1.0GB object store: 47%|██████████████████████████████████████████████████████████████████████▋ | 420/885 [07:43<07:10, 1.08 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 1.7MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:41<07:40, 4.95s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.08: Completed work on page 4 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.7MB/1.0GB object store: 47%|██████████████████████████████████████████████████████████████████████▋ | 420/885 [07:43<07:10, 1.08 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 1.7MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:41<07:40, 4.95s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.28: Completed work on page 5 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.7MB/1.0GB object store: 47%|██████████████████████████████████████████████████████████████████████▋ | 420/885 [07:43<07:10, 1.08 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 1.7MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:41<07:40, 4.95s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.28: Completed work on page 5 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.7MB/1.0GB object store: 47%|██████████████████████████████████████████████████████████████████████▋ | 420/885 [07:43<07:10, 1.08 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 1.7MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:41<07:40, 4.95s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.50: Completed work on page 6 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.7MB/1.0GB object store: 47%|██████████████████████████████████████████████████████████████████████▋ | 420/885 [07:43<07:10, 1.08 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 1.7MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:41<07:40, 4.95s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.50: Completed work on page 6 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.7MB/1.0GB object store: 47%|██████████████████████████████████████████████████████████████████████▋ | 420/885 [07:43<07:10, 1.08 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 1.7MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:41<07:40, 4.95s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.79: Completed work on page 7 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.7MB/1.0GB object store: 47%|██████████████████████████████████████████████████████████████████████▋ | 420/885 [07:43<07:10, 1.08 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 1.7MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:41<07:40, 4.95s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.79: Completed work on page 7 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.7MB/1.0GB object store: 47%|██████████████████████████████████████████████████████████████████████▋ | 420/885 [07:43<07:10, 1.08 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 1.7MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:41<07:40, 4.95s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.97: Completed work on page 8 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.7MB/1.0GB object store: 47%|██████████████████████████████████████████████████████████████████████▋ | 420/885 [07:43<07:10, 1.08 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 1.7MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:42<07:40, 4.95s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.97: Completed work on page 8 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.7MB/1.0GB object store: 47%|██████████████████████████████████████████████████████████████████████▋ | 420/885 [07:43<07:10, 1.08 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 1.7MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:42<07:40, 4.95s/ row]\n", - "\n", - "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 0; Queued blocks: 0; Resources: 0.0 CPU, 0.0B object store: 47%|█████████████████████████████████████████████████▊ | 420/885 [07:42<07:11, 1.08 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 1.7MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:42<07:40, 4.95s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.7MB/1.0GB object store: 47%|██████████████████████████████████████████████████████████████████████▋ | 420/885 [07:44<07:10, 1.08 row/s](MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.14: Completed work on page 9\n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.7MB/1.0GB object store: 47%|██████████████████████████████████████████████████████████████████████▋ | 420/885 [07:44<07:10, 1.08 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 1.7MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:42<07:40, 4.95s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.14: Completed work on page 9 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.7MB/1.0GB object store: 47%|██████████████████████████████████████████████████████████████████████▋ | 420/885 [07:44<07:10, 1.08 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 1.7MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:42<07:40, 4.95s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.33: Completed work on page 10 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.7MB/1.0GB object store: 47%|██████████████████████████████████████████████████████████████████████▋ | 420/885 [07:44<07:10, 1.08 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 1.7MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:42<07:40, 4.95s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.33: Completed work on page 10 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.7MB/1.0GB object store: 47%|██████████████████████████████████████████████████████████████████████▋ | 420/885 [07:44<07:10, 1.08 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 1.7MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:42<07:40, 4.95s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.53: Completed work on page 11 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.7MB/1.0GB object store: 47%|██████████████████████████████████████████████████████████████████████▋ | 420/885 [07:44<07:10, 1.08 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 1.7MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:42<07:40, 4.95s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.53: Completed work on page 11 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.7MB/1.0GB object store: 47%|██████████████████████████████████████████████████████████████████████▋ | 420/885 [07:44<07:10, 1.08 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 1.7MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:42<07:40, 4.95s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.71: Completed work on page 12 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.7MB/1.0GB object store: 47%|██████████████████████████████████████████████████████████████████████▋ | 420/885 [07:44<07:10, 1.08 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 1.7MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:42<07:40, 4.95s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.71: Completed work on page 12 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.7MB/1.0GB object store: 47%|██████████████████████████████████████████████████████████████████████▋ | 420/885 [07:44<07:10, 1.08 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 1.7MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:42<07:40, 4.95s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.93: Completed work on page 13 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.7MB/1.0GB object store: 47%|██████████████████████████████████████████████████████████████████████▋ | 420/885 [07:44<07:10, 1.08 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 1.7MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:42<07:40, 4.95s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.93: Completed work on page 13 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.7MB/1.0GB object store: 47%|██████████████████████████████████████████████████████████████████████▋ | 420/885 [07:44<07:10, 1.08 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 1.7MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:42<07:40, 4.95s/ row]\n", - "\n", - "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 0; Queued blocks: 0; Resources: 0.0 CPU, 0.0B object store: 47%|█████████████████████████████████████████████████▊ | 420/885 [07:42<07:11, 1.08 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 1.7MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:43<07:40, 4.95s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.7MB/1.0GB object store: 47%|██████████████████████████████████████████████████████████████████████▋ | 420/885 [07:45<07:10, 1.08 row/s] ArynPartitioner: T+ 3.24: Completed work on page 14\n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.7MB/1.0GB object store: 47%|██████████████████████████████████████████████████████████████████████▋ | 420/885 [07:45<07:10, 1.08 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 1.7MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:43<07:40, 4.95s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 3.24: Completed work on page 14 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.7MB/1.0GB object store: 47%|██████████████████████████████████████████████████████████████████████▋ | 420/885 [07:45<07:10, 1.08 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 1.7MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:43<07:40, 4.95s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 3.41: Completed work on page 15 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.7MB/1.0GB object store: 47%|██████████████████████████████████████████████████████████████████████▋ | 420/885 [07:45<07:10, 1.08 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 1.7MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:43<07:40, 4.95s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 3.41: Completed work on page 15 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.7MB/1.0GB object store: 47%|██████████████████████████████████████████████████████████████████████▋ | 420/885 [07:45<07:10, 1.08 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 1.7MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:43<07:40, 4.95s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 3.59: Completed work on page 16 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.7MB/1.0GB object store: 47%|██████████████████████████████████████████████████████████████████████▋ | 420/885 [07:45<07:10, 1.08 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 1.7MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:43<07:40, 4.95s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 3.59: Completed work on page 16 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.7MB/1.0GB object store: 47%|██████████████████████████████████████████████████████████████████████▋ | 420/885 [07:45<07:10, 1.08 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 1.7MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:43<07:40, 4.95s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 3.76: Completed work on page 17 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.7MB/1.0GB object store: 47%|██████████████████████████████████████████████████████████████████████▋ | 420/885 [07:45<07:10, 1.08 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 1.7MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:43<07:40, 4.95s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 3.76: Completed work on page 17 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.7MB/1.0GB object store: 47%|██████████████████████████████████████████████████████████████████████▋ | 420/885 [07:45<07:10, 1.08 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 1.7MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:43<07:40, 4.95s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 3.95: Completed work on page 18 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.7MB/1.0GB object store: 47%|██████████████████████████████████████████████████████████████████████▋ | 420/885 [07:45<07:10, 1.08 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 1.7MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:43<07:40, 4.95s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 3.95: Completed work on page 18 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.7MB/1.0GB object store: 47%|██████████████████████████████████████████████████████████████████████▋ | 420/885 [07:45<07:10, 1.08 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 1.7MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:44<07:40, 4.95s/ row]\n", - "\n", - "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 0; Queued blocks: 0; Resources: 0.0 CPU, 0.0B object store: 47%|█████████████████████████████████████████████████▊ | 420/885 [07:44<07:11, 1.08 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 1.7MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:44<07:40, 4.95s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 4.13: Completed work on page 19 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.7MB/1.0GB object store: 47%|██████████████████████████████████████████████████████████████████████▋ | 420/885 [07:46<07:10, 1.08 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 1.7MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:44<07:40, 4.95s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.7MB/1.0GB object store: 47%|██████████████████████████████████████████████████████████████████████▋ | 420/885 [07:46<07:10, 1.08 row/s](MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 4.13: Completed work on page 19\n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.7MB/1.0GB object store: 47%|██████████████████████████████████████████████████████████████████████▋ | 420/885 [07:46<07:10, 1.08 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 1.7MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:44<07:40, 4.95s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 4.32: Completed work on page 20 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.7MB/1.0GB object store: 47%|██████████████████████████████████████████████████████████████████████▋ | 420/885 [07:46<07:10, 1.08 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 1.7MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:44<07:40, 4.95s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 4.32: Completed work on page 20 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.7MB/1.0GB object store: 47%|██████████████████████████████████████████████████████████████████████▋ | 420/885 [07:46<07:10, 1.08 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 1.7MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:44<07:40, 4.95s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 4.48: Completed work on page 21 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.7MB/1.0GB object store: 47%|██████████████████████████████████████████████████████████████████████▋ | 420/885 [07:46<07:10, 1.08 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 1.7MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:44<07:40, 4.95s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 4.48: Completed work on page 21 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.7MB/1.0GB object store: 47%|██████████████████████████████████████████████████████████████████████▋ | 420/885 [07:46<07:10, 1.08 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 1.7MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:44<07:40, 4.95s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 4.68: Completed work on page 22 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.7MB/1.0GB object store: 47%|██████████████████████████████████████████████████████████████████████▋ | 420/885 [07:46<07:10, 1.08 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 1.7MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:45<07:40, 4.95s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 4.68: Completed work on page 22 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.7MB/1.0GB object store: 47%|██████████████████████████████████████████████████████████████████████▋ | 420/885 [07:46<07:10, 1.08 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 1.7MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:45<07:40, 4.95s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 4.84: Completed work on page 23 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.7MB/1.0GB object store: 47%|██████████████████████████████████████████████████████████████████████▋ | 420/885 [07:46<07:10, 1.08 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 1.7MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:45<07:40, 4.95s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 4.84: Completed work on page 23 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.7MB/1.0GB object store: 47%|██████████████████████████████████████████████████████████████████████▋ | 420/885 [07:46<07:10, 1.08 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 1.7MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:45<07:40, 4.95s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 5.05: Completed work on page 24 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.7MB/1.0GB object store: 47%|██████████████████████████████████████████████████████████████████████▋ | 420/885 [07:46<07:10, 1.08 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 1.7MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:45<07:40, 4.95s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 5.05: Completed work on page 24 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.7MB/1.0GB object store: 47%|██████████████████████████████████████████████████████████████████████▋ | 420/885 [07:46<07:10, 1.08 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 1.7MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:45<07:40, 4.95s/ row]\n", - "\n", - "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 0; Queued blocks: 0; Resources: 0.0 CPU, 0.0B object store: 47%|█████████████████████████████████████████████████▊ | 420/885 [07:45<07:11, 1.08 row/s]\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 5.32: Completed work on page 25 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.7MB/1.0GB object store: 47%|██████████████████████████████████████████████████████████████████████▋ | 420/885 [07:47<07:10, 1.08 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 1.7MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:45<07:40, 4.95s/ row]\n", - "\n", - " \n", - " \n", - "\n", - " \n", - "\n", - "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 0; Queued blocks: 0; Resources: 0.0 CPU, 0.0B object store: 47%|█████████████████████████████████████████████████▊ | 420/885 [07:45<07:11, 1.08 row/s](MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 5.32: Completed work on page 25\n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.7MB/1.0GB object store: 47%|██████████████████████████████████████████████████████████████████████▋ | 420/885 [07:47<07:10, 1.08 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 1.7MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:45<07:40, 4.95s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 5.40: Completed work on page 27 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.7MB/1.0GB object store: 47%|██████████████████████████████████████████████████████████████████████▋ | 420/885 [07:47<07:10, 1.08 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 1.7MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:45<07:40, 4.95s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 5.40: Completed work on page 27 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.7MB/1.0GB object store: 47%|██████████████████████████████████████████████████████████████████████▋ | 420/885 [07:47<07:10, 1.08 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 1.7MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:45<07:40, 4.95s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 5.46: Completed work on page 26 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.7MB/1.0GB object store: 47%|██████████████████████████████████████████████████████████████████████▋ | 420/885 [07:47<07:10, 1.08 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 1.7MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:45<07:40, 4.95s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 5.46: Completed work on page 26 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.7MB/1.0GB object store: 47%|██████████████████████████████████████████████████████████████████████▋ | 420/885 [07:47<07:10, 1.08 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 1.7MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:45<07:40, 4.95s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:httpx:HTTP Request: POST https://api.aryn.cloud/v1/document/partition \"HTTP/1.1 200 OK\" \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.7MB/1.0GB object store: 47%|██████████████████████████████████████████████████████████████████████▋ | 420/885 [07:47<07:10, 1.08 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 27; Resources: 1.0 CPU, 1.7MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:46<07:40, 4.95s/ row]\n", - "\n", - "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 0; Queued blocks: 0; Resources: 0.0 CPU, 0.0B object store: 47%|█████████████████████████████████████████████████▊ | 420/885 [07:46<07:11, 1.08 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 26; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 47%|█████████████████████████████████▋ | 84.0/177 [07:46<07:40, 4.95s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 26; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 49%|██████████████████████████████████▉ | 87.0/177 [07:46<07:17, 4.86s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 26; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 49%|██████████████████████████████████▉ | 87.0/177 [07:46<07:17, 4.86s/ row]\n", - "\n", - "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 1; Queued blocks: 0; Resources: 1.0 CPU, 2.9MB object store: 47%|█████████████████████████████████████████████████▎ | 420/885 [07:46<07:11, 1.08 row/s]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.00: Server version aryn-partitioner-0.20250323.124348 Model version 1.4 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 47%|██████████████████████████████████████████████████████████████████████▋ | 420/885 [07:49<07:10, 1.08 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 26; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 49%|██████████████████████████████████▉ | 87.0/177 [07:47<07:17, 4.86s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.00: Server version aryn-partitioner-0.20250323.124348 Model version 1.4 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 47%|██████████████████████████████████████████████████████████████████████▋ | 420/885 [07:49<07:10, 1.08 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 26; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 49%|██████████████████████████████████▉ | 87.0/177 [07:47<07:17, 4.86s/ row]\n", - "\n", - " \n", - " \n", - "\n", - " \n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 26; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 49%|██████████████████████████████████▉ | 87.0/177 [07:47<07:17, 4.86s/ row](MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.00: Received request with aryn_call_id=aryn:call-frgsvlh90286wc0wkj0nske\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 47%|██████████████████████████████████████████████████████████████████████▋ | 420/885 [07:49<07:10, 1.08 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 26; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 49%|██████████████████████████████████▉ | 87.0/177 [07:47<07:17, 4.86s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.00: Received request with aryn_call_id=aryn:call-frgsvlh90286wc0wkj0nske \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 47%|██████████████████████████████████████████████████████████████████████▋ | 420/885 [07:49<07:10, 1.08 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 26; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 49%|██████████████████████████████████▉ | 87.0/177 [07:47<07:17, 4.86s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.00: Waiting for scheduling \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 47%|██████████████████████████████████████████████████████████████████████▋ | 420/885 [07:49<07:10, 1.08 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 26; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 49%|██████████████████████████████████▉ | 87.0/177 [07:47<07:17, 4.86s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.00: Waiting for scheduling \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 47%|██████████████████████████████████████████████████████████████████████▋ | 420/885 [07:49<07:10, 1.08 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 26; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 49%|██████████████████████████████████▉ | 87.0/177 [07:47<07:17, 4.86s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.01: Preprocessing document \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 47%|██████████████████████████████████████████████████████████████████████▋ | 420/885 [07:49<07:10, 1.08 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 26; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 49%|██████████████████████████████████▉ | 87.0/177 [07:47<07:17, 4.86s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.01: Preprocessing document \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 47%|██████████████████████████████████████████████████████████████████████▋ | 420/885 [07:49<07:10, 1.08 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 26; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 49%|██████████████████████████████████▉ | 87.0/177 [07:47<07:17, 4.86s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.01: Done preprocessing document \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 47%|██████████████████████████████████████████████████████████████████████▋ | 420/885 [07:49<07:10, 1.08 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 26; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 49%|██████████████████████████████████▉ | 87.0/177 [07:47<07:17, 4.86s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.01: Done preprocessing document \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 47%|██████████████████████████████████████████████████████████████████████▋ | 420/885 [07:49<07:10, 1.08 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 26; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 49%|██████████████████████████████████▉ | 87.0/177 [07:47<07:17, 4.86s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.73: Completed work on page 1 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 47%|██████████████████████████████████████████████████████████████████████▋ | 420/885 [07:49<07:10, 1.08 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 26; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 49%|██████████████████████████████████▉ | 87.0/177 [07:47<07:17, 4.86s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.73: Completed work on page 1 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 47%|██████████████████████████████████████████████████████████████████████▋ | 420/885 [07:49<07:10, 1.08 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 26; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 49%|██████████████████████████████████▉ | 87.0/177 [07:47<07:17, 4.86s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.75: Completed work on page 2 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 47%|██████████████████████████████████████████████████████████████████████▋ | 420/885 [07:49<07:10, 1.08 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 26; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 49%|██████████████████████████████████▉ | 87.0/177 [07:47<07:17, 4.86s/ row]\n", - "\n", - " \n", - " \n", - "\n", - " \n", - "\n", - "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 1; Queued blocks: 0; Resources: 1.0 CPU, 2.9MB object store: 47%|█████████████████████████████████████████████████▎ | 420/885 [07:47<07:11, 1.08 row/s](MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.75: Completed work on page 2\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 47%|██████████████████████████████████████████████████████████████████████▋ | 420/885 [07:49<07:10, 1.08 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 26; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 49%|██████████████████████████████████▉ | 87.0/177 [07:47<07:17, 4.86s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.06: Completed work on page 3 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 47%|██████████████████████████████████████████████████████████████████████▋ | 420/885 [07:49<07:10, 1.08 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 26; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 49%|██████████████████████████████████▉ | 87.0/177 [07:48<07:17, 4.86s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.06: Completed work on page 3 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 47%|██████████████████████████████████████████████████████████████████████▋ | 420/885 [07:49<07:10, 1.08 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 26; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 49%|██████████████████████████████████▉ | 87.0/177 [07:48<07:17, 4.86s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.22: Completed work on page 4 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 47%|██████████████████████████████████████████████████████████████████████▋ | 420/885 [07:50<07:10, 1.08 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 26; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 49%|██████████████████████████████████▉ | 87.0/177 [07:48<07:17, 4.86s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.22: Completed work on page 4 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 47%|██████████████████████████████████████████████████████████████████████▋ | 420/885 [07:50<07:10, 1.08 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 26; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 49%|██████████████████████████████████▉ | 87.0/177 [07:48<07:17, 4.86s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.42: Completed work on page 5 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 47%|██████████████████████████████████████████████████████████████████████▋ | 420/885 [07:50<07:10, 1.08 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 26; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 49%|██████████████████████████████████▉ | 87.0/177 [07:48<07:17, 4.86s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.42: Completed work on page 5 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 47%|██████████████████████████████████████████████████████████████████████▋ | 420/885 [07:50<07:10, 1.08 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 26; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 49%|██████████████████████████████████▉ | 87.0/177 [07:48<07:17, 4.86s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.52: Completed work on page 6 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 47%|██████████████████████████████████████████████████████████████████████▋ | 420/885 [07:50<07:10, 1.08 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 26; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 49%|██████████████████████████████████▉ | 87.0/177 [07:48<07:17, 4.86s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.52: Completed work on page 6 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 47%|██████████████████████████████████████████████████████████████████████▋ | 420/885 [07:50<07:10, 1.08 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 26; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 49%|██████████████████████████████████▉ | 87.0/177 [07:48<07:17, 4.86s/ row]\n", - "\n", - " \n", - " \n", - "\n", - " \n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 26; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 49%|██████████████████████████████████▉ | 87.0/177 [07:48<07:17, 4.86s/ row](MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.81: Completed work on page 7\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 47%|██████████████████████████████████████████████████████████████████████▋ | 420/885 [07:50<07:10, 1.08 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 26; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 49%|██████████████████████████████████▉ | 87.0/177 [07:48<07:17, 4.86s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.81: Completed work on page 7 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 47%|██████████████████████████████████████████████████████████████████████▋ | 420/885 [07:50<07:10, 1.08 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 26; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 49%|██████████████████████████████████▉ | 87.0/177 [07:48<07:17, 4.86s/ row]\n", - "\n", - "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 1; Queued blocks: 0; Resources: 1.0 CPU, 2.9MB object store: 47%|█████████████████████████████████████████████████▎ | 420/885 [07:48<07:11, 1.08 row/s]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.01: Completed work on page 8 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 47%|██████████████████████████████████████████████████████████████████████▋ | 420/885 [07:51<07:10, 1.08 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 26; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 49%|██████████████████████████████████▉ | 87.0/177 [07:49<07:17, 4.86s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.01: Completed work on page 8 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 47%|██████████████████████████████████████████████████████████████████████▋ | 420/885 [07:51<07:10, 1.08 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 26; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 49%|██████████████████████████████████▉ | 87.0/177 [07:49<07:17, 4.86s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.21: Completed work on page 9 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 47%|██████████████████████████████████████████████████████████████████████▋ | 420/885 [07:51<07:10, 1.08 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 26; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 49%|██████████████████████████████████▉ | 87.0/177 [07:49<07:17, 4.86s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.21: Completed work on page 9 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 47%|██████████████████████████████████████████████████████████████████████▋ | 420/885 [07:51<07:10, 1.08 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 26; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 49%|██████████████████████████████████▉ | 87.0/177 [07:49<07:17, 4.86s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.38: Completed work on page 10 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 47%|██████████████████████████████████████████████████████████████████████▋ | 420/885 [07:51<07:10, 1.08 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 26; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 49%|██████████████████████████████████▉ | 87.0/177 [07:49<07:17, 4.86s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.38: Completed work on page 10 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 47%|██████████████████████████████████████████████████████████████████████▋ | 420/885 [07:51<07:10, 1.08 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 26; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 49%|██████████████████████████████████▉ | 87.0/177 [07:49<07:17, 4.86s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.56: Completed work on page 11 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 47%|██████████████████████████████████████████████████████████████████████▋ | 420/885 [07:51<07:10, 1.08 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 26; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 49%|██████████████████████████████████▉ | 87.0/177 [07:49<07:17, 4.86s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.56: Completed work on page 11 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 47%|██████████████████████████████████████████████████████████████████████▋ | 420/885 [07:51<07:10, 1.08 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 26; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 49%|██████████████████████████████████▉ | 87.0/177 [07:49<07:17, 4.86s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.76: Completed work on page 12 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 47%|██████████████████████████████████████████████████████████████████████▋ | 420/885 [07:51<07:10, 1.08 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 26; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 49%|██████████████████████████████████▉ | 87.0/177 [07:49<07:17, 4.86s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.76: Completed work on page 12 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 47%|██████████████████████████████████████████████████████████████████████▋ | 420/885 [07:51<07:10, 1.08 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 26; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 49%|██████████████████████████████████▉ | 87.0/177 [07:49<07:17, 4.86s/ row]\n", - "\n", - " \n", - " \n", - "\n", - " \n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 26; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 49%|██████████████████████████████████▉ | 87.0/177 [07:49<07:17, 4.86s/ row](MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.94: Completed work on page 13\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.2MB/1.0GB object store: 47%|██████████████████████████████████████████████████████████████████████▋ | 420/885 [07:51<07:10, 1.08 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 26; Resources: 1.0 CPU, 2.2MB object store; [locality off]: 49%|██████████████████████████████████▉ | 87.0/177 [07:49<07:17, 4.86s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) \n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.5MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:49<01:03, 3.54s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.00: Server version aryn-partitioner-0.20250323.124348 Model version 1.4 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 3.7MB/1.0GB object store: 86%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 729/843 [10:51<00:59, 1.91 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.5MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:50<01:03, 3.54s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.00: Server version aryn-partitioner-0.20250323.124348 Model version 1.4 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 3.7MB/1.0GB object store: 86%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 729/843 [10:51<00:59, 1.91 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.5MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:50<01:03, 3.54s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.00: Received request with aryn_call_id=aryn:call-dy062ooso38xhekxp2ljolf \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 3.7MB/1.0GB object store: 86%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 729/843 [10:51<00:59, 1.91 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.5MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:50<01:03, 3.54s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.00: Received request with aryn_call_id=aryn:call-dy062ooso38xhekxp2ljolf \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 3.7MB/1.0GB object store: 86%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 729/843 [10:51<00:59, 1.91 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.5MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:50<01:03, 3.54s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.00: Waiting for scheduling \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 3.7MB/1.0GB object store: 86%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 729/843 [10:51<00:59, 1.91 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.5MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:50<01:03, 3.54s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.00: Waiting for scheduling \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 3.7MB/1.0GB object store: 86%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 729/843 [10:51<00:59, 1.91 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.5MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:50<01:03, 3.54s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.03: Preprocessing document \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 3.7MB/1.0GB object store: 86%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 729/843 [10:51<00:59, 1.91 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.5MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:50<01:03, 3.54s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.03: Preprocessing document \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 3.7MB/1.0GB object store: 86%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 729/843 [10:51<00:59, 1.91 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.5MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:50<01:03, 3.54s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.03: Done preprocessing document \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 3.7MB/1.0GB object store: 86%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 729/843 [10:51<00:59, 1.91 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.5MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:50<01:03, 3.54s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.03: Done preprocessing document \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 3.7MB/1.0GB object store: 86%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 729/843 [10:51<00:59, 1.91 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.5MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:50<01:03, 3.54s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.89: Completed work on page 1 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 3.7MB/1.0GB object store: 86%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 729/843 [10:51<00:59, 1.91 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.5MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:50<01:03, 3.54s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.89: Completed work on page 1 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 3.7MB/1.0GB object store: 86%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 729/843 [10:51<00:59, 1.91 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.5MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:50<01:03, 3.54s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.90: Completed work on page 4 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 3.7MB/1.0GB object store: 86%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 729/843 [10:51<00:59, 1.91 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.5MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:50<01:03, 3.54s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.90: Completed work on page 4 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 3.7MB/1.0GB object store: 86%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 729/843 [10:51<00:59, 1.91 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.5MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:50<01:03, 3.54s/ row]\n", - "\n", - "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 1; Queued blocks: 0; Resources: 1.0 CPU, 2.2MB object store: 86%|█████████████████████████████████████████████████████████████████████████████████████████▉ | 729/843 [10:50<00:59, 1.91 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.5MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:50<01:03, 3.54s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.00: Completed work on page 2 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 3.7MB/1.0GB object store: 86%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 729/843 [10:52<00:59, 1.91 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.5MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:51<01:03, 3.54s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.00: Completed work on page 2 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 3.7MB/1.0GB object store: 86%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 729/843 [10:52<00:59, 1.91 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.5MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:51<01:03, 3.54s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.00: Completed work on page 3 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 3.7MB/1.0GB object store: 86%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 729/843 [10:52<00:59, 1.91 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.5MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:51<01:03, 3.54s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.00: Completed work on page 3 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 3.7MB/1.0GB object store: 86%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 729/843 [10:52<00:59, 1.91 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.5MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:51<01:03, 3.54s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.14: Completed work on page 5 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 3.7MB/1.0GB object store: 86%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 729/843 [10:52<00:59, 1.91 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.5MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:51<01:03, 3.54s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.14: Completed work on page 5 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 3.7MB/1.0GB object store: 86%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 729/843 [10:52<00:59, 1.91 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.5MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:51<01:03, 3.54s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.25: Completed work on page 6 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 3.7MB/1.0GB object store: 86%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 729/843 [10:52<00:59, 1.91 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.5MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:51<01:03, 3.54s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.25: Completed work on page 6 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 3.7MB/1.0GB object store: 86%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 729/843 [10:52<00:59, 1.91 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.5MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:51<01:03, 3.54s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.40: Completed work on page 7 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 3.7MB/1.0GB object store: 86%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 729/843 [10:52<00:59, 1.91 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.5MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:51<01:03, 3.54s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.40: Completed work on page 7 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 3.7MB/1.0GB object store: 86%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 729/843 [10:52<00:59, 1.91 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.5MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:51<01:03, 3.54s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.50: Completed work on page 8 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 3.7MB/1.0GB object store: 86%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 729/843 [10:52<00:59, 1.91 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.5MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:51<01:03, 3.54s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.50: Completed work on page 8 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 3.7MB/1.0GB object store: 86%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 729/843 [10:52<00:59, 1.91 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.5MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:51<01:03, 3.54s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.61: Completed work on page 9 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 3.7MB/1.0GB object store: 86%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 729/843 [10:52<00:59, 1.91 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.5MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:51<01:03, 3.54s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.61: Completed work on page 9 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 3.7MB/1.0GB object store: 86%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 729/843 [10:52<00:59, 1.91 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.5MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:51<01:03, 3.54s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.75: Completed work on page 10 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 3.7MB/1.0GB object store: 86%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 729/843 [10:52<00:59, 1.91 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.5MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:51<01:03, 3.54s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.75: Completed work on page 10 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 3.7MB/1.0GB object store: 86%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 729/843 [10:52<00:59, 1.91 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.5MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:51<01:03, 3.54s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.85: Completed work on page 11 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 3.7MB/1.0GB object store: 86%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 729/843 [10:52<00:59, 1.91 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.5MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:51<01:03, 3.54s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.85: Completed work on page 11 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 3.7MB/1.0GB object store: 86%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 729/843 [10:52<00:59, 1.91 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.5MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:51<01:03, 3.54s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapBatches(materialize)->MapBatches(split_doc)->MapBatches(llm_map)->MapBatches(parse_json_and_cast)->MapBatches(process_doc)->MapBatches(removeOriginalElements)->MapBatches(process_doc)->MapBatches(mergeDialogue)->MapBatches(process_doc)->MapBatches(OpenAIEmbedder)->MapBatches(_write_docs_tt) pid=77877) INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\" [repeated 3x across cluster]\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 3.7MB/1.0GB object store: 86%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 729/843 [10:52<00:59, 1.91 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.5MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:51<01:03, 3.54s/ row]\n", - "\n", - "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 1; Queued blocks: 0; Resources: 1.0 CPU, 2.2MB object store: 86%|█████████████████████████████████████████████████████████████████████████████████████████▉ | 729/843 [10:51<00:59, 1.91 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.5MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:51<01:03, 3.54s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.05: Completed work on page 12 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 3.7MB/1.0GB object store: 86%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 729/843 [10:53<00:59, 1.91 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.5MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:52<01:03, 3.54s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.05: Completed work on page 12 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 3.7MB/1.0GB object store: 86%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 729/843 [10:53<00:59, 1.91 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.5MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:52<01:03, 3.54s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.05: Completed work on page 13 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 3.7MB/1.0GB object store: 86%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 729/843 [10:53<00:59, 1.91 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.5MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:52<01:03, 3.54s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.05: Completed work on page 13 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 3.7MB/1.0GB object store: 86%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 729/843 [10:53<00:59, 1.91 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.5MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:52<01:03, 3.54s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.23: Completed work on page 14 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 3.7MB/1.0GB object store: 86%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 729/843 [10:53<00:59, 1.91 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.5MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:52<01:03, 3.54s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.23: Completed work on page 14 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 3.7MB/1.0GB object store: 86%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 729/843 [10:53<00:59, 1.91 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.5MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:52<01:03, 3.54s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.68: Completed work on page 15 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 3.7MB/1.0GB object store: 86%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 729/843 [10:53<00:59, 1.91 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.5MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:52<01:03, 3.54s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.68: Completed work on page 15 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 3.7MB/1.0GB object store: 86%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 729/843 [10:53<00:59, 1.91 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.5MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:52<01:03, 3.54s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.85: Completed work on page 17 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 3.7MB/1.0GB object store: 86%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 729/843 [10:53<00:59, 1.91 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.5MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:52<01:03, 3.54s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.85: Completed work on page 17 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 3.7MB/1.0GB object store: 86%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 729/843 [10:53<00:59, 1.91 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.5MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:52<01:03, 3.54s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.94: Completed work on page 16 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 3.7MB/1.0GB object store: 86%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 729/843 [10:53<00:59, 1.91 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.5MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:52<01:03, 3.54s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.94: Completed work on page 16 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 3.7MB/1.0GB object store: 86%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 729/843 [10:53<00:59, 1.91 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.5MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:52<01:03, 3.54s/ row]\n", - "\n", - "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 1; Queued blocks: 0; Resources: 1.0 CPU, 2.2MB object store: 86%|█████████████████████████████████████████████████████████████████████████████████████████▉ | 729/843 [10:52<00:59, 1.91 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.5MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:52<01:03, 3.54s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 3.13: Completed work on page 18 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 3.7MB/1.0GB object store: 86%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 729/843 [10:54<00:59, 1.91 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.5MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:53<01:03, 3.54s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 3.13: Completed work on page 18 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 3.7MB/1.0GB object store: 86%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 729/843 [10:54<00:59, 1.91 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.5MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:53<01:03, 3.54s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 3.62: Completed work on page 20 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 3.7MB/1.0GB object store: 86%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 729/843 [10:54<00:59, 1.91 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.5MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:53<01:03, 3.54s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 3.62: Completed work on page 20 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 3.7MB/1.0GB object store: 86%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 729/843 [10:54<00:59, 1.91 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.5MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:53<01:03, 3.54s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 3.68: Completed work on page 19 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 3.7MB/1.0GB object store: 86%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 729/843 [10:54<00:59, 1.91 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.5MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:53<01:03, 3.54s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 3.68: Completed work on page 19 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 3.7MB/1.0GB object store: 86%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 729/843 [10:54<00:59, 1.91 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.5MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:53<01:03, 3.54s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 3.77: Completed work on page 22 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 3.7MB/1.0GB object store: 86%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 729/843 [10:54<00:59, 1.91 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.5MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:53<01:03, 3.54s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 3.77: Completed work on page 22 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 3.7MB/1.0GB object store: 86%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 729/843 [10:54<00:59, 1.91 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.5MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:53<01:03, 3.54s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 3.77: Completed work on page 24 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 3.7MB/1.0GB object store: 86%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 729/843 [10:54<00:59, 1.91 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.5MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:53<01:03, 3.54s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 3.77: Completed work on page 24 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 3.7MB/1.0GB object store: 86%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 729/843 [10:54<00:59, 1.91 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.5MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:53<01:03, 3.54s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 3.96: Completed work on page 23 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 3.7MB/1.0GB object store: 86%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 729/843 [10:54<00:59, 1.91 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.5MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:53<01:03, 3.54s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 3.96: Completed work on page 23 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 3.7MB/1.0GB object store: 86%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 729/843 [10:54<00:59, 1.91 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.5MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:53<01:03, 3.54s/ row]\n", - "\n", - "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 1; Queued blocks: 0; Resources: 1.0 CPU, 2.2MB object store: 86%|█████████████████████████████████████████████████████████████████████████████████████████▉ | 729/843 [10:53<00:59, 1.91 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.2MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:54<01:03, 3.54s/ row]\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 4.18: Completed work on page 21 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 3.7MB/1.0GB object store: 86%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 729/843 [10:55<00:59, 1.91 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.2MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:54<01:03, 3.54s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 4.18: Completed work on page 21 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 3.7MB/1.0GB object store: 86%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 729/843 [10:55<00:59, 1.91 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.2MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:54<01:03, 3.54s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 4.40: Completed work on page 28 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 3.7MB/1.0GB object store: 86%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 729/843 [10:55<00:59, 1.91 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.2MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:54<01:03, 3.54s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 4.40: Completed work on page 28 \n", - "\n", - "\n", - "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 0; Queued blocks: 0; Resources: 0.0 CPU, 0.0B object store: 86%|██████████████████████████████████████████████████████████████████████████████████████████▊ | 729/843 [10:54<00:59, 1.91 row/s]\n", - "\n", - "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 0; Queued blocks: 0; Resources: 0.0 CPU, 0.0B object store: 87%|██████████████████████████████████████████████████████████████████████████████████████████▉ | 729/842 [10:54<00:59, 1.91 row/s]\n", - "\n", - "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 0; Queued blocks: 0; Resources: 0.0 CPU, 0.0B object store: 88%|████████████████████████████████████████████████████████████████████████████████████████████▌ | 742/842 [10:54<00:51, 1.95 row/s]\n", - "\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 3.7MB/1.0GB object store: 86%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 729/843 [10:55<00:59, 1.91 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.2MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:54<01:03, 3.54s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.2MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [10:55<00:51, 1.95 row/s](MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 4.46: Completed work on page 27\n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.2MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [10:55<00:51, 1.95 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.2MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:54<01:03, 3.54s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 4.46: Completed work on page 27 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.2MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [10:55<00:51, 1.95 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.2MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:54<01:03, 3.54s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 4.46: Completed work on page 26 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.2MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [10:55<00:51, 1.95 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.2MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:54<01:03, 3.54s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 4.46: Completed work on page 26 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.2MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [10:55<00:51, 1.95 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.2MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:54<01:03, 3.54s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 4.46: Completed work on page 25 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.2MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [10:55<00:51, 1.95 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.2MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:54<01:03, 3.54s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 4.46: Completed work on page 25 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.2MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [10:55<00:51, 1.95 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.2MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:54<01:03, 3.54s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 4.65: Completed work on page 29 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.2MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [10:55<00:51, 1.95 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.2MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:54<01:03, 3.54s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 4.65: Completed work on page 29 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.2MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [10:55<00:51, 1.95 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.2MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:54<01:03, 3.54s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 4.74: Completed work on page 31 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.2MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [10:55<00:51, 1.95 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.2MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:54<01:03, 3.54s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 4.74: Completed work on page 31 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.2MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [10:55<00:51, 1.95 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.2MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:54<01:03, 3.54s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 4.93: Completed work on page 33 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.2MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [10:55<00:51, 1.95 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.2MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:54<01:03, 3.54s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 4.93: Completed work on page 33 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.2MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [10:55<00:51, 1.95 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.2MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:54<01:03, 3.54s/ row]\n", - "\n", - "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 0; Queued blocks: 0; Resources: 0.0 CPU, 0.0B object store: 88%|████████████████████████████████████████████████████████████████████████████████████████████▌ | 742/842 [10:54<00:51, 1.95 row/s]\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 5.02: Completed work on page 30 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.2MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [10:56<00:51, 1.95 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.2MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:55<01:03, 3.54s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 5.02: Completed work on page 30 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.2MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [10:56<00:51, 1.95 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.2MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:55<01:03, 3.54s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 5.32: Completed work on page 32 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.2MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [10:56<00:51, 1.95 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.2MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:55<01:03, 3.54s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 5.32: Completed work on page 32 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.2MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [10:56<00:51, 1.95 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.2MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:55<01:03, 3.54s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 5.56: Completed work on page 35 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.2MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [10:56<00:51, 1.95 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.2MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:55<01:03, 3.54s/ row]\n", - "\n", - " \n", - " \n", - "\n", - " \n", - "\n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.2MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [10:56<00:51, 1.95 row/s] INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 5.56: Completed work on page 35___wrap)) pid=77875)\n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.2MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [10:56<00:51, 1.95 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.2MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:55<01:03, 3.54s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 5.62: Completed work on page 37 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.2MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [10:56<00:51, 1.95 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.2MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:55<01:03, 3.54s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 5.62: Completed work on page 37 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.2MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [10:56<00:51, 1.95 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.2MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:55<01:03, 3.54s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 5.62: Completed work on page 34 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.2MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [10:56<00:51, 1.95 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.2MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:55<01:03, 3.54s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 5.62: Completed work on page 34 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.2MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [10:56<00:51, 1.95 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.2MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:55<01:03, 3.54s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 5.62: Completed work on page 36 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.2MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [10:56<00:51, 1.95 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.2MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:55<01:03, 3.54s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 5.62: Completed work on page 36 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.2MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [10:56<00:51, 1.95 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.2MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:55<01:03, 3.54s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 6.18: Completed work on page 38 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.2MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [10:57<00:51, 1.95 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.2MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:56<01:03, 3.54s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 6.18: Completed work on page 38 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.2MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [10:57<00:51, 1.95 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.2MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:56<01:03, 3.54s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 6.24: Completed work on page 39 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.2MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [10:57<00:51, 1.95 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.2MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:56<01:03, 3.54s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 6.24: Completed work on page 39 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.2MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [10:57<00:51, 1.95 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.2MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:56<01:03, 3.54s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 6.32: Completed work on page 42 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.2MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [10:57<00:51, 1.95 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.2MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:56<01:03, 3.54s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 6.32: Completed work on page 42 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.2MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [10:57<00:51, 1.95 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.2MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:56<01:03, 3.54s/ row]\n", - "\n", - " \n", - " \n", - "\n", - " \n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.2MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:56<01:03, 3.54s/ row](MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 6.62: Completed work on page 41\n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.2MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [10:57<00:51, 1.95 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.2MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:56<01:03, 3.54s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 6.62: Completed work on page 41 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.2MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [10:57<00:51, 1.95 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.2MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:56<01:03, 3.54s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 6.94: Completed work on page 40 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.2MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [10:57<00:51, 1.95 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.2MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:56<01:03, 3.54s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 6.94: Completed work on page 40 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.2MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [10:57<00:51, 1.95 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.2MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:56<01:03, 3.54s/ row]\n", - "\n", - "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 0; Queued blocks: 0; Resources: 0.0 CPU, 0.0B object store: 88%|████████████████████████████████████████████████████████████████████████████████████████████▌ | 742/842 [10:56<00:51, 1.95 row/s]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 7.02: Completed work on page 43 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.2MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [10:58<00:51, 1.95 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.2MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:57<01:03, 3.54s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 7.02: Completed work on page 43 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.2MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [10:58<00:51, 1.95 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.2MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:57<01:03, 3.54s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 7.28: Completed work on page 44 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.2MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [10:58<00:51, 1.95 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.2MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:57<01:03, 3.54s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 7.28: Completed work on page 44 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.2MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [10:58<00:51, 1.95 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.2MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:57<01:03, 3.54s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 7.39: Completed work on page 49 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.2MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [10:58<00:51, 1.95 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.2MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:57<01:03, 3.54s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 7.39: Completed work on page 49 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.2MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [10:58<00:51, 1.95 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.2MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:57<01:03, 3.54s/ row]\n", - "\n", - " \n", - " \n", - "\n", - " \n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.2MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:57<01:03, 3.54s/ row](MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 7.50: Completed work on page 45\n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.2MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [10:58<00:51, 1.95 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.2MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:57<01:03, 3.54s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 7.50: Completed work on page 45 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.2MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [10:58<00:51, 1.95 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.2MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:57<01:03, 3.54s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 7.50: Completed work on page 46 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.2MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [10:58<00:51, 1.95 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.2MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:57<01:03, 3.54s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 7.50: Completed work on page 46 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.2MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [10:58<00:51, 1.95 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.2MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:57<01:03, 3.54s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 7.76: Completed work on page 47 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.2MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [10:58<00:51, 1.95 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.2MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:57<01:03, 3.54s/ row]\n", - "\n", - " \n", - " \n", - "\n", - " \n", - "\n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.2MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [10:58<00:51, 1.95 row/s] INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 7.76: Completed work on page 47___wrap)) pid=77875)\n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.2MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [10:58<00:51, 1.95 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.2MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:57<01:03, 3.54s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 8.00: Completed work on page 48 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.2MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [10:58<00:51, 1.95 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.2MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:57<01:03, 3.54s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 8.00: Completed work on page 48 \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 1.2MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [10:58<00:51, 1.95 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.2MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:57<01:03, 3.54s/ row]\n", - "\n", - "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 0; Queued blocks: 0; Resources: 0.0 CPU, 0.0B object store: 88%|████████████████████████████████████████████████████████████████████████████████████████████▌ | 742/842 [10:57<00:51, 1.95 row/s]\n", - "Running Dataset. Active & requested resources: 1/11 CPU, 3.2MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [10:59<00:51, 1.95 row/s]\n", - "\n", - "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 1; Queued blocks: 0; Resources: 1.0 CPU, 2.2MB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████▋ | 742/842 [10:58<00:51, 1.95 row/s]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:httpx:HTTP Request: POST https://api.aryn.cloud/v1/document/partition \"HTTP/1.1 200 OK\" \n", - "Running Dataset. Active & requested resources: 1/11 CPU, 3.2MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [11:00<00:51, 1.95 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 3; Resources: 1.0 CPU, 1.2MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:59<01:03, 3.54s/ row]\n", - "\n", - "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 1; Queued blocks: 0; Resources: 1.0 CPU, 2.2MB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████▋ | 742/842 [10:59<00:51, 1.95 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 2; Resources: 1.0 CPU, 3.2MB object store; [locality off]: 88%|████████████████████████████████████████████████████████████████▌ | 137/155 [10:59<01:03, 3.54s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 2; Resources: 1.0 CPU, 3.2MB object store; [locality off]: 90%|█████████████████████████████████████████████████████████████████▍ | 139/155 [10:59<01:05, 4.08s/ row]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 2; Resources: 1.0 CPU, 3.2MB object store; [locality off]: 90%|█████████████████████████████████████████████████████████████████▍ | 139/155 [10:59<01:05, 4.08s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.00: Server version aryn-partitioner-0.20250323.124348 Model version 1.4 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.4MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [11:01<00:51, 1.95 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 2; Resources: 1.0 CPU, 3.2MB object store; [locality off]: 90%|█████████████████████████████████████████████████████████████████▍ | 139/155 [11:00<01:05, 4.08s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.00: Server version aryn-partitioner-0.20250323.124348 Model version 1.4 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.4MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [11:01<00:51, 1.95 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 2; Resources: 1.0 CPU, 3.2MB object store; [locality off]: 90%|█████████████████████████████████████████████████████████████████▍ | 139/155 [11:00<01:05, 4.08s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.00: Received request with aryn_call_id=aryn:call-nbapmpe347y51lkvqxyx1ea \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.4MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [11:01<00:51, 1.95 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 2; Resources: 1.0 CPU, 3.2MB object store; [locality off]: 90%|█████████████████████████████████████████████████████████████████▍ | 139/155 [11:00<01:05, 4.08s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.00: Received request with aryn_call_id=aryn:call-nbapmpe347y51lkvqxyx1ea \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.4MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [11:01<00:51, 1.95 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 2; Resources: 1.0 CPU, 3.2MB object store; [locality off]: 90%|█████████████████████████████████████████████████████████████████▍ | 139/155 [11:00<01:05, 4.08s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.00: Waiting for scheduling \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.4MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [11:01<00:51, 1.95 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 2; Resources: 1.0 CPU, 3.2MB object store; [locality off]: 90%|█████████████████████████████████████████████████████████████████▍ | 139/155 [11:00<01:05, 4.08s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.00: Waiting for scheduling \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.4MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [11:01<00:51, 1.95 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 2; Resources: 1.0 CPU, 3.2MB object store; [locality off]: 90%|█████████████████████████████████████████████████████████████████▍ | 139/155 [11:00<01:05, 4.08s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.01: Preprocessing document \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.4MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [11:01<00:51, 1.95 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 2; Resources: 1.0 CPU, 3.2MB object store; [locality off]: 90%|█████████████████████████████████████████████████████████████████▍ | 139/155 [11:00<01:05, 4.08s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.01: Preprocessing document \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.4MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [11:01<00:51, 1.95 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 2; Resources: 1.0 CPU, 3.2MB object store; [locality off]: 90%|█████████████████████████████████████████████████████████████████▍ | 139/155 [11:00<01:05, 4.08s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.01: Done preprocessing document \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.4MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [11:01<00:51, 1.95 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 2; Resources: 1.0 CPU, 3.2MB object store; [locality off]: 90%|█████████████████████████████████████████████████████████████████▍ | 139/155 [11:00<01:05, 4.08s/ row]\n", - "\n", - " \n", - " \n", - "\n", - " \n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 2; Resources: 1.0 CPU, 3.2MB object store; [locality off]: 90%|█████████████████████████████████████████████████████████████████▍ | 139/155 [11:00<01:05, 4.08s/ row](MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.01: Done preprocessing document\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.4MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [11:01<00:51, 1.95 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 2; Resources: 1.0 CPU, 3.2MB object store; [locality off]: 90%|█████████████████████████████████████████████████████████████████▍ | 139/155 [11:00<01:05, 4.08s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.69: Completed work on page 1 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.4MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [11:01<00:51, 1.95 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 2; Resources: 1.0 CPU, 3.2MB object store; [locality off]: 90%|█████████████████████████████████████████████████████████████████▍ | 139/155 [11:00<01:05, 4.08s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.69: Completed work on page 1 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.4MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [11:01<00:51, 1.95 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 2; Resources: 1.0 CPU, 3.2MB object store; [locality off]: 90%|█████████████████████████████████████████████████████████████████▍ | 139/155 [11:00<01:05, 4.08s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.69: Completed work on page 2 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.4MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [11:01<00:51, 1.95 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 2; Resources: 1.0 CPU, 3.2MB object store; [locality off]: 90%|█████████████████████████████████████████████████████████████████▍ | 139/155 [11:00<01:05, 4.08s/ row]\n", - "\n", - " \n", - " \n", - "\n", - " \n", - "\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.4MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [11:01<00:51, 1.95 row/s] INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.69: Completed work on page 2e___wrap)) pid=77875)\n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.4MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [11:01<00:51, 1.95 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 2; Resources: 1.0 CPU, 3.2MB object store; [locality off]: 90%|█████████████████████████████████████████████████████████████████▍ | 139/155 [11:00<01:05, 4.08s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 0.84: Completed work on page 3 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.4MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [11:01<00:51, 1.95 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 2; Resources: 1.0 CPU, 3.2MB object store; [locality off]: 90%|█████████████████████████████████████████████████████████████████▍ | 139/155 [11:00<01:05, 4.08s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 0.84: Completed work on page 3 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.4MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [11:01<00:51, 1.95 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 2; Resources: 1.0 CPU, 3.2MB object store; [locality off]: 90%|█████████████████████████████████████████████████████████████████▍ | 139/155 [11:00<01:05, 4.08s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.01: Completed work on page 4 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.4MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [11:01<00:51, 1.95 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 2; Resources: 1.0 CPU, 3.2MB object store; [locality off]: 90%|█████████████████████████████████████████████████████████████████▍ | 139/155 [11:00<01:05, 4.08s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.01: Completed work on page 4 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.4MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [11:01<00:51, 1.95 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 2; Resources: 1.0 CPU, 3.2MB object store; [locality off]: 90%|█████████████████████████████████████████████████████████████████▍ | 139/155 [11:00<01:05, 4.08s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.23: Completed work on page 5 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.4MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [11:02<00:51, 1.95 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 2; Resources: 1.0 CPU, 3.2MB object store; [locality off]: 90%|█████████████████████████████████████████████████████████████████▍ | 139/155 [11:01<01:05, 4.08s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.23: Completed work on page 5 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.4MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [11:02<00:51, 1.95 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 2; Resources: 1.0 CPU, 3.2MB object store; [locality off]: 90%|█████████████████████████████████████████████████████████████████▍ | 139/155 [11:01<01:05, 4.08s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.53: Completed work on page 6 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.4MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [11:02<00:51, 1.95 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 2; Resources: 1.0 CPU, 3.2MB object store; [locality off]: 90%|█████████████████████████████████████████████████████████████████▍ | 139/155 [11:01<01:05, 4.08s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.53: Completed work on page 6 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.4MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [11:02<00:51, 1.95 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 2; Resources: 1.0 CPU, 3.2MB object store; [locality off]: 90%|█████████████████████████████████████████████████████████████████▍ | 139/155 [11:01<01:05, 4.08s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.65: Completed work on page 7 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.4MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [11:02<00:51, 1.95 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 2; Resources: 1.0 CPU, 3.2MB object store; [locality off]: 90%|█████████████████████████████████████████████████████████████████▍ | 139/155 [11:01<01:05, 4.08s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.65: Completed work on page 7 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.4MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [11:02<00:51, 1.95 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 2; Resources: 1.0 CPU, 3.2MB object store; [locality off]: 90%|█████████████████████████████████████████████████████████████████▍ | 139/155 [11:01<01:05, 4.08s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 1.92: Completed work on page 8 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.4MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [11:02<00:51, 1.95 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 2; Resources: 1.0 CPU, 3.2MB object store; [locality off]: 90%|█████████████████████████████████████████████████████████████████▍ | 139/155 [11:01<01:05, 4.08s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 1.92: Completed work on page 8 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.4MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [11:02<00:51, 1.95 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 2; Resources: 1.0 CPU, 3.2MB object store; [locality off]: 90%|█████████████████████████████████████████████████████████████████▍ | 139/155 [11:01<01:05, 4.08s/ row]\n", - "\n", - "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 1; Queued blocks: 0; Resources: 1.0 CPU, 2.2MB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████▋ | 742/842 [11:01<00:51, 1.95 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 2; Resources: 1.0 CPU, 3.2MB object store; [locality off]: 90%|█████████████████████████████████████████████████████████████████▍ | 139/155 [11:01<01:05, 4.08s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.10: Completed work on page 9 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.4MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [11:03<00:51, 1.95 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 2; Resources: 1.0 CPU, 3.2MB object store; [locality off]: 90%|█████████████████████████████████████████████████████████████████▍ | 139/155 [11:02<01:05, 4.08s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.10: Completed work on page 9 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.4MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [11:03<00:51, 1.95 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 2; Resources: 1.0 CPU, 3.2MB object store; [locality off]: 90%|█████████████████████████████████████████████████████████████████▍ | 139/155 [11:02<01:05, 4.08s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.28: Completed work on page 10 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.4MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [11:03<00:51, 1.95 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 2; Resources: 1.0 CPU, 3.2MB object store; [locality off]: 90%|█████████████████████████████████████████████████████████████████▍ | 139/155 [11:02<01:05, 4.08s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.28: Completed work on page 10 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.4MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [11:03<00:51, 1.95 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 2; Resources: 1.0 CPU, 3.2MB object store; [locality off]: 90%|█████████████████████████████████████████████████████████████████▍ | 139/155 [11:02<01:05, 4.08s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.46: Completed work on page 11 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.4MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [11:03<00:51, 1.95 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 2; Resources: 1.0 CPU, 3.2MB object store; [locality off]: 90%|█████████████████████████████████████████████████████████████████▍ | 139/155 [11:02<01:05, 4.08s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.46: Completed work on page 11 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.4MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [11:03<00:51, 1.95 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 2; Resources: 1.0 CPU, 3.2MB object store; [locality off]: 90%|█████████████████████████████████████████████████████████████████▍ | 139/155 [11:02<01:05, 4.08s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.68: Completed work on page 12 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.4MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [11:03<00:51, 1.95 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 2; Resources: 1.0 CPU, 3.2MB object store; [locality off]: 90%|█████████████████████████████████████████████████████████████████▍ | 139/155 [11:02<01:05, 4.08s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.68: Completed work on page 12 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.4MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [11:03<00:51, 1.95 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 2; Resources: 1.0 CPU, 3.2MB object store; [locality off]: 90%|█████████████████████████████████████████████████████████████████▍ | 139/155 [11:02<01:05, 4.08s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 2.95: Completed work on page 13 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.4MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [11:03<00:51, 1.95 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 2; Resources: 1.0 CPU, 3.2MB object store; [locality off]: 90%|█████████████████████████████████████████████████████████████████▍ | 139/155 [11:02<01:05, 4.08s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 2.95: Completed work on page 13 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.4MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [11:03<00:51, 1.95 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 2; Resources: 1.0 CPU, 3.2MB object store; [locality off]: 90%|█████████████████████████████████████████████████████████████████▍ | 139/155 [11:02<01:05, 4.08s/ row]\n", - "\n", - "- MapBatches(materialize)->...->MapBatches(_write_docs_tt): Tasks: 1; Queued blocks: 0; Resources: 1.0 CPU, 2.2MB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████▋ | 742/842 [11:02<00:51, 1.95 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 2; Resources: 1.0 CPU, 3.2MB object store; [locality off]: 90%|█████████████████████████████████████████████████████████████████▍ | 139/155 [11:02<01:05, 4.08s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 3.13: Completed work on page 14 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.4MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [11:04<00:51, 1.95 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 2; Resources: 1.0 CPU, 3.2MB object store; [locality off]: 90%|█████████████████████████████████████████████████████████████████▍ | 139/155 [11:03<01:05, 4.08s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 3.13: Completed work on page 14 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.4MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [11:04<00:51, 1.95 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 2; Resources: 1.0 CPU, 3.2MB object store; [locality off]: 90%|█████████████████████████████████████████████████████████████████▍ | 139/155 [11:03<01:05, 4.08s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 3.32: Completed work on page 15 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.4MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [11:04<00:51, 1.95 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 2; Resources: 1.0 CPU, 3.2MB object store; [locality off]: 90%|█████████████████████████████████████████████████████████████████▍ | 139/155 [11:03<01:05, 4.08s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 3.32: Completed work on page 15 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.4MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [11:04<00:51, 1.95 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 2; Resources: 1.0 CPU, 3.2MB object store; [locality off]: 90%|█████████████████████████████████████████████████████████████████▍ | 139/155 [11:03<01:05, 4.08s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 3.50: Completed work on page 16 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.4MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [11:04<00:51, 1.95 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 2; Resources: 1.0 CPU, 3.2MB object store; [locality off]: 90%|█████████████████████████████████████████████████████████████████▍ | 139/155 [11:03<01:05, 4.08s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) INFO:aryn_sdk.partition.partition:ArynPartitioner: T+ 3.50: Completed work on page 16 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.4MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [11:04<00:51, 1.95 row/s]\n", - "- ReadBinary->...->MapBatches(BaseMapTransformCallable___wrap): Tasks: 4; Actors: 1; Queued blocks: 2; Resources: 1.0 CPU, 3.2MB object store; [locality off]: 90%|█████████████████████████████████████████████████████████████████▍ | 139/155 [11:03<01:05, 4.08s/ row]\n", - "\n", - " \n", - " \n", - "\n", - "(MapWorker(ReadBinary->Map(BinaryScan._to_document)->MapBatches(BaseMapTransformCallable___wrap)) pid=77875) ArynPartitioner: T+ 3.67: Completed work on page 17 \n", - "Running Dataset. Active & requested resources: 2/11 CPU, 5.4MB/1.0GB object store: 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 742/842 [11:04<00:51, 1.95 row/s]" - ] - } - ], + "outputs": [], "source": [ "finalDocSet.embed(embedder=OpenAIEmbedder(model_name=model_name)).write.aryn(aryn_url=\"https://test-api.aryn.ai/v1/storage\", docset_id=docset.value.docset_id, aryn_api_key=\"eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJzdWIiOnsiZW1sIjoiZG9jc3RvcmUtdGVhbUBhcnluLmFpIiwiYWN0IjoiMjI0NjYxMzg4MzA4In0sImlhdCI6MTczMzk1MDY3OH0.xqmVCfnu0RJ2RW-74-to4_hbeTIZflToj7YZs6vGtNxCkNroEE5Quzro8ztxTdI6Yt-9HzPepIXdA2QnKDtWBQ\")" ] From c6b8939141ae4cdb9f04be4162b9b40136915653 Mon Sep 17 00:00:00 2001 From: Abhijit Pujare Date: Thu, 27 Mar 2025 10:06:13 -0700 Subject: [PATCH 3/5] Removing some keys and cleaning up the notebooks --- Earnings_Call_Ingestion_Script.ipynb | 2 +- QuestionAnsweringNotebook.ipynb | 849 +-------------------------- 2 files changed, 20 insertions(+), 831 deletions(-) diff --git a/Earnings_Call_Ingestion_Script.ipynb b/Earnings_Call_Ingestion_Script.ipynb index 228c265..21e0197 100644 --- a/Earnings_Call_Ingestion_Script.ipynb +++ b/Earnings_Call_Ingestion_Script.ipynb @@ -233,7 +233,7 @@ "from sycamore.transforms.embed import OpenAIEmbedder\n", "model_name = \"text-embedding-3-small\"\n", "from aryn_sdk.client.client import Client \n", - "myClient = Client(aryn_url=\"https://test-api.aryn.ai\", aryn_api_key=\"eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJzdWIiOnsiZW1sIjoiZG9jc3RvcmUtdGVhbUBhcnluLmFpIiwiYWN0IjoiMjI0NjYxMzg4MzA4In0sImlhdCI6MTczMzk1MDY3OH0.xqmVCfnu0RJ2RW-74-to4_hbeTIZflToj7YZs6vGtNxCkNroEE5Quzro8ztxTdI6Yt-9HzPepIXdA2QnKDtWBQ\")\n", + "myClient = Client(aryn_url=\"https://test-api.aryn.ai\", aryn_api_key=\"\")\n", "docset = myClient.create_docset(name=\"haystack_workshop_target_correct\")" ] }, diff --git a/QuestionAnsweringNotebook.ipynb b/QuestionAnsweringNotebook.ipynb index ef6d1e8..96d8819 100644 --- a/QuestionAnsweringNotebook.ipynb +++ b/QuestionAnsweringNotebook.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 47, + "execution_count": null, "id": "dada065d-47fc-4b2c-9934-ef6fa9b93dd8", "metadata": {}, "outputs": [], @@ -23,59 +23,28 @@ }, { "cell_type": "code", - "execution_count": 48, + "execution_count": null, "id": "b0716549-20ce-45db-8d3d-8c3085e32ef9", "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/var/folders/ty/flsmmnn17p314__gg336qjy00000gn/T/ipykernel_77419/2683495837.py:2: FutureWarning: Class aryn is experimental and may change in the future.\n", - " initial_docset = context.read.aryn(aryn_url=\"https://test-api.aryn.ai/v1/storage\", docset_id=\"aryn:ds-v9tfacka0xifljqaj0l1rbh\", aryn_api_key=\"eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJzdWIiOnsiZW1sIjoiZG9jc3RvcmUtdGVhbUBhcnluLmFpIiwiYWN0IjoiMjI0NjYxMzg4MzA4In0sImlhdCI6MTczMzk1MDY3OH0.xqmVCfnu0RJ2RW-74-to4_hbeTIZflToj7YZs6vGtNxCkNroEE5Quzro8ztxTdI6Yt-9HzPepIXdA2QnKDtWBQ\")\n" - ] - } - ], + "outputs": [], "source": [ "context = sycamore.init()\n", - "initial_docset = context.read.aryn(aryn_url=\"https://test-api.aryn.ai/v1/storage\", docset_id=\"aryn:ds-v9tfacka0xifljqaj0l1rbh\", aryn_api_key=\"eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJzdWIiOnsiZW1sIjoiZG9jc3RvcmUtdGVhbUBhcnluLmFpIiwiYWN0IjoiMjI0NjYxMzg4MzA4In0sImlhdCI6MTczMzk1MDY3OH0.xqmVCfnu0RJ2RW-74-to4_hbeTIZflToj7YZs6vGtNxCkNroEE5Quzro8ztxTdI6Yt-9HzPepIXdA2QnKDtWBQ\")" + "initial_docset = context.read.aryn(aryn_url=\"https://test-api.aryn.ai/v1/storage\", docset_id=\"aryn:ds-v9tfacka0xifljqaj0l1rbh\", aryn_api_key=\"\")" ] }, { "cell_type": "code", - "execution_count": 50, + "execution_count": null, "id": "91c37ade-4294-49b4-8228-e137aa65f038", "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2025-03-26 16:58:51,357\tINFO streaming_executor.py:108 -- Starting execution of Dataset. Full logs are in /tmp/ray/session_2025-03-26_16-00-08_504381_77419/logs/ray-data\n", - "2025-03-26 16:58:51,358\tINFO streaming_executor.py:109 -- Execution plan of Dataset: InputDataBuffer[Input] -> TaskPoolMapOperator[Map(ArynReader._to_doc)]\n", - "Running 0: 0.00 row [00:00, ? row/s]\n", - "- Map(ArynReader._to_doc) 1: 0.00 row [00:00, ? row/s]\n", - "- Map(ArynReader._to_doc): Tasks: 3 [backpressured]; Queued blocks: 89; Resources: 3.0 CPU, 768.0MB object store: : 0.00 row [00:01, ? row/s]\n", - " \n", - "✔️ Dataset execution finished in 2.07 seconds: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2.00/2.00 [00:02<00:00, 1.05s/ row]\n", - "\n", - "- Map(ArynReader._to_doc): Tasks: 3 [backpressured]; Queued blocks: 89; Resources: 3.0 CPU, 768.0MB object store: : 0.00 row [00:02, ? row/s]\n", - "- Map(ArynReader._to_doc): Tasks: 11 [backpressured]; Queued blocks: 79; Resources: 11.0 CPU, 8.6MB object store: : 0.00 row [00:02, ? row/s]\n", - "- Map(ArynReader._to_doc): Tasks: 11 [backpressured]; Queued blocks: 79; Resources: 11.0 CPU, 8.6MB object store: 0%| | 0.00/2.00 [00:02 TaskPoolMapOperator[Map(ArynReader._to_doc)->MapBatches(remove_original_elements)->MapBatches(spread_properties)->MapBatches()->MapBatches(sort_and_batch_elements)->MapBatches(llm_map)->MapBatches(postprocess)]\n", - "Running 0: 0.00 row [00:00, ? row/s]\n", - "- Map(ArynReader._to_doc)->MapBatches(remove_original_elements)->...->MapBatches(postprocess) 1: 0.00 row [00:00, ? row/s]\n", - "- Map(ArynReader._to_doc)->...->MapBatches(postprocess): Tasks: 3 [backpressured]; Queued blocks: 89; Resources: 3.0 CPU, 768.0MB object store: : 0.00 row [00:01, ? row/s]\n", - "Running Dataset. Active & requested resources: 3/11 CPU, 768.0MB/1.0GB object store: : 0.00 row [00:01, ? row/s] 3.0 CPU, 768.0MB object store: : 0.00 row [00:01, ? row/s]\n", - "- Map(ArynReader._to_doc)->...->MapBatches(postprocess): Tasks: 11 [backpressured]; Queued blocks: 80; Resources: 11.0 CPU, 5.3KB object store: : 0.00 row [00:02, ? row/s]\n", - "- Map(ArynReader._to_doc)->...->MapBatches(postprocess): Tasks: 11 [backpressured]; Queued blocks: 80; Resources: 11.0 CPU, 5.3KB object store: 0%| | 0.00/276 [00:02...->MapBatches(postprocess): Tasks: 11 [backpressured]; Queued blocks: 80; Resources: 11.0 CPU, 5.3KB object store: 1%|▉ | 3.00/276 [00:02<03:26, 1.32 row/s]\n", - "Running Dataset. Active & requested resources: 11/11 CPU, 5.3KB/1.0GB object store: 1%|█▌ | 3.00/276 [00:02<03:38, 1.25 row/s]\n", - "- Map(ArynReader._to_doc)->...->MapBatches(postprocess): Tasks: 11 [backpressured]; Queued blocks: 79; Resources: 11.0 CPU, 5.3KB object store: 1%|▉ | 3.00/276 [00:03<03:26, 1.32 row/s]\n", - "- Map(ArynReader._to_doc)->...->MapBatches(postprocess): Tasks: 11 [backpressured]; Queued blocks: 79; Resources: 11.0 CPU, 5.3KB object store: 2%|█▉ | 6.00/276 [00:03<02:13, 2.02 row/s]\n", - "Running Dataset. Active & requested resources: 10/11 CPU, 5.3KB/1.0GB object store: 2%|███▏ | 6.00/276 [00:03<02:23, 1.88 row/s]\n", - "- Map(ArynReader._to_doc)->...->MapBatches(postprocess): Tasks: 11 [backpressured]; Queued blocks: 77; Resources: 11.0 CPU, 5.3KB object store: 2%|█▉ | 6.00/276 [00:04<02:13, 2.02 row/s]\n", - "- Map(ArynReader._to_doc)->...->MapBatches(postprocess): Tasks: 11 [backpressured]; Queued blocks: 77; Resources: 11.0 CPU, 5.3KB object store: 4%|███▊ | 12.0/276 [00:04<01:16, 3.44 row/s]\n", - "Running Dataset. Active & requested resources: 9/11 CPU, 5.3KB/1.0GB object store: 4%|██████▍ | 12.0/276 [00:04<01:20, 3.28 row/s]\n", - "- Map(ArynReader._to_doc)->...->MapBatches(postprocess): Tasks: 11 [backpressured]; Queued blocks: 73; Resources: 11.0 CPU, 5.3KB object store: 4%|███▊ | 12.0/276 [00:05<01:16, 3.44 row/s]\n", - "- Map(ArynReader._to_doc)->...->MapBatches(postprocess): Tasks: 11 [backpressured]; Queued blocks: 73; Resources: 11.0 CPU, 5.3KB object store: 9%|███████▌ | 24.0/276 [00:05<00:44, 5.63 row/s]\n", - "Running Dataset. Active & requested resources: 11/11 CPU, 5.3KB/1.0GB object store: 9%|████████████▊ | 24.0/276 [00:05<00:44, 5.69 row/s]\n", - "- Map(ArynReader._to_doc)->...->MapBatches(postprocess): Tasks: 11 [backpressured]; Queued blocks: 67; Resources: 11.0 CPU, 5.3KB object store: 9%|███████▌ | 24.0/276 [00:06<00:44, 5.63 row/s]\n", - "- Map(ArynReader._to_doc)->...->MapBatches(postprocess): Tasks: 11 [backpressured]; Queued blocks: 67; Resources: 11.0 CPU, 5.3KB object store: 15%|█████████████▏ | 42.0/276 [00:06<00:25, 9.08 row/s]\n", - "Running Dataset. Active & requested resources: 11/11 CPU, 5.3KB/1.0GB object store: 15%|██████████████████████▎ | 42.0/276 [00:06<00:25, 9.17 row/s]\n", - "- Map(ArynReader._to_doc)->...->MapBatches(postprocess): Tasks: 11 [backpressured]; Queued blocks: 65; Resources: 11.0 CPU, 5.3KB object store: 15%|█████████████▏ | 42.0/276 [00:07<00:25, 9.08 row/s]\n", - "- Map(ArynReader._to_doc)->...->MapBatches(postprocess): Tasks: 11 [backpressured]; Queued blocks: 65; Resources: 11.0 CPU, 5.3KB object store: 17%|███████████████▏ | 48.0/276 [00:07<00:28, 8.04 row/s]\n", - "Running Dataset. Active & requested resources: 11/11 CPU, 5.3KB/1.0GB object store: 17%|█████████████████████████▌ | 48.0/276 [00:07<00:28, 8.10 row/s]\n", - "- Map(ArynReader._to_doc)->...->MapBatches(postprocess): Tasks: 11 [backpressured]; Queued blocks: 62; Resources: 11.0 CPU, 5.3KB object store: 17%|███████████████▏ | 48.0/276 [00:08<00:28, 8.04 row/s]\n", - "- Map(ArynReader._to_doc)->...->MapBatches(postprocess): Tasks: 11 [backpressured]; Queued blocks: 62; Resources: 11.0 CPU, 5.3KB object store: 21%|█████████████████▉ | 57.0/276 [00:08<00:26, 8.18 row/s]\n", - "Running Dataset. Active & requested resources: 11/11 CPU, 5.3KB/1.0GB object store: 21%|██████████████████████████████▎ | 57.0/276 [00:08<00:26, 8.21 row/s]\n", - "- Map(ArynReader._to_doc)->...->MapBatches(postprocess): Tasks: 11 [backpressured]; Queued blocks: 56; Resources: 11.0 CPU, 5.3KB object store: 21%|█████████████████▉ | 57.0/276 [00:09<00:26, 8.18 row/s]\n", - "- Map(ArynReader._to_doc)->...->MapBatches(postprocess): Tasks: 11 [backpressured]; Queued blocks: 56; Resources: 11.0 CPU, 5.3KB object store: 27%|███████████████████████▋ | 75.0/276 [00:09<00:18, 10.8 row/s]\n", - "Running Dataset. Active & requested resources: 11/11 CPU, 5.3KB/1.0GB object store: 27%|███████████████████████████████████████▉ | 75.0/276 [00:09<00:18, 10.9 row/s]\n", - "- Map(ArynReader._to_doc)->...->MapBatches(postprocess): Tasks: 11 [backpressured]; Queued blocks: 53; Resources: 11.0 CPU, 5.3KB object store: 27%|███████████████████████▋ | 75.0/276 [00:10<00:18, 10.8 row/s]\n", - "- Map(ArynReader._to_doc)->...->MapBatches(postprocess): Tasks: 11 [backpressured]; Queued blocks: 53; Resources: 11.0 CPU, 5.3KB object store: 30%|██████████████████████████▍ | 84.0/276 [00:10<00:18, 10.2 row/s]\n", - "Running Dataset. Active & requested resources: 11/11 CPU, 5.3KB/1.0GB object store: 30%|████████████████████████████████████████████▋ | 84.0/276 [00:11<00:19, 10.1 row/s]\n", - "- Map(ArynReader._to_doc)->...->MapBatches(postprocess): Tasks: 11 [backpressured]; Queued blocks: 52; Resources: 11.0 CPU, 5.3KB object store: 30%|██████████████████████████▍ | 84.0/276 [00:12<00:18, 10.2 row/s]\n", - "- Map(ArynReader._to_doc)->...->MapBatches(postprocess): Tasks: 11 [backpressured]; Queued blocks: 52; Resources: 11.0 CPU, 5.3KB object store: 33%|████████████████████████████▎ | 90.0/276 [00:12<00:21, 8.68 row/s]\n", - "Running Dataset. Active & requested resources: 10/11 CPU, 4.8KB/1.0GB object store: 33%|███████████████████████████████████████████████▉ | 90.0/276 [00:12<00:21, 8.67 row/s]\n", - "- Map(ArynReader._to_doc)->...->MapBatches(postprocess): Tasks: 11 [backpressured]; Queued blocks: 44; Resources: 11.0 CPU, 5.3KB object store: 33%|████████████████████████████▎ | 90.0/276 [00:13<00:21, 8.68 row/s]\n", - "- Map(ArynReader._to_doc)->...->MapBatches(postprocess): Tasks: 11 [backpressured]; Queued blocks: 44; Resources: 11.0 CPU, 5.3KB object store: 40%|███████████████████████████████████▍ | 111/276 [00:13<00:13, 11.9 row/s]\n", - "Running Dataset. Active & requested resources: 11/11 CPU, 5.3KB/1.0GB object store: 40%|███████████████████████████████████████████████████████████▌ | 111/276 [00:13<00:13, 11.9 row/s]\n", - "- Map(ArynReader._to_doc)->...->MapBatches(postprocess): Tasks: 11 [backpressured]; Queued blocks: 39; Resources: 11.0 CPU, 5.3KB object store: 40%|███████████████████████████████████▍ | 111/276 [00:14<00:13, 11.9 row/s]\n", - "- Map(ArynReader._to_doc)->...->MapBatches(postprocess): Tasks: 11 [backpressured]; Queued blocks: 39; Resources: 11.0 CPU, 5.3KB object store: 46%|████████████████████████████████████████▏ | 126/276 [00:14<00:11, 12.5 row/s]\n", - "Running Dataset. Active & requested resources: 11/11 CPU, 5.3KB/1.0GB object store: 46%|███████████████████████████████████████████████████████████████████▌ | 126/276 [00:14<00:11, 12.6 row/s]\n", - "- Map(ArynReader._to_doc)->...->MapBatches(postprocess): Tasks: 11 [backpressured]; Queued blocks: 36; Resources: 11.0 CPU, 5.3KB object store: 46%|████████████████████████████████████████▏ | 126/276 [00:15<00:11, 12.5 row/s]\n", - "- Map(ArynReader._to_doc)->...->MapBatches(postprocess): Tasks: 11 [backpressured]; Queued blocks: 36; Resources: 11.0 CPU, 5.3KB object store: 49%|███████████████████████████████████████████ | 135/276 [00:15<00:12, 11.4 row/s]\n", - "Running Dataset. Active & requested resources: 11/11 CPU, 5.3KB/1.0GB object store: 49%|████████████████████████████████████████████████████████████████████████▍ | 135/276 [00:15<00:12, 11.4 row/s]\n", - "- Map(ArynReader._to_doc)->...->MapBatches(postprocess): Tasks: 11 [backpressured]; Queued blocks: 34; Resources: 11.0 CPU, 5.3KB object store: 49%|███████████████████████████████████████████ | 135/276 [00:16<00:12, 11.4 row/s]\n", - "- Map(ArynReader._to_doc)->...->MapBatches(postprocess): Tasks: 11 [backpressured]; Queued blocks: 34; Resources: 11.0 CPU, 5.3KB object store: 52%|█████████████████████████████████████████████▉ | 144/276 [00:16<00:12, 10.5 row/s]\n", - "Running Dataset. Active & requested resources: 10/11 CPU, 4.8KB/1.0GB object store: 52%|█████████████████████████████████████████████████████████████████████████████▏ | 144/276 [00:16<00:12, 10.5 row/s]\n", - "- Map(ArynReader._to_doc)->...->MapBatches(postprocess): Tasks: 11 [backpressured]; Queued blocks: 29; Resources: 11.0 CPU, 5.3KB object store: 52%|█████████████████████████████████████████████▉ | 144/276 [00:17<00:12, 10.5 row/s]\n", - "- Map(ArynReader._to_doc)->...->MapBatches(postprocess): Tasks: 11 [backpressured]; Queued blocks: 29; Resources: 11.0 CPU, 5.3KB object store: 57%|█████████████████████████████████████████████████▋ | 156/276 [00:17<00:11, 10.7 row/s]\n", - "Running Dataset. Active & requested resources: 11/11 CPU, 5.3KB/1.0GB object store: 57%|███████████████████████████████████████████████████████████████████████████████████▋ | 156/276 [00:17<00:11, 10.6 row/s]\n", - "- Map(ArynReader._to_doc)->...->MapBatches(postprocess): Tasks: 11 [backpressured]; Queued blocks: 24; Resources: 11.0 CPU, 5.3KB object store: 57%|█████████████████████████████████████████████████▋ | 156/276 [00:18<00:11, 10.7 row/s]\n", - "- Map(ArynReader._to_doc)->...->MapBatches(postprocess): Tasks: 11 [backpressured]; Queued blocks: 24; Resources: 11.0 CPU, 5.3KB object store: 62%|██████████████████████████████████████████████████████▌ | 171/276 [00:18<00:09, 11.6 row/s]\n", - "Running Dataset. Active & requested resources: 11/11 CPU, 5.3KB/1.0GB object store: 62%|███████████████████████████████████████████████████████████████████████████████████████████▋ | 171/276 [00:18<00:09, 11.6 row/s]\n", - "- Map(ArynReader._to_doc)->...->MapBatches(postprocess): Tasks: 11 [backpressured]; Queued blocks: 22; Resources: 11.0 CPU, 5.3KB object store: 62%|██████████████████████████████████████████████████████▌ | 171/276 [00:19<00:09, 11.6 row/s]\n", - "- Map(ArynReader._to_doc)->...->MapBatches(postprocess): Tasks: 11 [backpressured]; Queued blocks: 22; Resources: 11.0 CPU, 5.3KB object store: 64%|████████████████████████████████████████████████████████▍ | 177/276 [00:19<00:10, 9.85 row/s]\n", - "Running Dataset. Active & requested resources: 11/11 CPU, 5.3KB/1.0GB object store: 64%|██████████████████████████████████████████████████████████████████████████████████████████████▉ | 177/276 [00:19<00:10, 9.87 row/s]\n", - "- Map(ArynReader._to_doc)->...->MapBatches(postprocess): Tasks: 11 [backpressured]; Queued blocks: 19; Resources: 11.0 CPU, 5.8KB object store: 64%|████████████████████████████████████████████████████████▍ | 177/276 [00:20<00:10, 9.85 row/s]\n", - "- Map(ArynReader._to_doc)->...->MapBatches(postprocess): Tasks: 11 [backpressured]; Queued blocks: 19; Resources: 11.0 CPU, 5.8KB object store: 68%|████████████████████████████████████████████████████████████▎ | 189/276 [00:20<00:08, 10.3 row/s]\n", - "Running Dataset. Active & requested resources: 10/11 CPU, 4.8KB/1.0GB object store: 68%|█████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 189/276 [00:20<00:08, 10.3 row/s]\n", - "- Map(ArynReader._to_doc)->...->MapBatches(postprocess): Tasks: 11 [backpressured]; Queued blocks: 16; Resources: 11.0 CPU, 5.3KB object store: 68%|████████████████████████████████████████████████████████████▎ | 189/276 [00:21<00:08, 10.3 row/s]\n", - "- Map(ArynReader._to_doc)->...->MapBatches(postprocess): Tasks: 11 [backpressured]; Queued blocks: 16; Resources: 11.0 CPU, 5.3KB object store: 71%|██████████████████████████████████████████████████████████████▏ | 195/276 [00:21<00:09, 8.83 row/s]\n", - "Running Dataset. Active & requested resources: 11/11 CPU, 5.3KB/1.0GB object store: 71%|████████████████████████████████████████████████████████████████████████████████████████████████████████▌ | 195/276 [00:21<00:09, 8.77 row/s]\n", - "- Map(ArynReader._to_doc)->...->MapBatches(postprocess): Tasks: 11 [backpressured]; Queued blocks: 13; Resources: 11.0 CPU, 5.3KB object store: 71%|██████████████████████████████████████████████████████████████▏ | 195/276 [00:22<00:09, 8.83 row/s]\n", - "- Map(ArynReader._to_doc)->...->MapBatches(postprocess): Tasks: 11 [backpressured]; Queued blocks: 13; Resources: 11.0 CPU, 5.3KB object store: 74%|█████████████████████████████████████████████████████████████████ | 204/276 [00:22<00:08, 8.78 row/s]\n", - "Running Dataset. Active & requested resources: 11/11 CPU, 5.3KB/1.0GB object store: 74%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████▍ | 204/276 [00:22<00:08, 8.80 row/s]\n", - "- Map(ArynReader._to_doc)->...->MapBatches(postprocess): Tasks: 11 [backpressured]; Queued blocks: 11; Resources: 11.0 CPU, 5.3KB object store: 74%|█████████████████████████████████████████████████████████████████ | 204/276 [00:23<00:08, 8.78 row/s]\n", - "- Map(ArynReader._to_doc)->...->MapBatches(postprocess): Tasks: 11 [backpressured]; Queued blocks: 11; Resources: 11.0 CPU, 5.3KB object store: 76%|██████████████████████████████████████████████████████████████████▉ | 210/276 [00:23<00:08, 8.05 row/s]\n", - "Running Dataset. Active & requested resources: 11/11 CPU, 5.3KB/1.0GB object store: 76%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌ | 210/276 [00:23<00:08, 7.83 row/s]\n", - "- Map(ArynReader._to_doc)->...->MapBatches(postprocess): Tasks: 11 [backpressured]; Queued blocks: 10; Resources: 11.0 CPU, 5.8KB object store: 76%|██████████████████████████████████████████████████████████████████▉ | 210/276 [00:24<00:08, 8.05 row/s]\n", - "- Map(ArynReader._to_doc)->...->MapBatches(postprocess): Tasks: 11 [backpressured]; Queued blocks: 10; Resources: 11.0 CPU, 5.8KB object store: 78%|████████████████████████████████████████████████████████████████████▊ | 216/276 [00:24<00:08, 7.31 row/s]\n", - "Running Dataset. Active & requested resources: 11/11 CPU, 5.3KB/1.0GB object store: 79%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍ | 219/276 [00:24<00:07, 8.03 row/s]\n", - "- Map(ArynReader._to_doc)->...->MapBatches(postprocess): Tasks: 11 [backpressured]; Queued blocks: 7; Resources: 11.0 CPU, 5.3KB object store: 78%|█████████████████████████████████████████████████████████████████████▋ | 216/276 [00:25<00:08, 7.31 row/s]\n", - "- Map(ArynReader._to_doc)->...->MapBatches(postprocess): Tasks: 11 [backpressured]; Queued blocks: 7; Resources: 11.0 CPU, 5.3KB object store: 82%|████████████████████████████████████████████████████████████████████████▌ | 225/276 [00:25<00:06, 7.66 row/s]\n", - "Running Dataset. Active & requested resources: 9/11 CPU, 5.3KB/1.0GB object store: 82%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍ | 225/276 [00:26<00:07, 7.21 row/s]\n", - "- Map(ArynReader._to_doc)->...->MapBatches(postprocess): Tasks: 11 [backpressured]; Queued blocks: 3; Resources: 11.0 CPU, 5.3KB object store: 82%|████████████████████████████████████████████████████████████████████████▌ | 225/276 [00:26<00:06, 7.66 row/s]\n", - "- Map(ArynReader._to_doc)->...->MapBatches(postprocess): Tasks: 11 [backpressured]; Queued blocks: 3; Resources: 11.0 CPU, 5.3KB object store: 85%|███████████████████████████████████████████████████████████████████████████▍ | 234/276 [00:26<00:05, 8.03 row/s]\n", - "✔️ Dataset execution finished in 27.13 seconds: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 248/248 [00:27<00:00, 9.12 row/s]\n", - "\n", - "- Map(ArynReader._to_doc)->...->MapBatches(postprocess): Tasks: 11 [backpressured]; Queued blocks: 3; Resources: 11.0 CPU, 5.3KB object store: 85%|███████████████████████████████████████████████████████████████████████████▍ | 234/276 [00:27<00:05, 8.03 row/s]\n", - "- Map(ArynReader._to_doc)->...->MapBatches(postprocess): Tasks: 11 [backpressured]; Queued blocks: 0; Resources: 11.0 CPU, 120.6KB object store: 85%|█████████████████████████████████████████████████████████████████████████▊ | 234/276 [00:27<00:05, 8.03 row/s]\n", - "- Map(ArynReader._to_doc)->...->MapBatches(postprocess): Tasks: 11 [backpressured]; Queued blocks: 0; Resources: 11.0 CPU, 120.6KB object store: 94%|██████████████████████████████████████████████████████████████████████████████████ | 234/248 [00:27<00:01, 8.03 row/s]\n", - "- Map(ArynReader._to_doc)->...->MapBatches(postprocess): Tasks: 11 [backpressured]; Queued blocks: 0; Resources: 11.0 CPU, 120.6KB object store: 100%|███████████████████████████████████████████████████████████████████████████████████████| 248/248 [00:27<00:00, 11.1 row/s]\n", - "- Map(ArynReader._to_doc)->...->MapBatches(postprocess): Tasks: 11 [backpressured]; Queued blocks: 0; Resources: 11.0 CPU, 120.6KB object store: 100%|███████████████████████████████████████████████████████████████████████████████████████| 248/248 [00:27<00:00, 9.01 row/s]\n" - ] - }, - { - "data": { - "text/plain": [ - "'The num_customers of the document is:\\n\\n\"over 49,200 customers\" (from ELEMENT 24).'" - ] - }, - "execution_count": 77, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "mdb_docset.take(1)[0].properties.get(\"num_customers\")" ] }, { "cell_type": "code", - "execution_count": 55, + "execution_count": null, "id": "192dc7b1-1598-479f-ab99-709d0c5c81d0", "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2025-03-26 16:58:58,636\tINFO streaming_executor.py:108 -- Starting execution of Dataset. Full logs are in /tmp/ray/session_2025-03-26_16-00-08_504381_77419/logs/ray-data\n", - "2025-03-26 16:58:58,637\tINFO streaming_executor.py:109 -- Execution plan of Dataset: InputDataBuffer[Input] -> TaskPoolMapOperator[Map(ArynReader._to_doc)->MapBatches(remove_original_elements)->MapBatches(spread_properties)->MapBatches(explode)->MapBatches()->MapBatches(sort_and_batch_elements)->MapBatches(llm_map)->MapBatches(postprocess)->MapBatches()]\n", - "Running 0: 0.00 row [00:00, ? row/s]\n", - "- Map(ArynReader._to_doc)->MapBatches(remove_original_elements)->...->MapBatches() 1: 0.00 row [00:00, ? row/s]\n", - "- Map(ArynReader._to_doc)->...->MapBatches(): Tasks: 3 [backpressured]; Queued blocks: 89; Resources: 3.0 CPU, 768.0MB object store: : 0.00 row [00:01, ? row/s]\n", - "Running Dataset. Active & requested resources: 3/11 CPU, 768.0MB/1.0GB object store: : 0.00 row [00:01, ? row/s]0 CPU, 768.0MB object store: : 0.00 row [00:01, ? row/s]\n", - "- Map(ArynReader._to_doc)->...->MapBatches(): Tasks: 11 [backpressured]; Queued blocks: 80; Resources: 11.0 CPU, 12.5MB object store: : 0.00 row [00:02, ? row/s]\n", - "- Map(ArynReader._to_doc)->...->MapBatches(): Tasks: 11 [backpressured]; Queued blocks: 80; Resources: 11.0 CPU, 12.5MB object store: 0%| | 0.00/8.10k [00:02...->MapBatches(): Tasks: 11 [backpressured]; Queued blocks: 80; Resources: 11.0 CPU, 12.5MB object store: 1%|▉ | 88.0/8.10k [00:02<03:24, 39.2 row/s]\n", - " \n", - "✔️ Dataset execution finished in 2.55 seconds: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 175/175 [00:02<00:00, 67.4 row/s]\n", - "\n", - "- Map(ArynReader._to_doc)->...->MapBatches(): Tasks: 11 [backpressured]; Queued blocks: 80; Resources: 11.0 CPU, 12.5MB object store: 1%|▉ | 88.0/8.10k [00:02<03:24, 39.2 row/s]\n", - "- Map(ArynReader._to_doc)->...->MapBatches(): Tasks: 11 [backpressured]; Queued blocks: 78; Resources: 11.0 CPU, 7.9MB object store: 1%|▉ | 88.0/8.10k [00:02<03:24, 39.2 row/s]\n", - "- Map(ArynReader._to_doc)->...->MapBatches(): Tasks: 11 [backpressured]; Queued blocks: 78; Resources: 11.0 CPU, 7.9MB object store: 50%|█████████████████████████████████████████████▎ | 88.0/175 [00:02<00:02, 39.2 row/s]\n", - "- Map(ArynReader._to_doc)->...->MapBatches(): Tasks: 11 [backpressured]; Queued blocks: 78; Resources: 11.0 CPU, 7.9MB object store: 100%|███████████████████████████████████████████████████████████████████████████████████████████| 175/175 [00:02<00:00, 75.3 row/s]\n", - "- Map(ArynReader._to_doc)->...->MapBatches(): Tasks: 11 [backpressured]; Queued blocks: 78; Resources: 11.0 CPU, 7.9MB object store: 100%|███████████████████████████████████████████████████████████████████████████████████████████| 175/175 [00:02<00:00, 75.3 row/s]\n", - "- Map(ArynReader._to_doc)->...->MapBatches(): Tasks: 11 [backpressured]; Queued blocks: 78; Resources: 11.0 CPU, 7.9MB object store: 100%|███████████████████████████████████████████████████████████████████████████████████████████| 175/175 [00:02<00:00, 64.4 row/s]\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{'id': 'aryn:c-tqdcnzjzos49g1475m01p7w',\n", - " 'type': 'Section-header',\n", - " 'text_representation': \"Thank you, Andy. Good day, everybody. I'm pleased to \"\n", - " 'report that we have made a very strong start in <2899 '\n", - " 'chars>',\n", - " 'embedding': '<1536 floats>',\n", - " 'properties': {'score': 0.5760092735290527,\n", - " 'page_number': 2,\n", - " '_element_index': 25,\n", - " 'speaker_name': 'Pascal Soriot ',\n", - " 'speaker_role': 'Chief Executive Officer\\n',\n", - " 'speaker': True,\n", - " 'earnings_calls': {'company_name': 'AstraZeneca Plc',\n", - " 'company_ticker': 'AZN',\n", - " 'quarter': 'Q1',\n", - " 'date': 'Apr 25, 2024'},\n", - " '_autogen_LLMFilterOutput_batches': [],\n", - " '_autogen_LLMFilterOutput_i': 0},\n", - " 'bbox': [0.0725143395693318,\n", - " 0.6332171221174915,\n", - " 0.3964668361619971,\n", - " 0.6457289377304671],\n", - " 'doc_id': 'aryn:c-9lduhzgt43hvisvg5sxv9jg',\n", - " 'elements': [],\n", - " 'lineage_id': 'rwnduqxhs1nl554uar2vwoa',\n", - " 'parent_id': 'aryn:f-0pqfe6b6hlxkbfaweauqhyh'}\n", - "{'id': 'aryn:c-76aek0t4pozqq83m012yso6',\n", - " 'type': 'Text',\n", - " 'text_representation': \"Thank you, Dave. We've had an exciting start to the \"\n", - " 'year with a number of key presentations, includi '\n", - " '<2234 chars>',\n", - " 'embedding': '<1536 floats>',\n", - " 'properties': {'score': 0.5652783513069153,\n", - " 'page_number': 5,\n", - " '_element_index': 51,\n", - " 'speaker_name': 'Susan Galbraith ',\n", - " 'speaker_role': 'Executive Vice President, Oncology Research '\n", - " 'and Development\\n',\n", - " 'speaker': True,\n", - " 'earnings_calls': {'company_name': 'AstraZeneca Plc',\n", - " 'company_ticker': 'AZN',\n", - " 'quarter': 'Q1',\n", - " 'date': 'Apr 25, 2024'},\n", - " '_autogen_LLMFilterOutput_batches': [],\n", - " '_autogen_LLMFilterOutput_i': 0},\n", - " 'bbox': [0.07309046270482408,\n", - " 0.5213716465369816,\n", - " 0.7546507090564504,\n", - " 0.534808461815947],\n", - " 'doc_id': 'aryn:c-b2jhbkg6h2ptxv7dzv76lol',\n", - " 'elements': [],\n", - " 'lineage_id': 'b7gopor1hxqh6q8barujase',\n", - " 'parent_id': 'aryn:f-0pqfe6b6hlxkbfaweauqhyh'}\n", - "{'id': 'aryn:c-n3icf67tmj2m8dlih3pi1iz',\n", - " 'type': 'Section-header',\n", - " 'text_representation': 'Thank you, Susan. Next slide, please. '\n", - " 'BioPharmaceuticals delivered total revenue of $5.2 '\n", - " 'billion in <2371 chars>',\n", - " 'embedding': '<1536 floats>',\n", - " 'properties': {'score': 0.41509145498275757,\n", - " 'page_number': 6,\n", - " '_element_index': 57,\n", - " 'speaker_name': 'Ruud Dobber ',\n", - " 'speaker_role': 'Executive Vice President and President, '\n", - " 'BioPharmaceuticals\\n',\n", - " 'speaker': True,\n", - " 'earnings_calls': {'company_name': 'AstraZeneca Plc',\n", - " 'company_ticker': 'AZN',\n", - " 'quarter': 'Q1',\n", - " 'date': 'Apr 25, 2024'},\n", - " '_autogen_LLMFilterOutput_batches': [],\n", - " '_autogen_LLMFilterOutput_i': 0},\n", - " 'bbox': [0.07204341715347096,\n", - " 0.25080590413237497,\n", - " 0.6988436773338249,\n", - " 0.263547299610544],\n", - " 'doc_id': 'aryn:c-7lh5rymc86bzhfe93bgrflz',\n", - " 'elements': [],\n", - " 'lineage_id': 'mvfivp7mmm5shjejfwdfvxn',\n", - " 'parent_id': 'aryn:f-0pqfe6b6hlxkbfaweauqhyh'}\n", - "{'id': 'aryn:c-itp40ja3z1stroficoem2rs',\n", - " 'type': 'Text',\n", - " 'text_representation': 'Thanks, Ruud. I wanted to take the opportunity to '\n", - " 'highlight results from a 66-week analysis of explo '\n", - " '<1432 chars>',\n", - " 'embedding': '<1536 floats>',\n", - " 'properties': {'score': 0.47726330161094666,\n", - " 'page_number': 7,\n", - " '_element_index': 65,\n", - " 'speaker_name': 'Sharon Barr ',\n", - " 'speaker_role': 'Executive Vice President, BioPharmaceuticals '\n", - " 'Research and Development\\n',\n", - " 'speaker': True,\n", - " 'earnings_calls': {'company_name': 'AstraZeneca Plc',\n", - " 'company_ticker': 'AZN',\n", - " 'quarter': 'Q1',\n", - " 'date': 'Apr 25, 2024'},\n", - " '_autogen_LLMFilterOutput_batches': [],\n", - " '_autogen_LLMFilterOutput_i': 0},\n", - " 'bbox': [0.07267752461049748,\n", - " 0.07626251442506814,\n", - " 0.8047300362976406,\n", - " 0.08979756264362442],\n", - " 'doc_id': 'aryn:c-3hftoktpj9ho3tcehku8z7x',\n", - " 'elements': [],\n", - " 'lineage_id': 'fkd93gt4cinojsaln54awou',\n", - " 'parent_id': 'aryn:f-0pqfe6b6hlxkbfaweauqhyh'}\n", - "{'id': 'aryn:c-3ano24lrtwwhhsyvsde84de',\n", - " 'type': 'Section-header',\n", - " 'text_representation': 'Thank you, Pascal. And as usual, I will start with '\n", - " 'our reported P&L. Please turn to the next slide. '\n", - " '<3254 chars>',\n", - " 'embedding': '<1536 floats>',\n", - " 'properties': {'score': 0.650137722492218,\n", - " 'page_number': 3,\n", - " '_element_index': 34,\n", - " 'speaker_name': 'Aradhana Sarin ',\n", - " 'speaker_role': 'Chief Financial Officer\\n',\n", - " 'speaker': True,\n", - " 'earnings_calls': {'company_name': 'AstraZeneca Plc',\n", - " 'company_ticker': 'AZN',\n", - " 'quarter': 'Q1',\n", - " 'date': 'Apr 25, 2024'},\n", - " '_autogen_LLMFilterOutput_batches': [],\n", - " '_autogen_LLMFilterOutput_i': 0},\n", - " 'bbox': [0.0726197064463759,\n", - " 0.5763961837657653,\n", - " 0.41028275117683755,\n", - " 0.5888303941822093],\n", - " 'doc_id': 'aryn:c-zpoz72b7evqlx9wmgknzwef',\n", - " 'elements': [],\n", - " 'lineage_id': '02zgotaioy8wio6zr0venz3',\n", - " 'parent_id': 'aryn:f-0pqfe6b6hlxkbfaweauqhyh'}\n", - "{'id': 'aryn:c-uomkep2xgoph35kzas5sega',\n", - " 'type': 'Section-header',\n", - " 'text_representation': 'Thank you, Aradhana. Next slide, please. Oncology '\n", - " 'revenues grew 26% to $5.1 billion in the first quar '\n", - " '<2633 chars>',\n", - " 'embedding': '<1536 floats>',\n", - " 'properties': {'score': 0.5771273970603943,\n", - " 'page_number': 4,\n", - " '_element_index': 43,\n", - " 'speaker_name': 'Dave Fredrickson ',\n", - " 'speaker_role': 'Executive Vice President, Oncology Business\\n',\n", - " 'speaker': True,\n", - " 'earnings_calls': {'company_name': 'AstraZeneca Plc',\n", - " 'company_ticker': 'AZN',\n", - " 'quarter': 'Q1',\n", - " 'date': 'Apr 25, 2024'},\n", - " '_autogen_LLMFilterOutput_batches': [],\n", - " '_autogen_LLMFilterOutput_i': 0},\n", - " 'bbox': [0.07230571047580393,\n", - " 0.649608362531397,\n", - " 0.6152173161723004,\n", - " 0.663033383073028],\n", - " 'doc_id': 'aryn:c-vl4w143sgalz7jv718k8a9h',\n", - " 'elements': [],\n", - " 'lineage_id': 'eryy4dzr0vkigkohskynm17',\n", - " 'parent_id': 'aryn:f-0pqfe6b6hlxkbfaweauqhyh'}\n", - "{'id': 'aryn:c-bgm4z5elxh64476u9qyc1n8',\n", - " 'type': 'Text',\n", - " 'text_representation': 'Thank you, Sharon. Can I get the next slide, please. '\n", - " \"I'm delighted to report Rare Disease delivered <2318 \"\n", - " 'chars>',\n", - " 'embedding': '<1536 floats>',\n", - " 'properties': {'score': 0.4892081022262573,\n", - " 'page_number': 7,\n", - " '_element_index': 69,\n", - " 'speaker_name': 'Marc Dunoyer ',\n", - " 'speaker_role': 'Chief Executive Officer, Alexion, and Chief '\n", - " 'Strategy Officer, AstraZeneca\\n',\n", - " 'speaker': True,\n", - " 'earnings_calls': {'company_name': 'AstraZeneca Plc',\n", - " 'company_ticker': 'AZN',\n", - " 'quarter': 'Q1',\n", - " 'date': 'Apr 25, 2024'},\n", - " '_autogen_LLMFilterOutput_batches': [],\n", - " '_autogen_LLMFilterOutput_i': 0},\n", - " 'bbox': [0.07219824355368316,\n", - " 0.5335392228249252,\n", - " 0.8001898773064126,\n", - " 0.5462835408928762],\n", - " 'doc_id': 'aryn:c-vcceb78cgzfwdfms9lw21x5',\n", - " 'elements': [],\n", - " 'lineage_id': '42jaugk11ttaigm2j3lfdy2',\n", - " 'parent_id': 'aryn:f-0pqfe6b6hlxkbfaweauqhyh'}\n", - "{'id': 'aryn:c-vol06ocr4gsnwx5bww8krpd',\n", - " 'type': 'Section-header',\n", - " 'text_representation': 'Thanks, Marc. Can I have the next slide, please? As '\n", - " 'you have heard, our company has made a very\\n'\n", - " ' str <1354 chars>',\n", - " 'embedding': '<1536 floats>',\n", - " 'properties': {'score': 0.6381353735923767,\n", - " 'page_number': 8,\n", - " '_element_index': 78,\n", - " 'speaker_name': 'Pascal Soriot ',\n", - " 'speaker_role': 'Chief Executive Officer\\n',\n", - " 'speaker': True,\n", - " 'earnings_calls': {'company_name': 'AstraZeneca Plc',\n", - " 'company_ticker': 'AZN',\n", - " 'quarter': 'Q1',\n", - " 'date': 'Apr 25, 2024'},\n", - " '_autogen_LLMFilterOutput_batches': [],\n", - " '_autogen_LLMFilterOutput_i': 0},\n", - " 'bbox': [0.07248473124149274,\n", - " 0.35918581450072146,\n", - " 0.3963169621757789,\n", - " 0.371825754362174],\n", - " 'doc_id': 'aryn:c-hu9thepubaxsjvj6gtpvous',\n", - " 'elements': [],\n", - " 'lineage_id': 'u2rtj8scun611k6o5zqilxf',\n", - " 'parent_id': 'aryn:f-0pqfe6b6hlxkbfaweauqhyh'}\n", - "{'id': 'aryn:c-o7ydlpfitzgwmz8c5dt88xi',\n", - " 'type': 'Section-header',\n", - " 'text_representation': 'Hello. James Gordon, J.P. Morgan. First question is '\n", - " \"on '24 guidance.\\n\"\n", - " \"So it's a very strong revenue g <1275 chars>\",\n", - " 'embedding': '<1536 floats>',\n", - " 'properties': {'score': 0.5881184339523315,\n", - " 'page_number': 8,\n", - " '_element_index': 83,\n", - " 'speaker_name': 'James Gordon ',\n", - " 'speaker_external_org': 'JPMorgan Chase and Company',\n", - " 'speaker_role': 'Analyst\\n',\n", - " 'speaker': True,\n", - " 'earnings_calls': {'company_name': 'AstraZeneca Plc',\n", - " 'company_ticker': 'AZN',\n", - " 'quarter': 'Q1',\n", - " 'date': 'Apr 25, 2024'},\n", - " '_autogen_LLMFilterOutput_batches': [],\n", - " '_autogen_LLMFilterOutput_i': 0},\n", - " 'bbox': [0.07105229741792858,\n", - " 0.8330905844177533,\n", - " 0.5645136062982078,\n", - " 0.8459853636703719],\n", - " 'doc_id': 'aryn:c-9d5layn7gm5w98ye8iflv9c',\n", - " 'elements': [],\n", - " 'lineage_id': 'zpqdey94fhauazilubwug80',\n", - " 'parent_id': 'aryn:f-0pqfe6b6hlxkbfaweauqhyh'}\n", - "{'id': 'aryn:c-3flzrcf4utec3sygovb9h7k',\n", - " 'type': 'Text',\n", - " 'text_representation': 'Sure. So thank you for the question about AZD5004 '\n", - " \"oral GLP-1 receptor agonist. We're really excited\\n\"\n", - " ' <1600 chars>',\n", - " 'embedding': '<1536 floats>',\n", - " 'properties': {'score': 0.46687883138656616,\n", - " 'page_number': 9,\n", - " '_element_index': 95,\n", - " 'speaker_name': 'Sharon Barr ',\n", - " 'speaker_role': 'Executive Vice President, BioPharmaceuticals '\n", - " 'Research and Development\\n',\n", - " 'speaker': True,\n", - " 'earnings_calls': {'company_name': 'AstraZeneca Plc',\n", - " 'company_ticker': 'AZN',\n", - " 'quarter': 'Q1',\n", - " 'date': 'Apr 25, 2024'},\n", - " '_autogen_LLMFilterOutput_batches': [],\n", - " '_autogen_LLMFilterOutput_i': 0},\n", - " 'bbox': [0.07184836219746059,\n", - " 0.8500147799540402,\n", - " 0.8043362063057699,\n", - " 0.8629591388547456],\n", - " 'doc_id': 'aryn:c-ity4qdwenp35ldkkfwxn5mm',\n", - " 'elements': [],\n", - " 'lineage_id': 'u6b90cbv7tkad7263njyf1e',\n", - " 'parent_id': 'aryn:f-0pqfe6b6hlxkbfaweauqhyh'}\n", - "{'id': 'aryn:c-4mdwbuas47h87mm72k3fdz3',\n", - " 'type': 'Section-header',\n", - " 'text_representation': 'Thank you, Sharon. And James, you asked the question '\n", - " 'of the doors, but you got to remember that not\\n'\n", - " ' <404 chars>',\n", - " 'embedding': '<1536 floats>',\n", - " 'properties': {'score': 0.6443524360656738,\n", - " 'page_number': 10,\n", - " '_element_index': 100,\n", - " 'speaker_name': 'Pascal Soriot ',\n", - " 'speaker_role': 'Chief Executive Officer\\n',\n", - " 'speaker': True,\n", - " 'earnings_calls': {'company_name': 'AstraZeneca Plc',\n", - " 'company_ticker': 'AZN',\n", - " 'quarter': 'Q1',\n", - " 'date': 'Apr 25, 2024'},\n", - " '_autogen_LLMFilterOutput_batches': [],\n", - " '_autogen_LLMFilterOutput_i': 0},\n", - " 'bbox': [0.07268579094177727,\n", - " 0.4330463645905702,\n", - " 0.3967575008625416,\n", - " 0.44555925008016245],\n", - " 'doc_id': 'aryn:c-nyeo6v24funjmk1fbpfvx3c',\n", - " 'elements': [],\n", - " 'lineage_id': '3ks8nwcw12gxndtgq2a88it',\n", - " 'parent_id': 'aryn:f-0pqfe6b6hlxkbfaweauqhyh'}\n", - "{'id': 'aryn:c-0u80gdjejlqvcnzbk2854wg',\n", - " 'type': 'Section-header',\n", - " 'text_representation': 'Great. Thanks for the question. So I guess as, '\n", - " 'Pascal, a little bit of a preview of the upcoming ana '\n", - " '<654 chars>',\n", - " 'embedding': '<1536 floats>',\n", - " 'properties': {'score': 0.5857893824577332,\n", - " 'page_number': 10,\n", - " '_element_index': 103,\n", - " 'speaker_name': 'Seamus Fernandez ',\n", - " 'speaker_external_org': 'Guggenheim Partners',\n", - " 'speaker_role': 'Analyst\\n',\n", - " 'speaker': True,\n", - " 'earnings_calls': {'company_name': 'AstraZeneca Plc',\n", - " 'company_ticker': 'AZN',\n", - " 'quarter': 'Q1',\n", - " 'date': 'Apr 25, 2024'},\n", - " '_autogen_LLMFilterOutput_batches': [],\n", - " '_autogen_LLMFilterOutput_i': 0},\n", - " 'bbox': [0.07241827289191291,\n", - " 0.6438110925642636,\n", - " 0.5217128229804711,\n", - " 0.6568917172135528],\n", - " 'doc_id': 'aryn:c-vmsbd3pyphjvj9zl49i9hvk',\n", - " 'elements': [],\n", - " 'lineage_id': 'qo88t5asp5z54mkqz5wfdhm',\n", - " 'parent_id': 'aryn:f-0pqfe6b6hlxkbfaweauqhyh'}\n", - "{'id': 'aryn:c-wsvyy8d39obxo6v69powi77',\n", - " 'type': 'Text',\n", - " 'text_representation': \"Sure. So I'll jump in on both. People often ask about \"\n", - " \"which molecule I'm more excited about. And I fi <2113 \"\n", - " 'chars>',\n", - " 'embedding': '<1536 floats>',\n", - " 'properties': {'score': 0.45202550292015076,\n", - " 'page_number': 11,\n", - " '_element_index': 113,\n", - " 'speaker_name': 'Sharon Barr ',\n", - " 'speaker_role': 'Executive Vice President, BioPharmaceuticals '\n", - " 'Research and Development\\n',\n", - " 'speaker': True,\n", - " 'earnings_calls': {'company_name': 'AstraZeneca Plc',\n", - " 'company_ticker': 'AZN',\n", - " 'quarter': 'Q1',\n", - " 'date': 'Apr 25, 2024'},\n", - " '_autogen_LLMFilterOutput_batches': [],\n", - " '_autogen_LLMFilterOutput_i': 0},\n", - " 'bbox': [0.07235127914622562,\n", - " 0.6672766703018117,\n", - " 0.8036774844742514,\n", - " 0.6798483990053175],\n", - " 'doc_id': 'aryn:c-bqov66vgesuktevmk1y9lyk',\n", - " 'elements': [],\n", - " 'lineage_id': '4i8442e85hecndxxtx4ntov',\n", - " 'parent_id': 'aryn:f-0pqfe6b6hlxkbfaweauqhyh'}\n", - "{'id': 'aryn:c-lo2wegf7qrtwofbfd0sdavj',\n", - " 'type': 'Text',\n", - " 'text_representation': 'No.\\n',\n", - " 'embedding': '<1536 floats>',\n", - " 'properties': {'score': 0.5130598545074463,\n", - " 'page_number': 12,\n", - " '_element_index': 122,\n", - " 'speaker_name': 'Ruud Dobber ',\n", - " 'speaker_role': 'Executive Vice President and President, '\n", - " 'BioPharmaceuticals\\n',\n", - " 'speaker': True,\n", - " 'earnings_calls': {'company_name': 'AstraZeneca Plc',\n", - " 'company_ticker': 'AZN',\n", - " 'quarter': 'Q1',\n", - " 'date': 'Apr 25, 2024'},\n", - " '_autogen_LLMFilterOutput_batches': [],\n", - " '_autogen_LLMFilterOutput_i': 0},\n", - " 'bbox': [0.07184797449683104,\n", - " 0.46429195461134565,\n", - " 0.6999568581508999,\n", - " 0.4779964999899797],\n", - " 'doc_id': 'aryn:c-zoz6immogc95vvaygx8jco4',\n", - " 'elements': [],\n", - " 'lineage_id': 'geebxb06fuiu07pn6xzn4jm',\n", - " 'parent_id': 'aryn:f-0pqfe6b6hlxkbfaweauqhyh'}\n", - "{'id': 'aryn:c-cx8a4eq56nnovxianz7kttn',\n", - " 'type': 'Section-header',\n", - " 'text_representation': 'Thank you, James. Just sorry, Aradhana, maybe you can '\n", - " 'take the first one. And Sharon, would you take\\n'\n", - " ' <17 chars>',\n", - " 'embedding': '<1536 floats>',\n", - " 'properties': {'score': 0.5694078803062439,\n", - " 'page_number': 9,\n", - " '_element_index': 88,\n", - " 'speaker_name': 'Pascal Soriot ',\n", - " 'speaker_role': 'Chief Executive Officer\\n',\n", - " 'speaker': True,\n", - " 'earnings_calls': {'company_name': 'AstraZeneca Plc',\n", - " 'company_ticker': 'AZN',\n", - " 'quarter': 'Q1',\n", - " 'date': 'Apr 25, 2024'},\n", - " '_autogen_LLMFilterOutput_batches': [],\n", - " '_autogen_LLMFilterOutput_i': 0},\n", - " 'bbox': [0.07264135768034209,\n", - " 0.31944271640524796,\n", - " 0.39724637289445325,\n", - " 0.3321466662018357],\n", - " 'doc_id': 'aryn:c-4low1i3lwzblh3j9863myri',\n", - " 'elements': [],\n", - " 'lineage_id': 'pvi4payeaktsjv4l8y75j2x',\n", - " 'parent_id': 'aryn:f-0pqfe6b6hlxkbfaweauqhyh'}\n", - "{'id': 'aryn:c-0v88y8r627847dgrs9hzt9s',\n", - " 'type': 'Section-header',\n", - " 'text_representation': 'Great. Thank you, James, for the question. it is '\n", - " 'obviously early in the year. And as you know, gener '\n", - " '<1140 chars>',\n", - " 'embedding': '<1536 floats>',\n", - " 'properties': {'score': 0.6427060961723328,\n", - " 'page_number': 9,\n", - " '_element_index': 90,\n", - " 'speaker_name': 'Aradhana Sarin ',\n", - " 'speaker_role': 'Chief Financial Officer\\n',\n", - " 'speaker': True,\n", - " 'earnings_calls': {'company_name': 'AstraZeneca Plc',\n", - " 'company_ticker': 'AZN',\n", - " 'quarter': 'Q1',\n", - " 'date': 'Apr 25, 2024'},\n", - " '_autogen_LLMFilterOutput_batches': [],\n", - " '_autogen_LLMFilterOutput_i': 0},\n", - " 'bbox': [0.07239368621032309,\n", - " 0.42219562375581177,\n", - " 0.4105753728424645,\n", - " 0.43487817602006734],\n", - " 'doc_id': 'aryn:c-0brxr7ns4knz4utx8m4rbe4',\n", - " 'elements': [],\n", - " 'lineage_id': 'apxg4a3sm8tp33v46lep9m5',\n", - " 'parent_id': 'aryn:f-0pqfe6b6hlxkbfaweauqhyh'}\n", - "{'id': 'aryn:c-x4tb7hhp2w7h3uuo2ityd8k',\n", - " 'type': 'Section-header',\n", - " 'text_representation': \"Cool. So let's move to the next question, Sachin Jain \"\n", - " 'at Bank of America. Sachin over to you.\\n',\n", - " 'embedding': '<1536 floats>',\n", - " 'properties': {'score': 0.41843941807746887,\n", - " 'page_number': 12,\n", - " '_element_index': 124,\n", - " 'speaker_name': 'Pascal Soriot ',\n", - " 'speaker_role': 'Chief Executive Officer\\n',\n", - " 'speaker': True,\n", - " 'earnings_calls': {'company_name': 'AstraZeneca Plc',\n", - " 'company_ticker': 'AZN',\n", - " 'quarter': 'Q1',\n", - " 'date': 'Apr 25, 2024'},\n", - " '_autogen_LLMFilterOutput_batches': [],\n", - " '_autogen_LLMFilterOutput_i': 0},\n", - " 'bbox': [0.07183900661203116,\n", - " 0.5442762436037035,\n", - " 0.39754575162819494,\n", - " 0.5576024179837804],\n", - " 'doc_id': 'aryn:c-4rjogd1gay7yngjctuir3wq',\n", - " 'elements': [],\n", - " 'lineage_id': 'lsld8yjah9bwfpkojvaiymo',\n", - " 'parent_id': 'aryn:f-0pqfe6b6hlxkbfaweauqhyh'}\n", - "{'id': 'aryn:c-28n8qdc7b7m4e5kumfsdm5t',\n", - " 'type': 'Section-header',\n", - " 'text_representation': 'Hi there. Thanks for taking my questions. Sachin '\n", - " 'Jain, Bank of America. First one today for Truqap.\\n'\n", - " ' <738 chars>',\n", - " 'embedding': '<1536 floats>',\n", - " 'properties': {'score': 0.4497426152229309,\n", - " 'page_number': 12,\n", - " '_element_index': 126,\n", - " 'speaker_name': 'Sachin Jain ',\n", - " 'speaker_external_org': 'Bank of America Merrill Lynch',\n", - " 'speaker_role': 'Analyst\\n',\n", - " 'speaker': True,\n", - " 'earnings_calls': {'company_name': 'AstraZeneca Plc',\n", - " 'company_ticker': 'AZN',\n", - " 'quarter': 'Q1',\n", - " 'date': 'Apr 25, 2024'},\n", - " '_autogen_LLMFilterOutput_batches': [],\n", - " '_autogen_LLMFilterOutput_i': 0},\n", - " 'bbox': [0.07173691672840338,\n", - " 0.6239503727554511,\n", - " 0.5197493592970168,\n", - " 0.6373367211715744],\n", - " 'doc_id': 'aryn:c-iv6mz9dw8hearqbhkktfqwo',\n", - " 'elements': [],\n", - " 'lineage_id': 'gda8tf0bsfs01rsfccfm2ga',\n", - " 'parent_id': 'aryn:f-0pqfe6b6hlxkbfaweauqhyh'}\n", - "{'id': 'aryn:c-32u0dgy3bo7qbqck2i4k5l8',\n", - " 'type': 'Section-header',\n", - " 'text_representation': 'Thanks, Susan.\\n',\n", - " 'embedding': '<1536 floats>',\n", - " 'properties': {'score': 0.4778229296207428,\n", - " 'page_number': 14,\n", - " '_element_index': 143,\n", - " 'speaker_name': 'Pascal Soriot ',\n", - " 'speaker_role': 'Chief Executive Officer\\n',\n", - " 'speaker': True,\n", - " 'earnings_calls': {'company_name': 'AstraZeneca Plc',\n", - " 'company_ticker': 'AZN',\n", - " 'quarter': 'Q1',\n", - " 'date': 'Apr 25, 2024'},\n", - " '_autogen_LLMFilterOutput_batches': [],\n", - " '_autogen_LLMFilterOutput_i': 0},\n", - " 'bbox': [0.072463527709444,\n", - " 0.0728359279493941,\n", - " 0.39807427989592786,\n", - " 0.08655828938314986],\n", - " 'doc_id': 'aryn:c-1tfy0tip2yzz4ivrxmn27rr',\n", - " 'elements': [],\n", - " 'lineage_id': 'kjmudo0x8z009iwlfktd312',\n", - " 'parent_id': 'aryn:f-0pqfe6b6hlxkbfaweauqhyh'}\n", - "{'id': 'aryn:c-fu5r56876twfh7egfvs7aol',\n", - " 'type': 'Section-header',\n", - " 'text_representation': 'Thanks, James. So the first question about the '\n", - " 'Investor Day, I guess I would like to invite you to '\n", - " 'jo <2224 chars>',\n", - " 'embedding': '<1536 floats>',\n", - " 'properties': {'score': 0.6408979296684265,\n", - " 'page_number': 10,\n", - " '_element_index': 106,\n", - " 'speaker_name': 'Pascal Soriot ',\n", - " 'speaker_role': 'Chief Executive Officer\\n',\n", - " 'speaker': True,\n", - " 'earnings_calls': {'company_name': 'AstraZeneca Plc',\n", - " 'company_ticker': 'AZN',\n", - " 'quarter': 'Q1',\n", - " 'date': 'Apr 25, 2024'},\n", - " '_autogen_LLMFilterOutput_batches': [],\n", - " '_autogen_LLMFilterOutput_i': 0},\n", - " 'bbox': [0.07216195662095196,\n", - " 0.9010030119375267,\n", - " 0.39688171275947143,\n", - " 0.9135709830255986],\n", - " 'doc_id': 'aryn:c-jfs34e6huyz6in4dn9kqu24',\n", - " 'elements': [],\n", - " 'lineage_id': '7bsewlcfl666lv1g3apu5sc',\n", - " 'parent_id': 'aryn:f-0pqfe6b6hlxkbfaweauqhyh'}\n" - ] - } - ], + "outputs": [], "source": [ "llm_filtered_docset.show()" ] }, { "cell_type": "code", - "execution_count": 24, + "execution_count": null, "id": "2f144ffe-eeb3-4770-a1da-3143efbe3ed1", "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2025-03-26 16:13:49,144\tINFO streaming_executor.py:108 -- Starting execution of Dataset. Full logs are in /tmp/ray/session_2025-03-26_16-00-08_504381_77419/logs/ray-data\n", - "2025-03-26 16:13:49,145\tINFO streaming_executor.py:109 -- Execution plan of Dataset: InputDataBuffer[Input] -> TaskPoolMapOperator[Map(ArynReader._to_doc)->MapBatches(remove_original_elements)->MapBatches()->MapBatches(spread_properties)->MapBatches(explode)]\n", - "Running 0: 0.00 row [00:00, ? row/s]\n", - "- Map(ArynReader._to_doc)->MapBatches(remove_original_elements)->...->MapBatches(explode) 1: 0.00 row [00:00, ? row/s]\n", - "- Map(ArynReader._to_doc)->...->MapBatches(explode): Tasks: 3 [backpressured]; Queued blocks: 89; Resources: 3.0 CPU, 768.0MB object store: : 0.00 row [00:01, ? row/s]\n", - "Running Dataset. Active & requested resources: 3/11 CPU, 768.0MB/1.0GB object store: : 0.00 row [00:01, ? row/s] CPU, 768.0MB object store: : 0.00 row [00:01, ? row/s]\n", - "- Map(ArynReader._to_doc)->...->MapBatches(explode): Tasks: 11 [backpressured]; Queued blocks: 78; Resources: 11.0 CPU, 3.4KB object store: : 0.00 row [00:02, ? row/s]\n", - "- Map(ArynReader._to_doc)->...->MapBatches(explode): Tasks: 11 [backpressured]; Queued blocks: 78; Resources: 11.0 CPU, 3.4KB object store: 0%| | 0.00/184 [00:02...->MapBatches(explode): Tasks: 11 [backpressured]; Queued blocks: 78; Resources: 11.0 CPU, 3.4KB object store: 3%|██▉ | 6.00/184 [00:02<01:04, 2.76 row/s]\n", - "Running Dataset. Active & requested resources: 11/11 CPU, 3.4KB/1.0GB object store: 3%|████▊ | 6.00/184 [00:02<01:05, 2.70 row/s]\n", - "- Map(ArynReader._to_doc)->...->MapBatches(explode): Tasks: 11 [backpressured]; Queued blocks: 77; Resources: 11.0 CPU, 3.4KB object store: 3%|██▉ | 6.00/184 [00:03<01:04, 2.76 row/s]\n", - "- Map(ArynReader._to_doc)->...->MapBatches(explode): Tasks: 11 [backpressured]; Queued blocks: 77; Resources: 11.0 CPU, 3.4KB object store: 4%|███▉ | 8.00/184 [00:03<01:11, 2.45 row/s]\n", - "Running Dataset. Active & requested resources: 11/11 CPU, 3.4KB/1.0GB object store: 4%|██████▍ | 8.00/184 [00:03<01:12, 2.43 row/s]\n", - "- Map(ArynReader._to_doc)->...->MapBatches(explode): Tasks: 11 [backpressured]; Queued blocks: 75; Resources: 11.0 CPU, 3.4KB object store: 4%|███▉ | 8.00/184 [00:04<01:11, 2.45 row/s]\n", - "- Map(ArynReader._to_doc)->...->MapBatches(explode): Tasks: 11 [backpressured]; Queued blocks: 75; Resources: 11.0 CPU, 3.4KB object store: 7%|█████▉ | 12.0/184 [00:04<00:56, 3.02 row/s]\n", - "Running Dataset. Active & requested resources: 11/11 CPU, 3.4KB/1.0GB object store: 7%|█████████▌ | 12.0/184 [00:04<00:59, 2.91 row/s]\n", - "- Map(ArynReader._to_doc)->...->MapBatches(explode): Tasks: 11 [backpressured]; Queued blocks: 71; Resources: 11.0 CPU, 3.4KB object store: 7%|█████▉ | 12.0/184 [00:05<00:56, 3.02 row/s]\n", - "- Map(ArynReader._to_doc)->...->MapBatches(explode): Tasks: 11 [backpressured]; Queued blocks: 71; Resources: 11.0 CPU, 3.4KB object store: 11%|█████████▉ | 20.0/184 [00:05<00:35, 4.60 row/s]\n", - "Running Dataset. Active & requested resources: 11/11 CPU, 3.4KB/1.0GB object store: 11%|███████████████▉ | 20.0/184 [00:05<00:36, 4.48 row/s]\n", - "- Map(ArynReader._to_doc)->...->MapBatches(explode): Tasks: 11 [backpressured]; Queued blocks: 69; Resources: 11.0 CPU, 3.4KB object store: 11%|█████████▉ | 20.0/184 [00:06<00:35, 4.60 row/s]\n", - "- Map(ArynReader._to_doc)->...->MapBatches(explode): Tasks: 11 [backpressured]; Queued blocks: 69; Resources: 11.0 CPU, 3.4KB object store: 13%|███████████▊ | 24.0/184 [00:06<00:37, 4.28 row/s]\n", - "Running Dataset. Active & requested resources: 11/11 CPU, 3.4KB/1.0GB object store: 13%|███████████████████▏ | 24.0/184 [00:06<00:37, 4.32 row/s]\n", - "- Map(ArynReader._to_doc)->...->MapBatches(explode): Tasks: 11 [backpressured]; Queued blocks: 65; Resources: 11.0 CPU, 3.8KB object store: 13%|███████████▊ | 24.0/184 [00:07<00:37, 4.28 row/s]\n", - "- Map(ArynReader._to_doc)->...->MapBatches(explode): Tasks: 11 [backpressured]; Queued blocks: 65; Resources: 11.0 CPU, 3.8KB object store: 18%|████████████████▊ | 34.0/184 [00:07<00:24, 6.04 row/s]\n", - "Running Dataset. Active & requested resources: 10/11 CPU, 3.4KB/1.0GB object store: 18%|███████████████████████████▏ | 34.0/184 [00:07<00:25, 5.90 row/s]\n", - "- Map(ArynReader._to_doc)->...->MapBatches(explode): Tasks: 11 [backpressured]; Queued blocks: 56; Resources: 11.0 CPU, 3.4KB object store: 18%|████████████████▊ | 34.0/184 [00:08<00:24, 6.04 row/s]\n", - "- Map(ArynReader._to_doc)->...->MapBatches(explode): Tasks: 11 [backpressured]; Queued blocks: 56; Resources: 11.0 CPU, 3.4KB object store: 27%|████████████████████████▋ | 50.0/184 [00:08<00:15, 8.87 row/s]\n", - "Running Dataset. Active & requested resources: 11/11 CPU, 3.4KB/1.0GB object store: 27%|███████████████████████████████████████▉ | 50.0/184 [00:08<00:15, 8.75 row/s]\n", - "- Map(ArynReader._to_doc)->...->MapBatches(explode): Tasks: 11 [backpressured]; Queued blocks: 55; Resources: 11.0 CPU, 3.4KB object store: 27%|████████████████████████▋ | 50.0/184 [00:09<00:15, 8.87 row/s]\n", - "- Map(ArynReader._to_doc)->...->MapBatches(explode): Tasks: 11 [backpressured]; Queued blocks: 55; Resources: 11.0 CPU, 3.4KB object store: 28%|█████████████████████████▋ | 52.0/184 [00:09<00:19, 6.66 row/s]\n", - "Running Dataset. Active & requested resources: 8/11 CPU, 3.4KB/1.0GB object store: 28%|█████████████████████████████████████████▊ | 52.0/184 [00:09<00:20, 6.60 row/s]\n", - "- Map(ArynReader._to_doc)->...->MapBatches(explode): Tasks: 11 [backpressured]; Queued blocks: 50; Resources: 11.0 CPU, 3.4KB object store: 28%|█████████████████████████▋ | 52.0/184 [00:10<00:19, 6.66 row/s]\n", - "- Map(ArynReader._to_doc)->...->MapBatches(explode): Tasks: 11 [backpressured]; Queued blocks: 50; Resources: 11.0 CPU, 3.4KB object store: 34%|██████████████████████████████▋ | 62.0/184 [00:10<00:16, 7.45 row/s]\n", - "Running Dataset. Active & requested resources: 11/11 CPU, 3.4KB/1.0GB object store: 34%|█████████████████████████████████████████████████▌ | 62.0/184 [00:10<00:16, 7.37 row/s]\n", - "- Map(ArynReader._to_doc)->...->MapBatches(explode): Tasks: 11 [backpressured]; Queued blocks: 49; Resources: 11.0 CPU, 3.8KB object store: 34%|██████████████████████████████▋ | 62.0/184 [00:11<00:16, 7.45 row/s]\n", - "- Map(ArynReader._to_doc)->...->MapBatches(explode): Tasks: 11 [backpressured]; Queued blocks: 49; Resources: 11.0 CPU, 3.8KB object store: 36%|████████████████████████████████▋ | 66.0/184 [00:11<00:18, 6.28 row/s]\n", - "Running Dataset. Active & requested resources: 10/11 CPU, 3.4KB/1.0GB object store: 36%|████████████████████████████████████████████████████▋ | 66.0/184 [00:11<00:18, 6.26 row/s]\n", - "- Map(ArynReader._to_doc)->...->MapBatches(explode): Tasks: 11 [backpressured]; Queued blocks: 44; Resources: 11.0 CPU, 3.4KB object store: 36%|████████████████████████████████▋ | 66.0/184 [00:12<00:18, 6.28 row/s]\n", - "- Map(ArynReader._to_doc)->...->MapBatches(explode): Tasks: 11 [backpressured]; Queued blocks: 44; Resources: 11.0 CPU, 3.4KB object store: 40%|████████████████████████████████████▌ | 74.0/184 [00:12<00:16, 6.60 row/s]\n", - "Running Dataset. Active & requested resources: 11/11 CPU, 3.4KB/1.0GB object store: 40%|███████████████████████████████████████████████████████████ | 74.0/184 [00:12<00:16, 6.66 row/s]\n", - "- Map(ArynReader._to_doc)->...->MapBatches(explode): Tasks: 11 [backpressured]; Queued blocks: 39; Resources: 11.0 CPU, 3.4KB object store: 40%|████████████████████████████████████▌ | 74.0/184 [00:13<00:16, 6.60 row/s]\n", - "- Map(ArynReader._to_doc)->...->MapBatches(explode): Tasks: 11 [backpressured]; Queued blocks: 39; Resources: 11.0 CPU, 3.4KB object store: 46%|█████████████████████████████████████████▌ | 84.0/184 [00:13<00:13, 7.48 row/s]\n", - "Running Dataset. Active & requested resources: 10/11 CPU, 3.4KB/1.0GB object store: 46%|███████████████████████████████████████████████████████████████████ | 84.0/184 [00:13<00:13, 7.43 row/s]\n", - "- Map(ArynReader._to_doc)->...->MapBatches(explode): Tasks: 11 [backpressured]; Queued blocks: 33; Resources: 11.0 CPU, 3.4KB object store: 46%|█████████████████████████████████████████▌ | 84.0/184 [00:14<00:13, 7.48 row/s]\n", - "- Map(ArynReader._to_doc)->...->MapBatches(explode): Tasks: 11 [backpressured]; Queued blocks: 33; Resources: 11.0 CPU, 3.4KB object store: 52%|███████████████████████████████████████████████▍ | 96.0/184 [00:14<00:10, 8.54 row/s]\n", - "Running Dataset. Active & requested resources: 11/11 CPU, 3.4KB/1.0GB object store: 52%|████████████████████████████████████████████████████████████████████████████▋ | 96.0/184 [00:15<00:10, 8.50 row/s]\n", - "- Map(ArynReader._to_doc)->...->MapBatches(explode): Tasks: 11 [backpressured]; Queued blocks: 30; Resources: 11.0 CPU, 3.4KB object store: 52%|███████████████████████████████████████████████▍ | 96.0/184 [00:16<00:10, 8.54 row/s]\n", - "- Map(ArynReader._to_doc)->...->MapBatches(explode): Tasks: 11 [backpressured]; Queued blocks: 30; Resources: 11.0 CPU, 3.4KB object store: 55%|███████████████████████████████████████████████████ | 102/184 [00:16<00:10, 7.60 row/s]\n", - "Running Dataset. Active & requested resources: 11/11 CPU, 3.4KB/1.0GB object store: 55%|██████████████████████████████████████████████████████████████████████████████████ | 102/184 [00:16<00:10, 7.58 row/s]\n", - "- Map(ArynReader._to_doc)->...->MapBatches(explode): Tasks: 11 [backpressured]; Queued blocks: 26; Resources: 11.0 CPU, 3.4KB object store: 55%|███████████████████████████████████████████████████ | 102/184 [00:17<00:10, 7.60 row/s]\n", - "- Map(ArynReader._to_doc)->...->MapBatches(explode): Tasks: 11 [backpressured]; Queued blocks: 26; Resources: 11.0 CPU, 3.4KB object store: 60%|███████████████████████████████████████████████████████ | 110/184 [00:17<00:09, 7.51 row/s]\n", - "Running Dataset. Active & requested resources: 10/11 CPU, 3.4KB/1.0GB object store: 60%|████████████████████████████████████████████████████████████████████████████████████████▍ | 110/184 [00:17<00:09, 7.48 row/s]\n", - "- Map(ArynReader._to_doc)->...->MapBatches(explode): Tasks: 11 [backpressured]; Queued blocks: 23; Resources: 11.0 CPU, 3.4KB object store: 60%|███████████████████████████████████████████████████████ | 110/184 [00:18<00:09, 7.51 row/s]\n", - "- Map(ArynReader._to_doc)->...->MapBatches(explode): Tasks: 11 [backpressured]; Queued blocks: 23; Resources: 11.0 CPU, 3.4KB object store: 63%|██████████████████████████████████████████████████████████ | 116/184 [00:18<00:09, 6.88 row/s]\n", - "Running Dataset. Active & requested resources: 11/11 CPU, 3.4KB/1.0GB object store: 63%|█████████████████████████████████████████████████████████████████████████████████████████████▎ | 116/184 [00:18<00:09, 6.86 row/s]\n", - "- Map(ArynReader._to_doc)->...->MapBatches(explode): Tasks: 11 [backpressured]; Queued blocks: 20; Resources: 11.0 CPU, 4.1KB object store: 63%|██████████████████████████████████████████████████████████ | 116/184 [00:19<00:09, 6.88 row/s]\n", - "- Map(ArynReader._to_doc)->...->MapBatches(explode): Tasks: 11 [backpressured]; Queued blocks: 20; Resources: 11.0 CPU, 4.1KB object store: 67%|██████████████████████████████████████████████████████████████ | 124/184 [00:19<00:08, 7.21 row/s]\n", - "Running Dataset. Active & requested resources: 11/11 CPU, 3.4KB/1.0GB object store: 68%|█████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 126/184 [00:19<00:07, 7.57 row/s]\n", - "- Map(ArynReader._to_doc)->...->MapBatches(explode): Tasks: 11 [backpressured]; Queued blocks: 15; Resources: 11.0 CPU, 3.4KB object store: 67%|██████████████████████████████████████████████████████████████ | 124/184 [00:20<00:08, 7.21 row/s]\n", - "- Map(ArynReader._to_doc)->...->MapBatches(explode): Tasks: 11 [backpressured]; Queued blocks: 15; Resources: 11.0 CPU, 3.4KB object store: 72%|██████████████████████████████████████████████████████████████████ | 132/184 [00:20<00:07, 7.27 row/s]\n", - "Running Dataset. Active & requested resources: 10/11 CPU, 3.4KB/1.0GB object store: 73%|███████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 134/184 [00:20<00:06, 7.52 row/s]\n", - "- Map(ArynReader._to_doc)->...->MapBatches(explode): Tasks: 11 [backpressured]; Queued blocks: 11; Resources: 11.0 CPU, 3.4KB object store: 72%|██████████████████████████████████████████████████████████████████ | 132/184 [00:21<00:07, 7.27 row/s]\n", - "- Map(ArynReader._to_doc)->...->MapBatches(explode): Tasks: 11 [backpressured]; Queued blocks: 11; Resources: 11.0 CPU, 3.4KB object store: 76%|██████████████████████████████████████████████████████████████████████ | 140/184 [00:21<00:06, 7.30 row/s]\n", - "Running Dataset. Active & requested resources: 10/11 CPU, 3.4KB/1.0GB object store: 76%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌ | 140/184 [00:21<00:06, 6.92 row/s]\n", - "- Map(ArynReader._to_doc)->...->MapBatches(explode): Tasks: 11 [backpressured]; Queued blocks: 7; Resources: 11.0 CPU, 3.4KB object store: 76%|██████████████████████████████████████████████████████████████████████▊ | 140/184 [00:22<00:06, 7.30 row/s]\n", - "- Map(ArynReader._to_doc)->...->MapBatches(explode): Tasks: 11 [backpressured]; Queued blocks: 7; Resources: 11.0 CPU, 3.4KB object store: 80%|██████████████████████████████████████████████████████████████████████████▊ | 148/184 [00:22<00:04, 7.32 row/s]\n", - "Running Dataset. Active & requested resources: 10/11 CPU, 3.4KB/1.0GB object store: 80%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████ | 148/184 [00:22<00:05, 7.06 row/s]\n", - "- Map(ArynReader._to_doc)->...->MapBatches(explode): Tasks: 11 [backpressured]; Queued blocks: 4; Resources: 11.0 CPU, 3.4KB object store: 80%|██████████████████████████████████████████████████████████████████████████▊ | 148/184 [00:23<00:04, 7.32 row/s]\n", - "- Map(ArynReader._to_doc)->...->MapBatches(explode): Tasks: 11 [backpressured]; Queued blocks: 4; Resources: 11.0 CPU, 3.4KB object store: 84%|█████████████████████████████████████████████████████████████████████████████▊ | 154/184 [00:23<00:04, 6.78 row/s]\n", - "Running Dataset. Active & requested resources: 11/11 CPU, 3.4KB/1.0GB object store: 84%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 154/184 [00:23<00:04, 6.70 row/s]\n", - "- Map(ArynReader._to_doc)->...->MapBatches(explode): Tasks: 11 [backpressured]; Queued blocks: 2; Resources: 11.0 CPU, 3.4KB object store: 84%|█████████████████████████████████████████████████████████████████████████████▊ | 154/184 [00:24<00:04, 6.78 row/s]\n", - "- Map(ArynReader._to_doc)->...->MapBatches(explode): Tasks: 11 [backpressured]; Queued blocks: 2; Resources: 11.0 CPU, 3.4KB object store: 86%|███████████████████████████████████████████████████████████████████████████████▊ | 158/184 [00:24<00:04, 5.95 row/s]\n", - "Running Dataset. Active & requested resources: 10/11 CPU, 3.4KB/1.0GB object store: 86%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████ | 158/184 [00:24<00:04, 5.78 row/s]\n", - "- Map(ArynReader._to_doc)->...->MapBatches(explode): Tasks: 10; Queued blocks: 0; Resources: 10.0 CPU, 3.1KB object store: 86%|█████████████████████████████████████████████████████████████████████████████████████████████▌ | 158/184 [00:25<00:04, 5.95 row/s]\n", - "- Map(ArynReader._to_doc)->...->MapBatches(explode): Tasks: 10; Queued blocks: 0; Resources: 10.0 CPU, 3.1KB object store: 89%|█████████████████████████████████████████████████████████████████████████████████████████████████▏ | 164/184 [00:25<00:03, 5.81 row/s]\n", - "Running Dataset. Active & requested resources: 9/11 CPU, 2.8KB/1.0GB object store: 90%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍ | 166/184 [00:25<00:02, 6.25 row/s]\n", - "- Map(ArynReader._to_doc)->...->MapBatches(explode): Tasks: 2; Queued blocks: 0; Resources: 2.0 CPU, 642.0B object store: 89%|██████████████████████████████████████████████████████████████████████████████████████████████████ | 164/184 [00:26<00:03, 5.81 row/s]\n", - "- Map(ArynReader._to_doc)->...->MapBatches(explode): Tasks: 2; Queued blocks: 0; Resources: 2.0 CPU, 642.0B object store: 98%|███████████████████████████████████████████████████████████████████████████████████████████████████████████▌ | 180/184 [00:26<00:00, 8.52 row/s]\n", - " \n", - "✔️ Dataset execution finished in 27.68 seconds: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 184/184 [00:27<00:00, 6.64 row/s]\n", - "\n", - "- Map(ArynReader._to_doc)->...->MapBatches(explode): Tasks: 2; Queued blocks: 0; Resources: 2.0 CPU, 642.0B object store: 98%|███████████████████████████████████████████████████████████████████████████████████████████████████████████▌ | 180/184 [00:27<00:00, 8.52 row/s]\n", - "- Map(ArynReader._to_doc)->...->MapBatches(explode): Tasks: 0; Queued blocks: 0; Resources: 0.0 CPU, 321.0B object store: 98%|███████████████████████████████████████████████████████████████████████████████████████████████████████████▌ | 180/184 [00:27<00:00, 8.52 row/s]\n", - "- Map(ArynReader._to_doc)->...->MapBatches(explode): Tasks: 0; Queued blocks: 0; Resources: 0.0 CPU, 321.0B object store: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████| 184/184 [00:27<00:00, 7.31 row/s]\n", - "- Map(ArynReader._to_doc)->...->MapBatches(explode): Tasks: 0; Queued blocks: 0; Resources: 0.0 CPU, 321.0B object store: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████| 184/184 [00:27<00:00, 7.31 row/s]\n", - "- Map(ArynReader._to_doc)->...->MapBatches(explode): Tasks: 0; Queued blocks: 0; Resources: 0.0 CPU, 321.0B object store: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████| 184/184 [00:27<00:00, 6.64 row/s]\n" - ] - }, - { - "data": { - "text/plain": [ - "0" - ] - }, - "execution_count": 24, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "exploded_docset2.count()" ] From 1689ed04d3ed0ea9a9173f4760acc56a228065d6 Mon Sep 17 00:00:00 2001 From: Abhijit Pujare Date: Thu, 27 Mar 2025 10:35:21 -0700 Subject: [PATCH 4/5] Removing one more API key --- Earnings_Call_Ingestion_Script.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Earnings_Call_Ingestion_Script.ipynb b/Earnings_Call_Ingestion_Script.ipynb index 21e0197..f869720 100644 --- a/Earnings_Call_Ingestion_Script.ipynb +++ b/Earnings_Call_Ingestion_Script.ipynb @@ -244,7 +244,7 @@ "metadata": {}, "outputs": [], "source": [ - "finalDocSet.embed(embedder=OpenAIEmbedder(model_name=model_name)).write.aryn(aryn_url=\"https://test-api.aryn.ai/v1/storage\", docset_id=docset.value.docset_id, aryn_api_key=\"eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJzdWIiOnsiZW1sIjoiZG9jc3RvcmUtdGVhbUBhcnluLmFpIiwiYWN0IjoiMjI0NjYxMzg4MzA4In0sImlhdCI6MTczMzk1MDY3OH0.xqmVCfnu0RJ2RW-74-to4_hbeTIZflToj7YZs6vGtNxCkNroEE5Quzro8ztxTdI6Yt-9HzPepIXdA2QnKDtWBQ\")" + "finalDocSet.embed(embedder=OpenAIEmbedder(model_name=model_name)).write.aryn(aryn_url=\"https://test-api.aryn.ai/v1/storage\", docset_id=docset.value.docset_id, aryn_api_key=\"\")" ] }, { From c18d01f56623979654839ddbfab803c21ad3d77e Mon Sep 17 00:00:00 2001 From: Abhijit Pujare Date: Fri, 28 Mar 2025 13:42:52 -0700 Subject: [PATCH 5/5] Fixing up both ingestion notebook and the qna notebook and answering more questions in the qna notebook --- Earnings_Call_Ingestion_Script.ipynb | 32 ++---- QuestionAnsweringNotebook.ipynb | 161 +++++++++++++++++---------- 2 files changed, 113 insertions(+), 80 deletions(-) diff --git a/Earnings_Call_Ingestion_Script.ipynb b/Earnings_Call_Ingestion_Script.ipynb index f869720..755859e 100644 --- a/Earnings_Call_Ingestion_Script.ipynb +++ b/Earnings_Call_Ingestion_Script.ipynb @@ -18,7 +18,10 @@ "from sycamore.data.document import Document\n", "from sycamore.functions import HuggingFaceTokenizer, OpenAITokenizer\n", "from sycamore.llms import OpenAI, OpenAIModels\n", - "from sycamore.transforms.embed import SentenceTransformerEmbedder" + "from sycamore.transforms.embed import SentenceTransformerEmbedder\n", + "from aryn_sdk.client.client import Client\n", + "from sycamore.transforms.embed import OpenAIEmbedder\n", + "from sycamore import MaterializeSourceMode" ] }, { @@ -28,21 +31,10 @@ "metadata": {}, "outputs": [], "source": [ + "## Set your api-keys. You'll need an ARYN_API_KEY and an OPENAI_API_KEY\n", "context = sycamore.init()\n", - "# local file path to the SortBenchmark dataset\n", "paths = \"\"\n", - "initial_docset = context.read.binary(paths, binary_format=\"pdf\")\n", - "#initial_docset = context.read.aryn(aryn_url=\"https://test-api.aryn.ai\", docset_id=\"aryn:f-trcw7rui6kg2t9os03owzjf\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "101c2417-2e9e-4e32-ad77-31f88c287b21", - "metadata": {}, - "outputs": [], - "source": [ - "## set your keys here" + "initial_docset = context.read.binary(paths, binary_format=\"pdf\")" ] }, { @@ -109,9 +101,8 @@ "metadata": {}, "outputs": [], "source": [ - "from sycamore import MaterializeSourceMode\n", "partitioned_docset = (initial_docset.partition(partitioner=ArynPartitioner())\n", - " .materialize(path=\"/Users/abhijitpujare/workspace/haystack-workshop-2025/materialize/partitioned_docset\", source_mode=MaterializeSourceMode.USE_STORED)\n", + " .materialize(path=\"PATH_TO_STORE_MATERIALIZED\", source_mode=MaterializeSourceMode.USE_STORED)\n", " .split_elements(tokenizer=tokenizer, max_tokens=512)\n", " .extract_properties(property_extractor=OpenAIPropertyExtractor(llm=llm, schema=schema_json, schema_name=\"earnings_calls\")))" ] @@ -175,7 +166,6 @@ "\n", " return doc\n", "\n", - "#filtered_Docset.map_elements(markSpeakers).show()\n", "speakersMarkedDocSet = docset_no_orig_elements.map_elements(markSpeakers)" ] }, @@ -219,8 +209,7 @@ "def filterOnlySpeakers(elem: Element):\n", " return 'speaker' in elem.properties\n", "\n", - "finalDocSet = mergedDialogeSet.filter_elements(filterOnlySpeakers)\n", - "#finalDocSet.show()" + "finalDocSet = mergedDialogeSet.filter_elements(filterOnlySpeakers)" ] }, { @@ -230,11 +219,10 @@ "metadata": {}, "outputs": [], "source": [ - "from sycamore.transforms.embed import OpenAIEmbedder\n", "model_name = \"text-embedding-3-small\"\n", - "from aryn_sdk.client.client import Client \n", + "docset_name = \"\"\n", "myClient = Client(aryn_url=\"https://test-api.aryn.ai\", aryn_api_key=\"\")\n", - "docset = myClient.create_docset(name=\"haystack_workshop_target_correct\")" + "docset = myClient.create_docset(name=docset_name)" ] }, { diff --git a/QuestionAnsweringNotebook.ipynb b/QuestionAnsweringNotebook.ipynb index 96d8819..364f764 100644 --- a/QuestionAnsweringNotebook.ipynb +++ b/QuestionAnsweringNotebook.ipynb @@ -18,7 +18,10 @@ "from sycamore.data.document import Document\n", "from sycamore.functions import HuggingFaceTokenizer, OpenAITokenizer\n", "from sycamore.llms import OpenAI, OpenAIModels\n", - "from sycamore.transforms.embed import SentenceTransformerEmbedder" + "from sycamore.transforms.embed import SentenceTransformerEmbedder\n", + "from sycamore.llms.prompts.default_prompts import LlmFilterMessagesJinjaPrompt\n", + "from sycamore.llms.prompts.prompts import JinjaPrompt\n", + "from sycamore.transforms.extract_entity import OpenAIEntityExtractor" ] }, { @@ -29,142 +32,184 @@ "outputs": [], "source": [ "context = sycamore.init()\n", - "initial_docset = context.read.aryn(aryn_url=\"https://test-api.aryn.ai/v1/storage\", docset_id=\"aryn:ds-v9tfacka0xifljqaj0l1rbh\", aryn_api_key=\"\")" + "initial_docset = context.read.aryn(aryn_url=\"https://test-api.aryn.ai/v1/storage\", docset_id=\"\", aryn_api_key=\"\")" ] }, { "cell_type": "code", "execution_count": null, - "id": "91c37ade-4294-49b4-8228-e137aa65f038", + "id": "2d39829d-9df2-4ddd-8b72-682db0aa41ce", "metadata": {}, "outputs": [], "source": [ - "first_doc = initial_docset.take(1)" + "def remove_original_elements(doc: Document):\n", + " del doc.properties['_original_elements']\n", + " return doc\n", + "\n", + "def filter_parent_documents(doc:Document):\n", + " return \"parent_id\" in doc\n", + " \n", + "\n", + "exploded_docset2 = initial_docset.map(remove_original_elements).spread_properties([\"earnings_calls\"]).explode().filter(filter_parent_documents)\n", + "removed_orig_docset = initial_docset.map(remove_original_elements).spread_properties([\"earnings_calls\"])" ] }, { "cell_type": "code", "execution_count": null, - "id": "7458b26c-a5ac-4407-bc7b-2f207aac6414", + "id": "71f1ba91-fd8a-4ac8-ab20-1911a4e4113f", "metadata": {}, "outputs": [], - "source": [] + "source": [ + "from sycamore.llms import OpenAI, OpenAIModels\n", + "oai = OpenAI(OpenAIModels.GPT_4O)" + ] }, { "cell_type": "code", "execution_count": null, - "id": "fa3c92e5-f47d-4f92-91c5-4c080a6a23aa", + "id": "74fb23b6-5703-4d10-94fb-432729c1c68f", "metadata": {}, "outputs": [], "source": [ - "print(first_doc[0].properties['earnings_calls'])" + "## Cell to answer question: tell me the number of customers MongoDB had at the end of Q1\n", + "entity_extractor = OpenAIEntityExtractor(entity_name=\"num_customers\", llm=oai, num_of_elements=10,\n", + " field = \"text_representation\", use_elements=True)\n", + " \n", + "\n", + "mdb_docset = removed_orig_docset.filter( lambda doc: doc.properties['earnings_calls']['company_ticker'] == 'MDB' and doc.properties['earnings_calls']['quarter']=='Q1').extract_entity(entity_extractor)" ] }, { "cell_type": "code", "execution_count": null, - "id": "2d39829d-9df2-4ddd-8b72-682db0aa41ce", + "id": "7814bb6e-63ea-48ed-b637-543a516b4175", "metadata": {}, "outputs": [], "source": [ - "def remove_original_elements(doc: Document):\n", - " del doc.properties['_original_elements']\n", - " return doc\n", - "\n", - "def filter_parent_documents(doc:Document):\n", - " return \"parent_id\" in doc\n", - " \n", - "\n", - "exploded_docset2 = initial_docset.map(remove_original_elements).spread_properties([\"earnings_calls\"]).explode().filter(filter_parent_documents)\n", - "removed_orig_docset = initial_docset.map(remove_original_elements).spread_properties([\"earnings_calls\"])" + "mdb_docset.take(1)[0].properties.get(\"num_customers\")" ] }, { "cell_type": "code", "execution_count": null, - "id": "71f1ba91-fd8a-4ac8-ab20-1911a4e4113f", + "id": "fddbb1c1-4fe4-4aa1-a959-4c9e1e624e7b", "metadata": {}, "outputs": [], "source": [ - "from sycamore.llms import OpenAI, OpenAIModels\n", - "oai = OpenAI(OpenAIModels.GPT_4O, api_key=\"\")" + "avgo_docset = removed_orig_docset.filter( lambda doc: doc.properties['earnings_calls']['company_ticker'] == 'AVGO').llm_filter(new_field=\"_autogen_LLMFilterOutput\",\n", + " prompt=LlmFilterMessagesJinjaPrompt.set(filter_question=\"Does this mention the VMWare acquisition?\", use_elements=True),\n", + " field = \"text_representation\",\n", + " llm=oai,\n", + " keep_none=True)" ] }, { "cell_type": "code", "execution_count": null, - "id": "74fb23b6-5703-4d10-94fb-432729c1c68f", + "id": "887bdb16-1a49-4eb8-bb6d-e663aad23d43", "metadata": {}, "outputs": [], "source": [ - "from sycamore.llms.prompts.default_prompts import LlmFilterMessagesJinjaPrompt\n", - "from sycamore.llms.prompts.prompts import JinjaPrompt\n", - "from sycamore.transforms.extract_entity import OpenAIEntityExtractor\n", + "## Cell to answer the question tell me the first earnings call where the VMWare acquisiton was mentioned\n", "\n", - "prompt = JinjaPrompt(\n", - " system=\"You are a helpful classifier that generously filters database entries based on questions.\",\n", - " user=(\"Did Brian Chesky speak?\" )\n", - ")\n", - "\n", - "entity_extractor = OpenAIEntityExtractor(entity_name=\"num_customers\", llm=oai, num_of_elements=10,\n", - " #prompt = LlmFilterMessagesJinjaPrompt.set(filter_question=\"How many customers did MongoDB have at the end of Q1 in 2024?\", use_elements=False),\n", - " field = \"text_representation\", use_elements=True)\n", - " \n", - "\n", - "mdb_docset = removed_orig_docset.filter( lambda doc: doc.properties['earnings_calls']['company_ticker'] == 'MDB' and doc.properties['earnings_calls']['quarter']=='Q1').extract_entity(entity_extractor)\n", + "from sycamore.transforms import DateTimeStandardizer\n", "\n", - "'''\n", - "llm_filtered_docset = exploded_docset2.llm_filter(new_field=\"_autogen_LLMFilterOutput\",\n", - " prompt=LlmFilterMessagesJinjaPrompt.set(filter_question=\"Did Brian Chesky speak?\", use_elements=False),\n", - " field = \"text_represenation\",\n", - " llm=oai,\n", - " keep_none=True)\n", + "def filterVMware(elem: Element) -> bool:\n", + " return \"VMware\" in elem.text_representation\n", "\n", + "vwmare_docset_sorted = (removed_orig_docset\n", + " .filter(lambda doc: doc.properties['earnings_calls']['company_ticker'] == 'AVGO')\n", + " .filter_elements(filterVMware)\n", + " .map(lambda doc: DateTimeStandardizer.standardize(doc, key_path = [\"properties\",\"earnings_calls\",\"date\"]))\n", + " .sort(descending=False, field=\"properties.earnings_calls.dateTime\"))\n", + "vwmare_docset_sorted.take(1)[0].properties['earnings_calls']['day']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5d3de501-27cf-452a-8366-8d0ec60c307d", + "metadata": {}, + "outputs": [], + "source": [ + "## Cell to answer question summarize the impact of the VMWare acquisiton on Broadcom's earnings\n", + "from sycamore.llms.prompts.prompts import JinjaPrompt\n", + "from sycamore.llms.llms import LLMMode\n", + "myprompt = JinjaPrompt(\n", + " system=\"You are a robot\",\n", + " user=\"\"\"Here's an earnings call. Please answer the question {{ question }}\n", + " {% for elt in doc.elements %}\n", + " {{ elt.text_representation }}\n", + " {% endfor %}\"\"\",\n", + " question=\"Summarize the impact of the vmware acquisition on broadcom's earnings\"\n", + ")\n", "\n", - "exploded_docset.llm_filter(new_field=\"_autogen_LLMFilterOutput\",\n", - " #prompt=LlmFilterMessagesJinjaPrompt.fork(filter_question=\"Did Brian Chesky speak?\", use_elements=False),\n", - " prompt = prompt,\n", - " field = \"text_representation\",\n", - " llm=oai )\n", - " '''\n", - "#logical_node = LlmFilter(node_id=0, question=\"Filter all the records where the Brian Chesky spoke\", field=\"Brian Chesky\")\n", - "#sycamore_operator = SycamoreLlmFilter(context, logical_node, query_id=\"test\", inputs=[exploded_docset])" + "vmware_acquistion_summary = (removed_orig_docset.filter(lambda doc: doc.properties['earnings_calls']['company_ticker'] == 'AVGO')\n", + " .filter_elements(filterVMware)\n", + " .llm_map(prompt=myprompt, output_field=\"acquisition_impact_summary\", llm=oai, llm_mode=LLMMode.ASYNC))\n" ] }, { "cell_type": "code", "execution_count": null, - "id": "7814bb6e-63ea-48ed-b637-543a516b4175", + "id": "bb56b73a-60f0-4d2b-9cb8-3aed4c0c72d8", "metadata": {}, "outputs": [], "source": [ - "mdb_docset.take(1)[0].properties.get(\"num_customers\")" + "ls = []\n", + "for doc in vmware_acquistion_summary.take_all():\n", + " ls.append((doc.properties['earnings_calls']['quarter'], doc.properties['acquisition_impact_summary']))\n", + "print(ls)" ] }, { "cell_type": "code", "execution_count": null, - "id": "192dc7b1-1598-479f-ab99-709d0c5c81d0", + "id": "24fe8623-a63c-45ad-a973-8575fc23d57b", "metadata": {}, "outputs": [], "source": [ - "llm_filtered_docset.show()" + "## Cell to answer the question tell me how Intuit is integrating Intuit Assist (their new AI offering) into existing products\n", + "intuit_prompt = JinjaPrompt(\n", + " system=\"You are a robot\",\n", + " user=\"\"\"Here's an earnings call. Please answer the question {{ question }}\n", + " {% for elt in doc.elements %}\n", + " {{ elt.text_representation }}\n", + " {% endfor %}\"\"\",\n", + " question=\"Summarize how Intuit Assist is being integrated into Intuit's existing products\"\n", + ")\n", + "\n", + "def filterForAssist(elem: Element) -> bool:\n", + " return \"Assist\" in elem.text_representation\n", + "\n", + "intuit_assist_summary = (removed_orig_docset.filter(lambda doc: doc.properties['earnings_calls']['company_ticker'] == 'INTU')\n", + " .filter_elements(filterForAssist)\n", + " .llm_map(prompt=intuit_prompt, output_field=\"intuit_assist_summary\", llm=oai, llm_mode=LLMMode.ASYNC))\n", + "\n", + "ls = []\n", + "for doc in intuit_assist_summary.take_all():\n", + " ls.append((doc.properties['earnings_calls']['quarter'], doc.properties['intuit_assist_summary']))\n", + "print(ls)" ] }, { "cell_type": "code", "execution_count": null, - "id": "2f144ffe-eeb3-4770-a1da-3143efbe3ed1", + "id": "c7c7d15d-dcfa-4bd6-a210-824aca1af220", "metadata": {}, "outputs": [], "source": [ - "exploded_docset2.count()" + "## Cell to answer the question 'Return all the companies that mentioned inflation and return a count of the number of times inflation was mentioned'\n", + "inflation_mentioned = (exploded_docset2.filter(lambda element: 'inflation' in element.text_representation.lower())\n", + " .groupby_count(field='properties.earnings_calls.company_name'))\n", + "inflation_mentioned.show()" ] }, { "cell_type": "code", "execution_count": null, - "id": "fddbb1c1-4fe4-4aa1-a959-4c9e1e624e7b", + "id": "d66caeeb-cbe3-4f92-862f-33923012e1c6", "metadata": {}, "outputs": [], "source": []