Skip to content

Commit b6b7186

Browse files
authored
fixing linting issue (#31)
1 parent 83a9dbe commit b6b7186

5 files changed

Lines changed: 101 additions & 37 deletions

File tree

src/nutrient_dws/api/direct.py

Lines changed: 57 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -478,7 +478,8 @@ def optimize_pdf(
478478
grayscale_annotations: Convert annotations to grayscale (default: False).
479479
disable_images: Remove all images from the PDF (default: False).
480480
mrc_compression: MRC compression (default: False).
481-
image_optimization_quality: Image optimization quality from 1 (least optimized) to 4 (most optimized) (default: 2).
481+
image_optimization_quality: Image optimization quality from 1 (least optimized)
482+
to 4 (most optimized) (default: 2).
482483
linearize: Linearize (optimize for web viewing) the PDF (default: False).
483484
484485
Returns:
@@ -487,7 +488,8 @@ def optimize_pdf(
487488
Raises:
488489
AuthenticationError: If API key is missing or invalid.
489490
APIError: For other API errors.
490-
ValueError: If image_optimization_quality is not between 1-4 or no optimization is enabled
491+
ValueError: If image_optimization_quality is not between 1 and 4
492+
or no optimization is enabled
491493
492494
Example:
493495
# Aggressive optimization for minimum file size
@@ -709,7 +711,11 @@ def split_pdf(
709711
output_paths=["part1.pdf", "part2.pdf"]
710712
)
711713
"""
712-
from nutrient_dws.file_handler import prepare_file_for_upload, save_file_output, get_pdf_page_count
714+
from nutrient_dws.file_handler import (
715+
get_pdf_page_count,
716+
prepare_file_for_upload,
717+
save_file_output,
718+
)
713719

714720
# Validate inputs
715721
if not page_ranges:
@@ -731,15 +737,21 @@ def split_pdf(
731737

732738
# Validate start is within document bounds
733739
if start < 0 or start >= num_of_pages:
734-
raise ValueError(f"Page range {i}: start index {start} is out of bounds (0-{num_of_pages-1})")
740+
raise ValueError(
741+
f"Page range {i}: start index {start} is out of bounds (0-{num_of_pages - 1})"
742+
)
735743

736744
# If end is specified, validate it's within document bounds
737745
if "end" in page_range:
738746
end = page_range["end"]
739747
if end < 0 or end >= num_of_pages:
740-
raise ValueError(f"Page range {i}: end index {end} is out of bounds (0-{num_of_pages-1})")
748+
raise ValueError(
749+
f"Page range {i}: end index {end} is out of bounds (0-{num_of_pages - 1})"
750+
)
741751
if end < start:
742-
raise ValueError(f"Page range {i}: end index {end} cannot be less than start index {start}")
752+
raise ValueError(
753+
f"Page range {i}: end index {end} cannot be less than start index {start}"
754+
)
743755

744756
results = []
745757

@@ -814,7 +826,11 @@ def duplicate_pdf_pages(
814826
output_path="reordered.pdf"
815827
)
816828
"""
817-
from nutrient_dws.file_handler import prepare_file_for_upload, save_file_output, get_pdf_page_count
829+
from nutrient_dws.file_handler import (
830+
get_pdf_page_count,
831+
prepare_file_for_upload,
832+
save_file_output,
833+
)
818834

819835
# Validate inputs
820836
if not page_indexes:
@@ -837,7 +853,9 @@ def duplicate_pdf_pages(
837853
else:
838854
# Validate positive indexes are within bounds
839855
if page_index >= num_of_pages:
840-
raise ValueError(f"Page index {page_index} is out of bounds (0-{num_of_pages-1})")
856+
raise ValueError(
857+
f"Page index {page_index} is out of bounds (0-{num_of_pages - 1})"
858+
)
841859
# For positive indexes, create single-page range
842860
parts.append({"file": "file", "pages": {"start": page_index, "end": page_index}})
843861

@@ -905,7 +923,11 @@ def delete_pdf_pages(
905923
output_path="pages_deleted.pdf"
906924
)
907925
"""
908-
from nutrient_dws.file_handler import prepare_file_for_upload, save_file_output, get_pdf_page_count
926+
from nutrient_dws.file_handler import (
927+
get_pdf_page_count,
928+
prepare_file_for_upload,
929+
save_file_output,
930+
)
909931

910932
# Validate inputs
911933
if not page_indexes:
@@ -924,7 +946,7 @@ def delete_pdf_pages(
924946
# Validate page indexes are within bounds
925947
for idx in page_indexes:
926948
if idx >= num_of_pages:
927-
raise ValueError(f"Page index {idx} is out of bounds (0-{num_of_pages-1})")
949+
raise ValueError(f"Page index {idx} is out of bounds (0-{num_of_pages - 1})")
928950

929951
# Prepare file for upload
930952
file_field, file_data = prepare_file_for_upload(input_file, "file")
@@ -952,7 +974,9 @@ def delete_pdf_pages(
952974

953975
# Add remaining pages after the last deleted page
954976
num_of_pages = get_pdf_page_count(input_file)
955-
if (current_page > 0 or (current_page == 0 and len(sorted_indexes) == 0)) and current_page < num_of_pages:
977+
if (
978+
current_page > 0 or (current_page == 0 and len(sorted_indexes) == 0)
979+
) and current_page < num_of_pages:
956980
# Add all remaining pages from current_page onwards
957981
parts.append({"file": "file", "pages": {"start": current_page}})
958982

@@ -1098,7 +1122,11 @@ def add_page(
10981122
output_path="with_blank_pages.pdf"
10991123
)
11001124
"""
1101-
from nutrient_dws.file_handler import prepare_file_for_upload, save_file_output, get_pdf_page_count
1125+
from nutrient_dws.file_handler import (
1126+
get_pdf_page_count,
1127+
prepare_file_for_upload,
1128+
save_file_output,
1129+
)
11021130

11031131
# Validate inputs
11041132
if page_count < 1:
@@ -1394,7 +1422,11 @@ def set_page_label(
13941422
labels=[{"pages": {"start": 0, "end": 0}, "label": "Cover Page"}]
13951423
)
13961424
"""
1397-
from nutrient_dws.file_handler import prepare_file_for_upload, save_file_output, get_pdf_page_count
1425+
from nutrient_dws.file_handler import (
1426+
get_pdf_page_count,
1427+
prepare_file_for_upload,
1428+
save_file_output,
1429+
)
13981430

13991431
# Validate inputs
14001432
if not labels:
@@ -1422,18 +1454,27 @@ def set_page_label(
14221454
# Validate start is within document bounds
14231455
start = pages["start"]
14241456
if start < 0 or start >= num_of_pages:
1425-
raise ValueError(f"Label configuration {i}: start index {start} is out of bounds (0-{num_of_pages-1})")
1457+
raise ValueError(
1458+
f"Label configuration {i}: start index {start}"
1459+
f" is out of bounds (0-{num_of_pages - 1})"
1460+
)
14261461

14271462
# Normalize pages - only include 'end' if explicitly provided
14281463
normalized_pages = {"start": start}
14291464
if "end" in pages:
14301465
end = pages["end"]
14311466
# Validate end is within document bounds
14321467
if end < 0 or end >= num_of_pages:
1433-
raise ValueError(f"Label configuration {i}: end index {end} is out of bounds (0-{num_of_pages-1})")
1468+
raise ValueError(
1469+
f"Label configuration {i}: end index {end}"
1470+
f" is out of bounds (0-{num_of_pages - 1})"
1471+
)
14341472
# Validate end is not less than start
14351473
if end < start:
1436-
raise ValueError(f"Label configuration {i}: end index {end} cannot be less than start index {start}")
1474+
raise ValueError(
1475+
f"Label configuration {i}: end index {end}"
1476+
f" cannot be less than start index {start}"
1477+
)
14371478
normalized_pages["end"] = end
14381479
# If no end is specified, leave it out (meaning "to end of document")
14391480

src/nutrient_dws/file_handler.py

Lines changed: 11 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -205,21 +205,22 @@ def get_file_size(file_input: FileInput) -> int | None:
205205

206206
return None
207207

208+
208209
def get_pdf_page_count(pdf_input: FileInput) -> int:
209210
"""Zero dependency way to get the number of pages in a PDF.
210211
211212
Args:
212-
file_input: File path, bytes, or file-like object. Has to be of a PDF file
213+
pdf_input: File path, bytes, or file-like object. Must be a PDF file.
213214
214215
Returns:
215216
Number of pages in a PDF.
216217
"""
217218
if isinstance(pdf_input, (str, Path)):
218-
with open(pdf_input, 'rb') as f:
219+
with open(pdf_input, "rb") as f:
219220
pdf_bytes = f.read()
220221
elif isinstance(pdf_input, bytes):
221222
pdf_bytes = pdf_input
222-
elif hasattr(pdf_input, 'read') and hasattr(pdf_input, 'seek') and hasattr(pdf_input, 'tell'):
223+
elif hasattr(pdf_input, "read") and hasattr(pdf_input, "seek") and hasattr(pdf_input, "tell"):
223224
pos = pdf_input.tell()
224225
pdf_input.seek(0)
225226
pdf_bytes = pdf_input.read()
@@ -228,35 +229,35 @@ def get_pdf_page_count(pdf_input: FileInput) -> int:
228229
raise TypeError("Unsupported input type. Expected str, Path, bytes, or seekable BinaryIO.")
229230

230231
# Find all PDF objects
231-
objects = re.findall(rb'(\d+)\s+(\d+)\s+obj(.*?)endobj', pdf_bytes, re.DOTALL)
232+
objects = re.findall(rb"(\d+)\s+(\d+)\s+obj(.*?)endobj", pdf_bytes, re.DOTALL)
232233

233234
# Get the Catalog Object
234235
catalog_obj = None
235-
for obj_num, gen_num, obj_data in objects:
236-
if b'/Type' in obj_data and b'/Catalog' in obj_data:
236+
for _obj_num, _gen_num, obj_data in objects:
237+
if b"/Type" in obj_data and b"/Catalog" in obj_data:
237238
catalog_obj = obj_data
238239
break
239240

240241
if not catalog_obj:
241242
raise ValueError("Could not find /Catalog object in PDF.")
242243

243244
# Extract /Pages reference (e.g. 3 0 R)
244-
pages_ref_match = re.search(rb'/Pages\s+(\d+)\s+(\d+)\s+R', catalog_obj)
245+
pages_ref_match = re.search(rb"/Pages\s+(\d+)\s+(\d+)\s+R", catalog_obj)
245246
if not pages_ref_match:
246247
raise ValueError("Could not find /Pages reference in /Catalog.")
247248
pages_obj_num = pages_ref_match.group(1).decode()
248249
pages_obj_gen = pages_ref_match.group(2).decode()
249250

250251
# Step 3: Find the referenced /Pages object
251-
pages_obj_pattern = fr'{pages_obj_num}\s+{pages_obj_gen}\s+obj(.*?)endobj'.encode()
252+
pages_obj_pattern = rf"{pages_obj_num}\s+{pages_obj_gen}\s+obj(.*?)endobj".encode()
252253
pages_obj_match = re.search(pages_obj_pattern, pdf_bytes, re.DOTALL)
253254
if not pages_obj_match:
254255
raise ValueError("Could not find root /Pages object.")
255256
pages_obj_data = pages_obj_match.group(1)
256257

257258
# Step 4: Extract /Count
258-
count_match = re.search(rb'/Count\s+(\d+)', pages_obj_data)
259+
count_match = re.search(rb"/Count\s+(\d+)", pages_obj_data)
259260
if not count_match:
260261
raise ValueError("Could not find /Count in root /Pages object.")
261262

262-
return int(count_match.group(1))
263+
return int(count_match.group(1))

tests/integration/test_direct_api_integration.py

Lines changed: 21 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -273,7 +273,9 @@ def test_split_pdf_integration(self, client, sample_multipage_pdf_path, tmp_path
273273
# Verify the number of pages in each output PDF
274274
total_page_count = get_pdf_page_count(sample_multipage_pdf_path)
275275
assert get_pdf_page_count(result[0]) == 1 # First PDF should have 1 page
276-
assert get_pdf_page_count(result[1]) == total_page_count - 1 # Second PDF should have the remaining pages
276+
assert (
277+
get_pdf_page_count(result[1]) == total_page_count - 1
278+
) # Second PDF should have the remaining pages
277279

278280
def test_split_pdf_with_output_files(self, client, sample_multipage_pdf_path, tmp_path):
279281
"""Test split_pdf method saving to output files."""
@@ -307,7 +309,9 @@ def test_split_pdf_with_output_files(self, client, sample_multipage_pdf_path, tm
307309

308310
# Verify the number of pages in the second output PDF
309311
total_page_count = get_pdf_page_count(sample_multipage_pdf_path)
310-
assert get_pdf_page_count(str(tmp_path / "remaining.pdf")) == total_page_count - 1 # Second PDF should have remaining pages
312+
assert (
313+
get_pdf_page_count(str(tmp_path / "remaining.pdf")) == total_page_count - 1
314+
) # Second PDF should have remaining pages
311315

312316
def test_split_pdf_no_ranges_error(self, client, sample_pdf_path):
313317
"""Test split_pdf with no ranges returns first page by default."""
@@ -396,7 +400,9 @@ def test_duplicate_pdf_pages_negative_indexes(self, client, sample_pdf_path):
396400
assert_is_pdf(result)
397401

398402
# Verify the number of pages in the output PDF
399-
assert get_pdf_page_count(result) == 3 # Should have 3 pages (last page, first page, last page)
403+
assert (
404+
get_pdf_page_count(result) == 3
405+
) # Should have 3 pages (last page, first page, last page)
400406

401407
def test_duplicate_pdf_pages_empty_indexes_error(self, client, sample_pdf_path):
402408
"""Test duplicate_pdf_pages method with empty page_indexes raises error."""
@@ -415,7 +421,9 @@ def test_delete_pdf_pages_basic(self, client, sample_multipage_pdf_path):
415421

416422
# Verify the number of pages in the output PDF
417423
total_page_count = get_pdf_page_count(sample_multipage_pdf_path)
418-
assert get_pdf_page_count(result) == total_page_count - 1 # Should have 2 pages (deleted first page from 3-page PDF)
424+
assert (
425+
get_pdf_page_count(result) == total_page_count - 1
426+
) # Should have 2 pages (deleted first page from 3-page PDF)
419427

420428
def test_delete_pdf_pages_multiple(self, client, sample_multipage_pdf_path):
421429
"""Test delete_pdf_pages method with multiple page deletion."""
@@ -428,7 +436,9 @@ def test_delete_pdf_pages_multiple(self, client, sample_multipage_pdf_path):
428436

429437
# Verify the number of pages in the output PDF
430438
total_page_count = get_pdf_page_count(sample_multipage_pdf_path)
431-
assert get_pdf_page_count(result) == total_page_count - 2 # Should have 1 page (deleted pages 1 and 3 from 3-page PDF)
439+
assert (
440+
get_pdf_page_count(result) == total_page_count - 2
441+
) # Should have 1 page (deleted pages 1 and 3 from 3-page PDF)
432442

433443
def test_delete_pdf_pages_with_output_file(self, client, sample_multipage_pdf_path, tmp_path):
434444
"""Test delete_pdf_pages method saving to output file."""
@@ -449,7 +459,9 @@ def test_delete_pdf_pages_with_output_file(self, client, sample_multipage_pdf_pa
449459

450460
# Verify the number of pages in the output PDF
451461
total_page_count = get_pdf_page_count(sample_multipage_pdf_path)
452-
assert get_pdf_page_count(output_path) == total_page_count - 1 # Should have 2 pages (deleted page 2 from 3-page PDF)
462+
assert (
463+
get_pdf_page_count(output_path) == total_page_count - 1
464+
) # Should have 2 pages (deleted page 2 from 3-page PDF)
453465

454466
def test_delete_pdf_pages_negative_indexes_error(self, client, sample_pdf_path):
455467
"""Test delete_pdf_pages method with negative indexes raises error."""
@@ -473,7 +485,9 @@ def test_delete_pdf_pages_duplicate_indexes(self, client, sample_multipage_pdf_p
473485

474486
# Verify the number of pages in the output PDF
475487
total_page_count = get_pdf_page_count(sample_multipage_pdf_path)
476-
assert get_pdf_page_count(result) == total_page_count - 2 # Should have 1 page (deleted pages 1 and 2 from 3-page PDF)
488+
assert (
489+
get_pdf_page_count(result) == total_page_count - 2
490+
) # Should have 1 page (deleted pages 1 and 2 from 3-page PDF)
477491

478492
# Tests for add_page
479493
def test_add_page_at_beginning(self, client, sample_pdf_path):

tests/integration/test_live_api.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -306,7 +306,9 @@ def test_duplicate_pdf_pages_negative_indexes(self, client, sample_pdf_path):
306306
assert_is_pdf(result)
307307

308308
# Verify the number of pages in the output PDF
309-
assert get_pdf_page_count(result) == 3 # Should have 3 pages (last page, first page, last page)
309+
assert (
310+
get_pdf_page_count(result) == 3
311+
) # Should have 3 pages (last page, first page, last page)
310312

311313
def test_duplicate_pdf_pages_empty_indexes_error(self, client, sample_pdf_path):
312314
"""Test duplicate_pdf_pages method with empty page_indexes raises error."""
@@ -326,7 +328,9 @@ def test_delete_pdf_pages_basic(self, client, sample_pdf_path):
326328

327329
# Verify the number of pages in the output PDF
328330
total_pages = get_pdf_page_count(sample_pdf_path)
329-
assert get_pdf_page_count(result) == total_pages - 1 # Should have one less page than original
331+
assert (
332+
get_pdf_page_count(result) == total_pages - 1
333+
) # Should have one less page than original
330334

331335
def test_delete_pdf_pages_multiple(self, client, sample_pdf_path):
332336
"""Test delete_pdf_pages method with multiple page deletion."""

tests/integration/test_new_tools_integration.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,9 @@ def test_create_redactions_preset_with_output_file(
7575
"""Test creating redactions with preset and saving to file."""
7676
output_path = tmp_path / "redacted_preset.pdf"
7777
result = client.create_redactions_preset(
78-
sample_pdf_with_sensitive_data, preset="international-phone-number", output_path=str(output_path)
78+
sample_pdf_with_sensitive_data,
79+
preset="international-phone-number",
80+
output_path=str(output_path),
7981
)
8082
assert result is None
8183
assert output_path.exists()
@@ -94,7 +96,9 @@ def test_create_redactions_text(self, client, sample_pdf_with_sensitive_data):
9496
"""Test creating redactions for exact text matches."""
9597
# Use a very common letter that should exist
9698
result = client.create_redactions_text(
97-
sample_pdf_with_sensitive_data, text="a", case_sensitive=False,
99+
sample_pdf_with_sensitive_data,
100+
text="a",
101+
case_sensitive=False,
98102
)
99103
assert_is_pdf(result)
100104
assert len(result) > 0

0 commit comments

Comments
 (0)