Code for layout parser #34
Open
Yashsethi24 wants to merge 1 commit into
Open
Conversation
Hi. Here's a snippet for that:
#!/usr/bin/env python3
"""
Usage:
python publaynet_annotations_extractor.py --input path/to/train.json [--output annotations.json] [--limit 100]
"""
import json
import argparse
from pathlib import Path
from pycocotools.coco import COCO
# PubLayNet annotations are COCO-format, but the category ids stand for
# document-layout labels. The ids are consecutive and start at 1, so the
# mapping is built by enumerating the labels in id order.
CATEGORY_NAMES = dict(
    enumerate(("text", "title", "list", "table", "figure"), start=1)
)
def _positive_int(text):
    """
    argparse ``type=`` converter that accepts only integers >= 1.

    Raises argparse.ArgumentTypeError for anything else, which argparse turns
    into a usage error (exit status 2) instead of a traceback.
    """
    try:
        value = int(text)
    except ValueError:
        raise argparse.ArgumentTypeError(
            f"expected an integer, got {text!r}"
        ) from None
    if value < 1:
        raise argparse.ArgumentTypeError(
            f"must be a positive integer, got {value}"
        )
    return value


def parse_args(argv=None):
    """
    Parse the command-line options of the annotation extractor.

    Args:
        argv: Optional list of argument strings to parse. When None (the
            default) argparse reads sys.argv[1:], so existing callers that
            invoke ``parse_args()`` behave as before.

    Returns:
        argparse.Namespace with:
        - input: path string of the COCO-format JSON (required)
        - output: path string of the JSON file to write
          (default "publaynet_image_annotations.json")
        - limit: positive int, or None when the option is not given

    Raises:
        SystemExit: raised by argparse on invalid or missing options,
            including a --limit value that is not a positive integer.
    """
    parser = argparse.ArgumentParser(
        description="Load PubLayNet annotations and extract per-image annotation lists."
    )
    parser.add_argument(
        "-i", "--input", required=True,
        help="Path to the COCO-format PubLayNet JSON (train.json, val.json, or test.json)"
    )
    parser.add_argument(
        "-o", "--output", default="publaynet_image_annotations.json",
        help="Where to save the output JSON mapping"
    )
    parser.add_argument(
        # Reject 0 and negative values up front: they would otherwise be
        # interpreted as "no limit" or as a slice from the end of the id list.
        "-l", "--limit", type=_positive_int,
        help="Process only the first N images (for quick testing)"
    )
    return parser.parse_args(argv)
def load_coco_annotations(json_path: Path) -> COCO:
    """
    Load and return a COCO object from the given JSON file.

    Args:
        json_path: Location of the COCO-format annotation file. A plain
            string is accepted as well; it is converted to a Path.

    Returns:
        The COCO object constructed from the file.

    Raises:
        FileNotFoundError: if json_path does not point to an existing
            regular file (missing path, or a directory).
    """
    json_path = Path(json_path)
    # is_file() rather than exists(): a directory would pass exists() and
    # only fail later, when COCO tries to read it.
    if not json_path.is_file():
        raise FileNotFoundError(f"Annotation file not found: {json_path}")
    return COCO(str(json_path))
def gather_image_annotations(coco: COCO, limit: int = None) -> dict:
    """
    Return a dict mapping image filenames to their list of annotations.

    Each annotation contains:
    - bbox: the annotation's "bbox" value, copied as-is
      ([x, y, width, height] in COCO format)
    - category: human-readable name looked up in CATEGORY_NAMES,
      or "unknown" for an id that is not in the table
    - area: the annotation's "area", or 0 when absent
    - iscrowd: the annotation's "iscrowd", or 0 when absent

    Args:
        coco: Loaded COCO object to read images and annotations from.
        limit: Maximum number of images to process, taken from the start of
            the id list returned by coco.getImgIds(). None (the default)
            processes every image; 0 yields an empty dict.

    Returns:
        Dict keyed by each image's "file_name". Images whose record has no
        "file_name" are keyed by the placeholder "<id_{image id}>".

    Raises:
        ValueError: if limit is negative.
    """
    image_ids = coco.getImgIds()
    # Compare against None explicitly so that limit=0 means "no images"
    # instead of being mistaken for "no limit".
    if limit is not None:
        if limit < 0:
            # A negative slice bound would silently drop images from the end.
            raise ValueError(f"limit must be non-negative, got {limit}")
        image_ids = image_ids[:limit]

    records = {}
    for img_id in image_ids:
        info = coco.loadImgs(img_id)[0]
        filename = info.get("file_name", f"<id_{img_id}>")
        anns = coco.loadAnns(coco.getAnnIds(imgIds=img_id))
        records[filename] = [
            {
                "bbox": ann["bbox"],
                "category": CATEGORY_NAMES.get(ann["category_id"], "unknown"),
                "area": ann.get("area", 0),
                "iscrowd": ann.get("iscrowd", 0),
            }
            for ann in anns
        ]
    return records
def save_annotations(data: dict, out_path: Path) -> None:
    """
    Write the collected annotation data to a JSON file.

    Args:
        data: JSON-serialisable mapping to write; its length is reported as
            the number of images in the confirmation message.
        out_path: Destination file. A plain string is accepted as well.
            Missing parent directories are created, and an existing file is
            overwritten.

    The file is written as UTF-8 with a 2-space indent, and a one-line
    summary is printed to stdout afterwards.
    """
    out_path = Path(out_path)
    # Create the target directory first so a long extraction run is not lost
    # to a missing output folder at the very last step.
    out_path.parent.mkdir(parents=True, exist_ok=True)
    with out_path.open("w", encoding="utf-8") as f:
        json.dump(data, f, indent=2)
    print(f"Saved annotations for {len(data)} images to {out_path}")
def main():
    """
    Command-line entry point.

    Parses the options, loads the COCO annotation file, collects the
    per-image annotation lists (honouring --limit) and writes them to the
    output JSON file, printing progress messages along the way.
    """
    cli_args = parse_args()
    source = Path(cli_args.input)
    destination = Path(cli_args.output)

    print(f"Loading annotations from {source}...")
    dataset = load_coco_annotations(source)

    print("Gathering per-image annotation data...")
    save_annotations(
        gather_image_annotations(dataset, limit=cli_args.limit),
        destination,
    )
# Run the extractor only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Add this suggestion to a batch that can be applied as a single commit. This suggestion is invalid because no changes were made to the code. Suggestions cannot be applied while the pull request is closed. Suggestions cannot be applied while viewing a subset of changes. Only one suggestion per line can be applied in a batch. Add this suggestion to a batch that can be applied as a single commit. Applying suggestions on deleted lines is not supported. You must change the existing code in this line in order to create a valid suggestion. Outdated suggestions cannot be applied. This suggestion has been applied or marked resolved. Suggestions cannot be applied from pending reviews. Suggestions cannot be applied on multi-line comments. Suggestions cannot be applied while the pull request is queued to merge. Suggestion cannot be applied right now. Please check back later.
Hey! Excellent work.
But can you please share the code which you used to get the annotations of the images?