Skip to content

Commit be898b3

Browse files
dgunning and claude committed
fix: resolve XBRLS detailed view overwriting totals and expand filer_type classification
Fix #687: dimensional segment rows in XBRLS stitching were overwriting parent total values (e.g., Goodwill 7,970M replaced by segment 650M). Skip is_dimension rows so totals are preserved.

Fix #562: expand filer_type fallback to classify ADR deposits, foreign registrations, UITs, investment company funds, and crowdfunding issuers using form-based signals when state_of_incorporation is unavailable.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent d057516 commit be898b3

3 files changed

Lines changed: 56 additions & 2 deletions

File tree

edgar/entity/constants.py

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,48 @@
104104
# All known form types for validation
105105
ALL_FORM_TYPES = COMPANY_FORMS | FUND_FORMS | INDIVIDUAL_FORMS
106106

107+
# Filer type classification form sets (used by Company.filer_type fallback)
108+
# Priority order: Foreign signals first, then Domestic signals
109+
# See docs-internal/research/sec-filings/data-structures/filer-type-gap-analysis.md
110+
111+
FILER_TYPE_FOREIGN_FORMS = frozenset({
112+
# ADR deposit registrations (100% confidence foreign)
113+
'F-6', 'F-6EF', 'F-6 POS', 'F-6/A',
114+
# Foreign private issuer exemption
115+
'12G3-2B',
116+
# Foreign registration statements
117+
'F-1', 'F-1/A', 'F-3', 'F-3/A', 'F-4', 'F-4/A',
118+
'F-10', 'F-10/A', 'F-3ASR', 'F-1MEF', 'F-3MEF', 'F-4MEF',
119+
# Foreign Exchange Act registration
120+
'20FR12B', '20FR12G', '20FR12B/A', '20FR12G/A',
121+
# Foreign government annual reports
122+
'18-K', '18-K/A',
123+
# Sub-national foreign entity reports
124+
'SE',
125+
})
126+
127+
FILER_TYPE_DOMESTIC_FORMS = frozenset({
128+
# Domestic registration statements
129+
'S-1', 'S-1/A', 'S-3', 'S-3/A', 'S-4', 'S-4/A',
130+
'S-11', 'S-11/A', 'S-3ASR', 'S-1MEF', 'S-3MEF', 'S-4MEF', 'S-4EF',
131+
# Small business forms (pre-2008)
132+
'SB-2', 'SB-2/A', 'SB-2MEF',
133+
'10KSB', '10KSB/A', '10KSB405', '10QSB', '10QSB/A',
134+
'10SB12G', '10SB12G/A', '10SB12B', '10SB12B/A',
135+
# Domestic Exchange Act registration
136+
'10-12G', '10-12G/A', '10-12B', '10-12B/A',
137+
# Unit Investment Trust forms
138+
'S-6', 'S-6/A', 'REGDEX', 'REGDEX/A',
139+
'24F-2NT', '24F-2NT/A', '487', '497J',
140+
# Investment company fund forms
141+
'N-1A', 'N-1A/A', 'N-2', 'N-2/A', 'N-3', 'N-4', 'N-5', 'N-6',
142+
'N-8A', 'N-8F', 'N-CSR', 'N-CSRS', 'N-CEN', 'N-PORT',
143+
'NSAR-A', 'NSAR-B', '485BPOS', '485APOS',
144+
'40-APP', '40-APP/A', '40-17G', '40-17G/A',
145+
# Regulation Crowdfunding (US-only)
146+
'C', 'C/A', 'C-U', 'C-AR', 'C-TR',
147+
})
148+
107149
# Name-based heuristic constants for company detection
108150
# Loose keywords: substring match (keyword appears anywhere in uppercased name)
109151
# Only keywords long enough (>=7 chars) or with distinctive punctuation to avoid

edgar/entity/core.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@
4242
from edgar.enums import FormType
4343

4444
# Import constants and utilities from separate modules
45-
from edgar.entity.constants import COMPANY_FORMS
45+
from edgar.entity.constants import COMPANY_FORMS, FILER_TYPE_FOREIGN_FORMS, FILER_TYPE_DOMESTIC_FORMS
4646
from edgar.entity.utils import has_company_filings, normalize_cik
4747

4848
# TTM (Trailing Twelve Months) imports
@@ -736,6 +736,13 @@ def filer_type(self) -> Optional[str]:
736736
return 'Foreign'
737737
elif '10-K' in form_types or '10-K/A' in form_types or '10-Q' in form_types:
738738
return 'Domestic'
739+
740+
# Extended fallback: foreign forms (ADR, foreign registration, sovereign)
741+
if form_types & FILER_TYPE_FOREIGN_FORMS:
742+
return 'Foreign'
743+
# Extended fallback: domestic forms (registration, UITs, funds, crowdfunding)
744+
if form_types & FILER_TYPE_DOMESTIC_FORMS:
745+
return 'Domestic'
739746
return None
740747

741748
def _get_form_types(self, limit: int = 100) -> set:

edgar/xbrl/stitching/core.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -386,10 +386,15 @@ def _integrate_statement_data(
386386
if item.get('is_abstract', False) and not item.get('children'):
387387
continue
388388

389-
# Skip dimension items
389+
# Skip dimension items (taxonomy structural items)
390390
if any(bracket in label for bracket in ['[Axis]', '[Domain]', '[Member]', '[Line Items]', '[Table]', '[Abstract]']):
391391
continue
392392

393+
# Skip dimensional segment rows — they share the same concept name as their
394+
# parent total row, so the last segment would overwrite the correct total value
395+
if item.get('is_dimension', False):
396+
continue
397+
393398
# Use concept as the primary key for identifying the same financial line item
394399
# This is more reliable than labels which may vary across filings
395400

0 commit comments

Comments
 (0)