Skip to content

Audit mandatory attributes #10

@paulduchesne

Description

@paulduchesne

Small script to assess label/description/source for FIAFcore entities.

import pandas
import pathlib
import rdflib

# FIAFcore class definitions, expected under ~/git/FIAFcore/classes.
fiafcore_path = pathlib.Path.home() / 'git' / 'FIAFcore' / 'classes'

# Every path anywhere beneath that directory whose suffix is exactly '.ttl'.
frags = [candidate for candidate in fiafcore_path.rglob('*') if candidate.suffix == '.ttl']

# Parse each fragment on its own, then fold its triples into one combined
# graph; fragments are visited in sorted path order.
graph = rdflib.Graph()
for fragment in sorted(frags):
    parsed = rdflib.Graph().parse(fragment)
    graph += parsed

# Subjects of every (?, rdf:type, owl:Class) triple in the combined graph.
fiafcore_classes = [
    subject
    for subject, _, _ in graph.triples((None, rdflib.RDF.type, rdflib.OWL.Class))
]

# Audit every class for label / description / source values in each language.
# The result is one row per (entity, attribute_language) pair, where `result`
# is the number of values found for that pair.
audited_attributes = [
    rdflib.RDFS.label,
    rdflib.URIRef('http://purl.org/dc/elements/1.1/description'),
    rdflib.URIRef('http://purl.org/dc/elements/1.1/source'),
]
audited_languages = ['en', 'es', 'fr']

# Rows are gathered in a plain list and turned into a DataFrame once at the
# end, rather than enlarging the frame one row at a time.
rows = []
for entity in sorted(fiafcore_classes):
    for attribute in audited_attributes:
        for language in audited_languages:
            # Only literals carry a language tag; URIRef/BNode objects have no
            # `.language` attribute, so they are skipped instead of raising
            # AttributeError.
            matches = [
                o
                for _, _, o in graph.triples((entity, attribute, None))
                if isinstance(o, rdflib.Literal) and o.language == language
            ]
            rows.append([entity, attribute + '_' + language, len(matches)])

df = pandas.DataFrame(rows, columns=['entity', 'attribute', 'result'])

# Distinct counts seen in the audit; anything other than 0 and 1 means
# something has gone wrong.
print(df.result.unique())

# One row per entity, one column per attribute_language combination.
pivoted_df = df.pivot(index='entity', columns='attribute', values='result').reset_index()

# Print the total for each attribute_language column, skipping the
# 'entity' column itself.
for column in pivoted_df.columns.values:
    if column == 'entity':
        continue
    print(column, sum(pivoted_df[column]))

Metadata

Metadata

Assignees

Labels

No fields configured for Feature.

Projects

No projects

Milestone

No milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions