-
Notifications
You must be signed in to change notification settings - Fork 2
Data Checks
Vladimir Alexiev edited this page May 8, 2025
·
3 revisions
This page lists checks for data quality and consistency
# Dangling references: IRIs that occur as the object of some triple but are
# never the subject of any triple. IRIs in the QUDT unit and quantitykind
# namespaces are left out of the report.
PREFIX quantitykind: <http://qudt.org/vocab/quantitykind/>
PREFIX unit: <http://qudt.org/vocab/unit/>
select distinct ?s where {
  [] ?p ?s .
  filter(isURI(?s))
  # Keep only IRIs that have no outgoing triples at all.
  filter not exists { ?s ?p1 [] }
  # Namespace test: STRSTARTS only skips IRIs that begin with the namespace,
  # whereas CONTAINS would also skip IRIs that merely embed it somewhere.
  filter(!strstarts(str(?s), str(unit:)))
  filter(!strstarts(str(?s), str(quantitykind:)))
}
# 58 results!
see #52
Check that every object has an mRID and that the mRID conforms to the object's URI (the mRID is the suffix of the URI)
All objects with an mRID match the URI
# mRID/URI consistency: list identified objects whose mRID differs from the
# part of their URI that follows the model base URI. An object whose URI does
# not contain that base is listed too, because STRAFTER then yields "".
PREFIX cim: <https://cim.ucaiug.io/ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
select ?x ?id {
  ?x a cim:IdentifiedObject ;
     cim:IdentifiedObject.mRID ?id .
  # Use "!=" rather than a leading "!": unary "!" binds tighter than "=", so
  # "!strafter(...) = ?id" compares a boolean with the mRID and never matches,
  # which makes the check report zero mismatches no matter what the data holds.
  filter(strafter(str(?x), "http://www.Statnett.no/IGM/Nordic44_CGM#_") != str(?id))
}

# 241 objects have no mRID
# Identified objects that have no cim:IdentifiedObject.mRID value at all.
PREFIX cim: <https://cim.ucaiug.io/ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
SELECT ?x
WHERE {
  ?x a cim:IdentifiedObject .
  FILTER NOT EXISTS { ?x cim:IdentifiedObject.mRID [] }
}

# Discrepancy source:
# Identified objects without an mRID, leaving out every object that is typed
# inside a named graph which carries a dct:conformsTo statement about itself.
PREFIX dct: <http://purl.org/dc/terms/>
PREFIX cim: <https://cim.ucaiug.io/ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
SELECT ?x
WHERE {
  ?x a cim:IdentifiedObject .

  # No mRID on the object.
  FILTER NOT EXISTS { ?x cim:IdentifiedObject.mRID [] }

  # The object is not typed in any graph that states its own dct:conformsTo.
  FILTER NOT EXISTS {
    GRAPH ?g {
      ?x a ?type .
      ?g dct:conformsTo ?c .
    }
  }
}
see #53
Names should be canonical strings, i.e. no leading, trailing or consecutive spaces. Otherwise when you print them, you can't tell apart two names that differ only in spacing.
450 names are non-canonical strings
# Non-canonical names: a name with a leading space, a trailing space, or a run
# of two or more consecutive spaces.
PREFIX cim: <https://cim.ucaiug.io/ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
select ?x ?name {
  ?x cim:IdentifiedObject.name ?name .
  # One alternation instead of three UNION branches, so a name that breaks
  # several rules is listed once. " {2,}" spells out "two or more spaces";
  # a pattern holding just one space would flag every multi-word name.
  filter(regex(?name, "^ | $| {2,}"))
}

# 40 differ only by spaces
# Names that collide once every space is removed: for each space-stripped
# form, count the distinct original spellings and keep the forms that have
# more than one.
PREFIX cim: <https://cim.ucaiug.io/ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
select ?nospace (count(distinct ?name) as ?c) {
  ?x cim:IdentifiedObject.name ?name .
  bind(replace(?name, " ", "") as ?nospace)
}
group by ?nospace
# The aggregate is repeated here instead of referring to ?c: in the SPARQL 1.1
# algebra HAVING is evaluated before the SELECT aliases are bound, so engines
# that follow it strictly do not see ?c at this point.
having(count(distinct ?name) > 1)

# Example
# Example: every object whose name, with all spaces removed, equals
# "OSKARSHAMN", together with its sesame:directType.
PREFIX sesame: <http://www.openrdf.org/schema/sesame#>
PREFIX cim: <https://cim.ucaiug.io/ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
SELECT ?x ?name ?type
WHERE {
  ?x cim:IdentifiedObject.name ?name ;
     sesame:directType ?type .
  FILTER(REPLACE(?name, " ", "") = "OSKARSHAMN")
}
# see #14
# Number of cim:IdentifiedObject.name statements per name value, most
# frequent first.
PREFIX cim: <https://cim.ucaiug.io/ns#>
PREFIX sesame: <http://www.openrdf.org/schema/sesame#>
SELECT ?name (COUNT(*) AS ?c)
WHERE {
  ?x cim:IdentifiedObject.name ?name .
}
GROUP BY ?name
ORDER BY DESC(?c)