Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
84 changes: 84 additions & 0 deletions teams/team3/create_rdf_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
import csv

# Base IRI defined next to prefix_siret; nothing in the code visible here reads it.
prefixe_url_siren = "https://api.insee.fr/siret/"

# Turtle predicates (prefixed names) available for each establishment.
# predicate_long / predicate_lat are not referenced by the code visible here.
predicate_name = "rdfs:label"
predicate_long = "sx:longitude"
predicate_lat = "sx:latitude"
predicate_purpose = "org:purpose"

# Column names looked up in the header row of the input CSV.
# The spellings LOGITUDE / latitude are kept because the script refers to them.
SIREN = "SIREN"
NIC = "NIC"
APEN = "APEN700"
NOMEN_LONG = "NOMEN_LONG"
LOGITUDE = "longitude"
latitude = "latitude"

# IRI prefix placed before SIREN + NIC to build each subject IRI.
prefix_siret = "https://api.insee.fr/entreprises/sirene/siret/"

# IRI prefix placed before the dotted APE code to build the org:purpose object.
prefix_ape = "http://id.insee.fr/codes/nafr2/sousClasse/"

def addChevrons(string):
    """Return *string* enclosed in angle brackets, i.e. ``<string>``."""
    return "".join(("<", string, ">"))

def create_line_string(predicate, object):
    """Return one Turtle predicate/object line whose object is a string literal.

    The result has the form ``\\t<predicate> "<value>" ; \\n``.

    Args:
        predicate: Predicate written verbatim (e.g. a prefixed name).
        object: Text of the literal. Double quotes are removed; backslashes,
            line feeds and carriage returns are escaped.

    Returns:
        The formatted line, ending with `` ; `` and a newline.
    """
    # Escape the backslash first so the escapes added afterwards are not doubled.
    # A raw backslash or line break is not allowed inside a "..." Turtle literal.
    literal = (
        object.replace('"', '')
        .replace("\\", "\\\\")
        .replace("\n", "\\n")
        .replace("\r", "\\r")
    )
    return "\t" + predicate + " \"" + literal + "\" ; \n"

def create_line_url(predicate, object):
    """Return one Turtle predicate/object line whose object is an IRI.

    The result has the form ``\\t<predicate> <IRI> ; \\n``.

    Args:
        predicate: Predicate written verbatim (e.g. a prefixed name).
        object: IRI text; any double quotes in it are removed.

    Returns:
        The formatted line, ending with `` ; `` and a newline.
    """
    iri = object.replace('"', '')
    # A space separates the predicate from the IRI, matching the layout
    # produced by create_line_string.
    return "\t" + predicate + " <" + iri + "> ; \n"


def create_point(siret, lat, long):
    """Build the Turtle fragment that links a subject to its point geometry.

    The fragment first ends the current subject with a ``geo:hasGeometry``
    statement pointing at ``ex:<siret>Point``, then declares that node as an
    ``sf:Point`` carrying a WKT literal written as ``POINT(<long> <lat>)``.
    """
    point_id = "{}Point".format(siret)
    wkt = "POINT({} {})".format(long, lat)
    pieces = (
        "\tgeo:hasGeometry ex:{} .\n".format(point_id),
        "ex:{} a sf:Point;\n".format(point_id),
        "\tgeo:asWKT \"{}\"^^geo:wktLiteral.\n".format(wkt),
    )
    return "".join(pieces)


#with open('geo-sirene_rdf.ttl', 'w') as output:

with open('geo-sirene_35_rdf.ttl', 'w', encoding="utf8") as output:
    # Write the prefix declarations.
    # The rdf, rdfs and sx namespaces must be the exact vocabulary IRIs
    # (http scheme, trailing '#'); otherwise rdf:type and rdfs:label expand
    # to IRIs that no RDF tool recognises.
    output.write("@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .\n")
    output.write("@prefix sx: <http://www.w3.org/2003/01/geo/wgs84_pos#> .\n")
    output.write("@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .\n")
    output.write("@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .\n")
    output.write("@prefix geo: <http://www.opengis.net/ont/geosparql#> .\n")
    output.write("@prefix ex: <http://www.example.org/POI#> .\n")
    output.write("@prefix sf: <http://www.opengis.net/ont/sf#> .\n")
    output.write("@prefix org: <http://www.w3.org/ns/org#> .\n")

    # Alternative input previously used here: 'geo_sirene.csv'.
    # newline='' lets the csv module handle line endings inside quoted fields.
    with open('geo-sirene_35.csv', 'r', encoding="utf8", newline='') as csvfile:
        csvreader = csv.reader(csvfile, delimiter=',', quotechar='"')

        # The first row holds the column names; resolve each needed column once.
        headers = next(csvreader)

        indice_SIREN = headers.index(SIREN)
        indice_NIC = headers.index(NIC)
        indice_NOMEN_LONG = headers.index(NOMEN_LONG)
        indice_LOGITUDE = headers.index(LOGITUDE)
        indice_latitude = headers.index(latitude)
        indice_APEN = headers.index(APEN)

        for i, row in enumerate(csvreader, start=1):
            # The subject IRI is built from SIREN followed by NIC.
            siret = row[indice_SIREN] + row[indice_NIC]
            output.write(addChevrons(prefix_siret + siret) + "\n")
            output.write("\trdf:type org:OrganizationalUnit ;\n ")

            # Insert a dot after the first two characters of the APE code.
            better_apen = row[indice_APEN][:2] + "." + row[indice_APEN][2:]
            output.write(create_line_string(predicate_name, row[indice_NOMEN_LONG]))
            output.write(create_line_url(predicate_purpose, prefix_ape + better_apen))
            # create_point also writes the '.' that ends this subject's statements.
            output.write(create_point(siret, row[indice_latitude], row[indice_LOGITUDE]))

            # Progress indicator: print the running count every 100 rows.
            if i % 100 == 0:
                print(i)



27 changes: 23 additions & 4 deletions teams/team3/readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,28 @@ ex:00708077300034Point a sf:Point;
* First we download [this file](https://www.insee.fr/fr/statistiques/2520034) containing French population grid data
* Then we use [QGIS](https://qgis.org/) to convert it to ShapeFile and change projection from EPSG:3035 to EPSG:4326
* Then we export it from shapefile to CSV
* We use a [Java tool to convert it to RDF](https://github.com/alicela/CensusGrid-LOS) hacked from
*
* We use a [Java tool to convert it to RDF](https://github.com/alicela/CensusGrid-LOS) hacked from [this one](https://github.com/LOS-ESSnet/POP5/blob/master/src/main/java/eu/europa/ec/eurostat/los/pop5/DataSetModelMaker.java)
## Converting organization data (SIRENE)
* We use [this repository](http://data.cquest.org/geo_sirene/last/) containing files with added geolocation information, produced [this way](https://www.insee.fr/fr/information/2509465)
* We use a Python conversion script to convert it
* This Python script can be modified to add variables using [the W3C Organization Ontology](https://www.w3.org/TR/vocab-org/)
* We use a [Python script](https://github.com/trblft/Paris-Hackathon/blob/master/teams/team3/create_rdf_test.py) to convert it
* This Python script can be modified to add variables using [the W3C Organization Ontology](https://www.w3.org/TR/vocab-org/)
# Querying the GraphDB SPARQL endpoints
```
# Sums the population (?pop) of the census resources found near the
# organization labelled "VINOUZE".
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
# NOTE(review): the W3C WGS84 vocabulary namespace normally ends with '#';
# confirm which form the loaded data uses before changing this IRI.
PREFIX geo-pos: <http://www.w3.org/2003/01/geo/wgs84_pos>
PREFIX gn: <http://www.geonames.org/ontology#>
PREFIX omgeo: <http://www.ontotext.com/owlim/geo#>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
select (sum(xsd:double(?pop)) as ?pop_totale) where {
# Select the position of the company VINOUZE
?e rdfs:label "VINOUZE" .
?e geo-pos:long ?lon .
?e geo-pos:lat ?lat .
# Retrieve the population of the statistical grid cells whose centroid is less than 1 km away
SERVICE <http://census_sparl_endpoint/repositories/census-point>
{
?c omgeo:nearby(?lat ?lon "1km") .
?c gn:population ?pop .
}
}
```