From bf1b3d2207cc5d1c8b0304c0fdd20f5a07c3d10e Mon Sep 17 00:00:00 2001
From: trblft
Date: Wed, 12 Sep 2018 09:30:29 +0200
Subject: [PATCH 1/3] Python script added

---
 teams/team3/create_rdf_test.py | 120 ++++++++++++++++++++++++++++++++++
 1 file changed, 120 insertions(+)
 create mode 100644 teams/team3/create_rdf_test.py

diff --git a/teams/team3/create_rdf_test.py b/teams/team3/create_rdf_test.py
new file mode 100644
index 0000000..c2d0c34
--- /dev/null
+++ b/teams/team3/create_rdf_test.py
@@ -0,0 +1,120 @@
+"""Convert a geocoded SIRENE CSV extract into an RDF/Turtle file.
+
+Each CSV row becomes one org:OrganizationalUnit identified by its SIRET
+(SIREN + NIC), carrying a label, an APE code IRI and a WKT point.
+"""
+import csv
+
+prefixe_url_siren = "https://api.insee.fr/siret/"
+
+predicate_name = "rdfs:label"
+predicate_long = "sx:longitude"
+predicate_lat = "sx:latitude"
+predicate_purpose = "org:purpose"
+
+# Column names looked up in the CSV header row.
+SIREN = "SIREN"
+NIC = "NIC"
+APEN = "APEN700"
+NOMEN_LONG = "NOMEN_LONG"
+LOGITUDE = "longitude"
+latitude = "latitude"
+prefix_siret = "https://api.insee.fr/entreprises/sirene/siret/"
+
+prefix_ape = "http://id.insee.fr/codes/nafr2/sousClasse/"
+
+# Turtle header written once at the top of the output file.
+# NOTE(review): the namespace IRIs of `sx:` and `ex:` are not recoverable from
+# this file; fill them in, otherwise the output is not valid Turtle.
+PREFIX_LINES = (
+    "@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .\n",
+    "@prefix sx: .\n",
+    "@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .\n",
+    "@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .\n",
+    "@prefix geo: <http://www.opengis.net/ont/geosparql#> .\n",
+    "@prefix ex: .\n",
+    "@prefix sf: <http://www.opengis.net/ont/sf#> .\n",
+    "@prefix org: <http://www.w3.org/ns/org#> .\n",
+)
+
+
+def addChevrons(string):
+    """Return *string* wrapped in angle brackets (Turtle IRI syntax)."""
+    return "<" + string + ">"
+
+
+def create_line_string(predicate, object):
+    """Return a Turtle ``predicate "literal" ;`` line.
+
+    Double quotes are dropped from *object* and backslashes are escaped so
+    the value cannot terminate or corrupt the quoted literal.
+    """
+    literal = object.replace('"', '').replace("\\", "\\\\")
+    return "\t" + predicate + " \"" + literal + "\" ; \n"
+
+
+def create_line_url(predicate, object):
+    """Return a Turtle ``predicate <iri> ;`` line for an IRI object."""
+    return "\t" + predicate + " " + addChevrons(object.replace('"', '')) + " ; \n"
+
+
+def create_point(siret, lat, long):
+    """Return the Turtle lines that attach a WKT point to the current subject.
+
+    The first line closes the subject's statement (it ends with ``.``); the
+    point is named ``ex:<siret>Point``. Longitude is written before latitude.
+    """
+    pointName = "%sPoint" % (siret)
+    point = "\tgeo:hasGeometry ex:%s .\n" % (pointName)
+    point += "ex:%s a sf:Point;\n" % (pointName)
+    point += "\tgeo:asWKT \"POINT(%s %s)\"^^geo:wktLiteral.\n" % (long, lat)
+    return point
+
+
+def _create_unit(siret, name, apen, lat, long):
+    """Return the complete Turtle description of one CSV row."""
+    # Insert a dot after the first two characters, e.g. "6201Z" -> "62.01Z".
+    better_apen = apen[:2] + "." + apen[2:]
+    return (
+        addChevrons(prefix_siret + siret) + "\n"
+        + "\trdf:type org:OrganizationalUnit ;\n"
+        + create_line_string(predicate_name, name)
+        + create_line_url(predicate_purpose, prefix_ape + better_apen)
+        + create_point(siret, lat, long)
+    )
+
+
+def main(csv_path="geo-sirene_35.csv", ttl_path="geo-sirene_35_rdf.ttl"):
+    """Read *csv_path* and write its rows as Turtle to *ttl_path*."""
+    with open(ttl_path, "w", encoding="utf8") as output:
+        output.writelines(PREFIX_LINES)
+
+        # newline="" lets the csv module handle line endings in quoted fields.
+        with open(csv_path, "r", encoding="utf8", newline="") as csvfile:
+            csvreader = csv.reader(csvfile, delimiter=",", quotechar='"')
+            headers = next(csvreader)
+
+            indice_SIREN = headers.index(SIREN)
+            indice_NIC = headers.index(NIC)
+            indice_NOMEN_LONG = headers.index(NOMEN_LONG)
+            indice_LOGITUDE = headers.index(LOGITUDE)
+            indice_latitude = headers.index(latitude)
+            indice_APEN = headers.index(APEN)
+
+            for i, row in enumerate(csvreader, start=1):
+                siret = row[indice_SIREN] + row[indice_NIC]
+                output.write(_create_unit(
+                    siret,
+                    row[indice_NOMEN_LONG],
+                    row[indice_APEN],
+                    row[indice_latitude],
+                    row[indice_LOGITUDE],
+                ))
+
+                # Progress indicator: print the row count every 100 rows.
+                if i % 100 == 0:
+                    print(i)
+
+
+if __name__ == "__main__":
+    main()

From a2e7cccd40b03fa8234f29c7a82f7a332963740c Mon Sep 17 00:00:00 2001
From: trblft
Date: Wed, 12 Sep 2018 09:40:51 +0200
Subject: [PATCH 2/3] Url added

---
 teams/team3/readme.md | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/teams/team3/readme.md b/teams/team3/readme.md
index e5297ae..586b8b4 100644
--- a/teams/team3/readme.md
+++ b/teams/team3/readme.md
@@ -18,9 +18,8 @@ ex:00708077300034Point a sf:Point;
 * First we download [this file](https://www.insee.fr/fr/statistiques/2520034) containing french population grid data
 * Then we use [QGIS](https://qgis.org/) to convert it to ShapeFile and change projection from EPSG:3035 to EPSG:4326
 * Then we export it from shapefile to CSV
-* We use a [Java tool to convert it to RDF](https://github.com/alicela/CensusGrid-LOS) hacked from
-* 
+* We use a [Java tool to convert it to RDF](https://github.com/alicela/CensusGrid-LOS) hacked from [this
one](https://github.com/LOS-ESSnet/POP5/blob/master/src/main/java/eu/europa/ec/eurostat/los/pop5/DataSetModelMaker.java) ## Converting organization data (SIRENE) * We use [this repository](http://data.cquest.org/geo_sirene/last/) containing files with added geolocalization information that has been produced [this way](https://www.insee.fr/fr/information/2509465) and this way -* We use a Python conversion script to convert it +* We use a [Python script](https://github.com/trblft/Paris-Hackathon/blob/master/teams/team3/create_rdf_test.py) to convert it * This Python script can be modified to add variables using [the W3C Organization Ontology](https://www.w3.org/TR/vocab-org/) \ No newline at end of file From a0855d97129ff7a3ae8885eff269aefb2eb84021 Mon Sep 17 00:00:00 2001 From: trblft Date: Wed, 12 Sep 2018 11:02:17 +0200 Subject: [PATCH 3/3] Query part --- teams/team3/readme.md | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/teams/team3/readme.md b/teams/team3/readme.md index 586b8b4..c4ab321 100644 --- a/teams/team3/readme.md +++ b/teams/team3/readme.md @@ -22,4 +22,24 @@ ex:00708077300034Point a sf:Point; ## Converting organization data (SIRENE) * We use [this repository](http://data.cquest.org/geo_sirene/last/) containing files with added geolocalization information that has been produced [this way](https://www.insee.fr/fr/information/2509465) and this way * We use a [Python script](https://github.com/trblft/Paris-Hackathon/blob/master/teams/team3/create_rdf_test.py) to convert it -* This Python script can be modified to add variables using [the W3C Organization Ontology](https://www.w3.org/TR/vocab-org/) \ No newline at end of file +* This Python script can be modified to add variables using [the W3C Organization Ontology](https://www.w3.org/TR/vocab-org/) +# Querying the GraphDB SPARQL endpoints +``` +PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> +PREFIX geo-pos: <http://www.w3.org/2003/01/geo/wgs84_pos#> +PREFIX gn: <http://www.geonames.org/ontology#> +PREFIX omgeo: <http://www.ontotext.com/owlim/geo#> +PREFIX xsd: <http://www.w3.org/2001/XMLSchema#> +select (sum(xsd:double(?pop)) as ?pop_totale) 
where { + # On sélectionne la position de l'entreprise VINOUZE + ?e rdfs:label "VINOUZE" . + ?e geo-pos:long ?lon . + ?e geo-pos:lat ?lat . + # On récupère la population des carreaux statistiques dont le centroide est à moins d'1 km + SERVICE + { + ?c omgeo:nearby(?lat ?lon "1km") . + ?c gn:population ?pop . + } +} +``` \ No newline at end of file