-
Notifications
You must be signed in to change notification settings - Fork 32
Add new action to infer a tabular resource schema #82
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
1c984dd
154d529
3b1773c
a167acb
670d896
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -5,6 +5,7 @@ | |
| import json | ||
|
|
||
| from sqlalchemy.orm.exc import NoResultFound | ||
| from frictionless import system, Resource | ||
|
|
||
| import ckan.plugins as plugins | ||
| import ckan.lib.uploader as uploader | ||
|
|
@@ -174,6 +175,42 @@ def resource_validation_show(context, data_dict): | |
|
|
||
| return _validation_dictize(validation) | ||
|
|
||
| def resource_table_schema_infer(context, data_dict): | ||
| ''' | ||
| Use frictionless framework to infer a resource schema | ||
| ''' | ||
|
|
||
| t.check_access('resource_create', context, data_dict) | ||
|
|
||
| t.get_or_bust(data_dict, 'resource_id') | ||
|
|
||
| store_schema = data_dict.get('store_schema', True) | ||
|
|
||
| resource = t.get_action('resource_show')( | ||
| {}, {u'id': data_dict['resource_id']}) | ||
|
|
||
| source = None | ||
| if resource.get('url_type') == 'upload': | ||
| upload = uploader.get_resource_uploader(resource) | ||
| if isinstance(upload, uploader.ResourceUpload): | ||
| source = upload.get_path(resource['id']) | ||
|
|
||
| if not source: | ||
| source = resource['url'] | ||
|
|
||
| with system.use_context(trusted=True): | ||
| # TODO: check for valid formats | ||
| fric_resource = Resource({'path': source, 'format': resource.get('format', 'csv').lower()}) | ||
| fric_resource.infer() | ||
| resource['schema'] = fric_resource.schema.to_json() | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can we get an error here?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yes, we can. For example, if you don't supply the format because the uploaded file doesn't have an extension, frictionless throws an exception saying that it can't infer the schema because the file is not tabular.
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. OK, so let's catch these exceptions and wrap them in a raised `ValidationError`.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. |
||
|
|
||
| # TODO: check for exception | ||
| if store_schema: | ||
| t.get_action('resource_update')( | ||
| context, resource) | ||
|
|
||
| return {u'schema': fric_resource.schema.to_dict()} | ||
|
|
||
|
|
||
| def resource_validation_delete(context, data_dict): | ||
| u''' | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -6,6 +6,7 @@ | |
|
|
||
| from werkzeug.datastructures import FileStorage as FlaskFileStorage | ||
| import ckan.plugins as p | ||
| import ckan.lib.uploader as uploader | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Not needed |
||
| import ckantoolkit as t | ||
|
|
||
| from ckanext.validation import settings | ||
|
|
@@ -17,6 +18,7 @@ | |
| auth_resource_validation_delete, auth_resource_validation_run_batch, | ||
| resource_create as custom_resource_create, | ||
| resource_update as custom_resource_update, | ||
| resource_table_schema_infer | ||
| ) | ||
| from ckanext.validation.helpers import ( | ||
| get_validation_badge, | ||
|
|
@@ -34,6 +36,7 @@ | |
| get_create_mode_from_config, | ||
| get_update_mode_from_config, | ||
| ) | ||
|
|
||
| from ckanext.validation.interfaces import IDataValidation | ||
| from ckanext.validation import blueprints, cli | ||
|
|
||
|
|
@@ -89,6 +92,7 @@ def get_actions(self): | |
| u'resource_validation_run_batch': resource_validation_run_batch, | ||
| u'resource_create': custom_resource_create, | ||
| u'resource_update': custom_resource_update, | ||
| u'resource_table_schema_infer': resource_table_schema_infer | ||
| } | ||
|
|
||
| return new_actions | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If the `resource['format']` is not one of `ckanext.validation.formats`, we should raise a `ValidationError` with: "Not a valid format to infer the resource schema"