RRAP-IS Dataset sync Demo Notebook
A tutorial on uploading and downloading RRAP datasets using a Jupyter notebook.
- About
- Authentication
- Helper functions
- Endpoint Documentation
- Demonstration
- Find the newly minted dataset
import requests
import os
import sys
import json
from bs4 import BeautifulSoup
from json2html import *
from IPython.display import IFrame, display, HTML, JSON, Markdown, Image
from mdsisclienttools.auth.TokenManager import DeviceFlowManager
import mdsisclienttools.datastore.ReadWriteHelper as IOHelper
import mdsisclienttools
import numpy as np
import pandas as pd
from cloudpathlib import S3Client
import cloudpathlib
# Base URLs of the services this notebook talks to.
data_store = "https://data.testing.rrap-is.com"
data_api = "https://data-api.testing.rrap-is.com"
registry_api = "https://registry-api.testing.rrap-is.com"
prov_api = "https://prov-api.testing.rrap-is.com"
auth_server = "https://auth.dev.rrap-is.com/auth/realms/rrap"
# garbage = "https://frogs.are.green"
base_urls = {
    'data_api': data_api,
    'registry_api': registry_api,
    'prov_api': prov_api,
    'auth_server': auth_server,
    'data_store': data_store,
}  # , 'garbage': garbage}

# Upper bound (seconds) on each reachability probe. Without a timeout,
# requests waits indefinitely on an unresponsive host and the cell hangs.
URL_CHECK_TIMEOUT_SECONDS = 30

# Probe every base URL once and abort the notebook on the first failure.
display('Checking base urls')
for url in base_urls.values():
    try:
        print(f'Testing - {url}', end="")
        requests.get(url, timeout=URL_CHECK_TIMEOUT_SECONDS).raise_for_status()
        print(' - Passed')
    except requests.exceptions.RequestException as err:
        # RequestException is the base class of HTTPError, ConnectionError,
        # Timeout, etc., so one handler covers both bad status codes and
        # transport-level failures.
        print(' - Fail')
        raise SystemExit(err) from err
# File in the working directory that is handed to the token manager as its
# local storage location.
local_token_storage = ".tokens.json"

# Device-flow token manager; later cells obtain request auth from it via
# token_manager.get_auth().
# NOTE(review): stage is "TEST" while auth_server points at a "dev" host -
# confirm this pairing is intended.
token_manager = DeviceFlowManager(
    local_storage_location=local_token_storage,
    keycloak_endpoint=auth_server,
    stage="TEST",
)
def wrap_html_table(data):
    """Wrap the first ``<table>`` in *data* in a scrollable, collapsible section.

    The table is placed inside a fixed-height ``<div>`` that scrolls
    vertically, and that div is placed inside a ``<details>`` element whose
    ``<summary>`` reads "Results".

    Args:
        data: HTML markup (string) expected to contain a ``<table>``.

    Returns:
        The prettified HTML string. If *data* contains no ``<table>``, the
        parsed markup is returned prettified but otherwise unchanged.
    """
    # Name the parser explicitly: without it BeautifulSoup picks whichever
    # parser happens to be installed (and warns), so output could differ
    # between environments. "html.parser" ships with Python.
    soup = BeautifulSoup(data, "html.parser")

    table_tag = soup.find("table")
    if table_tag is None:
        # Nothing to wrap; avoid an AttributeError on None.
        return soup.prettify()

    scroll_div = soup.new_tag("div")
    scroll_div['style'] = "width: auto; height: 400px; overflow-y: auto; "
    table_tag.wrap(scroll_div)

    details_tag = soup.new_tag("details")
    scroll_div.wrap(details_tag)

    # <summary> must be the first child of <details>; insert it directly on
    # the element we just created rather than relative to the first <div>
    # found in the document, which may not be ours.
    summary_tag = soup.new_tag("summary")
    summary_tag.string = "Results"
    details_tag.insert(0, summary_tag)

    return soup.prettify()
def json_to_md(response_json):
    """Render *response_json* as an HTML table inside a collapsible section.

    The object is converted to HTML with ``json2html.convert`` and the result
    is passed through ``wrap_html_table``; the returned value is that
    function's HTML string.
    """
    return wrap_html_table(json2html.convert(response_json))
def handle_request(method, url, params=None, payload=None, auth=None):
    """Send an HTTP request and return the decoded JSON response.

    Args:
        method: HTTP verb, e.g. "GET" or "POST".
        url: Full endpoint URL.
        params: Optional query parameters. When truthy they are sent and
            *payload* is ignored.
        payload: Optional JSON body, sent only when *params* is falsy.
        auth: Optional auth object forwarded to ``requests``.

    Returns:
        The parsed JSON body on success. On any request failure the error is
        printed and ``{"error": <exception>}`` is returned instead, so callers
        should check for an "error" key before indexing the result.

    Raises:
        Whatever ``response.json()`` raises if a successful response does not
        contain valid JSON (that call sits outside the guarded section).
    """
    request_kwargs = {"url": url, "auth": auth}
    if params:
        request_kwargs["params"] = params
    elif payload:
        request_kwargs["json"] = payload

    try:
        response = requests.request(method, **request_kwargs)
        # If the response was successful, no exception will be raised.
        response.raise_for_status()
    except requests.exceptions.HTTPError as http_err:
        # Fully qualified: a bare ``HTTPError`` is not imported in this file,
        # so the original handler raised NameError on any failure.
        print(f'HTTP error occurred: {http_err}')
        return {"error": http_err}
    except Exception as err:
        # Deliberate best-effort: report and hand the error back to the cell.
        print(f'Other error occurred: {err}')
        return {"error": err}
    else:
        return response.json()
## Report the installed version of the mdsisclienttools package by scanning
## pip's freeze output for matching entries.
try:
    from pip._internal.operations import freeze
except ImportError:  # pip < 10.0
    from pip.operations import freeze

packages = freeze.freeze()
found = [
    requirement
    for requirement in packages
    if 'mdsisclienttools' in requirement
]
display(found)
import logging
import re
import warnings

# Log records at ERROR level and above are written to log.txt.
logging.basicConfig(filename="log.txt", level=logging.ERROR)

# Route warnings issued via the warnings module into the logging system.
# NOTE(review): captured warnings are logged at WARNING level, which is below
# the ERROR threshold configured above - confirm they are meant to be dropped.
logging.captureWarnings(True)

# Always show DeprecationWarnings whose originating module name matches this
# module's name followed by a dot.
_module_pattern = r'^{0}\.'.format(re.escape(__name__))
warnings.filterwarnings(
    'always',
    category=DeprecationWarning,
    module=_module_pattern,
)

# Emit a sample warning to exercise the configuration above.
warnings.warn("This is a DeprecationWarning", category=DeprecationWarning)
Endpoint Documentation
Endpoint documentation can be found by appending either /docs or /redoc to the end of a base URL.
For example: https://data-api.testing.rrap-is.com/docs
Then select from the menu an endpoint function call e.g. /register/mint-dataset
Then append the function call onto the base url e.g. https://data-api.testing.rrap-is.com/register/mint-dataset
# Mint a new dataset: POST the metadata below to the data API and remember
# the handle found in the response for the following cells.
auth = token_manager.get_auth
postfix = "/register/mint-dataset"
payload = {
    "author": {
        "name": "Andrew Freebairn",
        "email": "andrew.freebairn@csiro.au",
        "orcid": "https://orcid.org/0000-0001-9429-6559",
        "organisation": {
            "name": "CSIRO",
            "ror": "https://ror.org/03qn8fb07",
        },
    },
    "dataset_info": {
        "name": "MVP Demo Dataset",
        "description": "For demonstration purposes",
        "publisher": {
            "name": "Andrew",
            "ror": "https://ror.org/057xz1h85",
        },
        "created_date": "2022-08-05",
        "published_date": "2022-08-05",
        "license": "https://creativecommons.org/licenses/by/4.0/",
        "keywords": ["keyword1"],
        "version": "0.0.1",
    },
}
endpoint = f"{data_api}{postfix}"
response_json = handle_request(
    "POST",
    endpoint,
    params=None,
    payload=payload,
    auth=auth(),
)
# handle_request returns {"error": ...} on failure, in which case this lookup
# raises KeyError.
new_handle = response_json['handle']
HTML(json_to_md(response_json))
# List every registered dataset and check whether the handle minted above
# appears among them.
auth = token_manager.get_auth
postfix = "/registry/items/list-all-datasets"
endpoint = f"{data_api}{postfix}"
response_json = handle_request(
    "GET",
    endpoint,
    params=None,
    payload=None,
    auth=auth(),
)
reg_items = response_json['registry_items']
if not any(item['handle'] == new_handle for item in reg_items):
    print(f'Did NOT find new handle: {new_handle}')
else:
    print(f'Found new handle: {new_handle}')
# Download step: call IOHelper.download for the dataset identified by
# new_handle, authenticating with a freshly obtained auth object.
# NOTE(review): './data' is presumably the local destination directory -
# confirm against the IOHelper.download signature.
auth = token_manager.get_auth
IOHelper.download('./data', new_handle, auth(), data_api)
# Upload step: call IOHelper.upload for the same handle.
# NOTE(review): "./data" is presumably the local source directory, and the
# argument order differs from download() - confirm against IOHelper.upload.
auth = token_manager.get_auth
IOHelper.upload(new_handle, auth(), "./data", data_api)