About

This notebook is a demonstration of registering elements that will enable provenance traces.

Run all imports

Keep all your imports at the top of a notebook. It allows for easier management.

import requests
import os
import sys
import json
from bs4 import BeautifulSoup
from IPython.display import IFrame, display, HTML, JSON, Markdown, Image
from mdsisclienttools.auth.TokenManager import DeviceFlowManager

import numpy as np
import pandas as pd

import warnings
warnings.filterwarnings(action='once')

Define global variables

As with imports, we define notebook-wide variables at the top and reuse them throughout the notebook.

# Base URLs of the services this notebook talks to.
data_store = "https://data.testing.rrap-is.com"
data_api = "https://data-api.testing.rrap-is.com"
registry_api = "https://registry-api.testing.rrap-is.com"
prov_api = "https://prov-api.testing.rrap-is.com"
# NOTE(review): this host is on "dev" while every other service is on
# "testing" - confirm this is the intended auth server.
auth_server = "https://auth.dev.rrap-is.com/auth/realms/rrap"
# To exercise the failure path, temporarily add a deliberately bad entry,
# e.g. 'garbage': "https://frogs.are.green".
base_urls = {
    'data_api': data_api,
    'registry_api': registry_api,
    'prov_api': prov_api,
    'auth_server': auth_server,
    'data_store': data_store,
}
display('Checking base urls')

# Fail fast: stop the notebook at the first service that cannot be reached
# or that answers with an error status.
for url in base_urls.values():
    print(f'Testing - {url}', end="")
    try:
        # The timeout keeps an unresponsive host from hanging the notebook.
        r = requests.get(url, timeout=30)
        r.raise_for_status()
    except requests.exceptions.RequestException as err:
        # HTTPError (raised by raise_for_status) is a RequestException
        # subclass, so this one handler covers bad statuses, connection
        # errors and timeouts alike.
        print(' - Fail')
        raise SystemExit(err)
    else:
        print(' - Passed')

Authentication

Setup tokens using device authorisation flow against keycloak server

This could result in a browser window being opened if you don't have valid tokens cached in local storage.

Return to Top

# File in which tokens are cached between runs of the notebook.
local_token_storage = ".tokens.json"

# Device authorisation flow against the auth server defined above. All
# arguments are passed by keyword, so their order is irrelevant.
token_manager = DeviceFlowManager(
    local_storage_location=local_token_storage,
    keycloak_endpoint=auth_server,
    stage="TEST",
)

Endpoint Documentation

Endpoint documentation can be found by appending either /docs or /redoc to the end of a base URL.

For example:

Then select from the menu an endpoint function call e.g. /register/mint-dataset

Then append the function call onto the base url e.g. https://data-api.testing.rrap-is.com/register/mint-dataset

Return to Top

Notebook helper functions

Return to Top

from enum import Enum
from enum_switch import Switch
class ProvType(Enum):
    """Provenance category of a registry item.

    The lower-cased member name is used as a path segment when the registry
    endpoint URLs are built, so renaming a member changes the URL.
    """
    AGENT = 1
    ACTIVITY = 2
    ENTITY = 3

class ItemType(Enum):
    """Kind of item that can be registered.

    The lower-cased member name is used as a path segment when the registry
    endpoint URLs are built, and ProvTypeFromItemType defines one handler
    method named after each member, so names must stay in sync with both.
    """
    MODEL = 1
    PERSON = 2
    ORGANISATION = 3
    MODELRUN = 4
    MODEL_RUN_WORKFLOW = 5

class ProvTypeFromItemType(Switch):
    """Map each ItemType member to the ProvType it belongs to.

    There is one handler method per ItemType member, named after the member.
    An instance is created with the ItemType enum and then called with one of
    its members; the call returns the value of the matching handler.
    """

    def MODEL(self):
        # A model is an entity.
        return ProvType.ENTITY

    def PERSON(self):
        # A person is an agent.
        return ProvType.AGENT    

    def ORGANISATION(self):
        # An organisation is an agent.
        return ProvType.AGENT    

    def MODELRUN(self):
        # A model run is an activity.
        return ProvType.ACTIVITY

    def MODEL_RUN_WORKFLOW(self):
        # A model run workflow definition is an entity.
        return ProvType.ENTITY

# Callable that returns the ProvType for a given ItemType member.
prov_of_item = ProvTypeFromItemType(ItemType)

# Sanity check: resolve every item type once and show the result, one per
# line. `provs` now holds the resolved ProvType members instead of the None
# values that print() returns.
provs = [prov_of_item(t) for t in ItemType]
print(*provs, sep="\n")
ProvType.ENTITY
ProvType.AGENT
ProvType.AGENT
ProvType.ACTIVITY
ProvType.ENTITY
def register_item(payload, item_type, auth):
    """Create a registry item of the given type.

    Args:
        payload: JSON-serialisable body describing the item to create.
        item_type: ItemType member; together with its ProvType it selects
            the endpoint path.
        auth: Zero-argument callable whose return value is handed to
            requests as the ``auth`` argument.

    Returns:
        The requests response of the POST call. The status is not checked
        here; that is left to the caller.
    """
    category = prov_of_item(item_type).name.lower()
    subtype = item_type.name.lower()
    url = f'{registry_api}/registry/{category}/{subtype}/create'
    return requests.post(url, json=payload, auth=auth())
def registry_list(item_type, auth):
    """List the registry items of the given type.

    Args:
        item_type: ItemType member; together with its ProvType it selects
            the endpoint path.
        auth: Zero-argument callable whose return value is handed to
            requests as the ``auth`` argument.

    Returns:
        The requests response of the GET call. The status is not checked
        here; that is left to the caller.
    """
    segments = (
        prov_of_item(item_type).name.lower(),
        item_type.name.lower(),
        'list',
    )
    url = registry_api + '/registry/' + '/'.join(segments)
    return requests.get(url, auth=auth())

Demonstration

This demonstration is for registering elements that will enable provenance traces

Register a model (CoCoNet)

# Single-model example. The name fields now say CoCoNet so they agree with
# the description and documentation URL (they previously said "ADRIA").
models = [{
    "display_name": "CoCoNet",
    "name": "CoCoNet",
    "description": "Coral Community Network",
    "documentation_url": "https://github.com/gbrrestoration/CoCoNet-model/blob/main/README.md",
    "source_url": "https://gbrrestoration.org/",
}]
auth = token_manager.get_auth
responses = [register_item(model, ItemType.MODEL, auth) for model in models]
# Plain loop for the printing side effect; avoids building a list of None
# and rebinding the builtin `vars`.
for response in responses:
    print(json.dumps(response.json(), indent=2))

Register Models

  • ADRIA
  • eReefs
  • RECOM
  • Reefmod
  • CoCoNet
  • IPMF

Return to Top

# Payloads for every model to register.
models = [{
    "display_name": "ADRIA",
    "name": "ADRIA",
    "description": "Yet to be published",
    "documentation_url": "https://github.com/gbrrestoration/",
    "source_url": "https://gbrrestoration.org/",
}, {
    "display_name": "eReefs",
    "name": "eReefs",
    "description": "https://research.csiro.au/ereefs/summary/",
    "documentation_url": "https://gbrrestoration.org/wp-content/uploads/2020/09/T14-Environmental-Modelling-of-Large-Scale-SRM_v3.03-3.pdf#page=17",
    "source_url": "https://github.com/csiro-coasts/EMS/",
}, {
    "display_name": "RECOM",
    "name": "RECOM",
    "description": "The Relocatable Coastal Model (RECOM) is designed for non-expert modellers to generate high resolution models over limited area coastal or reef domains within the GBR, and produces hydrodynamic, sediment transport, wave and biogeochemical outputs",
    # Trailing whitespace removed from both URLs so they are stored clean.
    "documentation_url": "https://research.csiro.au/ereefs/models/models-about/recom/",
    "source_url": "https://research.csiro.au/ereefs/models/models-about/recom/",
}, {
    "display_name": "Reefmod",
    "name": "Reefmod",
    "description": "Modelling fine-scale ecological processes",
    "documentation_url": "https://github.com/ymbozec/REEFMOD.6.8_GBR/blob/main/README.md",
    "source_url": "https://github.com/ymbozec/REEFMOD.6.8_GBR",
}, {
    "display_name": "CoCoNet",
    "name": "CoCoNet",
    "description": "Coral Community Network -  Great Barrier Reef-scale community model.",
    "documentation_url": "https://gbrrestoration.org/wp-content/uploads/2020/09/T6-Modelling-Methods-and-Findings_26April_FINAL3.pdf#page=23",
    "source_url": "https://github.com/gbrrestoration/CoCoNet-model",
}, {
    "display_name": "IPMF",
    "name": "IPMF",
    "description": "IPMF private repo at the moment",
    "documentation_url": "https://github.com/open-AIMS/IPMF/blob/main/README.md",
    "source_url": "https://github.com/open-AIMS/IPMF/",
}]

# Register each model and show the response for each.
auth = token_manager.get_auth
responses = [register_item(model, ItemType.MODEL, auth) for model in models]
# Plain loop for the printing side effect; avoids building a list of None
# and rebinding the builtin `vars`.
for response in responses:
    print(json.dumps(response.json(), indent=2))

# List the models now held in the registry.
auth = token_manager.get_auth
result = registry_list(ItemType.MODEL, auth)
print(json.dumps(result.json(), indent=2))

Register Workflow Definition

Return to Top

# Workflow definition payload. Every value here is a placeholder ("string");
# replace them with real details before registering a real workflow.
workflows = [{
    "display_name": "string",
    "version": "string",
    "software": "string",
    "automation_schedule": {},
    "input_templates": [
        "string",
    ],
    "output_templates": [
        "string",
    ],
}]

auth = token_manager.get_auth
responses = [register_item(wf, ItemType.MODEL_RUN_WORKFLOW, auth) for wf in workflows]
# Plain loop for the printing side effect; avoids building a list of None
# and rebinding the builtin `vars`.
for response in responses:
    print(json.dumps(response.json(), indent=2))
Token validation failed due to error: Signature has expired.
Refreshing using refresh token

{
  "status": {
    "success": true,
    "details": "Successfully uploaded the complete item. Return item includes handle id."
  },
  "created_item": {
    "display_name": "string",
    "version": "string",
    "software": "string",
    "automation_schedule": {},
    "input_templates": [
      "string"
    ],
    "output_templates": [
      "string"
    ],
    "id": "10378.1/1691171",
    "created_timestamp": 1662358946,
    "updated_timestamp": 1662358946,
    "item_category": "ENTITY",
    "item_subtype": "MODEL_RUN_WORKFLOW_DEFINITION",
    "record_type": "COMPLETE_ITEM"
  }
}

Register Modeller/Person/People

Return to Top

# Single-person example.
people = [{
    "display_name": "Andrew Freebairn",
    "first_name": "Andrew",
    "last_name": "Freebairn",
    # Trailing space removed so the stored address is a clean email.
    "email": "andrew.freebairn@csiro.au",
    "orcid": "https://orcid.org/0000-0001-9429-6559",
}]
auth = token_manager.get_auth
responses = [register_item(person, ItemType.PERSON, auth) for person in people]
# Plain loop for the printing side effect; avoids building a list of None
# and rebinding the builtin `vars`.
for response in responses:
    print(json.dumps(response.json(), indent=2))
# Payloads for every person to register.
# NOTE(review): the bare "https://orcid.org/" values look like placeholders
# for people without a known ORCID - confirm the registry accepts them.
people = [{
    "display_name": "Andrew Freebairn",
    "first_name": "Andrew",
    "last_name": "Freebairn",
    # Trailing space removed so the stored address is a clean email.
    "email": "andrew.freebairn@csiro.au",
    "orcid": "https://orcid.org/0000-0001-9429-6559",
}, {
    "display_name": "Ross Petridis",
    "first_name": "Ross",
    "last_name": "Petridis",
    "email": "ross.petridis@csiro.au",
    "orcid": "https://orcid.org/",
}, {
    "display_name": "Peter Baker",
    "first_name": "Peter",
    "last_name": "Baker",
    "email": "peter.baker122@csiro.au",
    "orcid": "https://orcid.org/",
}, {
    "display_name": "Jonathan Yu",
    "first_name": "Jonathan",
    "last_name": "Yu",
    "email": "jonathan.yu@csiro.au",
    "orcid": "https://orcid.org/",
}]

# Register each person and show the response for each.
auth = token_manager.get_auth
responses = [register_item(person, ItemType.PERSON, auth) for person in people]
# Plain loop for the printing side effect; avoids building a list of None
# and rebinding the builtin `vars`.
for response in responses:
    print(json.dumps(response.json(), indent=2))

# List the people now held in the registry.
auth = token_manager.get_auth
result = registry_list(ItemType.PERSON, auth)
print(json.dumps(result.json(), indent=2))

Register Organisation/s

Return to Top

# Single-organisation example.
organisations = [{
    "display_name": "Commonwealth Scientific and Industrial Research Organisation",
    "name": "Commonwealth Scientific and Industrial Research Organisation (CSIRO)",
    "ror": "https://ror.org/03qn8fb07",
}]
auth = token_manager.get_auth
responses = [register_item(org, ItemType.ORGANISATION, auth) for org in organisations]
# Plain loop for the printing side effect; avoids building a list of None
# and rebinding the builtin `vars`.
for response in responses:
    print(json.dumps(response.json(), indent=2))
# Payloads for every organisation to register.
organisations = [{
    "display_name": "Commonwealth Scientific and Industrial Research Organisation",
    "name": "Commonwealth Scientific and Industrial Research Organisation (CSIRO)",
    "ror": "https://ror.org/03qn8fb07",
}, {
    "display_name": "The Great Barrier Reef Foundation",
    "name": "The Great Barrier Reef Foundation (GBRF)",
    "ror": "https://ror.org/00d4phf77",
}, {
    "display_name": "Australian Institute of Marine Science",
    "name": "Australian Institute of Marine Science (AIMS)",
    # Leading space removed so the value is a clean URL.
    "ror": "https://ror.org/03x57gn41",
}, {
    "display_name": "Queensland University of Technology",
    "name": "Queensland University of Technology (QUT)",
    "ror": "https://ror.org/03pnv4752",
}, {
    "display_name": "James Cook University",
    "name": "James Cook University (JCU)",
    # Previously a copy of QUT's ROR id; this is JCU's own.
    # NOTE(review): verify against the ROR registry before running.
    "ror": "https://ror.org/04gsp2c11",
}, {
    "display_name": "The University of Queensland",
    "name": "The University of Queensland (UQ)",
    "ror": "https://ror.org/00rqy9422",
}, {
    "display_name": "Southern Cross University",
    "name": "Southern Cross University (SCU)",
    "ror": "https://ror.org/001xkv632",
}]

# Register each organisation and show the response for each.
auth = token_manager.get_auth
responses = [register_item(org, ItemType.ORGANISATION, auth) for org in organisations]
# Plain loop for the printing side effect; avoids building a list of None
# and rebinding the builtin `vars`.
for response in responses:
    print(json.dumps(response.json(), indent=2))

# List the organisations now held in the registry.
auth = token_manager.get_auth
result = registry_list(ItemType.ORGANISATION, auth)
print(json.dumps(result.json(), indent=2))
def registry_update(payload, id, item_type, auth):
    """Replace the details of an already registered item.

    Args:
        payload: JSON-serialisable body holding the item's new details.
        id: Identifier of the item to update, sent as the ``id`` query
            parameter.
        item_type: ItemType member; together with its ProvType it selects
            the endpoint path.
        auth: Zero-argument callable whose return value is handed to
            requests as the ``auth`` argument.

    Returns:
        The requests response of the PUT call. The status is not checked
        here; that is left to the caller.
    """
    prov_type = prov_of_item(item_type)
    endpoint = (
        f'{registry_api}/registry/'
        f'{prov_type.name.lower()}/{item_type.name.lower()}/update'
    )
    # Let requests build the query string so reserved characters in the id
    # (e.g. '/', '&', '#') are percent-encoded instead of corrupting the URL.
    return requests.put(endpoint, params={'id': id}, json=payload, auth=auth())

Please use the Update function if something has already been registered

Each registry type (Model Run, Organisation, Person, Model, etc) has the ability to be updated

For example, see the Organisation update API documentation.

Below is an example of identifying an existing registered item and then updating it.

auth = token_manager.get_auth

# Substring to look for in the registered organisation names.
name = 'CSIRO'

# New details to write to every matching record.
payload = {
    "display_name": "Commonwealth Scientific and Industrial Research Organisation",
    "name": "Commonwealth Scientific and Industrial Research Organisation (CSIRO)",
    "ror": "https://ror.org/03qn8fb07",
}

# Fetch the registered organisations and keep those whose name contains the
# search text. Several records may match, in which case all are updated.
result = registry_list(ItemType.ORGANISATION, auth)
result_json = result.json()['items']
found_org = [org for org in result_json if name in org['name']]

# Call the update endpoint once per match, passing its id and the new payload.
results = [
    registry_update(payload, org['id'], ItemType.ORGANISATION, auth)
    for org in found_org
]
print(results)
#Note all