About

This notebook is a demonstration of generating a record of a model run and searching the provenance database to identify linkages between data, models, users and organisations with the RRAP-IS system.

Run all imports

Keep all your imports at the top of a notebook. It allows for easier management.

%%capture
import requests
import os
import sys
import json
from json2html import *
from bs4 import BeautifulSoup
from IPython.display import IFrame, display, HTML, JSON, Markdown, Image
from mdsisclienttools.auth.TokenManager import DeviceFlowManager
from urllib.error import HTTPError
import networkx as nx
import nx_altair as nxa
from networkx.readwrite import json_graph

import numpy as np
import pandas as pd
import holoviews as hv
from holoviews import opts
# Render HoloViews objects with the bokeh backend.
hv.extension('bokeh')

# One shared canvas size, applied to every element type that makes up a graph plot.
defaults = {'width': 800, 'height': 600}
hv.opts.defaults(
    *(element(**defaults) for element in (opts.EdgePaths, opts.Graph, opts.Nodes))
)

import warnings
warnings.filterwarnings(action='once')

Define global variables

As with imports, we define the notebook's global variables at the top and reuse them throughout the notebook.

# Base URLs of the RRAP-IS services used throughout this notebook.
data_api = "https://data-api.testing.rrap-is.com"
registry_api = "https://registry-api.testing.rrap-is.com"
prov_api = "https://prov-api.testing.rrap-is.com"
auth_server = "https://auth.dev.rrap-is.com/auth/realms/rrap"
# To exercise the failure path below, temporarily add an unreachable URL here.
base_urls = {
    'data_api': data_api,
    'registry_api': registry_api,
    'prov_api': prov_api,
    'auth_server': auth_server,
}
display('Checking base urls')

# Fail fast: stop the notebook at the first service that cannot be reached,
# since every later cell depends on all of them.
for url in base_urls.values():
    print(f'Testing - {url}', end="")
    try:
        # Without a timeout an unresponsive host would hang the notebook forever.
        r = requests.get(url, timeout=30)
        r.raise_for_status()
    except requests.exceptions.RequestException as err:
        # RequestException is the base class of HTTPError, ConnectionError and
        # Timeout, so one handler covers bad statuses and transport failures.
        print(' - Fail')
        raise SystemExit(f'Base URL check failed for {url}: {err}') from err
    print(' - Passed')
'Checking base urls'
Testing - https://data-api.testing.rrap-is.com - Passed
Testing - https://registry-api.testing.rrap-is.com - Passed
Testing - https://prov-api.testing.rrap-is.com - Passed
Testing - https://auth.dev.rrap-is.com/auth/realms/rrap - Passed

Authentication

Set up tokens using the device authorisation flow against the Keycloak server.

This could result in a browser window being opened if you don't have valid tokens cached in local storage.

Return to Top

# File in which tokens are cached between runs, so a fresh login is only
# needed when no valid cached tokens are available.
local_token_storage = ".tokens.json"

# Device-flow token manager bound to the auth server defined with the other
# global variables; later cells call token_manager.get_auth to authenticate requests.
token_manager = DeviceFlowManager(
    keycloak_endpoint=auth_server,
    local_storage_location=local_token_storage,
    stage="TEST",
)

Helper functions

Return to Top

def wrap_html_table(data):
    """Wrap the first ``<table>`` in ``data`` in a collapsible, scrollable box.

    The table is placed inside a fixed-height ``<div>`` that scrolls
    vertically, and that ``<div>`` is placed inside a ``<details>`` element
    labelled "Results".

    Args:
        data: HTML markup, expected to contain a ``<table>`` element.

    Returns:
        The prettified HTML as a string. If ``data`` contains no ``<table>``
        the markup is returned prettified but otherwise unwrapped.
    """
    # Name the parser explicitly: letting BeautifulSoup guess emits a warning
    # and yields different markup depending on which parsers are installed.
    soup = BeautifulSoup(data, "html.parser")

    table = soup.find("table")
    if table is None:
        # Nothing to wrap (e.g. empty input); avoid an AttributeError on None.
        return soup.prettify()

    scroll_box = soup.new_tag("div")
    scroll_box['style'] = "width: auto; height: 400px; overflow-y: auto; "
    table.wrap(scroll_box)
    scroll_box.wrap(soup.new_tag("details"))

    summary = soup.new_tag("summary")
    summary.string = "Results"
    # <summary> belongs first inside <details>; anchor on the div we created
    # rather than on whichever <div> happens to come first in the document.
    scroll_box.insert_before(summary)

    return soup.prettify()
    
def json_to_md(response_json):
    """Render a JSON-style object as a collapsible HTML table.

    Despite the name, the result is HTML: ``json2html`` converts the object
    to markup, which ``wrap_html_table`` then wraps for display.
    """
    return wrap_html_table(json2html.convert(response_json))
    
def handle_request(method, url, params=None, payload=None, auth=None):
    """Send an HTTP request and return the decoded JSON response.

    Args:
        method: HTTP method name, e.g. ``'GET'`` or ``'POST'``.
        url: Full URL of the endpoint.
        params: Optional query-string parameters.
        payload: Optional object sent as the JSON request body.
        auth: Optional auth object passed through to ``requests``.

    Returns:
        The decoded JSON body on success. On failure the error is printed
        and ``{"error": <exception>}`` is returned instead of raising, so
        callers should check for an ``"error"`` key.

    Note:
        ``params`` and ``payload`` are both sent when both are given;
        previously ``payload`` was silently dropped in that case. A
        successful response whose body is not valid JSON still raises from
        ``response.json()``.
    """
    try:
        # Empty containers are normalised to None so that, as before, an
        # empty payload sends no body and empty params add no query string.
        response = requests.request(
            method,
            url=url,
            params=params or None,
            json=payload or None,
            auth=auth,
        )
        # If the response was successful, no exception will be raised.
        response.raise_for_status()
    except requests.exceptions.HTTPError as http_err:
        # requests raises its own HTTPError, not urllib.error.HTTPError, so
        # the requests class must be caught for this branch to ever run.
        print(f'HTTP error occurred: {http_err}')
        return {"error": http_err}
    except Exception as err:
        print(f'Other error occurred: {err}')
        return {"error": err}
    else:
        return response.json()

Endpoint Documentation

Endpoint documentation can be found by appending either /docs or /redoc to the end of a base URL.

For example:

Return to Top

Demonstration

Register a model run

A model run consists of nine main elements, many of which have already been registered in the RRAP-IS Registry.

These elements are:

  • Start Time
  • End Time
  • Workflow definition*
  • Input dataset*
  • Input template*
  • Output dataset*
  • Output template*
  • Modeller* and
  • Requesting Organisation*

*pre-registered elements.

For examples of registering elements please see RRAP-IS Register blog post or RRAP-IS Register Query blog post

see Endpoint documentation

# get_auth is kept as a callable and invoked at request time.
auth = token_manager.get_auth
postfix = "/model_run/register_complete"
endpoint = prov_api + postfix

# Model run record. Every "id" refers to an item already registered in the
# RRAP-IS Registry.
# NOTE(review): start_time/end_time look like Unix epoch seconds - confirm
# against the endpoint documentation.
payload = {
    "start_time": 0,
    "end_time": 1662467929,
    "workflow_definition": {"id": "10378.1/1691197"},
    "inputs": {
        "datasets": {
            # Keyed by the id of the template the dataset fulfils.
            "10378.1/1690478": {
                "template": {"id": "10378.1/1690478"},
                "dataset_type": "DATA_STORE",
                "dataset": {"id": "10378.1/1688622"},
            },
        },
    },
    "outputs": {
        "datasets": {
            "10378.1/1690478": {
                "template": {"id": "10378.1/1690478"},
                "dataset_type": "DATA_STORE",
                "dataset": {"id": "10378.1/1688634"},
            },
        },
    },
    "associations": {
        "modeller": {"id": "10378.1/1691160"},
        "requesting_organisation": {"id": "10378.1/1690557"},
    },
}

response_json = handle_request('POST', endpoint, payload=payload, auth=auth())
# Last expression of the cell, so the notebook renders the response table.
HTML(json_to_md(response_json))
status
success True
details All successful.
record_info
id 10378.1/1691345
prov_json {"prefix": {"default": "http://hdl.handle.net/"}, "activity": {"10378.1/1691345": {"model_run/10378.1/1691345": true, "item_category": "ACTIVITY", "item_subtype": "MODEL_RUN"}}, "entity": {"10378.1/1688622": {"model_run/10378.1/1691345": true, "item_category": "ENTITY", "item_subtype": "DATASET"}, "10378.1/1688634": {"model_run/10378.1/1691345": true, "item_category": "ENTITY", "item_subtype": "DATASET"}, "10378.1/1691197": {"model_run/10378.1/1691345": true, "item_category": "ENTITY", "item_subtype": "MODEL_RUN_WORKFLOW_DEFINITION", "prov:type": {"$": "prov:Collection", "type": "prov:QUALIFIED_NAME"}}, "10378.1/1690478": {"model_run/10378.1/1691345": true, "item_category": "ENTITY", "item_subtype": "DATASET_TEMPLATE"}, "10378.1/1691116": {"model_run/10378.1/1691345": true, "item_category": "ENTITY", "item_subtype": "MODEL"}}, "agent": {"10378.1/1691160": {"model_run/10378.1/1691345": true, "item_category": "AGENT", "item_subtype": "PERSON"}, "10378.1/1690557": {"model_run/10378.1/1691345": true, "item_category": "AGENT", "item_subtype": "ORGANISATION"}}, "used": {"_:id1": {"prov:activity": "10378.1/1691345", "prov:entity": "10378.1/1688622"}, "_:id3": {"prov:activity": "10378.1/1691345", "prov:entity": "10378.1/1691116"}, "_:id4": {"prov:activity": "10378.1/1691345", "prov:entity": "10378.1/1691197"}}, "wasGeneratedBy": {"_:id2": {"prov:entity": "10378.1/1688634", "prov:activity": "10378.1/1691345"}}, "wasAssociatedWith": {"_:id5": {"prov:activity": "10378.1/1691345", "prov:agent": "10378.1/1691160"}, "_:id6": {"prov:activity": "10378.1/1691345", "prov:agent": "10378.1/1690557"}}, "wasAttributedTo": {"_:id7": {"prov:entity": "10378.1/1688634", "prov:agent": "10378.1/1691160"}}, "hadMember": {"_:id8": {"prov:collection": "10378.1/1691197", "prov:entity": "10378.1/1690478"}, "_:id11": {"prov:collection": "10378.1/1691197", "prov:entity": "10378.1/1691116"}}, "wasInfluencedBy": {"_:id9": {"prov:influencee": "10378.1/1688622", "prov:influencer": 
"10378.1/1690478"}, "_:id10": {"prov:influencee": "10378.1/1688634", "prov:influencer": "10378.1/1690478"}}}
record
workflow_definition
id 10378.1/1691197
inputs
datasets
10378.1/1690478
template
id 10378.1/1690478
dataset_type DATA_STORE
dataset
id 10378.1/1688622
outputs
datasets
10378.1/1690478
template
id 10378.1/1690478
dataset_type DATA_STORE
dataset
id 10378.1/1688634
associations
modeller
id 10378.1/1691160
requesting_organisation
id 10378.1/1690557
start_time 0
end_time 1662467929
Results

Exploring linkages across RRAP-IS data, models, modellers and organisation

Simple exploration functions include:

  • Searching upstream from a starting point (optionally limited to a maximum depth), and
  • Searching downstream from a starting point (optionally limited to a maximum depth)

Explore Upstream Provenance

Let's explore upstream from the output dataset generated by the model run registered above. We limit the search to a depth of one, so only the directly linked elements are returned.

Return to Top

# Explore provenance upstream of the output dataset of the model run above.
auth = token_manager.get_auth
postfix = "/explore/upstream"
params = {
    "starting_id": "10378.1/1688634",
    "depth": 1
}

endpoint = prov_api + postfix

response_json = handle_request('GET', endpoint, params, None, auth())
# handle_request reports failures by returning {"error": ...} instead of
# raising; stop here with a clear message rather than an opaque KeyError.
if not isinstance(response_json, dict) or "graph" not in response_json:
    raise RuntimeError(f"Upstream exploration returned no graph: {response_json!r}")
result_graph = response_json["graph"]

# Build a networkx graph from the node-link data, then lay it out on a circle.
networkx_graph = json_graph.node_link_graph(result_graph)
im = hv.Graph.from_networkx(networkx_graph, nx.layout.circular_layout).opts(
    tools=['hover', 'tap'],
    node_size=10,
    node_color='item_category',
    cmap=['blue', 'orange', 'green', 'red'],
    directed=True,
    arrowhead_length=0.02,
    bgcolor='pink',
)
# Two label layers per node: category just above it, subtype just below it.
labels = hv.Labels(im.nodes, ['x', 'y'], 'item_category').opts(
    opts.Labels(text_font_size='12pt', text_color='blue', xoffset=0, yoffset=0.05, bgcolor='white')
)
labels_2 = hv.Labels(im.nodes, ['x', 'y'], 'item_subtype').opts(
    opts.Labels(text_font_size='8pt', xoffset=0, yoffset=-0.05, bgcolor='white')
)
hv_graph = im * labels * labels_2

hv.save(hv_graph, 'lineage_network.html', backend='bokeh')

# filename= makes it explicit that the saved file is loaded, rather than
# relying on IPython detecting that the string is an existing path.
HTML(filename='lineage_network.html')
<!DOCTYPE html> lineage_network

Explore Downstream Provenance

Let's explore downstream from the workflow definition used in the model run above. We limit the search to a depth of one, so only the directly linked elements are returned.

Return to Top

# Explore provenance downstream of the workflow definition used above.
auth = token_manager.get_auth
postfix = "/explore/downstream"
params = {
    "starting_id": "10378.1/1691197",
    "depth": 1
}
endpoint = prov_api + postfix

response_json = handle_request('GET', endpoint, params, None, auth())
# handle_request reports failures by returning {"error": ...} instead of
# raising; stop here with a clear message rather than an opaque KeyError.
if not isinstance(response_json, dict) or "graph" not in response_json:
    raise RuntimeError(f"Downstream exploration returned no graph: {response_json!r}")
result_graph = response_json["graph"]

# Build a networkx graph from the node-link data, then apply the
# Fruchterman-Reingold layout.
networkx_graph = json_graph.node_link_graph(result_graph)
im = hv.Graph.from_networkx(networkx_graph, nx.layout.fruchterman_reingold_layout).opts(
    tools=['hover', 'tap'],
    node_size=10,
    node_color='item_category',
    cmap=['blue', 'orange', 'green', 'red'],
    directed=True,
    arrowhead_length=0.02,
    bgcolor='pink',
)
# Two label layers per node: category just above it, subtype just below it.
labels = hv.Labels(im.nodes, ['x', 'y'], 'item_category').opts(
    opts.Labels(text_font_size='12pt', text_color='blue', xoffset=0, yoffset=0.05, bgcolor='white')
)
labels_2 = hv.Labels(im.nodes, ['x', 'y'], 'item_subtype').opts(
    opts.Labels(text_font_size='8pt', xoffset=0, yoffset=-0.05, bgcolor='white')
)
hv_graph = im * labels * labels_2

hv.save(hv_graph, 'network.html', backend='bokeh')

# filename= makes it explicit that the saved file is loaded, rather than
# relying on IPython detecting that the string is an existing path.
HTML(filename="network.html")
<!DOCTYPE html> network