About

This notebook demonstrates how to integrate with the RRAP-IS and submit a model run.

Run all imports

Keep all your imports at the top of a notebook. It allows for easier management.

%%capture
import requests
import os
import sys
import json
from json2html import *
from bs4 import BeautifulSoup
from IPython.display import IFrame, display, HTML, JSON, Markdown, Image
from mdsisclienttools.auth.TokenManager import DeviceFlowManager
import mdsisclienttools.datastore.ReadWriteHelper as IOHelper
from urllib.error import HTTPError
import networkx as nx
import nx_altair as nxa
from networkx.readwrite import json_graph

import numpy as np
import pandas as pd

import holoviews as hv
from holoviews import opts
# Render HoloViews plots with the Bokeh backend.
hv.extension('bokeh')
# One shared 800x600 size, applied as the default for the graph-related
# element types (edge paths, graphs and nodes) configured below.
defaults = dict(width=800, height=600)
hv.opts.defaults(opts.EdgePaths(**defaults), opts.Graph(**defaults), opts.Nodes(**defaults))

import warnings
# Print only the first occurrence of each matching warning so repeated
# warnings do not flood the cell output.
warnings.filterwarnings(action='once')

Define global variables

As with imports, we define notebook-wide variables at the top and reuse them throughout the notebook.

# Base URLs of the RRAP-IS services used throughout this notebook.
data_store = "https://data.testing.rrap-is.com"
data_api = "https://data-api.testing.rrap-is.com"
registry_api = "https://registry-api.testing.rrap-is.com"
prov_api = "https://prov-api.testing.rrap-is.com"
auth_server = "https://auth.dev.rrap-is.com/auth/realms/rrap"
# To exercise the failure path, add an unreachable entry to base_urls,
# e.g. 'garbage': "https://frogs.are.green".
base_urls = {
    'data_api': data_api,
    'registry_api': registry_api,
    'prov_api': prov_api,
    'auth_server': auth_server,
    'data_store': data_store,
}
display('Checking base urls')

# Fail fast: the cells below call these services, so stop at the first one
# that cannot be reached or answers with an HTTP error status.
for url in base_urls.values():
    print(f'Testing - {url}', end="")
    try:
        # Without a timeout an unresponsive host would block the notebook
        # indefinitely instead of being reported as a failure.
        r = requests.get(url, timeout=30)
        r.raise_for_status()
    except requests.exceptions.RequestException as err:
        # RequestException is the base class of HTTPError, ConnectionError
        # and Timeout, so one handler covers every failure mode.
        print(' - Fail')
        raise SystemExit(err) from err
    print(' - Passed')
'Checking base urls'
Testing - https://data-api.testing.rrap-is.com - Passed
Testing - https://registry-api.testing.rrap-is.com - Passed
Testing - https://prov-api.testing.rrap-is.com - Passed
Testing - https://auth.dev.rrap-is.com/auth/realms/rrap - Passed
Testing - https://data.testing.rrap-is.com - Passed

Authentication

Set up tokens using the device authorisation flow against the Keycloak server

This could result in a browser window being opened if you don't have valid tokens cached in local storage.

Return to Top

# File in which tokens are kept between sessions.
local_token_storage = ".tokens.json"

# Device authorisation flow against the auth server configured above.
token_manager = DeviceFlowManager(
    keycloak_endpoint=auth_server,
    local_storage_location=local_token_storage,
    stage="TEST",
)
Attempting to generate authorisation tokens.

Looking for existing tokens in local storage.

Validating found tokens

Trying to use found tokens to refresh the access token.

Token refresh successful.

Endpoint Documentation

Endpoint documentation can be found by appending either /docs or /redoc to the end of a base URL.

For example:

Then select from the menu an endpoint function call e.g. /register/mint-dataset

Then append the function call onto the base url e.g. https://data-api.testing.rrap-is.com/register/mint-dataset

Return to Top

Notebook helper functions

Return to Top

from enum import Enum
from enum_switch import Switch

def wrap_html_table(data):
    """Wrap the first HTML table in ``data`` in a collapsible, scrollable box.

    The table is placed inside a 400px-high ``<div>`` that scrolls
    vertically, and that ``<div>`` is placed inside a ``<details>`` element
    whose ``<summary>`` reads "Results".

    Args:
        data: HTML markup expected to contain a ``<table>`` element.

    Returns:
        The prettified HTML as a string. When ``data`` contains no
        ``<table>`` the markup is returned prettified but unwrapped.
    """
    # Name the parser explicitly: letting BeautifulSoup guess emits a
    # GuessedAtParserWarning and makes the generated markup depend on which
    # parser libraries happen to be installed.
    soup = BeautifulSoup(data, "html.parser")

    table_tag = soup.find("table")
    if table_tag is None:
        # Nothing to wrap; avoid calling .wrap() on None.
        return soup.prettify()

    scroll_box = soup.new_tag("div")
    scroll_box['style'] = "width: auto; height: 400px; overflow-y: auto; "
    table_tag.wrap(scroll_box)

    details_tag = soup.new_tag("details")
    scroll_box.wrap(details_tag)

    summary_tag = soup.new_tag("summary")
    summary_tag.string = "Results"
    # The HTML content model for <details> is one <summary> followed by the
    # content, so the summary goes first rather than after the <div>.
    details_tag.insert(0, summary_tag)

    return soup.prettify()
    
def json_to_md(response_json):
    """Render a JSON-style object as a collapsible HTML table.

    ``response_json`` is converted to HTML with ``json2html`` and the result
    is passed through ``wrap_html_table``. Despite the function name, the
    returned string is HTML, not Markdown.
    """
    return wrap_html_table(json2html.convert(response_json))

def handle_request_return_raw(method, url, params=None, payload=None, auth=None):
    """Send an HTTP request and return the raw ``requests.Response``.

    Equivalent to ``handle_request(..., parse_json=False)``; kept as a
    separate name so existing callers continue to work.

    Args:
        method: HTTP method name, e.g. ``"GET"`` or ``"POST"``.
        url: Full URL to call.
        params: Optional query-string parameters.
        payload: Optional object sent as the JSON request body.
        auth: Optional ``requests`` auth object.

    Returns:
        The ``requests.Response`` on success, or ``{"error": exception}``
        when the request failed or returned an HTTP error status.
    """
    return handle_request(
        method, url, params=params, payload=payload, auth=auth, parse_json=False
    )


def handle_request(method, url, params=None, payload=None, auth=None, parse_json=True):
    """Send an HTTP request and return its decoded JSON body or raw response.

    Failures are printed and returned as a dict instead of being raised, so
    a notebook cell keeps running and can display the error.

    Args:
        method: HTTP method name, e.g. ``"GET"`` or ``"POST"``.
        url: Full URL to call.
        params: Optional query-string parameters.
        payload: Optional object sent as the JSON request body.
        auth: Optional ``requests`` auth object.
        parse_json: When true (default) return ``response.json()``,
            otherwise return the ``requests.Response`` itself.

    Returns:
        The decoded JSON body or the response object on success, or
        ``{"error": exception}`` when the request failed or returned an
        HTTP error status.

    Raises:
        ValueError: If ``parse_json`` is true and the body is not valid JSON
            (decoding happens outside the error handling, as before).
    """
    try:
        # Pass params and payload together so neither is silently dropped
        # when a caller supplies both. ``or None`` keeps the earlier
        # behaviour of sending nothing for empty/falsy values.
        response = requests.request(
            method,
            url=url,
            params=params or None,
            json=payload or None,
            auth=auth,
        )
        # Raises requests.exceptions.HTTPError for 4xx/5xx statuses.
        response.raise_for_status()
    except requests.exceptions.HTTPError as http_err:
        # Must be the requests exception class: the HTTPError imported from
        # urllib.error is never raised by requests, so this branch would
        # otherwise be dead and HTTP failures mislabelled as "Other error".
        print(f'HTTP error occurred: {http_err}')
        return {"error": http_err}
    except Exception as err:
        print(f'Other error occurred: {err}')
        return {"error": err}

    return response.json() if parse_json else response
        
class ProvType(Enum):
    """Provenance category of a registry item.

    The lower-cased member name is used as a path segment when the registry
    helper functions in this notebook build endpoint URLs.
    """

    AGENT = 1
    ACTIVITY = 2
    ENTITY = 3

class ItemType(Enum):
    """Kinds of item handled by the registry helpers in this notebook.

    The lower-cased member name is used as a path segment when the registry
    helper functions build endpoint URLs.
    """

    MODEL = 1
    PERSON = 2
    ORGANISATION = 3
    MODELRUN = 4
    MODEL_RUN_WORKFLOW = 5

class ProvTypeFromItemType(Switch):
    """Switch that maps an ``ItemType`` member to its ``ProvType``.

    Each method is named after one ``ItemType`` member and returns the
    provenance category for that member.
    """

    # --- agents ---
    def PERSON(self):
        return ProvType.AGENT

    def ORGANISATION(self):
        return ProvType.AGENT

    # --- activities ---
    def MODELRUN(self):
        return ProvType.ACTIVITY

    # --- entities ---
    def MODEL(self):
        return ProvType.ENTITY

    def MODEL_RUN_WORKFLOW(self):
        return ProvType.ENTITY

# Callable dispatcher: prov_of_item(item_type) returns the ProvType for it.
prov_of_item = ProvTypeFromItemType(ItemType)

# Sanity check: show the provenance category of every item type. The results
# are collected first and printed in a plain loop, so ``provs`` holds the
# ProvType values rather than a list of print()'s None return values.
provs = [prov_of_item(t) for t in ItemType]
for prov in provs:
    print(prov)

def register_item(payload, item_type, auth):
    """Create a registry item of the given type.

    Args:
        payload: Object sent as the JSON body of the request.
        item_type: ``ItemType`` member selecting the registry endpoint.
        auth: Zero-argument callable returning the auth object to use.

    Returns:
        Whatever ``handle_request`` returns for the POST call.
    """
    provenance = prov_of_item(item_type)
    url = registry_api + f'/registry/{provenance.name.lower()}/{item_type.name.lower()}/create'
    return handle_request("POST", url, payload=payload, auth=auth())

def registry_list(item_type, auth):
    """List the registry items of the given type.

    Args:
        item_type: ``ItemType`` member selecting the registry endpoint.
        auth: Zero-argument callable returning the auth object to use.

    Returns:
        Whatever ``handle_request`` returns for the GET call.
    """
    provenance = prov_of_item(item_type)
    url = registry_api + f'/registry/{provenance.name.lower()}/{item_type.name.lower()}/list'
    return handle_request("GET", url, auth=auth())

def registry_fetch(params, item_type, auth):
    """Fetch registry items of the given type using query parameters.

    Args:
        params: Query-string parameters passed to the fetch endpoint.
        item_type: ``ItemType`` member selecting the registry endpoint.
        auth: Zero-argument callable returning the auth object to use.

    Returns:
        Whatever ``handle_request`` returns for the GET call.
    """
    provenance = prov_of_item(item_type)
    url = registry_api + f'/registry/{provenance.name.lower()}/{item_type.name.lower()}/fetch'
    return handle_request("GET", url, params=params, auth=auth())
ProvType.ENTITY
ProvType.AGENT
ProvType.AGENT
ProvType.ACTIVITY
ProvType.ENTITY

Demonstration

This demonstration illustrates how the RRAP-IS system can be used to upload and register model outputs and discover provenance information (what data was used for a particular model run and what are the associated outputs).

For this demonstration it is assumed that the model has been executed, that both the input and output datasets have been registered, and that the model and all associated templates have been registered. A model run parameter file defines these registered items (capturing their handle ID values) and is loaded as input for the model run registration.

For the demonstration a fictitious model is used; the data is actual RRAP data, and the provenance information is derived only from this exercise.

Return to Top

Get all template IDs

# List every model run workflow template held in the registry.
auth = token_manager.get_auth
postfix = "/registry/entity/model_run_workflow/list"
endpoint = f"{registry_api}{postfix}"
response_json = handle_request("GET", endpoint, auth=auth())
# Final expression of the cell: shown as a collapsible HTML table.
HTML(json_to_md(response_json))
status
success True
details Successfully listed items.
items
display_name software_id software_version automation_schedule input_templates output_templates annotations id created_timestamp updated_timestamp item_category item_subtype record_type
(Simulator) Model run workflow template 10378.1/1693340 10378.1/1693318 vfake1.2 None
template_id optional
10378.1/1693319 None
template_id optional
10378.1/1693319 None
None 10378.1/1693340 1667174524 1667174524 ENTITY MODEL_RUN_WORKFLOW_TEMPLATE COMPLETE_ITEM
(Testing) IPMF example generated workflow template workflow template id (10378.1/1693488) 10378.1/1693485 1.2.3 None
template_id optional
10378.1/1693477 None
10378.1/1693479 None
10378.1/1693481 None
template_id optional
10378.1/1693483 None
required
  • ipmf_model_run_identifier
  • RCP
  • Purpose
  • Intervention
  • Year_Start
  • Year_End
  • init_cover
  • settle_prob
  • fts
  • max.juvis_m2
  • counterfactual
optional
  • Fogging_reducer
  • Fogging_sites
  • Fogging_start
10378.1/1693488 1667259860 1667259860 ENTITY MODEL_RUN_WORKFLOW_TEMPLATE COMPLETE_ITEM
(Simulator) Model run workflow template 10378.1/1693313 10378.1/1693292 vfake1.2 None
template_id optional
10378.1/1693301 None
template_id optional
10378.1/1693297 None
None 10378.1/1693313 1667174321 1667174321 ENTITY MODEL_RUN_WORKFLOW_TEMPLATE COMPLETE_ITEM
seed_items
unparsable_items
total_item_count 3
complete_item_count 3
seed_item_count 0
unparsable_item_count 0
Results

Get Template CSV for selected template ID

# Download the CSV template for one workflow template and save it locally.
auth = token_manager.get_auth
workflow_template_id = "10378.1/1693488"
postfix = "/bulk/generate_template/csv"
param = f'workflow_template_id={workflow_template_id}'
endpoint = prov_api + postfix
# parse_json=False: the body is CSV text, so keep the raw response.
response = handle_request("GET", endpoint, param, None, auth(), False)
# splitlines() handles both LF and CRLF endings and, unlike split('\n'),
# does not yield a spurious empty row after the final newline.
workflow_table = pd.DataFrame([x.split(',') for x in response.text.splitlines()])

# The table's first row is the first line of the downloaded CSV, so write the
# rows back exactly as received. With pandas' defaults an index column and a
# row of integer column labels would be added, and the file would no longer
# start with the template's own header line.
workflow_table.to_csv('Model_Workflow.csv', index=False, header=False)
# Upload a filled-in model run CSV for conversion into model run records.
auth = token_manager.get_auth

postfix = "/bulk/convert_model_runs/csv"
endpoint = prov_api + postfix

# NOTE(review): the template is saved earlier as 'Model_Workflow.csv' but this
# cell uploads 'Model_Workflow_v1.csv' -- presumably a manually completed
# copy; confirm the intended filename.
# Binary mode is what requests recommends for file uploads, and the context
# manager closes the handle once the request is sent (no ResourceWarning).
with open('Model_Workflow_v1.csv', 'rb') as csv_file:
    files = {'csv_file': csv_file}
    response = requests.request('POST', url=endpoint, params=None, files=files, auth=auth())

# Kept at top level so the notebook displays it as the cell result.
response.json()
/tmp/ipykernel_3832/1726935669.py:4: ResourceWarning: unclosed file <_io.TextIOWrapper name='Model_Workflow_v1.csv' mode='r' encoding='UTF-8'>
  files = {'csv_file': open('Model_Workflow.csv')}
{'detail': 'None of the provided headers matched the expected workflow template flag to identify the workflow template.'}