Schritt 6: JupyterHub einrichten

In diesem Abschnitt lernen Sie, wie Sie einen JupyterHub einrichten. Dieser JupyterHub wird auf dem Manager Node erstellt und dann auf die Worker Nodes verteilt. Stellen Sie sich vor, ein Student loggt sich in eine Web-Anwendung ein und möchte direkt mit dem Programmieren beginnen. Der Manager Node startet dann einen Docker Container auf einem Worker Node, sodass der Student die Rechenressourcen dort nutzen kann, um seine Programme auszuführen.

Directory erstellen

Erstellen Sie ein Directory für JupyterHub auf dem Manager Node. Dieses Directory wird später auch für die Web-Anwendung verwendet.

mkdir -p /opt/WebApp
cd /opt/WebApp

Konfiguration JupyterHub

Erstellen Sie die Python-Konfigurationsdatei für JupyterHub.

nano jupyterhub_config.py

Fügen Sie den folgenden Code in die Datei ein.


import base64
import fcntl
import json
import os
import shutil
import subprocess
import sys
from urllib.parse import quote_plus

from dockerspawner import SwarmSpawner, DockerSpawner
from jupyterhub import orm
from jupyterhub.utils import url_path_join
from oauthenticator.generic import GenericOAuthenticator
from oauthenticator.oauth2 import OAuthLoginHandler, OAuthenticator, OAuthLogoutHandler
from tornado import web
from traitlets import Unicode, List, validate, TraitError

def _serialize_state(state):
    """Serialize OAuth state to a base64 string after passing through JSON"""
    json_state = json.dumps(state)
    return base64.urlsafe_b64encode(json_state.encode("utf8")).decode("ascii")

class ShibbolethClerkLoginHandler(OAuthLoginHandler):
    """Login handler that prefers Shibboleth request headers over OAuth.

    If the first header configured on the authenticator is present and
    non-empty, the user is logged in directly from that header. Otherwise
    the request is redirected to the OAuth provider's authorize endpoint.
    """

    def _get_user_data_from_request(self):
        """Get shibboleth attributes (user data) from request headers.

        Returns:
            dict: always contains ``'shibboleth'`` (bool). When the first
            configured header has a non-empty value, ``'shibboleth'`` is
            ``True`` and ``'jh_name'`` holds that value prefixed with
            ``'shibboleth-'``.
        """
        # NOTE(review): these headers are trusted as-is; presumably the
        # fronting web server strips client-supplied copies -- confirm.
        user_data = {'shibboleth': False}
        value_list = [self.request.headers.get(header, "") for header in self.authenticator.headers]
        self.log.info("User data debug: %s", (value_list) )
        # The first configured header is used as the unique user name.
        if value_list[0]:
            user_data['jh_name'] = 'shibboleth-' + value_list[0]
            user_data['shibboleth'] = True
        self.log.info("User data debug: %s", user_data)

        return user_data

    async def get(self):
        """Get user data and log user in.

        Raises:
            tornado.web.HTTPError: 403 when Shibboleth data was supplied but
                ``login_user`` returned no user.
        """
        self.statsd.incr('login.request')
        user_data = self._get_user_data_from_request()

        if user_data['shibboleth']:
            # Shibboleth path: authenticate straight from the header data.
            user = await self.login_user(user_data)
            if user is None:
                raise web.HTTPError(403)
            self.redirect(self.get_next_url(user))
            return

        # OAuth path: no Shibboleth header, so start the authorization
        # code flow with the configured provider.
        redirect_uri = self.authenticator.get_callback_url(self)
        token_params = self.authenticator.extra_authorize_params.copy()
        self.log.info(f"OAuth redirect: {redirect_uri}")

        state_id = self._generate_state_id()
        next_url = self._get_next_url()

        # The cookie keeps state_id and next_url; only state_id is sent to
        # the provider in the "state" parameter.
        cookie_state = _serialize_state({"state_id": state_id, "next_url": next_url})
        self.set_state_cookie(cookie_state)

        authorize_state = _serialize_state({"state_id": state_id})
        token_params["state"] = authorize_state

        self.authorize_redirect(
            redirect_uri=redirect_uri,
            client_id=self.authenticator.client_id,
            scope=self.authenticator.scope,
            extra_params=token_params,
            response_type="code",
        )

class ShibbolethClerkLogoutHandler(OAuthLogoutHandler):
    """Log a user out from JupyterHub by clearing their login cookie
    and then redirect to the project landing page."""

    async def get(self):
        """Run the logout steps and redirect to the landing page."""
        user = await self.get_current_user()

        # /logout can be requested without a valid session; only log the
        # name when there actually is a user instead of failing on None.
        if user is not None:
            self.log.info("User logged out: %s", user.name)
        await self.default_handle_logout()
        await self.handle_logout()
        self._jupyterhub_user = None
        # Redirecting Shibboleth users to authenticator.shibboleth_logout_url
        # is currently disabled; every user is sent to the landing page.
        self.redirect("https://databrix.dhbw-stuttgart.de")

class ShibbolethClerkAuthenticator(OAuthenticator):
    """Authenticator accepting Shibboleth header logins or an OAuth login.

    Shibboleth users are named from the data prepared by the login handler;
    OAuth users get the prefix ``clerk-`` in front of their normalized
    username.
    """

    #manage_groups = True

    headers = List(
        default_value=['mail'],
        config=True,
        help="""List of HTTP headers to get user data. First item is used as unique user name."""
    )
    shibboleth_logout_url = Unicode(
        default_value='',
        config=True,
        help="""Url to logout from shibboleth SP.""")

    login_handler = ShibbolethClerkLoginHandler
    logout_handler = ShibbolethClerkLogoutHandler

    @validate('headers')
    def _valid_headers(self, proposal):
        """Reject an empty ``headers`` list (the first item is required)."""
        if not proposal['value']:
            raise TraitError('Headers should contain at least 1 item.')
        return proposal['value']

    def login_url(self, base_url):
        """Return the login URL, i.e. ``<base_url>/login``."""
        return url_path_join(base_url, "login")

    async def authenticate(self, handler, data=None):
        """Authenticate via Shibboleth data if present, otherwise via OAuth.

        Args:
            handler: the request handler performing the login.
            data: dict produced by the login handler for Shibboleth logins
                (keys ``'shibboleth'`` and ``'jh_name'``); anything else
                selects the OAuth code flow.

        Returns:
            dict: an auth model with ``name`` and ``auth_state`` (Shibboleth)
            or the result of ``update_auth_model`` (OAuth).
        """
        # Explicit check instead of a bare ``except`` as control flow, so
        # unrelated errors are no longer swallowed and the flag is honoured.
        is_shibboleth_login = (
            isinstance(data, dict)
            and bool(data.get('shibboleth'))
            and 'jh_name' in data
        )
        if is_shibboleth_login:
            return {
                'name': data['jh_name'],
                'auth_state': data,
            }

        # OAuth path: exchange the authorization code for tokens and look
        # up the user's profile.
        access_token_params = self.build_access_tokens_request_params(handler, data)
        token_info = await self.get_token_info(handler, access_token_params)
        user_info = await self.token_to_user(token_info)

        username = self.user_info_to_username(user_info)
        username = self.normalize_username(username)

        refresh_token = token_info.get("refresh_token", None)
        if self.enable_auth_state and not refresh_token:
            self.log.debug(
                "Refresh token was empty, will try to pull refresh_token from previous auth_state"
            )
            refresh_token = await self.get_prev_refresh_token(handler, username)
            if refresh_token:
                token_info["refresh_token"] = refresh_token

        # NOTE(review): admin membership is tested on the un-prefixed name,
        # while the hub user is created as 'clerk-<name>' -- confirm that
        # this matches how admin_users is configured.
        # NOTE(review): 'public_metadata' is passed through as the single
        # group entry without validation -- confirm its type.
        auth_model = {
            "name": 'clerk-' + username,
            "admin": True if username in self.admin_users else None,
            "auth_state": self.build_auth_state_dict(token_info, user_info),
            "groups" : [user_info.get('public_metadata')]
        }

        return await self.update_auth_model(auth_model)

    def get_handlers(self, app):
        """Return the URL routes for the callback, logout and login handlers."""
        return [ (r'/oauth_callback',self.callback_handler),
                 (r'/logout',self.logout_handler),
                 (r'/login', self.login_handler),
               ]

"""
--------------create collaborative workspaces and define the roles of group members-------------------------------------
"""

# Not filled anywhere in this section; kept so the name stays defined.
gruppe_scope = []

# Per-group permissions for the shared workspaces group_1 .. group_20.
scope = [
    permission
    for space_name in (f'group_{i}' for i in range(1, 21))
    for permission in (
        f"servers!user={space_name}",
        f"access:servers!user={space_name}",
        f"read:users!user={space_name}",
        f"users:activity!user={space_name}",
        f"list:users!user={space_name}",
    )
]

# General user and group permissions.
scope += [
    "read:users",
    "list:users!user",
    "read:groups",
    "list:groups",
]

# Drop duplicates; the resulting order is not significant.
scope = list(set(scope))

"""
---------------------------add idle culler service and user role----------------------------------------------------
"""

# Accounts backing the shared group workspaces (group_1 .. group_20).
webapp_users = [f'group_{i}' for i in range(1,21)]

# Scopes shared by the "user" and "webapp-user" roles, before the
# per-group scopes from ``scope`` are appended.
_member_scopes = [
    "read:users!user",
    "read:groups",
    "access:services!service=forum",
    "access:services!service=idle_culler_warning",
]

c.JupyterHub.load_roles = [
    # Role "user" extended with service access and the group scopes.
    {
        "name": "user",
        "scopes": [*_member_scopes, *scope],
    },
    # Permissions for the idle culler service.
    {
        "name": "jupyterhub-idle-culler-role",
        "scopes": [
            "list:users",
            "read:users:activity",
            "read:servers",
            "delete:servers",
            "admin:users", # if using --cull-users
        ],
        "services": ["jupyterhub-idle-culler-service"],
    },
    # Access to the "grader" user's servers for group 'dozent' and user "grader".
    {
        "name": "dhbw-grader",
        "scopes": [
            "access:servers!user=grader",
            "servers!user=grader",
            "read:users!user=grader",
            "users:activity!user=grader",
            "list:users!user=grader",
        ],
        "groups":['dozent'],
        "users":["grader"]
    },
    # Same scopes as "user", assigned explicitly to the group accounts.
    {
        "name": "webapp-user",
        "scopes": [*_member_scopes, *scope],
        "users": webapp_users,
    },
]

# Idle culler, started with a timeout of 300.
_idle_culler_service = {
    "name": "jupyterhub-idle-culler-service",
    "command": [sys.executable, "-m", "jupyterhub_idle_culler", "--timeout=300"],
    "admin": True,
}

# Forum service, started as a module on port 8010.
_forum_service = {
    'name': 'forum',
    'url': 'http://0.0.0.0:8010',
    'command': [sys.executable, "-m", "forum_service", '--port', "8010"],
}

# Idle-culler warning service, started as a module on port 8011.
_idle_warning_service = {
    'name': 'idle_culler_warning',
    'url': 'http://0.0.0.0:8011',
    'command': [sys.executable, "-m", "idle_culler", '--port', "8011"],
}

c.JupyterHub.services = [
    _idle_culler_service,
    _forum_service,
    _idle_warning_service,
]

"""
---------------------------------mount nfs file and create persistent volume for users-----------------------------
"""
def pre_spawn_hook(spawner):
    """Bind-mount the user's directory into the single-user container.

    Sets ``spawner.extra_container_spec`` to a single bind mount from
    ``/opt/mount/data_kurs_app/user_directories/<username>`` to
    ``/home/jovyan/work``. Any previous ``extra_container_spec`` is replaced.

    Args:
        spawner: the spawner about to start; ``spawner.user.name`` is read.
    """
    username = spawner.user.name  # get the username

    # NOTE(review): the source directory is not created here; presumably it
    # already exists on the node that runs the container -- confirm.
    mounts = [
        {
            'type': 'bind',
            'source': os.path.join('/opt/mount/data_kurs_app/user_directories', username),
            'target': '/home/jovyan/work',
        },
    ]

    spawner.extra_container_spec = {'mounts': mounts}


# attach the hook function to the spawner
c.Spawner.pre_spawn_hook =  pre_spawn_hook
c.Spawner.debug = True
c.DockerSpawner.notebook_dir = '/home/jovyan/work'

"""
-----------------------------------Jupyterhub spawner setting----------------------------------------------------------
"""
c.SwarmSpawner.cmd = ["jupyterhub-singleuser", "--allow-root"]
# The section name was misspelled ("SwarmSpawmer"), so this option was
# silently ignored; it now applies to the SwarmSpawner as intended.
c.SwarmSpawner.use_internal_ip = True

# Both settings use the network named by DOCKER_NETWORK_NAME; a missing
# variable raises KeyError when the config is loaded.
c.SwarmSpawner.network_name = os.environ["DOCKER_NETWORK_NAME"]
c.SwarmSpawner.extra_host_config = { 'network_mode': os.environ["DOCKER_NETWORK_NAME"] }
c.SwarmSpawner.start_timeout = 500
c.Spawner.http_timeout = 60
c.Spawner.cpu_guarantee = 4
c.Spawner.mem_guarantee = "15G"
c.SwarmSpawner.remove = False
# Only place single-user servers on swarm worker nodes.
c.Spawner.extra_placement_spec = { "constraints": ["node.role == worker"] }
c.ConfigurableHTTPProxy.debug = True
c.Spawner.oauth_client_allowed_scopes = ["read:groups","read:users","read:roles","access:services!service=forum","access:services!service=idle_culler_warning"]

class MySwarmSpawner(SwarmSpawner):
    """SwarmSpawner that shows no options form and pins the image."""

    def options_form(self, spawner):
        """Return an empty string so that no spawn options form is shown."""
        no_form = ""
        return no_form

    def start(self):
        """Set the single-user image, then delegate to ``SwarmSpawner.start``."""
        # The image is assigned on every start, before the parent start runs.
        self.image = "guyq1997/jupyterlab-databrix:latest"
        parent_start = super().start
        return parent_start()

# Network binding of the hub; the public URL carries the /jupyterhub path.
c.JupyterHub.hub_ip = "0.0.0.0"
c.JupyterHub.bind_url = 'http://0.0.0.0:8000/jupyterhub'
#c.JupyterHub.base_url = "/jupyterhub"

# Use the customised spawner for all single-user servers.
c.JupyterHub.spawner_class = MySwarmSpawner

"""
-----------------------------Set up and configure shibboleth&clerk Authenticator-----------------------------------
"""
c.Authenticator.allow_all = True
c.JupyterHub.shutdown_on_logout = True
c.JupyterHub.authenticator_class = ShibbolethClerkAuthenticator
c.ShibbolethClerkAuthenticator.allow_all = True
c.ShibbolethClerkAuthenticator.client_id = 'FWOKsO13UUY9WXWV'
# The OAuth client secret is intentionally NOT written into this file.
# A secret that was previously shown here in a comment has been removed and
# should be treated as compromised (rotate it at the provider).
# NOTE(review): supply the secret outside this file, e.g. through the
# environment -- confirm which variable the authenticator reads.
c.ShibbolethClerkAuthenticator.oauth_callback_url = 'https://1985609f-7839-4819-8840-2d38548e4ea5.ma.bw-cloud-instance.org/jupyterhub/hub/oauth_callback'
c.ShibbolethClerkAuthenticator.authorize_url = "https://tough-lemming-0.clerk.accounts.dev/oauth/authorize"
c.ShibbolethClerkAuthenticator.token_url = "https://tough-lemming-0.clerk.accounts.dev/oauth/token"
c.ShibbolethClerkAuthenticator.userdata_url = "https://tough-lemming-0.clerk.accounts.dev/oauth/userinfo"
c.ShibbolethClerkAuthenticator.scope = ["email", "profile", "public_metadata"]
c.ShibbolethClerkAuthenticator.username_claim = "email"
c.ShibbolethClerkAuthenticator.shibboleth_logout_url = "https://1985609f-7839-4819-8840-2d38548e4ea5.ma.bw-cloud-instance.org/Shibboleth.sso/Logout"
c.JupyterHub.admin_access = True
# OAuth users are created with the 'clerk-' prefix, hence the prefixed name.
c.Authenticator.admin_users = {'clerk-yuqiang.gu@dhbw-stuttgart.de'}

# Persist hub data on volume mounted inside container
data_dir = os.environ.get('DATA_VOLUME_CONTAINER', '/data')
c.JupyterHub.cookie_secret_file = os.path.join(data_dir,'jupyterhub_cookie_secret')

# The password is URL-encoded so that characters such as '@', '/' or ':'
# cannot break the database URL. POSTGRES_HOST, POSTGRES_PASSWORD and
# POSTGRES_DB must be set; a missing variable raises KeyError on load.
c.JupyterHub.db_url = 'postgresql://postgres:{password}@{host}/{db}'.format(
    host=os.environ['POSTGRES_HOST'],
    password=quote_plus(os.environ['POSTGRES_PASSWORD']),
    db=os.environ['POSTGRES_DB'],
)
c.Authenticator.delete_invalid_users = True
c.JupyterHub.last_activity_interval = 120
# Login cookie lifetime in days (0.17 days is roughly four hours).
c.JupyterHub.tornado_settings = {"cookie_options": {"expires_days": 0.17}}

References

  1. Library für ShibbolethAuthenticator
  2. Konfiguration für jupyter-collaboration extension
  3. Anleitung von Docker Spawner für Jupyterhub
  4. Problembehebung von Persistent Volume mit DockerSpawner
  5. Cut idle Service in Jupyterhub