diff --git a/.gitignore b/.gitignore index 986f39ae1..aa73ebb11 100644 --- a/.gitignore +++ b/.gitignore @@ -82,3 +82,4 @@ secrets.yaml # faker official.csv fact.png +jupyterhub/.env-dev diff --git a/README.md b/README.md index e779590d0..425aa2dc4 100644 --- a/README.md +++ b/README.md @@ -36,7 +36,9 @@ There is a few other documentation links available on the [website](https://clow ## Installation The easiest way of running Clowder v2 is checking out the [code](https://github.com/clowder-framework/clowder2) -and running `docker compose up` in the main directory. +and running `docker compose up` in the main directory. If you would like to run Clowder with JupyterHub, +you can use our script `docker-prod.sh` to start the services. Run `./docker-prod.sh prod up` to start the services +and `./docker-prod.sh prod down` to stop them. Helm charts are available for running Clowder v2 on Kubernetes. See the [helm](https://github.com/clowder-framework/clowder2/tree/main/deployments/kubernetes/charts) directory for more information. diff --git a/docker-compose.dev.yml b/docker-compose.dev.yml index 92df388f8..61dcc8bf0 100644 --- a/docker-compose.dev.yml +++ b/docker-compose.dev.yml @@ -67,6 +67,8 @@ services: postgres: image: postgres + networks: + - clowder2 volumes: - postgres_data:/var/lib/postgresql/data environment: @@ -79,6 +81,8 @@ services: volumes: - ./scripts/keycloak/clowder-realm-dev.json:/opt/keycloak/data/import/realm.json:ro - ./scripts/keycloak/clowder-theme/:/opt/keycloak/themes/clowder-theme/:ro + networks: + - clowder2 command: - start-dev - --http-relative-path /keycloak @@ -172,6 +176,7 @@ services: networks: clowder2: + name: clowder2 ## By default this config uses default local driver, ## For custom volumes replace with volume driver configuration. 
diff --git a/docker-compose.jupyter-dev.yml b/docker-compose.jupyter-dev.yml new file mode 100644 index 000000000..bea03e898 --- /dev/null +++ b/docker-compose.jupyter-dev.yml @@ -0,0 +1,30 @@ +version: '3' +services: + jupyterhub: + build: + context: jupyterhub + dockerfile: Dockerfile.jupyterhub + args: + JUPYTERHUB_VERSION: 4 + restart: always + networks: + - clowder2 + volumes: + # The JupyterHub configuration file + - ./jupyterhub/jupyterhub_dev_config.py:/srv/jupyterhub/jupyterhub_config.py:ro + # Bind Docker socket on the host so we can connect to the daemon from + # within the container + - /var/run/docker.sock:/var/run/docker.sock:rw + # Bind Docker volume on host for JupyterHub database and cookie secrets + - jupyterhub-data:/data + ports: + - "8765:8000" + env_file: + - jupyterhub/.env-dev + command: jupyterhub -f /srv/jupyterhub/jupyterhub_config.py + + depends_on: + - keycloak + +volumes: + jupyterhub-data: diff --git a/docker-compose.jupyter.yml b/docker-compose.jupyter.yml new file mode 100644 index 000000000..b8f02659c --- /dev/null +++ b/docker-compose.jupyter.yml @@ -0,0 +1,33 @@ +version: '3' +services: + jupyterhub: + build: + context: jupyterhub + dockerfile: Dockerfile.jupyterhub + args: + JUPYTERHUB_VERSION: 4 + restart: always + networks: + - clowder2 + volumes: + # The JupyterHub configuration file + - ./jupyterhub/jupyterhub_config.py:/srv/jupyterhub/jupyterhub_config.py:ro + # Bind Docker socket on the host so we can connect to the daemon from + # within the container + - /var/run/docker.sock:/var/run/docker.sock:rw + # Bind Docker volume on host for JupyterHub database and cookie secrets + - jupyterhub_data:/data + env_file: + - jupyterhub/.env + labels: + - "traefik.enable=true" + - "traefik.http.routers.jupyterhub.rule=PathPrefix(`/jupyterhub`)" + - "traefik.http.services.jupyterhub.loadbalancer.server.port=8000" + + command: jupyterhub -f /srv/jupyterhub/jupyterhub_config.py + + depends_on: + - keycloak + +volumes: + jupyterhub_data: 
diff --git a/docker-dev.sh b/docker-dev.sh index 44f50afab..eb775df60 100755 --- a/docker-dev.sh +++ b/docker-dev.sh @@ -7,3 +7,12 @@ if [ "$1" = "down" ] then docker-compose -f docker-compose.dev.yml -p clowder2-dev down fi +if [ "$1" = "jupyter" ] && [ "$2" = "up" ] +then + docker-compose -f docker-compose.dev.yml -f docker-compose.jupyter-dev.yml -p clowder2-dev up -d --build +fi + +if [ "$1" = "jupyter" ] && [ "$2" = "down" ] +then + docker-compose -f docker-compose.dev.yml -f docker-compose.jupyter-dev.yml -p clowder2-dev down +fi diff --git a/docker-prod.sh b/docker-prod.sh new file mode 100755 index 000000000..e661a9a31 --- /dev/null +++ b/docker-prod.sh @@ -0,0 +1,10 @@ +#!/usr/bin/env sh +if [ "$1" = "prod" ] && [ "$2" = "up" ] +then + docker-compose -f docker-compose.yml -f docker-compose.jupyter.yml up -d +fi + +if [ "$1" = "prod" ] && [ "$2" = "down" ] +then + docker-compose -f docker-compose.yml -f docker-compose.jupyter.yml down +fi diff --git a/docs/docs/devs/getstarted.md b/docs/docs/devs/getstarted.md index bbf8f1633..6eb3b72aa 100644 --- a/docs/docs/devs/getstarted.md +++ b/docs/docs/devs/getstarted.md @@ -47,6 +47,8 @@ section below). - Running `docker-compose logs -f` displays the live logs for all containers. To view the logs of individual containers, provide the container name. For example, for viewing the backend module logs, run `docker-compose logs -f backend`. - Running `./docker-dev.sh down` brings down the required services. +- If you want to run the jupyterhub, you can run `./docker-dev.sh jupyter up`. The jupyterhub will be available at + `http://localhost:8765`. You can bring it down using `./docker-dev.sh jupyter down`. **Note:** `./docker-dev.sh` sets the project name flag to `-p clowder2-dev`. This is so that the dev containers don't get mixed with the production containers if the user is running both on the same machine using `docker-compose.yml`. 
diff --git a/frontend/package.json b/frontend/package.json index 0c9f543b4..2681aa035 100644 --- a/frontend/package.json +++ b/frontend/package.json @@ -12,7 +12,7 @@ "start-message": "babel-node tools/startMessage.js", "prestart": "npm-run-all --parallel start-message", "start": "npm-run-all --parallel open:src", - "start:dev": "export CLOWDER_REMOTE_HOSTNAME=http://localhost:8000 && npm run start", + "start:dev": "export CLOWDER_REMOTE_HOSTNAME=http://localhost:8000 && export JUPYTERHUB_URL=http://localhost:8765 && npm run start", "open:src": "babel-node tools/srcServer.js", "open:dist": "babel-node tools/distServer.js", "lint:watch": "npm run lint --watch", diff --git a/frontend/src/app.config.ts b/frontend/src/app.config.ts index 3b05b1825..fc33bdb6d 100644 --- a/frontend/src/app.config.ts +++ b/frontend/src/app.config.ts @@ -9,6 +9,7 @@ interface Config { hostname: string; apikey: string; GHIssueBaseURL: string; + jupyterHubURL: string; KeycloakBaseURL: string; KeycloakLogin: string; KeycloakLogout: string; @@ -65,6 +66,10 @@ config["KeycloakRegister"] = `${config.KeycloakBaseURL}/register`; config["searchEndpoint"] = `${hostname}/api/v2/elasticsearch`; config["publicSearchEndpoint"] = `${hostname}/api/v2/public_elasticsearch`; +// jupterhub +const localJupyterhubURL: string = `${config.hostname}/jupyterhub`; +config["jupyterHubURL"] = process.env.JUPYTERHUB_URL || localJupyterhubURL; + // refresh token time interval config["refreshTokenInterval"] = 1000 * 60; // 1 minute // updated extractor logs diff --git a/frontend/src/components/Layout.tsx b/frontend/src/components/Layout.tsx index e50d81d7d..fea4e64b6 100644 --- a/frontend/src/components/Layout.tsx +++ b/frontend/src/components/Layout.tsx @@ -23,6 +23,7 @@ import { RootState } from "../types/data"; import { AddBox, Explore } from "@material-ui/icons"; import HistoryIcon from "@mui/icons-material/History"; import GroupIcon from "@mui/icons-material/Group"; +import MenuBookIcon from 
"@mui/icons-material/MenuBook"; import Gravatar from "react-gravatar"; import PersonIcon from "@mui/icons-material/Person"; import InfoOutlinedIcon from "@mui/icons-material/InfoOutlined"; @@ -41,6 +42,8 @@ import AdminPanelSettingsIcon from "@mui/icons-material/AdminPanelSettings"; import { Footer } from "./navigation/Footer"; import BuildIcon from "@mui/icons-material/Build"; +import config from "../app.config"; + const drawerWidth = 240; const Main = styled("main", { shouldForwardProp: (prop) => prop !== "open" })<{ @@ -439,6 +442,22 @@ export default function PersistentDrawerLeft(props) { + {/*TODO: Need to make link dynamic */} + + + + + + + + + + diff --git a/frontend/webpack.config.dev.js b/frontend/webpack.config.dev.js index 688ca512b..6511e871f 100644 --- a/frontend/webpack.config.dev.js +++ b/frontend/webpack.config.dev.js @@ -8,6 +8,9 @@ import ESLintPlugin from "eslint-webpack-plugin"; console.log( `the current CLOWDER_REMOTE_HOSTNAME environment variable is ${process.env.CLOWDER_REMOTE_HOSTNAME}` ); +console.log( + `the JupyterHub URL is set to ${process.env.JUPYTERHUB_URL}` +) export default { mode: "development", @@ -40,6 +43,7 @@ export default { CLOWDER_REMOTE_HOSTNAME: JSON.stringify( process.env.CLOWDER_REMOTE_HOSTNAME ), + JUPYTERHUB_URL: JSON.stringify(process.env.JUPYTERHUB_URL), APIKEY: JSON.stringify(process.env.APIKEY), KeycloakBaseURL: JSON.stringify(process.env.KeycloakBaseURL), }, diff --git a/frontend/webpack.config.prod.js b/frontend/webpack.config.prod.js index 8102de89c..67ccf827b 100644 --- a/frontend/webpack.config.prod.js +++ b/frontend/webpack.config.prod.js @@ -11,6 +11,9 @@ import TerserPlugin from "terser-webpack-plugin"; console.log( `the current CLOWDER_REMOTE_HOSTNAME environment variable is ${process.env.CLOWDER_REMOTE_HOSTNAME}` ); +console.log( + `the JupyterHub URL is set to ${process.env.JUPYTERHUB_URL}` +) export default { mode: "production", @@ -47,6 +50,7 @@ export default { CLOWDER_REMOTE_HOSTNAME: JSON.stringify( 
process.env.CLOWDER_REMOTE_HOSTNAME ), + JUPYTERHUB_URL: JSON.stringify(process.env.JUPYTERHUB_URL), APIKEY: JSON.stringify(process.env.APIKEY), KeycloakBaseURL: JSON.stringify(process.env.KeycloakBaseURL), }, diff --git a/jupyterhub/.env-example b/jupyterhub/.env-example new file mode 100644 index 000000000..949459b1f --- /dev/null +++ b/jupyterhub/.env-example @@ -0,0 +1,14 @@ +# Example configuration file for Clowder JupyterHub +KEYCLOAK_HOSTNAME="keycloak:8080/keycloak" +# Development mode use the following line instead +#KEYCLOAK_HOSTNAME="keycloak:8080/keycloak" +KEYCLOAK_AUDIENCE="clowder" +KEYCLOAK_REALM="clowder" +JUPYTERHUB_ADMIN="admin" +#Change network name to the one created by docker-compose +DOCKER_NETWORK_NAME="clowder2_clowder2" +DOCKER_NOTEBOOK_IMAGE="quay.io/jupyter/base-notebook:latest" +DOCKER_NOTEBOOK_DIR="/home/jovyan/work" +JUPYTERHUB_CRYPT_KEY="" +CLOWDER_URL="localhost" +PROD_DEPLOYMENT="false" diff --git a/jupyterhub/Clowder_APIs.ipynb b/jupyterhub/Clowder_APIs.ipynb new file mode 100644 index 000000000..14beefba8 --- /dev/null +++ b/jupyterhub/Clowder_APIs.ipynb @@ -0,0 +1,193 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "6dfcb963-cf69-4505-9871-5cc13471f5dd", + "metadata": {}, + "source": [ + "## Clowder APIs" + ] + }, + { + "cell_type": "markdown", + "id": "310051ad-4262-42fc-ac28-911f92842a7e", + "metadata": {}, + "source": [ + "## Import libraries and setup utility function\n", + "\n", + "We start by importing the required libraries, data and setting up some utility functions and variables that we will use below." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 43, + "id": "ffec7cb7-1a82-4148-aad1-bb3b1b19915b", + "metadata": {}, + "outputs": [], + "source": [ + "import pyclowder\n", + "import json\n", + "import os\n", + "import pandas as pd\n", + "\n", + "import requests\n", + "\n", + "# Function to download the IRIS dataset\n", + "def download_iris_dataset():\n", + " # URL for the Iris dataset hosted by UCI Machine Learning Repository\n", + " url = \"https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data\"\n", + " \n", + " response = requests.get(url)\n", + " \n", + " if response.status_code == 200:\n", + " return response.content\n", + " else:\n", + " print(\"Failed to download the dataset. Status code:\", response.status_code)\n", + "\n", + "\n", + "CLOWDER_URL = \"http://localhost:8000\"" + ] + }, + { + "cell_type": "markdown", + "id": "f8d5c0ae-659b-4a52-a30a-0fa8b7051694", + "metadata": {}, + "source": [ + "## Token Generation" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "id": "59fca5f8-a5d6-419d-835a-023a73c5a1d7", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'token': 
'eyJhbGciOiJSUzI1NiIsInR5cCIgOiAiSldUIiwia2lkIiA6ICI2dUVlQ0xOc1hTQXZUN1VDek1FRVk2VmI4ajJnY1RhWlFESUpnbnFGSHVJIn0.eyJleHAiOjE3MTk5MzAzNTMsImlhdCI6MTcxOTkzMDA1MywianRpIjoiYjZhYTAwNjUtZmY1OS00M2QyLWJiOGYtNzliNGI0MTgwNzNmIiwiaXNzIjoiaHR0cDovL2xvY2FsaG9zdDo4MDgwL2tleWNsb2FrL3JlYWxtcy9jbG93ZGVyIiwic3ViIjoiZjg0Y2JjNmQtYzEzZC00MmVmLWFhN2MtMWQ4MmFjYzVhZWViIiwidHlwIjoiQmVhcmVyIiwiYXpwIjoiY2xvd2RlcjItYmFja2VuZCIsInNlc3Npb25fc3RhdGUiOiJlZDM0MzA3Ny1lZmNiLTRlOGMtYjM4OS0zN2JlMzk0MmUyNzAiLCJhbGxvd2VkLW9yaWdpbnMiOlsiaHR0cDovL2xvY2FsaG9zdDo4MDAwIl0sInJlYWxtX2FjY2VzcyI6eyJyb2xlcyI6WyJkZWZhdWx0LXJvbGVzLWNsb3dkZXIiLCJvZmZsaW5lX2FjY2VzcyIsInVtYV9hdXRob3JpemF0aW9uIl19LCJzY29wZSI6Im9wZW5pZCBwcm9maWxlIGVtYWlsIiwic2lkIjoiZWQzNDMwNzctZWZjYi00ZThjLWIzODktMzdiZTM5NDJlMjcwIiwiZW1haWxfdmVyaWZpZWQiOnRydWUsIm5hbWUiOiJWaXNtYXlhayBNb2hhbmFyYWphbiIsInByZWZlcnJlZF91c2VybmFtZSI6Im1vaGFuYXIyQGlsbGlub2lzLmVkdSIsImdpdmVuX25hbWUiOiJWaXNtYXlhayIsImZhbWlseV9uYW1lIjoiTW9oYW5hcmFqYW4iLCJlbWFpbCI6Im1vaGFuYXIyQGlsbGlub2lzLmVkdSJ9.NAZs_sDtIdmy02bIjILG59jeRK99bMPdODQqEkyn-jC0YP_949LAPhWMSvxG5-eVdhHOeGILlbPmWgzU9tuw4YOkn-mNdVIBh16EicMJJ6zXeYzVj5RuUfhtZYJ3LfoknjuPBABI44dqo-Ixqh760m6HQKyZUfW62Lg-nGLYTvfoGtlResziGZ7u4L6vdsmmr_05SKGWdXxpVQUzwBNiXI2SO3FbWNk2uwR_qXL8WHyajHKCXvgHYHUbnAKJ1SRKO0xNfYDDlGKXuEn4fZfVmICq9693Z8emwydlcp8BVOaVuFqbsWwoRmEoSnK4bzLAH-CM-PFESPfbTrnu0Scnnw'}\n" + ] + } + ], + "source": [ + "user_login_json = {\n", + " \"email\": \"mohanar2@illinois.edu\",\n", + " \"password\": \"password\"\n", + "}\n", + "login_url = CLOWDER_URL + \"/api/v2/login\"\n", + "response = requests.post(login_url, json = user_login_json)\n", + "token = response.json()[\"token\"]\n", + "headers = {\n", + " 'Authorization': f'Bearer {token}'\n", + "}\n", + "print(response.json())" + ] + }, + { + "cell_type": "markdown", + "id": "2b273e10-8efc-46a8-b220-b63354735c49", + "metadata": {}, + "source": [ + "## Creating Dataset" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "id": 
"91639e11-b553-4b57-a0df-6e91c661662f", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'name': 'Flower Dataset', 'description': 'Dataset for Flower Data', 'status': 'PRIVATE', 'id': '66840cc55713c91cd9b89483', 'creator': {'email': 'mohanar2@illinois.edu', 'first_name': 'Vismayak', 'last_name': 'Mohanarajan', 'id': '663a45d5b75ca83d17ac6564', 'admin': True, 'admin_mode': True, 'read_only_user': False}, 'created': '2024-07-02T14:20:53.503934', 'modified': '2024-07-02T14:20:53.503942', 'user_views': 0, 'downloads': 0, 'thumbnail_id': None, 'standard_license': True, 'license_id': 'CC BY'}\n" + ] + } + ], + "source": [ + "dataset_json = {\n", + " \"name\": \"Flower Dataset\",\n", + " \"description\": \"Dataset for Flower Data\",\n", + " \"status\": \"PRIVATE\",\n", + "}\n", + "dataset_params = {\n", + " \"license_id\": \"CC BY\"\n", + "}\n", + "\n", + "\n", + "dataset_url = CLOWDER_URL + \"/api/v2/datasets\"\n", + "response = requests.post(dataset_url, json = dataset_json, headers = headers, params = dataset_params)\n", + "dataset_id = response.json()['id']\n", + "print(response.json())" + ] + }, + { + "cell_type": "markdown", + "id": "51ad8ad2-bcb6-40d4-9587-44f6a1a7c09d", + "metadata": {}, + "source": [ + "## Uploading File" + ] + }, + { + "cell_type": "code", + "execution_count": 47, + "id": "ed02e036-edd0-403b-8292-9600946bc156", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'name': 'file', 'status': 'PRIVATE', 'id': '66840ced5713c91cd9b89485', 'creator': {'email': 'mohanar2@illinois.edu', 'first_name': 'Vismayak', 'last_name': 'Mohanarajan', 'id': '663a45d5b75ca83d17ac6564', 'admin': True, 'admin_mode': True, 'read_only_user': False}, 'created': '2024-07-02T14:21:33.733027', 'version_id': '2a4bbb53-8149-4384-9d55-2696c9d587ed', 'version_num': 1, 'dataset_id': '66840cc55713c91cd9b89483', 'folder_id': None, 'views': 0, 'downloads': 0, 'bytes': 4551, 
'content_type': {'content_type': 'application/octet-stream', 'main_type': 'application'}, 'thumbnail_id': None, 'storage_type': 'minio', 'storage_path': None, 'object_type': 'file'}\n" + ] + } + ], + "source": [ + "\n", + "file = download_iris_dataset()\n", + "file_json = {\n", + " \"file\": file,\n", + " \"mediaType\": 'multipart/form-data'\n", + "}\n", + "save_file_url = CLOWDER_URL + \"/api/v2/datasets/\" + dataset_id + '/files'\n", + "headers = {\n", + " 'Authorization': f'Bearer {token}'\n", + "}\n", + "response = requests.post(save_file_url, files = file_json, headers = headers)\n", + "print(response.json())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "08c6673d-70d2-4879-948f-8db76eda4cde", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.13" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/jupyterhub/Dockerfile.jupyterhub b/jupyterhub/Dockerfile.jupyterhub new file mode 100644 index 000000000..349782ad7 --- /dev/null +++ b/jupyterhub/Dockerfile.jupyterhub @@ -0,0 +1,14 @@ +ARG JUPYTERHUB_VERSION +FROM quay.io/jupyterhub/jupyterhub:$JUPYTERHUB_VERSION + +# Install dockerspawner, +# hadolint ignore=DL3013 +RUN python3 -m pip install --no-cache-dir \ + dockerspawner + +# Install custom authenticator +WORKDIR /tmp/authenticator/ +COPY authenticator /tmp/authenticator/ +RUN pip3 install /tmp/authenticator + +CMD ["jupyterhub", "-f", "/srv/jupyterhub/jupyterhub_config.py"] diff --git a/jupyterhub/Dockerfile.jupyterlab b/jupyterhub/Dockerfile.jupyterlab new file mode 100644 index 000000000..dd9c2e4f4 --- /dev/null +++ 
b/jupyterhub/Dockerfile.jupyterlab
@@ -0,0 +1,23 @@
+# Base Image
+FROM quay.io/jupyter/base-notebook:latest
+
+# Install additional packages
+USER root
+RUN apt-get -qq update && apt-get install -y --no-install-recommends \
+    curl \
+    git \
+    zip unzip \
+    nano \
+    vim-tiny \
+    lsof && \
+    rm -rf /var/lib/apt/lists/*
+
+USER $NB_USER
+
+COPY Clowder_APIs.ipynb /home/jovyan/work/
+
+# Install Python packages
+RUN pip install --no-cache-dir \
+    requests \
+    pyclowder \
+    pandas
diff --git a/jupyterhub/authenticator/customauthenticator/__init__.py b/jupyterhub/authenticator/customauthenticator/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/jupyterhub/authenticator/customauthenticator/custom.py b/jupyterhub/authenticator/customauthenticator/custom.py
new file mode 100644
index 000000000..7dfb05962
--- /dev/null
+++ b/jupyterhub/authenticator/customauthenticator/custom.py
@@ -0,0 +1,250 @@
+import json
+import os
+import urllib.parse
+import urllib.request
+
+from jose import jwt
+from jose.exceptions import ExpiredSignatureError, JWTClaimsError, JWTError
+from tornado import web
+from traitlets import Unicode
+
+from jupyterhub.auth import Authenticator
+from jupyterhub.handlers import LoginHandler, LogoutHandler
+
+
+class CustomTokenAuthenticator(Authenticator):
+    """
+    Accept the authenticated Access Token from cookie.
+    """
+
+    auth_cookie_header = Unicode(
+        os.environ.get("AUTH_COOKIE_HEADER", ""),
+        config=True,
+        help="the cookie header we put in browser to retrieve token",
+    )
+
+    auth_username_key = Unicode(
+        os.environ.get("AUTH_USERNAME_KEY", ""),
+        config=True,
+        help="the key to retreive username from the json",
+    )
+
+    landing_page_login_url = Unicode(
+        os.environ.get("LANDING_PAGE_LOGIN_URL", ""),
+        config=True,
+        help="the landing page login entry",
+    )
+
+    landing_page_logout_url = Unicode(
+        os.environ.get("LANDING_PAGE_LOGOUT", ""),
+        config=True,
+        help="the landing page logout entry",
+    )
+
+    keycloak_url = Unicode(
+        os.environ.get("KEYCLOAK_URL", ""),
+        config=True,
+        help="the URL where keycloak is installed",
+    )
+
+    keycloak_audience = Unicode(
+        os.environ.get("KEYCLOAK_AUDIENCE", ""),
+        config=True,
+        help="the audience for keycloak to check",
+    )
+
+    keycloak_pem_key = Unicode(
+        os.environ.get("KEYCLOAK_PEM_KEY", ""),
+        config=True,
+        help="the RSA pem key with proper header and footer (deprecated)",
+    )
+
+    space_service_url = Unicode(
+        os.environ.get("SPACE_SERVICE_URL", ""),
+        config=True,
+        help="the internal space service url",
+    )
+
+    quotas = None
+
+    def get_handlers(self, app):
+        return [
+            (r"/", LoginHandler),
+            (r"/user", LoginHandler),
+            (r"/lab", LoginHandler),
+            (r"/login", LoginHandler),
+            (r"/logout", CustomTokenLogoutHandler),
+        ]
+
+    def get_keycloak_pem(self):
+        """Fetch the realm public key from ``keycloak_url`` and cache it as PEM.
+
+        Stores the result in ``keycloak_pem_key``. Raises ``web.HTTPError`` (500)
+        when ``keycloak_url`` is unset or the response status is not 2xx.
+        """
+        if not self.keycloak_url:
+            raise web.HTTPError(
+                500, log_message="JupyterHub is not correctly configured."
+            )
+
+        # fetch the key; the context manager closes the connection afterwards
+        with urllib.request.urlopen(self.keycloak_url) as response:
+            status = response.code
+            encoding = response.info().get_content_charset("utf-8")
+            body = response.read()
+        if not 200 <= status <= 299:
+            raise web.HTTPError(500, log_message="Could not get key from keycloak.")
+        result = json.loads(body.decode(encoding))
+        self.keycloak_pem_key = (
+            f"-----BEGIN PUBLIC KEY-----\n"
+            f"{result['public_key']}\n"
+            f"-----END PUBLIC KEY-----"
+        )
+
+    def check_jwt_token(self, access_token):
+        """Validate a ``Bearer <jwt>`` string and return the user it identifies.
+
+        Returns ``{"name": <username>}`` where the username is read from the
+        ``auth_username_key`` claim. Raises ``web.HTTPError`` when the
+        authenticator is misconfigured or the token is missing, malformed,
+        expired or otherwise rejected by ``jwt.decode``.
+        """
+        # make sure we have the pem cert
+        if not self.keycloak_pem_key:
+            self.get_keycloak_pem()
+
+        # make sure audience is set
+        if not self.keycloak_audience:
+            raise web.HTTPError(
+                403, log_message="JupyterHub is not correctly configured."
+            )
+
+        # no token in the cookie
+        if not access_token:
+            raise web.HTTPError(401, log_message="Please login to access Clowder.")
+
+        # make sure it is a valid token
+        if len(access_token.split(" ")) != 2 or access_token.split(" ")[0] != "Bearer":
+            raise web.HTTPError(
+                403, log_message="Token format not valid, it has to be bearer xxxx!"
+            )
+
+        # decode jwt token instead of sending it to userinfo endpoint:
+        access_token = access_token.split(" ")[1]
+        public_key = self.keycloak_pem_key
+        audience = self.keycloak_audience
+        # NOTE(review): no `algorithms=` is passed to jwt.decode; consider pinning
+        # the accepted algorithm(s) once the realm's signing settings are confirmed.
+        try:
+            resp_json = jwt.decode(access_token, public_key, audience=audience)
+        except ExpiredSignatureError:
+            raise web.HTTPError(
+                403,
+                log_message="JWT Expired Signature Error: token signature has expired",
+            )
+        except JWTClaimsError:
+            raise web.HTTPError(
+                403, log_message="JWT Claims Error: token signature is invalid"
+            )
+        except JWTError:
+            raise web.HTTPError(
+                403, log_message="JWT Error: token signature is invalid"
+            )
+        except Exception:
+            raise web.HTTPError(403, log_message="Not a valid jwt token!")
+
+        # make sure we know username
+        if self.auth_username_key not in resp_json:
+            raise web.HTTPError(
+                500,
+                log_message=f"Required field {self.auth_username_key} does not exist in jwt token",
+            )
+        username = resp_json[self.auth_username_key]
+
+        self.log.info(f"username={username}")
+        return {"name": username}
+
+    async def authenticate(self, handler, data):
+        """Authenticate the request from the token cookie.
+
+        Reads the cookie named by ``auth_cookie_header``, validates it with
+        ``check_jwt_token`` and returns its result. On ``web.HTTPError`` the
+        request is redirected to ``landing_page_login_url`` with the error in
+        the query string and ``None`` is returned.
+        """
+        self.log.info("Authenticate")
+        try:
+            access_token = urllib.parse.unquote(
+                handler.get_cookie(self.auth_cookie_header, "")
+            )
+            if not access_token:
+                raise web.HTTPError(401, log_message="Please login to access Clowder.")
+
+            # check token and authorization
+            user = self.check_jwt_token(access_token)
+            return user
+        except web.HTTPError as e:
+            if e.log_message:
+                error_msg = urllib.parse.quote(e.log_message.encode("utf-8"))
+            else:
+                # quote the whole message so the suffix is URL-encoded as well
+                error_msg = urllib.parse.quote(
+                    f"Error {e}. Please login to access Clowder.".encode("utf-8")
+                )
+            handler.redirect(f"{self.landing_page_login_url}?error={error_msg}")
+            return None
+
+    # async def pre_spawn_start(self, user, spawner):
+    #     auth_state = await user.get_auth_state()
+    #     if not auth_state:
+    #         self.log.error("No auth state")
+    #         return
+    #
+    #     spawner.environment['NB_USER'] = user.name
+    #     spawner.environment['NB_UID'] = str(auth_state['uid'])
+    #
+    #     quota = self.find_quota(user, auth_state)
+    #     if "cpu" in quota:
+    #         spawner.cpu_guarantee = quota["cpu"][0]
+    #         spawner.cpu_limit = quota["cpu"][1]
+    #     else:
+    #         spawner.cpu_guarantee = 1
+    #         spawner.cpu_limit = 2
+    #     if "mem" in quota:
+    #         spawner.mem_guarantee = f"{quota['mem'][0]}G"
+    #         spawner.mem_limit = f"{quota['mem'][1]}G"
+    #     else:
+    #         spawner.mem_guarantee = "2G"
+    #         spawner.mem_limit = "4G"
+
+
+#
+#     # This is called from the jupyterlab so there is no cookies that this depends on
+#     async def refresh_user(self, user, handler):
+#         self.log.info("Refresh User")
+#         try:
+#             access_token = urllib.parse.unquote(handler.get_cookie(self.auth_cookie_header, ""))
+#             # if no token present
+#             if not access_token:
+#                 return False
+#
+#             # if token present, check token and authorization
+#             if self.check_jwt_token(access_token):
+#                 True
+#             return False
+#         except:
+#             self.log.exception("Error in refresh user")
+#             return False
+
+
+class CustomTokenLogoutHandler(LogoutHandler):
+    async def handle_logout(self):
+        """Blank the token cookie and redirect to ``landing_page_logout_url``."""
+        # remove clowder token on logout
+        self.log.info("Remove clowder token on logout")
+        self.log.info(
+            "You have logged out of Clowder system from Clowder . Please login again if you want to use "
+            "Clowder components."
+        )
+        self.set_cookie(self.authenticator.auth_cookie_header, "")
+        self.redirect(f"{self.authenticator.landing_page_logout_url}")
diff --git a/jupyterhub/authenticator/setup.py b/jupyterhub/authenticator/setup.py
new file mode 100644
index 000000000..0314f36d2
--- /dev/null
+++ b/jupyterhub/authenticator/setup.py
@@ -0,0 +1,12 @@
+from setuptools import setup
+
+setup(
+    name="customauthenticator",
+    version="0.8.0",
+    description="Custom Authenticator for JupyterHub",
+    author="cwang138",
+    author_email="cwang138@illinois.edu",
+    license="MPL 2.0",
+    packages=["customauthenticator"],
+    install_requires=["jupyterhub", "pyjwt", "requests", "python-jose"],
+)
diff --git a/jupyterhub/authenticator/test_jwt.py b/jupyterhub/authenticator/test_jwt.py
new file mode 100644
index 000000000..e86f6bc96
--- /dev/null
+++ b/jupyterhub/authenticator/test_jwt.py
@@ -0,0 +1,42 @@
+import json
+import urllib.request
+
+from jose import jwt
+from jose.exceptions import ExpiredSignatureError, JWTClaimsError, JWTError
+
+# NOTE(review): the URL here and the access_token below are empty placeholders.
+response = urllib.request.urlopen("")
+
+if 200 <= response.code <= 299:
+    encoding = response.info().get_content_charset("utf-8")
+    result = json.loads(response.read().decode(encoding))
+    public_key = (
+        f"-----BEGIN PUBLIC KEY-----\n"
+        f"{result['public_key']}\n"
+        f"-----END PUBLIC KEY-----"
+    )
+else:
+    print("Could not get key from keycloak.")
+
+
+access_token = ""
+
+# make sure it is a valid token
+if len(access_token.split(" ")) != 2 or access_token.split(" ")[0] != "Bearer":
+    print("Token format not valid, it has to be bearer xxxx!")
+
+# decode jwt token instead of sending it to userinfo endpoint:
+access_token = access_token.split(" ")[1]
+
+try:
+    decoded = jwt.decode(access_token, public_key, audience="clowder")
+    print(decoded)
+
+except ExpiredSignatureError:
+    print("JWT Expired Signature Error: token signature has expired")
+except JWTClaimsError:
+    print("JWT Claims Error: token signature is invalid")
+except JWTError:
+    print("JWT
Error: token signature is invalid")
+except Exception:
+    print("Not a valid jwt token!")
diff --git a/jupyterhub/jupyterhub_config.py b/jupyterhub/jupyterhub_config.py
new file mode 100644
index 000000000..5f0b19b99
--- /dev/null
+++ b/jupyterhub/jupyterhub_config.py
@@ -0,0 +1,106 @@
+# Copyright (c) Jupyter Development Team.
+# Distributed under the terms of the Modified BSD License.
+
+
+# Configuration file for JupyterHub
+import os
+
+from customauthenticator.custom import CustomTokenAuthenticator
+
+c = get_config()  # noqa: F821
+
+# We rely on environment variables to configure JupyterHub so that we
+# avoid having to rebuild the JupyterHub container every time we change a
+# configuration parameter.
+
+# Base URL of the Hub
+c.JupyterHub.base_url = "/jupyterhub"
+
+
+# Important proxy settings to work with Traefik
+c.JupyterHub.proxy_class = "jupyterhub.proxy.ConfigurableHTTPProxy"
+c.ConfigurableHTTPProxy.command = ["configurable-http-proxy"]
+
+
+# Spawn single-user servers as Docker containers
+c.JupyterHub.spawner_class = "dockerspawner.DockerSpawner"
+
+# Spawn containers from this image
+c.DockerSpawner.image = os.environ["DOCKER_NOTEBOOK_IMAGE"]
+
+# Connect containers to this Docker network
+network_name = os.environ["DOCKER_NETWORK_NAME"]
+c.DockerSpawner.use_internal_ip = True
+c.DockerSpawner.network_name = network_name
+
+# Explicitly set notebook directory because we'll be mounting a volume to it.
+# Most `jupyter/docker-stacks` *-notebook images run the Notebook server as
+# user `jovyan`, and set the notebook directory to `/home/jovyan/work`.
+# We follow the same convention.
+notebook_dir = os.environ.get("DOCKER_NOTEBOOK_DIR", "/home/jovyan/work")
+c.DockerSpawner.notebook_dir = notebook_dir
+# NOTE(review): Dockerfile.jupyterlab only copies Clowder_APIs.ipynb into the
+# image; confirm that Welcome.ipynb exists in DOCKER_NOTEBOOK_IMAGE.
+c.Spawner.args = ["--NotebookApp.default_url=/notebooks/Welcome.ipynb"]
+
+# Mount the real user's Docker volume on the host to the notebook user's
+# notebook directory in the container
+c.DockerSpawner.volumes = {"jupyterhub-user-{username}": notebook_dir}
+
+# Remove containers once they are stopped
+c.DockerSpawner.remove = True
+
+# For debugging arguments passed to spawned containers
+c.DockerSpawner.debug = True
+
+# User containers will access hub by container name on the Docker network
+c.JupyterHub.hub_ip = "jupyterhub"
+c.JupyterHub.hub_port = 8080
+
+# Persist hub data on volume mounted inside container
+# c.JupyterHub.cookie_secret_file = "/data/jupyterhub_cookie_secret"
+c.JupyterHub.db_url = "sqlite:////data/jupyterhub.sqlite"
+
+# # Authenticate users with Native Authenticator
+# c.JupyterHub.authenticator_class = "nativeauthenticator.NativeAuthenticator"
+#
+# # Allow anyone to sign-up without approval
+# c.NativeAuthenticator.open_signup = True
+
+# Authenticate with Custom Token Authenticator
+c.Spawner.cmd = ["start.sh", "jupyterhub-singleuser", "--allow-root"]
+c.KubeSpawner.args = ["--allow-root"]
+c.JupyterHub.authenticator_class = CustomTokenAuthenticator
+# TODO:Change this keycloak_url as required
+
+c.CustomTokenAuthenticator.auth_cookie_header = "Authorization"
+c.CustomTokenAuthenticator.auth_username_key = "preferred_username"
+c.CustomTokenAuthenticator.auth_uid_number_key = "uid_number"
+c.CustomTokenAuthenticator.enable_auth_state = True
+c.CustomTokenAuthenticator.auto_login = True
+
+# Fail fast with a KeyError naming the missing variable instead of silently
+# building URLs that contain "None" or crashing with an opaque TypeError.
+keycloak_hostname = os.environ["KEYCLOAK_HOSTNAME"]
+keycloak_realm = os.environ["KEYCLOAK_REALM"]
+clowder_url = os.environ["CLOWDER_URL"]
+
+# Only the URL scheme differs between production and development.
+scheme = "https" if os.getenv("PROD_DEPLOYMENT") == "true" else "http"
+c.CustomTokenAuthenticator.keycloak_url = (
+    f"{scheme}://{keycloak_hostname}/realms/{keycloak_realm}/"
+)
+c.CustomTokenAuthenticator.landing_page_login_url = f"{scheme}://{keycloak_hostname}"
+c.CustomTokenAuthenticator.landing_page_logout_url = (
+    f"{scheme}://{clowder_url}/auth/logout"
+)
+
+c.JupyterHub.cookie_secret = os.getenv("JUPYTERHUB_CRYPT_KEY")
+
+# Allow all users to access
+c.Authenticator.allow_all = True
+
+# Allowed admins
+admin = os.environ.get("JUPYTERHUB_ADMIN")
+if admin:
+    c.Authenticator.admin_users = [admin]
diff --git a/jupyterhub/jupyterhub_dev_config.py b/jupyterhub/jupyterhub_dev_config.py
new file mode 100644
index 000000000..136f80962
--- /dev/null
+++ b/jupyterhub/jupyterhub_dev_config.py
@@ -0,0 +1,90 @@
+# Copyright (c) Jupyter Development Team.
+# Distributed under the terms of the Modified BSD License.
+
+# Configuration file for JupyterHub
+import os
+
+from customauthenticator.custom import CustomTokenAuthenticator
+
+c = get_config()  # noqa: F821
+
+# We rely on environment variables to configure JupyterHub so that we
+# avoid having to rebuild the JupyterHub container every time we change a
+# configuration parameter.
+
+# Spawn single-user servers as Docker containers
+c.JupyterHub.spawner_class = "dockerspawner.DockerSpawner"
+
+# Spawn containers from this image
+c.DockerSpawner.image = os.environ["DOCKER_NOTEBOOK_IMAGE"]
+
+# Connect containers to this Docker network
+network_name = os.environ["DOCKER_NETWORK_NAME"]
+c.DockerSpawner.use_internal_ip = True
+c.DockerSpawner.network_name = network_name
+
+# Explicitly set notebook directory because we'll be mounting a volume to it.
+# Most `jupyter/docker-stacks` *-notebook images run the Notebook server as
+# user `jovyan`, and set the notebook directory to `/home/jovyan/work`.
+# We follow the same convention.
+notebook_dir = os.environ.get("DOCKER_NOTEBOOK_DIR", "/home/jovyan/work")
+c.DockerSpawner.notebook_dir = notebook_dir
+
+# Mount the real user's Docker volume on the host to the notebook user's
+# notebook directory in the container
+c.DockerSpawner.volumes = {"jupyterhub-user-{username}": notebook_dir}
+
+# Remove containers once they are stopped
+c.DockerSpawner.remove = True
+
+# For debugging arguments passed to spawned containers
+c.DockerSpawner.debug = True
+
+# User containers will access hub by container name on the Docker network
+c.JupyterHub.hub_ip = "jupyterhub"
+c.JupyterHub.hub_port = 8080
+
+# Persist hub data on volume mounted inside container
+# c.JupyterHub.cookie_secret_file = "/data/jupyterhub_cookie_secret"
+c.JupyterHub.db_url = "sqlite:////data/jupyterhub.sqlite"
+
+# # Authenticate users with Native Authenticator
+# c.JupyterHub.authenticator_class = "nativeauthenticator.NativeAuthenticator"
+#
+# # Allow anyone to sign-up without approval
+# c.NativeAuthenticator.open_signup = True
+
+# Authenticate with Custom Token Authenticator
+c.Spawner.cmd = ["start.sh", "jupyterhub-singleuser", "--allow-root"]
+c.KubeSpawner.args = ["--allow-root"]
+c.JupyterHub.authenticator_class = CustomTokenAuthenticator
+# TODO:Change this keycloak_url as required
+
+c.CustomTokenAuthenticator.auth_cookie_header = "Authorization"
+c.CustomTokenAuthenticator.auth_username_key = "preferred_username"
+c.CustomTokenAuthenticator.auth_uid_number_key = "uid_number"
+c.CustomTokenAuthenticator.enable_auth_state = True
+c.CustomTokenAuthenticator.auto_login = True
+
+# Fail fast with a KeyError naming the missing variable instead of silently
+# building URLs that contain "None" or crashing with an opaque TypeError.
+keycloak_hostname = os.environ["KEYCLOAK_HOSTNAME"]
+keycloak_realm = os.environ["KEYCLOAK_REALM"]
+clowder_url = os.environ["CLOWDER_URL"]
+
+# Only the URL scheme differs between production and development.
+scheme = "https" if os.getenv("PROD_DEPLOYMENT") == "true" else "http"
+c.CustomTokenAuthenticator.keycloak_url = (
+    f"{scheme}://{keycloak_hostname}/realms/{keycloak_realm}/"
+)
+c.CustomTokenAuthenticator.landing_page_login_url = f"{scheme}://{keycloak_hostname}"
+c.CustomTokenAuthenticator.landing_page_logout_url = (
+    f"{scheme}://{clowder_url}/auth/logout"
+)
+
+c.JupyterHub.cookie_secret = os.getenv("JUPYTERHUB_CRYPT_KEY")
+
+# Allowed admins
+admin = os.environ.get("JUPYTERHUB_ADMIN")
+if admin:
+    c.Authenticator.admin_users = [admin]