From 8abe8ceaf70b72391dc644bd16a7c8a0e48ae692 Mon Sep 17 00:00:00 2001 From: Harizo Rajaona Date: Wed, 4 Nov 2020 16:55:36 +0100 Subject: [PATCH 01/14] Started refactoring repository structure --- README.md | 20 +++++++++++++++++++ .../index-all-hive-databases/README.md | 0 .../index-all-hive-databases.py | 0 .../client_api_utils}/README.md | 0 ...acefully_provited_hive_legacy_migration.py | 0 .../project_bundle_deployment/README.md | 0 .../manage_bundles.py | 0 .../compute_partition_list}/README.md | 0 .../compute_partition_list.py | 0 .../custom_python_models}/README.md | 0 .../custom_python_models}/lightgbm.py | 0 .../dataset_last_run_job_info}/README.md | 0 .../get_job_info_for_datasets_in_project.py | 0 .../partition_list_variable}/README.md | 0 .../build_only_new_partitions.py | 0 .../build_whole_output_custom.py | 0 .../python_io_examples}/README.md | 0 .../pandas_chunked_read_write.py | 0 {reco => _old/reco}/README.md | 0 {reco => _old/reco}/__init__.py | 0 {reco => _old/reco}/surprise_wrapper.py | 0 .../authenticate-calls/README.md | 0 .../flask-webapps/authenticate-calls/app.js | 0 .../authenticate-calls/backend.py | 0 .../flask-session-per-browser/README.md | 0 .../flask-session-per-browser/app.js | 0 .../flask-session-per-browser/backend.py | 0 .../flask-session-per-browser/body.html | 0 .../flask-session-per-frontend/README.md | 0 .../flask-session-per-frontend/app.js | 0 .../flask-session-per-frontend/backend.py | 0 .../flask-session-per-frontend/body.html | 0 .../flask-session-per-user/README.md | 0 .../flask-session-per-user/app.js | 0 .../flask-session-per-user/backend.py | 0 .../flask-session-per-user/body.html | 0 .../shiny/authenticate-calls/README.md | 0 .../shiny/authenticate-calls/server.R | 0 .../shiny/authenticate-calls/ui.R | 0 .../shiny/shiny-and-dygraphs/README.md | 0 .../shiny/shiny-and-dygraphs/UI.R | 0 .../shiny/shiny-and-dygraphs/server.R | 0 admin/README.md | 10 ++++++++++ applications/README.md | 4 ++++ datasets/README.md | 10 ++++++++++ machine_learning/README.md | 8 ++++++++ metrics_and_checks/README.md | 4 ++++ scenarios/README.md | 3 +++ statistics/README.md | 3 +++ webapps/README.md | 7 +++++++ 50 files changed, 69 insertions(+) rename {administration => _old/administration}/index-all-hive-databases/README.md (100%) rename {administration => _old/administration}/index-all-hive-databases/index-all-hive-databases.py (100%) rename {client_api_utils => _old/client_api_utils}/README.md (100%) rename {client_api_utils => _old/client_api_utils}/hive_config_migration/gracefully_provited_hive_legacy_migration.py (100%) rename {client_api_utils => _old/client_api_utils}/project_bundle_deployment/README.md (100%) rename {client_api_utils => _old/client_api_utils}/project_bundle_deployment/manage_bundles.py (100%) rename {compute_partition_list => _old/compute_partition_list}/README.md (100%) rename {compute_partition_list => _old/compute_partition_list}/compute_partition_list.py (100%) rename {custom_python_models => _old/custom_python_models}/README.md (100%) rename {custom_python_models => _old/custom_python_models}/lightgbm.py (100%) rename {dataset_last_run_job_info => _old/dataset_last_run_job_info}/README.md (100%) rename {dataset_last_run_job_info => _old/dataset_last_run_job_info}/get_job_info_for_datasets_in_project.py (100%) rename {partition_list_variable => _old/partition_list_variable}/README.md (100%) rename {partition_list_variable => _old/partition_list_variable}/build_only_new_partitions.py (100%) rename {partition_list_variable => 
_old/partition_list_variable}/build_whole_output_custom.py (100%) rename {python_io_examples => _old/python_io_examples}/README.md (100%) rename {python_io_examples => _old/python_io_examples}/pandas_chunked_read_write.py (100%) rename {reco => _old/reco}/README.md (100%) rename {reco => _old/reco}/__init__.py (100%) rename {reco => _old/reco}/surprise_wrapper.py (100%) rename {visualization => _old/visualization}/flask-webapps/authenticate-calls/README.md (100%) rename {visualization => _old/visualization}/flask-webapps/authenticate-calls/app.js (100%) rename {visualization => _old/visualization}/flask-webapps/authenticate-calls/backend.py (100%) rename {visualization => _old/visualization}/flask-webapps/flask-session-per-browser/README.md (100%) rename {visualization => _old/visualization}/flask-webapps/flask-session-per-browser/app.js (100%) rename {visualization => _old/visualization}/flask-webapps/flask-session-per-browser/backend.py (100%) rename {visualization => _old/visualization}/flask-webapps/flask-session-per-browser/body.html (100%) rename {visualization => _old/visualization}/flask-webapps/flask-session-per-frontend/README.md (100%) rename {visualization => _old/visualization}/flask-webapps/flask-session-per-frontend/app.js (100%) rename {visualization => _old/visualization}/flask-webapps/flask-session-per-frontend/backend.py (100%) rename {visualization => _old/visualization}/flask-webapps/flask-session-per-frontend/body.html (100%) rename {visualization => _old/visualization}/flask-webapps/flask-session-per-user/README.md (100%) rename {visualization => _old/visualization}/flask-webapps/flask-session-per-user/app.js (100%) rename {visualization => _old/visualization}/flask-webapps/flask-session-per-user/backend.py (100%) rename {visualization => _old/visualization}/flask-webapps/flask-session-per-user/body.html (100%) rename {visualization => _old/visualization}/shiny/authenticate-calls/README.md (100%) rename {visualization => _old/visualization}/shiny/authenticate-calls/server.R (100%) rename {visualization => _old/visualization}/shiny/authenticate-calls/ui.R (100%) rename {visualization => _old/visualization}/shiny/shiny-and-dygraphs/README.md (100%) rename {visualization => _old/visualization}/shiny/shiny-and-dygraphs/UI.R (100%) rename {visualization => _old/visualization}/shiny/shiny-and-dygraphs/server.R (100%) create mode 100644 admin/README.md create mode 100644 applications/README.md create mode 100644 datasets/README.md create mode 100644 machine_learning/README.md create mode 100644 metrics_and_checks/README.md create mode 100644 scenarios/README.md create mode 100644 statistics/README.md create mode 100644 webapps/README.md diff --git a/README.md b/README.md index 8ee7c6e..d5e8daa 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,22 @@ # dss-code-samples Various code samples for using DSS + +## Refactoring + +### Structure + +``` +dss-code-samples +|_admin +|_applications +|_datasets +|_formulas +|_metrics_and_checks +|_machine_learning +|_partitioning +|_scenarios +|_statistics +|_webapps +``` + + diff --git a/administration/index-all-hive-databases/README.md b/_old/administration/index-all-hive-databases/README.md similarity index 100% rename from administration/index-all-hive-databases/README.md rename to _old/administration/index-all-hive-databases/README.md diff --git a/administration/index-all-hive-databases/index-all-hive-databases.py b/_old/administration/index-all-hive-databases/index-all-hive-databases.py similarity index 100% rename from 
administration/index-all-hive-databases/index-all-hive-databases.py rename to _old/administration/index-all-hive-databases/index-all-hive-databases.py diff --git a/client_api_utils/README.md b/_old/client_api_utils/README.md similarity index 100% rename from client_api_utils/README.md rename to _old/client_api_utils/README.md diff --git a/client_api_utils/hive_config_migration/gracefully_provited_hive_legacy_migration.py b/_old/client_api_utils/hive_config_migration/gracefully_provited_hive_legacy_migration.py similarity index 100% rename from client_api_utils/hive_config_migration/gracefully_provited_hive_legacy_migration.py rename to _old/client_api_utils/hive_config_migration/gracefully_provited_hive_legacy_migration.py diff --git a/client_api_utils/project_bundle_deployment/README.md b/_old/client_api_utils/project_bundle_deployment/README.md similarity index 100% rename from client_api_utils/project_bundle_deployment/README.md rename to _old/client_api_utils/project_bundle_deployment/README.md diff --git a/client_api_utils/project_bundle_deployment/manage_bundles.py b/_old/client_api_utils/project_bundle_deployment/manage_bundles.py similarity index 100% rename from client_api_utils/project_bundle_deployment/manage_bundles.py rename to _old/client_api_utils/project_bundle_deployment/manage_bundles.py diff --git a/compute_partition_list/README.md b/_old/compute_partition_list/README.md similarity index 100% rename from compute_partition_list/README.md rename to _old/compute_partition_list/README.md diff --git a/compute_partition_list/compute_partition_list.py b/_old/compute_partition_list/compute_partition_list.py similarity index 100% rename from compute_partition_list/compute_partition_list.py rename to _old/compute_partition_list/compute_partition_list.py diff --git a/custom_python_models/README.md b/_old/custom_python_models/README.md similarity index 100% rename from custom_python_models/README.md rename to _old/custom_python_models/README.md diff --git a/custom_python_models/lightgbm.py b/_old/custom_python_models/lightgbm.py similarity index 100% rename from custom_python_models/lightgbm.py rename to _old/custom_python_models/lightgbm.py diff --git a/dataset_last_run_job_info/README.md b/_old/dataset_last_run_job_info/README.md similarity index 100% rename from dataset_last_run_job_info/README.md rename to _old/dataset_last_run_job_info/README.md diff --git a/dataset_last_run_job_info/get_job_info_for_datasets_in_project.py b/_old/dataset_last_run_job_info/get_job_info_for_datasets_in_project.py similarity index 100% rename from dataset_last_run_job_info/get_job_info_for_datasets_in_project.py rename to _old/dataset_last_run_job_info/get_job_info_for_datasets_in_project.py diff --git a/partition_list_variable/README.md b/_old/partition_list_variable/README.md similarity index 100% rename from partition_list_variable/README.md rename to _old/partition_list_variable/README.md diff --git a/partition_list_variable/build_only_new_partitions.py b/_old/partition_list_variable/build_only_new_partitions.py similarity index 100% rename from partition_list_variable/build_only_new_partitions.py rename to _old/partition_list_variable/build_only_new_partitions.py diff --git a/partition_list_variable/build_whole_output_custom.py b/_old/partition_list_variable/build_whole_output_custom.py similarity index 100% rename from partition_list_variable/build_whole_output_custom.py rename to _old/partition_list_variable/build_whole_output_custom.py diff --git a/python_io_examples/README.md 
b/_old/python_io_examples/README.md similarity index 100% rename from python_io_examples/README.md rename to _old/python_io_examples/README.md diff --git a/python_io_examples/pandas_chunked_read_write.py b/_old/python_io_examples/pandas_chunked_read_write.py similarity index 100% rename from python_io_examples/pandas_chunked_read_write.py rename to _old/python_io_examples/pandas_chunked_read_write.py diff --git a/reco/README.md b/_old/reco/README.md similarity index 100% rename from reco/README.md rename to _old/reco/README.md diff --git a/reco/__init__.py b/_old/reco/__init__.py similarity index 100% rename from reco/__init__.py rename to _old/reco/__init__.py diff --git a/reco/surprise_wrapper.py b/_old/reco/surprise_wrapper.py similarity index 100% rename from reco/surprise_wrapper.py rename to _old/reco/surprise_wrapper.py diff --git a/visualization/flask-webapps/authenticate-calls/README.md b/_old/visualization/flask-webapps/authenticate-calls/README.md similarity index 100% rename from visualization/flask-webapps/authenticate-calls/README.md rename to _old/visualization/flask-webapps/authenticate-calls/README.md diff --git a/visualization/flask-webapps/authenticate-calls/app.js b/_old/visualization/flask-webapps/authenticate-calls/app.js similarity index 100% rename from visualization/flask-webapps/authenticate-calls/app.js rename to _old/visualization/flask-webapps/authenticate-calls/app.js diff --git a/visualization/flask-webapps/authenticate-calls/backend.py b/_old/visualization/flask-webapps/authenticate-calls/backend.py similarity index 100% rename from visualization/flask-webapps/authenticate-calls/backend.py rename to _old/visualization/flask-webapps/authenticate-calls/backend.py diff --git a/visualization/flask-webapps/flask-session-per-browser/README.md b/_old/visualization/flask-webapps/flask-session-per-browser/README.md similarity index 100% rename from visualization/flask-webapps/flask-session-per-browser/README.md rename to _old/visualization/flask-webapps/flask-session-per-browser/README.md diff --git a/visualization/flask-webapps/flask-session-per-browser/app.js b/_old/visualization/flask-webapps/flask-session-per-browser/app.js similarity index 100% rename from visualization/flask-webapps/flask-session-per-browser/app.js rename to _old/visualization/flask-webapps/flask-session-per-browser/app.js diff --git a/visualization/flask-webapps/flask-session-per-browser/backend.py b/_old/visualization/flask-webapps/flask-session-per-browser/backend.py similarity index 100% rename from visualization/flask-webapps/flask-session-per-browser/backend.py rename to _old/visualization/flask-webapps/flask-session-per-browser/backend.py diff --git a/visualization/flask-webapps/flask-session-per-browser/body.html b/_old/visualization/flask-webapps/flask-session-per-browser/body.html similarity index 100% rename from visualization/flask-webapps/flask-session-per-browser/body.html rename to _old/visualization/flask-webapps/flask-session-per-browser/body.html diff --git a/visualization/flask-webapps/flask-session-per-frontend/README.md b/_old/visualization/flask-webapps/flask-session-per-frontend/README.md similarity index 100% rename from visualization/flask-webapps/flask-session-per-frontend/README.md rename to _old/visualization/flask-webapps/flask-session-per-frontend/README.md diff --git a/visualization/flask-webapps/flask-session-per-frontend/app.js b/_old/visualization/flask-webapps/flask-session-per-frontend/app.js similarity index 100% rename from 
visualization/flask-webapps/flask-session-per-frontend/app.js rename to _old/visualization/flask-webapps/flask-session-per-frontend/app.js diff --git a/visualization/flask-webapps/flask-session-per-frontend/backend.py b/_old/visualization/flask-webapps/flask-session-per-frontend/backend.py similarity index 100% rename from visualization/flask-webapps/flask-session-per-frontend/backend.py rename to _old/visualization/flask-webapps/flask-session-per-frontend/backend.py diff --git a/visualization/flask-webapps/flask-session-per-frontend/body.html b/_old/visualization/flask-webapps/flask-session-per-frontend/body.html similarity index 100% rename from visualization/flask-webapps/flask-session-per-frontend/body.html rename to _old/visualization/flask-webapps/flask-session-per-frontend/body.html diff --git a/visualization/flask-webapps/flask-session-per-user/README.md b/_old/visualization/flask-webapps/flask-session-per-user/README.md similarity index 100% rename from visualization/flask-webapps/flask-session-per-user/README.md rename to _old/visualization/flask-webapps/flask-session-per-user/README.md diff --git a/visualization/flask-webapps/flask-session-per-user/app.js b/_old/visualization/flask-webapps/flask-session-per-user/app.js similarity index 100% rename from visualization/flask-webapps/flask-session-per-user/app.js rename to _old/visualization/flask-webapps/flask-session-per-user/app.js diff --git a/visualization/flask-webapps/flask-session-per-user/backend.py b/_old/visualization/flask-webapps/flask-session-per-user/backend.py similarity index 100% rename from visualization/flask-webapps/flask-session-per-user/backend.py rename to _old/visualization/flask-webapps/flask-session-per-user/backend.py diff --git a/visualization/flask-webapps/flask-session-per-user/body.html b/_old/visualization/flask-webapps/flask-session-per-user/body.html similarity index 100% rename from visualization/flask-webapps/flask-session-per-user/body.html rename to _old/visualization/flask-webapps/flask-session-per-user/body.html diff --git a/visualization/shiny/authenticate-calls/README.md b/_old/visualization/shiny/authenticate-calls/README.md similarity index 100% rename from visualization/shiny/authenticate-calls/README.md rename to _old/visualization/shiny/authenticate-calls/README.md diff --git a/visualization/shiny/authenticate-calls/server.R b/_old/visualization/shiny/authenticate-calls/server.R similarity index 100% rename from visualization/shiny/authenticate-calls/server.R rename to _old/visualization/shiny/authenticate-calls/server.R diff --git a/visualization/shiny/authenticate-calls/ui.R b/_old/visualization/shiny/authenticate-calls/ui.R similarity index 100% rename from visualization/shiny/authenticate-calls/ui.R rename to _old/visualization/shiny/authenticate-calls/ui.R diff --git a/visualization/shiny/shiny-and-dygraphs/README.md b/_old/visualization/shiny/shiny-and-dygraphs/README.md similarity index 100% rename from visualization/shiny/shiny-and-dygraphs/README.md rename to _old/visualization/shiny/shiny-and-dygraphs/README.md diff --git a/visualization/shiny/shiny-and-dygraphs/UI.R b/_old/visualization/shiny/shiny-and-dygraphs/UI.R similarity index 100% rename from visualization/shiny/shiny-and-dygraphs/UI.R rename to _old/visualization/shiny/shiny-and-dygraphs/UI.R diff --git a/visualization/shiny/shiny-and-dygraphs/server.R b/_old/visualization/shiny/shiny-and-dygraphs/server.R similarity index 100% rename from visualization/shiny/shiny-and-dygraphs/server.R rename to 
_old/visualization/shiny/shiny-and-dygraphs/server.R diff --git a/admin/README.md b/admin/README.md new file mode 100644 index 0000000..5b2eada --- /dev/null +++ b/admin/README.md @@ -0,0 +1,10 @@ +# Administration + +## TODO + +- [ ] List jobs currently running +- [ ] Create a code environment from a list of packages +- [ ] Programmatically add impersonation rules +- [ ] Create API service infrastructure +- [ ] Create a mapping between code environments and Python/R recipes +- [ ] Create and manage project folders diff --git a/applications/README.md b/applications/README.md new file mode 100644 index 0000000..29b471d --- /dev/null +++ b/applications/README.md @@ -0,0 +1,4 @@ +# Dataiku Applications + +- [ ] App-as-API example +- [ ] List and cleanup application instances diff --git a/datasets/README.md b/datasets/README.md new file mode 100644 index 0000000..e60dc27 --- /dev/null +++ b/datasets/README.md @@ -0,0 +1,10 @@ +# Datasets + +## TODO + +- [ ] Programmatically build partitions +- [ ] Read from/write to non-local-FS-folders +- [ ] Flag and delete orphaned datasets +- [ ] Schema propagation from updated dataset +- [ ] Create "Upload" dataset and add/replace file(s) + diff --git a/machine_learning/README.md b/machine_learning/README.md new file mode 100644 index 0000000..fd20a25 --- /dev/null +++ b/machine_learning/README.md @@ -0,0 +1,8 @@ +# Machine learning + +- [ ] "Pure code" model training and batch scoring in PyTorch +- [ ] "Pure code" model training and batch scoring in Tensorflow 2.x +- [ ] Visual ML: custom preprocessing (numerical + categorical) +- [ ] Visual ML: custom evaluation metric (classification + regression) +- [ ] Visual ML: custom Python model (classification + regression) +- [ ] Visual ML: download pre-trained model in a managed folder diff --git a/metrics_and_checks/README.md b/metrics_and_checks/README.md new file mode 100644 index 0000000..cc00c68 --- /dev/null +++ b/metrics_and_checks/README.md @@ -0,0 +1,4 @@ +# Metrics and checks + +- [ ] Retrieve metrics history of a dataset +- [ ] Retrieve metrics history of a model diff --git a/scenarios/README.md b/scenarios/README.md new file mode 100644 index 0000000..940ffb3 --- /dev/null +++ b/scenarios/README.md @@ -0,0 +1,3 @@ +# Scenarios + +- [ ] Implement a timeout for a particular scenario step diff --git a/statistics/README.md b/statistics/README.md new file mode 100644 index 0000000..54f19e4 --- /dev/null +++ b/statistics/README.md @@ -0,0 +1,3 @@ +# Interactive statistics + +- [ ] Retrieve correlation matrix from dataset/worksheet/card diff --git a/webapps/README.md b/webapps/README.md new file mode 100644 index 0000000..ed38fbf --- /dev/null +++ b/webapps/README.md @@ -0,0 +1,7 @@ +# Webapps + +- [ ] Authenticate users on a Flask webapp +- [ ] Maintaining a model per browser/frontend/user on a Flask webapp +- [ ] Authenticate users on a Shiny webapp +- [ ] Display interactive time series in a Shiny webapp +- [ ] Display interactive time series in a Bokeh webapp From 073c90738ab83d4654d07d08e3c0a9890c97999f Mon Sep 17 00:00:00 2001 From: Harizo Rajaona Date: Wed, 4 Nov 2020 19:07:09 +0100 Subject: [PATCH 02/14] Add instructions for venv setup --- README.md | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/README.md b/README.md index d5e8daa..f63b577 100644 --- a/README.md +++ b/README.md @@ -3,6 +3,21 @@ Various code samples for using DSS ## Refactoring +### Getting started + +(DSS >= 8.0.3) + +Create a dedicated virtual environment and install the following packages: +* 
`dataiku-internal-client`: follow the instructions in the [DSS doc](https://doc.dataiku.com/dss/latest/python-api/outside-usage.html#installing-the-package) +* `dataikuapi`: + ``` + $ pip install dataiku-api-client + ``` +* `pandas`: + ``` + $ pip install "pandas>=1.0,<1.1" + ``` + ### Structure ``` @@ -20,3 +35,4 @@ dss-code-samples ``` + From 0559f646332205123db8376df11f737f7c573f21 Mon Sep 17 00:00:00 2001 From: Harizo Rajaona Date: Wed, 4 Nov 2020 22:50:47 +0100 Subject: [PATCH 03/14] WIP working version of list_jobs --- .gitignore | 3 +++ admin/list_jobs.py | 30 ++++++++++++++++++++++++++++++ requirements.txt | 13 +++++++++++++ 3 files changed, 46 insertions(+) create mode 100644 .gitignore create mode 100644 admin/list_jobs.py create mode 100644 requirements.txt diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..07bc6ad --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +venv/ +.vscode/ + diff --git a/admin/list_jobs.py b/admin/list_jobs.py new file mode 100644 index 0000000..184c22b --- /dev/null +++ b/admin/list_jobs.py @@ -0,0 +1,30 @@ +import dataiku +from datetime import datetime + +def list_jobs_by_status(client=None, project_key=None): + """ + """ + + project = client.get_project(project_key) + jobs_by_status = {"RUNNING": [], + "FAILED": [], + "DONE": [], + "ABORTED": []} + for job in project.list_jobs(): + if "state" not in job: + jobs_by_status["RUNNING"].append(job) + else: + jobs_by_status[job["state"]].append(job) + return jobs_by_status + + +def filter_jobs_by_start_date(jobs_by_status=None, start_date=None): + """ + """ + + start_date_timestamp = int(datetime.strptime(start_date, "%Y/%m/%d").strftime("%s")) * 1000 + is_after_start_date = lambda x, d: x["def"]["initiationTimestamp"] > d + jobs_after_start_date = {_status: [job for job in _list if is_after_start_date(job, start_date_timestamp)] for _status, _list in jobs_by_status.items()} + return jobs_after_start_date + + diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..63b9507 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,13 @@ +certifi==2020.6.20 +chardet==3.0.4 +idna==2.8 +numpy==1.19.4 +pandas==1.0.5 +python-dateutil==2.8.0 +pytz==2019.2 +requests==2.22.0 +six==1.15.0 +urllib3==1.25.11 +dataiku-api-client==8.0.0 +http://localhost:40000/public/packages/dataiku-internal-client.tar.gz + From a998e91df377bd12b9fa09714ff3e2cac38418cb Mon Sep 17 00:00:00 2001 From: Harizo Rajaona Date: Thu, 5 Nov 2020 10:50:07 +0100 Subject: [PATCH 04/14] Add docstrings and abort function --- admin/list_jobs.py | 44 +++++++++++++++++++++++++++++++++++++++----- 1 file changed, 39 insertions(+), 5 deletions(-) diff --git a/admin/list_jobs.py b/admin/list_jobs.py index 184c22b..0939cbb 100644 --- a/admin/list_jobs.py +++ b/admin/list_jobs.py @@ -1,8 +1,16 @@ import dataiku from datetime import datetime + def list_jobs_by_status(client=None, project_key=None): - """ + """List jobs by current status in a given project. 
+ +    Args: +        client: A handle on the target DSS instance +        project_key: A string representing the target project key + +    Returns: +        jobs_by_status: A dict of lists mapping jobs and their states     """      project = client.get_project(project_key) @@ -11,20 +19,46 @@ def list_jobs_by_status(client=None, project_key=None):                      "DONE": [],                      "ABORTED": []}     for job in project.list_jobs(): -        if "state" not in job: +        if not job["stableState"]:             jobs_by_status["RUNNING"].append(job)         else:             jobs_by_status[job["state"]].append(job)     return jobs_by_status  -def filter_jobs_by_start_date(jobs_by_status=None, start_date=None): -    """ -    """ +def filter_jobs_by_start_date(client=None, project_key=None, start_date=None): +    """List jobs that were started after a specific date. + +    Args: +        client: A handle on the target DSS instance +        project_key: A string representing the target project key +        start_date: A string of the form 'YYYY/mm/dd' + +    Returns: +        jobs_after_start_date: A dict of lists mapping jobs and their states +    """ +    jobs_by_status = list_jobs_by_status(client, project_key)     start_date_timestamp = int(datetime.strptime(start_date, "%Y/%m/%d").strftime("%s")) * 1000     is_after_start_date = lambda x, d: x["def"]["initiationTimestamp"] > d     jobs_after_start_date = {_status: [job for job in _list if is_after_start_date(job, start_date_timestamp)] for _status, _list in jobs_by_status.items()}     return jobs_after_start_date +def abort_all_running_jobs(client=None, project_key=None): +    """Terminate all running jobs in a project. + +    Args: +        client: A handle on the target DSS instance +        project_key: A string representing the target project key +    """ + +    project = client.get_project(project_key) +    aborted_jobs = [] +    for job in project.list_jobs(): +        if not job["stableState"]: +            job_id = job["def"]["id"] +            aborted_jobs.append(job_id) +            project.get_job(job_id).abort() +    print(f"Aborted {len(aborted_jobs)} running jobs") + From d6a215fd3b43636436a4119077ea5403654a7a9b Mon Sep 17 00:00:00 2001 From: Harizo Rajaona Date: Thu, 5 Nov 2020 11:31:24 +0100 Subject: [PATCH 05/14] Make first samples importable --- .gitignore | 1 + admin/__init__.py | 0 admin/{list_jobs.py => job_utils.py} | 0 datasets/README.md | 1 + 4 files changed, 2 insertions(+) create mode 100644 admin/__init__.py rename admin/{list_jobs.py => job_utils.py} (100%) diff --git a/.gitignore b/.gitignore index 07bc6ad..48abe11 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ venv/ .vscode/ +__pycache__/ diff --git a/admin/__init__.py b/admin/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/admin/list_jobs.py b/admin/job_utils.py similarity index 100% rename from admin/list_jobs.py rename to admin/job_utils.py diff --git a/datasets/README.md b/datasets/README.md index e60dc27..9894a39 100644 --- a/datasets/README.md +++ b/datasets/README.md @@ -3,6 +3,7 @@ ## TODO - [ ] Programmatically build partitions +- [ ] Retrieve last build date (PR #3) - [ ] Read from/write to non-local-FS-folders - [ ] Flag and delete orphaned datasets - [ ] Schema propagation from updated dataset - [ ] Create "Upload" dataset and add/replace file(s) From 8a601b629d944fca86d4b04f663ea146a893fa78 Mon Sep 17 00:00:00 2001 From: Harizo Rajaona Date: Fri, 6 Nov 2020 11:38:34 +0100 Subject: [PATCH 06/14] More ideas --- README.md | 12 ++++++++++++ datasets/README.md | 1 + 2 files changed, 13 insertions(+) diff --git a/README.md b/README.md index f63b577..20111b8 100644 --- a/README.md +++ b/README.md @@ -7,6 +7,18 @@ Various code samples for using DSS (DSS >= 8.0.3) +#### Use within DSS (as project library) +- Register the repository in the project library's Git references +- No need to specify remote DSS params +- Import the samples directly from your recipes and notebooks + +#### Outside of DSS +- Clone the repository and archive it (tar/zip) +- Create a virtualenv with the DSS requirements and install the archive +- Import the samples from your own scripts
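+
+A minimal connection sketch for the "Outside of DSS" case (the URL and API key below are placeholders for your own instance):
+```python
+import dataikuapi
+
+# Base URL of the DSS instance and a personal API key from your profile page
+client = dataikuapi.DSSClient("https://dss.example.com:11200", "YOUR_API_KEY_SECRET")
+
+# Sanity check: prints the identity attached to the API key
+print(client.get_auth_info())
+```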
+ +You can reuse them as they are, customize them for your own needs, and even package them into plugins. + Create a dedicated virtual environment and install the following packages: * `dataiku-internal-client`: follow the instructions in the [DSS doc](https://doc.dataiku.com/dss/latest/python-api/outside-usage.html#installing-the-package) diff --git a/datasets/README.md b/datasets/README.md index 9894a39..9043a51 100644 --- a/datasets/README.md +++ b/datasets/README.md @@ -5,6 +5,7 @@ - [ ] Programmatically build partitions - [ ] Retrieve last build date (PR #3) - [ ] Read from/write to non-local-FS-folders +- [ ] Run containerized execution with input/output data in managed folders - [ ] Flag and delete orphaned datasets - [ ] Schema propagation from updated dataset - [ ] Create "Upload" dataset and add/replace file(s) From 699f2cd81d60f613e85ae5e08124fdce03f55441 Mon Sep 17 00:00:00 2001 From: Harizo Rajaona Date: Mon, 16 Nov 2020 17:47:18 +0100 Subject: [PATCH 07/14] Add edit_project_permissions() --- admin/README.md | 16 +++++++++++++--- admin/project_utils.py | 33 +++++++++++++++++++++++++++++++++ 2 files changed, 46 insertions(+), 3 deletions(-) create mode 100644 admin/project_utils.py diff --git a/admin/README.md b/admin/README.md index 5b2eada..0ac99db 100644 --- a/admin/README.md +++ b/admin/README.md @@ -2,9 +2,19 @@ ## TODO -- [ ] List jobs currently running +### Jobs +- [x] List jobs currently running + +### Projects +- [x] Assign group permissions to project +- [ ] Create & manage project folders +### Code environments - [ ] Create a code environment from a list of packages +- [ ] Create a mapping between code environments and Python/R recipes + +### Security & user isolation - [ ] Programmatically add impersonation rules + +### Infrastructure - [ ] Create API service infrastructure -- [ ] Create a mapping between code environments and Python/R recipes -- [ ] Create and manage project folders + diff --git a/admin/project_utils.py b/admin/project_utils.py new file mode 100644 index 0000000..503dddd --- /dev/null +++ b/admin/project_utils.py @@ -0,0 +1,33 @@ +import dataiku + +import dataiku + +def edit_project_permissions(client=None, project_key=None, group=None, perms=None, revoke=False): +    """Grant or revoke project permissions for a given group.
+ +    Args: +        client: A handle on the target DSS instance +        project_key: A string representing the target project key +        group: A string representing the target group name +        perms: A list of permissions to grant +        revoke: A boolean for completely revoking access to the project +    """ + +    prj = client.get_project(project_key) +    perm_obj = prj.get_permissions() +    perm_list = perm_obj["permissions"] +    for p in list(perm_list): +        if p["group"] == group: +            print("Deleting existing permissions...") +            perm_list.remove(p) +    if revoke: +        perm_obj["permissions"] = perm_list +        print(f"Revoking all permissions on project {project_key} for group {group}") +    else: +        if not perms: +            print("Missing permission list, will grant ADMIN instead...") +            perms = ["admin"] +        new_group_perms = dict({"group": group}, **{p: True for p in perms}) +        perm_obj["permissions"].append(new_group_perms) +        print(f"Granting {perms} to group {group} on project {project_key}...") +    prj.set_permissions(perm_obj) \ No newline at end of file From d2f0eb9a33974f7deee83dd6d58f4b609abc203a Mon Sep 17 00:00:00 2001 From: Harizo Rajaona Date: Sat, 19 Dec 2020 10:36:33 +0100 Subject: [PATCH 08/14] More things on todo list --- machine_learning/README.md | 24 ++++++++++++++++++++++++ webapps/README.md | 1 + 2 files changed, 25 insertions(+) diff --git a/machine_learning/README.md b/machine_learning/README.md index fd20a25..7ef4240 100644 --- a/machine_learning/README.md +++ b/machine_learning/README.md @@ -1,8 +1,32 @@ # Machine learning +- [ ] List all models and corresp. active version in a project - [ ] "Pure code" model training and batch scoring in PyTorch - [ ] "Pure code" model training and batch scoring in Tensorflow 2.x +- [ ] Custom model deployed on API service - [ ] Visual ML: custom preprocessing (numerical + categorical) - [ ] Visual ML: custom evaluation metric (classification + regression) - [ ] Visual ML: custom Python model (classification + regression) - [ ] Visual ML: download pre-trained model in a managed folder +- [ ] Retrieve and deploy the best model of a training session in the visual analysis +    ``` +    import dataiku +client = dataiku.api_client() +project = client.get_project('YOUR_PROJECT_KEY') + +analysis_id = 'k2BRw36W' # this can be found in the analysis URL or using project.list_analyses() +ml_taskid = 'aG8nyE8E' # this can be found in the mltask URL or using analysis.list_ml_tasks() +model_name = 'my_model' # name of the model that will be deployed to flow +train_set = 'train' # name of my trainset + +analysis = project.get_analysis(analysis_id) +mltask = analysis.get_ml_task(ml_taskid) +trained_models = mltask.get_trained_models_ids() +trained_models_snippets = [mltask.get_trained_model_snippet(model) for model in trained_models] + +# Compare models to find the one you want to deploy, here we want to deploy the model with best r2 score +best_model = max(trained_models_snippets, key=lambda x:x['r2']) +# Deploy the best model to the flow, can also use mltask.redeploy_to_flow() to update an existing model +mltask.deploy_to_flow(best_model['fullModelId'], model_name, train_set ) +``` + diff --git a/webapps/README.md b/webapps/README.md index ed38fbf..990c06c 100644 --- a/webapps/README.md +++ b/webapps/README.md @@ -1,5 +1,6 @@ # Webapps +- [ ] Helpers (REST) to start/stop/list webapp backends - [ ] Authenticate users on a Flask webapp - [ ] Maintaining a model per browser/frontend/user on a Flask webapp - [ ] Authenticate users on a Shiny webapp - [ ] Display interactive time series in a Shiny webapp - [ ] Display interactive time series in a Bokeh webapp From b67fe385512432f4dfc50d8b15ce12597fcefc85 Mon Sep 17 00:00:00 2001
From: Harizo Rajaona Date: Sun, 20 Dec 2020 10:34:19 +0100 Subject: [PATCH 09/14] Add explore_saved_models --- admin/project_utils.py | 4 +--- machine_learning/saved_model_utils.py | 30 +++++++++++++++++++++++++++ 2 files changed, 31 insertions(+), 3 deletions(-) create mode 100644 machine_learning/saved_model_utils.py diff --git a/admin/project_utils.py b/admin/project_utils.py index 503dddd..8187901 100644 --- a/admin/project_utils.py +++ b/admin/project_utils.py @@ -1,7 +1,5 @@ import dataiku -import dataiku - def edit_project_permissions(client=None, project_key=None, group=None, perms=None, revoke=False): """Grant or revoke project permissions for a given group. @@ -30,4 +28,4 @@ def edit_project_permissions(client=None, project_key=None, group=None, perms=No new_group_perms = dict({"group": group}, **{p: True for p in perms}) perm_obj["permissions"].append(new_group_perms) print(f"Granting {perms} to group {group} on project {project_key}...") -    prj.set_permissions(perm_obj) \ No newline at end of file +    prj.set_permissions(perm_obj) diff --git a/machine_learning/saved_model_utils.py b/machine_learning/saved_model_utils.py new file mode 100644 index 0000000..00ef12d --- /dev/null +++ b/machine_learning/saved_model_utils.py @@ -0,0 +1,30 @@ +import dataiku + +def explore_saved_models(client=None, project_key=None): +    """List saved models of a project and give details on the active versions. + +    Args: +        client: A handle on the target DSS instance +        project_key: A string representing the target project key + +    Returns: +        smdl_list: A list of dicts with all saved model ids and perf + algorithm +            for the active versions. + +    """ +    smdl_list = [] +    prj = client.get_project(project_key) +    smdl_ids = [x["id"] for x in prj.list_saved_models()] +    for smdl in smdl_ids: +        data = {} +        obj = prj.get_saved_model(smdl) +        data["version_ids"] = [m["id"] for m in obj.list_versions()] +        active_version_id = obj.get_active_version()["id"] +        active_version_details = obj.get_version_details(active_version_id) +        data["active_version"] = {"id": active_version_id, +                                  "algorithm": active_version_details.details["actualParams"]["resolved"]["algorithm"], +                                  "performance_metrics": active_version_details.get_performance_metrics()} +        smdl_list.append(data) +    return smdl_list + + From f0e20b3179553d58a113c6cf3e3992114c25169e Mon Sep 17 00:00:00 2001 From: Harizo Rajaona Date: Tue, 12 Jan 2021 12:24:06 +0100 Subject: [PATCH 10/14] More details in READMEs and mltask stuff --- admin/README.md | 20 -------------------- admin/spark_utils.py | 6 ++++++ ci_cd/README.md | 3 +++ deployer/README.md | 3 +++ machine_learning/README.md | 2 +- machine_learning/mltask_utils.py | 32 ++++++++++++++++++++++++++++++++ projects/README.md | 27 +++++++++++++++++++++++++++ scenarios/README.md | 5 +++++ 8 files changed, 77 insertions(+), 21 deletions(-) delete mode 100644 admin/README.md create mode 100644 admin/spark_utils.py create mode 100644 ci_cd/README.md create mode 100644 deployer/README.md create mode 100644 machine_learning/mltask_utils.py create mode 100644 projects/README.md diff --git a/admin/README.md b/admin/README.md deleted file mode 100644 index 0ac99db..0000000 --- a/admin/README.md +++ /dev/null @@ -1,20 +0,0 @@ -# Administration - -## TODO - -### Jobs -- [x] List jobs currently running - -### Projects -- [x] Assign group permissions to project -- [ ] Create & manage project folders -### Code environments -- [ ] Create a code environment from a list of packages -- [ ] Create a mapping between code environments and Python/R recipes - 
-### Security & user isolation -- [ ] Programmatically add impersonation rules - -### Infrastructure -- [ ] Create API service infrastructure - diff --git a/admin/spark_utils.py b/admin/spark_utils.py new file mode 100644 index 0000000..d7b5a10 --- /dev/null +++ b/admin/spark_utils.py @@ -0,0 +1,6 @@ +import dataiku + +def add_spark_config(client=None, config=None): +    raise NotImplementedError + + diff --git a/ci_cd/README.md b/ci_cd/README.md new file mode 100644 index 0000000..5b70d0d --- /dev/null +++ b/ci_cd/README.md @@ -0,0 +1,3 @@ +# CI/CD + +- [ ] (?) Example of Azure DevOps pipeline to deploy on the automation node diff --git a/deployer/README.md b/deployer/README.md new file mode 100644 index 0000000..b15d067 --- /dev/null +++ b/deployer/README.md @@ -0,0 +1,3 @@ +- Keep a database connection persistent to speed up response time by opening a connection pool outside of the API function +- Image classification scoring on a custom deep learning model +- \ No newline at end of file diff --git a/machine_learning/README.md b/machine_learning/README.md index 7ef4240..8e63100 100644 --- a/machine_learning/README.md +++ b/machine_learning/README.md @@ -1,6 +1,6 @@ # Machine learning -- [ ] List all models and corresp. active version in a project +- [x] List all models and corresp. active version in a project - [ ] "Pure code" model training and batch scoring in PyTorch - [ ] "Pure code" model training and batch scoring in Tensorflow 2.x - [ ] Custom model deployed on API service diff --git a/machine_learning/mltask_utils.py b/machine_learning/mltask_utils.py new file mode 100644 index 0000000..723f022 --- /dev/null +++ b/machine_learning/mltask_utils.py @@ -0,0 +1,32 @@ +import dataiku + +def deploy_best_model(client=None, +                      project_key=None, +                      analysis_id=None, +                      mltask_id=None, +                      metric=None): +    """Deploy the best model (according to the input metric) of a mltask to the flow. + +    Args: +        client: A handle on the DSS instance +        project_key: A string representing the target project key +        analysis_id: A string linking to the target visual analysis +        mltask_id: A string linking to the target mltask in a given analysis +        metric: A string defining which metric to use for performance ranking + +    Returns: +    """ +    # WIP +    prj = client.get_project(project_key) +    analysis = prj.get_analysis(analysis_id) +    mltask = analysis.get_ml_task(mltask_id) +    trained_models = mltask.get_trained_models_ids() +    trained_models_snippets = [mltask.get_trained_model_snippet(m) for m in trained_models] +    best_model = max(trained_models_snippets, key=lambda x:x[metric]) +    return best_model + + + + + + diff --git a/projects/README.md b/projects/README.md new file mode 100644 index 0000000..f1ccdbb --- /dev/null +++ b/projects/README.md @@ -0,0 +1,27 @@ +- Build all +    ```python +    client = dataiku.api_client() +    project = client.get_project(dataiku.default_project_key()) +    flow = project.get_flow() +    graph = flow.get_graph() +    for k,v in graph.data.get('nodes').items(): +        if v.get('successors') == []: +            definition = { +                "type" : 'RECURSIVE_BUILD', +                "outputs" : [{"id": k}] +            } +            print('Building dataset {}'.format(k)) +            job = project.start_job(definition) +    ``` +    Will need adjustments if there are saved models (see the sketch below).
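+    One possible adjustment, sketched under the assumption that each node dict in `graph.data` carries a `type` field distinguishing saved models from datasets (verify the exact values on your own flow):
+    ```python
+    for k, v in graph.data.get('nodes').items():
+        # Only start builds from terminal nodes that are not saved models
+        if v.get('successors') == [] and v.get('type') != 'SAVED_MODEL':
+            definition = {"type": 'RECURSIVE_BUILD', "outputs": [{"id": k}]}
+            job = project.start_job(definition)
+    ```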
+ +- Build specific tags only +- Build specific zones only +- Detect schema changes on a dataset and propagate them +    ```python +    settings = dataset.get_settings() +    settings.get_raw()["schema"] = {"columns":[]} +    settings.save() +    new_settings = dataset.autodetect_settings() +    new_settings.save() +``` diff --git a/scenarios/README.md b/scenarios/README.md index 940ffb3..43de12f 100644 --- a/scenarios/README.md +++ b/scenarios/README.md @@ -1,3 +1,8 @@ # Scenarios - [ ] Implement a timeout for a particular scenario step + +- View all the "run after scenario" dependencies between projects +> you'll need to write some code using the public API to loop over the scenario settings, look for follow_scenariorun triggers, and build the dependency tree yourself
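+A starting point for that loop, as a sketch only (it assumes the raw scenario settings expose a `triggers` list, and that a `follow_scenariorun` trigger stores the upstream scenario in its `params`; verify the exact field names on one of your own triggers):
+```python
+from collections import defaultdict
+
+def scenario_dependencies(client):
+    """Map (project_key, scenario_id) -> list of (project_key, scenario_id) it runs after."""
+    deps = defaultdict(list)
+    for project_key in client.list_project_keys():
+        prj = client.get_project(project_key)
+        for scn in prj.list_scenarios():
+            raw = prj.get_scenario(scn["id"]).get_settings().get_raw()
+            for trigger in raw.get("triggers", []):
+                if trigger.get("type") == "follow_scenariorun":
+                    # Assumed params layout: upstream project key and scenario id
+                    params = trigger.get("params", {})
+                    deps[(project_key, scn["id"])].append(
+                        (params.get("projectKey", project_key), params.get("scenarioId")))
+    return deps
+```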
+ + From 30a8029d11da7233c8d54f151aab6e2df775030b Mon Sep 17 00:00:00 2001 From: Harizo Rajaona Date: Tue, 12 Jan 2021 16:56:43 +0100 Subject: [PATCH 11/14] Scenarios: append email reporter recipients --- scenarios/reporters.py | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) create mode 100644 scenarios/reporters.py diff --git a/scenarios/reporters.py b/scenarios/reporters.py new file mode 100644 index 0000000..0186e74 --- /dev/null +++ b/scenarios/reporters.py @@ -0,0 +1,33 @@ +import dataiku + +def add_email_recipients(client=None, project_key=None, scenario_ids=[], recipients=[]): +    """Append additional recipients to scenario email reporters. + +    Args: +        client: A handle on the target DSS instance +        project_key: A string representing the target project key +        scenario_ids: A list of scenario ID strings +        recipients: A list of email address strings + +    """ + +    prj = client.get_project(project_key) +    if not scenario_ids: +        print("No scenario id specified, will apply to ALL scenarios") +        scenario_ids = [scn["id"] for scn in prj.list_scenarios()] + +    for scn_id in scenario_ids: +        handle = prj.get_scenario(scn_id) +        settings = handle.get_settings() +        reporters = settings.raw_reporters +        if not reporters: +            print("No reporter found.") +        else: +            for rep in reporters: +                if rep["messaging"]["type"] == "mail-scenario": +                    if rep["messaging"]["configuration"]["recipient"]: +                        sep = ', ' +                    else: +                        sep = '' +                    rep["messaging"]["configuration"]["recipient"] += (sep + ', '.join(recipients)) +        settings.save() From c8dea96a6d931b5057ae4fe32dff9ef2be572d8e Mon Sep 17 00:00:00 2001 From: Harizo Rajaona Date: Wed, 13 Jan 2021 15:47:21 +0100 Subject: [PATCH 12/14] Improve best model deployment from ML task --- applications/README.md | 4 -- ci_cd/README.md | 3 -- machine_learning/README.md | 32 ------------- machine_learning/mltask_utils.py | 81 +++++++++++++++++++++++------ scenarios/reporters.py | 5 +---- 5 files changed, 69 insertions(+), 52 deletions(-) delete mode 100644 applications/README.md delete mode 100644 ci_cd/README.md delete mode 100644 machine_learning/README.md diff --git a/applications/README.md b/applications/README.md deleted file mode 100644 index 29b471d..0000000 --- a/applications/README.md +++ /dev/null @@ -1,4 +0,0 @@ -# Dataiku Applications - -- [ ] App-as-API example -- [ ] List and cleanup application instances diff --git a/ci_cd/README.md b/ci_cd/README.md deleted file mode 100644 index 5b70d0d..0000000 --- a/ci_cd/README.md +++ /dev/null @@ -1,3 +0,0 @@ -# CI/CD - -- [ ] (?) Example of Azure DevOps pipeline to deploy on the automation node diff --git a/machine_learning/README.md b/machine_learning/README.md deleted file mode 100644 index 8e63100..0000000 --- a/machine_learning/README.md +++ /dev/null @@ -1,32 +0,0 @@ -# Machine learning - -- [x] List all models and corresp. active version in a project -- [ ] "Pure code" model training and batch scoring in PyTorch -- [ ] "Pure code" model training and batch scoring in Tensorflow 2.x -- [ ] Custom model deployed on API service -- [ ] Visual ML: custom preprocessing (numerical + categorical) -- [ ] Visual ML: custom evaluation metric (classification + regression) -- [ ] Visual ML: custom Python model (classification + regression) -- [ ] Visual ML: download pre-trained model in a managed folder -- [ ] Retrieve and deploy the best model of a training session in the visual analysis -    ``` -    import dataiku -client = dataiku.api_client() -project = client.get_project('YOUR_PROJECT_KEY') - -analysis_id = 'k2BRw36W' # this can be found in the analysis URL or using project.list_analyses() -ml_taskid = 'aG8nyE8E' # this can be found in the mltask URL or using analysis.list_ml_tasks() -model_name = 'my_model' # name of the model that will be deployed to flow -train_set = 'train' # name of my trainset - -analysis = project.get_analysis(analysis_id) -mltask = analysis.get_ml_task(ml_taskid) -trained_models = mltask.get_trained_models_ids() -trained_models_snippets = [mltask.get_trained_model_snippet(model) for model in trained_models] - -# Compare models to find the one you want to deploy, here we want to deploy the model with best r2 score -best_model = max(trained_models_snippets, key=lambda x:x['r2']) -# Deploy the best model to the flow, can also use mltask.redeploy_to_flow() to update an existing model -mltask.deploy_to_flow(best_model['fullModelId'], model_name, train_set ) -``` - diff --git a/machine_learning/mltask_utils.py b/machine_learning/mltask_utils.py index 723f022..e5e062b 100644 --- a/machine_learning/mltask_utils.py +++ b/machine_learning/mltask_utils.py @@ -1,32 +1,89 @@ import dataiku -def deploy_best_model(client=None, +def get_best_model(client=None, project_key=None, analysis_id=None, -                      mltask_id=None, +                   ml_task_id=None, metric=None): -    """Deploy the best model (according to the input metric) of a mltask to the flow. +    """Return the 'best model' (according to the input metric) of an ML task. Args: client: A handle on the DSS instance project_key: A string representing the target project key -        analysis_id: A string linking to the target visual analysis -        mltask_id: A string linking to the target mltask in a given analysis +        analysis_id: A string linking to the target visual analysis. +            Can be found in the analysis URL or via +            dataikuapi.dss.project.DSSProject.list_analyses() +        ml_task_id: A string linking to the target MLTask in a given analysis. +            Can be found in the ML task URL or via +            dataikuapi.dss.analysis.DSSAnalysis.list_ml_tasks() metric: A string defining which metric to use for performance ranking Returns: +        ml_task: A handle to interact with the ML task. +            Useful when (re)deploying the model.
+ +        best_model_id: A string containing the ID of the ML task's 'best model' + +    """ -    # WIP     prj = client.get_project(project_key)     analysis = prj.get_analysis(analysis_id) -    mltask = analysis.get_ml_task(mltask_id) -    trained_models = mltask.get_trained_models_ids() -    trained_models_snippets = [mltask.get_trained_model_snippet(m) for m in trained_models] -    best_model = max(trained_models_snippets, key=lambda x:x[metric]) -    return best_model - +    ml_task = analysis.get_ml_task(ml_task_id) +    trained_models = ml_task.get_trained_models_ids() +    trained_models_snippets = [ml_task.get_trained_model_snippet(m) for m in trained_models] +    # Assumes that for your metric, "higher is better" +    best_model_snippet = max(trained_models_snippets, key=lambda x:x[metric]) +    best_model_id = best_model_snippet["fullModelId"] +    return ml_task, best_model_id + + +def deploy_with_best_model(client=None, +                           project_key=None, +                           analysis_id=None, +                           ml_task_id=None, +                           metric=None, +                           saved_model_name=None, +                           training_dataset=None): +    """Create a new Saved Model in the Flow with the 'best model' of an ML task. + +    Args: +        client: A handle on the DSS instance +        project_key: A string representing the target project key. +        analysis_id: A string linking to the target visual analysis. +            Can be found in the analysis URL or via +            dataikuapi.dss.project.DSSProject.list_analyses(). +        ml_task_id: A string linking to the target MLTask in a given analysis. +            Can be found in the ML task URL or via +            dataikuapi.dss.analysis.DSSAnalysis.list_ml_tasks(). +        metric: A string defining which metric to use for performance ranking. +        saved_model_name: A string to name the newly-created Saved Model. +        training_dataset: A string representing the name of the dataset +            used as train set. +    """ +    ml_task, best_model_id = get_best_model(client, +                                            project_key, +                                            analysis_id, +                                            ml_task_id, +                                            metric) +    ml_task.deploy_to_flow(best_model_id, +                           saved_model_name, +                           training_dataset) + + +def update_with_best_model(client=None, +                           project_key=None, +                           analysis_id=None, +                           ml_task_id=None, +                           metric=None, +                           saved_model_name=None, +                           activate=True): +    """Update an existing Saved Model in the Flow with the 'best model' +    of an ML task.
+ +    """ +    ml_task, best_model_id = get_best_model(client, +                                            project_key, +                                            analysis_id, +                                            ml_task_id, +                                            metric) +    training_recipe_name = f"train_{saved_model_name}" +    ml_task.redeploy_to_flow(model_id=best_model_id, +                             recipe_name=training_recipe_name, +                             activate=activate) \ No newline at end of file diff --git a/scenarios/reporters.py b/scenarios/reporters.py index 0186e74..9de62fe 100644 --- a/scenarios/reporters.py +++ b/scenarios/reporters.py @@ -15,7 +15,6 @@ def add_email_recipients(client=None, project_key=None, scenario_ids=[], recipie     if not scenario_ids:         print("No scenario id specified, will apply to ALL scenarios")         scenario_ids = [scn["id"] for scn in prj.list_scenarios()] -     for scn_id in scenario_ids:         handle = prj.get_scenario(scn_id)         settings = handle.get_settings() From 70f7804ede68d9da3fc4e32424aea99a1b67ca8d Mon Sep 17 00:00:00 2001 From: Jean-Yves Gerardy Date: Fri, 15 Jan 2021 17:52:51 -0500 Subject: [PATCH 13/14] Add scenario and dataset snippets --- datasets/__init__.py | 0 datasets/dataset_utils.py | 16 +++++++++++++ scenarios/partitions.py | 48 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 64 insertions(+) create mode 100644 datasets/__init__.py create mode 100644 datasets/dataset_utils.py create mode 100644 scenarios/partitions.py diff --git a/datasets/__init__.py b/datasets/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/datasets/dataset_utils.py b/datasets/dataset_utils.py new file mode 100644 index 0000000..6e97fa0 --- /dev/null +++ b/datasets/dataset_utils.py @@ -0,0 +1,16 @@ +import dataiku +from datetime import datetime + +def get_last_build_date(client=None, project_key=None, dataset=None): +    """Returns a datetime object representing the last time an output +    dataset was built. +    Args: +        client: A handle on the target DSS instance. +        project_key: A string representing the target project key. +        dataset: Name of the dataset. +    """ +    dataset_info = dataiku.Dataset(dataset).get_files_info() +    last_modif = dataset_info.get("globalPaths")[0].get("lastModified") +    dt = datetime.fromtimestamp(last_modif/1000) +    return dt + diff --git a/scenarios/partitions.py b/scenarios/partitions.py new file mode 100644 index 0000000..6b371cb --- /dev/null +++ b/scenarios/partitions.py @@ -0,0 +1,48 @@ +import dataiku + +def build_all_partitions(scenario=None, +                         project_key=None, +                         input_dataset=None, +                         output_dataset=None): +    """Build all output partitions present in an input dataset. +    Requires input and output datasets to share the same partitioning +    format. +    Args: +        scenario: A dataiku.scenario.Scenario handle. +        project_key: A string representing the target project key. +        input_dataset: Name of the input dataset from which +            to list all partitions. +        output_dataset: String of the name of the dataset to build. +    """ +    input_dataset = dataiku.Dataset(input_dataset) +    partitions = input_dataset.list_partitions() +    partitions_str = ','.join(partitions) +    scenario.build_dataset(output_dataset, partitions=partitions_str) + +def build_new_partitions(scenario=None, +                         project_key=None, +                         input_dataset=None, +                         output_dataset=None): +    """Build partitions that are present in the input dataset but +    not in the output dataset (= new partitions). +    Requires input and output datasets to share the same partitioning +    format. +    Args: +        scenario: A dataiku.scenario.Scenario handle. +        project_key: A string representing the target project key. +        input_dataset: Name of the input dataset from which +            to list all partitions.
+        output_dataset: String of the name of the dataset to build. +    """ +    input_handle = dataiku.Dataset(input_dataset) +    output_handle = dataiku.Dataset(output_dataset) +    input_partitions = set(input_handle.list_partitions()) +    output_partitions = set(output_handle.list_partitions()) +    new_partitions = input_partitions - output_partitions +    partitions_str = ','.join(new_partitions) +    scenario.build_dataset(output_dataset, partitions=partitions_str) + + + + + \ No newline at end of file From 027a567d9526953361480bede48d1b55fc409bc4 Mon Sep 17 00:00:00 2001 From: Jean-Yves Gerardy Date: Tue, 19 Jan 2021 15:15:48 -0500 Subject: [PATCH 14/14] Add dynamic partition build --- datasets/dataset_utils.py | 16 ++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/datasets/dataset_utils.py b/datasets/dataset_utils.py index 6e97fa0..814d9c6 100644 --- a/datasets/dataset_utils.py +++ b/datasets/dataset_utils.py @@ -14,3 +14,19 @@ def get_last_build_date(client=None, project_key=None, dataset=None):     dt = datetime.fromtimestamp(last_modif/1000)     return dt +def build_todays_partition(client=None, project_key=None, dataset=None): +    """Build the partition for today's date in the specified dataset. +    Return the status of the build. +    Args: +        client: A handle on the target DSS instance. +        project_key: A string representing the target project key. +        dataset: Name of the dataset. +    """ +    now = datetime.now() +    partition = now.strftime("%Y-%m-%d") +    project = client.get_project(project_key) +    ds = project.get_dataset(dataset) +    job = ds.build(partitions=partition) +    return job.get_status() +
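+
+if __name__ == "__main__":
+    # Usage sketch only: the URL, API key, project and dataset names below
+    # are placeholders for your own instance.
+    import dataikuapi
+    host, key = "https://dss.example.com:11200", "YOUR_API_KEY_SECRET"
+    # dataiku.Dataset(...) needs the internal client pointed at the instance
+    dataiku.set_remote_dss(host, key)
+    dataiku.set_default_project_key("MY_PROJECT")
+    client = dataikuapi.DSSClient(host, key)
+    print(get_last_build_date(client, "MY_PROJECT", "my_dataset"))
+    print(build_todays_partition(client, "MY_PROJECT", "my_partitioned_dataset"))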