From a5171bcaa5761f915dd482987ffefa49361b9568 Mon Sep 17 00:00:00 2001 From: johannagehlen <88251805+johannagehlen@users.noreply.github.com> Date: Mon, 27 Sep 2021 18:06:09 +0200 Subject: [PATCH 1/3] Create solution_.md solution --- solution_.md | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) create mode 100644 solution_.md diff --git a/solution_.md b/solution_.md new file mode 100644 index 0000000..51e6e3c --- /dev/null +++ b/solution_.md @@ -0,0 +1,16 @@ +Titles: + +MCC Van Dyke et al., 2019: two papers: +- "The rise of Coccidioides: forces against the dust devil unleashed" +- "Fantastic yeasts and where to find them: the hidden diversity of dimorphic fungal pathogens" + + +JT Harvey, Applied Ergonomics, 2002: "An analysis of the forces required to drag sheep over various surfaces." + +DW Ziegler et al., 2005: "The Neurocognitive Effects of Alcohol on Adolescents and College Students." + +Graph: +![1output](https://user-images.githubusercontent.com/88251805/134943896-85620ba0-5120-408e-991f-3855f5b77e39.png) + +There seems to be a clear positive correlation between the number of WO students and the total hectoliters of beer consumption in the Netherlands, although causality cannot be inferred from this data. +Especially after 2012, beer consumption increases at a rate similar to that of the number of WO students. 
From 82297bb3bcf54560a1b894d579f179b936a3ae08 Mon Sep 17 00:00:00 2001 From: johannagehlen <88251805+johannagehlen@users.noreply.github.com> Date: Mon, 13 Mar 2023 11:56:57 +0100 Subject: [PATCH 2/3] Created using Colaboratory --- triplets.ipynb | 3002 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 3002 insertions(+) create mode 100644 triplets.ipynb diff --git a/triplets.ipynb b/triplets.ipynb new file mode 100644 index 0000000..33e089b --- /dev/null +++ b/triplets.ipynb @@ -0,0 +1,3002 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "provenance": [], + "authorship_tag": "ABX9TyPiEdVddnhwssHMWKmGRPcF", + "include_colab_link": true + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + } + }, + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "view-in-github", + "colab_type": "text" + }, + "source": [ + "\"Open" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "H5hliYR4D00t" + }, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import numpy as np \n", + "import networkx as nx \n", + "import io \n", + "from sklearn.feature_selection import mutual_info_classif" + ] + }, + { + "cell_type": "code", + "source": [ + "def cal_mi(df_data):\n", + " num_var = df_data.shape[1]\n", + " mi = pd.DataFrame(np.zeros(shape = (num_var, num_var)))\n", + " \n", + " for rv_f in range(num_var):\n", + " for rv_t in range(num_var):\n", + " var1_ser = df_data.iloc[:, rv_f]\n", + " var2_ser = df_data.iloc[:, rv_t]\n", + "\n", + " mi.iloc[rv_f, rv_t] = mutual_info_classif(np.transpose(var1_ser.to_numpy()).reshape(-1, 1),\n", + " var2_ser.to_numpy(),\n", + " discrete_features=True)\n", + " return mi\n", + " " + ], + "metadata": { + "id": "kcA_GMZGcg0w" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "def cond_ent_mat(mi, ch_method):\n", + " num_var = len(mi)\n", + " 
cond_ent_matrix = pd.DataFrame(np.zeros(shape = (num_var, num_var)))\n", + " for ii in range(num_var):\n", + " for jj in range(num_var):\n", + " \n", + " if ii < jj: # upper triangle\n", + " cond_ent_1 = mi.iloc[ii, ii] - mi.iloc[ii, jj]\n", + " cond_ent_2 = mi.iloc[jj, jj] - mi.iloc[ii, jj]\n", + " else:\n", + " continue\n", + " \n", + " if ch_method == 'min':\n", + " cond_ent_matrix.iloc[ii, jj] = np.min([cond_ent_1, cond_ent_2])\n", + " cond_ent_matrix.iloc[jj, ii] = np.min([cond_ent_1, cond_ent_2])\n", + " elif ch_method == 'mean':\n", + " cond_ent_matrix.iloc[ii, jj] = np.mean([cond_ent_1, cond_ent_2])\n", + " cond_ent_matrix.iloc[jj, ii] = np.mean([cond_ent_1, cond_ent_2])\n", + " elif ch_method == 'directed':\n", + " cond_ent_matrix.iloc[ii, jj] = cond_ent_2\n", + " cond_ent_matrix.iloc[jj, ii] = cond_ent_1\n", + "\n", + " return cond_ent_matrix" + ], + "metadata": { + "id": "-ClzN9q9c7we" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "def net_mat_threshold(mat, top_per):\n", + " #Reshapes the input matrix 'mat' into a 1-dimensional array and sorts it with the highest value first\n", + " mat_new = mat.copy()\n", + " reshaped = mat_new.values.flatten()\n", + "\n", + " mat_sorted = sorted(reshaped, reverse=True)\n", + " # Theshold value is calculated as the \"round(mat.size * top_per) - 1\" \n", + " threshold = mat_sorted[round(mat_new.size * top_per) - 1]\n", + " return threshold" + ], + "metadata": { + "id": "mfqGUATFdeSw" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "def list2tuple_in_list(list_in_list):\n", + " tuple_in_list = []\n", + " for list_ele in list_in_list:\n", + " tuple_in_list.append(tuple(list_ele))\n", + " return tuple_in_list" + ], + "metadata": { + "id": "Oa92kK-TdlFE" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "from google.colab import files\n", + "uploaded = files.upload()" + ], + 
"metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 74 + }, + "id": "n87kJfNXeQh3", + "outputId": "7abe32a8-d350-44bb-e673-b91598b46862" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "" + ], + "text/html": [ + "\n", + " \n", + " \n", + " Upload widget is only available when the cell has been executed in the\n", + " current browser session. Please rerun this cell to enable.\n", + " \n", + " " + ] + }, + "metadata": {} + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Saving bayesian_discr_imp_median.csv to bayesian_discr_imp_median.csv\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "yfsdata = pd.read_csv(io.BytesIO(uploaded['bayesian_discr_imp_median.csv']))" + ], + "metadata": { + "id": "SgsdBvuvfQkp" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "uploaded = files.upload()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 74 + }, + "id": "xFhpZWZ6bune", + "outputId": "48e87786-bfa2-4f08-bc22-2a29e280f7e6" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "" + ], + "text/html": [ + "\n", + " \n", + " \n", + " Upload widget is only available when the cell has been executed in the\n", + " current browser session. 
Please rerun this cell to enable.\n", + " \n", + " " + ] + }, + "metadata": {} + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Saving REAL_mi_matrix.csv to REAL_mi_matrix.csv\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "mi_mat = pd.read_csv(io.BytesIO(uploaded['REAL_mi_matrix.csv']))" + ], + "metadata": { + "id": "CmqnVpuafxOq" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "mi_mat = mi_mat.drop(\"Unnamed: 0\", axis = 1)" + ], + "metadata": { + "id": "y2idoNb-b5EV" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "mi_mat = mi_mat.set_index(mi_mat.columns)" + ], + "metadata": { + "id": "rdj04nhYfxUq" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "mi_mat" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 488 + }, + "id": "hk2G6KOwcW5X", + "outputId": "80c899b2-7e74-4af1-bcc7-247719155a5b" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " ID ika07 SP bmi07 dkv07 \\\n", + "ID 7.428927 0.826710 0.678451 2.377599 2.623946 \n", + "ika07 0.826710 0.826710 0.000565 0.017296 0.017275 \n", + "SP 0.678451 0.000565 0.678451 0.038969 0.036207 \n", + "bmi07 2.377599 0.017296 0.038969 2.377599 0.182841 \n", + "dkv07 2.623946 0.017275 0.036207 0.182841 2.623946 \n", + "... ... ... ... ... ... 
\n", + "Alb07 2.059772 0.004996 0.026900 0.088029 0.087555 \n", + "Gp07 1.646898 0.004500 0.006729 0.147910 0.083168 \n", + "bohbut07 2.112376 0.005340 0.004782 0.086725 0.073216 \n", + "beckpisteet 1.992698 0.005888 0.015664 0.060881 0.051413 \n", + "masennus_neliluokkainen 0.401963 0.000797 0.003812 0.021148 0.008700 \n", + "\n", + " syst07 fmd4007 fmd40pr07 fmd6007 fmd60pr07 \\\n", + "ID 1.812784 3.113673 2.079083 2.951174 1.720179 \n", + "ika07 0.013250 0.028769 0.006398 0.023423 0.006746 \n", + "SP 0.062950 0.016493 0.032098 0.021885 0.025041 \n", + "bmi07 0.120139 0.221258 0.078945 0.215781 0.079609 \n", + "dkv07 0.469228 0.213240 0.070349 0.214604 0.066922 \n", + "... ... ... ... ... ... \n", + "Alb07 0.046249 0.134629 0.051707 0.122689 0.045825 \n", + "Gp07 0.059573 0.090277 0.032751 0.091779 0.026222 \n", + "bohbut07 0.039863 0.119953 0.039632 0.119633 0.033847 \n", + "beckpisteet 0.026476 0.092742 0.039581 0.110792 0.031833 \n", + "masennus_neliluokkainen 0.004356 0.025208 0.012242 0.023129 0.008729 \n", + "\n", + " ... Phe07 Tyr07 Ace07 AcAce07 \\\n", + "ID ... 1.839463 1.861470 1.915611 1.819938 \n", + "ika07 ... 0.004886 0.007005 0.008355 0.008283 \n", + "SP ... 0.007877 0.048382 0.005498 0.004616 \n", + "bmi07 ... 0.141310 0.123722 0.068177 0.069353 \n", + "dkv07 ... 0.102123 0.096813 0.071508 0.057682 \n", + "... ... ... ... ... ... \n", + "Alb07 ... 0.135334 0.118419 0.118923 0.113900 \n", + "Gp07 ... 0.288467 0.151240 0.105676 0.119137 \n", + "bohbut07 ... 0.128789 0.125530 0.150931 0.493336 \n", + "beckpisteet ... 0.027561 0.029797 0.029058 0.033513 \n", + "masennus_neliluokkainen ... 
0.007077 0.009883 0.008801 0.007500 \n", + "\n", + " Crea07 Alb07 Gp07 bohbut07 beckpisteet \\\n", + "ID 1.936135 2.059772 1.646898 2.112376 1.992698 \n", + "ika07 0.006359 0.004996 0.004500 0.005340 0.005888 \n", + "SP 0.160306 0.026900 0.006729 0.004782 0.015664 \n", + "bmi07 0.088128 0.088029 0.147910 0.086725 0.060881 \n", + "dkv07 0.078587 0.087555 0.083168 0.073216 0.051413 \n", + "... ... ... ... ... ... \n", + "Alb07 0.150669 2.059772 0.206831 0.115041 0.032030 \n", + "Gp07 0.122943 0.206831 1.646898 0.138273 0.025404 \n", + "bohbut07 0.122192 0.115041 0.138273 2.112376 0.027945 \n", + "beckpisteet 0.039429 0.032030 0.025404 0.027945 1.992698 \n", + "masennus_neliluokkainen 0.011058 0.006570 0.007075 0.007552 0.401963 \n", + "\n", + " masennus_neliluokkainen \n", + "ID 0.401963 \n", + "ika07 0.000797 \n", + "SP 0.003812 \n", + "bmi07 0.021148 \n", + "dkv07 0.008700 \n", + "... ... \n", + "Alb07 0.006570 \n", + "Gp07 0.007075 \n", + "bohbut07 0.007552 \n", + "beckpisteet 0.401963 \n", + "masennus_neliluokkainen 0.401963 \n", + "\n", + "[691 rows x 691 columns]" + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
IDika07SPbmi07dkv07syst07fmd4007fmd40pr07fmd6007fmd60pr07...Phe07Tyr07Ace07AcAce07Crea07Alb07Gp07bohbut07beckpisteetmasennus_neliluokkainen
ID7.4289270.8267100.6784512.3775992.6239461.8127843.1136732.0790832.9511741.720179...1.8394631.8614701.9156111.8199381.9361352.0597721.6468982.1123761.9926980.401963
ika070.8267100.8267100.0005650.0172960.0172750.0132500.0287690.0063980.0234230.006746...0.0048860.0070050.0083550.0082830.0063590.0049960.0045000.0053400.0058880.000797
SP0.6784510.0005650.6784510.0389690.0362070.0629500.0164930.0320980.0218850.025041...0.0078770.0483820.0054980.0046160.1603060.0269000.0067290.0047820.0156640.003812
bmi072.3775990.0172960.0389692.3775990.1828410.1201390.2212580.0789450.2157810.079609...0.1413100.1237220.0681770.0693530.0881280.0880290.1479100.0867250.0608810.021148
dkv072.6239460.0172750.0362070.1828412.6239460.4692280.2132400.0703490.2146040.066922...0.1021230.0968130.0715080.0576820.0785870.0875550.0831680.0732160.0514130.008700
..................................................................
Alb072.0597720.0049960.0269000.0880290.0875550.0462490.1346290.0517070.1226890.045825...0.1353340.1184190.1189230.1139000.1506692.0597720.2068310.1150410.0320300.006570
Gp071.6468980.0045000.0067290.1479100.0831680.0595730.0902770.0327510.0917790.026222...0.2884670.1512400.1056760.1191370.1229430.2068311.6468980.1382730.0254040.007075
bohbut072.1123760.0053400.0047820.0867250.0732160.0398630.1199530.0396320.1196330.033847...0.1287890.1255300.1509310.4933360.1221920.1150410.1382732.1123760.0279450.007552
beckpisteet1.9926980.0058880.0156640.0608810.0514130.0264760.0927420.0395810.1107920.031833...0.0275610.0297970.0290580.0335130.0394290.0320300.0254040.0279451.9926980.401963
masennus_neliluokkainen0.4019630.0007970.0038120.0211480.0087000.0043560.0252080.0122420.0231290.008729...0.0070770.0098830.0088010.0075000.0110580.0065700.0070750.0075520.4019630.401963
\n", + "

691 rows × 691 columns

\n", + "
\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "
\n", + "
\n", + " " + ] + }, + "metadata": {}, + "execution_count": 17 + } + ] + }, + { + "cell_type": "code", + "source": [ + "ch_mat = cond_ent_mat(mi_mat, \"mean\")" + ], + "metadata": { + "id": "H80AWYWVrRcb" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "ch_mat = ch_mat.set_index(mi_mat.columns)\n", + "ch_mat.columns = mi_mat.columns\n", + "ch_mat" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 488 + }, + "id": "7zvWvl7Ac_4D", + "outputId": "3df87090-4a38-43b8-9c73-f3809b5c0810" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " ID ika07 SP bmi07 dkv07 \\\n", + "ID 0.000000 3.301108 3.375238 2.525664 2.402491 \n", + "ika07 3.301108 0.000000 0.752016 1.584859 1.708054 \n", + "SP 3.375238 0.752016 0.000000 1.489056 1.614992 \n", + "bmi07 2.525664 1.584859 1.489056 0.000000 2.317931 \n", + "dkv07 2.402491 1.708054 1.614992 2.317931 0.000000 \n", + "... ... ... ... ... ... \n", + "Alb07 2.684577 1.438245 1.342212 2.130657 2.254305 \n", + "Gp07 2.891015 1.232304 1.155945 1.864339 2.052254 \n", + "bohbut07 2.658275 1.464204 1.390632 2.158262 2.294945 \n", + "beckpisteet 2.718114 1.403817 1.319910 2.124267 2.256909 \n", + "masennus_neliluokkainen 3.513482 0.613540 0.536395 1.368633 1.504255 \n", + "\n", + " syst07 fmd4007 fmd40pr07 fmd6007 fmd60pr07 \\\n", + "ID 2.808072 2.157627 2.674922 2.238876 2.854374 \n", + "ika07 1.306497 1.941422 1.446498 1.865520 1.266698 \n", + "SP 1.182668 1.879569 1.346668 1.792928 1.174274 \n", + "bmi07 1.975053 2.524378 2.149396 2.448606 1.969280 \n", + "dkv07 1.749137 2.655569 2.281165 2.572956 2.105141 \n", + "... ... ... ... ... ... 
\n", + "Alb07 1.890029 2.452093 2.017721 2.382784 1.844151 \n", + "Gp07 1.670268 2.290008 1.830239 2.207258 1.657317 \n", + "bohbut07 1.922717 2.493072 2.056098 2.412142 1.882431 \n", + "beckpisteet 1.876265 2.460444 1.996309 2.361144 1.824605 \n", + "masennus_neliluokkainen 1.103017 1.732610 1.228281 1.653440 1.052342 \n", + "\n", + " ... Phe07 Tyr07 Ace07 AcAce07 \\\n", + "ID ... 2.794732 2.783729 2.756658 2.804495 \n", + "ika07 ... 1.328201 1.337085 1.362806 1.315041 \n", + "SP ... 1.251080 1.221579 1.291533 1.244578 \n", + "bmi07 ... 1.967221 1.995813 2.078428 2.029415 \n", + "dkv07 ... 2.129582 2.145895 2.198271 2.164260 \n", + "... ... ... ... ... ... \n", + "Alb07 ... 1.814284 1.842202 1.868768 1.825955 \n", + "Gp07 ... 1.454713 1.602943 1.675579 1.614281 \n", + "bohbut07 ... 1.847131 1.861393 1.863063 1.472821 \n", + "beckpisteet ... 1.888519 1.897287 1.925097 1.872805 \n", + "masennus_neliluokkainen ... 1.113637 1.121834 1.149986 1.103451 \n", + "\n", + " Crea07 Alb07 Gp07 bohbut07 beckpisteet \\\n", + "ID 2.746396 2.684577 2.891015 2.658275 2.718114 \n", + "ika07 1.375063 1.438245 1.232304 1.464204 1.403817 \n", + "SP 1.146987 1.342212 1.155945 1.390632 1.319910 \n", + "bmi07 2.068739 2.130657 1.864339 2.158262 2.124267 \n", + "dkv07 2.201454 2.254305 2.052254 2.294945 2.256909 \n", + "... ... ... ... ... ... \n", + "Alb07 1.847285 0.000000 1.646504 1.971033 1.994205 \n", + "Gp07 1.668573 1.646504 0.000000 1.741364 1.794394 \n", + "bohbut07 1.902064 1.971033 1.741364 0.000000 2.024592 \n", + "beckpisteet 1.924987 1.994205 1.794394 2.024592 0.000000 \n", + "masennus_neliluokkainen 1.157991 1.224298 1.017355 1.249618 0.795367 \n", + "\n", + " masennus_neliluokkainen \n", + "ID 3.513482 \n", + "ika07 0.613540 \n", + "SP 0.536395 \n", + "bmi07 1.368633 \n", + "dkv07 1.504255 \n", + "... ... 
\n", + "Alb07 1.224298 \n", + "Gp07 1.017355 \n", + "bohbut07 1.249618 \n", + "beckpisteet 0.795367 \n", + "masennus_neliluokkainen 0.000000 \n", + "\n", + "[691 rows x 691 columns]" + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
IDika07SPbmi07dkv07syst07fmd4007fmd40pr07fmd6007fmd60pr07...Phe07Tyr07Ace07AcAce07Crea07Alb07Gp07bohbut07beckpisteetmasennus_neliluokkainen
ID0.0000003.3011083.3752382.5256642.4024912.8080722.1576272.6749222.2388762.854374...2.7947322.7837292.7566582.8044952.7463962.6845772.8910152.6582752.7181143.513482
ika073.3011080.0000000.7520161.5848591.7080541.3064971.9414221.4464981.8655201.266698...1.3282011.3370851.3628061.3150411.3750631.4382451.2323041.4642041.4038170.613540
SP3.3752380.7520160.0000001.4890561.6149921.1826681.8795691.3466681.7929281.174274...1.2510801.2215791.2915331.2445781.1469871.3422121.1559451.3906321.3199100.536395
bmi072.5256641.5848591.4890560.0000002.3179311.9750532.5243782.1493962.4486061.969280...1.9672211.9958132.0784282.0294152.0687392.1306571.8643392.1582622.1242671.368633
dkv072.4024911.7080541.6149922.3179310.0000001.7491372.6555692.2811652.5729562.105141...2.1295822.1458952.1982712.1642602.2014542.2543052.0522542.2949452.2569091.504255
..................................................................
Alb072.6845771.4382451.3422122.1306572.2543051.8900292.4520932.0177212.3827841.844151...1.8142841.8422021.8687681.8259551.8472850.0000001.6465041.9710331.9942051.224298
Gp072.8910151.2323041.1559451.8643392.0522541.6702682.2900081.8302392.2072581.657317...1.4547131.6029431.6755791.6142811.6685731.6465040.0000001.7413641.7943941.017355
bohbut072.6582751.4642041.3906322.1582622.2949451.9227172.4930722.0560982.4121421.882431...1.8471311.8613931.8630631.4728211.9020641.9710331.7413640.0000002.0245921.249618
beckpisteet2.7181141.4038171.3199102.1242672.2569091.8762652.4604441.9963092.3611441.824605...1.8885191.8972871.9250971.8728051.9249871.9942051.7943942.0245920.0000000.795367
masennus_neliluokkainen3.5134820.6135400.5363951.3686331.5042551.1030171.7326101.2282811.6534401.052342...1.1136371.1218341.1499861.1034511.1579911.2242981.0173551.2496180.7953670.000000
\n", + "

691 rows × 691 columns

\n", + "
\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "
\n", + "
\n", + " " + ] + }, + "metadata": {}, + "execution_count": 21 + } + ] + }, + { + "cell_type": "code", + "source": [ + "ch_mat.to_csv('ch_mat.csv') \n", + "files.download('ch_mat.csv')" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 17 + }, + "id": "0PYzO0bIdPV1", + "outputId": "2960f3f6-b011-45b1-f58f-8c4a88de8a80" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "" + ], + "application/javascript": [ + "\n", + " async function download(id, filename, size) {\n", + " if (!google.colab.kernel.accessAllowed) {\n", + " return;\n", + " }\n", + " const div = document.createElement('div');\n", + " const label = document.createElement('label');\n", + " label.textContent = `Downloading \"${filename}\": `;\n", + " div.appendChild(label);\n", + " const progress = document.createElement('progress');\n", + " progress.max = size;\n", + " div.appendChild(progress);\n", + " document.body.appendChild(div);\n", + "\n", + " const buffers = [];\n", + " let downloaded = 0;\n", + "\n", + " const channel = await google.colab.kernel.comms.open(id);\n", + " // Send a message to notify the kernel that we're ready.\n", + " channel.send({})\n", + "\n", + " for await (const message of channel.messages) {\n", + " // Send a message to notify the kernel that we're ready.\n", + " channel.send({})\n", + " if (message.buffers) {\n", + " for (const buffer of message.buffers) {\n", + " buffers.push(buffer);\n", + " downloaded += buffer.byteLength;\n", + " progress.value = downloaded;\n", + " }\n", + " }\n", + " }\n", + " const blob = new Blob(buffers, {type: 'application/binary'});\n", + " const a = document.createElement('a');\n", + " a.href = window.URL.createObjectURL(blob);\n", + " a.download = filename;\n", + " div.appendChild(a);\n", + " a.click();\n", + " div.remove();\n", + " }\n", + " " + ] + }, + "metadata": {} + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "" 
+ ], + "application/javascript": [ + "download(\"download_0fde6829-9eec-44db-9926-d396de98e67f\", \"ch_mat.csv\", 8957461)" + ] + }, + "metadata": {} + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "Calculate threshold for adj_mat" + ], + "metadata": { + "id": "Owt47pWce_Es" + } + }, + { + "cell_type": "code", + "source": [ + "adj_mat1 = ch_mat" + ], + "metadata": { + "id": "GYt3RiaNultp" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "adj_mat1.shape" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "y7ip-ckKdm2x", + "outputId": "f9563b0e-ea20-4173-bda5-9a8586935810" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "(691, 691)" + ] + }, + "metadata": {}, + "execution_count": 25 + } + ] + }, + { + "cell_type": "code", + "source": [ + "threshold = net_mat_threshold(adj_mat1, 0.08)" + ], + "metadata": { + "id": "1qr3UtrbuS_O" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "threshold" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "T-WMQUBye0Wb", + "outputId": "9175a5ea-df67-4814-cab5-b20f0598843c" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "1.8429603053651986" + ] + }, + "metadata": {}, + "execution_count": 37 + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "Apply threshold to ch_mat to get adj_mat" + ], + "metadata": { + "id": "X3tFUVIBfC-m" + } + }, + { + "cell_type": "code", + "source": [ + "adj_mat1[adj_mat1 < threshold] = 0" + ], + "metadata": { + "id": "jRe1ZuAee8jU" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "adj_mat1.to_csv('adj_mat1.csv') \n", + "files.download('adj_mat1.csv')" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 17 + 
}, + "id": "y5C_8bkFfX62", + "outputId": "463c6088-0226-4e1c-d5cd-9c509e8c7ef1" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "" + ], + "application/javascript": [ + "\n", + " async function download(id, filename, size) {\n", + " if (!google.colab.kernel.accessAllowed) {\n", + " return;\n", + " }\n", + " const div = document.createElement('div');\n", + " const label = document.createElement('label');\n", + " label.textContent = `Downloading \"${filename}\": `;\n", + " div.appendChild(label);\n", + " const progress = document.createElement('progress');\n", + " progress.max = size;\n", + " div.appendChild(progress);\n", + " document.body.appendChild(div);\n", + "\n", + " const buffers = [];\n", + " let downloaded = 0;\n", + "\n", + " const channel = await google.colab.kernel.comms.open(id);\n", + " // Send a message to notify the kernel that we're ready.\n", + " channel.send({})\n", + "\n", + " for await (const message of channel.messages) {\n", + " // Send a message to notify the kernel that we're ready.\n", + " channel.send({})\n", + " if (message.buffers) {\n", + " for (const buffer of message.buffers) {\n", + " buffers.push(buffer);\n", + " downloaded += buffer.byteLength;\n", + " progress.value = downloaded;\n", + " }\n", + " }\n", + " }\n", + " const blob = new Blob(buffers, {type: 'application/binary'});\n", + " const a = document.createElement('a');\n", + " a.href = window.URL.createObjectURL(blob);\n", + " a.download = filename;\n", + " div.appendChild(a);\n", + " a.click();\n", + " div.remove();\n", + " }\n", + " " + ] + }, + "metadata": {} + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "" + ], + "application/javascript": [ + "download(\"download_50a8ff6b-e8f3-405f-8be8-fde3d229a2b6\", \"adj_mat1.csv\", 2485987)" + ] + }, + "metadata": {} + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "Find the triplets" + ], + "metadata": { + "id": "eaXMbq4NfhPg" + 
} + }, + { + "cell_type": "code", + "source": [ + "G = nx.from_numpy_array(adj_mat1.to_numpy())" + ], + "metadata": { + "id": "XVdS3KapfmFx" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "import itertools" + ], + "metadata": { + "id": "TA5_i-__fmLO" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "three_cliques = list(itertools.takewhile(lambda x: len(x) <= 3, nx.enumerate_all_cliques(G)))" + ], + "metadata": { + "id": "b_MlXwpEfpm9" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "only_three_cliques = [i for i in three_cliques if len(i) == 3]" + ], + "metadata": { + "id": "JTfvFtpNftas" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "len(only_three_cliques)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "lWr6qJaHftc7", + "outputId": "42fadef8-2597-4e22-846c-4491b95276cf" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "342524" + ] + }, + "metadata": {}, + "execution_count": 45 + } + ] + }, + { + "cell_type": "code", + "source": [ + "triadic_cliques_tuple = list2tuple_in_list(only_three_cliques)" + ], + "metadata": { + "id": "p4WiHRYgftfQ" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "tri_clq_df = pd.DataFrame({'comb': triadic_cliques_tuple})" + ], + "metadata": { + "id": "pJC6s_M1gMhq" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "tri_clq_df.to_csv('tri_clq.csv') \n", + "files.download('tri_clq.csv')" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 17 + }, + "id": "DhEZt2GCgbqy", + "outputId": "161e69a4-0c57-4d76-84c9-f9525050d0e2" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "display_data", + "data": { 
+ "text/plain": [ + "" + ], + "application/javascript": [ + "\n", + " async function download(id, filename, size) {\n", + " if (!google.colab.kernel.accessAllowed) {\n", + " return;\n", + " }\n", + " const div = document.createElement('div');\n", + " const label = document.createElement('label');\n", + " label.textContent = `Downloading \"${filename}\": `;\n", + " div.appendChild(label);\n", + " const progress = document.createElement('progress');\n", + " progress.max = size;\n", + " div.appendChild(progress);\n", + " document.body.appendChild(div);\n", + "\n", + " const buffers = [];\n", + " let downloaded = 0;\n", + "\n", + " const channel = await google.colab.kernel.comms.open(id);\n", + " // Send a message to notify the kernel that we're ready.\n", + " channel.send({})\n", + "\n", + " for await (const message of channel.messages) {\n", + " // Send a message to notify the kernel that we're ready.\n", + " channel.send({})\n", + " if (message.buffers) {\n", + " for (const buffer of message.buffers) {\n", + " buffers.push(buffer);\n", + " downloaded += buffer.byteLength;\n", + " progress.value = downloaded;\n", + " }\n", + " }\n", + " }\n", + " const blob = new Blob(buffers, {type: 'application/binary'});\n", + " const a = document.createElement('a');\n", + " a.href = window.URL.createObjectURL(blob);\n", + " a.download = filename;\n", + " div.appendChild(a);\n", + " a.click();\n", + " div.remove();\n", + " }\n", + " " + ] + }, + "metadata": {} + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "" + ], + "application/javascript": [ + "download(\"download_61910784-ba04-407a-9831-86a4ed63ebaf\", \"tri_clq.csv\", 8035796)" + ] + }, + "metadata": {} + } + ] + }, + { + "cell_type": "code", + "source": [ + "def turn_to_int(strtup):\n", + " ls = []\n", + " for i in strtup:\n", + " ls.append(int(i))\n", + " tup = tuple(ls)\n", + " return tup" + ], + "metadata": { + "id": "_bRpN8aJgMj-" + }, + "execution_count": null, + "outputs": [] + }, + { + 
"cell_type": "code", + "source": [ + "tri_clq_df[\"combint\"] = tri_clq_df[\"comb\"].apply(lambda x: turn_to_int(x))" + ], + "metadata": { + "id": "92Ce1MapeN8f" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "tri_clq_df" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 424 + }, + "id": "MneuJwjkg4AM", + "outputId": "5d9aa0ed-661b-4b84-a751-e6730e68727e" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " comb combint\n", + "0 (0, 1, 6) (0, 1, 6)\n", + "1 (0, 1, 8) (0, 1, 8)\n", + "2 (0, 1, 10) (0, 1, 10)\n", + "3 (0, 1, 12) (0, 1, 12)\n", + "4 (0, 1, 16) (0, 1, 16)\n", + "... ... ...\n", + "342519 (683, 688, 689) (683, 688, 689)\n", + "342520 (685, 686, 688) (685, 686, 688)\n", + "342521 (685, 686, 689) (685, 686, 689)\n", + "342522 (685, 688, 689) (685, 688, 689)\n", + "342523 (686, 688, 689) (686, 688, 689)\n", + "\n", + "[342524 rows x 2 columns]" + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
combcombint
0(0, 1, 6)(0, 1, 6)
1(0, 1, 8)(0, 1, 8)
2(0, 1, 10)(0, 1, 10)
3(0, 1, 12)(0, 1, 12)
4(0, 1, 16)(0, 1, 16)
.........
342519(683, 688, 689)(683, 688, 689)
342520(685, 686, 688)(685, 686, 688)
342521(685, 686, 689)(685, 686, 689)
342522(685, 688, 689)(685, 688, 689)
342523(686, 688, 689)(686, 688, 689)
\n", + "

342524 rows × 2 columns

\n", + "
\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "
\n", + "
\n", + " " + ] + }, + "metadata": {}, + "execution_count": 54 + } + ] + }, + { + "cell_type": "code", + "source": [ + "tri_clq_df.to_csv('tri_clq.csv') \n", + "files.download('tri_clq.csv')" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 17 + }, + "id": "R1SQuo6og2DV", + "outputId": "bd4b9981-f9f7-4e43-a6e6-ccb2c768506f" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "" + ], + "application/javascript": [ + "\n", + " async function download(id, filename, size) {\n", + " if (!google.colab.kernel.accessAllowed) {\n", + " return;\n", + " }\n", + " const div = document.createElement('div');\n", + " const label = document.createElement('label');\n", + " label.textContent = `Downloading \"${filename}\": `;\n", + " div.appendChild(label);\n", + " const progress = document.createElement('progress');\n", + " progress.max = size;\n", + " div.appendChild(progress);\n", + " document.body.appendChild(div);\n", + "\n", + " const buffers = [];\n", + " let downloaded = 0;\n", + "\n", + " const channel = await google.colab.kernel.comms.open(id);\n", + " // Send a message to notify the kernel that we're ready.\n", + " channel.send({})\n", + "\n", + " for await (const message of channel.messages) {\n", + " // Send a message to notify the kernel that we're ready.\n", + " channel.send({})\n", + " if (message.buffers) {\n", + " for (const buffer of message.buffers) {\n", + " buffers.push(buffer);\n", + " downloaded += buffer.byteLength;\n", + " progress.value = downloaded;\n", + " }\n", + " }\n", + " }\n", + " const blob = new Blob(buffers, {type: 'application/binary'});\n", + " const a = document.createElement('a');\n", + " a.href = window.URL.createObjectURL(blob);\n", + " a.download = filename;\n", + " div.appendChild(a);\n", + " a.click();\n", + " div.remove();\n", + " }\n", + " " + ] + }, + "metadata": {} + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ 
+ "" + ], + "application/javascript": [ + "download(\"download_6b80a31e-8c2f-43b1-8087-81cfecb62224\", \"tri_clq.csv\", 13785036)" + ] + }, + "metadata": {} + } + ] + }, + { + "cell_type": "code", + "source": [ + "def ave_ch(ch_matrix, tri_clqs):\n", + " tri_clqs_ch = tri_clqs.copy()\n", + " tri_clqs_ch[\"ave_ch\"] = 0 \n", + "\n", + " for row in range(len(tri_clqs)):\n", + " tup = tri_clqs[\"combint\"][row]\n", + " var1, var2, var3 = tup\n", + "\n", + " mean_ch1 = ch_matrix.iloc[var1, var2]\n", + " mean_ch2 = ch_matrix.iloc[var1, var3]\n", + " mean_ch3 = ch_matrix.iloc[var2, var3]\n", + " \n", + " tri_clqs_ch.loc[row, \"ave_ch\"] = (mean_ch1 + mean_ch2 + mean_ch3)/3\n", + "\n", + " return tri_clqs_ch\n" + ], + "metadata": { + "id": "2Mp5AYcNHeN8" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "tri_clq_ch_df = ave_ch(ch_mat, tri_clq_df)" + ], + "metadata": { + "id": "CQpc4Aj8dHUo" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "tri_clq_ch_df.head(5)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 206 + }, + "id": "M_B8OUXdl9oO", + "outputId": "ee977765-2d55-4550-f45d-418cdeadbc1f" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " comb combint ave_ch\n", + "0 (0, 1, 6) (0, 1, 6) 2.466719\n", + "1 (0, 1, 8) (0, 1, 8) 2.468501\n", + "2 (0, 1, 10) (0, 1, 10) 2.464880\n", + "3 (0, 1, 12) (0, 1, 12) 2.466750\n", + "4 (0, 1, 16) (0, 1, 16) 2.456308" + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
combcombintave_ch
0(0, 1, 6)(0, 1, 6)2.466719
1(0, 1, 8)(0, 1, 8)2.468501
2(0, 1, 10)(0, 1, 10)2.464880
3(0, 1, 12)(0, 1, 12)2.466750
4(0, 1, 16)(0, 1, 16)2.456308
\n", + "
\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "
\n", + "
\n", + " " + ] + }, + "metadata": {}, + "execution_count": 59 + } + ] + }, + { + "cell_type": "code", + "source": [ + "tri_clq_ch_df.to_csv('tri_clq_ch_df.csv') \n", + "files.download('tri_clq_ch_df.csv')" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 17 + }, + "id": "haSVqxZ4mBPD", + "outputId": "818be33b-b708-423c-8270-aad9da37a38a" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "" + ], + "application/javascript": [ + "\n", + " async function download(id, filename, size) {\n", + " if (!google.colab.kernel.accessAllowed) {\n", + " return;\n", + " }\n", + " const div = document.createElement('div');\n", + " const label = document.createElement('label');\n", + " label.textContent = `Downloading \"${filename}\": `;\n", + " div.appendChild(label);\n", + " const progress = document.createElement('progress');\n", + " progress.max = size;\n", + " div.appendChild(progress);\n", + " document.body.appendChild(div);\n", + "\n", + " const buffers = [];\n", + " let downloaded = 0;\n", + "\n", + " const channel = await google.colab.kernel.comms.open(id);\n", + " // Send a message to notify the kernel that we're ready.\n", + " channel.send({})\n", + "\n", + " for await (const message of channel.messages) {\n", + " // Send a message to notify the kernel that we're ready.\n", + " channel.send({})\n", + " if (message.buffers) {\n", + " for (const buffer of message.buffers) {\n", + " buffers.push(buffer);\n", + " downloaded += buffer.byteLength;\n", + " progress.value = downloaded;\n", + " }\n", + " }\n", + " }\n", + " const blob = new Blob(buffers, {type: 'application/binary'});\n", + " const a = document.createElement('a');\n", + " a.href = window.URL.createObjectURL(blob);\n", + " a.download = filename;\n", + " div.appendChild(a);\n", + " a.click();\n", + " div.remove();\n", + " }\n", + " " + ] + }, + "metadata": {} + }, + { + "output_type": "display_data", + "data": { + 
"text/plain": [ + "" + ], + "application/javascript": [ + "download(\"download_55a7bcaf-4bc9-4713-a463-25031941f758\", \"tri_clq_ch_df.csv\", 20165244)" + ] + }, + "metadata": {} + } + ] + }, + { + "cell_type": "code", + "source": [ + "tri_clq_ch_sorted_df = tri_clq_ch_df.sort_values(by = \"ave_ch\", ascending = False)" + ], + "metadata": { + "id": "SH8NY9X9lVCI" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "tri_clq_ch_sorted_df.head(5)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 206 + }, + "id": "k0MGkBLLlj7g", + "outputId": "834283f7-d057-4f29-f543-1926deb9acde" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " comb combint ave_ch\n", + "96300 (10, 16, 23) (10, 16, 23) 2.852171\n", + "55234 (6, 16, 23) (6, 16, 23) 2.841799\n", + "111650 (12, 16, 23) (12, 16, 23) 2.824512\n", + "81271 (8, 16, 23) (8, 16, 23) 2.810755\n", + "98300 (10, 19, 23) (10, 19, 23) 2.807840" + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
combcombintave_ch
96300(10, 16, 23)(10, 16, 23)2.852171
55234(6, 16, 23)(6, 16, 23)2.841799
111650(12, 16, 23)(12, 16, 23)2.824512
81271(8, 16, 23)(8, 16, 23)2.810755
98300(10, 19, 23)(10, 19, 23)2.807840
\n", + "
\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "
\n", + "
\n", + " " + ] + }, + "metadata": {}, + "execution_count": 63 + } + ] + }, + { + "cell_type": "code", + "source": [ + "tri_clq_ch_sorted_df.to_csv('tri_clq_ch_sorted_df.csv') \n", + "files.download('tri_clq_ch_sorted_df.csv')" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 17 + }, + "id": "eNe1f3A0yA4K", + "outputId": "8fe59bca-8c6d-4de3-db25-8f3020ff3b75" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "" + ], + "application/javascript": [ + "\n", + " async function download(id, filename, size) {\n", + " if (!google.colab.kernel.accessAllowed) {\n", + " return;\n", + " }\n", + " const div = document.createElement('div');\n", + " const label = document.createElement('label');\n", + " label.textContent = `Downloading \"${filename}\": `;\n", + " div.appendChild(label);\n", + " const progress = document.createElement('progress');\n", + " progress.max = size;\n", + " div.appendChild(progress);\n", + " document.body.appendChild(div);\n", + "\n", + " const buffers = [];\n", + " let downloaded = 0;\n", + "\n", + " const channel = await google.colab.kernel.comms.open(id);\n", + " // Send a message to notify the kernel that we're ready.\n", + " channel.send({})\n", + "\n", + " for await (const message of channel.messages) {\n", + " // Send a message to notify the kernel that we're ready.\n", + " channel.send({})\n", + " if (message.buffers) {\n", + " for (const buffer of message.buffers) {\n", + " buffers.push(buffer);\n", + " downloaded += buffer.byteLength;\n", + " progress.value = downloaded;\n", + " }\n", + " }\n", + " }\n", + " const blob = new Blob(buffers, {type: 'application/binary'});\n", + " const a = document.createElement('a');\n", + " a.href = window.URL.createObjectURL(blob);\n", + " a.download = filename;\n", + " div.appendChild(a);\n", + " a.click();\n", + " div.remove();\n", + " }\n", + " " + ] + }, + "metadata": {} + }, + { + "output_type": 
"display_data", + "data": { + "text/plain": [ + "" + ], + "application/javascript": [ + "download(\"download_05f9edc2-95af-494c-8201-48acbcf85055\", \"tri_clq_ch_sorted_df.csv\", 20165244)" + ] + }, + "metadata": {} + } + ] + }, + { + "cell_type": "code", + "source": [ + "top_100_triplets = tri_clq_ch_sorted_df.head(100)" + ], + "metadata": { + "id": "-qSRSjonmPob" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "top_100_triplets.to_csv('top_100_triplets.csv') \n", + "files.download('top_100_triplets.csv')" + ], + "metadata": { + "id": "ecgiGrD0mX6e", + "outputId": "1f11dad9-edd8-45d3-d734-f57469384c7b", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 17 + } + }, + "execution_count": null, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "" + ], + "application/javascript": [ + "\n", + " async function download(id, filename, size) {\n", + " if (!google.colab.kernel.accessAllowed) {\n", + " return;\n", + " }\n", + " const div = document.createElement('div');\n", + " const label = document.createElement('label');\n", + " label.textContent = `Downloading \"${filename}\": `;\n", + " div.appendChild(label);\n", + " const progress = document.createElement('progress');\n", + " progress.max = size;\n", + " div.appendChild(progress);\n", + " document.body.appendChild(div);\n", + "\n", + " const buffers = [];\n", + " let downloaded = 0;\n", + "\n", + " const channel = await google.colab.kernel.comms.open(id);\n", + " // Send a message to notify the kernel that we're ready.\n", + " channel.send({})\n", + "\n", + " for await (const message of channel.messages) {\n", + " // Send a message to notify the kernel that we're ready.\n", + " channel.send({})\n", + " if (message.buffers) {\n", + " for (const buffer of message.buffers) {\n", + " buffers.push(buffer);\n", + " downloaded += buffer.byteLength;\n", + " progress.value = downloaded;\n", + " }\n", + " }\n", + " }\n", + " const 
blob = new Blob(buffers, {type: 'application/binary'});\n", + " const a = document.createElement('a');\n", + " a.href = window.URL.createObjectURL(blob);\n", + " a.download = filename;\n", + " div.appendChild(a);\n", + " a.click();\n", + " div.remove();\n", + " }\n", + " " + ] + }, + "metadata": {} + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "" + ], + "application/javascript": [ + "download(\"download_44ec9393-3230-49d7-a11d-0952a03654f4\", \"top_100_triplets.csv\", 5509)" + ] + }, + "metadata": {} + } + ] + } + ] +} \ No newline at end of file From 0070ae63dd6092870ac38da912a905aa7795bc3e Mon Sep 17 00:00:00 2001 From: johannagehlen <88251805+johannagehlen@users.noreply.github.com> Date: Mon, 13 Mar 2023 11:57:10 +0100 Subject: [PATCH 3/3] Delete triplets.ipynb --- triplets.ipynb | 3002 ------------------------------------------------ 1 file changed, 3002 deletions(-) delete mode 100644 triplets.ipynb diff --git a/triplets.ipynb b/triplets.ipynb deleted file mode 100644 index 33e089b..0000000 --- a/triplets.ipynb +++ /dev/null @@ -1,3002 +0,0 @@ -{ - "nbformat": 4, - "nbformat_minor": 0, - "metadata": { - "colab": { - "provenance": [], - "authorship_tag": "ABX9TyPiEdVddnhwssHMWKmGRPcF", - "include_colab_link": true - }, - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "language_info": { - "name": "python" - } - }, - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "id": "view-in-github", - "colab_type": "text" - }, - "source": [ - "\"Open" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "H5hliYR4D00t" - }, - "outputs": [], - "source": [ - "import pandas as pd\n", - "import numpy as np \n", - "import networkx as nx \n", - "import io \n", - "from sklearn.feature_selection import mutual_info_classif" - ] - }, - { - "cell_type": "code", - "source": [ - "def cal_mi(df_data):\n", - " num_var = df_data.shape[1]\n", - " mi = pd.DataFrame(np.zeros(shape = (num_var, 
num_var)))\n", - " \n", - " for rv_f in range(num_var):\n", - " for rv_t in range(num_var):\n", - " var1_ser = df_data.iloc[:, rv_f]\n", - " var2_ser = df_data.iloc[:, rv_t]\n", - "\n", - " mi.iloc[rv_f, rv_t] = mutual_info_classif(np.transpose(var1_ser.to_numpy()).reshape(-1, 1),\n", - " var2_ser.to_numpy(),\n", - " discrete_features=True)\n", - " return mi\n", - " " - ], - "metadata": { - "id": "kcA_GMZGcg0w" - }, - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "def cond_ent_mat(mi, ch_method):\n", - " num_var = len(mi)\n", - " cond_ent_matrix = pd.DataFrame(np.zeros(shape = (num_var, num_var)))\n", - " for ii in range(num_var):\n", - " for jj in range(num_var):\n", - " \n", - " if ii < jj: # upper triangle\n", - " cond_ent_1 = mi.iloc[ii, ii] - mi.iloc[ii, jj]\n", - " cond_ent_2 = mi.iloc[jj, jj] - mi.iloc[ii, jj]\n", - " else:\n", - " continue\n", - " \n", - " if ch_method == 'min':\n", - " cond_ent_matrix.iloc[ii, jj] = np.min([cond_ent_1, cond_ent_2])\n", - " cond_ent_matrix.iloc[jj, ii] = np.min([cond_ent_1, cond_ent_2])\n", - " elif ch_method == 'mean':\n", - " cond_ent_matrix.iloc[ii, jj] = np.mean([cond_ent_1, cond_ent_2])\n", - " cond_ent_matrix.iloc[jj, ii] = np.mean([cond_ent_1, cond_ent_2])\n", - " elif ch_method == 'directed':\n", - " cond_ent_matrix.iloc[ii, jj] = cond_ent_2\n", - " cond_ent_matrix.iloc[jj, ii] = cond_ent_1\n", - "\n", - " return cond_ent_matrix" - ], - "metadata": { - "id": "-ClzN9q9c7we" - }, - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "def net_mat_threshold(mat, top_per):\n", - " #Reshapes the input matrix 'mat' into a 1-dimensional array and sorts it with the highest value first\n", - " mat_new = mat.copy()\n", - " reshaped = mat_new.values.flatten()\n", - "\n", - " mat_sorted = sorted(reshaped, reverse=True)\n", - " # Theshold value is calculated as the \"round(mat.size * top_per) - 1\" \n", - " threshold = mat_sorted[round(mat_new.size * 
top_per) - 1]\n", - " return threshold" - ], - "metadata": { - "id": "mfqGUATFdeSw" - }, - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "def list2tuple_in_list(list_in_list):\n", - " tuple_in_list = []\n", - " for list_ele in list_in_list:\n", - " tuple_in_list.append(tuple(list_ele))\n", - " return tuple_in_list" - ], - "metadata": { - "id": "Oa92kK-TdlFE" - }, - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "from google.colab import files\n", - "uploaded = files.upload()" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 74 - }, - "id": "n87kJfNXeQh3", - "outputId": "7abe32a8-d350-44bb-e673-b91598b46862" - }, - "execution_count": null, - "outputs": [ - { - "output_type": "display_data", - "data": { - "text/plain": [ - "" - ], - "text/html": [ - "\n", - " \n", - " \n", - " Upload widget is only available when the cell has been executed in the\n", - " current browser session. Please rerun this cell to enable.\n", - " \n", - " " - ] - }, - "metadata": {} - }, - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Saving bayesian_discr_imp_median.csv to bayesian_discr_imp_median.csv\n" - ] - } - ] - }, - { - "cell_type": "code", - "source": [ - "yfsdata = pd.read_csv(io.BytesIO(uploaded['bayesian_discr_imp_median.csv']))" - ], - "metadata": { - "id": "SgsdBvuvfQkp" - }, - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "uploaded = files.upload()" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 74 - }, - "id": "xFhpZWZ6bune", - "outputId": "48e87786-bfa2-4f08-bc22-2a29e280f7e6" - }, - "execution_count": null, - "outputs": [ - { - "output_type": "display_data", - "data": { - "text/plain": [ - "" - ], - "text/html": [ - "\n", - " \n", - " \n", - " Upload widget is only available when the cell has been executed in the\n", - " current browser session. 
Please rerun this cell to enable.\n", - " \n", - " " - ] - }, - "metadata": {} - }, - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Saving REAL_mi_matrix.csv to REAL_mi_matrix.csv\n" - ] - } - ] - }, - { - "cell_type": "code", - "source": [ - "mi_mat = pd.read_csv(io.BytesIO(uploaded['REAL_mi_matrix.csv']))" - ], - "metadata": { - "id": "CmqnVpuafxOq" - }, - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "mi_mat = mi_mat.drop(\"Unnamed: 0\", axis = 1)" - ], - "metadata": { - "id": "y2idoNb-b5EV" - }, - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "mi_mat = mi_mat.set_index(mi_mat.columns)" - ], - "metadata": { - "id": "rdj04nhYfxUq" - }, - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "mi_mat" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 488 - }, - "id": "hk2G6KOwcW5X", - "outputId": "80c899b2-7e74-4af1-bcc7-247719155a5b" - }, - "execution_count": null, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - " ID ika07 SP bmi07 dkv07 \\\n", - "ID 7.428927 0.826710 0.678451 2.377599 2.623946 \n", - "ika07 0.826710 0.826710 0.000565 0.017296 0.017275 \n", - "SP 0.678451 0.000565 0.678451 0.038969 0.036207 \n", - "bmi07 2.377599 0.017296 0.038969 2.377599 0.182841 \n", - "dkv07 2.623946 0.017275 0.036207 0.182841 2.623946 \n", - "... ... ... ... ... ... 
\n", - "Alb07 2.059772 0.004996 0.026900 0.088029 0.087555 \n", - "Gp07 1.646898 0.004500 0.006729 0.147910 0.083168 \n", - "bohbut07 2.112376 0.005340 0.004782 0.086725 0.073216 \n", - "beckpisteet 1.992698 0.005888 0.015664 0.060881 0.051413 \n", - "masennus_neliluokkainen 0.401963 0.000797 0.003812 0.021148 0.008700 \n", - "\n", - " syst07 fmd4007 fmd40pr07 fmd6007 fmd60pr07 \\\n", - "ID 1.812784 3.113673 2.079083 2.951174 1.720179 \n", - "ika07 0.013250 0.028769 0.006398 0.023423 0.006746 \n", - "SP 0.062950 0.016493 0.032098 0.021885 0.025041 \n", - "bmi07 0.120139 0.221258 0.078945 0.215781 0.079609 \n", - "dkv07 0.469228 0.213240 0.070349 0.214604 0.066922 \n", - "... ... ... ... ... ... \n", - "Alb07 0.046249 0.134629 0.051707 0.122689 0.045825 \n", - "Gp07 0.059573 0.090277 0.032751 0.091779 0.026222 \n", - "bohbut07 0.039863 0.119953 0.039632 0.119633 0.033847 \n", - "beckpisteet 0.026476 0.092742 0.039581 0.110792 0.031833 \n", - "masennus_neliluokkainen 0.004356 0.025208 0.012242 0.023129 0.008729 \n", - "\n", - " ... Phe07 Tyr07 Ace07 AcAce07 \\\n", - "ID ... 1.839463 1.861470 1.915611 1.819938 \n", - "ika07 ... 0.004886 0.007005 0.008355 0.008283 \n", - "SP ... 0.007877 0.048382 0.005498 0.004616 \n", - "bmi07 ... 0.141310 0.123722 0.068177 0.069353 \n", - "dkv07 ... 0.102123 0.096813 0.071508 0.057682 \n", - "... ... ... ... ... ... \n", - "Alb07 ... 0.135334 0.118419 0.118923 0.113900 \n", - "Gp07 ... 0.288467 0.151240 0.105676 0.119137 \n", - "bohbut07 ... 0.128789 0.125530 0.150931 0.493336 \n", - "beckpisteet ... 0.027561 0.029797 0.029058 0.033513 \n", - "masennus_neliluokkainen ... 
0.007077 0.009883 0.008801 0.007500 \n", - "\n", - " Crea07 Alb07 Gp07 bohbut07 beckpisteet \\\n", - "ID 1.936135 2.059772 1.646898 2.112376 1.992698 \n", - "ika07 0.006359 0.004996 0.004500 0.005340 0.005888 \n", - "SP 0.160306 0.026900 0.006729 0.004782 0.015664 \n", - "bmi07 0.088128 0.088029 0.147910 0.086725 0.060881 \n", - "dkv07 0.078587 0.087555 0.083168 0.073216 0.051413 \n", - "... ... ... ... ... ... \n", - "Alb07 0.150669 2.059772 0.206831 0.115041 0.032030 \n", - "Gp07 0.122943 0.206831 1.646898 0.138273 0.025404 \n", - "bohbut07 0.122192 0.115041 0.138273 2.112376 0.027945 \n", - "beckpisteet 0.039429 0.032030 0.025404 0.027945 1.992698 \n", - "masennus_neliluokkainen 0.011058 0.006570 0.007075 0.007552 0.401963 \n", - "\n", - " masennus_neliluokkainen \n", - "ID 0.401963 \n", - "ika07 0.000797 \n", - "SP 0.003812 \n", - "bmi07 0.021148 \n", - "dkv07 0.008700 \n", - "... ... \n", - "Alb07 0.006570 \n", - "Gp07 0.007075 \n", - "bohbut07 0.007552 \n", - "beckpisteet 0.401963 \n", - "masennus_neliluokkainen 0.401963 \n", - "\n", - "[691 rows x 691 columns]" - ], - "text/html": [ - "\n", - "
\n", - "
\n", - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
IDika07SPbmi07dkv07syst07fmd4007fmd40pr07fmd6007fmd60pr07...Phe07Tyr07Ace07AcAce07Crea07Alb07Gp07bohbut07beckpisteetmasennus_neliluokkainen
ID7.4289270.8267100.6784512.3775992.6239461.8127843.1136732.0790832.9511741.720179...1.8394631.8614701.9156111.8199381.9361352.0597721.6468982.1123761.9926980.401963
ika070.8267100.8267100.0005650.0172960.0172750.0132500.0287690.0063980.0234230.006746...0.0048860.0070050.0083550.0082830.0063590.0049960.0045000.0053400.0058880.000797
SP0.6784510.0005650.6784510.0389690.0362070.0629500.0164930.0320980.0218850.025041...0.0078770.0483820.0054980.0046160.1603060.0269000.0067290.0047820.0156640.003812
bmi072.3775990.0172960.0389692.3775990.1828410.1201390.2212580.0789450.2157810.079609...0.1413100.1237220.0681770.0693530.0881280.0880290.1479100.0867250.0608810.021148
dkv072.6239460.0172750.0362070.1828412.6239460.4692280.2132400.0703490.2146040.066922...0.1021230.0968130.0715080.0576820.0785870.0875550.0831680.0732160.0514130.008700
..................................................................
Alb072.0597720.0049960.0269000.0880290.0875550.0462490.1346290.0517070.1226890.045825...0.1353340.1184190.1189230.1139000.1506692.0597720.2068310.1150410.0320300.006570
Gp071.6468980.0045000.0067290.1479100.0831680.0595730.0902770.0327510.0917790.026222...0.2884670.1512400.1056760.1191370.1229430.2068311.6468980.1382730.0254040.007075
bohbut072.1123760.0053400.0047820.0867250.0732160.0398630.1199530.0396320.1196330.033847...0.1287890.1255300.1509310.4933360.1221920.1150410.1382732.1123760.0279450.007552
beckpisteet1.9926980.0058880.0156640.0608810.0514130.0264760.0927420.0395810.1107920.031833...0.0275610.0297970.0290580.0335130.0394290.0320300.0254040.0279451.9926980.401963
masennus_neliluokkainen0.4019630.0007970.0038120.0211480.0087000.0043560.0252080.0122420.0231290.008729...0.0070770.0098830.0088010.0075000.0110580.0065700.0070750.0075520.4019630.401963
\n", - "

691 rows × 691 columns

\n", - "
\n", - " \n", - " \n", - " \n", - "\n", - " \n", - "
\n", - "
\n", - " " - ] - }, - "metadata": {}, - "execution_count": 17 - } - ] - }, - { - "cell_type": "code", - "source": [ - "ch_mat = cond_ent_mat(mi_mat, \"mean\")" - ], - "metadata": { - "id": "H80AWYWVrRcb" - }, - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "ch_mat = ch_mat.set_index(mi_mat.columns)\n", - "ch_mat.columns = mi_mat.columns\n", - "ch_mat" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 488 - }, - "id": "7zvWvl7Ac_4D", - "outputId": "3df87090-4a38-43b8-9c73-f3809b5c0810" - }, - "execution_count": null, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - " ID ika07 SP bmi07 dkv07 \\\n", - "ID 0.000000 3.301108 3.375238 2.525664 2.402491 \n", - "ika07 3.301108 0.000000 0.752016 1.584859 1.708054 \n", - "SP 3.375238 0.752016 0.000000 1.489056 1.614992 \n", - "bmi07 2.525664 1.584859 1.489056 0.000000 2.317931 \n", - "dkv07 2.402491 1.708054 1.614992 2.317931 0.000000 \n", - "... ... ... ... ... ... \n", - "Alb07 2.684577 1.438245 1.342212 2.130657 2.254305 \n", - "Gp07 2.891015 1.232304 1.155945 1.864339 2.052254 \n", - "bohbut07 2.658275 1.464204 1.390632 2.158262 2.294945 \n", - "beckpisteet 2.718114 1.403817 1.319910 2.124267 2.256909 \n", - "masennus_neliluokkainen 3.513482 0.613540 0.536395 1.368633 1.504255 \n", - "\n", - " syst07 fmd4007 fmd40pr07 fmd6007 fmd60pr07 \\\n", - "ID 2.808072 2.157627 2.674922 2.238876 2.854374 \n", - "ika07 1.306497 1.941422 1.446498 1.865520 1.266698 \n", - "SP 1.182668 1.879569 1.346668 1.792928 1.174274 \n", - "bmi07 1.975053 2.524378 2.149396 2.448606 1.969280 \n", - "dkv07 1.749137 2.655569 2.281165 2.572956 2.105141 \n", - "... ... ... ... ... ... 
\n", - "Alb07 1.890029 2.452093 2.017721 2.382784 1.844151 \n", - "Gp07 1.670268 2.290008 1.830239 2.207258 1.657317 \n", - "bohbut07 1.922717 2.493072 2.056098 2.412142 1.882431 \n", - "beckpisteet 1.876265 2.460444 1.996309 2.361144 1.824605 \n", - "masennus_neliluokkainen 1.103017 1.732610 1.228281 1.653440 1.052342 \n", - "\n", - " ... Phe07 Tyr07 Ace07 AcAce07 \\\n", - "ID ... 2.794732 2.783729 2.756658 2.804495 \n", - "ika07 ... 1.328201 1.337085 1.362806 1.315041 \n", - "SP ... 1.251080 1.221579 1.291533 1.244578 \n", - "bmi07 ... 1.967221 1.995813 2.078428 2.029415 \n", - "dkv07 ... 2.129582 2.145895 2.198271 2.164260 \n", - "... ... ... ... ... ... \n", - "Alb07 ... 1.814284 1.842202 1.868768 1.825955 \n", - "Gp07 ... 1.454713 1.602943 1.675579 1.614281 \n", - "bohbut07 ... 1.847131 1.861393 1.863063 1.472821 \n", - "beckpisteet ... 1.888519 1.897287 1.925097 1.872805 \n", - "masennus_neliluokkainen ... 1.113637 1.121834 1.149986 1.103451 \n", - "\n", - " Crea07 Alb07 Gp07 bohbut07 beckpisteet \\\n", - "ID 2.746396 2.684577 2.891015 2.658275 2.718114 \n", - "ika07 1.375063 1.438245 1.232304 1.464204 1.403817 \n", - "SP 1.146987 1.342212 1.155945 1.390632 1.319910 \n", - "bmi07 2.068739 2.130657 1.864339 2.158262 2.124267 \n", - "dkv07 2.201454 2.254305 2.052254 2.294945 2.256909 \n", - "... ... ... ... ... ... \n", - "Alb07 1.847285 0.000000 1.646504 1.971033 1.994205 \n", - "Gp07 1.668573 1.646504 0.000000 1.741364 1.794394 \n", - "bohbut07 1.902064 1.971033 1.741364 0.000000 2.024592 \n", - "beckpisteet 1.924987 1.994205 1.794394 2.024592 0.000000 \n", - "masennus_neliluokkainen 1.157991 1.224298 1.017355 1.249618 0.795367 \n", - "\n", - " masennus_neliluokkainen \n", - "ID 3.513482 \n", - "ika07 0.613540 \n", - "SP 0.536395 \n", - "bmi07 1.368633 \n", - "dkv07 1.504255 \n", - "... ... 
\n", - "Alb07 1.224298 \n", - "Gp07 1.017355 \n", - "bohbut07 1.249618 \n", - "beckpisteet 0.795367 \n", - "masennus_neliluokkainen 0.000000 \n", - "\n", - "[691 rows x 691 columns]" - ], - "text/html": [ - "\n", - "
\n", - "
\n", - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
IDika07SPbmi07dkv07syst07fmd4007fmd40pr07fmd6007fmd60pr07...Phe07Tyr07Ace07AcAce07Crea07Alb07Gp07bohbut07beckpisteetmasennus_neliluokkainen
ID0.0000003.3011083.3752382.5256642.4024912.8080722.1576272.6749222.2388762.854374...2.7947322.7837292.7566582.8044952.7463962.6845772.8910152.6582752.7181143.513482
ika073.3011080.0000000.7520161.5848591.7080541.3064971.9414221.4464981.8655201.266698...1.3282011.3370851.3628061.3150411.3750631.4382451.2323041.4642041.4038170.613540
SP3.3752380.7520160.0000001.4890561.6149921.1826681.8795691.3466681.7929281.174274...1.2510801.2215791.2915331.2445781.1469871.3422121.1559451.3906321.3199100.536395
bmi072.5256641.5848591.4890560.0000002.3179311.9750532.5243782.1493962.4486061.969280...1.9672211.9958132.0784282.0294152.0687392.1306571.8643392.1582622.1242671.368633
dkv072.4024911.7080541.6149922.3179310.0000001.7491372.6555692.2811652.5729562.105141...2.1295822.1458952.1982712.1642602.2014542.2543052.0522542.2949452.2569091.504255
..................................................................
Alb072.6845771.4382451.3422122.1306572.2543051.8900292.4520932.0177212.3827841.844151...1.8142841.8422021.8687681.8259551.8472850.0000001.6465041.9710331.9942051.224298
Gp072.8910151.2323041.1559451.8643392.0522541.6702682.2900081.8302392.2072581.657317...1.4547131.6029431.6755791.6142811.6685731.6465040.0000001.7413641.7943941.017355
bohbut072.6582751.4642041.3906322.1582622.2949451.9227172.4930722.0560982.4121421.882431...1.8471311.8613931.8630631.4728211.9020641.9710331.7413640.0000002.0245921.249618
beckpisteet2.7181141.4038171.3199102.1242672.2569091.8762652.4604441.9963092.3611441.824605...1.8885191.8972871.9250971.8728051.9249871.9942051.7943942.0245920.0000000.795367
masennus_neliluokkainen3.5134820.6135400.5363951.3686331.5042551.1030171.7326101.2282811.6534401.052342...1.1136371.1218341.1499861.1034511.1579911.2242981.0173551.2496180.7953670.000000
\n", - "

691 rows × 691 columns

\n", - "
\n", - " \n", - " \n", - " \n", - "\n", - " \n", - "
\n", - "
\n", - " " - ] - }, - "metadata": {}, - "execution_count": 21 - } - ] - }, - { - "cell_type": "code", - "source": [ - "ch_mat.to_csv('ch_mat.csv') \n", - "files.download('ch_mat.csv')" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 17 - }, - "id": "0PYzO0bIdPV1", - "outputId": "2960f3f6-b011-45b1-f58f-8c4a88de8a80" - }, - "execution_count": null, - "outputs": [ - { - "output_type": "display_data", - "data": { - "text/plain": [ - "" - ], - "application/javascript": [ - "\n", - " async function download(id, filename, size) {\n", - " if (!google.colab.kernel.accessAllowed) {\n", - " return;\n", - " }\n", - " const div = document.createElement('div');\n", - " const label = document.createElement('label');\n", - " label.textContent = `Downloading \"${filename}\": `;\n", - " div.appendChild(label);\n", - " const progress = document.createElement('progress');\n", - " progress.max = size;\n", - " div.appendChild(progress);\n", - " document.body.appendChild(div);\n", - "\n", - " const buffers = [];\n", - " let downloaded = 0;\n", - "\n", - " const channel = await google.colab.kernel.comms.open(id);\n", - " // Send a message to notify the kernel that we're ready.\n", - " channel.send({})\n", - "\n", - " for await (const message of channel.messages) {\n", - " // Send a message to notify the kernel that we're ready.\n", - " channel.send({})\n", - " if (message.buffers) {\n", - " for (const buffer of message.buffers) {\n", - " buffers.push(buffer);\n", - " downloaded += buffer.byteLength;\n", - " progress.value = downloaded;\n", - " }\n", - " }\n", - " }\n", - " const blob = new Blob(buffers, {type: 'application/binary'});\n", - " const a = document.createElement('a');\n", - " a.href = window.URL.createObjectURL(blob);\n", - " a.download = filename;\n", - " div.appendChild(a);\n", - " a.click();\n", - " div.remove();\n", - " }\n", - " " - ] - }, - "metadata": {} - }, - { - "output_type": "display_data", - "data": { - "text/plain": [ - "" 
# Calculate threshold for adj_mat
#
# Work on an independent copy of ch_mat.  A plain assignment would only bind a
# second name to the same DataFrame, so the in-place thresholding further down
# would also zero entries of ch_mat, which is still needed afterwards.
adj_mat1 = ch_mat.copy()
adj_mat1.shape

# net_mat_threshold is defined elsewhere in this notebook; it is called here
# with the matrix and 0.08 and returns the cut-off value used below.
threshold = net_mat_threshold(adj_mat1, 0.08)
threshold

# Apply threshold to ch_mat to get adj_mat:
# every entry strictly below the threshold is set to 0.
adj_mat1[adj_mat1 < threshold] = 0

# Persist the thresholded matrix and hand it to the browser for download.
adj_mat1.to_csv('adj_mat1.csv')
files.download('adj_mat1.csv')
}, - "id": "y5C_8bkFfX62", - "outputId": "463c6088-0226-4e1c-d5cd-9c509e8c7ef1" - }, - "execution_count": null, - "outputs": [ - { - "output_type": "display_data", - "data": { - "text/plain": [ - "" - ], - "application/javascript": [ - "\n", - " async function download(id, filename, size) {\n", - " if (!google.colab.kernel.accessAllowed) {\n", - " return;\n", - " }\n", - " const div = document.createElement('div');\n", - " const label = document.createElement('label');\n", - " label.textContent = `Downloading \"${filename}\": `;\n", - " div.appendChild(label);\n", - " const progress = document.createElement('progress');\n", - " progress.max = size;\n", - " div.appendChild(progress);\n", - " document.body.appendChild(div);\n", - "\n", - " const buffers = [];\n", - " let downloaded = 0;\n", - "\n", - " const channel = await google.colab.kernel.comms.open(id);\n", - " // Send a message to notify the kernel that we're ready.\n", - " channel.send({})\n", - "\n", - " for await (const message of channel.messages) {\n", - " // Send a message to notify the kernel that we're ready.\n", - " channel.send({})\n", - " if (message.buffers) {\n", - " for (const buffer of message.buffers) {\n", - " buffers.push(buffer);\n", - " downloaded += buffer.byteLength;\n", - " progress.value = downloaded;\n", - " }\n", - " }\n", - " }\n", - " const blob = new Blob(buffers, {type: 'application/binary'});\n", - " const a = document.createElement('a');\n", - " a.href = window.URL.createObjectURL(blob);\n", - " a.download = filename;\n", - " div.appendChild(a);\n", - " a.click();\n", - " div.remove();\n", - " }\n", - " " - ] - }, - "metadata": {} - }, - { - "output_type": "display_data", - "data": { - "text/plain": [ - "" - ], - "application/javascript": [ - "download(\"download_50a8ff6b-e8f3-405f-8be8-fde3d229a2b6\", \"adj_mat1.csv\", 2485987)" - ] - }, - "metadata": {} - } - ] - }, - { - "cell_type": "markdown", - "source": [ - "Find the triplets" - ], - "metadata": { - "id": "eaXMbq4NfhPg" - 
# Find the triplets
import itertools

# Build a graph from the thresholded matrix; nodes are the positional
# row/column indices of adj_mat1.
G = nx.from_numpy_array(adj_mat1.to_numpy())

# enumerate_all_cliques yields cliques ordered by size, so the enumeration can
# be cut off as soon as the first clique with more than three nodes shows up.
three_cliques = list(
    itertools.takewhile(
        lambda clique: len(clique) <= 3,
        nx.enumerate_all_cliques(G),
    )
)

# Drop the single nodes and the edges: only the triangles are of interest.
only_three_cliques = [clique for clique in three_cliques if len(clique) == 3]
len(only_three_cliques)

# list2tuple_in_list is defined elsewhere in this notebook
# (presumably turns every clique list into a tuple -- verify against its definition).
triadic_cliques_tuple = list2tuple_in_list(only_three_cliques)

# One row per triplet, stored in the 'comb' column, then exported.
tri_clq_df = pd.DataFrame({'comb': triadic_cliques_tuple})
tri_clq_df.to_csv('tri_clq.csv')
files.download('tri_clq.csv')
def turn_to_int(strtup):
    """Return a tuple holding ``int(element)`` for every element of *strtup*.

    *strtup* may be any iterable whose elements are accepted by ``int``;
    the order of the elements is preserved.
    """
    return tuple(int(element) for element in strtup)
"cell_type": "code", - "source": [ - "tri_clq_df[\"combint\"] = tri_clq_df[\"comb\"].apply(lambda x: turn_to_int(x))" - ], - "metadata": { - "id": "92Ce1MapeN8f" - }, - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "tri_clq_df" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 424 - }, - "id": "MneuJwjkg4AM", - "outputId": "5d9aa0ed-661b-4b84-a751-e6730e68727e" - }, - "execution_count": null, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - " comb combint\n", - "0 (0, 1, 6) (0, 1, 6)\n", - "1 (0, 1, 8) (0, 1, 8)\n", - "2 (0, 1, 10) (0, 1, 10)\n", - "3 (0, 1, 12) (0, 1, 12)\n", - "4 (0, 1, 16) (0, 1, 16)\n", - "... ... ...\n", - "342519 (683, 688, 689) (683, 688, 689)\n", - "342520 (685, 686, 688) (685, 686, 688)\n", - "342521 (685, 686, 689) (685, 686, 689)\n", - "342522 (685, 688, 689) (685, 688, 689)\n", - "342523 (686, 688, 689) (686, 688, 689)\n", - "\n", - "[342524 rows x 2 columns]" - ], - "text/html": [ - "\n", - "
\n", - "
\n", - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
combcombint
0(0, 1, 6)(0, 1, 6)
1(0, 1, 8)(0, 1, 8)
2(0, 1, 10)(0, 1, 10)
3(0, 1, 12)(0, 1, 12)
4(0, 1, 16)(0, 1, 16)
.........
342519(683, 688, 689)(683, 688, 689)
342520(685, 686, 688)(685, 686, 688)
342521(685, 686, 689)(685, 686, 689)
342522(685, 688, 689)(685, 688, 689)
342523(686, 688, 689)(686, 688, 689)
\n", - "

342524 rows × 2 columns

\n", - "
\n", - " \n", - " \n", - " \n", - "\n", - " \n", - "
\n", - "
\n", - " " - ] - }, - "metadata": {}, - "execution_count": 54 - } - ] - }, - { - "cell_type": "code", - "source": [ - "tri_clq_df.to_csv('tri_clq.csv') \n", - "files.download('tri_clq.csv')" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 17 - }, - "id": "R1SQuo6og2DV", - "outputId": "bd4b9981-f9f7-4e43-a6e6-ccb2c768506f" - }, - "execution_count": null, - "outputs": [ - { - "output_type": "display_data", - "data": { - "text/plain": [ - "" - ], - "application/javascript": [ - "\n", - " async function download(id, filename, size) {\n", - " if (!google.colab.kernel.accessAllowed) {\n", - " return;\n", - " }\n", - " const div = document.createElement('div');\n", - " const label = document.createElement('label');\n", - " label.textContent = `Downloading \"${filename}\": `;\n", - " div.appendChild(label);\n", - " const progress = document.createElement('progress');\n", - " progress.max = size;\n", - " div.appendChild(progress);\n", - " document.body.appendChild(div);\n", - "\n", - " const buffers = [];\n", - " let downloaded = 0;\n", - "\n", - " const channel = await google.colab.kernel.comms.open(id);\n", - " // Send a message to notify the kernel that we're ready.\n", - " channel.send({})\n", - "\n", - " for await (const message of channel.messages) {\n", - " // Send a message to notify the kernel that we're ready.\n", - " channel.send({})\n", - " if (message.buffers) {\n", - " for (const buffer of message.buffers) {\n", - " buffers.push(buffer);\n", - " downloaded += buffer.byteLength;\n", - " progress.value = downloaded;\n", - " }\n", - " }\n", - " }\n", - " const blob = new Blob(buffers, {type: 'application/binary'});\n", - " const a = document.createElement('a');\n", - " a.href = window.URL.createObjectURL(blob);\n", - " a.download = filename;\n", - " div.appendChild(a);\n", - " a.click();\n", - " div.remove();\n", - " }\n", - " " - ] - }, - "metadata": {} - }, - { - "output_type": "display_data", - "data": { - "text/plain": [ 
def ave_ch(ch_matrix, tri_clqs):
    """Attach the mean pairwise matrix value to every triplet.

    Parameters
    ----------
    ch_matrix : pandas.DataFrame
        Square matrix that is addressed purely by position (row and column
        labels are ignored).
    tri_clqs : pandas.DataFrame
        Must contain a ``"combint"`` column whose entries are 3-tuples of
        integer positions ``(var1, var2, var3)`` into ``ch_matrix``.

    Returns
    -------
    pandas.DataFrame
        A copy of ``tri_clqs`` (same index, input left untouched) with an
        extra float column ``"ave_ch"`` holding, for each row, the mean of
        ``ch_matrix[var1, var2]``, ``ch_matrix[var1, var3]`` and
        ``ch_matrix[var2, var3]``.

    Raises
    ------
    ValueError
        If the entries of ``"combint"`` are not all of length three.
    """
    tri_clqs_ch = tri_clqs.copy()

    combos = tri_clqs["combint"].tolist()
    if combos:
        positions = np.array(combos, dtype=np.intp)
        if positions.ndim != 2 or positions.shape[1] != 3:
            raise ValueError(
                "every entry of 'combint' must contain exactly three indices"
            )
    else:
        # Keep the (n, 3) layout so the column slicing below works for n == 0.
        positions = np.empty((0, 3), dtype=np.intp)

    first, second, third = positions[:, 0], positions[:, 1], positions[:, 2]

    # Look up all pairs at once on the raw array instead of issuing one scalar
    # .iloc/.loc call per row.  Working by position also means the result does
    # not depend on tri_clqs having a default 0..n-1 index.
    values = ch_matrix.to_numpy()
    pair_sum = (
        values[first, second] + values[first, third] + values[second, third]
    )

    # Assigning a plain array aligns by position, so the original index is
    # kept and the column is float from the start (no int -> float upcast).
    tri_clqs_ch["ave_ch"] = np.asarray(pair_sum, dtype=float) / 3

    return tri_clqs_ch
\n", - "
\n", - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
combcombintave_ch
0(0, 1, 6)(0, 1, 6)2.466719
1(0, 1, 8)(0, 1, 8)2.468501
2(0, 1, 10)(0, 1, 10)2.464880
3(0, 1, 12)(0, 1, 12)2.466750
4(0, 1, 16)(0, 1, 16)2.456308
\n", - "
\n", - " \n", - " \n", - " \n", - "\n", - " \n", - "
\n", - "
\n", - " " - ] - }, - "metadata": {}, - "execution_count": 59 - } - ] - }, - { - "cell_type": "code", - "source": [ - "tri_clq_ch_df.to_csv('tri_clq_ch_df.csv') \n", - "files.download('tri_clq_ch_df.csv')" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 17 - }, - "id": "haSVqxZ4mBPD", - "outputId": "818be33b-b708-423c-8270-aad9da37a38a" - }, - "execution_count": null, - "outputs": [ - { - "output_type": "display_data", - "data": { - "text/plain": [ - "" - ], - "application/javascript": [ - "\n", - " async function download(id, filename, size) {\n", - " if (!google.colab.kernel.accessAllowed) {\n", - " return;\n", - " }\n", - " const div = document.createElement('div');\n", - " const label = document.createElement('label');\n", - " label.textContent = `Downloading \"${filename}\": `;\n", - " div.appendChild(label);\n", - " const progress = document.createElement('progress');\n", - " progress.max = size;\n", - " div.appendChild(progress);\n", - " document.body.appendChild(div);\n", - "\n", - " const buffers = [];\n", - " let downloaded = 0;\n", - "\n", - " const channel = await google.colab.kernel.comms.open(id);\n", - " // Send a message to notify the kernel that we're ready.\n", - " channel.send({})\n", - "\n", - " for await (const message of channel.messages) {\n", - " // Send a message to notify the kernel that we're ready.\n", - " channel.send({})\n", - " if (message.buffers) {\n", - " for (const buffer of message.buffers) {\n", - " buffers.push(buffer);\n", - " downloaded += buffer.byteLength;\n", - " progress.value = downloaded;\n", - " }\n", - " }\n", - " }\n", - " const blob = new Blob(buffers, {type: 'application/binary'});\n", - " const a = document.createElement('a');\n", - " a.href = window.URL.createObjectURL(blob);\n", - " a.download = filename;\n", - " div.appendChild(a);\n", - " a.click();\n", - " div.remove();\n", - " }\n", - " " - ] - }, - "metadata": {} - }, - { - "output_type": "display_data", - "data": { - 
"text/plain": [ - "" - ], - "application/javascript": [ - "download(\"download_55a7bcaf-4bc9-4713-a463-25031941f758\", \"tri_clq_ch_df.csv\", 20165244)" - ] - }, - "metadata": {} - } - ] - }, - { - "cell_type": "code", - "source": [ - "tri_clq_ch_sorted_df = tri_clq_ch_df.sort_values(by = \"ave_ch\", ascending = False)" - ], - "metadata": { - "id": "SH8NY9X9lVCI" - }, - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "tri_clq_ch_sorted_df.head(5)" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 206 - }, - "id": "k0MGkBLLlj7g", - "outputId": "834283f7-d057-4f29-f543-1926deb9acde" - }, - "execution_count": null, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - " comb combint ave_ch\n", - "96300 (10, 16, 23) (10, 16, 23) 2.852171\n", - "55234 (6, 16, 23) (6, 16, 23) 2.841799\n", - "111650 (12, 16, 23) (12, 16, 23) 2.824512\n", - "81271 (8, 16, 23) (8, 16, 23) 2.810755\n", - "98300 (10, 19, 23) (10, 19, 23) 2.807840" - ], - "text/html": [ - "\n", - "
\n", - "
\n", - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
combcombintave_ch
96300(10, 16, 23)(10, 16, 23)2.852171
55234(6, 16, 23)(6, 16, 23)2.841799
111650(12, 16, 23)(12, 16, 23)2.824512
81271(8, 16, 23)(8, 16, 23)2.810755
98300(10, 19, 23)(10, 19, 23)2.807840
\n", - "
\n", - " \n", - " \n", - " \n", - "\n", - " \n", - "
\n", - "
\n", - " " - ] - }, - "metadata": {}, - "execution_count": 63 - } - ] - }, - { - "cell_type": "code", - "source": [ - "tri_clq_ch_sorted_df.to_csv('tri_clq_ch_sorted_df.csv') \n", - "files.download('tri_clq_ch_sorted_df.csv')" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 17 - }, - "id": "eNe1f3A0yA4K", - "outputId": "8fe59bca-8c6d-4de3-db25-8f3020ff3b75" - }, - "execution_count": null, - "outputs": [ - { - "output_type": "display_data", - "data": { - "text/plain": [ - "" - ], - "application/javascript": [ - "\n", - " async function download(id, filename, size) {\n", - " if (!google.colab.kernel.accessAllowed) {\n", - " return;\n", - " }\n", - " const div = document.createElement('div');\n", - " const label = document.createElement('label');\n", - " label.textContent = `Downloading \"${filename}\": `;\n", - " div.appendChild(label);\n", - " const progress = document.createElement('progress');\n", - " progress.max = size;\n", - " div.appendChild(progress);\n", - " document.body.appendChild(div);\n", - "\n", - " const buffers = [];\n", - " let downloaded = 0;\n", - "\n", - " const channel = await google.colab.kernel.comms.open(id);\n", - " // Send a message to notify the kernel that we're ready.\n", - " channel.send({})\n", - "\n", - " for await (const message of channel.messages) {\n", - " // Send a message to notify the kernel that we're ready.\n", - " channel.send({})\n", - " if (message.buffers) {\n", - " for (const buffer of message.buffers) {\n", - " buffers.push(buffer);\n", - " downloaded += buffer.byteLength;\n", - " progress.value = downloaded;\n", - " }\n", - " }\n", - " }\n", - " const blob = new Blob(buffers, {type: 'application/binary'});\n", - " const a = document.createElement('a');\n", - " a.href = window.URL.createObjectURL(blob);\n", - " a.download = filename;\n", - " div.appendChild(a);\n", - " a.click();\n", - " div.remove();\n", - " }\n", - " " - ] - }, - "metadata": {} - }, - { - "output_type": 
"display_data", - "data": { - "text/plain": [ - "" - ], - "application/javascript": [ - "download(\"download_05f9edc2-95af-494c-8201-48acbcf85055\", \"tri_clq_ch_sorted_df.csv\", 20165244)" - ] - }, - "metadata": {} - } - ] - }, - { - "cell_type": "code", - "source": [ - "top_100_triplets = tri_clq_ch_sorted_df.head(100)" - ], - "metadata": { - "id": "-qSRSjonmPob" - }, - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "top_100_triplets.to_csv('top_100_triplets.csv') \n", - "files.download('top_100_triplets.csv')" - ], - "metadata": { - "id": "ecgiGrD0mX6e", - "outputId": "1f11dad9-edd8-45d3-d734-f57469384c7b", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 17 - } - }, - "execution_count": null, - "outputs": [ - { - "output_type": "display_data", - "data": { - "text/plain": [ - "" - ], - "application/javascript": [ - "\n", - " async function download(id, filename, size) {\n", - " if (!google.colab.kernel.accessAllowed) {\n", - " return;\n", - " }\n", - " const div = document.createElement('div');\n", - " const label = document.createElement('label');\n", - " label.textContent = `Downloading \"${filename}\": `;\n", - " div.appendChild(label);\n", - " const progress = document.createElement('progress');\n", - " progress.max = size;\n", - " div.appendChild(progress);\n", - " document.body.appendChild(div);\n", - "\n", - " const buffers = [];\n", - " let downloaded = 0;\n", - "\n", - " const channel = await google.colab.kernel.comms.open(id);\n", - " // Send a message to notify the kernel that we're ready.\n", - " channel.send({})\n", - "\n", - " for await (const message of channel.messages) {\n", - " // Send a message to notify the kernel that we're ready.\n", - " channel.send({})\n", - " if (message.buffers) {\n", - " for (const buffer of message.buffers) {\n", - " buffers.push(buffer);\n", - " downloaded += buffer.byteLength;\n", - " progress.value = downloaded;\n", - " }\n", - " }\n", - " }\n", - " const 
blob = new Blob(buffers, {type: 'application/binary'});\n", - " const a = document.createElement('a');\n", - " a.href = window.URL.createObjectURL(blob);\n", - " a.download = filename;\n", - " div.appendChild(a);\n", - " a.click();\n", - " div.remove();\n", - " }\n", - " " - ] - }, - "metadata": {} - }, - { - "output_type": "display_data", - "data": { - "text/plain": [ - "" - ], - "application/javascript": [ - "download(\"download_44ec9393-3230-49d7-a11d-0952a03654f4\", \"top_100_triplets.csv\", 5509)" - ] - }, - "metadata": {} - } - ] - } - ] -} \ No newline at end of file