Timestamp feature redesign

master
Petr Masopust, 6 years ago
parent 4b8a777036, commit 73a20885c7
  1. external.ipynb (283)
  2. inference/src/main/java/cz/aprar/bonitoo/inference/CacheInference.java (6)
  3. inference/src/test/java/cz/aprar/bonitoo/inference/CacheInferenceTest.java (6)
  4. inference/src/test/resources/model/encoder.json (2)
  5. inference/src/test/resources/model/xgboost-model.bin (BIN)
  6. src/train_model.py (15)
  7. xgboost load.ipynb (43)

external.ipynb
@@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": 14,
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
@@ -17,7 +17,7 @@
},
{
"cell_type": "code",
"execution_count": 15,
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
@@ -29,16 +29,17 @@
},
{
"cell_type": "code",
"execution_count": 16,
"execution_count": 14,
"metadata": {},
"outputs": [],
"source": [
"train_input = 's3://customers-bonitoo-cachettl/sagemaker/data/export.csv'"
"train_input = 'file:///home/ehp/soukrome/git/bonitoo/var/data'\n",
"#train_input = 's3://customers-bonitoo-cachettl/sagemaker/data/export.csv'\n"
]
},
{
"cell_type": "code",
"execution_count": 59,
"execution_count": 25,
"metadata": {},
"outputs": [],
"source": [
@@ -69,7 +70,7 @@
},
{
"cell_type": "code",
"execution_count": 60,
"execution_count": 26,
"metadata": {
"scrolled": true
},
@@ -78,148 +79,146 @@
"name": "stdout",
"output_type": "stream",
"text": [
"Creating tmplob1feqn_algo-1-hpz08_1 ... \n",
"\u001b[1BAttaching to tmplob1feqn_algo-1-hpz08_12mdone\u001b[0m\n",
"\u001b[36malgo-1-hpz08_1 |\u001b[0m INFO:sagemaker-containers:Imported framework sagemaker_xgboost_container.training\n",
"\u001b[36malgo-1-hpz08_1 |\u001b[0m INFO:sagemaker-containers:No GPUs detected (normal if no gpus installed)\n",
"\u001b[36malgo-1-hpz08_1 |\u001b[0m INFO:sagemaker_xgboost_container.training:Invoking user training script.\n",
"\u001b[36malgo-1-hpz08_1 |\u001b[0m INFO:sagemaker-containers:Module train_model does not provide a setup.py. \n",
"\u001b[36malgo-1-hpz08_1 |\u001b[0m Generating setup.py\n",
"\u001b[36malgo-1-hpz08_1 |\u001b[0m INFO:sagemaker-containers:Generating setup.cfg\n",
"\u001b[36malgo-1-hpz08_1 |\u001b[0m INFO:sagemaker-containers:Generating MANIFEST.in\n",
"\u001b[36malgo-1-hpz08_1 |\u001b[0m INFO:sagemaker-containers:Installing module with the following command:\n",
"\u001b[36malgo-1-hpz08_1 |\u001b[0m /usr/bin/python3 -m pip install . -r requirements.txt\n",
"\u001b[36malgo-1-hpz08_1 |\u001b[0m Processing /opt/ml/code\n",
"\u001b[36malgo-1-hpz08_1 |\u001b[0m Requirement already satisfied: pandas in /usr/local/lib/python3.5/dist-packages (from -r requirements.txt (line 1)) (0.24.2)\n",
"\u001b[36malgo-1-hpz08_1 |\u001b[0m Requirement already satisfied: numpy in /usr/local/lib/python3.5/dist-packages (from -r requirements.txt (line 2)) (1.17.2)\n",
"\u001b[36malgo-1-hpz08_1 |\u001b[0m Requirement already satisfied: python-dateutil>=2.5.0 in /usr/local/lib/python3.5/dist-packages (from pandas->-r requirements.txt (line 1)) (2.8.0)\n",
"\u001b[36malgo-1-hpz08_1 |\u001b[0m Requirement already satisfied: pytz>=2011k in /usr/local/lib/python3.5/dist-packages (from pandas->-r requirements.txt (line 1)) (2019.2)\n",
"\u001b[36malgo-1-hpz08_1 |\u001b[0m Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.5/dist-packages (from python-dateutil>=2.5.0->pandas->-r requirements.txt (line 1)) (1.12.0)\n",
"\u001b[36malgo-1-hpz08_1 |\u001b[0m Building wheels for collected packages: train-model\n",
"\u001b[36malgo-1-hpz08_1 |\u001b[0m Building wheel for train-model (setup.py) ... \u001b[?25ldone\n",
"\u001b[36malgo-1-hpz08_1 |\u001b[0m \u001b[?25h Created wheel for train-model: filename=train_model-1.0.0-py2.py3-none-any.whl size=6619 sha256=6c26988fc9ee9788904607f15cc01766c89ceb130812a83c95a48b0ab4cb0314\n",
"\u001b[36malgo-1-hpz08_1 |\u001b[0m Stored in directory: /tmp/pip-ephem-wheel-cache-zzsn3vvg/wheels/35/24/16/37574d11bf9bde50616c67372a334f94fa8356bc7164af8ca3\n",
"\u001b[36malgo-1-hpz08_1 |\u001b[0m Successfully built train-model\n",
"\u001b[36malgo-1-hpz08_1 |\u001b[0m Installing collected packages: train-model\n",
"\u001b[36malgo-1-hpz08_1 |\u001b[0m Successfully installed train-model-1.0.0\n",
"\u001b[36malgo-1-hpz08_1 |\u001b[0m \u001b[33mWARNING: You are using pip version 19.2.3, however version 19.3.1 is available.\n",
"\u001b[36malgo-1-hpz08_1 |\u001b[0m You should consider upgrading via the 'pip install --upgrade pip' command.\u001b[0m\n",
"\u001b[36malgo-1-hpz08_1 |\u001b[0m INFO:sagemaker-containers:No GPUs detected (normal if no gpus installed)\n",
"\u001b[36malgo-1-hpz08_1 |\u001b[0m INFO:sagemaker-containers:Invoking user script\n",
"\u001b[36malgo-1-hpz08_1 |\u001b[0m \n",
"\u001b[36malgo-1-hpz08_1 |\u001b[0m Training Env:\n",
"\u001b[36malgo-1-hpz08_1 |\u001b[0m \n",
"\u001b[36malgo-1-hpz08_1 |\u001b[0m {\n",
"\u001b[36malgo-1-hpz08_1 |\u001b[0m \"additional_framework_parameters\": {},\n",
"\u001b[36malgo-1-hpz08_1 |\u001b[0m \"is_master\": true,\n",
"\u001b[36malgo-1-hpz08_1 |\u001b[0m \"user_entry_point\": \"train_model.py\",\n",
"\u001b[36malgo-1-hpz08_1 |\u001b[0m \"input_config_dir\": \"/opt/ml/input/config\",\n",
"\u001b[36malgo-1-hpz08_1 |\u001b[0m \"input_dir\": \"/opt/ml/input\",\n",
"\u001b[36malgo-1-hpz08_1 |\u001b[0m \"module_dir\": \"s3://sagemaker-eu-central-1-029917565482/sagemaker-xgboost-2019-10-23-20-20-20-788/source/sourcedir.tar.gz\",\n",
"\u001b[36malgo-1-hpz08_1 |\u001b[0m \"hosts\": [\n",
"\u001b[36malgo-1-hpz08_1 |\u001b[0m \"algo-1-hpz08\"\n",
"\u001b[36malgo-1-hpz08_1 |\u001b[0m ],\n",
"\u001b[36malgo-1-hpz08_1 |\u001b[0m \"network_interface_name\": \"eth0\",\n",
"\u001b[36malgo-1-hpz08_1 |\u001b[0m \"job_name\": \"sagemaker-xgboost-2019-10-23-20-20-20-788\",\n",
"\u001b[36malgo-1-hpz08_1 |\u001b[0m \"framework_module\": \"sagemaker_xgboost_container.training:main\",\n",
"\u001b[36malgo-1-hpz08_1 |\u001b[0m \"output_dir\": \"/opt/ml/output\",\n",
"\u001b[36malgo-1-hpz08_1 |\u001b[0m \"master_hostname\": \"algo-1-hpz08\",\n",
"\u001b[36malgo-1-hpz08_1 |\u001b[0m \"num_gpus\": 0,\n",
"\u001b[36malgo-1-hpz08_1 |\u001b[0m \"input_data_config\": {\n",
"\u001b[36malgo-1-hpz08_1 |\u001b[0m \"training\": {\n",
"\u001b[36malgo-1-hpz08_1 |\u001b[0m \"TrainingInputMode\": \"File\"\n",
"\u001b[36malgo-1-hpz08_1 |\u001b[0m }\n",
"\u001b[36malgo-1-hpz08_1 |\u001b[0m },\n",
"\u001b[36malgo-1-hpz08_1 |\u001b[0m \"output_data_dir\": \"/opt/ml/output/data\",\n",
"\u001b[36malgo-1-hpz08_1 |\u001b[0m \"num_cpus\": 6,\n",
"\u001b[36malgo-1-hpz08_1 |\u001b[0m \"current_host\": \"algo-1-hpz08\",\n",
"\u001b[36malgo-1-hpz08_1 |\u001b[0m \"log_level\": 20,\n",
"\u001b[36malgo-1-hpz08_1 |\u001b[0m \"channel_input_dirs\": {\n",
"\u001b[36malgo-1-hpz08_1 |\u001b[0m \"training\": \"/opt/ml/input/data/training\"\n",
"\u001b[36malgo-1-hpz08_1 |\u001b[0m },\n",
"\u001b[36malgo-1-hpz08_1 |\u001b[0m \"module_name\": \"train_model\",\n",
"\u001b[36malgo-1-hpz08_1 |\u001b[0m \"model_dir\": \"/opt/ml/model\",\n",
"\u001b[36malgo-1-hpz08_1 |\u001b[0m \"hyperparameters\": {\n",
"\u001b[36malgo-1-hpz08_1 |\u001b[0m \"num_round\": 10,\n",
"\u001b[36malgo-1-hpz08_1 |\u001b[0m \"max_depth\": 15,\n",
"\u001b[36malgo-1-hpz08_1 |\u001b[0m \"num_class\": 8,\n",
"\u001b[36malgo-1-hpz08_1 |\u001b[0m \"bonitoo_price_neg_abs\": 200,\n",
"\u001b[36malgo-1-hpz08_1 |\u001b[0m \"bonitoo_price_pos_perc\": 0.05,\n",
"\u001b[36malgo-1-hpz08_1 |\u001b[0m \"bonitoo_price_pos_abs\": 1000,\n",
"\u001b[36malgo-1-hpz08_1 |\u001b[0m \"eta\": 0.5,\n",
"\u001b[36malgo-1-hpz08_1 |\u001b[0m \"objective\": \"multi:softprob\",\n",
"\u001b[36malgo-1-hpz08_1 |\u001b[0m \"bonitoo_price_neg_perc\": 0.05,\n",
"\u001b[36malgo-1-hpz08_1 |\u001b[0m \"eval_metric\": \"mlogloss\"\n",
"\u001b[36malgo-1-hpz08_1 |\u001b[0m },\n",
"\u001b[36malgo-1-hpz08_1 |\u001b[0m \"resource_config\": {\n",
"\u001b[36malgo-1-hpz08_1 |\u001b[0m \"current_host\": \"algo-1-hpz08\",\n",
"\u001b[36malgo-1-hpz08_1 |\u001b[0m \"hosts\": [\n",
"\u001b[36malgo-1-hpz08_1 |\u001b[0m \"algo-1-hpz08\"\n",
"\u001b[36malgo-1-hpz08_1 |\u001b[0m ]\n",
"\u001b[36malgo-1-hpz08_1 |\u001b[0m },\n",
"\u001b[36malgo-1-hpz08_1 |\u001b[0m \"output_intermediate_dir\": \"/opt/ml/output/intermediate\"\n",
"\u001b[36malgo-1-hpz08_1 |\u001b[0m }\n",
"\u001b[36malgo-1-hpz08_1 |\u001b[0m \n",
"\u001b[36malgo-1-hpz08_1 |\u001b[0m Environment variables:\n",
"\u001b[36malgo-1-hpz08_1 |\u001b[0m \n",
"\u001b[36malgo-1-hpz08_1 |\u001b[0m PYTHONPATH=/usr/local/bin:/:/usr/local/lib/python3.5/dist-packages/xgboost/dmlc-core/tracker:/usr/lib/python35.zip:/usr/lib/python3.5:/usr/lib/python3.5/plat-x86_64-linux-gnu:/usr/lib/python3.5/lib-dynload:/usr/local/lib/python3.5/dist-packages:/usr/lib/python3/dist-packages\n",
"\u001b[36malgo-1-hpz08_1 |\u001b[0m SM_HPS={\"bonitoo_price_neg_abs\":200,\"bonitoo_price_neg_perc\":0.05,\"bonitoo_price_pos_abs\":1000,\"bonitoo_price_pos_perc\":0.05,\"eta\":0.5,\"eval_metric\":\"mlogloss\",\"max_depth\":15,\"num_class\":8,\"num_round\":10,\"objective\":\"multi:softprob\"}\n",
"\u001b[36malgo-1-hpz08_1 |\u001b[0m SM_HP_NUM_ROUND=10\n",
"\u001b[36malgo-1-hpz08_1 |\u001b[0m SM_MODEL_DIR=/opt/ml/model\n",
"\u001b[36malgo-1-hpz08_1 |\u001b[0m SM_HP_BONITOO_PRICE_POS_ABS=1000\n",
"\u001b[36malgo-1-hpz08_1 |\u001b[0m SM_INPUT_CONFIG_DIR=/opt/ml/input/config\n",
"\u001b[36malgo-1-hpz08_1 |\u001b[0m SM_HOSTS=[\"algo-1-hpz08\"]\n",
"\u001b[36malgo-1-hpz08_1 |\u001b[0m SM_FRAMEWORK_MODULE=sagemaker_xgboost_container.training:main\n",
"\u001b[36malgo-1-hpz08_1 |\u001b[0m SM_HP_BONITOO_PRICE_NEG_PERC=0.05\n",
"\u001b[36malgo-1-hpz08_1 |\u001b[0m SM_LOG_LEVEL=20\n",
"\u001b[36malgo-1-hpz08_1 |\u001b[0m SM_HP_EVAL_METRIC=mlogloss\n",
"\u001b[36malgo-1-hpz08_1 |\u001b[0m SM_NUM_GPUS=0\n",
"\u001b[36malgo-1-hpz08_1 |\u001b[0m SM_HP_OBJECTIVE=multi:softprob\n",
"\u001b[36malgo-1-hpz08_1 |\u001b[0m SM_RESOURCE_CONFIG={\"current_host\":\"algo-1-hpz08\",\"hosts\":[\"algo-1-hpz08\"]}\n",
"\u001b[36malgo-1-hpz08_1 |\u001b[0m SM_OUTPUT_DIR=/opt/ml/output\n",
"\u001b[36malgo-1-hpz08_1 |\u001b[0m SM_CHANNEL_TRAINING=/opt/ml/input/data/training\n",
"\u001b[36malgo-1-hpz08_1 |\u001b[0m SM_HP_MAX_DEPTH=15\n",
"\u001b[36malgo-1-hpz08_1 |\u001b[0m SM_CHANNELS=[\"training\"]\n",
"\u001b[36malgo-1-hpz08_1 |\u001b[0m SM_MODULE_DIR=s3://sagemaker-eu-central-1-029917565482/sagemaker-xgboost-2019-10-23-20-20-20-788/source/sourcedir.tar.gz\n",
"\u001b[36malgo-1-hpz08_1 |\u001b[0m SM_OUTPUT_DATA_DIR=/opt/ml/output/data\n",
"\u001b[36malgo-1-hpz08_1 |\u001b[0m SM_INPUT_DATA_CONFIG={\"training\":{\"TrainingInputMode\":\"File\"}}\n",
"\u001b[36malgo-1-hpz08_1 |\u001b[0m SM_NETWORK_INTERFACE_NAME=eth0\n",
"\u001b[36malgo-1-hpz08_1 |\u001b[0m SM_INPUT_DIR=/opt/ml/input\n",
"\u001b[36malgo-1-hpz08_1 |\u001b[0m SM_HP_ETA=0.5\n",
"\u001b[36malgo-1-hpz08_1 |\u001b[0m SM_CURRENT_HOST=algo-1-hpz08\n",
"\u001b[36malgo-1-hpz08_1 |\u001b[0m SM_USER_ARGS=[\"--bonitoo_price_neg_abs\",\"200\",\"--bonitoo_price_neg_perc\",\"0.05\",\"--bonitoo_price_pos_abs\",\"1000\",\"--bonitoo_price_pos_perc\",\"0.05\",\"--eta\",\"0.5\",\"--eval_metric\",\"mlogloss\",\"--max_depth\",\"15\",\"--num_class\",\"8\",\"--num_round\",\"10\",\"--objective\",\"multi:softprob\"]\n",
"\u001b[36malgo-1-hpz08_1 |\u001b[0m SM_USER_ENTRY_POINT=train_model.py\n",
"\u001b[36malgo-1-hpz08_1 |\u001b[0m SM_HP_BONITOO_PRICE_POS_PERC=0.05\n",
"\u001b[36malgo-1-hpz08_1 |\u001b[0m SM_FRAMEWORK_PARAMS={}\n",
"\u001b[36malgo-1-hpz08_1 |\u001b[0m SM_HP_NUM_CLASS=8\n",
"\u001b[36malgo-1-hpz08_1 |\u001b[0m SM_MODULE_NAME=train_model\n",
"\u001b[36malgo-1-hpz08_1 |\u001b[0m SM_NUM_CPUS=6\n",
"\u001b[36malgo-1-hpz08_1 |\u001b[0m SM_TRAINING_ENV={\"additional_framework_parameters\":{},\"channel_input_dirs\":{\"training\":\"/opt/ml/input/data/training\"},\"current_host\":\"algo-1-hpz08\",\"framework_module\":\"sagemaker_xgboost_container.training:main\",\"hosts\":[\"algo-1-hpz08\"],\"hyperparameters\":{\"bonitoo_price_neg_abs\":200,\"bonitoo_price_neg_perc\":0.05,\"bonitoo_price_pos_abs\":1000,\"bonitoo_price_pos_perc\":0.05,\"eta\":0.5,\"eval_metric\":\"mlogloss\",\"max_depth\":15,\"num_class\":8,\"num_round\":10,\"objective\":\"multi:softprob\"},\"input_config_dir\":\"/opt/ml/input/config\",\"input_data_config\":{\"training\":{\"TrainingInputMode\":\"File\"}},\"input_dir\":\"/opt/ml/input\",\"is_master\":true,\"job_name\":\"sagemaker-xgboost-2019-10-23-20-20-20-788\",\"log_level\":20,\"master_hostname\":\"algo-1-hpz08\",\"model_dir\":\"/opt/ml/model\",\"module_dir\":\"s3://sagemaker-eu-central-1-029917565482/sagemaker-xgboost-2019-10-23-20-20-20-788/source/sourcedir.tar.gz\",\"module_name\":\"train_model\",\"network_interface_name\":\"eth0\",\"num_cpus\":6,\"num_gpus\":0,\"output_data_dir\":\"/opt/ml/output/data\",\"output_dir\":\"/opt/ml/output\",\"output_intermediate_dir\":\"/opt/ml/output/intermediate\",\"resource_config\":{\"current_host\":\"algo-1-hpz08\",\"hosts\":[\"algo-1-hpz08\"]},\"user_entry_point\":\"train_model.py\"}\n",
"\u001b[36malgo-1-hpz08_1 |\u001b[0m SM_HP_BONITOO_PRICE_NEG_ABS=200\n",
"\u001b[36malgo-1-hpz08_1 |\u001b[0m SM_OUTPUT_INTERMEDIATE_DIR=/opt/ml/output/intermediate\n",
"\u001b[36malgo-1-hpz08_1 |\u001b[0m \n",
"\u001b[36malgo-1-hpz08_1 |\u001b[0m Invoking script with the following command:\n",
"\u001b[36malgo-1-hpz08_1 |\u001b[0m \n",
"\u001b[36malgo-1-hpz08_1 |\u001b[0m /usr/bin/python3 -m train_model --bonitoo_price_neg_abs 200 --bonitoo_price_neg_perc 0.05 --bonitoo_price_pos_abs 1000 --bonitoo_price_pos_perc 0.05 --eta 0.5 --eval_metric mlogloss --max_depth 15 --num_class 8 --num_round 10 --objective multi:softprob\n",
"\u001b[36malgo-1-hpz08_1 |\u001b[0m \n",
"\u001b[36malgo-1-hpz08_1 |\u001b[0m \n"
"Creating tmp_3oe6esm_algo-1-z7zik_1 ... \n",
"\u001b[1BAttaching to tmp_3oe6esm_algo-1-z7zik_12mdone\u001b[0m\n",
"\u001b[36malgo-1-z7zik_1 |\u001b[0m INFO:sagemaker-containers:Imported framework sagemaker_xgboost_container.training\n",
"\u001b[36malgo-1-z7zik_1 |\u001b[0m INFO:sagemaker-containers:No GPUs detected (normal if no gpus installed)\n",
"\u001b[36malgo-1-z7zik_1 |\u001b[0m INFO:sagemaker_xgboost_container.training:Invoking user training script.\n",
"\u001b[36malgo-1-z7zik_1 |\u001b[0m INFO:sagemaker-containers:Module train_model does not provide a setup.py. \n",
"\u001b[36malgo-1-z7zik_1 |\u001b[0m Generating setup.py\n",
"\u001b[36malgo-1-z7zik_1 |\u001b[0m INFO:sagemaker-containers:Generating setup.cfg\n",
"\u001b[36malgo-1-z7zik_1 |\u001b[0m INFO:sagemaker-containers:Generating MANIFEST.in\n",
"\u001b[36malgo-1-z7zik_1 |\u001b[0m INFO:sagemaker-containers:Installing module with the following command:\n",
"\u001b[36malgo-1-z7zik_1 |\u001b[0m /miniconda3/bin/python -m pip install . -r requirements.txt\n",
"\u001b[36malgo-1-z7zik_1 |\u001b[0m Processing /opt/ml/code\n",
"\u001b[36malgo-1-z7zik_1 |\u001b[0m Requirement already satisfied: pandas in /miniconda3/lib/python3.7/site-packages (from -r requirements.txt (line 1)) (0.25.1)\n",
"\u001b[36malgo-1-z7zik_1 |\u001b[0m Requirement already satisfied: numpy in /miniconda3/lib/python3.7/site-packages (from -r requirements.txt (line 2)) (1.17.2)\n",
"\u001b[36malgo-1-z7zik_1 |\u001b[0m Requirement already satisfied: python-dateutil>=2.6.1 in /miniconda3/lib/python3.7/site-packages (from pandas->-r requirements.txt (line 1)) (2.8.0)\n",
"\u001b[36malgo-1-z7zik_1 |\u001b[0m Requirement already satisfied: pytz>=2017.2 in /miniconda3/lib/python3.7/site-packages (from pandas->-r requirements.txt (line 1)) (2019.3)\n",
"\u001b[36malgo-1-z7zik_1 |\u001b[0m Requirement already satisfied: six>=1.5 in /miniconda3/lib/python3.7/site-packages (from python-dateutil>=2.6.1->pandas->-r requirements.txt (line 1)) (1.12.0)\n",
"\u001b[36malgo-1-z7zik_1 |\u001b[0m Building wheels for collected packages: train-model\n",
"\u001b[36malgo-1-z7zik_1 |\u001b[0m Building wheel for train-model (setup.py) ... \u001b[?25ldone\n",
"\u001b[36malgo-1-z7zik_1 |\u001b[0m \u001b[?25h Created wheel for train-model: filename=train_model-1.0.0-py2.py3-none-any.whl size=12858 sha256=cbbc20f68f0e136ef85c4050e540996d0f98c89e6b741b5260e7e9873b23ebf7\n",
"\u001b[36malgo-1-z7zik_1 |\u001b[0m Stored in directory: /tmp/pip-ephem-wheel-cache-zrodcnfl/wheels/35/24/16/37574d11bf9bde50616c67372a334f94fa8356bc7164af8ca3\n",
"\u001b[36malgo-1-z7zik_1 |\u001b[0m Successfully built train-model\n",
"\u001b[36malgo-1-z7zik_1 |\u001b[0m Installing collected packages: train-model\n",
"\u001b[36malgo-1-z7zik_1 |\u001b[0m Successfully installed train-model-1.0.0\n",
"\u001b[36malgo-1-z7zik_1 |\u001b[0m INFO:sagemaker-containers:No GPUs detected (normal if no gpus installed)\n",
"\u001b[36malgo-1-z7zik_1 |\u001b[0m INFO:sagemaker-containers:Invoking user script\n",
"\u001b[36malgo-1-z7zik_1 |\u001b[0m \n",
"\u001b[36malgo-1-z7zik_1 |\u001b[0m Training Env:\n",
"\u001b[36malgo-1-z7zik_1 |\u001b[0m \n",
"\u001b[36malgo-1-z7zik_1 |\u001b[0m {\n",
"\u001b[36malgo-1-z7zik_1 |\u001b[0m \"additional_framework_parameters\": {},\n",
"\u001b[36malgo-1-z7zik_1 |\u001b[0m \"channel_input_dirs\": {\n",
"\u001b[36malgo-1-z7zik_1 |\u001b[0m \"training\": \"/opt/ml/input/data/training\"\n",
"\u001b[36malgo-1-z7zik_1 |\u001b[0m },\n",
"\u001b[36malgo-1-z7zik_1 |\u001b[0m \"current_host\": \"algo-1-z7zik\",\n",
"\u001b[36malgo-1-z7zik_1 |\u001b[0m \"framework_module\": \"sagemaker_xgboost_container.training:main\",\n",
"\u001b[36malgo-1-z7zik_1 |\u001b[0m \"hosts\": [\n",
"\u001b[36malgo-1-z7zik_1 |\u001b[0m \"algo-1-z7zik\"\n",
"\u001b[36malgo-1-z7zik_1 |\u001b[0m ],\n",
"\u001b[36malgo-1-z7zik_1 |\u001b[0m \"hyperparameters\": {\n",
"\u001b[36malgo-1-z7zik_1 |\u001b[0m \"bonitoo_price_pos_abs\": 1000,\n",
"\u001b[36malgo-1-z7zik_1 |\u001b[0m \"bonitoo_price_neg_abs\": 200,\n",
"\u001b[36malgo-1-z7zik_1 |\u001b[0m \"bonitoo_price_pos_perc\": 0.05,\n",
"\u001b[36malgo-1-z7zik_1 |\u001b[0m \"bonitoo_price_neg_perc\": 0.05,\n",
"\u001b[36malgo-1-z7zik_1 |\u001b[0m \"num_round\": 10,\n",
"\u001b[36malgo-1-z7zik_1 |\u001b[0m \"max_depth\": 15,\n",
"\u001b[36malgo-1-z7zik_1 |\u001b[0m \"eta\": 0.5,\n",
"\u001b[36malgo-1-z7zik_1 |\u001b[0m \"num_class\": 8,\n",
"\u001b[36malgo-1-z7zik_1 |\u001b[0m \"objective\": \"multi:softprob\",\n",
"\u001b[36malgo-1-z7zik_1 |\u001b[0m \"eval_metric\": \"mlogloss\"\n",
"\u001b[36malgo-1-z7zik_1 |\u001b[0m },\n",
"\u001b[36malgo-1-z7zik_1 |\u001b[0m \"input_config_dir\": \"/opt/ml/input/config\",\n",
"\u001b[36malgo-1-z7zik_1 |\u001b[0m \"input_data_config\": {\n",
"\u001b[36malgo-1-z7zik_1 |\u001b[0m \"training\": {\n",
"\u001b[36malgo-1-z7zik_1 |\u001b[0m \"TrainingInputMode\": \"File\"\n",
"\u001b[36malgo-1-z7zik_1 |\u001b[0m }\n",
"\u001b[36malgo-1-z7zik_1 |\u001b[0m },\n",
"\u001b[36malgo-1-z7zik_1 |\u001b[0m \"input_dir\": \"/opt/ml/input\",\n",
"\u001b[36malgo-1-z7zik_1 |\u001b[0m \"is_master\": true,\n",
"\u001b[36malgo-1-z7zik_1 |\u001b[0m \"job_name\": \"sagemaker-xgboost-2019-10-26-08-41-25-312\",\n",
"\u001b[36malgo-1-z7zik_1 |\u001b[0m \"log_level\": 20,\n",
"\u001b[36malgo-1-z7zik_1 |\u001b[0m \"master_hostname\": \"algo-1-z7zik\",\n",
"\u001b[36malgo-1-z7zik_1 |\u001b[0m \"model_dir\": \"/opt/ml/model\",\n",
"\u001b[36malgo-1-z7zik_1 |\u001b[0m \"module_dir\": \"s3://sagemaker-eu-central-1-029917565482/sagemaker-xgboost-2019-10-26-08-41-25-312/source/sourcedir.tar.gz\",\n",
"\u001b[36malgo-1-z7zik_1 |\u001b[0m \"module_name\": \"train_model\",\n",
"\u001b[36malgo-1-z7zik_1 |\u001b[0m \"network_interface_name\": \"eth0\",\n",
"\u001b[36malgo-1-z7zik_1 |\u001b[0m \"num_cpus\": 4,\n",
"\u001b[36malgo-1-z7zik_1 |\u001b[0m \"num_gpus\": 0,\n",
"\u001b[36malgo-1-z7zik_1 |\u001b[0m \"output_data_dir\": \"/opt/ml/output/data\",\n",
"\u001b[36malgo-1-z7zik_1 |\u001b[0m \"output_dir\": \"/opt/ml/output\",\n",
"\u001b[36malgo-1-z7zik_1 |\u001b[0m \"output_intermediate_dir\": \"/opt/ml/output/intermediate\",\n",
"\u001b[36malgo-1-z7zik_1 |\u001b[0m \"resource_config\": {\n",
"\u001b[36malgo-1-z7zik_1 |\u001b[0m \"current_host\": \"algo-1-z7zik\",\n",
"\u001b[36malgo-1-z7zik_1 |\u001b[0m \"hosts\": [\n",
"\u001b[36malgo-1-z7zik_1 |\u001b[0m \"algo-1-z7zik\"\n",
"\u001b[36malgo-1-z7zik_1 |\u001b[0m ]\n",
"\u001b[36malgo-1-z7zik_1 |\u001b[0m },\n",
"\u001b[36malgo-1-z7zik_1 |\u001b[0m \"user_entry_point\": \"train_model.py\"\n",
"\u001b[36malgo-1-z7zik_1 |\u001b[0m }\n",
"\u001b[36malgo-1-z7zik_1 |\u001b[0m \n",
"\u001b[36malgo-1-z7zik_1 |\u001b[0m Environment variables:\n",
"\u001b[36malgo-1-z7zik_1 |\u001b[0m \n",
"\u001b[36malgo-1-z7zik_1 |\u001b[0m SM_HOSTS=[\"algo-1-z7zik\"]\n",
"\u001b[36malgo-1-z7zik_1 |\u001b[0m SM_NETWORK_INTERFACE_NAME=eth0\n",
"\u001b[36malgo-1-z7zik_1 |\u001b[0m SM_HPS={\"bonitoo_price_neg_abs\":200,\"bonitoo_price_neg_perc\":0.05,\"bonitoo_price_pos_abs\":1000,\"bonitoo_price_pos_perc\":0.05,\"eta\":0.5,\"eval_metric\":\"mlogloss\",\"max_depth\":15,\"num_class\":8,\"num_round\":10,\"objective\":\"multi:softprob\"}\n",
"\u001b[36malgo-1-z7zik_1 |\u001b[0m SM_USER_ENTRY_POINT=train_model.py\n",
"\u001b[36malgo-1-z7zik_1 |\u001b[0m SM_FRAMEWORK_PARAMS={}\n",
"\u001b[36malgo-1-z7zik_1 |\u001b[0m SM_RESOURCE_CONFIG={\"current_host\":\"algo-1-z7zik\",\"hosts\":[\"algo-1-z7zik\"]}\n",
"\u001b[36malgo-1-z7zik_1 |\u001b[0m SM_INPUT_DATA_CONFIG={\"training\":{\"TrainingInputMode\":\"File\"}}\n",
"\u001b[36malgo-1-z7zik_1 |\u001b[0m SM_OUTPUT_DATA_DIR=/opt/ml/output/data\n",
"\u001b[36malgo-1-z7zik_1 |\u001b[0m SM_CHANNELS=[\"training\"]\n",
"\u001b[36malgo-1-z7zik_1 |\u001b[0m SM_CURRENT_HOST=algo-1-z7zik\n",
"\u001b[36malgo-1-z7zik_1 |\u001b[0m SM_MODULE_NAME=train_model\n",
"\u001b[36malgo-1-z7zik_1 |\u001b[0m SM_LOG_LEVEL=20\n",
"\u001b[36malgo-1-z7zik_1 |\u001b[0m SM_FRAMEWORK_MODULE=sagemaker_xgboost_container.training:main\n",
"\u001b[36malgo-1-z7zik_1 |\u001b[0m SM_INPUT_DIR=/opt/ml/input\n",
"\u001b[36malgo-1-z7zik_1 |\u001b[0m SM_INPUT_CONFIG_DIR=/opt/ml/input/config\n",
"\u001b[36malgo-1-z7zik_1 |\u001b[0m SM_OUTPUT_DIR=/opt/ml/output\n",
"\u001b[36malgo-1-z7zik_1 |\u001b[0m SM_NUM_CPUS=4\n",
"\u001b[36malgo-1-z7zik_1 |\u001b[0m SM_NUM_GPUS=0\n",
"\u001b[36malgo-1-z7zik_1 |\u001b[0m SM_MODEL_DIR=/opt/ml/model\n",
"\u001b[36malgo-1-z7zik_1 |\u001b[0m SM_MODULE_DIR=s3://sagemaker-eu-central-1-029917565482/sagemaker-xgboost-2019-10-26-08-41-25-312/source/sourcedir.tar.gz\n",
"\u001b[36malgo-1-z7zik_1 |\u001b[0m SM_TRAINING_ENV={\"additional_framework_parameters\":{},\"channel_input_dirs\":{\"training\":\"/opt/ml/input/data/training\"},\"current_host\":\"algo-1-z7zik\",\"framework_module\":\"sagemaker_xgboost_container.training:main\",\"hosts\":[\"algo-1-z7zik\"],\"hyperparameters\":{\"bonitoo_price_neg_abs\":200,\"bonitoo_price_neg_perc\":0.05,\"bonitoo_price_pos_abs\":1000,\"bonitoo_price_pos_perc\":0.05,\"eta\":0.5,\"eval_metric\":\"mlogloss\",\"max_depth\":15,\"num_class\":8,\"num_round\":10,\"objective\":\"multi:softprob\"},\"input_config_dir\":\"/opt/ml/input/config\",\"input_data_config\":{\"training\":{\"TrainingInputMode\":\"File\"}},\"input_dir\":\"/opt/ml/input\",\"is_master\":true,\"job_name\":\"sagemaker-xgboost-2019-10-26-08-41-25-312\",\"log_level\":20,\"master_hostname\":\"algo-1-z7zik\",\"model_dir\":\"/opt/ml/model\",\"module_dir\":\"s3://sagemaker-eu-central-1-029917565482/sagemaker-xgboost-2019-10-26-08-41-25-312/source/sourcedir.tar.gz\",\"module_name\":\"train_model\",\"network_interface_name\":\"eth0\",\"num_cpus\":4,\"num_gpus\":0,\"output_data_dir\":\"/opt/ml/output/data\",\"output_dir\":\"/opt/ml/output\",\"output_intermediate_dir\":\"/opt/ml/output/intermediate\",\"resource_config\":{\"current_host\":\"algo-1-z7zik\",\"hosts\":[\"algo-1-z7zik\"]},\"user_entry_point\":\"train_model.py\"}\n",
"\u001b[36malgo-1-z7zik_1 |\u001b[0m SM_USER_ARGS=[\"--bonitoo_price_neg_abs\",\"200\",\"--bonitoo_price_neg_perc\",\"0.05\",\"--bonitoo_price_pos_abs\",\"1000\",\"--bonitoo_price_pos_perc\",\"0.05\",\"--eta\",\"0.5\",\"--eval_metric\",\"mlogloss\",\"--max_depth\",\"15\",\"--num_class\",\"8\",\"--num_round\",\"10\",\"--objective\",\"multi:softprob\"]\n",
"\u001b[36malgo-1-z7zik_1 |\u001b[0m SM_OUTPUT_INTERMEDIATE_DIR=/opt/ml/output/intermediate\n",
"\u001b[36malgo-1-z7zik_1 |\u001b[0m SM_CHANNEL_TRAINING=/opt/ml/input/data/training\n",
"\u001b[36malgo-1-z7zik_1 |\u001b[0m SM_HP_BONITOO_PRICE_POS_ABS=1000\n",
"\u001b[36malgo-1-z7zik_1 |\u001b[0m SM_HP_BONITOO_PRICE_NEG_ABS=200\n",
"\u001b[36malgo-1-z7zik_1 |\u001b[0m SM_HP_BONITOO_PRICE_POS_PERC=0.05\n",
"\u001b[36malgo-1-z7zik_1 |\u001b[0m SM_HP_BONITOO_PRICE_NEG_PERC=0.05\n",
"\u001b[36malgo-1-z7zik_1 |\u001b[0m SM_HP_NUM_ROUND=10\n",
"\u001b[36malgo-1-z7zik_1 |\u001b[0m SM_HP_MAX_DEPTH=15\n",
"\u001b[36malgo-1-z7zik_1 |\u001b[0m SM_HP_ETA=0.5\n",
"\u001b[36malgo-1-z7zik_1 |\u001b[0m SM_HP_NUM_CLASS=8\n",
"\u001b[36malgo-1-z7zik_1 |\u001b[0m SM_HP_OBJECTIVE=multi:softprob\n",
"\u001b[36malgo-1-z7zik_1 |\u001b[0m SM_HP_EVAL_METRIC=mlogloss\n",
"\u001b[36malgo-1-z7zik_1 |\u001b[0m PYTHONPATH=/miniconda3/bin:/:/usr/local/lib/python3.5/dist-packages/xgboost/dmlc-core/tracker:/miniconda3/lib/python37.zip:/miniconda3/lib/python3.7:/miniconda3/lib/python3.7/lib-dynload:/miniconda3/lib/python3.7/site-packages\n",
"\u001b[36malgo-1-z7zik_1 |\u001b[0m \n",
"\u001b[36malgo-1-z7zik_1 |\u001b[0m Invoking script with the following command:\n",
"\u001b[36malgo-1-z7zik_1 |\u001b[0m \n",
"\u001b[36malgo-1-z7zik_1 |\u001b[0m /miniconda3/bin/python -m train_model --bonitoo_price_neg_abs 200 --bonitoo_price_neg_perc 0.05 --bonitoo_price_pos_abs 1000 --bonitoo_price_pos_perc 0.05 --eta 0.5 --eval_metric mlogloss --max_depth 15 --num_class 8 --num_round 10 --objective multi:softprob\n",
"\u001b[36malgo-1-z7zik_1 |\u001b[0m \n",
"\u001b[36malgo-1-z7zik_1 |\u001b[0m \n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\u001b[36malgo-1-hpz08_1 |\u001b[0m [0]\ttrain-mlogloss:0.943508\tvalidation-mlogloss:1.02402\n",
"\u001b[36malgo-1-hpz08_1 |\u001b[0m [1]\ttrain-mlogloss:0.66422\tvalidation-mlogloss:0.798095\n",
"\u001b[36malgo-1-hpz08_1 |\u001b[0m [2]\ttrain-mlogloss:0.501615\tvalidation-mlogloss:0.67508\n",
"\u001b[36malgo-1-hpz08_1 |\u001b[0m [3]\ttrain-mlogloss:0.395626\tvalidation-mlogloss:0.600831\n",
"\u001b[36malgo-1-hpz08_1 |\u001b[0m [4]\ttrain-mlogloss:0.327754\tvalidation-mlogloss:0.556783\n",
"\u001b[36malgo-1-hpz08_1 |\u001b[0m [5]\ttrain-mlogloss:0.28117\tvalidation-mlogloss:0.530051\n",
"\u001b[36malgo-1-hpz08_1 |\u001b[0m [6]\ttrain-mlogloss:0.245839\tvalidation-mlogloss:0.512267\n",
"\u001b[36malgo-1-hpz08_1 |\u001b[0m [7]\ttrain-mlogloss:0.220107\tvalidation-mlogloss:0.500467\n",
"\u001b[36malgo-1-hpz08_1 |\u001b[0m [8]\ttrain-mlogloss:0.199213\tvalidation-mlogloss:0.493193\n",
"\u001b[36malgo-1-hpz08_1 |\u001b[0m [9]\ttrain-mlogloss:0.18576\tvalidation-mlogloss:0.489638\n",
"\u001b[36mtmplob1feqn_algo-1-hpz08_1 exited with code 0\n",
"\u001b[36malgo-1-z7zik_1 |\u001b[0m [0]\ttrain-mlogloss:0.929483\tvalidation-mlogloss:1.0241\n",
"\u001b[36malgo-1-z7zik_1 |\u001b[0m [1]\ttrain-mlogloss:0.645144\tvalidation-mlogloss:0.796067\n",
"\u001b[36malgo-1-z7zik_1 |\u001b[0m [2]\ttrain-mlogloss:0.478228\tvalidation-mlogloss:0.672454\n",
"\u001b[36malgo-1-z7zik_1 |\u001b[0m [3]\ttrain-mlogloss:0.369705\tvalidation-mlogloss:0.599333\n",
"\u001b[36malgo-1-z7zik_1 |\u001b[0m [4]\ttrain-mlogloss:0.297172\tvalidation-mlogloss:0.556288\n",
"\u001b[36malgo-1-z7zik_1 |\u001b[0m [5]\ttrain-mlogloss:0.247464\tvalidation-mlogloss:0.528165\n",
"\u001b[36malgo-1-z7zik_1 |\u001b[0m [6]\ttrain-mlogloss:0.213406\tvalidation-mlogloss:0.509508\n",
"\u001b[36malgo-1-z7zik_1 |\u001b[0m [7]\ttrain-mlogloss:0.186961\tvalidation-mlogloss:0.498194\n",
"\u001b[36malgo-1-z7zik_1 |\u001b[0m [8]\ttrain-mlogloss:0.168055\tvalidation-mlogloss:0.490675\n",
"\u001b[36malgo-1-z7zik_1 |\u001b[0m [9]\ttrain-mlogloss:0.153464\tvalidation-mlogloss:0.485999\n",
"\u001b[36mtmp_3oe6esm_algo-1-z7zik_1 exited with code 0\n",
"\u001b[0mAborting on container exit...\n",
"===== Job Complete =====\n"
]

inference/src/main/java/cz/aprar/bonitoo/inference/CacheInference.java
@@ -70,7 +70,7 @@ public class CacheInference {
    }
    private DMatrix createMatrix(final FlightData data, final ZonedDateTime now) throws XGBoostError {
        final float[] arr = new float[16];
        final float[] arr = new float[20];
        arr[0] = data.getInputPrice().floatValue();
        arr[1] = labels.get("type").getOrDefault(data.getType(), 0);
        arr[2] = labels.get("flight.inboundSegments.departure").getOrDefault(joinTimestampList(data.getInboundDeparture()), 0);
@@ -87,6 +87,10 @@ public class CacheInference {
        arr[13] = labels.get("flight.outboundMCX.code").getOrDefault(data.getOutboundMCXAirlines(), 0);
        arr[14] = computeDuration(data.getInboundDeparture(), data.getOutboundDeparture());
        arr[15] = computePrebooking(data.getOutboundDeparture(), now);
        arr[16] = now.getDayOfMonth();
        arr[17] = now.getDayOfWeek().getValue() - 1;
        arr[18] = data.getOutboundDeparture().get(0).getDayOfMonth();
        arr[19] = data.getOutboundDeparture().get(0).getDayOfWeek().getValue() - 1;
        return new DMatrix(arr, 1, arr.length);
    }
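
A note on the day-of-week encoding added in arr[17] and arr[19]: java.time's DayOfWeek.getValue() runs 1 (Monday) to 7 (Sunday), so subtracting 1 yields 0..6 with Monday = 0, which is the same convention as datetime.weekday() and pandas' Timestamp.dayofweek used on the training side. A minimal sketch (not taken from the repo) that checks this convention:

from datetime import datetime
import pandas as pd

dt = datetime(2019, 11, 1, 16, 30)        # a departure timestamp from the test data; a Friday
assert dt.weekday() == 4                  # Monday = 0 ... Sunday = 6
assert pd.Timestamp(dt).dayofweek == 4    # pandas uses the same convention
# java.time: DayOfWeek.FRIDAY.getValue() == 5, so getValue() - 1 == 4 matches
print(dt.day, dt.weekday())               # 1 4 -> the flight day-of-month / day-of-week values for this timestamp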

inference/src/test/java/cz/aprar/bonitoo/inference/CacheInferenceTest.java
@@ -68,7 +68,7 @@ public class CacheInferenceTest {
toList("FR"),
"FR",
336.258
), TTL.D2},
), TTL.D14},
{new FlightData(
"AVIA",
toTimestampList("2020-02-07T02:25:00", "2020-02-07T14:50:00"),
@@ -84,7 +84,7 @@ public class CacheInferenceTest {
toList("AY", "AY"),
"AY",
5971.77978
), TTL.D3},
), TTL.D2},
{new FlightData(
"HH",
toTimestampList("2019-11-01T16:30:00", "2019-11-01T23:35:00"),
@@ -116,7 +116,7 @@ public class CacheInferenceTest {
toList("unknown"),
"unknown",
0.0
), TTL.NOCACHE}
), TTL.D1}
};
}
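
The expected TTL buckets shift because the classifier now sees the extra timestamp features. With objective multi:softprob and num_class 8 (see the hyperparameters in the training log above), the booster emits eight class probabilities per row and inference keeps the most likely one. A rough sketch of that selection; the class list and its ordering are illustrative, only NOCACHE, D1, D2, D3 and D14 are visible in this diff:

import numpy as np

TTL_CLASSES = ['NOCACHE', 'D1', 'D2', 'D3', 'D5', 'D7', 'D14', 'D30']   # hypothetical ordering

probs = np.array([0.01, 0.02, 0.05, 0.04, 0.03, 0.05, 0.70, 0.10])      # one row of multi:softprob output
print(TTL_CLASSES[int(np.argmax(probs))])                               # -> D14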

inference/src/test/resources/model/encoder.json: file diff suppressed because one or more lines are too long

src/train_model.py
@@ -135,12 +135,17 @@ def compute_prebooking(row):
    outdt = fromisoformat(outdeparture.split('|')[0])
    return (outdt - tstamp).days
def compute_dom(outdeparture):
    outdt = fromisoformat(outdeparture.split('|')[0])
    return outdt.day
def compute_dow(outdeparture):
    outdt = fromisoformat(outdeparture.split('|')[0])
    return outdt.weekday()
def preprocess_data(df):
    logging.info('Preprocessing start')
    # df = df[df.loc[:,'success'] == True]
    # df = df.dropna(subset=['cacheAt'])
    booleanDictionary = {True: 1, False: 0}
    df.loc[:, 'success'] = df.loc[:, 'success'].replace(booleanDictionary)
@@ -155,6 +160,10 @@ def preprocess_data(df):
    df.loc[:, 'duration'] = df.apply(lambda x: compute_duration(x), axis=1)
    df.loc[:, 'prebooking'] = df.apply(lambda x: compute_prebooking(x), axis=1)
    df.loc[:, 'order_dom'] = df.loc[:, 'timestamp'].apply(lambda x: x.day)
    df.loc[:, 'order_dow'] = df.loc[:, 'timestamp'].apply(lambda x: x.dayofweek)
    df.loc[:, 'flight_dom'] = df.loc[:, 'flight.outboundSegments.departure'].apply(lambda x: compute_dom(x))
    df.loc[:, 'flight_dow'] = df.loc[:, 'flight.outboundSegments.departure'].apply(lambda x: compute_dow(x))
    return df
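
These four columns are the training-side counterparts of arr[16]..arr[19] in CacheInference.createMatrix: order_dom/order_dow come from the request timestamp, flight_dom/flight_dow from the first outbound segment. A self-contained sketch of the same derivation on a toy frame; fromisoformat here is a stand-in for the helper defined earlier in train_model.py:

import pandas as pd
from datetime import datetime

def fromisoformat(s):                                     # stand-in for the project's parser
    return datetime.strptime(s, '%Y-%m-%dT%H:%M:%S')

df = pd.DataFrame({
    'timestamp': [pd.Timestamp('2019-10-26 08:41:25')],   # order placed on a Saturday
    'flight.outboundSegments.departure': ['2019-11-01T16:30:00|2019-11-01T23:35:00'],
})

df['order_dom'] = df['timestamp'].apply(lambda x: x.day)            # 26
df['order_dow'] = df['timestamp'].apply(lambda x: x.dayofweek)      # 5
df['flight_dom'] = df['flight.outboundSegments.departure'].apply(lambda s: fromisoformat(s.split('|')[0]).day)        # 1
df['flight_dow'] = df['flight.outboundSegments.departure'].apply(lambda s: fromisoformat(s.split('|')[0]).weekday())  # 4
print(df[['order_dom', 'order_dow', 'flight_dom', 'flight_dow']])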

xgboost load.ipynb: file diff suppressed because one or more lines are too long