Computed features

master
EHP 6 years ago
parent d675ca673a
commit 6e4ffcc292
  1. 22
      export.py
  2. 347
      external.ipynb
  3. 97
      inference/src/main/java/cz/aprar/bonitoo/inference/CacheInference.java
  4. 74
      inference/src/main/java/cz/aprar/bonitoo/inference/FlightData.java
  5. 104
      inference/src/test/java/cz/aprar/bonitoo/inference/CacheInferenceTest.java
  6. 2
      inference/src/test/resources/model/encoder.json
  7. BIN
      inference/src/test/resources/model/xgboost-model.bin
  8. 11
      runner.py
  9. 124
      src/train_model.py
  10. 82
      xgboost load.ipynb

@ -13,19 +13,17 @@ fieldnames = [
'flight.inboundSegments.arrival',
'flight.inboundSegments.origin.airportCode',
'flight.inboundSegments.destination.airportCode',
'flight.inboundSegments.airline.code',
'flight.outboundSegments.departure',
'flight.outboundSegments.arrival',
'flight.outboundSegments.origin.airportCode',
'flight.outboundSegments.destination.airportCode',
'flight.outboundSegments.airline.code',
'input.price',
'input.tax',
'input.currency',
'success',
'status',
'output.price',
'output.tax',
'output.currency',
'duration'
'cacheAt',
'cacheExp'
]
counter = 0
@ -50,15 +48,13 @@ with open('export.csv', mode='w') as ef:
[x['origin']['airportCode'] for x in it['flight']['outboundSegments']]),
'flight.outboundSegments.destination.airportCode': '|'.join(
[x['destination']['airportCode'] for x in it['flight']['outboundSegments']]),
'flight.outboundSegments.airline.code': '|'.join(
[x['airline']['code'] for x in it['flight']['outboundSegments']]),
'input.price': it['input']['price'],
'input.tax': it['input']['tax'],
'input.currency': it['input']['currency'],
'success': it['success'],
'status': it.get('status', ''),
'output.price': it.get('output', {'price': 0})['price'],
'output.tax': it.get('output', {'tax': 0})['tax'],
'output.currency': it.get('output', {'currency': 0})['currency'],
'duration': it['duration']
'cacheAt': it.get('cacheAt').isoformat() if it.get('cacheAt', None) else '',
'cacheExp': it.get('cacheExp').isoformat() if it.get('cacheExp', None) else ''
}
if 'inboundSegments' in it['flight']:
@ -71,6 +67,8 @@ with open('export.csv', mode='w') as ef:
[x['origin']['airportCode'] for x in it['flight']['inboundSegments']]),
'flight.inboundSegments.destination.airportCode': '|'.join(
[x['destination']['airportCode'] for x in it['flight']['inboundSegments']]),
'flight.inboundSegments.airline.code': '|'.join(
[x['airline']['code'] for x in it['flight']['inboundSegments']]),
}
d = {**d, **inb}
writer.writerow(d)

@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
@ -17,7 +17,7 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
@ -29,7 +29,7 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 33,
"metadata": {},
"outputs": [],
"source": [
@ -38,7 +38,7 @@
},
{
"cell_type": "code",
"execution_count": 50,
"execution_count": 77,
"metadata": {},
"outputs": [],
"source": [
@ -54,19 +54,22 @@
" framework_version='0.90-1',\n",
" py_version='py3',\n",
" hyperparameters={\n",
" 'bonitoo_price_limit': 1000,\n",
" 'num_round': 15,\n",
" 'bonitoo_price_pos_abs': 1000,\n",
" 'bonitoo_price_neg_abs': 200,\n",
" 'bonitoo_price_pos_perc': 0.05,\n",
" 'bonitoo_price_neg_perc': 0.05,\n",
" 'num_round': 20,\n",
" 'max_depth': 15,\n",
" 'eta': 0.5,\n",
" 'num_class': 8,\n",
" 'objective': 'multi:softmax',\n",
" 'objective': 'multi:softprob',\n",
" 'eval_metric': 'mlogloss'\n",
" })"
]
},
{
"cell_type": "code",
"execution_count": 51,
"execution_count": 78,
"metadata": {
"scrolled": true
},
@ -75,194 +78,158 @@
"name": "stdout",
"output_type": "stream",
"text": [
"Creating tmptao5hpuc_algo-1-x6dhm_1 ... \n",
"\u001b[1BAttaching to tmptao5hpuc_algo-1-x6dhm_12mdone\u001b[0m\n",
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m INFO:sagemaker-containers:Imported framework sagemaker_xgboost_container.training\n",
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m INFO:sagemaker-containers:No GPUs detected (normal if no gpus installed)\n",
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m INFO:sagemaker_xgboost_container.training:Invoking user training script.\n",
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m INFO:sagemaker-containers:Module train_model does not provide a setup.py. \n",
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m Generating setup.py\n",
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m INFO:sagemaker-containers:Generating setup.cfg\n",
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m INFO:sagemaker-containers:Generating MANIFEST.in\n",
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m INFO:sagemaker-containers:Installing module with the following command:\n",
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m /usr/bin/python3 -m pip install . -r requirements.txt\n",
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m Processing /opt/ml/code\n",
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m Requirement already satisfied: pandas in /usr/local/lib/python3.5/dist-packages (from -r requirements.txt (line 1)) (0.24.2)\n",
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m Requirement already satisfied: numpy in /usr/local/lib/python3.5/dist-packages (from -r requirements.txt (line 2)) (1.17.2)\n",
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m Requirement already satisfied: pytz>=2011k in /usr/local/lib/python3.5/dist-packages (from pandas->-r requirements.txt (line 1)) (2019.2)\n",
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m Requirement already satisfied: python-dateutil>=2.5.0 in /usr/local/lib/python3.5/dist-packages (from pandas->-r requirements.txt (line 1)) (2.8.0)\n",
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.5/dist-packages (from python-dateutil>=2.5.0->pandas->-r requirements.txt (line 1)) (1.12.0)\n",
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m Building wheels for collected packages: train-model\n",
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m Building wheel for train-model (setup.py) ... \u001b[?25ldone\n",
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m \u001b[?25h Created wheel for train-model: filename=train_model-1.0.0-py2.py3-none-any.whl size=6578 sha256=f2f4bac7a2d0260f534e32b3ac0341fb291f30669499adf59ead09aa62b7ccc5\n",
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m Stored in directory: /tmp/pip-ephem-wheel-cache-vdfjugbr/wheels/35/24/16/37574d11bf9bde50616c67372a334f94fa8356bc7164af8ca3\n",
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m Successfully built train-model\n",
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m Installing collected packages: train-model\n",
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m Successfully installed train-model-1.0.0\n",
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m INFO:sagemaker-containers:No GPUs detected (normal if no gpus installed)\n",
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m INFO:sagemaker-containers:Invoking user script\n",
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m \n",
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m Training Env:\n",
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m \n",
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m {\n",
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m \"network_interface_name\": \"eth0\",\n",
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m \"hosts\": [\n",
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m \"algo-1-x6dhm\"\n",
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m ],\n",
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m \"log_level\": 20,\n",
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m \"input_config_dir\": \"/opt/ml/input/config\",\n",
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m \"framework_module\": \"sagemaker_xgboost_container.training:main\",\n",
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m \"input_dir\": \"/opt/ml/input\",\n",
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m \"channel_input_dirs\": {\n",
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m \"training\": \"/opt/ml/input/data/training\"\n",
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m },\n",
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m \"num_gpus\": 0,\n",
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m \"job_name\": \"sagemaker-xgboost-2019-10-05-20-16-58-398\",\n",
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m \"output_intermediate_dir\": \"/opt/ml/output/intermediate\",\n",
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m \"user_entry_point\": \"train_model.py\",\n",
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m \"current_host\": \"algo-1-x6dhm\",\n",
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m \"module_dir\": \"s3://sagemaker-eu-central-1-029917565482/sagemaker-xgboost-2019-10-05-20-16-58-398/source/sourcedir.tar.gz\",\n",
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m \"master_hostname\": \"algo-1-x6dhm\",\n",
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m \"module_name\": \"train_model\",\n",
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m \"resource_config\": {\n",
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m \"current_host\": \"algo-1-x6dhm\",\n",
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m \"hosts\": [\n",
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m \"algo-1-x6dhm\"\n",
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m ]\n",
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m },\n",
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m \"additional_framework_parameters\": {},\n",
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m \"num_cpus\": 6,\n",
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m \"output_data_dir\": \"/opt/ml/output/data\",\n",
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m \"input_data_config\": {\n",
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m \"training\": {\n",
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m \"TrainingInputMode\": \"File\"\n",
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m }\n",
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m },\n",
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m \"is_master\": true,\n",
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m \"hyperparameters\": {\n",
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m \"bonitoo_price_limit\": 1000,\n",
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m \"max_depth\": 15,\n",
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m \"objective\": \"multi:softmax\",\n",
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m \"num_class\": 8,\n",
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m \"eta\": 0.5,\n",
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m \"eval_metric\": \"mlogloss\",\n",
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m \"num_round\": 15\n",
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m },\n",
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m \"output_dir\": \"/opt/ml/output\",\n",
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m \"model_dir\": \"/opt/ml/model\"\n",
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m }\n",
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m \n",
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m Environment variables:\n",
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m \n",
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m SM_INPUT_CONFIG_DIR=/opt/ml/input/config\n",
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m SM_HP_MAX_DEPTH=15\n",
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m SM_LOG_LEVEL=20\n",
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m SM_OUTPUT_DIR=/opt/ml/output\n",
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m SM_NUM_CPUS=6\n",
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m SM_CHANNELS=[\"training\"]\n",
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m SM_HP_NUM_ROUND=15\n",
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m SM_HP_OBJECTIVE=multi:softmax\n",
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m SM_OUTPUT_DATA_DIR=/opt/ml/output/data\n",
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m SM_FRAMEWORK_MODULE=sagemaker_xgboost_container.training:main\n",
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m SM_NETWORK_INTERFACE_NAME=eth0\n",
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m SM_INPUT_DATA_CONFIG={\"training\":{\"TrainingInputMode\":\"File\"}}\n",
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m SM_FRAMEWORK_PARAMS={}\n",
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m SM_HPS={\"bonitoo_price_limit\":1000,\"eta\":0.5,\"eval_metric\":\"mlogloss\",\"max_depth\":15,\"num_class\":8,\"num_round\":15,\"objective\":\"multi:softmax\"}\n",
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m PYTHONPATH=/usr/local/bin:/:/usr/local/lib/python3.5/dist-packages/xgboost/dmlc-core/tracker:/usr/lib/python35.zip:/usr/lib/python3.5:/usr/lib/python3.5/plat-x86_64-linux-gnu:/usr/lib/python3.5/lib-dynload:/usr/local/lib/python3.5/dist-packages:/usr/lib/python3/dist-packages\n",
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m SM_RESOURCE_CONFIG={\"current_host\":\"algo-1-x6dhm\",\"hosts\":[\"algo-1-x6dhm\"]}\n",
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m SM_NUM_GPUS=0\n",
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m SM_HP_ETA=0.5\n",
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m SM_HP_NUM_CLASS=8\n",
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m SM_MODULE_DIR=s3://sagemaker-eu-central-1-029917565482/sagemaker-xgboost-2019-10-05-20-16-58-398/source/sourcedir.tar.gz\n",
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m SM_USER_ARGS=[\"--bonitoo_price_limit\",\"1000\",\"--eta\",\"0.5\",\"--eval_metric\",\"mlogloss\",\"--max_depth\",\"15\",\"--num_class\",\"8\",\"--num_round\",\"15\",\"--objective\",\"multi:softmax\"]\n",
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m SM_USER_ENTRY_POINT=train_model.py\n",
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m SM_CURRENT_HOST=algo-1-x6dhm\n",
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m SM_INPUT_DIR=/opt/ml/input\n",
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m SM_CHANNEL_TRAINING=/opt/ml/input/data/training\n",
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m SM_OUTPUT_INTERMEDIATE_DIR=/opt/ml/output/intermediate\n",
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m SM_HP_EVAL_METRIC=mlogloss\n",
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m SM_MODULE_NAME=train_model\n",
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m SM_HP_BONITOO_PRICE_LIMIT=1000\n",
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m SM_HOSTS=[\"algo-1-x6dhm\"]\n",
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m SM_TRAINING_ENV={\"additional_framework_parameters\":{},\"channel_input_dirs\":{\"training\":\"/opt/ml/input/data/training\"},\"current_host\":\"algo-1-x6dhm\",\"framework_module\":\"sagemaker_xgboost_container.training:main\",\"hosts\":[\"algo-1-x6dhm\"],\"hyperparameters\":{\"bonitoo_price_limit\":1000,\"eta\":0.5,\"eval_metric\":\"mlogloss\",\"max_depth\":15,\"num_class\":8,\"num_round\":15,\"objective\":\"multi:softmax\"},\"input_config_dir\":\"/opt/ml/input/config\",\"input_data_config\":{\"training\":{\"TrainingInputMode\":\"File\"}},\"input_dir\":\"/opt/ml/input\",\"is_master\":true,\"job_name\":\"sagemaker-xgboost-2019-10-05-20-16-58-398\",\"log_level\":20,\"master_hostname\":\"algo-1-x6dhm\",\"model_dir\":\"/opt/ml/model\",\"module_dir\":\"s3://sagemaker-eu-central-1-029917565482/sagemaker-xgboost-2019-10-05-20-16-58-398/source/sourcedir.tar.gz\",\"module_name\":\"train_model\",\"network_interface_name\":\"eth0\",\"num_cpus\":6,\"num_gpus\":0,\"output_data_dir\":\"/opt/ml/output/data\",\"output_dir\":\"/opt/ml/output\",\"output_intermediate_dir\":\"/opt/ml/output/intermediate\",\"resource_config\":{\"current_host\":\"algo-1-x6dhm\",\"hosts\":[\"algo-1-x6dhm\"]},\"user_entry_point\":\"train_model.py\"}\n",
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m SM_MODEL_DIR=/opt/ml/model\n",
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m \n",
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m Invoking script with the following command:\n",
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m \n",
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m /usr/bin/python3 -m train_model --bonitoo_price_limit 1000 --eta 0.5 --eval_metric mlogloss --max_depth 15 --num_class 8 --num_round 15 --objective multi:softmax\n",
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m \n",
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m \n"
"Creating tmpsn7kurwo_algo-1-hibva_1 ... \n",
"\u001b[1BAttaching to tmpsn7kurwo_algo-1-hibva_12mdone\u001b[0m\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m INFO:sagemaker-containers:Imported framework sagemaker_xgboost_container.training\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m INFO:sagemaker-containers:No GPUs detected (normal if no gpus installed)\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m INFO:sagemaker_xgboost_container.training:Invoking user training script.\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m INFO:sagemaker-containers:Module train_model does not provide a setup.py. \n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m Generating setup.py\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m INFO:sagemaker-containers:Generating setup.cfg\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m INFO:sagemaker-containers:Generating MANIFEST.in\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m INFO:sagemaker-containers:Installing module with the following command:\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m /miniconda3/bin/python -m pip install . -r requirements.txt\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m Processing /opt/ml/code\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m Requirement already satisfied: pandas in /miniconda3/lib/python3.7/site-packages (from -r requirements.txt (line 1)) (0.25.1)\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m Requirement already satisfied: numpy in /miniconda3/lib/python3.7/site-packages (from -r requirements.txt (line 2)) (1.17.2)\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m Requirement already satisfied: python-dateutil>=2.6.1 in /miniconda3/lib/python3.7/site-packages (from pandas->-r requirements.txt (line 1)) (2.8.0)\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m Requirement already satisfied: pytz>=2017.2 in /miniconda3/lib/python3.7/site-packages (from pandas->-r requirements.txt (line 1)) (2019.3)\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m Requirement already satisfied: six>=1.5 in /miniconda3/lib/python3.7/site-packages (from python-dateutil>=2.6.1->pandas->-r requirements.txt (line 1)) (1.12.0)\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m Building wheels for collected packages: train-model\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m Building wheel for train-model (setup.py) ... \u001b[?25ldone\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m \u001b[?25h Created wheel for train-model: filename=train_model-1.0.0-py2.py3-none-any.whl size=12596 sha256=1e1372c49fcc19ef6d93ad652d2e5c79e5855068be011b19b2273a3aff1b098f\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m Stored in directory: /tmp/pip-ephem-wheel-cache-yjganydo/wheels/35/24/16/37574d11bf9bde50616c67372a334f94fa8356bc7164af8ca3\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m Successfully built train-model\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m Installing collected packages: train-model\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m Successfully installed train-model-1.0.0\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m INFO:sagemaker-containers:No GPUs detected (normal if no gpus installed)\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m INFO:sagemaker-containers:Invoking user script\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m \n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m Training Env:\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m \n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m {\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m \"additional_framework_parameters\": {},\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m \"channel_input_dirs\": {\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m \"training\": \"/opt/ml/input/data/training\"\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m },\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m \"current_host\": \"algo-1-hibva\",\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m \"framework_module\": \"sagemaker_xgboost_container.training:main\",\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m \"hosts\": [\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m \"algo-1-hibva\"\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m ],\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m \"hyperparameters\": {\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m \"bonitoo_price_pos_abs\": 1000,\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m \"bonitoo_price_neg_abs\": 200,\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m \"bonitoo_price_pos_perc\": 0.05,\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m \"bonitoo_price_neg_perc\": 0.05,\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m \"num_round\": 20,\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m \"max_depth\": 15,\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m \"eta\": 0.5,\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m \"num_class\": 8,\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m \"objective\": \"multi:softprob\",\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m \"eval_metric\": \"mlogloss\"\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m },\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m \"input_config_dir\": \"/opt/ml/input/config\",\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m \"input_data_config\": {\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m \"training\": {\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m \"TrainingInputMode\": \"File\"\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m }\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m },\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m \"input_dir\": \"/opt/ml/input\",\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m \"is_master\": true,\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m \"job_name\": \"sagemaker-xgboost-2019-10-19-17-27-30-738\",\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m \"log_level\": 20,\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m \"master_hostname\": \"algo-1-hibva\",\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m \"model_dir\": \"/opt/ml/model\",\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m \"module_dir\": \"s3://sagemaker-eu-central-1-029917565482/sagemaker-xgboost-2019-10-19-17-27-30-738/source/sourcedir.tar.gz\",\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m \"module_name\": \"train_model\",\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m \"network_interface_name\": \"eth0\",\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m \"num_cpus\": 4,\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m \"num_gpus\": 0,\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m \"output_data_dir\": \"/opt/ml/output/data\",\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m \"output_dir\": \"/opt/ml/output\",\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m \"output_intermediate_dir\": \"/opt/ml/output/intermediate\",\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m \"resource_config\": {\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m \"current_host\": \"algo-1-hibva\",\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m \"hosts\": [\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m \"algo-1-hibva\"\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m ]\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m },\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m \"user_entry_point\": \"train_model.py\"\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m }\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m \n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m Environment variables:\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m \n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m SM_HOSTS=[\"algo-1-hibva\"]\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m SM_NETWORK_INTERFACE_NAME=eth0\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m SM_HPS={\"bonitoo_price_neg_abs\":200,\"bonitoo_price_neg_perc\":0.05,\"bonitoo_price_pos_abs\":1000,\"bonitoo_price_pos_perc\":0.05,\"eta\":0.5,\"eval_metric\":\"mlogloss\",\"max_depth\":15,\"num_class\":8,\"num_round\":20,\"objective\":\"multi:softprob\"}\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m SM_USER_ENTRY_POINT=train_model.py\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m SM_FRAMEWORK_PARAMS={}\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m SM_RESOURCE_CONFIG={\"current_host\":\"algo-1-hibva\",\"hosts\":[\"algo-1-hibva\"]}\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m SM_INPUT_DATA_CONFIG={\"training\":{\"TrainingInputMode\":\"File\"}}\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m SM_OUTPUT_DATA_DIR=/opt/ml/output/data\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m SM_CHANNELS=[\"training\"]\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m SM_CURRENT_HOST=algo-1-hibva\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m SM_MODULE_NAME=train_model\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m SM_LOG_LEVEL=20\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m SM_FRAMEWORK_MODULE=sagemaker_xgboost_container.training:main\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m SM_INPUT_DIR=/opt/ml/input\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m SM_INPUT_CONFIG_DIR=/opt/ml/input/config\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m SM_OUTPUT_DIR=/opt/ml/output\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m SM_NUM_CPUS=4\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m SM_NUM_GPUS=0\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m SM_MODEL_DIR=/opt/ml/model\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m SM_MODULE_DIR=s3://sagemaker-eu-central-1-029917565482/sagemaker-xgboost-2019-10-19-17-27-30-738/source/sourcedir.tar.gz\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m SM_TRAINING_ENV={\"additional_framework_parameters\":{},\"channel_input_dirs\":{\"training\":\"/opt/ml/input/data/training\"},\"current_host\":\"algo-1-hibva\",\"framework_module\":\"sagemaker_xgboost_container.training:main\",\"hosts\":[\"algo-1-hibva\"],\"hyperparameters\":{\"bonitoo_price_neg_abs\":200,\"bonitoo_price_neg_perc\":0.05,\"bonitoo_price_pos_abs\":1000,\"bonitoo_price_pos_perc\":0.05,\"eta\":0.5,\"eval_metric\":\"mlogloss\",\"max_depth\":15,\"num_class\":8,\"num_round\":20,\"objective\":\"multi:softprob\"},\"input_config_dir\":\"/opt/ml/input/config\",\"input_data_config\":{\"training\":{\"TrainingInputMode\":\"File\"}},\"input_dir\":\"/opt/ml/input\",\"is_master\":true,\"job_name\":\"sagemaker-xgboost-2019-10-19-17-27-30-738\",\"log_level\":20,\"master_hostname\":\"algo-1-hibva\",\"model_dir\":\"/opt/ml/model\",\"module_dir\":\"s3://sagemaker-eu-central-1-029917565482/sagemaker-xgboost-2019-10-19-17-27-30-738/source/sourcedir.tar.gz\",\"module_name\":\"train_model\",\"network_interface_name\":\"eth0\",\"num_cpus\":4,\"num_gpus\":0,\"output_data_dir\":\"/opt/ml/output/data\",\"output_dir\":\"/opt/ml/output\",\"output_intermediate_dir\":\"/opt/ml/output/intermediate\",\"resource_config\":{\"current_host\":\"algo-1-hibva\",\"hosts\":[\"algo-1-hibva\"]},\"user_entry_point\":\"train_model.py\"}\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m SM_USER_ARGS=[\"--bonitoo_price_neg_abs\",\"200\",\"--bonitoo_price_neg_perc\",\"0.05\",\"--bonitoo_price_pos_abs\",\"1000\",\"--bonitoo_price_pos_perc\",\"0.05\",\"--eta\",\"0.5\",\"--eval_metric\",\"mlogloss\",\"--max_depth\",\"15\",\"--num_class\",\"8\",\"--num_round\",\"20\",\"--objective\",\"multi:softprob\"]\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m SM_OUTPUT_INTERMEDIATE_DIR=/opt/ml/output/intermediate\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m SM_CHANNEL_TRAINING=/opt/ml/input/data/training\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m SM_HP_BONITOO_PRICE_POS_ABS=1000\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m SM_HP_BONITOO_PRICE_NEG_ABS=200\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m SM_HP_BONITOO_PRICE_POS_PERC=0.05\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m SM_HP_BONITOO_PRICE_NEG_PERC=0.05\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m SM_HP_NUM_ROUND=20\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m SM_HP_MAX_DEPTH=15\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m SM_HP_ETA=0.5\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m SM_HP_NUM_CLASS=8\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m SM_HP_OBJECTIVE=multi:softprob\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m SM_HP_EVAL_METRIC=mlogloss\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m PYTHONPATH=/miniconda3/bin:/:/usr/local/lib/python3.5/dist-packages/xgboost/dmlc-core/tracker:/miniconda3/lib/python37.zip:/miniconda3/lib/python3.7:/miniconda3/lib/python3.7/lib-dynload:/miniconda3/lib/python3.7/site-packages\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m \n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m Invoking script with the following command:\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m \n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m /miniconda3/bin/python -m train_model --bonitoo_price_neg_abs 200 --bonitoo_price_neg_perc 0.05 --bonitoo_price_pos_abs 1000 --bonitoo_price_pos_perc 0.05 --eta 0.5 --eval_metric mlogloss --max_depth 15 --num_class 8 --num_round 20 --objective multi:softprob\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m \n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m \n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m ERROR:sagemaker-containers:ExecuteUserScriptError:\n",
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m Command \"/usr/bin/python3 -m train_model --bonitoo_price_limit 1000 --eta 0.5 --eval_metric mlogloss --max_depth 15 --num_class 8 --num_round 15 --objective multi:softmax\"\n",
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m INFO:root:hyperparameters {'num_round': 15, 'num_class': 8, 'objective': 'multi:softmax', 'eta': 0.5, 'max_depth': 15, 'eval_metric': ['mlogloss']}\n",
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m INFO:root:channels {'training': {'TrainingInputMode': 'File'}}\n",
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m INFO:root:Determined delimiter of CSV input is ','\n",
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m INFO:root:Loading csv file export.csv\n",
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m INFO:root:Preprocessing start\n",
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m /usr/local/lib/python3.5/dist-packages/pandas/core/indexing.py:543: SettingWithCopyWarning: \n",
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m A value is trying to be set on a copy of a slice from a DataFrame.\n",
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m Try using .loc[row_indexer,col_indexer] = value instead\n",
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m \n",
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n",
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m self.obj[item] = s\n",
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m /usr/local/lib/python3.5/dist-packages/pandas/core/indexing.py:362: SettingWithCopyWarning: \n",
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m A value is trying to be set on a copy of a slice from a DataFrame.\n",
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m Try using .loc[row_indexer,col_indexer] = value instead\n",
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m \n",
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n",
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m self.obj[key] = _infer_fill_value(value)\n",
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m INFO:root:Computing cached times\n",
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m INFO:root:Splitting dataset with ration 0.800000\n",
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m /usr/local/lib/python3.5/dist-packages/xgboost/core.py:587: FutureWarning: Series.base is deprecated and will be removed in a future version\n",
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m if getattr(data, 'base', None) is not None and \\\n",
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m /usr/local/lib/python3.5/dist-packages/xgboost/core.py:588: FutureWarning: Series.base is deprecated and will be removed in a future version\n",
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m data.base is not None and isinstance(data, np.ndarray) \\\n",
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m INFO:root:Single node training.\n",
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m INFO:root:Train matrix has 25393 rows\n",
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m INFO:root:Validation matrix has 6314 rows\n",
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m INFO:root:cols: ['index', 'adults', 'children', 'infants', 'input.price', 'input.tax', 'status', 'output.price', 'output.tax', 'duration', 'client.channel_codes', 'type_codes', 'flight.inboundSegments.departure_codes', 'flight.inboundSegments.arrival_codes', 'flight.inboundSegments.origin.airportCode_codes', 'flight.inboundSegments.destination.airportCode_codes', 'flight.inboundSegments.flightNumber_codes', 'flight.inboundSegments.travelClass_codes', 'flight.inboundSegments.bookingCode_codes', 'flight.inboundSegments.availability_codes', 'flight.inboundSegments.elapsedFlyingTime_codes', 'flight.outboundSegments.departure_codes', 'flight.outboundSegments.arrival_codes', 'flight.outboundSegments.origin.airportCode_codes', 'flight.outboundSegments.destination.airportCode_codes', 'flight.outboundSegments.flightNumber_codes', 'flight.outboundSegments.travelClass_codes', 'flight.outboundSegments.bookingCode_codes', 'flight.outboundSegments.availability_codes', 'flight.outboundSegments.elapsedFlyingTime_codes', 'flight.inboundEFT_codes', 'flight.outboundEFT_codes', 'input.currency_codes', 'output.currency_codes', 'oneWay_codes']\n",
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m Traceback (most recent call last):\n",
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m File \"/usr/lib/python3.5/runpy.py\", line 184, in _run_module_as_main\n",
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m \"__main__\", mod_spec)\n",
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m File \"/usr/lib/python3.5/runpy.py\", line 85, in _run_code\n",
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m exec(code, run_globals)\n",
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m File \"/opt/ml/code/train_model.py\", line 417, in <module>\n",
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m checkpoint_config=checkpoint_config\n",
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m File \"/opt/ml/code/train_model.py\", line 320, in sagemaker_train\n",
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m train_job(**train_args)\n",
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m File \"/opt/ml/code/train_model.py\", line 364, in train_job\n",
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m raise Exception(\"cols: %s\" % str(train_dmatrix.feature_names))\n",
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m Exception: cols: ['index', 'adults', 'children', 'infants', 'input.price', 'input.tax', 'status', 'output.price', 'output.tax', 'duration', 'client.channel_codes', 'type_codes', 'flight.inboundSegments.departure_codes', 'flight.inboundSegments.arrival_codes', 'flight.inboundSegments.origin.airportCode_codes', 'flight.inboundSegments.destination.airportCode_codes', 'flight.inboundSegments.flightNumber_codes', 'flight.inboundSegments.travelClass_codes', 'flight.inboundSegments.bookingCode_codes', 'flight.inboundSegments.availability_codes', 'flight.inboundSegments.elapsedFlyingTime_codes', 'flight.outboundSegments.departure_codes', 'flight.outboundSegments.arrival_codes', 'flight.outboundSegments.origin.airportCode_codes', 'flight.outboundSegments.destination.airportCode_codes', 'flight.outboundSegments.flightNumber_codes', 'flight.outboundSegments.travelClass_codes', 'flight.outboundSegments.bookingCode_codes', 'flight.outboundSegments.availability_codes', 'flight.outboundSegments.elapsedFlyingTime_codes', 'flight.inboundEFT_codes', 'flight.outboundEFT_codes', 'input.currency_codes', 'output.currency_codes', 'oneWay_codes']\n",
"\u001b[36mtmptao5hpuc_algo-1-x6dhm_1 exited with code 1\n",
"\u001b[0mAborting on container exit...\n"
]
},
{
"ename": "RuntimeError",
"evalue": "Failed to run: ['docker-compose', '-f', '/tmp/tmptao5hpuc/docker-compose.yaml', 'up', '--build', '--abort-on-container-exit'], Process exited with code: 1",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mRuntimeError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m~/aprar/bonitoo/.venv/lib/python3.7/site-packages/sagemaker/local/image.py\u001b[0m in \u001b[0;36mtrain\u001b[0;34m(self, input_data_config, output_data_config, hyperparameters, job_name)\u001b[0m\n\u001b[1;32m 147\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 148\u001b[0;31m \u001b[0m_stream_output\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mprocess\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 149\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mRuntimeError\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/aprar/bonitoo/.venv/lib/python3.7/site-packages/sagemaker/local/image.py\u001b[0m in \u001b[0;36m_stream_output\u001b[0;34m(process)\u001b[0m\n\u001b[1;32m 656\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mexit_code\u001b[0m \u001b[0;34m!=\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 657\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mRuntimeError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"Process exited with code: %s\"\u001b[0m \u001b[0;34m%\u001b[0m \u001b[0mexit_code\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 658\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mRuntimeError\u001b[0m: Process exited with code: 1",
"\nDuring handling of the above exception, another exception occurred:\n",
"\u001b[0;31mRuntimeError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-51-ffc1ca8d95fd>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mestimator\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m{\u001b[0m\u001b[0;34m'training'\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mtrain_input\u001b[0m\u001b[0;34m}\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2\u001b[0m \u001b[0;31m#estimator = sklearn.attach('sagemaker-scikit-learn-2019-01-25-16-34-38-829')\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/aprar/bonitoo/.venv/lib/python3.7/site-packages/sagemaker/estimator.py\u001b[0m in \u001b[0;36mfit\u001b[0;34m(self, inputs, wait, logs, job_name)\u001b[0m\n\u001b[1;32m 337\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_prepare_for_training\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mjob_name\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mjob_name\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 338\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 339\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlatest_training_job\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_TrainingJob\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstart_new\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minputs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 340\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mwait\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 341\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlatest_training_job\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mwait\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlogs\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mlogs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/aprar/bonitoo/.venv/lib/python3.7/site-packages/sagemaker/estimator.py\u001b[0m in \u001b[0;36mstart_new\u001b[0;34m(cls, estimator, inputs)\u001b[0m\n\u001b[1;32m 861\u001b[0m \u001b[0mcls\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_add_spot_checkpoint_args\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlocal_mode\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mestimator\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtrain_args\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 862\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 863\u001b[0;31m \u001b[0mestimator\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msagemaker_session\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtrain\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m**\u001b[0m\u001b[0mtrain_args\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 864\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 865\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mcls\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mestimator\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msagemaker_session\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mestimator\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_current_job_name\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/aprar/bonitoo/.venv/lib/python3.7/site-packages/sagemaker/session.py\u001b[0m in \u001b[0;36mtrain\u001b[0;34m(self, input_mode, input_config, role, job_name, output_config, resource_config, vpc_config, hyperparameters, stop_condition, tags, metric_definitions, enable_network_isolation, image, algorithm_arn, encrypt_inter_container_traffic, train_use_spot_instances, checkpoint_s3_uri, checkpoint_local_path)\u001b[0m\n\u001b[1;32m 390\u001b[0m \u001b[0mLOGGER\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0minfo\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"Creating training-job with name: %s\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mjob_name\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 391\u001b[0m \u001b[0mLOGGER\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdebug\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"train request: %s\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mjson\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdumps\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtrain_request\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mindent\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m4\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 392\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msagemaker_client\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcreate_training_job\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m**\u001b[0m\u001b[0mtrain_request\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 393\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 394\u001b[0m def compile_model(\n",
"\u001b[0;32m~/aprar/bonitoo/.venv/lib/python3.7/site-packages/sagemaker/local/local_session.py\u001b[0m in \u001b[0;36mcreate_training_job\u001b[0;34m(self, TrainingJobName, AlgorithmSpecification, OutputDataConfig, ResourceConfig, InputDataConfig, **kwargs)\u001b[0m\n\u001b[1;32m 99\u001b[0m \u001b[0mtraining_job\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_LocalTrainingJob\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcontainer\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 100\u001b[0m \u001b[0mhyperparameters\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mkwargs\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"HyperParameters\"\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;34m\"HyperParameters\"\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mkwargs\u001b[0m \u001b[0;32melse\u001b[0m \u001b[0;34m{\u001b[0m\u001b[0;34m}\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 101\u001b[0;31m \u001b[0mtraining_job\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstart\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mInputDataConfig\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mOutputDataConfig\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mhyperparameters\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mTrainingJobName\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 102\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 103\u001b[0m \u001b[0mLocalSagemakerClient\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_training_jobs\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mTrainingJobName\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtraining_job\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/aprar/bonitoo/.venv/lib/python3.7/site-packages/sagemaker/local/entities.py\u001b[0m in \u001b[0;36mstart\u001b[0;34m(self, input_data_config, output_data_config, hyperparameters, job_name)\u001b[0m\n\u001b[1;32m 87\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 88\u001b[0m self.model_artifacts = self.container.train(\n\u001b[0;32m---> 89\u001b[0;31m \u001b[0minput_data_config\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0moutput_data_config\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mhyperparameters\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mjob_name\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 90\u001b[0m )\n\u001b[1;32m 91\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mend_time\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdatetime\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdatetime\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnow\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/aprar/bonitoo/.venv/lib/python3.7/site-packages/sagemaker/local/image.py\u001b[0m in \u001b[0;36mtrain\u001b[0;34m(self, input_data_config, output_data_config, hyperparameters, job_name)\u001b[0m\n\u001b[1;32m 151\u001b[0m \u001b[0;31m# which contains the exit code and append the command line to it.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 152\u001b[0m \u001b[0mmsg\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m\"Failed to run: %s, %s\"\u001b[0m \u001b[0;34m%\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mcompose_command\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mstr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0me\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 153\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mRuntimeError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmsg\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 154\u001b[0m \u001b[0;32mfinally\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 155\u001b[0m \u001b[0martifacts\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mretrieve_artifacts\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcompose_data\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0moutput_data_config\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mjob_name\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mRuntimeError\u001b[0m: Failed to run: ['docker-compose', '-f', '/tmp/tmptao5hpuc/docker-compose.yaml', 'up', '--build', '--abort-on-container-exit'], Process exited with code: 1"
"\u001b[36malgo-1-hibva_1 |\u001b[0m [0]\ttrain-mlogloss:0.848017\tvalidation-mlogloss:0.922091\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m [1]\ttrain-mlogloss:0.578424\tvalidation-mlogloss:0.697124\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m [2]\ttrain-mlogloss:0.419099\tvalidation-mlogloss:0.572552\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m [3]\ttrain-mlogloss:0.31692\tvalidation-mlogloss:0.497591\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m [4]\ttrain-mlogloss:0.247843\tvalidation-mlogloss:0.450857\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m [5]\ttrain-mlogloss:0.20313\tvalidation-mlogloss:0.42247\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m [6]\ttrain-mlogloss:0.171749\tvalidation-mlogloss:0.404928\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m [7]\ttrain-mlogloss:0.15009\tvalidation-mlogloss:0.393772\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m [8]\ttrain-mlogloss:0.133377\tvalidation-mlogloss:0.385623\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m [9]\ttrain-mlogloss:0.120209\tvalidation-mlogloss:0.378456\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m [10]\ttrain-mlogloss:0.110155\tvalidation-mlogloss:0.374374\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m [11]\ttrain-mlogloss:0.09938\tvalidation-mlogloss:0.36958\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m [12]\ttrain-mlogloss:0.092882\tvalidation-mlogloss:0.366292\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m [13]\ttrain-mlogloss:0.085552\tvalidation-mlogloss:0.363469\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m [14]\ttrain-mlogloss:0.079976\tvalidation-mlogloss:0.363688\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m [15]\ttrain-mlogloss:0.075524\tvalidation-mlogloss:0.36325\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m [16]\ttrain-mlogloss:0.069857\tvalidation-mlogloss:0.36269\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m [17]\ttrain-mlogloss:0.065141\tvalidation-mlogloss:0.361854\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m [18]\ttrain-mlogloss:0.062355\tvalidation-mlogloss:0.361638\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m [19]\ttrain-mlogloss:0.060227\tvalidation-mlogloss:0.361047\n",
"\u001b[36mtmpsn7kurwo_algo-1-hibva_1 exited with code 0\n",
"\u001b[0mAborting on container exit...\n",
"===== Job Complete =====\n"
]
}
],

@ -12,8 +12,12 @@ import java.io.IOException;
import java.io.InputStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.time.ZonedDateTime;
import java.time.format.DateTimeFormatter;
import java.time.temporal.ChronoUnit;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
public class CacheInference {
private static final TTL[] TTL_VALUES = TTL.values();
@ -37,45 +41,94 @@ public class CacheInference {
}
public TTL cacheTTL(final FlightData data) throws XGBoostError {
float[][] predicts = booster.predict(createMatrix(data));
return TTL_VALUES[(int) predicts[0][0]];
return cacheTTL(data, ZonedDateTime.now());
}
/**
 * Predicts the cache TTL for a flight as of an explicit reference time.
 * Passing {@code now} instead of reading the clock makes the result
 * reproducible, which is what backtesting needs.
 *
 * @param data flight to score
 * @param now  reference time used when the feature row is built
 * @return the {@link TTL} constant whose ordinal has the highest predicted score
 * @throws XGBoostError if the underlying prediction fails
 */
public TTL cacheTTL(final FlightData data, final ZonedDateTime now) throws XGBoostError {
    final int bestClass = argmax(cacheTTLProbability(data, now));
    return TTL_VALUES[bestClass];
}
/**
 * Scores a flight against the model using the current system time as the
 * reference instant.
 *
 * @param data flight to score
 * @return the model output row for this flight (one value per class)
 * @throws XGBoostError if the underlying prediction fails
 */
public float[] cacheTTLProbability(final FlightData data) throws XGBoostError {
    final ZonedDateTime currentTime = ZonedDateTime.now();
    return cacheTTLProbability(data, currentTime);
}
/**
 * Scores a flight against the model as of an explicit reference time.
 * Intended for backtesting, where {@code now} must be controlled by the caller.
 *
 * @param data flight to score
 * @param now  reference time used when the feature row is built
 * @return the first (and only) row of the booster output, one value per class
 * @throws XGBoostError if matrix construction or prediction fails
 */
public float[] cacheTTLProbability(final FlightData data, final ZonedDateTime now) throws XGBoostError {
    final DMatrix matrix = createMatrix(data, now);
    try {
        final float[][] predicts = booster.predict(matrix);
        return predicts[0];
    } finally {
        // DMatrix wraps native memory; release it deterministically instead of
        // waiting for the finalizer, since one matrix is created per prediction.
        matrix.dispose();
    }
}
/**
 * Reads a serialized XGBoost model from the given stream.
 *
 * @param model stream positioned at the start of the model bytes
 * @return the loaded booster
 * @throws XGBoostError if XGBoost cannot load the model
 * @throws IOException  if reading the stream fails
 */
private Booster loadModel(InputStream model) throws XGBoostError, IOException {
    final Booster loaded = XGBoost.loadModel(model);
    return loaded;
}
private DMatrix createMatrix(final FlightData data) throws XGBoostError {
final float[] arr = new float[18];
arr[0] = labels.get("client.channel").getOrDefault(data.getClientChannel(), 0);
arr[1] = labels.get("type").getOrDefault(data.getType(), 0);
arr[2] = labels.get("flight.inboundSegments.departure").getOrDefault(joinList(data.getInboundDeparture()), 0);
arr[3] = labels.get("flight.inboundSegments.arrival").getOrDefault(joinList(data.getInboundArrival()), 0);
arr[4] = labels.get("flight.inboundSegments.origin.airportCode").getOrDefault(joinList(data.getInboundOrigin()), 0);
arr[5] = labels.get("flight.inboundSegments.destination.airportCode").getOrDefault(joinList(data.getInboundDestination()), 0);
arr[6] = labels.get("flight.outboundSegments.departure").getOrDefault(joinList(data.getOutboundDeparture()), 0);
arr[7] = labels.get("flight.outboundSegments.arrival").getOrDefault(joinList(data.getOutboundArrival()), 0);
arr[8] = labels.get("flight.outboundSegments.origin.airportCode").getOrDefault(joinList(data.getOutboundOrigin()), 0);
arr[9] = labels.get("flight.outboundSegments.destination.airportCode").getOrDefault(joinList(data.getOutboundDestination()), 0);
arr[10] = data.getInputPrice().floatValue();
arr[11] = data.getInputTax().floatValue();
arr[12] = labels.get("input.currency").getOrDefault(data.getInputCurrency(), 0);
arr[13] = data.getStatus().floatValue();
arr[14] = data.getOutputPrice().floatValue();
arr[15] = data.getOutputTax().floatValue();
arr[16] = labels.get("output.currency").getOrDefault(data.getOutputCurrency(), 0);
arr[17] = data.getDuration().floatValue();
/**
 * Builds the single-row, 17-column feature matrix passed to the booster.
 *
 * Categorical values are looked up in {@code labels}; a value missing from
 * the lookup table is encoded as 0. Timestamp lists and string lists are
 * first joined with "|" into one lookup key.
 *
 * NOTE(review): the column order presumably has to match the feature order
 * used when the model was trained - confirm against the training script
 * before reordering anything here.
 *
 * @param data flight to encode
 * @param now  reference time used for the prebooking feature
 * @return a 1 x 17 dense matrix
 * @throws XGBoostError if the matrix cannot be created
 */
private DMatrix createMatrix(final FlightData data, final ZonedDateTime now) throws XGBoostError {
final float[] arr = new float[17];
// Numeric features.
arr[0] = data.getInputPrice().floatValue();
// Boolean success flag encoded as 1 / 0.
arr[1] = data.getSuccess() ? 1 : 0;
arr[2] = data.getOutputPrice().floatValue();
// Label-encoded categorical features (0 when the value is not in the table).
arr[3] = labels.get("client.channel").getOrDefault(data.getClientChannel(), 0);
arr[4] = labels.get("type").getOrDefault(data.getType(), 0);
// Inbound segments.
arr[5] = labels.get("flight.inboundSegments.departure").getOrDefault(joinTimestampList(data.getInboundDeparture()), 0);
arr[6] = labels.get("flight.inboundSegments.arrival").getOrDefault(joinTimestampList(data.getInboundArrival()), 0);
arr[7] = labels.get("flight.inboundSegments.origin.airportCode").getOrDefault(joinList(data.getInboundOrigin()), 0);
arr[8] = labels.get("flight.inboundSegments.airline.code").getOrDefault(joinList(data.getInboundAirlines()), 0);
arr[9] = labels.get("flight.inboundSegments.destination.airportCode").getOrDefault(joinList(data.getInboundDestination()), 0);
// Outbound segments.
arr[10] = labels.get("flight.outboundSegments.departure").getOrDefault(joinTimestampList(data.getOutboundDeparture()), 0);
arr[11] = labels.get("flight.outboundSegments.arrival").getOrDefault(joinTimestampList(data.getOutboundArrival()), 0);
arr[12] = labels.get("flight.outboundSegments.origin.airportCode").getOrDefault(joinList(data.getOutboundOrigin()), 0);
arr[13] = labels.get("flight.outboundSegments.destination.airportCode").getOrDefault(joinList(data.getOutboundDestination()), 0);
arr[14] = labels.get("flight.outboundSegments.airline.code").getOrDefault(joinList(data.getOutboundAirlines()), 0);
// Derived features: whole days between first outbound and first inbound departure,
// and whole days from "now" to the first outbound departure.
arr[15] = computeDuration(data.getInboundDeparture(), data.getOutboundDeparture());
arr[16] = computePrebooking(data.getOutboundDeparture(), now);
// One row, arr.length columns.
return new DMatrix(arr, 1, arr.length);
}
/**
 * Whole days between the first outbound departure and the first inbound
 * departure.
 *
 * @param indeparture  inbound segment departures; may be empty
 * @param outdeparture outbound segment departures; the first element is read
 *                     whenever {@code indeparture} is non-empty
 * @return 0 when there is no inbound departure, otherwise the number of
 *         complete days from the first outbound to the first inbound departure
 */
private float computeDuration(final List<ZonedDateTime> indeparture, final List<ZonedDateTime> outdeparture) {
    final boolean noInbound = indeparture.isEmpty();
    return noInbound ? 0 : ChronoUnit.DAYS.between(outdeparture.get(0), indeparture.get(0));
}
/**
 * Whole days from the reference time to the first outbound departure.
 *
 * @param outdeparture outbound segment departures; the first element is read,
 *                     so the list must not be empty
 * @param now          reference time
 * @return number of complete days between {@code now} and the first outbound
 *         departure (negative when the departure lies in the past)
 */
private float computePrebooking(final List<ZonedDateTime> outdeparture, final ZonedDateTime now) {
    final ZonedDateTime firstDeparture = outdeparture.get(0);
    final long wholeDays = ChronoUnit.DAYS.between(now, firstDeparture);
    return wholeDays;
}
/**
 * Deserializes the label-encoding tables from the given stream using the
 * shared {@code mapper}.
 *
 * @param labels stream containing a two-level mapping: outer key to
 *               (string value to integer code)
 * @return the parsed nested map
 * @throws IOException if the stream cannot be read or parsed
 */
private Map<String, Map<String, Integer>> loadLabels(InputStream labels) throws IOException {
    return mapper.readValue(labels, new TypeReference<Map<String, Map<String, Integer>>>() {
    });
}
/**
 * Formats each timestamp with {@code ISO_LOCAL_DATE_TIME} (no zone or offset
 * in the output) and joins the results with "|".
 *
 * @param data timestamps to join
 * @return the "|"-separated string, or an empty string for an empty list
 */
private String joinTimestampList(final List<ZonedDateTime> data) {
    final StringBuilder joined = new StringBuilder();
    for (final ZonedDateTime stamp : data) {
        if (joined.length() > 0) {
            joined.append('|');
        }
        joined.append(DateTimeFormatter.ISO_LOCAL_DATE_TIME.format(stamp));
    }
    return joined.toString();
}
/**
 * Joins the given strings with "|".
 *
 * @param data values to join
 * @return the "|"-separated string, or an empty string for an empty list
 */
private String joinList(final List<String> data) {
    return data.stream().collect(Collectors.joining("|"));
}
/**
 * Returns the index of the largest element.
 *
 * @param data values to scan
 * @return index of the first maximum; 0 for an empty array
 */
private int argmax(final float[] data) {
    int idx = 0;
    // Float.MIN_VALUE is the smallest POSITIVE float, not the most negative
    // one, so seeding with it made every all-non-positive input return 0.
    float max = Float.NEGATIVE_INFINITY;
    for (int i = 0; i < data.length; i++) {
        if (data[i] > max) {
            max = data[i];
            idx = i;
        }
    }
    return idx;
}
}

@ -1,52 +1,48 @@
package cz.aprar.bonitoo.inference;
import java.time.ZonedDateTime;
import java.util.Collections;
import java.util.List;
public class FlightData {
private final String clientChannel;
private final String type;
private final List<String> inboundDeparture;
private final List<String> inboundArrival;
private final List<ZonedDateTime> inboundDeparture;
private final List<ZonedDateTime> inboundArrival;
private final List<String> inboundOrigin;
private final List<String> inboundDestination;
private final List<String> outboundDeparture;
private final List<String> outboundArrival;
private final List<String> inboundAirlines;
private final List<ZonedDateTime> outboundDeparture;
private final List<ZonedDateTime> outboundArrival;
private final List<String> outboundOrigin;
private final List<String> outboundDestination;
private final List<String> outboundAirlines;
private final Double inputPrice;
private final Double inputTax;
private final String inputCurrency;
private final Integer status;
private final Boolean success;
private final Double outputPrice;
private final Double outputTax;
private final String outputCurrency;
private final Integer duration;
public FlightData(final String clientChannel, final String type,
final List<String> inboundDeparture, final List<String> inboundArrival, final List<String> inboundOrigin,
final List<String> inboundDestination, final List<String> outboundDeparture, final List<String> outboundArrival,
final List<String> outboundOrigin, final List<String> outboundDestination, final Double inputPrice,
final Double inputTax, final String inputCurrency, final Integer status, final Double outputPrice,
final Double outputTax, final String outputCurrency, final Integer duration) {
public FlightData(final String clientChannel, final String type, final List<ZonedDateTime> inboundDeparture,
final List<ZonedDateTime> inboundArrival, final List<String> inboundOrigin,
final List<String> inboundDestination, final List<String> inboundAirlines,
final List<ZonedDateTime> outboundDeparture, final List<ZonedDateTime> outboundArrival,
final List<String> outboundOrigin, final List<String> outboundDestination,
final List<String> outboundAirlines, final Double inputPrice, final Boolean success,
final Double outputPrice) {
this.clientChannel = clientChannel;
this.type = type;
this.inboundDeparture = inboundDeparture;
this.inboundArrival = inboundArrival;
this.inboundOrigin = inboundOrigin;
this.inboundDestination = inboundDestination;
this.inboundAirlines = inboundAirlines;
this.outboundDeparture = outboundDeparture;
this.outboundArrival = outboundArrival;
this.outboundOrigin = outboundOrigin;
this.outboundDestination = outboundDestination;
this.outboundAirlines = outboundAirlines;
this.inputPrice = inputPrice;
this.inputTax = inputTax;
this.inputCurrency = inputCurrency;
this.status = status;
this.success = success;
this.outputPrice = outputPrice;
this.outputTax = outputTax;
this.outputCurrency = outputCurrency;
this.duration = duration;
}
public String getClientChannel() {
@ -57,11 +53,11 @@ public class FlightData {
return type;
}
public List<String> getInboundDeparture() {
public List<ZonedDateTime> getInboundDeparture() {
return Collections.unmodifiableList(inboundDeparture);
}
public List<String> getInboundArrival() {
public List<ZonedDateTime> getInboundArrival() {
return Collections.unmodifiableList(inboundArrival);
}
@ -73,11 +69,11 @@ public class FlightData {
return Collections.unmodifiableList(inboundDestination);
}
public List<String> getOutboundDeparture() {
public List<ZonedDateTime> getOutboundDeparture() {
return Collections.unmodifiableList(outboundDeparture);
}
public List<String> getOutboundArrival() {
public List<ZonedDateTime> getOutboundArrival() {
return Collections.unmodifiableList(outboundArrival);
}
@ -93,31 +89,19 @@ public class FlightData {
return inputPrice;
}
public Double getInputTax() {
return inputTax;
}
public String getInputCurrency() {
return inputCurrency;
}
public Integer getStatus() {
return status;
}
public Double getOutputPrice() {
return outputPrice;
}
public Double getOutputTax() {
return outputTax;
public List<String> getInboundAirlines() {
return Collections.unmodifiableList(inboundAirlines);
}
public String getOutputCurrency() {
return outputCurrency;
public List<String> getOutboundAirlines() {
return Collections.unmodifiableList(outboundAirlines);
}
public Integer getDuration() {
return duration;
public Boolean getSuccess() {
return success;
}
}

@ -6,8 +6,11 @@ import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import java.io.IOException;
import java.time.ZonedDateTime;
import java.time.format.DateTimeFormatter;
import java.util.Arrays;
import java.util.List;
import java.util.stream.Collectors;
import static java.util.Collections.emptyList;
import static org.testng.Assert.assertEquals;
@ -34,23 +37,20 @@ public class CacheInferenceTest {
{new FlightData(
"fly-me-to",
"WS",
toList("2020-05-09T23:59:00", "2020-05-10T10:30:00"),
toList("2020-05-10T08:55:00", "2020-05-10T11:30:00"),
toTimestampList("2020-05-09T23:59:00", "2020-05-10T10:30:00"),
toTimestampList("2020-05-10T08:55:00", "2020-05-10T11:30:00"),
toList("MCO", "FRA"),
toList("FRA", "PRG"),
toList("2020-05-01T09:50:00", "2020-05-01T11:55:00"),
toList("2020-05-01T11:00:00", "2020-05-01T21:55:00"),
toList("LH", "LH"),
toTimestampList("2020-05-01T09:50:00", "2020-05-01T11:55:00"),
toTimestampList("2020-05-01T11:00:00", "2020-05-01T21:55:00"),
toList("PRG", "FRA"),
toList("FRA", "MCO"),
toList("LH", "LH"),
39766.0,
19776.0,
"CZK",
0,
39766.0,
19776.0,
"CZK",
427
), TTL.D1},
Boolean.TRUE,
39766.0
), TTL.D2},
{new FlightData(
"fly-me-to",
"PYTON",
@ -58,78 +58,66 @@ public class CacheInferenceTest {
emptyList(),
emptyList(),
emptyList(),
toList("2019-12-18T05:45:00"),
toList("2019-12-18T08:05:00"),
emptyList(),
toTimestampList("2019-12-18T05:45:00"),
toTimestampList("2019-12-18T08:05:00"),
toList("KRK"),
toList("BVA"),
toList("FR"),
336.258,
0.0,
"CZK",
0,
336.258,
0.0,
"CZK",
2284
), TTL.D7},
Boolean.TRUE,
336.258
), TTL.D2},
{new FlightData(
"levne",
"AVIA",
toList("2020-02-07T02:25:00", "2020-02-07T14:50:00"),
toList("2020-02-07T13:10:00", "2020-02-07T16:55:00"),
toTimestampList("2020-02-07T02:25:00", "2020-02-07T14:50:00"),
toTimestampList("2020-02-07T13:10:00", "2020-02-07T16:55:00"),
toList("LAX", "LHR"),
toList("LHR", "PRG"),
toList("2020-01-28T10:35:00", "2020-01-28T14:40:00"),
toList("2020-01-28T12:45:00", "2020-01-29T01:45:00"),
toList("AA", "BA"),
toTimestampList("2020-01-28T10:35:00", "2020-01-28T14:40:00"),
toTimestampList("2020-01-28T12:45:00", "2020-01-29T01:45:00"),
toList("PRG", "HEL"),
toList("HEL", "LAX"),
toList("AY", "AY"),
5971.77978,
0.0,
"CZK",
0,
15971.77978,
0.0,
"CZK",
551
), TTL.D7},
Boolean.TRUE,
15971.77978
), TTL.D2},
{new FlightData(
"fly-me-to",
"HH",
toList("2019-11-01T16:30:00", "2019-11-01T23:35:00"),
toList("2019-11-01T21:12:00", "2019-11-02T07:45:00"),
toTimestampList("2019-11-01T16:30:00", "2019-11-01T23:35:00"),
toTimestampList("2019-11-01T21:12:00", "2019-11-02T07:45:00"),
toList("YVR", "YUL"),
toList("YUL", "VIE"),
toList("2019-10-18T08:10:00", "2019-10-18T11:30:00"),
toList("2019-10-18T09:40:00", "2019-10-18T21:25:00"),
toList("LH", "LH"),
toTimestampList("2019-10-18T08:10:00", "2019-10-18T11:30:00"),
toTimestampList("2019-10-18T09:40:00", "2019-10-18T21:25:00"),
toList("VIE", "FRA"),
toList("FRA", "YVR"),
toList("LH", "LH"),
17723.0,
7708.0,
"CZK",
0,
17723.0,
7708.0,
"CZK",
1786
), TTL.D1},
Boolean.TRUE,
17723.0
), TTL.D14},
{new FlightData(
"unknown",
"unknown",
toTimestampList(ZonedDateTime.now().format(DateTimeFormatter.ISO_LOCAL_DATE_TIME)),
toTimestampList(ZonedDateTime.now().format(DateTimeFormatter.ISO_LOCAL_DATE_TIME)),
toList("unknown"),
toList("unknown"),
toList("unknown"),
toTimestampList(ZonedDateTime.now().format(DateTimeFormatter.ISO_LOCAL_DATE_TIME)),
toTimestampList(ZonedDateTime.now().format(DateTimeFormatter.ISO_LOCAL_DATE_TIME)),
toList("unknown"),
toList("unknown"),
toList("unknown"),
toList("unknown"),
toList("unknown"),
0.0,
0.0,
"unknown",
0,
0.0,
0.0,
"unknown",
0
Boolean.FALSE,
0.0
), TTL.NOCACHE}
};
}
@ -137,4 +125,10 @@ public class CacheInferenceTest {
/**
 * Test helper: wraps the given strings in a list.
 *
 * @param data values to wrap
 * @return a fixed-size list view over {@code data}
 */
private List<String> toList(final String... data) {
    final List<String> wrapped = Arrays.asList(data);
    return wrapped;
}
/**
 * Test helper: parses local date-time strings as UTC timestamps by
 * appending "Z" to each before parsing.
 *
 * @param data local date-time strings such as "2020-05-09T23:59:00"
 * @return the parsed timestamps, in input order
 */
private List<ZonedDateTime> toTimestampList(final String... data) {
    final DateTimeFormatter parser = DateTimeFormatter.ISO_ZONED_DATE_TIME;
    return Arrays.stream(data)
            .map(stamp -> ZonedDateTime.parse(stamp + "Z", parser))
            .collect(Collectors.toList());
}
}

File diff suppressed because one or more lines are too long

@ -9,7 +9,7 @@ boto_session = boto3.Session(profile_name='bonitoo', region_name='eu-central-1')
sagemaker_session = sagemaker.Session(boto_session=boto_session)
role = 'Bonitoo_SageMaker_Execution'
train_input = 's3://customers-bonitoo-cachettl/sagemaker/data/export-reduced.csv'
train_input = 's3://customers-bonitoo-cachettl/sagemaker/data/export.csv'
tf = XGBoost(
entry_point='train_model.py',
@ -21,12 +21,15 @@ tf = XGBoost(
framework_version='0.90-1',
py_version='py3',
hyperparameters={
'bonitoo_price_limit': 1000,
'num_round': 15,
'bonitoo_price_pos_abs': 1000,
'bonitoo_price_neg_abs': 200,
'bonitoo_price_pos_perc': 0.05,
'bonitoo_price_neg_perc': 0.05,
'num_round': 20,
'max_depth': 15,
'eta': 0.5,
'num_class': 8,
'objective': 'multi:softmax',
'objective': 'multi:softprob',
'eval_metric': 'mlogloss'
})

@ -5,6 +5,7 @@ import argparse
import pandas as pd
import xgboost as xgb
import numpy as np
import datetime as dt
from sagemaker_algorithm_toolkit import exceptions as exc
from sagemaker_xgboost_container.constants import sm_env_constants
@ -25,19 +26,17 @@ columns = [
'flight.inboundSegments.arrival',
'flight.inboundSegments.origin.airportCode',
'flight.inboundSegments.destination.airportCode',
'flight.inboundSegments.airline.code',
'flight.outboundSegments.departure',
'flight.outboundSegments.arrival',
'flight.outboundSegments.origin.airportCode',
'flight.outboundSegments.destination.airportCode',
'flight.outboundSegments.airline.code',
'input.price',
'input.tax',
'input.currency',
'success',
'status',
'output.price',
'output.tax',
'output.currency',
'duration'
'cacheAt',
'cacheExp'
]
catcolumns = [
@ -47,95 +46,112 @@ catcolumns = [
'flight.inboundSegments.arrival',
'flight.inboundSegments.origin.airportCode',
'flight.inboundSegments.destination.airportCode',
'flight.inboundSegments.airline.code',
'flight.outboundSegments.departure',
'flight.outboundSegments.arrival',
'flight.outboundSegments.origin.airportCode',
'flight.outboundSegments.destination.airportCode',
'input.currency',
'output.currency'
'flight.outboundSegments.airline.code'
]
floatcolumns = [
'input.price',
'input.tax',
'output.price',
'output.tax'
'output.price'
]
intcolumns = [
'status',
'duration'
timestampcolumns = [
'timestamp',
'cacheAt',
'cacheExp'
]
pkcolumns = [
'flight.inboundSegments.departure',
'flight.inboundSegments.origin.airportCode',
'flight.inboundSegments.destination.airportCode',
'flight.outboundSegments.departure',
'flight.outboundSegments.origin.airportCode',
'flight.outboundSegments.destination.airportCode'
]
def excessive_price(inprice, outprice, price_pos_abs, price_neg_abs, price_pos_perc, price_neg_perc):
    """Return True when ``outprice`` deviates too far from ``inprice``.

    The price is considered excessive as soon as ANY of the four limits is
    exceeded:

    * it rose by more than ``price_pos_abs`` (absolute amount),
    * it dropped by more than ``price_neg_abs`` (absolute amount),
    * it is above ``inprice * (1 + price_pos_perc)``,
    * it is below ``inprice * (1 - price_neg_perc)``.

    All comparisons are strict, so a deviation exactly on a limit is still
    accepted. A NaN price makes every comparison false, so the result is
    False in that case.
    """
    increase = outprice - inprice
    return (
        increase > price_pos_abs
        or -increase > price_neg_abs
        or outprice > inprice * (1.0 + price_pos_perc)
        or outprice < inprice * (1.0 - price_neg_perc)
    )
def equal_price(inprice, outprice, tolerance=10):
    """Return True when the two prices differ by strictly less than ``tolerance``.

    ``tolerance`` defaults to 10 (the previously hard-coded value), so existing
    two-argument calls behave exactly as before.
    """
    return abs(inprice - outprice) < tolerance
# NOTE(review): this span shows two revisions of expected_value interleaved
# (two `def` lines, and each time bucket is tested once on `incachetime` and
# once on `expcachetime`); it is not runnable as shown -- confirm which lines
# belong to the current revision before editing the logic.
#
# Visible contract: maps one row to an integer label. The time buckets
# (12h, 1d, 2d, 3d, 7d, 14d) yield 1..6 plus `modifier`; anything longer falls
# through to min(7, 7 + modifier). `modifier` is -1 when the price moved too
# far and +1 when input and output price are (nearly) equal.
def expected_value(row, price_pos_abs=200, price_neg_abs=100, price_pos_perc=0.05, price_neg_perc=0.05):
# do not cache errors
success = row['success']
if success == 0:
return 0
def expected_value(row, cachetime_df, price_limit=1000):
# TODO sum tax + price ?
inprice, outprice = row['input.price'], row['output.price']
pricestatus = abs(inprice - outprice)
tstamp, cacheAt, cacheExp = row['timestamp'], row['cacheAt'], row['cacheExp']
# TODO correct in cache time ?
# Look up the min/max timestamps recorded for this row's key columns.
timestamps = cachetime_df.loc[row[pkcolumns].fillna(''),'timestamp']
tdiff = timestamps['max'] - timestamps['min']
if tdiff.shape[0] > 0:
incachetime = tdiff[0]
if cacheAt:
incachetime = tstamp - cacheAt
expcachetime = cacheExp - cacheAt
else:
# No cache information: mark the time as missing (NaT).
incachetime = np.timedelta64('NaT')
modifier = 0
if pricestatus > price_limit:
if excessive_price(inprice, outprice, price_pos_abs, price_neg_abs, price_pos_perc, price_neg_perc):
modifier = -1
if pricestatus < 0.1:
if not incachetime:
expcachetime = incachetime
if equal_price(inprice, outprice):
if pd.isnull(incachetime):
return 3
modifier = 1
if pd.isnull(incachetime):
return 0
return 1
# Bucket the cache time; the first matching upper bound wins.
if incachetime <= np.timedelta64(12,'h'):
if expcachetime <= np.timedelta64(12,'h'):
return 1 + modifier
if incachetime <= np.timedelta64(1,'D'):
if expcachetime <= np.timedelta64(1,'D'):
return 2 + modifier
if incachetime <= np.timedelta64(2,'D'):
if expcachetime <= np.timedelta64(2,'D'):
return 3 + modifier
if incachetime <= np.timedelta64(3,'D'):
if expcachetime <= np.timedelta64(3,'D'):
return 4 + modifier
if incachetime <= np.timedelta64(7,'D'):
if expcachetime <= np.timedelta64(7,'D'):
return 5 + modifier
if incachetime <= np.timedelta64(14,'D'):
if expcachetime <= np.timedelta64(14,'D'):
return 6 + modifier
# Longer than 14 days: cap the label at 7.
return min(7, 7 + modifier)
def compute_cached_time(df):
    """Return the earliest and latest ``timestamp`` per ``pkcolumns`` key.

    The result is indexed by the ``pkcolumns`` values and has a two-level
    column index: ``('timestamp', 'min')`` and ``('timestamp', 'max')``.
    """
    logging.info('Computing cached times')
    keyed = df.set_index(pkcolumns)
    return keyed.groupby(pkcolumns).agg({'timestamp': ['min', 'max']})
def compute_duration(row):
    """Return the trip length in whole days between outbound and inbound departure.

    Both departure fields hold ``|``-separated ISO-8601 strings (one per
    segment); only the first segment of each is used. A missing inbound
    departure (NaN/None or an empty string, i.e. no return leg) yields 0.
    The result is ``timedelta.days`` of ``inbound - outbound``, so partial
    days are floored.
    """
    indeparture = row['flight.inboundSegments.departure']
    outdeparture = row['flight.outboundSegments.departure']
    # An empty string would make fromisoformat() raise; treat it like NaN.
    if pd.isna(indeparture) or not indeparture:
        return 0
    indt = dt.datetime.fromisoformat(indeparture.split('|')[0])
    outdt = dt.datetime.fromisoformat(outdeparture.split('|')[0])
    return (indt - outdt).days
def compute_prebooking(row):
    """Return how many whole days before the outbound departure the row was recorded.

    ``row['timestamp']`` must already be a datetime-like value (it is
    subtracted from a ``datetime``); the outbound departure is a
    ``|``-separated list of ISO-8601 strings of which only the first segment
    is used. The result is the ``.days`` component of the difference, which
    floors toward negative infinity (a departure 4 hours before the timestamp
    gives -1).
    """
    tstamp = row['timestamp']
    first_departure = row['flight.outboundSegments.departure'].split('|')[0]
    outdt = dt.datetime.fromisoformat(first_departure)
    return (outdt - tstamp).days
# NOTE(review): old and new statements appear interleaved in this span (e.g.
# the single 'timestamp' conversion next to the timestampcolumns loop, and the
# intcolumns cast although intcolumns is left unclosed above) -- confirm which
# lines are current.
#
# Visible steps: keep only successful rows, parse timestamp columns with
# pd.to_datetime, add a '<col>_codes' integer column per categorical column,
# cast float columns, derive 'duration' and 'prebooking', return the frame.
def preprocess_data(df):
logging.info('Preprocessing start')
# Keep only rows whose 'success' flag is True.
df = df[df.loc[:, 'success'] == True]
df.loc[:, 'timestamp'] = df.loc[:, 'timestamp'].apply(lambda x: pd.to_datetime(x))
# Map the boolean 'success' flag to 1/0.
booleanDictionary = {True: 1, False: 0}
df.loc[:, 'success'] = df.loc[:, 'success'].replace(booleanDictionary)
for ct in timestampcolumns:
df.loc[:, ct] = df.loc[:, ct].apply(lambda x: pd.to_datetime(x))
for cc in catcolumns:
df.loc[:, cc] = df.loc[:, cc].astype('category')
# Store the category codes in a companion '<col>_codes' column.
df.loc[:, '%s_codes' % cc] = df[cc].cat.codes
df.loc[:, floatcolumns] = df.loc[:, floatcolumns].astype('float64')
# Missing integer values become -1 before the int32 cast.
df.loc[:, intcolumns] = df.loc[:, intcolumns].fillna(-1).astype('int32')
# Row-wise derived features (see compute_duration / compute_prebooking).
df.loc[:, 'duration'] = df.apply(lambda x: compute_duration(x), axis=1)
df.loc[:, 'prebooking'] = df.apply(lambda x: compute_prebooking(x), axis=1)
return df
# Returns a pair: (df with the non-feature columns dropped, the original df).
# NOTE(review): the two `return` lines look like alternative revisions (one
# drops 'timestamp', 'success' and catcolumns, the other drops catcolumns and
# timestampcolumns); as written only the first could ever execute -- confirm
# which one is current.
def remove_non_features(df):
return df.drop(['timestamp', 'success'] + catcolumns, axis=1), df
return df.drop(catcolumns + timestampcolumns, axis=1), df
def train_test_split(df, label, ratio):
logging.info('Splitting dataset with ration %f', ratio)
@ -162,7 +178,6 @@ def get_csv_pandas(files_path):
df = pd.read_csv(os.path.join(files_path, csv_file), header=None)
df.columns = columns
#raise "cols: " + df.info()
return df
except Exception as e:
@ -183,6 +198,7 @@ def get_pandas_df(data_path):
return df
def get_df(train_path, validate_path, content_type='text/csv'):
train_files_size = get_size(train_path) if train_path else 0
val_files_size = get_size(validate_path) if validate_path else 0
@ -226,7 +242,11 @@ def sagemaker_train(train_config, data_config, train_path, val_path, model_dir,
metrics = metrics_mod.initialize()
hyperparameters = hpv.initialize(metrics)
price_limit = int(train_config.get('bonitoo_price_limit', 1000))
price_pos_abs = int(train_config.get('bonitoo_price_pos_abs', 200))
price_neg_abs = int(train_config.get('bonitoo_price_neg_abs', 200))
price_pos_perc = float(train_config.get('bonitoo_price_pos_perc', 0.05))
price_neg_perc = float(train_config.get('bonitoo_price_neg_perc', 0.05))
train_config = {k:v.replace('"', '') for k,v in train_config.items() if not k.startswith('sagemaker_') and not k.startswith('bonitoo_')}
train_config = hyperparameters.validate(train_config)
@ -243,14 +263,13 @@ def sagemaker_train(train_config, data_config, train_path, val_path, model_dir,
train_df, val_df = get_df(train_path, val_path)
train_df = preprocess_data(train_df)
cachetime_df = compute_cached_time(train_df)
train_label_df = train_df.apply(lambda x: expected_value(x, cachetime_df, price_limit), axis=1).to_frame(name='label')
train_label_df = train_df.apply(lambda x: expected_value(x, price_pos_abs, price_neg_abs, price_pos_perc, price_neg_perc), axis=1).to_frame(name='label')
train_df, train_df_orig = remove_non_features(train_df)
val_label_df = None
if val_df:
val_df = preprocess_data(val_df)
val_label_df = val_df.apply(lambda x: expected_value(x, cachetime_df, price_limit), axis=1).to_frame(name='label')
val_label_df = val_df.apply(lambda x: expected_value(x, cachetime_df, price_pos_abs, price_neg_abs, price_pos_perc, price_neg_perc), axis=1).to_frame(name='label')
val_df, val_df_orig = remove_non_features(val_df)
train_dmatrix, val_dmatrix = get_dmatrices(train_df, train_label_df, val_df, val_label_df)
@ -288,6 +307,7 @@ def sagemaker_train(train_config, data_config, train_path, val_path, model_dir,
else:
raise exc.PlatformError("Number of hosts should be an int greater than or equal to 1")
def train_job(train_cfg, train_dmatrix, val_dmatrix, train_df, model_dir, checkpoint_dir, is_master):
# Parse arguments for train() API
early_stopping_rounds = train_cfg.get('early_stopping_rounds')

File diff suppressed because one or more lines are too long
Loading…
Cancel
Save