Feature redesign

master
EHP 6 years ago
parent b200ed833d
commit 4b8a777036
  1. 8
      export.py
  2. 292
      external.ipynb
  3. 33
      inference/src/main/java/cz/aprar/bonitoo/inference/CacheInference.java
  4. 36
      inference/src/main/java/cz/aprar/bonitoo/inference/FlightData.java
  5. 32
      inference/src/test/java/cz/aprar/bonitoo/inference/CacheInferenceTest.java
  6. 2
      inference/src/test/resources/model/encoder.json
  7. BIN
      inference/src/test/resources/model/xgboost-model.bin
  8. 2
      runner.py
  9. 15
      src/train_model.py
  10. 30
      xgboost load.ipynb

@ -7,18 +7,19 @@ db = client.bonitoo
fieldnames = [ fieldnames = [
'timestamp', 'timestamp',
'client.channel',
'type', 'type',
'flight.inboundSegments.departure', 'flight.inboundSegments.departure',
'flight.inboundSegments.arrival', 'flight.inboundSegments.arrival',
'flight.inboundSegments.origin.airportCode', 'flight.inboundSegments.origin.airportCode',
'flight.inboundSegments.destination.airportCode', 'flight.inboundSegments.destination.airportCode',
'flight.inboundSegments.airline.code', 'flight.inboundSegments.airline.code',
'flight.inboundMCX.code',
'flight.outboundSegments.departure', 'flight.outboundSegments.departure',
'flight.outboundSegments.arrival', 'flight.outboundSegments.arrival',
'flight.outboundSegments.origin.airportCode', 'flight.outboundSegments.origin.airportCode',
'flight.outboundSegments.destination.airportCode', 'flight.outboundSegments.destination.airportCode',
'flight.outboundSegments.airline.code', 'flight.outboundSegments.airline.code',
'flight.outboundMCX.code',
'input.price', 'input.price',
'success', 'success',
'output.price', 'output.price',
@ -38,7 +39,6 @@ with open('export.csv', mode='w') as ef:
print('Iterace %d' % counter) print('Iterace %d' % counter)
d = { d = {
'timestamp': datetime.fromtimestamp(it['timestamp'] / 1000).isoformat(), 'timestamp': datetime.fromtimestamp(it['timestamp'] / 1000).isoformat(),
'client.channel': it['client']['channel'],
'type': it['type'], 'type': it['type'],
'flight.outboundSegments.departure': '|'.join( 'flight.outboundSegments.departure': '|'.join(
[x['departure'].isoformat() for x in it['flight']['outboundSegments']]), [x['departure'].isoformat() for x in it['flight']['outboundSegments']]),
@ -50,7 +50,8 @@ with open('export.csv', mode='w') as ef:
[x['destination']['airportCode'] for x in it['flight']['outboundSegments']]), [x['destination']['airportCode'] for x in it['flight']['outboundSegments']]),
'flight.outboundSegments.airline.code': '|'.join( 'flight.outboundSegments.airline.code': '|'.join(
[x['airline']['code'] for x in it['flight']['outboundSegments']]), [x['airline']['code'] for x in it['flight']['outboundSegments']]),
'input.price': it['input']['price'], 'flight.outboundMCX.code': it['flight']['outboundMCX']['code'],
'input.price': it.get('input', {'price': 0})['price'],
'success': it['success'], 'success': it['success'],
'output.price': it.get('output', {'price': 0})['price'], 'output.price': it.get('output', {'price': 0})['price'],
'cacheAt': it.get('cacheAt').isoformat() if it.get('cacheAt', None) else '', 'cacheAt': it.get('cacheAt').isoformat() if it.get('cacheAt', None) else '',
@ -69,6 +70,7 @@ with open('export.csv', mode='w') as ef:
[x['destination']['airportCode'] for x in it['flight']['inboundSegments']]), [x['destination']['airportCode'] for x in it['flight']['inboundSegments']]),
'flight.inboundSegments.airline.code': '|'.join( 'flight.inboundSegments.airline.code': '|'.join(
[x['airline']['code'] for x in it['flight']['inboundSegments']]), [x['airline']['code'] for x in it['flight']['inboundSegments']]),
'flight.inboundMCX.code': it['flight']['inboundMCX']['code']
} }
d = {**d, **inb} d = {**d, **inb}
writer.writerow(d) writer.writerow(d)

@ -2,7 +2,7 @@
"cells": [ "cells": [
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 3, "execution_count": 14,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -17,7 +17,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 4, "execution_count": 15,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -29,7 +29,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 33, "execution_count": 16,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -38,7 +38,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 77, "execution_count": 59,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -58,7 +58,7 @@
" 'bonitoo_price_neg_abs': 200,\n", " 'bonitoo_price_neg_abs': 200,\n",
" 'bonitoo_price_pos_perc': 0.05,\n", " 'bonitoo_price_pos_perc': 0.05,\n",
" 'bonitoo_price_neg_perc': 0.05,\n", " 'bonitoo_price_neg_perc': 0.05,\n",
" 'num_round': 20,\n", " 'num_round': 10,\n",
" 'max_depth': 15,\n", " 'max_depth': 15,\n",
" 'eta': 0.5,\n", " 'eta': 0.5,\n",
" 'num_class': 8,\n", " 'num_class': 8,\n",
@ -69,7 +69,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 78, "execution_count": 60,
"metadata": { "metadata": {
"scrolled": true "scrolled": true
}, },
@ -78,156 +78,148 @@
"name": "stdout", "name": "stdout",
"output_type": "stream", "output_type": "stream",
"text": [ "text": [
"Creating tmpsn7kurwo_algo-1-hibva_1 ... \n", "Creating tmplob1feqn_algo-1-hpz08_1 ... \n",
"\u001b[1BAttaching to tmpsn7kurwo_algo-1-hibva_12mdone\u001b[0m\n", "\u001b[1BAttaching to tmplob1feqn_algo-1-hpz08_12mdone\u001b[0m\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m INFO:sagemaker-containers:Imported framework sagemaker_xgboost_container.training\n", "\u001b[36malgo-1-hpz08_1 |\u001b[0m INFO:sagemaker-containers:Imported framework sagemaker_xgboost_container.training\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m INFO:sagemaker-containers:No GPUs detected (normal if no gpus installed)\n", "\u001b[36malgo-1-hpz08_1 |\u001b[0m INFO:sagemaker-containers:No GPUs detected (normal if no gpus installed)\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m INFO:sagemaker_xgboost_container.training:Invoking user training script.\n", "\u001b[36malgo-1-hpz08_1 |\u001b[0m INFO:sagemaker_xgboost_container.training:Invoking user training script.\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m INFO:sagemaker-containers:Module train_model does not provide a setup.py. \n", "\u001b[36malgo-1-hpz08_1 |\u001b[0m INFO:sagemaker-containers:Module train_model does not provide a setup.py. \n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m Generating setup.py\n", "\u001b[36malgo-1-hpz08_1 |\u001b[0m Generating setup.py\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m INFO:sagemaker-containers:Generating setup.cfg\n", "\u001b[36malgo-1-hpz08_1 |\u001b[0m INFO:sagemaker-containers:Generating setup.cfg\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m INFO:sagemaker-containers:Generating MANIFEST.in\n", "\u001b[36malgo-1-hpz08_1 |\u001b[0m INFO:sagemaker-containers:Generating MANIFEST.in\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m INFO:sagemaker-containers:Installing module with the following command:\n", "\u001b[36malgo-1-hpz08_1 |\u001b[0m INFO:sagemaker-containers:Installing module with the following command:\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m /miniconda3/bin/python -m pip install . -r requirements.txt\n", "\u001b[36malgo-1-hpz08_1 |\u001b[0m /usr/bin/python3 -m pip install . -r requirements.txt\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m Processing /opt/ml/code\n", "\u001b[36malgo-1-hpz08_1 |\u001b[0m Processing /opt/ml/code\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m Requirement already satisfied: pandas in /miniconda3/lib/python3.7/site-packages (from -r requirements.txt (line 1)) (0.25.1)\n", "\u001b[36malgo-1-hpz08_1 |\u001b[0m Requirement already satisfied: pandas in /usr/local/lib/python3.5/dist-packages (from -r requirements.txt (line 1)) (0.24.2)\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m Requirement already satisfied: numpy in /miniconda3/lib/python3.7/site-packages (from -r requirements.txt (line 2)) (1.17.2)\n", "\u001b[36malgo-1-hpz08_1 |\u001b[0m Requirement already satisfied: numpy in /usr/local/lib/python3.5/dist-packages (from -r requirements.txt (line 2)) (1.17.2)\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m Requirement already satisfied: python-dateutil>=2.6.1 in /miniconda3/lib/python3.7/site-packages (from pandas->-r requirements.txt (line 1)) (2.8.0)\n", "\u001b[36malgo-1-hpz08_1 |\u001b[0m Requirement already satisfied: python-dateutil>=2.5.0 in /usr/local/lib/python3.5/dist-packages (from pandas->-r requirements.txt (line 1)) (2.8.0)\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m Requirement already satisfied: pytz>=2017.2 in /miniconda3/lib/python3.7/site-packages (from pandas->-r requirements.txt (line 1)) (2019.3)\n", "\u001b[36malgo-1-hpz08_1 |\u001b[0m Requirement already satisfied: pytz>=2011k in /usr/local/lib/python3.5/dist-packages (from pandas->-r requirements.txt (line 1)) (2019.2)\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m Requirement already satisfied: six>=1.5 in /miniconda3/lib/python3.7/site-packages (from python-dateutil>=2.6.1->pandas->-r requirements.txt (line 1)) (1.12.0)\n", "\u001b[36malgo-1-hpz08_1 |\u001b[0m Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.5/dist-packages (from python-dateutil>=2.5.0->pandas->-r requirements.txt (line 1)) (1.12.0)\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m Building wheels for collected packages: train-model\n", "\u001b[36malgo-1-hpz08_1 |\u001b[0m Building wheels for collected packages: train-model\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m Building wheel for train-model (setup.py) ... \u001b[?25ldone\n", "\u001b[36malgo-1-hpz08_1 |\u001b[0m Building wheel for train-model (setup.py) ... \u001b[?25ldone\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m \u001b[?25h Created wheel for train-model: filename=train_model-1.0.0-py2.py3-none-any.whl size=12596 sha256=1e1372c49fcc19ef6d93ad652d2e5c79e5855068be011b19b2273a3aff1b098f\n", "\u001b[36malgo-1-hpz08_1 |\u001b[0m \u001b[?25h Created wheel for train-model: filename=train_model-1.0.0-py2.py3-none-any.whl size=6619 sha256=6c26988fc9ee9788904607f15cc01766c89ceb130812a83c95a48b0ab4cb0314\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m Stored in directory: /tmp/pip-ephem-wheel-cache-yjganydo/wheels/35/24/16/37574d11bf9bde50616c67372a334f94fa8356bc7164af8ca3\n", "\u001b[36malgo-1-hpz08_1 |\u001b[0m Stored in directory: /tmp/pip-ephem-wheel-cache-zzsn3vvg/wheels/35/24/16/37574d11bf9bde50616c67372a334f94fa8356bc7164af8ca3\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m Successfully built train-model\n", "\u001b[36malgo-1-hpz08_1 |\u001b[0m Successfully built train-model\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m Installing collected packages: train-model\n", "\u001b[36malgo-1-hpz08_1 |\u001b[0m Installing collected packages: train-model\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m Successfully installed train-model-1.0.0\n", "\u001b[36malgo-1-hpz08_1 |\u001b[0m Successfully installed train-model-1.0.0\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m INFO:sagemaker-containers:No GPUs detected (normal if no gpus installed)\n", "\u001b[36malgo-1-hpz08_1 |\u001b[0m \u001b[33mWARNING: You are using pip version 19.2.3, however version 19.3.1 is available.\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m INFO:sagemaker-containers:Invoking user script\n", "\u001b[36malgo-1-hpz08_1 |\u001b[0m You should consider upgrading via the 'pip install --upgrade pip' command.\u001b[0m\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m \n", "\u001b[36malgo-1-hpz08_1 |\u001b[0m INFO:sagemaker-containers:No GPUs detected (normal if no gpus installed)\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m Training Env:\n", "\u001b[36malgo-1-hpz08_1 |\u001b[0m INFO:sagemaker-containers:Invoking user script\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m \n", "\u001b[36malgo-1-hpz08_1 |\u001b[0m \n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m {\n", "\u001b[36malgo-1-hpz08_1 |\u001b[0m Training Env:\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m \"additional_framework_parameters\": {},\n", "\u001b[36malgo-1-hpz08_1 |\u001b[0m \n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m \"channel_input_dirs\": {\n", "\u001b[36malgo-1-hpz08_1 |\u001b[0m {\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m \"training\": \"/opt/ml/input/data/training\"\n", "\u001b[36malgo-1-hpz08_1 |\u001b[0m \"additional_framework_parameters\": {},\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m },\n", "\u001b[36malgo-1-hpz08_1 |\u001b[0m \"is_master\": true,\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m \"current_host\": \"algo-1-hibva\",\n", "\u001b[36malgo-1-hpz08_1 |\u001b[0m \"user_entry_point\": \"train_model.py\",\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m \"framework_module\": \"sagemaker_xgboost_container.training:main\",\n", "\u001b[36malgo-1-hpz08_1 |\u001b[0m \"input_config_dir\": \"/opt/ml/input/config\",\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m \"hosts\": [\n", "\u001b[36malgo-1-hpz08_1 |\u001b[0m \"input_dir\": \"/opt/ml/input\",\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m \"algo-1-hibva\"\n", "\u001b[36malgo-1-hpz08_1 |\u001b[0m \"module_dir\": \"s3://sagemaker-eu-central-1-029917565482/sagemaker-xgboost-2019-10-23-20-20-20-788/source/sourcedir.tar.gz\",\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m ],\n", "\u001b[36malgo-1-hpz08_1 |\u001b[0m \"hosts\": [\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m \"hyperparameters\": {\n", "\u001b[36malgo-1-hpz08_1 |\u001b[0m \"algo-1-hpz08\"\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m \"bonitoo_price_pos_abs\": 1000,\n", "\u001b[36malgo-1-hpz08_1 |\u001b[0m ],\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m \"bonitoo_price_neg_abs\": 200,\n", "\u001b[36malgo-1-hpz08_1 |\u001b[0m \"network_interface_name\": \"eth0\",\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m \"bonitoo_price_pos_perc\": 0.05,\n", "\u001b[36malgo-1-hpz08_1 |\u001b[0m \"job_name\": \"sagemaker-xgboost-2019-10-23-20-20-20-788\",\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m \"bonitoo_price_neg_perc\": 0.05,\n", "\u001b[36malgo-1-hpz08_1 |\u001b[0m \"framework_module\": \"sagemaker_xgboost_container.training:main\",\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m \"num_round\": 20,\n", "\u001b[36malgo-1-hpz08_1 |\u001b[0m \"output_dir\": \"/opt/ml/output\",\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m \"max_depth\": 15,\n", "\u001b[36malgo-1-hpz08_1 |\u001b[0m \"master_hostname\": \"algo-1-hpz08\",\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m \"eta\": 0.5,\n", "\u001b[36malgo-1-hpz08_1 |\u001b[0m \"num_gpus\": 0,\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m \"num_class\": 8,\n", "\u001b[36malgo-1-hpz08_1 |\u001b[0m \"input_data_config\": {\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m \"objective\": \"multi:softprob\",\n", "\u001b[36malgo-1-hpz08_1 |\u001b[0m \"training\": {\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m \"eval_metric\": \"mlogloss\"\n", "\u001b[36malgo-1-hpz08_1 |\u001b[0m \"TrainingInputMode\": \"File\"\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m },\n", "\u001b[36malgo-1-hpz08_1 |\u001b[0m }\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m \"input_config_dir\": \"/opt/ml/input/config\",\n", "\u001b[36malgo-1-hpz08_1 |\u001b[0m },\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m \"input_data_config\": {\n", "\u001b[36malgo-1-hpz08_1 |\u001b[0m \"output_data_dir\": \"/opt/ml/output/data\",\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m \"training\": {\n", "\u001b[36malgo-1-hpz08_1 |\u001b[0m \"num_cpus\": 6,\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m \"TrainingInputMode\": \"File\"\n", "\u001b[36malgo-1-hpz08_1 |\u001b[0m \"current_host\": \"algo-1-hpz08\",\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m }\n", "\u001b[36malgo-1-hpz08_1 |\u001b[0m \"log_level\": 20,\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m },\n", "\u001b[36malgo-1-hpz08_1 |\u001b[0m \"channel_input_dirs\": {\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m \"input_dir\": \"/opt/ml/input\",\n", "\u001b[36malgo-1-hpz08_1 |\u001b[0m \"training\": \"/opt/ml/input/data/training\"\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m \"is_master\": true,\n", "\u001b[36malgo-1-hpz08_1 |\u001b[0m },\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m \"job_name\": \"sagemaker-xgboost-2019-10-19-17-27-30-738\",\n", "\u001b[36malgo-1-hpz08_1 |\u001b[0m \"module_name\": \"train_model\",\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m \"log_level\": 20,\n", "\u001b[36malgo-1-hpz08_1 |\u001b[0m \"model_dir\": \"/opt/ml/model\",\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m \"master_hostname\": \"algo-1-hibva\",\n", "\u001b[36malgo-1-hpz08_1 |\u001b[0m \"hyperparameters\": {\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m \"model_dir\": \"/opt/ml/model\",\n", "\u001b[36malgo-1-hpz08_1 |\u001b[0m \"num_round\": 10,\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m \"module_dir\": \"s3://sagemaker-eu-central-1-029917565482/sagemaker-xgboost-2019-10-19-17-27-30-738/source/sourcedir.tar.gz\",\n", "\u001b[36malgo-1-hpz08_1 |\u001b[0m \"max_depth\": 15,\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m \"module_name\": \"train_model\",\n", "\u001b[36malgo-1-hpz08_1 |\u001b[0m \"num_class\": 8,\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m \"network_interface_name\": \"eth0\",\n", "\u001b[36malgo-1-hpz08_1 |\u001b[0m \"bonitoo_price_neg_abs\": 200,\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m \"num_cpus\": 4,\n", "\u001b[36malgo-1-hpz08_1 |\u001b[0m \"bonitoo_price_pos_perc\": 0.05,\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m \"num_gpus\": 0,\n", "\u001b[36malgo-1-hpz08_1 |\u001b[0m \"bonitoo_price_pos_abs\": 1000,\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m \"output_data_dir\": \"/opt/ml/output/data\",\n", "\u001b[36malgo-1-hpz08_1 |\u001b[0m \"eta\": 0.5,\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m \"output_dir\": \"/opt/ml/output\",\n", "\u001b[36malgo-1-hpz08_1 |\u001b[0m \"objective\": \"multi:softprob\",\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m \"output_intermediate_dir\": \"/opt/ml/output/intermediate\",\n", "\u001b[36malgo-1-hpz08_1 |\u001b[0m \"bonitoo_price_neg_perc\": 0.05,\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m \"resource_config\": {\n", "\u001b[36malgo-1-hpz08_1 |\u001b[0m \"eval_metric\": \"mlogloss\"\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m \"current_host\": \"algo-1-hibva\",\n", "\u001b[36malgo-1-hpz08_1 |\u001b[0m },\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m \"hosts\": [\n", "\u001b[36malgo-1-hpz08_1 |\u001b[0m \"resource_config\": {\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m \"algo-1-hibva\"\n", "\u001b[36malgo-1-hpz08_1 |\u001b[0m \"current_host\": \"algo-1-hpz08\",\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m ]\n", "\u001b[36malgo-1-hpz08_1 |\u001b[0m \"hosts\": [\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m },\n", "\u001b[36malgo-1-hpz08_1 |\u001b[0m \"algo-1-hpz08\"\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m \"user_entry_point\": \"train_model.py\"\n", "\u001b[36malgo-1-hpz08_1 |\u001b[0m ]\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m }\n", "\u001b[36malgo-1-hpz08_1 |\u001b[0m },\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m \n", "\u001b[36malgo-1-hpz08_1 |\u001b[0m \"output_intermediate_dir\": \"/opt/ml/output/intermediate\"\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m Environment variables:\n", "\u001b[36malgo-1-hpz08_1 |\u001b[0m }\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m \n", "\u001b[36malgo-1-hpz08_1 |\u001b[0m \n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m SM_HOSTS=[\"algo-1-hibva\"]\n", "\u001b[36malgo-1-hpz08_1 |\u001b[0m Environment variables:\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m SM_NETWORK_INTERFACE_NAME=eth0\n", "\u001b[36malgo-1-hpz08_1 |\u001b[0m \n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m SM_HPS={\"bonitoo_price_neg_abs\":200,\"bonitoo_price_neg_perc\":0.05,\"bonitoo_price_pos_abs\":1000,\"bonitoo_price_pos_perc\":0.05,\"eta\":0.5,\"eval_metric\":\"mlogloss\",\"max_depth\":15,\"num_class\":8,\"num_round\":20,\"objective\":\"multi:softprob\"}\n", "\u001b[36malgo-1-hpz08_1 |\u001b[0m PYTHONPATH=/usr/local/bin:/:/usr/local/lib/python3.5/dist-packages/xgboost/dmlc-core/tracker:/usr/lib/python35.zip:/usr/lib/python3.5:/usr/lib/python3.5/plat-x86_64-linux-gnu:/usr/lib/python3.5/lib-dynload:/usr/local/lib/python3.5/dist-packages:/usr/lib/python3/dist-packages\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m SM_USER_ENTRY_POINT=train_model.py\n", "\u001b[36malgo-1-hpz08_1 |\u001b[0m SM_HPS={\"bonitoo_price_neg_abs\":200,\"bonitoo_price_neg_perc\":0.05,\"bonitoo_price_pos_abs\":1000,\"bonitoo_price_pos_perc\":0.05,\"eta\":0.5,\"eval_metric\":\"mlogloss\",\"max_depth\":15,\"num_class\":8,\"num_round\":10,\"objective\":\"multi:softprob\"}\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m SM_FRAMEWORK_PARAMS={}\n", "\u001b[36malgo-1-hpz08_1 |\u001b[0m SM_HP_NUM_ROUND=10\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m SM_RESOURCE_CONFIG={\"current_host\":\"algo-1-hibva\",\"hosts\":[\"algo-1-hibva\"]}\n", "\u001b[36malgo-1-hpz08_1 |\u001b[0m SM_MODEL_DIR=/opt/ml/model\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m SM_INPUT_DATA_CONFIG={\"training\":{\"TrainingInputMode\":\"File\"}}\n", "\u001b[36malgo-1-hpz08_1 |\u001b[0m SM_HP_BONITOO_PRICE_POS_ABS=1000\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m SM_OUTPUT_DATA_DIR=/opt/ml/output/data\n", "\u001b[36malgo-1-hpz08_1 |\u001b[0m SM_INPUT_CONFIG_DIR=/opt/ml/input/config\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m SM_CHANNELS=[\"training\"]\n", "\u001b[36malgo-1-hpz08_1 |\u001b[0m SM_HOSTS=[\"algo-1-hpz08\"]\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m SM_CURRENT_HOST=algo-1-hibva\n", "\u001b[36malgo-1-hpz08_1 |\u001b[0m SM_FRAMEWORK_MODULE=sagemaker_xgboost_container.training:main\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m SM_MODULE_NAME=train_model\n", "\u001b[36malgo-1-hpz08_1 |\u001b[0m SM_HP_BONITOO_PRICE_NEG_PERC=0.05\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m SM_LOG_LEVEL=20\n", "\u001b[36malgo-1-hpz08_1 |\u001b[0m SM_LOG_LEVEL=20\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m SM_FRAMEWORK_MODULE=sagemaker_xgboost_container.training:main\n", "\u001b[36malgo-1-hpz08_1 |\u001b[0m SM_HP_EVAL_METRIC=mlogloss\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m SM_INPUT_DIR=/opt/ml/input\n", "\u001b[36malgo-1-hpz08_1 |\u001b[0m SM_NUM_GPUS=0\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m SM_INPUT_CONFIG_DIR=/opt/ml/input/config\n", "\u001b[36malgo-1-hpz08_1 |\u001b[0m SM_HP_OBJECTIVE=multi:softprob\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m SM_OUTPUT_DIR=/opt/ml/output\n", "\u001b[36malgo-1-hpz08_1 |\u001b[0m SM_RESOURCE_CONFIG={\"current_host\":\"algo-1-hpz08\",\"hosts\":[\"algo-1-hpz08\"]}\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m SM_NUM_CPUS=4\n", "\u001b[36malgo-1-hpz08_1 |\u001b[0m SM_OUTPUT_DIR=/opt/ml/output\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m SM_NUM_GPUS=0\n", "\u001b[36malgo-1-hpz08_1 |\u001b[0m SM_CHANNEL_TRAINING=/opt/ml/input/data/training\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m SM_MODEL_DIR=/opt/ml/model\n", "\u001b[36malgo-1-hpz08_1 |\u001b[0m SM_HP_MAX_DEPTH=15\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m SM_MODULE_DIR=s3://sagemaker-eu-central-1-029917565482/sagemaker-xgboost-2019-10-19-17-27-30-738/source/sourcedir.tar.gz\n", "\u001b[36malgo-1-hpz08_1 |\u001b[0m SM_CHANNELS=[\"training\"]\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m SM_TRAINING_ENV={\"additional_framework_parameters\":{},\"channel_input_dirs\":{\"training\":\"/opt/ml/input/data/training\"},\"current_host\":\"algo-1-hibva\",\"framework_module\":\"sagemaker_xgboost_container.training:main\",\"hosts\":[\"algo-1-hibva\"],\"hyperparameters\":{\"bonitoo_price_neg_abs\":200,\"bonitoo_price_neg_perc\":0.05,\"bonitoo_price_pos_abs\":1000,\"bonitoo_price_pos_perc\":0.05,\"eta\":0.5,\"eval_metric\":\"mlogloss\",\"max_depth\":15,\"num_class\":8,\"num_round\":20,\"objective\":\"multi:softprob\"},\"input_config_dir\":\"/opt/ml/input/config\",\"input_data_config\":{\"training\":{\"TrainingInputMode\":\"File\"}},\"input_dir\":\"/opt/ml/input\",\"is_master\":true,\"job_name\":\"sagemaker-xgboost-2019-10-19-17-27-30-738\",\"log_level\":20,\"master_hostname\":\"algo-1-hibva\",\"model_dir\":\"/opt/ml/model\",\"module_dir\":\"s3://sagemaker-eu-central-1-029917565482/sagemaker-xgboost-2019-10-19-17-27-30-738/source/sourcedir.tar.gz\",\"module_name\":\"train_model\",\"network_interface_name\":\"eth0\",\"num_cpus\":4,\"num_gpus\":0,\"output_data_dir\":\"/opt/ml/output/data\",\"output_dir\":\"/opt/ml/output\",\"output_intermediate_dir\":\"/opt/ml/output/intermediate\",\"resource_config\":{\"current_host\":\"algo-1-hibva\",\"hosts\":[\"algo-1-hibva\"]},\"user_entry_point\":\"train_model.py\"}\n", "\u001b[36malgo-1-hpz08_1 |\u001b[0m SM_MODULE_DIR=s3://sagemaker-eu-central-1-029917565482/sagemaker-xgboost-2019-10-23-20-20-20-788/source/sourcedir.tar.gz\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m SM_USER_ARGS=[\"--bonitoo_price_neg_abs\",\"200\",\"--bonitoo_price_neg_perc\",\"0.05\",\"--bonitoo_price_pos_abs\",\"1000\",\"--bonitoo_price_pos_perc\",\"0.05\",\"--eta\",\"0.5\",\"--eval_metric\",\"mlogloss\",\"--max_depth\",\"15\",\"--num_class\",\"8\",\"--num_round\",\"20\",\"--objective\",\"multi:softprob\"]\n", "\u001b[36malgo-1-hpz08_1 |\u001b[0m SM_OUTPUT_DATA_DIR=/opt/ml/output/data\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m SM_OUTPUT_INTERMEDIATE_DIR=/opt/ml/output/intermediate\n", "\u001b[36malgo-1-hpz08_1 |\u001b[0m SM_INPUT_DATA_CONFIG={\"training\":{\"TrainingInputMode\":\"File\"}}\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m SM_CHANNEL_TRAINING=/opt/ml/input/data/training\n", "\u001b[36malgo-1-hpz08_1 |\u001b[0m SM_NETWORK_INTERFACE_NAME=eth0\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m SM_HP_BONITOO_PRICE_POS_ABS=1000\n", "\u001b[36malgo-1-hpz08_1 |\u001b[0m SM_INPUT_DIR=/opt/ml/input\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m SM_HP_BONITOO_PRICE_NEG_ABS=200\n", "\u001b[36malgo-1-hpz08_1 |\u001b[0m SM_HP_ETA=0.5\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m SM_HP_BONITOO_PRICE_POS_PERC=0.05\n", "\u001b[36malgo-1-hpz08_1 |\u001b[0m SM_CURRENT_HOST=algo-1-hpz08\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m SM_HP_BONITOO_PRICE_NEG_PERC=0.05\n", "\u001b[36malgo-1-hpz08_1 |\u001b[0m SM_USER_ARGS=[\"--bonitoo_price_neg_abs\",\"200\",\"--bonitoo_price_neg_perc\",\"0.05\",\"--bonitoo_price_pos_abs\",\"1000\",\"--bonitoo_price_pos_perc\",\"0.05\",\"--eta\",\"0.5\",\"--eval_metric\",\"mlogloss\",\"--max_depth\",\"15\",\"--num_class\",\"8\",\"--num_round\",\"10\",\"--objective\",\"multi:softprob\"]\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m SM_HP_NUM_ROUND=20\n", "\u001b[36malgo-1-hpz08_1 |\u001b[0m SM_USER_ENTRY_POINT=train_model.py\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m SM_HP_MAX_DEPTH=15\n", "\u001b[36malgo-1-hpz08_1 |\u001b[0m SM_HP_BONITOO_PRICE_POS_PERC=0.05\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m SM_HP_ETA=0.5\n", "\u001b[36malgo-1-hpz08_1 |\u001b[0m SM_FRAMEWORK_PARAMS={}\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m SM_HP_NUM_CLASS=8\n", "\u001b[36malgo-1-hpz08_1 |\u001b[0m SM_HP_NUM_CLASS=8\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m SM_HP_OBJECTIVE=multi:softprob\n", "\u001b[36malgo-1-hpz08_1 |\u001b[0m SM_MODULE_NAME=train_model\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m SM_HP_EVAL_METRIC=mlogloss\n", "\u001b[36malgo-1-hpz08_1 |\u001b[0m SM_NUM_CPUS=6\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m PYTHONPATH=/miniconda3/bin:/:/usr/local/lib/python3.5/dist-packages/xgboost/dmlc-core/tracker:/miniconda3/lib/python37.zip:/miniconda3/lib/python3.7:/miniconda3/lib/python3.7/lib-dynload:/miniconda3/lib/python3.7/site-packages\n", "\u001b[36malgo-1-hpz08_1 |\u001b[0m SM_TRAINING_ENV={\"additional_framework_parameters\":{},\"channel_input_dirs\":{\"training\":\"/opt/ml/input/data/training\"},\"current_host\":\"algo-1-hpz08\",\"framework_module\":\"sagemaker_xgboost_container.training:main\",\"hosts\":[\"algo-1-hpz08\"],\"hyperparameters\":{\"bonitoo_price_neg_abs\":200,\"bonitoo_price_neg_perc\":0.05,\"bonitoo_price_pos_abs\":1000,\"bonitoo_price_pos_perc\":0.05,\"eta\":0.5,\"eval_metric\":\"mlogloss\",\"max_depth\":15,\"num_class\":8,\"num_round\":10,\"objective\":\"multi:softprob\"},\"input_config_dir\":\"/opt/ml/input/config\",\"input_data_config\":{\"training\":{\"TrainingInputMode\":\"File\"}},\"input_dir\":\"/opt/ml/input\",\"is_master\":true,\"job_name\":\"sagemaker-xgboost-2019-10-23-20-20-20-788\",\"log_level\":20,\"master_hostname\":\"algo-1-hpz08\",\"model_dir\":\"/opt/ml/model\",\"module_dir\":\"s3://sagemaker-eu-central-1-029917565482/sagemaker-xgboost-2019-10-23-20-20-20-788/source/sourcedir.tar.gz\",\"module_name\":\"train_model\",\"network_interface_name\":\"eth0\",\"num_cpus\":6,\"num_gpus\":0,\"output_data_dir\":\"/opt/ml/output/data\",\"output_dir\":\"/opt/ml/output\",\"output_intermediate_dir\":\"/opt/ml/output/intermediate\",\"resource_config\":{\"current_host\":\"algo-1-hpz08\",\"hosts\":[\"algo-1-hpz08\"]},\"user_entry_point\":\"train_model.py\"}\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m \n", "\u001b[36malgo-1-hpz08_1 |\u001b[0m SM_HP_BONITOO_PRICE_NEG_ABS=200\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m Invoking script with the following command:\n", "\u001b[36malgo-1-hpz08_1 |\u001b[0m SM_OUTPUT_INTERMEDIATE_DIR=/opt/ml/output/intermediate\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m \n", "\u001b[36malgo-1-hpz08_1 |\u001b[0m \n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m /miniconda3/bin/python -m train_model --bonitoo_price_neg_abs 200 --bonitoo_price_neg_perc 0.05 --bonitoo_price_pos_abs 1000 --bonitoo_price_pos_perc 0.05 --eta 0.5 --eval_metric mlogloss --max_depth 15 --num_class 8 --num_round 20 --objective multi:softprob\n", "\u001b[36malgo-1-hpz08_1 |\u001b[0m Invoking script with the following command:\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m \n", "\u001b[36malgo-1-hpz08_1 |\u001b[0m \n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m \n" "\u001b[36malgo-1-hpz08_1 |\u001b[0m /usr/bin/python3 -m train_model --bonitoo_price_neg_abs 200 --bonitoo_price_neg_perc 0.05 --bonitoo_price_pos_abs 1000 --bonitoo_price_pos_perc 0.05 --eta 0.5 --eval_metric mlogloss --max_depth 15 --num_class 8 --num_round 10 --objective multi:softprob\n",
"\u001b[36malgo-1-hpz08_1 |\u001b[0m \n",
"\u001b[36malgo-1-hpz08_1 |\u001b[0m \n"
] ]
}, },
{ {
"name": "stdout", "name": "stdout",
"output_type": "stream", "output_type": "stream",
"text": [ "text": [
"\u001b[36malgo-1-hibva_1 |\u001b[0m [0]\ttrain-mlogloss:0.848017\tvalidation-mlogloss:0.922091\n", "\u001b[36malgo-1-hpz08_1 |\u001b[0m [0]\ttrain-mlogloss:0.943508\tvalidation-mlogloss:1.02402\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m [1]\ttrain-mlogloss:0.578424\tvalidation-mlogloss:0.697124\n", "\u001b[36malgo-1-hpz08_1 |\u001b[0m [1]\ttrain-mlogloss:0.66422\tvalidation-mlogloss:0.798095\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m [2]\ttrain-mlogloss:0.419099\tvalidation-mlogloss:0.572552\n", "\u001b[36malgo-1-hpz08_1 |\u001b[0m [2]\ttrain-mlogloss:0.501615\tvalidation-mlogloss:0.67508\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m [3]\ttrain-mlogloss:0.31692\tvalidation-mlogloss:0.497591\n", "\u001b[36malgo-1-hpz08_1 |\u001b[0m [3]\ttrain-mlogloss:0.395626\tvalidation-mlogloss:0.600831\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m [4]\ttrain-mlogloss:0.247843\tvalidation-mlogloss:0.450857\n", "\u001b[36malgo-1-hpz08_1 |\u001b[0m [4]\ttrain-mlogloss:0.327754\tvalidation-mlogloss:0.556783\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m [5]\ttrain-mlogloss:0.20313\tvalidation-mlogloss:0.42247\n", "\u001b[36malgo-1-hpz08_1 |\u001b[0m [5]\ttrain-mlogloss:0.28117\tvalidation-mlogloss:0.530051\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m [6]\ttrain-mlogloss:0.171749\tvalidation-mlogloss:0.404928\n", "\u001b[36malgo-1-hpz08_1 |\u001b[0m [6]\ttrain-mlogloss:0.245839\tvalidation-mlogloss:0.512267\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m [7]\ttrain-mlogloss:0.15009\tvalidation-mlogloss:0.393772\n", "\u001b[36malgo-1-hpz08_1 |\u001b[0m [7]\ttrain-mlogloss:0.220107\tvalidation-mlogloss:0.500467\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m [8]\ttrain-mlogloss:0.133377\tvalidation-mlogloss:0.385623\n", "\u001b[36malgo-1-hpz08_1 |\u001b[0m [8]\ttrain-mlogloss:0.199213\tvalidation-mlogloss:0.493193\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m [9]\ttrain-mlogloss:0.120209\tvalidation-mlogloss:0.378456\n", "\u001b[36malgo-1-hpz08_1 |\u001b[0m [9]\ttrain-mlogloss:0.18576\tvalidation-mlogloss:0.489638\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m [10]\ttrain-mlogloss:0.110155\tvalidation-mlogloss:0.374374\n", "\u001b[36mtmplob1feqn_algo-1-hpz08_1 exited with code 0\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m [11]\ttrain-mlogloss:0.09938\tvalidation-mlogloss:0.36958\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m [12]\ttrain-mlogloss:0.092882\tvalidation-mlogloss:0.366292\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m [13]\ttrain-mlogloss:0.085552\tvalidation-mlogloss:0.363469\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m [14]\ttrain-mlogloss:0.079976\tvalidation-mlogloss:0.363688\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m [15]\ttrain-mlogloss:0.075524\tvalidation-mlogloss:0.36325\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m [16]\ttrain-mlogloss:0.069857\tvalidation-mlogloss:0.36269\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m [17]\ttrain-mlogloss:0.065141\tvalidation-mlogloss:0.361854\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m [18]\ttrain-mlogloss:0.062355\tvalidation-mlogloss:0.361638\n",
"\u001b[36malgo-1-hibva_1 |\u001b[0m [19]\ttrain-mlogloss:0.060227\tvalidation-mlogloss:0.361047\n",
"\u001b[36mtmpsn7kurwo_algo-1-hibva_1 exited with code 0\n",
"\u001b[0mAborting on container exit...\n", "\u001b[0mAborting on container exit...\n",
"===== Job Complete =====\n" "===== Job Complete =====\n"
] ]

@ -70,24 +70,23 @@ public class CacheInference {
} }
private DMatrix createMatrix(final FlightData data, final ZonedDateTime now) throws XGBoostError { private DMatrix createMatrix(final FlightData data, final ZonedDateTime now) throws XGBoostError {
final float[] arr = new float[17]; final float[] arr = new float[16];
arr[0] = data.getInputPrice().floatValue(); arr[0] = data.getInputPrice().floatValue();
arr[1] = data.getSuccess() ? 1 : 0; arr[1] = labels.get("type").getOrDefault(data.getType(), 0);
arr[2] = data.getOutputPrice().floatValue(); arr[2] = labels.get("flight.inboundSegments.departure").getOrDefault(joinTimestampList(data.getInboundDeparture()), 0);
arr[3] = labels.get("client.channel").getOrDefault(data.getClientChannel(), 0); arr[3] = labels.get("flight.inboundSegments.arrival").getOrDefault(joinTimestampList(data.getInboundArrival()), 0);
arr[4] = labels.get("type").getOrDefault(data.getType(), 0); arr[4] = labels.get("flight.inboundSegments.origin.airportCode").getOrDefault(joinList(data.getInboundOrigin()), 0);
arr[5] = labels.get("flight.inboundSegments.departure").getOrDefault(joinTimestampList(data.getInboundDeparture()), 0); arr[5] = labels.get("flight.inboundSegments.destination.airportCode").getOrDefault(joinList(data.getInboundDestination()), 0);
arr[6] = labels.get("flight.inboundSegments.arrival").getOrDefault(joinTimestampList(data.getInboundArrival()), 0); arr[6] = labels.get("flight.inboundSegments.airline.code").getOrDefault(joinList(data.getInboundAirlines()), 0);
arr[7] = labels.get("flight.inboundSegments.origin.airportCode").getOrDefault(joinList(data.getInboundOrigin()), 0); arr[7] = labels.get("flight.inboundMCX.code").getOrDefault(data.getInboundMCXAirlines(), 0);
arr[8] = labels.get("flight.inboundSegments.airline.code").getOrDefault(joinList(data.getInboundAirlines()), 0); arr[8] = labels.get("flight.outboundSegments.departure").getOrDefault(joinTimestampList(data.getOutboundDeparture()), 0);
arr[9] = labels.get("flight.inboundSegments.destination.airportCode").getOrDefault(joinList(data.getInboundDestination()), 0); arr[9] = labels.get("flight.outboundSegments.arrival").getOrDefault(joinTimestampList(data.getOutboundArrival()), 0);
arr[10] = labels.get("flight.outboundSegments.departure").getOrDefault(joinTimestampList(data.getOutboundDeparture()), 0); arr[10] = labels.get("flight.outboundSegments.origin.airportCode").getOrDefault(joinList(data.getOutboundOrigin()), 0);
arr[11] = labels.get("flight.outboundSegments.arrival").getOrDefault(joinTimestampList(data.getOutboundArrival()), 0); arr[11] = labels.get("flight.outboundSegments.destination.airportCode").getOrDefault(joinList(data.getOutboundDestination()), 0);
arr[12] = labels.get("flight.outboundSegments.origin.airportCode").getOrDefault(joinList(data.getOutboundOrigin()), 0); arr[12] = labels.get("flight.outboundSegments.airline.code").getOrDefault(joinList(data.getOutboundAirlines()), 0);
arr[13] = labels.get("flight.outboundSegments.destination.airportCode").getOrDefault(joinList(data.getOutboundDestination()), 0); arr[13] = labels.get("flight.outboundMCX.code").getOrDefault(data.getOutboundMCXAirlines(), 0);
arr[14] = labels.get("flight.outboundSegments.airline.code").getOrDefault(joinList(data.getOutboundAirlines()), 0); arr[14] = computeDuration(data.getInboundDeparture(), data.getOutboundDeparture());
arr[15] = computeDuration(data.getInboundDeparture(), data.getOutboundDeparture()); arr[15] = computePrebooking(data.getOutboundDeparture(), now);
arr[16] = computePrebooking(data.getOutboundDeparture(), now);
return new DMatrix(arr, 1, arr.length); return new DMatrix(arr, 1, arr.length);
} }

@ -5,48 +5,42 @@ import java.util.Collections;
import java.util.List; import java.util.List;
public class FlightData { public class FlightData {
private final String clientChannel;
private final String type; private final String type;
private final List<ZonedDateTime> inboundDeparture; private final List<ZonedDateTime> inboundDeparture;
private final List<ZonedDateTime> inboundArrival; private final List<ZonedDateTime> inboundArrival;
private final List<String> inboundOrigin; private final List<String> inboundOrigin;
private final List<String> inboundDestination; private final List<String> inboundDestination;
private final List<String> inboundAirlines; private final List<String> inboundAirlines;
private final String inboundMCXAirlines;
private final List<ZonedDateTime> outboundDeparture; private final List<ZonedDateTime> outboundDeparture;
private final List<ZonedDateTime> outboundArrival; private final List<ZonedDateTime> outboundArrival;
private final List<String> outboundOrigin; private final List<String> outboundOrigin;
private final List<String> outboundDestination; private final List<String> outboundDestination;
private final List<String> outboundAirlines; private final List<String> outboundAirlines;
private final String outboundMCXAirlines;
private final Double inputPrice; private final Double inputPrice;
private final Boolean success;
private final Double outputPrice;
public FlightData(final String clientChannel, final String type, final List<ZonedDateTime> inboundDeparture, public FlightData(final String type, final List<ZonedDateTime> inboundDeparture,
final List<ZonedDateTime> inboundArrival, final List<String> inboundOrigin, final List<ZonedDateTime> inboundArrival, final List<String> inboundOrigin,
final List<String> inboundDestination, final List<String> inboundAirlines, final List<String> inboundDestination, final List<String> inboundAirlines,
final List<ZonedDateTime> outboundDeparture, final List<ZonedDateTime> outboundArrival, final String inboundMCXAirlines, final List<ZonedDateTime> outboundDeparture,
final List<String> outboundOrigin, final List<String> outboundDestination, final List<ZonedDateTime> outboundArrival, final List<String> outboundOrigin,
final List<String> outboundAirlines, final Double inputPrice, final Boolean success, final List<String> outboundDestination, final List<String> outboundAirlines,
final Double outputPrice) { final String outboundMCXAirlines, final Double inputPrice) {
this.clientChannel = clientChannel;
this.type = type; this.type = type;
this.inboundDeparture = inboundDeparture; this.inboundDeparture = inboundDeparture;
this.inboundArrival = inboundArrival; this.inboundArrival = inboundArrival;
this.inboundOrigin = inboundOrigin; this.inboundOrigin = inboundOrigin;
this.inboundDestination = inboundDestination; this.inboundDestination = inboundDestination;
this.inboundAirlines = inboundAirlines; this.inboundAirlines = inboundAirlines;
this.inboundMCXAirlines = inboundMCXAirlines;
this.outboundDeparture = outboundDeparture; this.outboundDeparture = outboundDeparture;
this.outboundArrival = outboundArrival; this.outboundArrival = outboundArrival;
this.outboundOrigin = outboundOrigin; this.outboundOrigin = outboundOrigin;
this.outboundDestination = outboundDestination; this.outboundDestination = outboundDestination;
this.outboundAirlines = outboundAirlines; this.outboundAirlines = outboundAirlines;
this.outboundMCXAirlines = outboundMCXAirlines;
this.inputPrice = inputPrice; this.inputPrice = inputPrice;
this.success = success;
this.outputPrice = outputPrice;
}
public String getClientChannel() {
return clientChannel;
} }
public String getType() { public String getType() {
@ -89,10 +83,6 @@ public class FlightData {
return inputPrice; return inputPrice;
} }
public Double getOutputPrice() {
return outputPrice;
}
public List<String> getInboundAirlines() { public List<String> getInboundAirlines() {
return Collections.unmodifiableList(inboundAirlines); return Collections.unmodifiableList(inboundAirlines);
} }
@ -101,7 +91,11 @@ public class FlightData {
return Collections.unmodifiableList(outboundAirlines); return Collections.unmodifiableList(outboundAirlines);
} }
public Boolean getSuccess() { public String getInboundMCXAirlines() {
return success; return inboundMCXAirlines;
}
public String getOutboundMCXAirlines() {
return outboundMCXAirlines;
} }
} }

@ -6,6 +6,7 @@ import org.testng.annotations.DataProvider;
import org.testng.annotations.Test; import org.testng.annotations.Test;
import java.io.IOException; import java.io.IOException;
import java.time.ZoneId;
import java.time.ZonedDateTime; import java.time.ZonedDateTime;
import java.time.format.DateTimeFormatter; import java.time.format.DateTimeFormatter;
import java.util.Arrays; import java.util.Arrays;
@ -16,6 +17,8 @@ import static java.util.Collections.emptyList;
import static org.testng.Assert.assertEquals; import static org.testng.Assert.assertEquals;
public class CacheInferenceTest { public class CacheInferenceTest {
private static final ZonedDateTime TEST_DATE = ZonedDateTime.of(2019, 10, 10, 0, 0, 0, 0, ZoneId.of("UTC"));
private CacheInference ci; private CacheInference ci;
@BeforeClass @BeforeClass
@ -27,7 +30,7 @@ public class CacheInferenceTest {
@Test(dataProvider = "inferenceData") @Test(dataProvider = "inferenceData")
void testInference(final FlightData input, final TTL expected) throws XGBoostError { void testInference(final FlightData input, final TTL expected) throws XGBoostError {
final TTL result = ci.cacheTTL(input); final TTL result = ci.cacheTTL(input, TEST_DATE);
assertEquals(result, expected); assertEquals(result, expected);
} }
@ -35,88 +38,83 @@ public class CacheInferenceTest {
public Object[][] inferenceData() { public Object[][] inferenceData() {
return new Object[][]{ return new Object[][]{
{new FlightData( {new FlightData(
"fly-me-to",
"WS", "WS",
toTimestampList("2020-05-09T23:59:00", "2020-05-10T10:30:00"), toTimestampList("2020-05-09T23:59:00", "2020-05-10T10:30:00"),
toTimestampList("2020-05-10T08:55:00", "2020-05-10T11:30:00"), toTimestampList("2020-05-10T08:55:00", "2020-05-10T11:30:00"),
toList("MCO", "FRA"), toList("MCO", "FRA"),
toList("FRA", "PRG"), toList("FRA", "PRG"),
toList("LH", "LH"), toList("LH", "LH"),
"LH",
toTimestampList("2020-05-01T09:50:00", "2020-05-01T11:55:00"), toTimestampList("2020-05-01T09:50:00", "2020-05-01T11:55:00"),
toTimestampList("2020-05-01T11:00:00", "2020-05-01T21:55:00"), toTimestampList("2020-05-01T11:00:00", "2020-05-01T21:55:00"),
toList("PRG", "FRA"), toList("PRG", "FRA"),
toList("FRA", "MCO"), toList("FRA", "MCO"),
toList("LH", "LH"), toList("LH", "LH"),
39766.0, "LH",
Boolean.TRUE,
39766.0 39766.0
), TTL.D3}, ), TTL.D3},
{new FlightData( {new FlightData(
"fly-me-to",
"PYTON", "PYTON",
emptyList(), emptyList(),
emptyList(), emptyList(),
emptyList(), emptyList(),
emptyList(), emptyList(),
emptyList(), emptyList(),
"",
toTimestampList("2019-12-18T05:45:00"), toTimestampList("2019-12-18T05:45:00"),
toTimestampList("2019-12-18T08:05:00"), toTimestampList("2019-12-18T08:05:00"),
toList("KRK"), toList("KRK"),
toList("BVA"), toList("BVA"),
toList("FR"), toList("FR"),
336.258, "FR",
Boolean.TRUE,
336.258 336.258
), TTL.D2}, ), TTL.D2},
{new FlightData( {new FlightData(
"levne",
"AVIA", "AVIA",
toTimestampList("2020-02-07T02:25:00", "2020-02-07T14:50:00"), toTimestampList("2020-02-07T02:25:00", "2020-02-07T14:50:00"),
toTimestampList("2020-02-07T13:10:00", "2020-02-07T16:55:00"), toTimestampList("2020-02-07T13:10:00", "2020-02-07T16:55:00"),
toList("LAX", "LHR"), toList("LAX", "LHR"),
toList("LHR", "PRG"), toList("LHR", "PRG"),
toList("AA", "BA"), toList("AA", "BA"),
"AA",
toTimestampList("2020-01-28T10:35:00", "2020-01-28T14:40:00"), toTimestampList("2020-01-28T10:35:00", "2020-01-28T14:40:00"),
toTimestampList("2020-01-28T12:45:00", "2020-01-29T01:45:00"), toTimestampList("2020-01-28T12:45:00", "2020-01-29T01:45:00"),
toList("PRG", "HEL"), toList("PRG", "HEL"),
toList("HEL", "LAX"), toList("HEL", "LAX"),
toList("AY", "AY"), toList("AY", "AY"),
5971.77978, "AY",
Boolean.TRUE, 5971.77978
15971.77978
), TTL.D3}, ), TTL.D3},
{new FlightData( {new FlightData(
"fly-me-to",
"HH", "HH",
toTimestampList("2019-11-01T16:30:00", "2019-11-01T23:35:00"), toTimestampList("2019-11-01T16:30:00", "2019-11-01T23:35:00"),
toTimestampList("2019-11-01T21:12:00", "2019-11-02T07:45:00"), toTimestampList("2019-11-01T21:12:00", "2019-11-02T07:45:00"),
toList("YVR", "YUL"), toList("YVR", "YUL"),
toList("YUL", "VIE"), toList("YUL", "VIE"),
toList("LH", "LH"), toList("LH", "LH"),
"LH",
toTimestampList("2019-10-18T08:10:00", "2019-10-18T11:30:00"), toTimestampList("2019-10-18T08:10:00", "2019-10-18T11:30:00"),
toTimestampList("2019-10-18T09:40:00", "2019-10-18T21:25:00"), toTimestampList("2019-10-18T09:40:00", "2019-10-18T21:25:00"),
toList("VIE", "FRA"), toList("VIE", "FRA"),
toList("FRA", "YVR"), toList("FRA", "YVR"),
toList("LH", "LH"), toList("LH", "LH"),
17723.0, "LH",
Boolean.TRUE,
17723.0 17723.0
), TTL.D2}, ), TTL.D2},
{new FlightData( {new FlightData(
"unknown",
"unknown", "unknown",
toTimestampList(ZonedDateTime.now().format(DateTimeFormatter.ISO_LOCAL_DATE_TIME)), toTimestampList(ZonedDateTime.now().format(DateTimeFormatter.ISO_LOCAL_DATE_TIME)),
toTimestampList(ZonedDateTime.now().format(DateTimeFormatter.ISO_LOCAL_DATE_TIME)), toTimestampList(ZonedDateTime.now().format(DateTimeFormatter.ISO_LOCAL_DATE_TIME)),
toList("unknown"), toList("unknown"),
toList("unknown"), toList("unknown"),
toList("unknown"), toList("unknown"),
"unknown",
toTimestampList(ZonedDateTime.now().format(DateTimeFormatter.ISO_LOCAL_DATE_TIME)), toTimestampList(ZonedDateTime.now().format(DateTimeFormatter.ISO_LOCAL_DATE_TIME)),
toTimestampList(ZonedDateTime.now().format(DateTimeFormatter.ISO_LOCAL_DATE_TIME)), toTimestampList(ZonedDateTime.now().format(DateTimeFormatter.ISO_LOCAL_DATE_TIME)),
toList("unknown"), toList("unknown"),
toList("unknown"), toList("unknown"),
toList("unknown"), toList("unknown"),
0.0, "unknown",
Boolean.FALSE,
0.0 0.0
), TTL.NOCACHE} ), TTL.NOCACHE}
}; };

File diff suppressed because one or more lines are too long

@ -25,7 +25,7 @@ tf = XGBoost(
'bonitoo_price_neg_abs': 200, 'bonitoo_price_neg_abs': 200,
'bonitoo_price_pos_perc': 0.05, 'bonitoo_price_pos_perc': 0.05,
'bonitoo_price_neg_perc': 0.05, 'bonitoo_price_neg_perc': 0.05,
'num_round': 20, 'num_round': 10,
'max_depth': 15, 'max_depth': 15,
'eta': 0.5, 'eta': 0.5,
'num_class': 8, 'num_class': 8,

@ -20,18 +20,19 @@ from sagemaker_xgboost_container.constants.xgb_constants import CUSTOMER_ERRORS
columns = [ columns = [
'timestamp', 'timestamp',
'client.channel',
'type', 'type',
'flight.inboundSegments.departure', 'flight.inboundSegments.departure',
'flight.inboundSegments.arrival', 'flight.inboundSegments.arrival',
'flight.inboundSegments.origin.airportCode', 'flight.inboundSegments.origin.airportCode',
'flight.inboundSegments.destination.airportCode', 'flight.inboundSegments.destination.airportCode',
'flight.inboundSegments.airline.code', 'flight.inboundSegments.airline.code',
'flight.inboundMCX.code',
'flight.outboundSegments.departure', 'flight.outboundSegments.departure',
'flight.outboundSegments.arrival', 'flight.outboundSegments.arrival',
'flight.outboundSegments.origin.airportCode', 'flight.outboundSegments.origin.airportCode',
'flight.outboundSegments.destination.airportCode', 'flight.outboundSegments.destination.airportCode',
'flight.outboundSegments.airline.code', 'flight.outboundSegments.airline.code',
'flight.outboundMCX.code',
'input.price', 'input.price',
'success', 'success',
'output.price', 'output.price',
@ -40,18 +41,19 @@ columns = [
] ]
catcolumns = [ catcolumns = [
'client.channel',
'type', 'type',
'flight.inboundSegments.departure', 'flight.inboundSegments.departure',
'flight.inboundSegments.arrival', 'flight.inboundSegments.arrival',
'flight.inboundSegments.origin.airportCode', 'flight.inboundSegments.origin.airportCode',
'flight.inboundSegments.destination.airportCode', 'flight.inboundSegments.destination.airportCode',
'flight.inboundSegments.airline.code', 'flight.inboundSegments.airline.code',
'flight.inboundMCX.code',
'flight.outboundSegments.departure', 'flight.outboundSegments.departure',
'flight.outboundSegments.arrival', 'flight.outboundSegments.arrival',
'flight.outboundSegments.origin.airportCode', 'flight.outboundSegments.origin.airportCode',
'flight.outboundSegments.destination.airportCode', 'flight.outboundSegments.destination.airportCode',
'flight.outboundSegments.airline.code' 'flight.outboundSegments.airline.code',
'flight.outboundMCX.code'
] ]
floatcolumns = [ floatcolumns = [
@ -83,7 +85,7 @@ def expected_value(row, price_pos_abs=200, price_neg_abs=100, price_pos_perc=0.0
inprice, outprice = row['input.price'], row['output.price'] inprice, outprice = row['input.price'], row['output.price']
tstamp, cacheAt, cacheExp = row['timestamp'], row['cacheAt'], row['cacheExp'] tstamp, cacheAt, cacheExp = row['timestamp'], row['cacheAt'], row['cacheExp']
if cacheAt: if not pd.isnull(cacheAt):
incachetime = tstamp - cacheAt incachetime = tstamp - cacheAt
expcachetime = cacheExp - cacheAt expcachetime = cacheExp - cacheAt
else: else:
@ -136,6 +138,9 @@ def compute_prebooking(row):
def preprocess_data(df): def preprocess_data(df):
logging.info('Preprocessing start') logging.info('Preprocessing start')
# df = df[df.loc[:,'success'] == True]
# df = df.dropna(subset=['cacheAt'])
booleanDictionary = {True: 1, False: 0} booleanDictionary = {True: 1, False: 0}
df.loc[:, 'success'] = df.loc[:, 'success'].replace(booleanDictionary) df.loc[:, 'success'] = df.loc[:, 'success'].replace(booleanDictionary)
@ -154,7 +159,7 @@ def preprocess_data(df):
return df return df
def remove_non_features(df): def remove_non_features(df):
return df.drop(catcolumns + timestampcolumns, axis=1), df return df.drop(catcolumns + timestampcolumns + ['output.price', 'success'], axis=1), df
def train_test_split(df, label, ratio): def train_test_split(df, label, ratio):
logging.info('Splitting dataset with ration %f', ratio) logging.info('Splitting dataset with ration %f', ratio)

File diff suppressed because one or more lines are too long
Loading…
Cancel
Save