commit
76645f273d
5 changed files with 995 additions and 0 deletions
@ -0,0 +1,115 @@ |
||||
from pymongo import MongoClient
from datetime import datetime
import csv

# Export the `pricing_audit` collection from the local MongoDB into a flat,
# headerless CSV (consumed downstream by S3/SageMaker). Nested Mongo fields
# are flattened into dotted column names; per-segment values are '|'-joined.
client = MongoClient()
db = client.bonitoo

fieldnames = [
    'timestamp',
    'client.channel',
    'type',
    'flight.inboundSegments.departure',
    'flight.inboundSegments.arrival',
    'flight.inboundSegments.origin.airportCode',
    'flight.inboundSegments.destination.airportCode',
    'flight.inboundSegments.flightNumber',
    'flight.inboundSegments.travelClass',
    'flight.inboundSegments.bookingCode',
    'flight.inboundSegments.availability',
    'flight.inboundSegments.elapsedFlyingTime',
    'flight.outboundSegments.departure',
    'flight.outboundSegments.arrival',
    'flight.outboundSegments.origin.airportCode',
    'flight.outboundSegments.destination.airportCode',
    'flight.outboundSegments.flightNumber',
    'flight.outboundSegments.travelClass',
    'flight.outboundSegments.bookingCode',
    'flight.outboundSegments.availability',
    'flight.outboundSegments.elapsedFlyingTime',
    'flight.inboundEFT',  # elapsed flying time
    'flight.outboundEFT',
    'oneWay',
    'adults',  # passenger count = (adults + children)
    'children',
    'infants',
    'input.price',
    'input.tax',
    'input.currency',
    'success',
    'status',
    'output.price',
    'output.tax',
    'output.currency',
    'duration'  # duration of the call to the upstream system
]

# Notes for the downstream model (translated from Czech):
# - 5% or 200 CZK difference upwards
# - -200 CZK downwards
# - ignore abs(+-10 CZK)
#
# - timestamp + ok price    -> should be in cache since cacheat
# - timestamp + notok price -> should NOT be in cache since cacheat
#
# - length of stay: arrival - departure
# - flight duration?
# - on error -> nocache (= priceout is missing)
#
# - take in/out airline codes into account (mcx?) - Mirek will find out
#
# - compute cache hit/miss success rate in %


def _segment_fields(direction, segments):
    """Flatten a list of flight-segment documents into '|'-joined CSV values.

    :param direction: 'inbound' or 'outbound' (selects the column-name prefix)
    :param segments: list of segment dicts from the Mongo document
    :return: dict mapping dotted fieldnames to '|'-joined string values
    """
    prefix = 'flight.%sSegments.' % direction
    return {
        prefix + 'departure': '|'.join(s['departure'].isoformat() for s in segments),
        prefix + 'arrival': '|'.join(s['arrival'].isoformat() for s in segments),
        prefix + 'origin.airportCode': '|'.join(s['origin']['airportCode'] for s in segments),
        prefix + 'destination.airportCode': '|'.join(s['destination']['airportCode'] for s in segments),
        prefix + 'flightNumber': '|'.join(s['flightNumber'] for s in segments),
        prefix + 'travelClass': '|'.join(s['travelClass'] for s in segments),
        # bookingCode / availability / elapsedFlyingTime may be absent -> empty string
        prefix + 'bookingCode': '|'.join(s.get('bookingCode', '') for s in segments),
        prefix + 'availability': '|'.join(str(s.get('availability', '')) for s in segments),
        prefix + 'elapsedFlyingTime': '|'.join(str(s.get('elapsedFlyingTime', '')) for s in segments),
    }


counter = 0
# newline='' is required by the csv module; without it rows get mangled
# line endings on platforms that translate '\n'.
with open('export.csv', mode='w', newline='') as ef:
    writer = csv.DictWriter(ef, fieldnames=fieldnames, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
    # do not write header for s3 files
    # writer.writeheader()

    for it in db.pricing_audit.find():
        counter += 1
        if counter % 1000 == 0:
            print('Iterace %d' % counter)
        # On failed pricings the 'output' sub-document may be missing entirely,
        # or present without some subkeys; default each subkey to 0 instead of
        # raising KeyError.
        output = it.get('output', {})
        d = {
            # timestamp is stored in milliseconds since the epoch
            'timestamp': datetime.fromtimestamp(it['timestamp'] / 1000).isoformat(),
            'client.channel': it['client']['channel'],
            'type': it['type'],
            'flight.inboundEFT': it['flight'].get('inboundEFT', ''),
            'flight.outboundEFT': it['flight'].get('outboundEFT', ''),
            'oneWay': it['oneWay'],
            'adults': it['adults'],
            'children': it['children'],
            'infants': it['infants'],
            'input.price': it['input']['price'],
            'input.tax': it['input']['tax'],
            'input.currency': it['input']['currency'],
            'success': it['success'],
            'status': it.get('status', ''),
            'output.price': output.get('price', 0),
            'output.tax': output.get('tax', 0),
            'output.currency': output.get('currency', 0),
            'duration': it['duration'],
        }
        d.update(_segment_fields('outbound', it['flight']['outboundSegments']))
        # One-way trips carry no inbound segments; those columns stay empty.
        if 'inboundSegments' in it['flight']:
            d.update(_segment_fields('inbound', it['flight']['inboundSegments']))
        writer.writerow(d)
@ -0,0 +1,303 @@ |
||||
{ |
||||
"cells": [ |
||||
{ |
||||
"cell_type": "code", |
||||
"execution_count": 1, |
||||
"metadata": {}, |
||||
"outputs": [], |
||||
"source": [ |
||||
"import sagemaker\n", |
||||
"import boto3\n", |
||||
"from sagemaker import get_execution_role\n", |
||||
"\n", |
||||
"boto_session = boto3.Session(profile_name='bonitoo', region_name='eu-central-1')\n", |
||||
"sagemaker_session = sagemaker.LocalSession(boto_session=boto_session)\n", |
||||
"#sagemaker_session = sagemaker.Session()" |
||||
] |
||||
}, |
||||
{ |
||||
"cell_type": "code", |
||||
"execution_count": 2, |
||||
"metadata": {}, |
||||
"outputs": [], |
||||
"source": [ |
||||
"# Get a SageMaker-compatible role used by this Notebook Instance.\n", |
||||
"#role = get_execution_role()\n", |
||||
"role = 'Bonitoo_SageMaker_Execution'\n", |
||||
"region = boto_session.region_name" |
||||
] |
||||
}, |
||||
{ |
||||
"cell_type": "code", |
||||
"execution_count": 3, |
||||
"metadata": {}, |
||||
"outputs": [], |
||||
"source": [ |
||||
"train_input = 's3://customers-bonitoo-cachettl/sagemaker/data/export.csv'" |
||||
] |
||||
}, |
||||
{ |
||||
"cell_type": "code", |
||||
"execution_count": 50, |
||||
"metadata": {}, |
||||
"outputs": [], |
||||
"source": [ |
||||
"from sagemaker.xgboost.estimator import XGBoost\n", |
||||
"\n", |
||||
"tf = XGBoost(\n", |
||||
" entry_point='train_model.py',\n", |
||||
" source_dir='./src',\n", |
||||
" train_instance_type='local',\n", |
||||
" train_instance_count=1,\n", |
||||
" role=role,\n", |
||||
" sagemaker_session=sagemaker_session,\n", |
||||
" framework_version='0.90-1',\n", |
||||
" py_version='py3',\n", |
||||
" hyperparameters={\n", |
||||
" 'bonitoo_price_limit': 1000,\n", |
||||
" 'num_round': 15,\n", |
||||
" 'max_depth': 15,\n", |
||||
" 'eta': 0.5,\n", |
||||
" 'num_class': 8,\n", |
||||
" 'objective': 'multi:softmax',\n", |
||||
" 'eval_metric': 'mlogloss'\n", |
||||
" })" |
||||
] |
||||
}, |
||||
{ |
||||
"cell_type": "code", |
||||
"execution_count": 51, |
||||
"metadata": { |
||||
"scrolled": true |
||||
}, |
||||
"outputs": [ |
||||
{ |
||||
"name": "stdout", |
||||
"output_type": "stream", |
||||
"text": [ |
||||
"Creating tmptao5hpuc_algo-1-x6dhm_1 ... \n", |
||||
"\u001b[1BAttaching to tmptao5hpuc_algo-1-x6dhm_12mdone\u001b[0m\n", |
||||
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m INFO:sagemaker-containers:Imported framework sagemaker_xgboost_container.training\n", |
||||
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m INFO:sagemaker-containers:No GPUs detected (normal if no gpus installed)\n", |
||||
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m INFO:sagemaker_xgboost_container.training:Invoking user training script.\n", |
||||
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m INFO:sagemaker-containers:Module train_model does not provide a setup.py. \n", |
||||
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m Generating setup.py\n", |
||||
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m INFO:sagemaker-containers:Generating setup.cfg\n", |
||||
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m INFO:sagemaker-containers:Generating MANIFEST.in\n", |
||||
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m INFO:sagemaker-containers:Installing module with the following command:\n", |
||||
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m /usr/bin/python3 -m pip install . -r requirements.txt\n", |
||||
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m Processing /opt/ml/code\n", |
||||
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m Requirement already satisfied: pandas in /usr/local/lib/python3.5/dist-packages (from -r requirements.txt (line 1)) (0.24.2)\n", |
||||
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m Requirement already satisfied: numpy in /usr/local/lib/python3.5/dist-packages (from -r requirements.txt (line 2)) (1.17.2)\n", |
||||
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m Requirement already satisfied: pytz>=2011k in /usr/local/lib/python3.5/dist-packages (from pandas->-r requirements.txt (line 1)) (2019.2)\n", |
||||
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m Requirement already satisfied: python-dateutil>=2.5.0 in /usr/local/lib/python3.5/dist-packages (from pandas->-r requirements.txt (line 1)) (2.8.0)\n", |
||||
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.5/dist-packages (from python-dateutil>=2.5.0->pandas->-r requirements.txt (line 1)) (1.12.0)\n", |
||||
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m Building wheels for collected packages: train-model\n", |
||||
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m Building wheel for train-model (setup.py) ... \u001b[?25ldone\n", |
||||
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m \u001b[?25h Created wheel for train-model: filename=train_model-1.0.0-py2.py3-none-any.whl size=6578 sha256=f2f4bac7a2d0260f534e32b3ac0341fb291f30669499adf59ead09aa62b7ccc5\n", |
||||
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m Stored in directory: /tmp/pip-ephem-wheel-cache-vdfjugbr/wheels/35/24/16/37574d11bf9bde50616c67372a334f94fa8356bc7164af8ca3\n", |
||||
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m Successfully built train-model\n", |
||||
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m Installing collected packages: train-model\n", |
||||
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m Successfully installed train-model-1.0.0\n", |
||||
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m INFO:sagemaker-containers:No GPUs detected (normal if no gpus installed)\n", |
||||
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m INFO:sagemaker-containers:Invoking user script\n", |
||||
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m \n", |
||||
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m Training Env:\n", |
||||
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m \n", |
||||
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m {\n", |
||||
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m \"network_interface_name\": \"eth0\",\n", |
||||
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m \"hosts\": [\n", |
||||
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m \"algo-1-x6dhm\"\n", |
||||
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m ],\n", |
||||
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m \"log_level\": 20,\n", |
||||
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m \"input_config_dir\": \"/opt/ml/input/config\",\n", |
||||
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m \"framework_module\": \"sagemaker_xgboost_container.training:main\",\n", |
||||
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m \"input_dir\": \"/opt/ml/input\",\n", |
||||
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m \"channel_input_dirs\": {\n", |
||||
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m \"training\": \"/opt/ml/input/data/training\"\n", |
||||
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m },\n", |
||||
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m \"num_gpus\": 0,\n", |
||||
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m \"job_name\": \"sagemaker-xgboost-2019-10-05-20-16-58-398\",\n", |
||||
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m \"output_intermediate_dir\": \"/opt/ml/output/intermediate\",\n", |
||||
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m \"user_entry_point\": \"train_model.py\",\n", |
||||
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m \"current_host\": \"algo-1-x6dhm\",\n", |
||||
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m \"module_dir\": \"s3://sagemaker-eu-central-1-029917565482/sagemaker-xgboost-2019-10-05-20-16-58-398/source/sourcedir.tar.gz\",\n", |
||||
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m \"master_hostname\": \"algo-1-x6dhm\",\n", |
||||
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m \"module_name\": \"train_model\",\n", |
||||
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m \"resource_config\": {\n", |
||||
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m \"current_host\": \"algo-1-x6dhm\",\n", |
||||
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m \"hosts\": [\n", |
||||
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m \"algo-1-x6dhm\"\n", |
||||
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m ]\n", |
||||
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m },\n", |
||||
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m \"additional_framework_parameters\": {},\n", |
||||
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m \"num_cpus\": 6,\n", |
||||
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m \"output_data_dir\": \"/opt/ml/output/data\",\n", |
||||
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m \"input_data_config\": {\n", |
||||
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m \"training\": {\n", |
||||
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m \"TrainingInputMode\": \"File\"\n", |
||||
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m }\n", |
||||
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m },\n", |
||||
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m \"is_master\": true,\n", |
||||
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m \"hyperparameters\": {\n", |
||||
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m \"bonitoo_price_limit\": 1000,\n", |
||||
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m \"max_depth\": 15,\n", |
||||
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m \"objective\": \"multi:softmax\",\n", |
||||
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m \"num_class\": 8,\n", |
||||
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m \"eta\": 0.5,\n", |
||||
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m \"eval_metric\": \"mlogloss\",\n", |
||||
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m \"num_round\": 15\n", |
||||
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m },\n", |
||||
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m \"output_dir\": \"/opt/ml/output\",\n", |
||||
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m \"model_dir\": \"/opt/ml/model\"\n", |
||||
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m }\n", |
||||
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m \n", |
||||
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m Environment variables:\n", |
||||
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m \n", |
||||
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m SM_INPUT_CONFIG_DIR=/opt/ml/input/config\n", |
||||
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m SM_HP_MAX_DEPTH=15\n", |
||||
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m SM_LOG_LEVEL=20\n", |
||||
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m SM_OUTPUT_DIR=/opt/ml/output\n", |
||||
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m SM_NUM_CPUS=6\n", |
||||
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m SM_CHANNELS=[\"training\"]\n", |
||||
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m SM_HP_NUM_ROUND=15\n", |
||||
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m SM_HP_OBJECTIVE=multi:softmax\n", |
||||
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m SM_OUTPUT_DATA_DIR=/opt/ml/output/data\n", |
||||
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m SM_FRAMEWORK_MODULE=sagemaker_xgboost_container.training:main\n", |
||||
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m SM_NETWORK_INTERFACE_NAME=eth0\n", |
||||
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m SM_INPUT_DATA_CONFIG={\"training\":{\"TrainingInputMode\":\"File\"}}\n", |
||||
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m SM_FRAMEWORK_PARAMS={}\n", |
||||
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m SM_HPS={\"bonitoo_price_limit\":1000,\"eta\":0.5,\"eval_metric\":\"mlogloss\",\"max_depth\":15,\"num_class\":8,\"num_round\":15,\"objective\":\"multi:softmax\"}\n", |
||||
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m PYTHONPATH=/usr/local/bin:/:/usr/local/lib/python3.5/dist-packages/xgboost/dmlc-core/tracker:/usr/lib/python35.zip:/usr/lib/python3.5:/usr/lib/python3.5/plat-x86_64-linux-gnu:/usr/lib/python3.5/lib-dynload:/usr/local/lib/python3.5/dist-packages:/usr/lib/python3/dist-packages\n", |
||||
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m SM_RESOURCE_CONFIG={\"current_host\":\"algo-1-x6dhm\",\"hosts\":[\"algo-1-x6dhm\"]}\n", |
||||
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m SM_NUM_GPUS=0\n", |
||||
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m SM_HP_ETA=0.5\n", |
||||
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m SM_HP_NUM_CLASS=8\n", |
||||
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m SM_MODULE_DIR=s3://sagemaker-eu-central-1-029917565482/sagemaker-xgboost-2019-10-05-20-16-58-398/source/sourcedir.tar.gz\n", |
||||
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m SM_USER_ARGS=[\"--bonitoo_price_limit\",\"1000\",\"--eta\",\"0.5\",\"--eval_metric\",\"mlogloss\",\"--max_depth\",\"15\",\"--num_class\",\"8\",\"--num_round\",\"15\",\"--objective\",\"multi:softmax\"]\n", |
||||
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m SM_USER_ENTRY_POINT=train_model.py\n", |
||||
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m SM_CURRENT_HOST=algo-1-x6dhm\n", |
||||
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m SM_INPUT_DIR=/opt/ml/input\n", |
||||
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m SM_CHANNEL_TRAINING=/opt/ml/input/data/training\n", |
||||
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m SM_OUTPUT_INTERMEDIATE_DIR=/opt/ml/output/intermediate\n", |
||||
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m SM_HP_EVAL_METRIC=mlogloss\n", |
||||
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m SM_MODULE_NAME=train_model\n", |
||||
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m SM_HP_BONITOO_PRICE_LIMIT=1000\n", |
||||
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m SM_HOSTS=[\"algo-1-x6dhm\"]\n", |
||||
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m SM_TRAINING_ENV={\"additional_framework_parameters\":{},\"channel_input_dirs\":{\"training\":\"/opt/ml/input/data/training\"},\"current_host\":\"algo-1-x6dhm\",\"framework_module\":\"sagemaker_xgboost_container.training:main\",\"hosts\":[\"algo-1-x6dhm\"],\"hyperparameters\":{\"bonitoo_price_limit\":1000,\"eta\":0.5,\"eval_metric\":\"mlogloss\",\"max_depth\":15,\"num_class\":8,\"num_round\":15,\"objective\":\"multi:softmax\"},\"input_config_dir\":\"/opt/ml/input/config\",\"input_data_config\":{\"training\":{\"TrainingInputMode\":\"File\"}},\"input_dir\":\"/opt/ml/input\",\"is_master\":true,\"job_name\":\"sagemaker-xgboost-2019-10-05-20-16-58-398\",\"log_level\":20,\"master_hostname\":\"algo-1-x6dhm\",\"model_dir\":\"/opt/ml/model\",\"module_dir\":\"s3://sagemaker-eu-central-1-029917565482/sagemaker-xgboost-2019-10-05-20-16-58-398/source/sourcedir.tar.gz\",\"module_name\":\"train_model\",\"network_interface_name\":\"eth0\",\"num_cpus\":6,\"num_gpus\":0,\"output_data_dir\":\"/opt/ml/output/data\",\"output_dir\":\"/opt/ml/output\",\"output_intermediate_dir\":\"/opt/ml/output/intermediate\",\"resource_config\":{\"current_host\":\"algo-1-x6dhm\",\"hosts\":[\"algo-1-x6dhm\"]},\"user_entry_point\":\"train_model.py\"}\n", |
||||
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m SM_MODEL_DIR=/opt/ml/model\n", |
||||
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m \n", |
||||
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m Invoking script with the following command:\n", |
||||
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m \n", |
||||
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m /usr/bin/python3 -m train_model --bonitoo_price_limit 1000 --eta 0.5 --eval_metric mlogloss --max_depth 15 --num_class 8 --num_round 15 --objective multi:softmax\n", |
||||
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m \n", |
||||
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m \n" |
||||
] |
||||
}, |
||||
{ |
||||
"name": "stdout", |
||||
"output_type": "stream", |
||||
"text": [ |
||||
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m ERROR:sagemaker-containers:ExecuteUserScriptError:\n", |
||||
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m Command \"/usr/bin/python3 -m train_model --bonitoo_price_limit 1000 --eta 0.5 --eval_metric mlogloss --max_depth 15 --num_class 8 --num_round 15 --objective multi:softmax\"\n", |
||||
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m INFO:root:hyperparameters {'num_round': 15, 'num_class': 8, 'objective': 'multi:softmax', 'eta': 0.5, 'max_depth': 15, 'eval_metric': ['mlogloss']}\n", |
||||
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m INFO:root:channels {'training': {'TrainingInputMode': 'File'}}\n", |
||||
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m INFO:root:Determined delimiter of CSV input is ','\n", |
||||
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m INFO:root:Loading csv file export.csv\n", |
||||
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m INFO:root:Preprocessing start\n", |
||||
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m /usr/local/lib/python3.5/dist-packages/pandas/core/indexing.py:543: SettingWithCopyWarning: \n", |
||||
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m A value is trying to be set on a copy of a slice from a DataFrame.\n", |
||||
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m Try using .loc[row_indexer,col_indexer] = value instead\n", |
||||
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m \n", |
||||
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", |
||||
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m self.obj[item] = s\n", |
||||
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m /usr/local/lib/python3.5/dist-packages/pandas/core/indexing.py:362: SettingWithCopyWarning: \n", |
||||
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m A value is trying to be set on a copy of a slice from a DataFrame.\n", |
||||
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m Try using .loc[row_indexer,col_indexer] = value instead\n", |
||||
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m \n", |
||||
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", |
||||
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m self.obj[key] = _infer_fill_value(value)\n", |
||||
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m INFO:root:Computing cached times\n", |
||||
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m INFO:root:Splitting dataset with ration 0.800000\n", |
||||
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m /usr/local/lib/python3.5/dist-packages/xgboost/core.py:587: FutureWarning: Series.base is deprecated and will be removed in a future version\n", |
||||
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m if getattr(data, 'base', None) is not None and \\\n", |
||||
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m /usr/local/lib/python3.5/dist-packages/xgboost/core.py:588: FutureWarning: Series.base is deprecated and will be removed in a future version\n", |
||||
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m data.base is not None and isinstance(data, np.ndarray) \\\n", |
||||
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m INFO:root:Single node training.\n", |
||||
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m INFO:root:Train matrix has 25393 rows\n", |
||||
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m INFO:root:Validation matrix has 6314 rows\n", |
||||
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m INFO:root:cols: ['index', 'adults', 'children', 'infants', 'input.price', 'input.tax', 'status', 'output.price', 'output.tax', 'duration', 'client.channel_codes', 'type_codes', 'flight.inboundSegments.departure_codes', 'flight.inboundSegments.arrival_codes', 'flight.inboundSegments.origin.airportCode_codes', 'flight.inboundSegments.destination.airportCode_codes', 'flight.inboundSegments.flightNumber_codes', 'flight.inboundSegments.travelClass_codes', 'flight.inboundSegments.bookingCode_codes', 'flight.inboundSegments.availability_codes', 'flight.inboundSegments.elapsedFlyingTime_codes', 'flight.outboundSegments.departure_codes', 'flight.outboundSegments.arrival_codes', 'flight.outboundSegments.origin.airportCode_codes', 'flight.outboundSegments.destination.airportCode_codes', 'flight.outboundSegments.flightNumber_codes', 'flight.outboundSegments.travelClass_codes', 'flight.outboundSegments.bookingCode_codes', 'flight.outboundSegments.availability_codes', 'flight.outboundSegments.elapsedFlyingTime_codes', 'flight.inboundEFT_codes', 'flight.outboundEFT_codes', 'input.currency_codes', 'output.currency_codes', 'oneWay_codes']\n", |
||||
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m Traceback (most recent call last):\n", |
||||
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m File \"/usr/lib/python3.5/runpy.py\", line 184, in _run_module_as_main\n", |
||||
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m \"__main__\", mod_spec)\n", |
||||
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m File \"/usr/lib/python3.5/runpy.py\", line 85, in _run_code\n", |
||||
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m exec(code, run_globals)\n", |
||||
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m File \"/opt/ml/code/train_model.py\", line 417, in <module>\n", |
||||
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m checkpoint_config=checkpoint_config\n", |
||||
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m File \"/opt/ml/code/train_model.py\", line 320, in sagemaker_train\n", |
||||
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m train_job(**train_args)\n", |
||||
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m File \"/opt/ml/code/train_model.py\", line 364, in train_job\n", |
||||
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m raise Exception(\"cols: %s\" % str(train_dmatrix.feature_names))\n", |
||||
"\u001b[36malgo-1-x6dhm_1 |\u001b[0m Exception: cols: ['index', 'adults', 'children', 'infants', 'input.price', 'input.tax', 'status', 'output.price', 'output.tax', 'duration', 'client.channel_codes', 'type_codes', 'flight.inboundSegments.departure_codes', 'flight.inboundSegments.arrival_codes', 'flight.inboundSegments.origin.airportCode_codes', 'flight.inboundSegments.destination.airportCode_codes', 'flight.inboundSegments.flightNumber_codes', 'flight.inboundSegments.travelClass_codes', 'flight.inboundSegments.bookingCode_codes', 'flight.inboundSegments.availability_codes', 'flight.inboundSegments.elapsedFlyingTime_codes', 'flight.outboundSegments.departure_codes', 'flight.outboundSegments.arrival_codes', 'flight.outboundSegments.origin.airportCode_codes', 'flight.outboundSegments.destination.airportCode_codes', 'flight.outboundSegments.flightNumber_codes', 'flight.outboundSegments.travelClass_codes', 'flight.outboundSegments.bookingCode_codes', 'flight.outboundSegments.availability_codes', 'flight.outboundSegments.elapsedFlyingTime_codes', 'flight.inboundEFT_codes', 'flight.outboundEFT_codes', 'input.currency_codes', 'output.currency_codes', 'oneWay_codes']\n", |
||||
"\u001b[36mtmptao5hpuc_algo-1-x6dhm_1 exited with code 1\n", |
||||
"\u001b[0mAborting on container exit...\n" |
||||
] |
||||
}, |
||||
{ |
||||
"ename": "RuntimeError", |
||||
"evalue": "Failed to run: ['docker-compose', '-f', '/tmp/tmptao5hpuc/docker-compose.yaml', 'up', '--build', '--abort-on-container-exit'], Process exited with code: 1", |
||||
"output_type": "error", |
||||
"traceback": [ |
||||
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", |
||||
"\u001b[0;31mRuntimeError\u001b[0m Traceback (most recent call last)", |
||||
"\u001b[0;32m~/aprar/bonitoo/.venv/lib/python3.7/site-packages/sagemaker/local/image.py\u001b[0m in \u001b[0;36mtrain\u001b[0;34m(self, input_data_config, output_data_config, hyperparameters, job_name)\u001b[0m\n\u001b[1;32m 147\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 148\u001b[0;31m \u001b[0m_stream_output\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mprocess\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 149\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mRuntimeError\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", |
||||
"\u001b[0;32m~/aprar/bonitoo/.venv/lib/python3.7/site-packages/sagemaker/local/image.py\u001b[0m in \u001b[0;36m_stream_output\u001b[0;34m(process)\u001b[0m\n\u001b[1;32m 656\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mexit_code\u001b[0m \u001b[0;34m!=\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 657\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mRuntimeError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"Process exited with code: %s\"\u001b[0m \u001b[0;34m%\u001b[0m \u001b[0mexit_code\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 658\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", |
||||
"\u001b[0;31mRuntimeError\u001b[0m: Process exited with code: 1", |
||||
"\nDuring handling of the above exception, another exception occurred:\n", |
||||
"\u001b[0;31mRuntimeError\u001b[0m Traceback (most recent call last)", |
||||
"\u001b[0;32m<ipython-input-51-ffc1ca8d95fd>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mestimator\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m{\u001b[0m\u001b[0;34m'training'\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mtrain_input\u001b[0m\u001b[0;34m}\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2\u001b[0m \u001b[0;31m#estimator = sklearn.attach('sagemaker-scikit-learn-2019-01-25-16-34-38-829')\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", |
||||
"\u001b[0;32m~/aprar/bonitoo/.venv/lib/python3.7/site-packages/sagemaker/estimator.py\u001b[0m in \u001b[0;36mfit\u001b[0;34m(self, inputs, wait, logs, job_name)\u001b[0m\n\u001b[1;32m 337\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_prepare_for_training\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mjob_name\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mjob_name\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 338\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 339\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlatest_training_job\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_TrainingJob\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstart_new\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minputs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 340\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mwait\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 341\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlatest_training_job\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mwait\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlogs\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mlogs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", |
||||
"\u001b[0;32m~/aprar/bonitoo/.venv/lib/python3.7/site-packages/sagemaker/estimator.py\u001b[0m in \u001b[0;36mstart_new\u001b[0;34m(cls, estimator, inputs)\u001b[0m\n\u001b[1;32m 861\u001b[0m \u001b[0mcls\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_add_spot_checkpoint_args\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlocal_mode\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mestimator\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtrain_args\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 862\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 863\u001b[0;31m \u001b[0mestimator\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msagemaker_session\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtrain\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m**\u001b[0m\u001b[0mtrain_args\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 864\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 865\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mcls\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mestimator\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msagemaker_session\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mestimator\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_current_job_name\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", |
||||
"\u001b[0;32m~/aprar/bonitoo/.venv/lib/python3.7/site-packages/sagemaker/session.py\u001b[0m in \u001b[0;36mtrain\u001b[0;34m(self, input_mode, input_config, role, job_name, output_config, resource_config, vpc_config, hyperparameters, stop_condition, tags, metric_definitions, enable_network_isolation, image, algorithm_arn, encrypt_inter_container_traffic, train_use_spot_instances, checkpoint_s3_uri, checkpoint_local_path)\u001b[0m\n\u001b[1;32m 390\u001b[0m \u001b[0mLOGGER\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0minfo\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"Creating training-job with name: %s\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mjob_name\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 391\u001b[0m \u001b[0mLOGGER\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdebug\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"train request: %s\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mjson\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdumps\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtrain_request\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mindent\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m4\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 392\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msagemaker_client\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcreate_training_job\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m**\u001b[0m\u001b[0mtrain_request\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 393\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 394\u001b[0m def compile_model(\n", |
||||
"\u001b[0;32m~/aprar/bonitoo/.venv/lib/python3.7/site-packages/sagemaker/local/local_session.py\u001b[0m in \u001b[0;36mcreate_training_job\u001b[0;34m(self, TrainingJobName, AlgorithmSpecification, OutputDataConfig, ResourceConfig, InputDataConfig, **kwargs)\u001b[0m\n\u001b[1;32m 99\u001b[0m \u001b[0mtraining_job\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_LocalTrainingJob\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcontainer\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 100\u001b[0m \u001b[0mhyperparameters\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mkwargs\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"HyperParameters\"\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;34m\"HyperParameters\"\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mkwargs\u001b[0m \u001b[0;32melse\u001b[0m \u001b[0;34m{\u001b[0m\u001b[0;34m}\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 101\u001b[0;31m \u001b[0mtraining_job\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstart\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mInputDataConfig\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mOutputDataConfig\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mhyperparameters\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mTrainingJobName\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 102\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 103\u001b[0m \u001b[0mLocalSagemakerClient\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_training_jobs\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mTrainingJobName\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtraining_job\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", |
||||
"\u001b[0;32m~/aprar/bonitoo/.venv/lib/python3.7/site-packages/sagemaker/local/entities.py\u001b[0m in \u001b[0;36mstart\u001b[0;34m(self, input_data_config, output_data_config, hyperparameters, job_name)\u001b[0m\n\u001b[1;32m 87\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 88\u001b[0m self.model_artifacts = self.container.train(\n\u001b[0;32m---> 89\u001b[0;31m \u001b[0minput_data_config\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0moutput_data_config\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mhyperparameters\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mjob_name\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 90\u001b[0m )\n\u001b[1;32m 91\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mend_time\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdatetime\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdatetime\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnow\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", |
||||
"\u001b[0;32m~/aprar/bonitoo/.venv/lib/python3.7/site-packages/sagemaker/local/image.py\u001b[0m in \u001b[0;36mtrain\u001b[0;34m(self, input_data_config, output_data_config, hyperparameters, job_name)\u001b[0m\n\u001b[1;32m 151\u001b[0m \u001b[0;31m# which contains the exit code and append the command line to it.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 152\u001b[0m \u001b[0mmsg\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m\"Failed to run: %s, %s\"\u001b[0m \u001b[0;34m%\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mcompose_command\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mstr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0me\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 153\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mRuntimeError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmsg\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 154\u001b[0m \u001b[0;32mfinally\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 155\u001b[0m \u001b[0martifacts\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mretrieve_artifacts\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcompose_data\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0moutput_data_config\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mjob_name\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", |
||||
"\u001b[0;31mRuntimeError\u001b[0m: Failed to run: ['docker-compose', '-f', '/tmp/tmptao5hpuc/docker-compose.yaml', 'up', '--build', '--abort-on-container-exit'], Process exited with code: 1" |
||||
] |
||||
} |
||||
], |
||||
"source": [ |
||||
"estimator = tf.fit({'training': train_input})\n", |
||||
"#estimator = sklearn.attach('sagemaker-scikit-learn-2019-01-25-16-34-38-829')" |
||||
] |
||||
}, |
||||
{ |
||||
"cell_type": "code", |
||||
"execution_count": null, |
||||
"metadata": {}, |
||||
"outputs": [], |
||||
"source": [] |
||||
} |
||||
], |
||||
"metadata": { |
||||
"kernelspec": { |
||||
"display_name": "Python 3", |
||||
"language": "python", |
||||
"name": "python3" |
||||
}, |
||||
"language_info": { |
||||
"codemirror_mode": { |
||||
"name": "ipython", |
||||
"version": 3 |
||||
}, |
||||
"file_extension": ".py", |
||||
"mimetype": "text/x-python", |
||||
"name": "python", |
||||
"nbconvert_exporter": "python", |
||||
"pygments_lexer": "ipython3", |
||||
"version": "3.7.3" |
||||
} |
||||
}, |
||||
"nbformat": 4, |
||||
"nbformat_minor": 2 |
||||
} |
@ -0,0 +1,2 @@ |
||||
pandas |
||||
numpy |
@ -0,0 +1,420 @@ |
||||
import os |
||||
import json |
||||
import logging |
||||
import argparse |
||||
import pandas as pd |
||||
import xgboost as xgb |
||||
import numpy as np |
||||
|
||||
from sagemaker_algorithm_toolkit import exceptions as exc |
||||
from sagemaker_xgboost_container.constants import sm_env_constants |
||||
from sagemaker_xgboost_container.data_utils import get_content_type, get_dmatrix, get_size, validate_data_file_path |
||||
from sagemaker_xgboost_container import distributed |
||||
from sagemaker_xgboost_container import checkpointing |
||||
from sagemaker_xgboost_container.algorithm_mode import channel_validation as cv |
||||
from sagemaker_xgboost_container.algorithm_mode import hyperparameter_validation as hpv |
||||
from sagemaker_xgboost_container.algorithm_mode import metrics as metrics_mod |
||||
from sagemaker_xgboost_container.algorithm_mode import train_utils |
||||
from sagemaker_xgboost_container.constants.xgb_constants import CUSTOMER_ERRORS |
||||
|
||||
# Column layout of the headerless CSV export; order must match the fields
# produced by the MongoDB export script.
columns = [
    'timestamp',
    'client.channel',
    'type',
    'flight.inboundSegments.departure',
    'flight.inboundSegments.arrival',
    'flight.inboundSegments.origin.airportCode',
    'flight.inboundSegments.destination.airportCode',
    'flight.inboundSegments.flightNumber',
    'flight.inboundSegments.travelClass',
    'flight.inboundSegments.bookingCode',
    'flight.inboundSegments.availability',
    'flight.inboundSegments.elapsedFlyingTime',
    'flight.outboundSegments.departure',
    'flight.outboundSegments.arrival',
    'flight.outboundSegments.origin.airportCode',
    'flight.outboundSegments.destination.airportCode',
    'flight.outboundSegments.flightNumber',
    'flight.outboundSegments.travelClass',
    'flight.outboundSegments.bookingCode',
    'flight.outboundSegments.availability',
    'flight.outboundSegments.elapsedFlyingTime',
    'flight.inboundEFT',  # inbound elapsed flying time
    'flight.outboundEFT',  # outbound elapsed flying time
    'oneWay',
    'adults',
    'children',
    'infants',
    'input.price',
    'input.tax',
    'input.currency',
    'success',
    'status',
    'output.price',
    'output.tax',
    'output.currency',
    'duration'  # duration of the call to the upstream system
]

# Columns converted to pandas 'category' dtype and integer-encoded into
# parallel '<name>_codes' columns by preprocess_data().
catcolumns = [
    'client.channel',
    'type',
    'flight.inboundSegments.departure',
    'flight.inboundSegments.arrival',
    'flight.inboundSegments.origin.airportCode',
    'flight.inboundSegments.destination.airportCode',
    'flight.inboundSegments.flightNumber',
    'flight.inboundSegments.travelClass',
    'flight.inboundSegments.bookingCode',
    'flight.inboundSegments.availability',
    'flight.inboundSegments.elapsedFlyingTime',
    'flight.outboundSegments.departure',
    'flight.outboundSegments.arrival',
    'flight.outboundSegments.origin.airportCode',
    'flight.outboundSegments.destination.airportCode',
    'flight.outboundSegments.flightNumber',
    'flight.outboundSegments.travelClass',
    'flight.outboundSegments.bookingCode',
    'flight.outboundSegments.availability',
    'flight.outboundSegments.elapsedFlyingTime',
    'flight.inboundEFT',
    'flight.outboundEFT',
    'input.currency',
    'output.currency',
    'oneWay'
]

# Columns coerced to float64 by preprocess_data().
floatcolumns = [
    'input.price',
    'input.tax',
    'output.price',
    'output.tax'
]

# Columns coerced to int32 by preprocess_data(); NaN is replaced with -1
# because integer columns cannot hold missing values.
intcolumns = [
    'adults',
    'children',
    'infants',
    'status',
    'duration'
]

# Columns that together identify one itinerary; used as the group key when
# computing how long an itinerary was observed in the cache.
pkcolumns = [
    'flight.inboundSegments.departure',
    'flight.inboundSegments.origin.airportCode',
    'flight.inboundSegments.destination.airportCode',
    'flight.outboundSegments.departure',
    'flight.outboundSegments.origin.airportCode',
    'flight.outboundSegments.destination.airportCode'
]
||||
|
||||
def expected_value(row, cachetime_df, price_limit=1000):
    """Derive the cache-score label (an int in 0..7) for one request row.

    The label buckets how long this itinerary was observed in the cache
    (looked up in ``cachetime_df`` by the ``pkcolumns`` key), shifted by a
    modifier that reflects how far the cached input price drifted from the
    fresh output price.

    :param row: one preprocessed request row (a pandas Series)
    :param cachetime_df: per-itinerary min/max timestamps as produced by
        compute_cached_time()
    :param price_limit: absolute price drift above which the bucket is
        lowered by one
    :return: integer label; 0 means "unknown/no cache time"
    """
    # TODO sum tax + price ?
    inprice, outprice = row['input.price'], row['output.price']
    # Absolute drift between the cached (input) and the fresh (output) price.
    pricestatus = abs(inprice - outprice)

    # TODO correct in cache time ?
    # Look up all observations of this itinerary key; NaN key parts are
    # replaced with '' — presumably matching how the index was built from
    # the same columns (TODO confirm against compute_cached_time input).
    timestamps = cachetime_df.loc[row[pkcolumns].fillna(''),'timestamp']
    tdiff = timestamps['max'] - timestamps['min']
    if tdiff.shape[0] > 0:
        incachetime = tdiff[0]
    else:
        # No observation for this itinerary: mark the duration as missing.
        incachetime = np.timedelta64('NaT')

    # modifier shifts the duration bucket: -1 for a large drift, +1 for a
    # near-exact price match.
    modifier = 0
    if pricestatus > price_limit:
        modifier = -1
    if pricestatus < 0.1:
        # NOTE(review): `not incachetime` is also True for a *zero*
        # timedelta, not only a missing one — confirm this is intended.
        if not incachetime:
            return 3
        modifier = 1

    if pd.isnull(incachetime):
        return 0

    # Bucket the observed in-cache duration into increasing labels.
    if incachetime <= np.timedelta64(12,'h'):
        return 1 + modifier
    if incachetime <= np.timedelta64(1,'D'):
        return 2 + modifier
    if incachetime <= np.timedelta64(2,'D'):
        return 3 + modifier
    if incachetime <= np.timedelta64(3,'D'):
        return 4 + modifier
    if incachetime <= np.timedelta64(7,'D'):
        return 5 + modifier
    if incachetime <= np.timedelta64(14,'D'):
        return 6 + modifier

    # Cap at 7 so the +1 modifier cannot push past the top bucket.
    return min(7, 7 + modifier)
||||
|
||||
def compute_cached_time(df):
    """Aggregate, per itinerary key, the earliest and latest request timestamps.

    :param df: preprocessed dataframe with parsed 'timestamp' values
    :return: dataframe indexed by ``pkcolumns`` with ('timestamp', 'min')
        and ('timestamp', 'max') columns
    """
    logging.info('Computing cached times')
    keyed = df.set_index(pkcolumns)
    return keyed.groupby(pkcolumns).agg({'timestamp': ['min', 'max']})
||||
|
||||
def preprocess_data(df):
    """Clean and encode the raw request dataframe for training.

    Keeps only successful requests, parses timestamps, stringifies the
    'oneWay' boolean, encodes each categorical column into a parallel
    '<name>_codes' integer column, and coerces numeric columns to fixed
    dtypes.

    :param df: raw dataframe with the module-level ``columns`` layout
    :return: preprocessed copy containing only the successful rows
    """
    logging.info('Preprocessing start')
    # Work on an explicit copy: the boolean filter yields a slice, and the
    # .loc assignments below would otherwise hit SettingWithCopyWarning and
    # potentially not write through to the frame we return.
    df = df[df.loc[:, 'success'] == True].copy()

    # Vectorized parse instead of the previous per-row apply().
    df.loc[:, 'timestamp'] = pd.to_datetime(df.loc[:, 'timestamp'])

    # Represent booleans as strings so 'oneWay' can become a category below.
    booleanDictionary = {True: 'TRUE', False: 'FALSE'}
    df.loc[:, 'oneWay'] = df.loc[:, 'oneWay'].replace(booleanDictionary)

    for cc in catcolumns:
        df.loc[:, cc] = df.loc[:, cc].astype('category')
        # Integer codes are what the model consumes; the category column is
        # kept so the encoders can be exported later.
        df.loc[:, '%s_codes' % cc] = df[cc].cat.codes

    df.loc[:, floatcolumns] = df.loc[:, floatcolumns].astype('float64')
    # -1 marks missing integer values (int columns cannot hold NaN).
    df.loc[:, intcolumns] = df.loc[:, intcolumns].fillna(-1).astype('int32')

    return df
||||
|
||||
def remove_non_features(df):
    """Drop columns that must not be fed to the model.

    :param df: preprocessed dataframe
    :return: (feature-only dataframe, original dataframe) — the original is
        kept so callers retain the raw categorical columns
    """
    non_features = ['timestamp', 'success'] + catcolumns
    features = df.drop(non_features, axis=1)
    return features, df
||||
|
||||
def train_test_split(df, label, ratio):
    """Randomly split features and labels into train/test partitions.

    :param df: feature dataframe
    :param label: label dataframe row-aligned with ``df``
    :param ratio: probability of a row landing in the training partition
    :return: (train_data, test_data, train_label, test_label), each with a
        reset index
    """
    # BUGFIX: log message said "ration" instead of "ratio".
    logging.info('Splitting dataset with ratio %f', ratio)

    # One shared boolean mask keeps features and labels aligned.
    msk = np.random.rand(len(df)) < ratio
    train_data = df[msk].reset_index()
    test_data = df[~msk].reset_index()
    train_label = label[msk].reset_index()
    test_label = label[~msk].reset_index()

    return train_data, test_data, train_label, test_label
||||
|
||||
def get_csv_pandas(files_path):
    """Load one headerless CSV as a dataframe with the known column layout.

    ``files_path`` may be a CSV file or a directory; for a directory the
    first regular file inside it is used.

    :param files_path: path to a CSV file or a directory containing one
    :raises exc.UserError: when no file is found or the CSV cannot be read
    :return: dataframe with the module-level ``columns`` names applied
    """
    try:
        if os.path.isfile(files_path):
            csv_path = files_path
        else:
            # BUGFIX: the original joined files_path with the already-full
            # path when files_path was itself a file; join only the
            # directory case. An empty directory now also surfaces as a
            # UserError instead of a raw IndexError.
            first = [f for f in os.listdir(files_path)
                     if os.path.isfile(os.path.join(files_path, f))][0]
            csv_path = os.path.join(files_path, first)

        logging.info('Loading csv file %s', csv_path)

        df = pd.read_csv(csv_path, header=None)
        df.columns = columns
        return df

    except Exception as e:
        raise exc.UserError("Failed to load csv data with exception:\n{}".format(e))
||||
|
||||
def get_pandas_df(data_path):
    """Locate CSV data under ``data_path`` and load it as a dataframe.

    Returns None when the path does not exist. For a directory, descends to
    the first leaf directory (one with no subdirectories) and loads from
    there.

    :param data_path: file or directory path of a data channel
    :return: dataframe, or None when ``data_path`` does not exist
    """
    if not os.path.exists(data_path):
        return None

    if os.path.isfile(data_path):
        files_path = data_path
    else:
        # Walk down to the first directory that has no subdirectories.
        for root, dirs, files in os.walk(data_path):
            if not dirs:
                files_path = root
                break

    return get_csv_pandas(files_path)
||||
|
||||
|
||||
def get_df(train_path, validate_path, content_type='text/csv'):
    """Validate channel paths and load the train/validation dataframes.

    :param train_path: training channel path (may be falsy)
    :param validate_path: validation channel path (may be falsy)
    :param content_type: expected content type for validation
    :return: (train_df, val_df); either is None when its channel is absent
        or empty
    """
    train_size = get_size(train_path) if train_path else 0
    validate_size = get_size(validate_path) if validate_path else 0

    total_mb = round((train_size + validate_size) / (1024 * 1024), 2)
    logging.debug("File size need to be processed in the node: {}mb.".format(total_mb))

    if train_size > 0:
        validate_data_file_path(train_path, content_type)
    if validate_size > 0:
        validate_data_file_path(validate_path, content_type)

    train_df = get_pandas_df(train_path) if train_size > 0 else None
    validation_df = get_pandas_df(validate_path) if validate_size > 0 else None

    return train_df, validation_df
||||
|
||||
def get_dmatrices(train_pandas, train_label_pandas, val_pandas, val_label_pandas, ratio=0.8):
    """Build XGBoost DMatrices for training and validation.

    When no validation dataframe is supplied, the training data is split
    randomly with ``ratio`` instead.

    :param train_pandas: training feature dataframe
    :param train_label_pandas: dataframe with a 'label' column for training
    :param val_pandas: validation feature dataframe or None
    :param val_label_pandas: validation label dataframe or None
    :param ratio: train fraction, used only when ``val_pandas`` is None
    :return: (train_dmatrix, val_dmatrix)
    """
    # BUGFIX: `if val_pandas:` raises ValueError for a DataFrame ("truth
    # value of a DataFrame is ambiguous") — test for presence explicitly.
    if val_pandas is not None:
        train_dmatrix = xgb.DMatrix(train_pandas, label=train_label_pandas.loc[:, 'label'])
        val_dmatrix = xgb.DMatrix(val_pandas, label=val_label_pandas.loc[:, 'label'])
    else:
        train_data, test_data, train_label, test_label = train_test_split(train_pandas, train_label_pandas, ratio)
        train_dmatrix = xgb.DMatrix(train_data, label=train_label.loc[:, 'label'])
        val_dmatrix = xgb.DMatrix(test_data, label=test_label.loc[:, 'label'])

    return train_dmatrix, val_dmatrix
||||
|
||||
def save_encoders(encoder_location, df):
    """Persist category -> integer-code mappings for all categorical columns.

    Writes a JSON object keyed by column name; each value maps a category
    string to the code assigned by pandas (the position in
    ``cat.categories``), matching the '<name>_codes' columns produced by
    preprocess_data().

    :param encoder_location: output JSON file path
    :param df: dataframe whose ``catcolumns`` are category-typed
    """
    logging.info('Saving encoders')

    encoders = {}
    for col in catcolumns:
        mapping = {}
        for code, category in enumerate(df[col].cat.categories):
            mapping[category] = code
        encoders[col] = mapping

    with open(encoder_location, 'w') as f:
        json.dump(encoders, f)
||||
|
||||
def sagemaker_train(train_config, data_config, train_path, val_path, model_dir, sm_hosts, sm_current_host,
                    checkpoint_config):
    """SageMaker training entry point: prepare data and dispatch train_job.

    Loads the channel CSVs, preprocesses them, derives the cache-score
    label via expected_value(), builds DMatrices, then runs either
    single-node training or Rabit distributed training.

    :param train_config: hyperparameters from SageMaker (string-valued)
    :param data_config: channel configuration dict
    :param train_path: training channel path
    :param val_path: validation channel path (may be None)
    :param model_dir: directory where model artifacts are written
    :param sm_hosts: list of hosts participating in the job
    :param sm_current_host: this host's name
    :param checkpoint_config: checkpoint configuration ('LocalPath' key)
    """
    metrics = metrics_mod.initialize()
    hyperparameters = hpv.initialize(metrics)

    # 'bonitoo_*' parameters are ours; strip them (and 'sagemaker_*') before
    # XGBoost hyperparameter validation. Values may arrive quoted.
    price_limit = int(train_config.get('bonitoo_price_limit', 1000))
    train_config = {k: v.replace('"', '') for k, v in train_config.items() if not k.startswith('sagemaker_') and not k.startswith('bonitoo_')}
    train_config = hyperparameters.validate(train_config)

    if train_config.get("updater"):
        train_config["updater"] = ",".join(train_config["updater"])

    logging.info("hyperparameters {}".format(train_config))
    logging.info("channels {}".format(data_config))

    # Get Training and Validation Data Matrices
    validation_channel = data_config.get('validation', None)
    checkpoint_dir = checkpoint_config.get("LocalPath", None)

    train_df, val_df = get_df(train_path, val_path)

    train_df = preprocess_data(train_df)
    cachetime_df = compute_cached_time(train_df)
    train_label_df = train_df.apply(lambda x: expected_value(x, cachetime_df, price_limit), axis=1).to_frame(name='label')
    train_df, train_df_orig = remove_non_features(train_df)
    val_label_df = None

    # BUGFIX: `if val_df:` raises ValueError for a DataFrame ("truth value
    # of a DataFrame is ambiguous") — test for presence explicitly.
    if val_df is not None:
        val_df = preprocess_data(val_df)
        # Validation rows are labeled against the training set's cache times.
        val_label_df = val_df.apply(lambda x: expected_value(x, cachetime_df, price_limit), axis=1).to_frame(name='label')
        val_df, val_df_orig = remove_non_features(val_df)
    train_dmatrix, val_dmatrix = get_dmatrices(train_df, train_label_df, val_df, val_label_df)

    train_args = dict(
        train_cfg=train_config,
        train_dmatrix=train_dmatrix,
        # The un-dropped frame is needed later to export the encoders.
        train_df=train_df_orig,
        val_dmatrix=val_dmatrix,
        model_dir=model_dir,
        checkpoint_dir=checkpoint_dir)

    # Obtain information about training resources to determine whether to set up Rabit or not
    num_hosts = len(sm_hosts)

    if num_hosts > 1:
        # Wait for hosts to find each other
        logging.info("Distributed node training with {} hosts: {}".format(num_hosts, sm_hosts))
        distributed.wait_hostname_resolution(sm_hosts)

        if not train_dmatrix:
            logging.warning("Host {} does not have data. Will broadcast to cluster and will not be used in distributed"
                            " training.".format(sm_current_host))
        distributed.rabit_run(exec_fun=train_job, args=train_args, include_in_training=(train_dmatrix is not None),
                              hosts=sm_hosts, current_host=sm_current_host, update_rabit_args=True)
    elif num_hosts == 1:
        if train_dmatrix:
            if validation_channel:
                if not val_dmatrix:
                    raise exc.UserError("No data in validation channel path {}".format(val_path))
            logging.info("Single node training.")
            train_args.update({'is_master': True})
            train_job(**train_args)
        else:
            raise exc.UserError("No data in training channel path {}".format(train_path))
    else:
        raise exc.PlatformError("Number of hosts should be an int greater than or equal to 1")
||||
|
||||
|
||||
def train_job(train_cfg, train_dmatrix, val_dmatrix, train_df, model_dir, checkpoint_dir, is_master):
    """Run xgb.train() with checkpoint resume and persist the artifacts.

    :param train_cfg: validated hyperparameters passed to xgb.train
    :param train_dmatrix: training DMatrix
    :param val_dmatrix: validation DMatrix (may be None)
    :param train_df: original (pre feature-drop) training dataframe; used
        only to export the categorical encoders
    :param model_dir: output directory for the model and encoder files
    :param checkpoint_dir: checkpoint directory (may be None/empty)
    :param is_master: only the master host writes artifacts
    """
    # Parse arguments for train() API
    early_stopping_rounds = train_cfg.get('early_stopping_rounds')
    num_round = int(train_cfg["num_round"])

    # Evaluation metrics to use with train() API
    tuning_objective_metric_param = train_cfg.get("_tuning_objective_metric")
    eval_metric = train_cfg.get("eval_metric")
    cleaned_eval_metric, configured_feval = train_utils.get_eval_metrics_and_feval(
        tuning_objective_metric_param, eval_metric)
    if cleaned_eval_metric:
        train_cfg['eval_metric'] = cleaned_eval_metric
    else:
        train_cfg.pop('eval_metric', None)

    # Set callback evals
    watchlist = [(train_dmatrix, 'train')]
    if val_dmatrix is not None:
        watchlist.append((val_dmatrix, 'validation'))

    # Resume from the latest checkpoint when one exists; the remaining
    # number of boosting rounds shrinks by the iterations already done.
    xgb_model, iteration = checkpointing.load_checkpoint(checkpoint_dir)
    num_round -= iteration
    if xgb_model is not None:
        logging.info("Checkpoint loaded from %s", xgb_model)
        logging.info("Resuming from iteration %s", iteration)

    callbacks = []
    callbacks.append(checkpointing.print_checkpointed_evaluation(start_iteration=iteration))
    if checkpoint_dir:
        save_checkpoint = checkpointing.save_checkpoint(checkpoint_dir, start_iteration=iteration)
        callbacks.append(save_checkpoint)

    logging.info("Train matrix has {} rows".format(train_dmatrix.num_row()))
    if val_dmatrix:
        logging.info("Validation matrix has {} rows".format(val_dmatrix.num_row()))

    # TODO remove
    #logging.info("cols: %s", str(train_dmatrix.feature_names))
    #raise Exception("cols: %s" % str(train_dmatrix.feature_names))

    try:
        bst = xgb.train(train_cfg, train_dmatrix, num_boost_round=num_round, evals=watchlist, feval=configured_feval,
                        early_stopping_rounds=early_stopping_rounds, callbacks=callbacks, xgb_model=xgb_model,
                        verbose_eval=False)
    except Exception as e:
        # Known user-caused failure messages are re-raised as UserError so
        # SageMaker attributes them to the customer, not the platform.
        for customer_error_message in CUSTOMER_ERRORS:
            if customer_error_message in str(e):
                raise exc.UserError(str(e))

        exception_prefix = "XGB train call failed with exception"
        raise exc.AlgorithmError("{}:\n {}".format(exception_prefix, str(e)))

    if not os.path.exists(model_dir):
        os.makedirs(model_dir)

    # Only the master host persists artifacts (relevant in distributed mode).
    if is_master:
        encoder_location = model_dir + '/encoder.json'
        save_encoders(encoder_location, train_df)
        logging.info("Stored encoders at {}".format(encoder_location))

        model_location = model_dir + '/xgboost-model.bin'
        bst.save_model(model_location)
        logging.info("Stored trained model at {}".format(model_location))
||||
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # SageMaker provides hyperparameters and channel definitions as JSON
    # files whose locations come from environment variables.
    with open(os.getenv(sm_env_constants.SM_INPUT_TRAINING_CONFIG_FILE), "r") as f:
        train_config = json.load(f)
    with open(os.getenv(sm_env_constants.SM_INPUT_DATA_CONFIG_FILE), "r") as f:
        data_config = json.load(f)

    # Checkpoint configuration is optional; fall back to an empty dict.
    checkpoint_config_file = os.getenv(sm_env_constants.SM_CHECKPOINT_CONFIG_FILE)
    if os.path.exists(checkpoint_config_file):
        with open(checkpoint_config_file, "r") as f:
            checkpoint_config = json.load(f)
    else:
        checkpoint_config = {}

    # Data channel locations; the validation channel may be absent.
    train_path = os.environ['SM_CHANNEL_TRAINING']
    val_path = os.environ.get(sm_env_constants.SM_CHANNEL_VALIDATION)

    # Cluster topology for (optional) distributed training.
    sm_hosts = json.loads(os.environ[sm_env_constants.SM_HOSTS])
    sm_current_host = os.environ[sm_env_constants.SM_CURRENT_HOST]

    model_dir = os.getenv(sm_env_constants.SM_MODEL_DIR)

    sagemaker_train(
        train_config=train_config, data_config=data_config,
        train_path=train_path, val_path=val_path, model_dir=model_dir,
        sm_hosts=sm_hosts, sm_current_host=sm_current_host,
        checkpoint_config=checkpoint_config
    )
File diff suppressed because one or more lines are too long
Loading…
Reference in new issue