Bonitoo cache TTL estimation
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 

115 lines
5.1 KiB

from pymongo import MongoClient
from pprint import pprint
from datetime import datetime
import csv
# Create a MongoDB client with the driver's default connection settings and
# select the "bonitoo" database that the export below reads from.
client = MongoClient()
db = client['bonitoo']
# Column order of the exported CSV; csv.DictWriter emits values in this order.
fieldnames = [
    'timestamp',
    'client.channel',
    'type',
    # Nine per-segment columns for the inbound leg, followed by the same nine
    # for the outbound leg.
    *(
        'flight.%sSegments.%s' % (leg, attr)
        for leg in ('inbound', 'outbound')
        for attr in (
            'departure',
            'arrival',
            'origin.airportCode',
            'destination.airportCode',
            'flightNumber',
            'travelClass',
            'bookingCode',
            'availability',
            'elapsedFlyingTime',
        )
    ),
    'flight.inboundEFT',  # EFT = elapsed flying time
    'flight.outboundEFT',
    'oneWay',
    'adults',  # number of persons = (adults + children)
    'children',
    'infants',
    'input.price',
    'input.tax',
    'input.currency',
    'success',
    'status',
    'output.price',
    'output.tax',
    'output.currency',
    'duration',  # duration of the call to the upstream system
]
# 5% or 200 CZK difference upward
# -200 CZK downward
# ignore abs(+-10 CZK)
# timestamp + ok price - should be in the cache from cacheat
# timestamp + not-ok price - should not be in the cache from cacheat
# length of stay: arrival - departure
# flight duration?
# if there is an error then no cache (= priceout is missing)
# take in/out airline codes into account (mcx?) - Mirek will find out
# compute the success rate of is/is-not in cache, in %
def _segment_columns(prefix, segments):
    """Flatten a list of flight segments into pipe-joined CSV columns.

    Every key of the returned dict is ``prefix`` followed by the segment
    attribute name; every value joins that attribute across all ``segments``
    with ``'|'``, in segment order.

    ``departure`` and ``arrival`` are rendered with ``.isoformat()``.
    ``bookingCode``, ``availability`` and ``elapsedFlyingTime`` are optional
    and contribute an empty string for a segment that lacks them; the other
    attributes are required and raise ``KeyError`` when missing.
    """
    return {
        prefix + 'departure': '|'.join(x['departure'].isoformat() for x in segments),
        prefix + 'arrival': '|'.join(x['arrival'].isoformat() for x in segments),
        prefix + 'origin.airportCode': '|'.join(x['origin']['airportCode'] for x in segments),
        prefix + 'destination.airportCode': '|'.join(x['destination']['airportCode'] for x in segments),
        prefix + 'flightNumber': '|'.join(x['flightNumber'] for x in segments),
        prefix + 'travelClass': '|'.join(x['travelClass'] for x in segments),
        prefix + 'bookingCode': '|'.join(x.get('bookingCode', '') for x in segments),
        prefix + 'availability': '|'.join(str(x.get('availability', '')) for x in segments),
        prefix + 'elapsedFlyingTime': '|'.join(str(x.get('elapsedFlyingTime', '')) for x in segments),
    }


# Dump every document of the pricing_audit collection as one row of export.csv.
counter = 0
# newline='' lets the csv module control line endings itself (otherwise rows
# end in \r\r\n on Windows); the explicit encoding keeps the output bytes
# independent of the machine's locale.
with open('export.csv', mode='w', newline='', encoding='utf-8') as ef:
    writer = csv.DictWriter(ef, fieldnames=fieldnames, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
    # The header row is intentionally not written: do not write header for s3 files.
    for it in db.pricing_audit.find():
        counter += 1
        if counter % 1000 == 0:
            # Progress indicator every 1000 documents.
            print('Iterace %d' % counter)

        flight = it['flight']
        # 'output' is optional; each of its fields falls back to 0 when the
        # document has no output or the output lacks that field.
        output = it.get('output', {})

        d = {
            # The stored timestamp is divided by 1000 before conversion
            # (i.e. treated as milliseconds).
            # NOTE(review): fromtimestamp() renders the machine's local time,
            # so the exported value depends on where the script runs - confirm
            # this is intended.
            'timestamp': datetime.fromtimestamp(it['timestamp'] / 1000).isoformat(),
            'client.channel': it['client']['channel'],
            'type': it['type'],
            'flight.inboundEFT': flight.get('inboundEFT', ''),
            'flight.outboundEFT': flight.get('outboundEFT', ''),
            'oneWay': it['oneWay'],
            'adults': it['adults'],
            'children': it['children'],
            'infants': it['infants'],
            'input.price': it['input']['price'],
            'input.tax': it['input']['tax'],
            'input.currency': it['input']['currency'],
            'success': it['success'],
            'status': it.get('status', ''),
            'output.price': output.get('price', 0),
            'output.tax': output.get('tax', 0),
            'output.currency': output.get('currency', 0),
            'duration': it['duration'],
        }
        d.update(_segment_columns('flight.outboundSegments.', flight['outboundSegments']))
        # Inbound segments are optional; when absent, DictWriter fills the
        # inbound columns with its default empty value.
        if 'inboundSegments' in flight:
            d.update(_segment_columns('flight.inboundSegments.', flight['inboundSegments']))
        writer.writerow(d)