"""Export the ``pricing_audit`` MongoDB collection to ``export.csv``.

Every audit document becomes one CSV row.  Per-segment values of a flight
(departure, arrival, airport codes, ...) are flattened into a single cell by
joining them with ``|``.  No header row is written.
"""
import csv
from datetime import datetime
from pprint import pprint

from pymongo import MongoClient

client = MongoClient()
db = client.bonitoo

# Column order of the exported CSV.
fieldnames = [
    'timestamp',
    'client.channel',
    'type',
    'flight.inboundSegments.departure',
    'flight.inboundSegments.arrival',
    'flight.inboundSegments.origin.airportCode',
    'flight.inboundSegments.destination.airportCode',
    'flight.inboundSegments.flightNumber',
    'flight.inboundSegments.travelClass',
    'flight.inboundSegments.bookingCode',
    'flight.inboundSegments.availability',
    'flight.inboundSegments.elapsedFlyingTime',
    'flight.outboundSegments.departure',
    'flight.outboundSegments.arrival',
    'flight.outboundSegments.origin.airportCode',
    'flight.outboundSegments.destination.airportCode',
    'flight.outboundSegments.flightNumber',
    'flight.outboundSegments.travelClass',
    'flight.outboundSegments.bookingCode',
    'flight.outboundSegments.availability',
    'flight.outboundSegments.elapsedFlyingTime',
    'flight.inboundEFT',  # elapsed flying time
    'flight.outboundEFT',
    'oneWay',
    'adults',  # number of persons = (adults + children)
    'children',
    'infants',
    'input.price',
    'input.tax',
    'input.currency',
    'success',
    'status',
    'output.price',
    'output.tax',
    'output.currency',
    'duration',  # duration of the call to the upstream system
]

# Analysis notes (translated from the original Czech):
# - 5% or 200 CZK difference upwards
# - -200 CZK downwards
# - ignore abs(+-10 CZK)
# - timestamp + ok price - should be in cache since cacheat
# - timestamp + not-ok price - should not be in cache since cacheat
# - length of stay (arrival - departure)
# - flight duration ?
# - on error there is no cache (= priceout is missing)
# - take in/out airline codes into account (mcx ?) - Mirek will find out
# - compute the cache hit / miss success rate in %

# Stand-in used when a document has no 'output' sub-document.
_NO_OUTPUT = {'price': 0, 'tax': 0, 'currency': 0}


def _segment_columns(prefix, segments):
    """Return the '|'-joined CSV cells for one list of flight segments.

    ``prefix`` is the column-name prefix (e.g. ``'flight.outboundSegments'``)
    and ``segments`` is the list of segment sub-documents.  ``departure`` and
    ``arrival`` must support ``.isoformat()``; ``bookingCode``,
    ``availability`` and ``elapsedFlyingTime`` are optional and default to an
    empty string.
    """
    return {
        prefix + '.departure':
            '|'.join([seg['departure'].isoformat() for seg in segments]),
        prefix + '.arrival':
            '|'.join([seg['arrival'].isoformat() for seg in segments]),
        prefix + '.origin.airportCode':
            '|'.join([seg['origin']['airportCode'] for seg in segments]),
        prefix + '.destination.airportCode':
            '|'.join([seg['destination']['airportCode'] for seg in segments]),
        prefix + '.flightNumber':
            '|'.join([seg['flightNumber'] for seg in segments]),
        prefix + '.travelClass':
            '|'.join([seg['travelClass'] for seg in segments]),
        prefix + '.bookingCode':
            '|'.join([seg.get('bookingCode', '') for seg in segments]),
        prefix + '.availability':
            '|'.join([str(seg.get('availability', '')) for seg in segments]),
        prefix + '.elapsedFlyingTime':
            '|'.join([str(seg.get('elapsedFlyingTime', '')) for seg in segments]),
    }


def _build_row(it):
    """Flatten one ``pricing_audit`` document into a dict keyed by column name."""
    flight = it['flight']
    output = it.get('output', _NO_OUTPUT)

    row = {
        # The stored value is divided by 1000 before conversion, i.e. it is
        # treated as epoch milliseconds; the result is a naive local time.
        'timestamp': datetime.fromtimestamp(it['timestamp'] / 1000).isoformat(),
        'client.channel': it['client']['channel'],
        'type': it['type'],
        'flight.inboundEFT': flight.get('inboundEFT', ''),
        'flight.outboundEFT': flight.get('outboundEFT', ''),
        'oneWay': it['oneWay'],
        'adults': it['adults'],
        'children': it['children'],
        'infants': it['infants'],
        'input.price': it['input']['price'],
        'input.tax': it['input']['tax'],
        'input.currency': it['input']['currency'],
        'success': it['success'],
        'status': it.get('status', ''),
        'output.price': output['price'],
        'output.tax': output['tax'],
        'output.currency': output['currency'],
        'duration': it['duration'],
    }

    # Outbound segments are mandatory (a missing key raises KeyError).
    row.update(_segment_columns('flight.outboundSegments',
                                flight['outboundSegments']))

    # Inbound segments are optional; when absent, DictWriter leaves the
    # corresponding columns empty.
    if 'inboundSegments' in flight:
        row.update(_segment_columns('flight.inboundSegments',
                                    flight['inboundSegments']))
    return row


counter = 0
# newline='' is required by the csv module so that the writer controls line
# endings itself (otherwise some platforms emit an extra '\r' per row).
with open('export.csv', mode='w', newline='') as ef:
    writer = csv.DictWriter(ef, fieldnames=fieldnames, delimiter=',',
                            quotechar='"', quoting=csv.QUOTE_MINIMAL)
    # The header row is intentionally not written (not wanted for s3 files).

    for counter, it in enumerate(db.pricing_audit.find(), start=1):
        # Progress report every 1000 documents.
        if counter % 1000 == 0:
            print('Iterace %d' % counter)
        writer.writerow(_build_row(it))