"""Export pricing-audit documents from MongoDB to a header-less CSV file.

Every document of the ``pricing_audit`` collection in the ``bonitoo``
database becomes one row of ``export.csv``. Per-segment flight values are
flattened into a single cell by joining them with ``|``.
"""
import csv
from datetime import datetime

from pymongo import MongoClient

client = MongoClient()
db = client.bonitoo

# Column order of the exported CSV.
fieldnames = [
    'timestamp',
    'client.channel',
    'type',
    'flight.inboundSegments.departure',
    'flight.inboundSegments.arrival',
    'flight.inboundSegments.origin.airportCode',
    'flight.inboundSegments.destination.airportCode',
    'flight.outboundSegments.departure',
    'flight.outboundSegments.arrival',
    'flight.outboundSegments.origin.airportCode',
    'flight.outboundSegments.destination.airportCode',
    'input.price',
    'input.tax',
    'input.currency',
    'success',
    'status',
    'output.price',
    'output.tax',
    'output.currency',
    'duration',
]

# Fallback used when a document has no 'output' sub-document at all.
_MISSING_OUTPUT = {'price': 0, 'tax': 0, 'currency': 0}


def _segment_columns(direction, segments):
    """Flatten a list of flight segments into pipe-joined CSV columns.

    Args:
        direction: Segment-list name used in the column prefix
            ('inboundSegments' or 'outboundSegments').
        segments: Iterable of segment mappings, each providing 'departure',
            'arrival', 'origin' and 'destination'.

    Returns:
        dict mapping the four 'flight.<direction>.*' column names to the
        '|'-joined values of all segments, in segment order.

    Raises:
        KeyError: if a segment lacks one of the expected keys.
    """
    # Materialise once: the segments are traversed four times below.
    segments = list(segments)
    prefix = 'flight.%s.' % direction
    return {
        # 'departure'/'arrival' must expose .isoformat() -- presumably
        # datetime objects as decoded by pymongo; confirm against the schema.
        prefix + 'departure': '|'.join(
            seg['departure'].isoformat() for seg in segments),
        prefix + 'arrival': '|'.join(
            seg['arrival'].isoformat() for seg in segments),
        prefix + 'origin.airportCode': '|'.join(
            seg['origin']['airportCode'] for seg in segments),
        prefix + 'destination.airportCode': '|'.join(
            seg['destination']['airportCode'] for seg in segments),
    }


def _build_row(doc):
    """Convert one pricing-audit document into a CSV row dict.

    Args:
        doc: Mapping read from the ``pricing_audit`` collection.

    Returns:
        dict keyed by entries of ``fieldnames``. The inbound-segment columns
        are only present when the document's flight has 'inboundSegments'.

    Raises:
        KeyError: if a mandatory field is missing from the document.
    """
    flight = doc['flight']
    output = doc.get('output', _MISSING_OUTPUT)
    row = {
        # The stored value is divided by 1000 before conversion, i.e. it is
        # presumably epoch milliseconds. NOTE(review): fromtimestamp() without
        # a tz yields naive local time of the exporting machine.
        'timestamp': datetime.fromtimestamp(
            doc['timestamp'] / 1000).isoformat(),
        'client.channel': doc['client']['channel'],
        'type': doc['type'],
        'input.price': doc['input']['price'],
        'input.tax': doc['input']['tax'],
        'input.currency': doc['input']['currency'],
        'success': doc['success'],
        'status': doc.get('status', ''),
        'output.price': output['price'],
        'output.tax': output['tax'],
        'output.currency': output['currency'],
        'duration': doc['duration'],
    }
    row.update(_segment_columns('outboundSegments',
                                flight['outboundSegments']))
    # Flights without inbound segments simply omit these keys; DictWriter
    # fills absent columns with its default restval (an empty string).
    if 'inboundSegments' in flight:
        row.update(_segment_columns('inboundSegments',
                                    flight['inboundSegments']))
    return row


# Number of documents exported so far; stays 0 for an empty collection.
counter = 0

# newline='' is required by the csv module so that the writer's own '\r\n'
# row terminators are not translated a second time by the text layer.
with open('export.csv', mode='w', newline='') as ef:
    writer = csv.DictWriter(ef, fieldnames=fieldnames, delimiter=',',
                            quotechar='"', quoting=csv.QUOTE_MINIMAL)
    # Deliberately no writer.writeheader(): the S3 files must not carry a
    # header row.
    for counter, it in enumerate(db.pricing_audit.find(), start=1):
        if counter % 1000 == 0:
            # Progress indicator every 1000 documents.
            print('Iterace %d' % counter)
        writer.writerow(_build_row(it))