"""Export the ``pricing_audit`` collection of the ``bonitoo`` database to CSV.

Every document returned by ``db.pricing_audit.find()`` becomes one row of
``export.csv`` in the current working directory.  Multi-segment flight
fields are flattened into a single cell by joining the per-segment values
with ``|``.  No header row is written.
"""
import csv
from datetime import datetime

from pymongo import MongoClient

client = MongoClient()
db = client.bonitoo

# Column order of the exported file.  Keys absent from a row (the inbound
# columns, when a document has no inbound segments) are written as empty
# cells, which is csv.DictWriter's default ``restval``.
fieldnames = [
    'timestamp',
    'type',
    'flight.inboundSegments.departure',
    'flight.inboundSegments.arrival',
    'flight.inboundSegments.origin.airportCode',
    'flight.inboundSegments.destination.airportCode',
    'flight.inboundSegments.airline.code',
    'flight.inboundMCX.code',
    'flight.outboundSegments.departure',
    'flight.outboundSegments.arrival',
    'flight.outboundSegments.origin.airportCode',
    'flight.outboundSegments.destination.airportCode',
    'flight.outboundSegments.airline.code',
    'flight.outboundMCX.code',
    'input.price',
    'success',
    'output.price',
    'cacheAt',
    'cacheExp',
]


def _direction_columns(flight, direction):
    """Return the CSV columns for one direction of *flight*.

    *direction* is ``'inbound'`` or ``'outbound'``.  The result maps the
    five ``flight.<direction>Segments.*`` column names to the ``|``-joined
    per-segment values, plus ``flight.<direction>MCX.code``.

    Raises ``KeyError`` if the segments list, the MCX entry, or any of the
    expected per-segment keys is missing.
    """
    segments = flight[direction + 'Segments']
    prefix = 'flight.%sSegments.' % direction
    return {
        prefix + 'departure': '|'.join(
            seg['departure'].isoformat() for seg in segments),
        prefix + 'arrival': '|'.join(
            seg['arrival'].isoformat() for seg in segments),
        prefix + 'origin.airportCode': '|'.join(
            seg['origin']['airportCode'] for seg in segments),
        prefix + 'destination.airportCode': '|'.join(
            seg['destination']['airportCode'] for seg in segments),
        prefix + 'airline.code': '|'.join(
            seg['airline']['code'] for seg in segments),
        'flight.%sMCX.code' % direction: flight[direction + 'MCX']['code'],
    }


def _isoformat_or_empty(value):
    """Return ``value.isoformat()``, or ``''`` when *value* is missing/falsy."""
    return value.isoformat() if value else ''


def _build_row(doc):
    """Flatten one ``pricing_audit`` document into a dict keyed by column name.

    Outbound columns are always produced; inbound columns are added only
    when the flight has an ``inboundSegments`` entry.  A missing ``input``
    or ``output`` sub-document is exported with price ``0``.
    """
    flight = doc['flight']
    row = {
        # The division by 1000 suggests epoch milliseconds -- TODO confirm.
        # fromtimestamp() without tz yields naive local time.
        'timestamp': datetime.fromtimestamp(doc['timestamp'] / 1000).isoformat(),
        'type': doc['type'],
        'input.price': doc.get('input', {'price': 0})['price'],
        'success': doc['success'],
        'output.price': doc.get('output', {'price': 0})['price'],
        'cacheAt': _isoformat_or_empty(doc.get('cacheAt')),
        'cacheExp': _isoformat_or_empty(doc.get('cacheExp')),
    }
    row.update(_direction_columns(flight, 'outbound'))
    if 'inboundSegments' in flight:
        row.update(_direction_columns(flight, 'inbound'))
    return row


counter = 0
# newline='' is what the csv module requires for files handed to a writer;
# without it, platforms that translate '\n' emit '\r\r\n' row endings.
with open('export.csv', mode='w', newline='') as ef:
    writer = csv.DictWriter(ef, fieldnames=fieldnames, delimiter=',',
                            quotechar='"', quoting=csv.QUOTE_MINIMAL)
    # The header row is deliberately not written (writer.writeheader() is
    # skipped): per the original author's note, the S3 files must not have one.
    for counter, it in enumerate(db.pricing_audit.find(), start=1):
        if counter % 1000 == 0:
            # Progress indicator, printed once per 1000 exported documents.
            print('Iterace %d' % counter)
        writer.writerow(_build_row(it))