You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
76 lines
3.4 KiB
76 lines
3.4 KiB
from pymongo import MongoClient |
|
from datetime import datetime |
|
import csv |
|
|
|
def _join_segments(segments, extract):
    """Return the values *extract* yields for each segment, joined by '|'."""
    return '|'.join(extract(segment) for segment in segments)


def _direction_columns(flight, direction):
    """Build the CSV columns describing one travel direction of a flight.

    Args:
        flight: The ``flight`` sub-document of a pricing audit record.
        direction: Either ``'inbound'`` or ``'outbound'``.

    Returns:
        A dict keyed by the ``flight.<direction>Segments.*`` and
        ``flight.<direction>MCX.code`` field names.

    Raises:
        KeyError: If the direction's segments or MCX entry is missing.
    """
    segments = flight[direction + 'Segments']
    prefix = 'flight.%sSegments.' % direction
    return {
        # ``departure``/``arrival`` must expose ``isoformat()``
        # (presumably datetimes decoded by the driver -- confirm).
        prefix + 'departure': _join_segments(
            segments, lambda seg: seg['departure'].isoformat()),
        prefix + 'arrival': _join_segments(
            segments, lambda seg: seg['arrival'].isoformat()),
        prefix + 'origin.airportCode': _join_segments(
            segments, lambda seg: seg['origin']['airportCode']),
        prefix + 'destination.airportCode': _join_segments(
            segments, lambda seg: seg['destination']['airportCode']),
        prefix + 'airline.code': _join_segments(
            segments, lambda seg: seg['airline']['code']),
        'flight.%sMCX.code' % direction: flight[direction + 'MCX']['code'],
    }


def _build_row(it):
    """Flatten one pricing audit document into a dict for ``csv.DictWriter``.

    Outbound data is mandatory; inbound columns are only filled in when the
    document has ``flight.inboundSegments`` (otherwise the writer leaves them
    empty).
    """
    flight = it['flight']
    cache_at = it.get('cacheAt')
    cache_exp = it.get('cacheExp')

    row = {
        # Divided by 1000 before conversion -- presumably the stored value is
        # epoch milliseconds (confirm). The result is naive local time.
        'timestamp': datetime.fromtimestamp(it['timestamp'] / 1000).isoformat(),
        'type': it['type'],
        # A missing input/output sub-document is exported as price 0.
        'input.price': it.get('input', {'price': 0})['price'],
        'success': it['success'],
        'output.price': it.get('output', {'price': 0})['price'],
        'cacheAt': cache_at.isoformat() if cache_at else '',
        'cacheExp': cache_exp.isoformat() if cache_exp else '',
    }
    row.update(_direction_columns(flight, 'outbound'))
    if 'inboundSegments' in flight:
        row.update(_direction_columns(flight, 'inbound'))
    return row


client = MongoClient()
db = client.bonitoo

# Column order of the exported CSV.
fieldnames = [
    'timestamp',
    'type',
    'flight.inboundSegments.departure',
    'flight.inboundSegments.arrival',
    'flight.inboundSegments.origin.airportCode',
    'flight.inboundSegments.destination.airportCode',
    'flight.inboundSegments.airline.code',
    'flight.inboundMCX.code',
    'flight.outboundSegments.departure',
    'flight.outboundSegments.arrival',
    'flight.outboundSegments.origin.airportCode',
    'flight.outboundSegments.destination.airportCode',
    'flight.outboundSegments.airline.code',
    'flight.outboundMCX.code',
    'input.price',
    'success',
    'output.price',
    'cacheAt',
    'cacheExp',
]

# Number of documents exported so far; stays 0 when the collection is empty.
counter = 0

# newline='' is required by the csv module so that the writer's own line
# terminators are not translated a second time by the text layer.
with open('export.csv', mode='w', newline='') as ef:
    writer = csv.DictWriter(
        ef,
        fieldnames=fieldnames,
        delimiter=',',
        quotechar='"',
        quoting=csv.QUOTE_MINIMAL,
    )
    # The header row is intentionally not written for s3 files.

    for counter, it in enumerate(db.pricing_audit.find(), start=1):
        # Progress report every 1000 documents.
        if counter % 1000 == 0:
            print('Iterace %d' % counter)
        writer.writerow(_build_row(it))
|
|
|