Support Board
Date/Time: Thu, 25 Apr 2024 21:19:34 +0000
Post From: Python SCID to TSV / CSV and Pandas DataFrame Conversion
[2013-04-22 00:20:17] |
Kiwi - Posts: 374 |
This is a modified version of the TSV code that adds the option to use a different datetime format: """ Returns excel date time in format YYYYMMDD HHMMSS """ import csv
import re import sys import numpy as np import struct from time import ctime from datetime import date, datetime, time, timedelta # import ipdb # import ipdb; ipdb.set_trace(); ## *** ## def deserialize(excelDateAndTime): """ Returns excel date time as Python datetime object """ date_tok = int(excelDateAndTime) time_tok = round(86400*(excelDateAndTime - date_tok)) d = date(1899, 12, 31) + timedelta(date_tok) t = time(*helper(time_tok, (24, 60, 60, 1000000))) return datetime.combine(d, t) def helper(factor, units): factor /= 86399.99 result = list() for unit in units: value, factor = divmod(factor * unit, 1) result.append(int(value)) result[3] = int(0) return result def datetimeTwo(excelDateAndTime, tzAdjust): """ Returns excel date time in format YYYYMMDD HHMMSS """ t = str(deserialize(excelDateAndTime + tzAdjust.seconds/86400.0)) return (re.sub(r'(:|-)', '', t)[:8], re.sub(r'(:|-)', '', t)[9:15]) def excelTimeAdjust(date_and_time, tzAdjust): return date_and_time + tzAdjust.seconds/86400.0 def getRecords(filename, fileoutput, tzvar, dtformat): """ Read in records from SierraChart .scid data filename dtformat determines the datetime representation in column 1 (and 2) """ sizeHeader = 0x38 sizeRecord = 0x28 if tzvar == 99999: tzAdjust = datetime.fromtimestamp(0) - datetime.utcfromtimestamp(0) else: tzAdjust = timedelta(hours=tzvar) header = ('Date', 'Time', 'O', 'H', 'L', 'C', 'V', 'T') ftsv = open(fileoutput, 'w') fileout = csv.writer(ftsv, delimiter='\t') with open(filename, 'rb') as fscid: fscid.read(sizeHeader) # discard header fileout.writerow(header) for i in xrange(300000): data = fscid.read(sizeRecord) if data != "": dataRow = struct.unpack('d4f4I', data) if dtformat == 0: adjustedTime = dataRow[0] + tzAdjust.seconds/86400.0 outrow = (str(adjustedTime), str(adjustedTime), str(dataRow[1]), str(dataRow[2]), str(dataRow[3]), str(dataRow[4]), str(dataRow[5]), str(dataRow[6])) fileout.writerow(outrow) elif dtformat == 1: date, time = datetimeTwo(dataRow[0], tzAdjust) outrow = (date, time, str(dataRow[1]), str(dataRow[2]), str(dataRow[3]), str(dataRow[4]), str(dataRow[5]), str(dataRow[6])) fileout.writerow(outrow) else: break return if __name__ == '__main__': """ Takes a SierraChart scid file (input argument 1) and converts it to a tab separated file (tsv) Timezone conversion can follow the users local timezone, or a specified integer (input l or an integer but if the default filename is being used, '' must be specified for the filename) """ filename = '/home/john/zRamdisk/SierraChart/Data/HSI-201303-HKFE-TD.scid' tzvar = 0 if len(sys.argv) > 1: if len(sys.argv[1]): filename = sys.argv[1] if len(sys.argv) > 2 and len(sys.argv[2]): print sys.argv[2], type(sys.argv[2]) if sys.argv[2] == 'l': tzvar = 99999 print tzvar, type(tzvar) else: tzvar = float(sys.argv[2]) print tzvar, type(tzvar) fileoutput = filename[:-4] + 'tsv' getRecords(filename, fileoutput, tzvar, 1) |