#!/usr/bin/env python
# -*- encoding: UTF-8 -*-
# Copyright: Thuswise Ltd
# Author: D. Haynes
"""Pack a film ratings text file into fixed-size binary records, or unpack one.

The text input has a header line ``<filmId>:`` followed by lines of the form
``reviewerId,score,YYYY-MM-DD``.  Each rating becomes one ``fmt`` record of
``(ticks, reviewerId, score)``.  With ``-u`` the direction is reversed and the
binary records are written out as tab-separated text.
"""

import os
import sys
from time import strptime, mktime
import cProfile
import pstats
import struct
from optparse import OptionParser

# One record: two unsigned ints (ticks, reviewer id) and one unsigned byte
# (score).  No byte-order prefix, so the struct module uses native order.
fmt = "2IB"
blocksize = struct.calcsize(fmt)


class FilmFile:
    """Iterator over the ratings in a film text file.

    The constructor consumes the first line of ``fObj`` and stores the
    integer before the ``:`` as ``filmId``.  Iterating then yields one
    ``(ticks, reviewerId, score)`` tuple per remaining line.
    """

    def __init__(self, fObj):
        self.fObj = fObj
        header = fObj.readline()
        # partition() keeps the whole line when there is no colon, instead
        # of silently dropping the last character as a find()-based slice does.
        self.filmId = int(header.partition(':')[0])

    def __iter__(self):
        return self

    def __next__(self):
        """
        Generate tuples of ticks, reviewerId, score
        """
        line = next(self.fObj)
        reviewer_id, score, date = line.split(',')
        # Strip the line terminator before parsing so that "\n", "\r\n" and
        # a final line without any newline are all accepted.
        # mktime() interprets the date as local time.
        ticks = int(mktime(strptime(date.strip(), "%Y-%m-%d")))
        return ticks, int(reviewer_id), int(score)

    # Python 2 iterator protocol name.
    next = __next__


def data(fObj):
    """Yield unpacked ``fmt`` tuples read from the binary file ``fObj``.

    Stops at end of file.  A trailing partial record makes
    ``struct.unpack`` raise ``struct.error``.
    """
    while True:
        block = fObj.read(blocksize)
        if not block:
            # A plain return ends the generator; raising StopIteration
            # inside a generator is a RuntimeError on Python 3.7+.
            return
        yield struct.unpack(fmt, block)


def parser():
    """Build the command-line option parser."""
    p = OptionParser("%prog textfile binary")
    p.add_option("-u", help = "unpack a packed file",
                 dest = "unpack", action = "store_true", default = False)
    return p


def main(opts, args):
    """Pack ``args[0]`` into ``args[1]``, or unpack it when ``opts.unpack``.

    Returns 0 on success and 2 when fewer than two paths were given.
    """
    # Validate before opening anything so no handle is left open.
    if len(args) < 2:
        sys.stderr.write("Not enough arguments\n")
        return 2

    # The packed file is binary; the ratings file is text.
    in_mode = 'rb' if opts.unpack else 'r'
    with open(args[0], in_mode) as fIn:
        with open(args[1], 'wb') as fOut:
            if opts.unpack:
                # Encode explicitly: the output handle is binary so the
                # bytes written are identical on every platform.
                fOut.writelines(
                    ("%010d\t%07d\t%d\n" % rec).encode("ascii")
                    for rec in data(fIn))
            else:
                fOut.writelines(
                    struct.pack(fmt, *rec) for rec in FilmFile(fIn))
    return 0


# Normal operation
# c:\Python25\python.exe pack.py d:\data\netflix\download\subset\mv_0000001.txt
# d:\data\netflix\download\subset\mv_0000001.bin

# Test mode
# c:\Python25\python.exe pack.py d:\data\netflix\download\subset\mv_0000001.txt
# d:\data\netflix\download\subset\mv_0000001.bin
# c:\Python25\python.exe pack.py -u d:\data\netflix\download\subset\mv_0000001.bin
# d:\data\netflix\download\subset\mv_0000001.tsv

if __name__ == "__main__":
    p = parser()
    opts, args = p.parse_args()
    sys.exit(main(opts, args))