# -*- Mode: Python -*-
# Author: Sam Rushing (http://www.nightmare.com/rushing)

# parse the hidden datecode from a sony HDV transport stream,
# and use this information to split the stream into clips.
#
# is this some kind of standard?
# will it work with non-sony HDV streams?

# got a clue on where to look from the linux 'dvgrab' project,
# which refers to two sony private data fields tagged as 0xa0 and 0xa1.
# the stream with PID==2065 looks like an adaptation field...
#
# the BCD sets unused bits to one, which made it a little harder
# to see the pattern...

# Motivation: I want to back up my HDV tapes.  However, I want to
# back up the *original* mpeg-2 data, not some transcoded version.
# However, my long-term backup solution will be to a hard drive.
# The most useful filesystem to use on that drive is FAT32.
# [for example, I can plug it into a PS3 and watch the video]
# FAT32 can't store large files, so I need to split the 11G into
# reasonably-sized chunks.

# On windows I would use HDVSplit.  But I don't want to run windows
# just to split HDV files.
#
# 2009 03 31: had to put in a hack to try to cut off looooong clips
#             before they hit the 4GB FAT32 limit.
import os
import struct
import sys
import time

# Worked example of the BCD-encoded time field:
#
# 11:34:51 => 11:35:03
# d1b4d130 => 84b5d130
#
# 11010001 10110100 11010001 0011 0000
#      5 1      3 4      1 1
# 10000100 10110101 11010001 0011 0000
#      0 4      3 5      1 1
#
# The unused high bits of each byte are set to one, hence the masks below.


def bcd(ch, mask0, mask1):
    """Decode one packed-BCD byte into an integer.

    ch    -- the byte, either as an int (indexing `bytes` on Python 3) or
             as a length-1 str/bytes (indexing `str` on Python 2).
    mask0 -- mask applied to the high nibble (tens digit).
    mask1 -- mask applied to the low nibble (units digit).
    """
    value = ch if isinstance(ch, int) else ord(ch)
    return ((value >> 4) & mask0) * 10 + (value & mask1)


def grok_hms(hms):
    """Decode a BCD time field; return (hour, minute, second).

    Only the first three bytes are used, stored as seconds, minutes, hours.
    """
    ss = bcd(hms[0], 0x7, 0xf)
    mm = bcd(hms[1], 0x7, 0xf)
    hh = bcd(hms[2], 0x3, 0xf)
    return hh, mm, ss


# d4e706
# 11010100 11100111 00000110
# dd       mm       yy
# 14       07       06
# xx001111 xxx23333 44445555
#
def grok_ymd(ymd):
    """Decode a BCD date field; return (year, month, day).

    The three bytes are stored as day, month, two-digit year; the year is
    taken to be in the 2000s.
    """
    dd = bcd(ymd[0], 0x3, 0xf)
    mm = bcd(ymd[1], 0x1, 0xf)
    yy = bcd(ymd[2], 0xf, 0xf)
    return 2000 + yy, mm, dd


class clip:
    """An output clip file, known by (and named after) its start time."""

    def __init__(self, time_tuple):
        # time_tuple is a 9-element tuple as accepted by time.mktime
        self.time_tuple = time_tuple
        self.time_t = time.mktime(time_tuple)
        self.name = 'clip_%s.m2t' % (
            time.strftime('%Y-%m-%d_%H_%M_%S', time_tuple),
        )
        self.file = open(self.name, 'wb')
        self.size = 0

    def write(self, packet):
        """Append a packet to the clip and track the bytes written."""
        self.file.write(packet)
        self.size += len(packet)

    def close(self):
        """Close the file and stamp it with the clip's start time."""
        self.file.close()
        # set the modification time to the timestamp
        os.utime(self.name, (time.time(), self.time_t))


MB = 1024 * 1024
LIMIT = 4 * 1024 * MB
# start a new clip at 90% of the FAT32 limit; integer division keeps
# this an int on Python 3 as well as Python 2
BIG = (90 * LIMIT) // 100

TS_PACKET_SIZE = 188
SYNC_BYTE = b'G'
# PID of the stream carrying the sony datecode
DATECODE_PID = 2065


def split_by_datecode(f, threshold, scan_only, pid=DATECODE_PID):
    """Split a transport stream into clip files using its datecode.

    f         -- seekable binary file object positioned at the start of
                 the stream.  Both plain 188-byte packets and 192-byte
                 packets (4-byte timecode prefix) are recognised.
    threshold -- a new clip starts when the datecode jumps forward by
                 more than this many seconds.
    scan_only -- when true, only report clip boundaries on stderr and
                 write no files.
    pid       -- PID of the packets carrying the datecode.

    A new clip is also started once the current one reaches BIG bytes.
    Packets seen before the first datecode packet are not written.  The
    timecode prefix, when present, is not copied to the output.

    Raises ValueError if the input does not start with a sync byte at
    offset 0 or 4 (this includes an empty input).
    """
    header = f.read(128)
    # slice rather than index: works for str and bytes, and for short input
    if header[0:1] == SYNC_BYTE:
        with_timecode = False
        packet_size = TS_PACKET_SIZE
    elif header[4:5] == SYNC_BYTE:
        with_timecode = True
        packet_size = TS_PACKET_SIZE + 4
    else:
        raise ValueError("doesn't look like an mpeg transport stream")
    f.seek(0)

    last = 0
    n = 0
    fo = None
    try:
        while True:
            if with_timecode:
                # skip the timecode prefix
                f.read(4)
            packet = f.read(TS_PACKET_SIZE)
            if packet[0:1] != SYNC_BYTE:
                # end of file, or lost sync
                break
            if len(packet) < TS_PACKET_SIZE:
                # truncated final packet: keep the bytes, but it is too
                # short to be parsed safely
                if fo is not None:
                    fo.write(packet)
                break
            n += packet_size
            packet_pid = struct.unpack('>H', packet[1:3])[0] & 0x1fff
            if packet_pid == pid:
                ymd = grok_ymd(packet[109:112])
                hms = grok_hms(packet[113:117])
                time_tuple = ymd + hms + (0, 0, -1)
                time_t = int(time.mktime(time_tuple))
                too_big = fo is not None and fo.size >= BIG
                if time_t - last > threshold or too_big:
                    sys.stderr.write(
                        '%5dM %s\n' % (n // MB, time.ctime(time_t))
                    )
                    if fo is not None:
                        fo.close()
                        fo = None
                    if not scan_only:
                        fo = clip(time_tuple)
                # track every datecode so that splitting is driven by the
                # gap between consecutive timestamps
                last = time_t
            if fo is not None:
                fo.write(packet)
    finally:
        # always close (and timestamp) the clip, even on error
        if fo is not None:
            fo.close()


def main(argv=None):
    """Command-line entry point: [-s] <stream>; returns an exit status."""
    if argv is None:
        argv = sys.argv
    args = argv[1:]
    scan_only = '-s' in args
    paths = [arg for arg in args if arg != '-s']
    if not paths:
        sys.stderr.write('usage: %s [-s] <stream.m2t>\n' % (argv[0],))
        return 2
    with open(paths[0], 'rb') as stream:
        split_by_datecode(
            stream,
            # 15 minutes
            15 * 60,
            scan_only,
        )
    return 0


if __name__ == '__main__':
    sys.exit(main())