#
# reprocess.py
#
# (C) Jack Whitham 2009
#
# Use with Python 2.4 or higher. Run without parameters for instructions.
# This program requires the SoX tools, mpg123 and wget.
#
# $Id: reprocess.py,v 1.1 2009/10/25 00:54:14 jack Exp jack $
#

import os
import pickle
import shutil
import subprocess
import sys

HEADER = 0x30               # bytes skipped at the start of an input WAV file
REF_RATE = 1e5              # nominal rate handed to sox when resampling
SAMPLE_RATE = 44100         # samples per second
BYTES_PER_SAMPLE = 4        # bytes per sample position in the audio data
DUMP = True
DATA = "subdat"             # name of the pickled substitution data file
BASE_URL = "http://www.jwhitham.org.uk/magrathea"
#BASE_URL = "http://localhost/magrathea"
BS = 65536                  # chunk size (bytes) used when copying audio


def Samples(x):
    """Convert a duration of x seconds into a whole number of samples."""
    return int(SAMPLE_RATE * x)


def Main():
    """Dispatch on the command line; return the process exit status.

    Three or four arguments select the alignment mode, a single 'diff' or
    'patch' argument selects that mode, and anything else prints the usage
    text and returns 1.
    """
    if 4 <= len(sys.argv) <= 5:
        return Normal_Mode(sys.argv)

    if len(sys.argv) > 1:
        if sys.argv[1] == 'diff':
            return Diff_Mode()
        elif sys.argv[1] == 'patch':
            return Patch_Mode()

    # The argument names shown for usage 2 follow the order in which
    # Normal_Mode reads them from argv.
    usage = [
        "Usage 1 - patch:",
        " %s patch" % sys.argv[0],
        " Automatically patches episodes copied off your store-bought CDs",
        " to return them to the original broadcast version.",
        " See %s/adiff.html for instructions." % BASE_URL,
        "",
        "Usage 2 - align an audio file based on waypoint data",
        " %s <input audio> <input matching> <output audio>" % sys.argv[0],
        " [<reference audio>]",
        " Used to generate files for a listening test, comparing",
        " off-air recordings to the CD recordings.",
    ]
    for line in usage:
        print(line)
    return 1


def _Run_Tool(args):
    """Run an external program given as an argument list; return its status.

    No shell is involved, so file names are never interpreted as shell
    syntax.  A program that cannot be started is reported as status -1.
    """
    try:
        return subprocess.call(args)
    except OSError:
        return -1


def Grab_File(dir_name, file_name):
    """Download BASE_URL/dir_name/file_name with wget.

    The file is stored as "copy_<file_name>" in the current directory and
    that local name is returned.  On failure a message is printed and the
    program exits with status 1.
    """
    print('Downloading: %s' % file_name)
    out = "copy_%s" % file_name
    url = "%s/%s/%s" % (BASE_URL, dir_name, file_name)
    rc = _Run_Tool(["wget", "--no-verbose", "-c", "-O", out, url])
    if (rc != 0) or (not os.path.exists(out)):
        print("")
        print("Failed to download file %s." % file_name)
        sys.exit(1)
    return out


def _Substitution_Key(item):
    """Return the (episode number, begin position) sort key of an item."""
    ((episode_nr, _), (begin, _, _)) = item
    return (episode_nr, begin)


def Sort_Substitution(a, b):
    """Three-way comparison of two substitution items.

    Items are ((episode_nr, dir_name), (begin, end, size)) pairs; they are
    ordered by episode number and then by begin position.  Returns -1, 0
    or 1 like the Python 2 builtin cmp().
    """
    a_key = _Substitution_Key(a)
    b_key = _Substitution_Key(b)
    return (a_key > b_key) - (a_key < b_key)


def _Load_Substitutions():
    """Download and unpickle the substitution list, last patch first."""
    subdat_file = Grab_File(".", DATA)
    try:
        # NOTE(review): pickle.load on a downloaded file can execute
        # arbitrary code if the server or the connection is compromised.
        handle = open(subdat_file, "rb")
        try:
            data = pickle.load(handle)
        finally:
            handle.close()
        subdata = list(data['subdata'].items())
    except Exception:
        print("")
        print("Error reading substitution data file.")
        print("You probably need to download a new copy of this program.")
        print("Please visit %s." % BASE_URL)
        sys.exit(1)

    # Highest episode / latest position first - presumably so that a patch
    # never shifts the offsets of the patches still to be applied.
    subdata.sort(key=_Substitution_Key)
    subdata.reverse()
    return subdata


def _Find_Episode_Files(subdata):
    """Map each episode number in subdata to its "NN.wav" input file.

    Exits with status 1 if any of the expected files is missing.
    """
    episodes = set()
    for ((episode_nr, _), _) in subdata:
        episodes.add(episode_nr)

    episode_name = dict()
    missing = []
    for episode_nr in sorted(episodes):
        name = "%02d.wav" % episode_nr
        if not os.path.exists(name):
            missing.append(name)
        episode_name[episode_nr] = name

    if missing:
        print("")
        for name in missing:
            print("Episode file %s not found." % name)
        sys.exit(1)
    return episode_name


def _Splice(old_name, wav, name, begin, end, size, clip):
    """Write name as old_name with samples begin..end replaced from wav.

    size samples are taken from wav, starting clip samples after its
    header.
    """
    cd_in = open(old_name, "rb")
    try:
        offair_in = open(wav, "rb")
        try:
            cd_out = open(name, "wb")
            try:
                patch2 = Gen_Header(cd_out)
                offair_in.seek(HEADER + (clip * BYTES_PER_SAMPLE))
                Copy(cd_in, cd_out, begin * BYTES_PER_SAMPLE)
                Copy(offair_in, cd_out, size * BYTES_PER_SAMPLE)
                # Skip the part of the CD audio that was replaced.
                cd_in.seek((end - begin) * BYTES_PER_SAMPLE, 1)
                Copy(cd_in, cd_out)
                Patch_Header(cd_out, patch2)
            finally:
                cd_out.close()
        finally:
            offair_in.close()
    finally:
        cd_in.close()


def Patch_Mode():
    """Patch the "NN.wav" episode files in the current directory.

    Downloads the substitution data and each off-air clip, decodes the
    clips with mpg123 and splices them into the episodes.  The results are
    written as "NN_original.wav".
    """
    subdata = _Load_Substitutions()
    episode_name = _Find_Episode_Files(subdata)

    clip = SAMPLE_RATE * 2 # clip 2 seconds from the beginning and the end
    uid = 0
    for ((episode_nr, dir_name), (begin, end, size)) in subdata:
        print('Processing episode %d' % episode_nr)
        mp3 = Grab_File(dir_name, "%s-original.mp3" % dir_name)
        wav = "decoded_%s.wav" % dir_name
        print('Decoding: %s' % mp3)
        if _Run_Tool(["mpg123", "-q", "-w", wav, mp3]) != 0:
            print("")
            print("Failed to decode file %s." % mp3)
            sys.exit(1)

        begin += clip
        end -= clip
        size -= clip * 2

        name = "temp_%u.wav" % uid
        uid += 1
        print('Patching: %s' % dir_name)
        old_name = episode_name[episode_nr]
        _Splice(old_name, wav, name, begin, end, size, clip)

        # Intermediate results of earlier patches are no longer needed;
        # the user's own NN.wav input is never deleted.
        if old_name.startswith("temp"):
            os.unlink(old_name)
        episode_name[episode_nr] = name

    for (episode_nr, name) in sorted(episode_name.items()):
        print('Completed episode %d' % episode_nr)
        os.rename(name, "%02d_original.wav" % episode_nr)
    print('All done.')
def _Ascii(text):
    """Return text as a byte string on both Python 2 and Python 3."""
    return text.encode("ascii")


def _Run_Shell(cmd):
    """Run cmd through the shell; raise RuntimeError on a non-zero status."""
    rc = os.system(cmd)
    if rc != 0:
        raise RuntimeError("command failed (status %d): %s" % (rc, cmd))


def _Write_File(name, data):
    """Write the byte string data to the file called name."""
    handle = open(name, "wb")
    try:
        handle.write(data)
    finally:
        handle.close()


def _Read_File(name):
    """Return the whole content of the file called name as a byte string."""
    handle = open(name, "rb")
    try:
        return handle.read()
    finally:
        handle.close()


def Copy(cd_in, cd_out, size=-1):
    """Copy size bytes from cd_in to cd_out in chunks of at most BS bytes.

    A negative size (the default) copies until cd_in is exhausted.  The
    copy also stops early if cd_in runs out of data.
    """
    remaining = size
    while remaining != 0:
        if remaining > 0:
            wanted = min(BS, remaining)
        else:
            wanted = BS
        chunk = cd_in.read(wanted)
        cd_out.write(chunk)
        if remaining > 0:
            remaining -= len(chunk)
        if (wanted == 0) or (len(chunk) != wanted):
            break


def Diff_Mode():
    """Generate every clip directory and the substitution data file.

    Each Make_Patch call returns an offset that is handed to the next
    call for the same episode; the first call of an episode starts at 0.
    """
    def X(m, s, plus=0):
        # Minutes, seconds (and extra seconds) to a position in the units
        # Make_Patch expects, which are twice the sample index.
        return int(((m * 60.0) + s + plus) * SAMPLE_RATE * 2)

    e = Make_Patch("magrathea", "Missing Magrathea Scene", 3,
            86325083, 86587831, 10, 0)

    e = Make_Patch("mice", "Dinner with Frankie and Benjy Mouse", 4,
            2*52454879, 2*64349117, 6, 0)
    e = Make_Patch("micecredits", "Credits", 4,
            2*74214127, 2*76542003, 4, e)

    e = Make_Patch("allitnils", "Lintilla and the Allitnils", 12,
            X(4, 35.692), X(10, 20), 5, 0)
    e = Make_Patch("passengercabin", "Escape from the Passenger Cabin", 12,
            X(10, 58.148), X(10, 58.148, 27), 5, e)
    e = Make_Patch("autopilot", '"Return to your seats!"', 12,
            X(12, 53.585), X(12, 53.585, 30), 5, e)
    e = Make_Patch("autobiography", "Marvin's Autobiography", 12,
            X(14, 4.511), X(14, 4.511, 10), 5, e)
    e = Make_Patch("flashback", '"Reality is on the blink again"', 12,
            X(15, 44.619), X(15, 44.619, 20), 5, e)
    e = Make_Patch("shack", "The Man in the Shack", 12,
            X(19, 39), X(20, 10), 10, e)
    e = Make_Patch("ending", "The End", 12,
            X(24, 55.301), X(25, 12.155, 11), 10, e)


def Make_Patch(dir_name, english_name, episode_nr, begin, end,
        margin_size, offset):
    """Create the clip directory dir_name for one substituted scene.

    begin and end are CD positions in units of half a sample (they are
    halved before use); margin_size is the margin, in seconds, added on
    both sides.  Writes dir_name/info.html, records the substitution in
    the DATA pickle, extracts the CD and off-air clips and encodes both to
    MP3 with lame.  Returns offset plus the length of the off-air clip
    minus the length of the CD clip.
    """
    print(dir_name)
    relation_list = Get_Relation_List("tracks/%02d_lineup.txt" % episode_nr)
    gradient = Calc_Gradient(relation_list)

    # All callers in this file pass integers, so floor division matches
    # the Python 2 behaviour of the original "/".
    cd_locations = (begin // 2, end // 2)
    offair_locations = [Translate(relation_list, x, False)
                        for x in cd_locations]
    if None in offair_locations:
        raise ValueError("%s: CD position outside the waypoint data"
                         % dir_name)
    cd_locations = Expand(cd_locations, margin_size)
    offair_locations = Expand(offair_locations, margin_size * gradient)

    # Best effort: the directory usually does not exist yet.
    try:
        shutil.rmtree(dir_name)
    except OSError:
        pass
    os.mkdir(dir_name)

    offair_begin = offair_locations[0] / gradient
    cd_begin = cd_locations[0] * 1.0
    correction = cd_begin - offair_begin
    versions = [
        ("red", "<<", "original", 1.0 / gradient, offair_locations,
            correction + offset),
        ("blue", ">>", "CD", 1.0, cd_locations, 0),
    ]

    # NOTE(review): the HTML tags of every string written below were lost
    # from the copy of the source this was rebuilt from; only the visible
    # text ("%s (episode %u)", "%s %s version", "Location: %s - %s",
    # "Length: %s") and the argument order survived.  The markup here is a
    # reconstruction - confirm it against the original file.
    size = []
    html = open("%s/info.html" % dir_name, "wt")
    try:
        html.write('<div>')
        html.write('<p>\n')
        html.write("<b>%s" " (episode %u)</b>" % (english_name, episode_nr))
        html.write("</p><table>\n\n")
        for (colour, symbol, version, f, locations, c) in versions:
            (begin, end) = locations
            begin = (begin * f) + c
            end = (end * f) + c
            size.append(end - begin)
            href = "%s/%s-%s.mp3" % (dir_name, dir_name, version.lower())
            html.write("<tr>"
                '<td><a href="%s">'
                '<font color="%s">'
                '%s %s version</font></a></td>'
                '<td><a href="%s">mp3</a></td>'
                '<td>Location: %s - %s</td>'
                '<td>Length: %s</td>'
                '</tr>\n' % (
                    href, colour, symbol, version, href,
                    Get_Stamp(begin).strip(), Get_Stamp(end).strip(),
                    Get_Stamp(end - begin).strip()))
        html.write("</table>")
        html.write("</div>\n\n")
    finally:
        html.close()

    offset += size[0] # original
    offset -= size[1] # CD

    # A missing or unreadable data file simply means starting afresh.
    try:
        handle = open(DATA, "rb")
        try:
            data = pickle.load(handle)
        finally:
            handle.close()
    except Exception:
        data = dict()
    if 'subdata' not in data:
        data['subdata'] = dict()

    (begin, end) = offair_locations
    size = int((end - begin) / gradient)
    data['subdata'][(episode_nr, dir_name)] = tuple(
            list(cd_locations) + [size])
    handle = open(DATA, "wb")
    try:
        # Protocol 0 is what Python 2 used by default for this file.
        pickle.dump(data, handle, 0)
    finally:
        handle.close()

    Extract_Clip("tracks/%02d_reference.wav" % episode_nr,
        "%s/%s-cd.wav" % (dir_name, dir_name), cd_locations)
    Extract_Clip("tracks/%02d_offair.wav" % episode_nr,
        "%s/%s-original.wav" % (dir_name, dir_name),
        offair_locations, gradient)
    for x in ("cd", "original"):
        _Run_Shell("lame -h -V 3 %s/%s-%s.wav %s/%s-%s.mp3" % (
                dir_name, dir_name, x, dir_name, dir_name, x))
        os.unlink("%s/%s-%s.wav" % (dir_name, dir_name, x))
    return offset


def Expand(locations, margin_seconds):
    """Widen the (begin, end) sample range by margin_seconds on each side."""
    (begin, end) = locations
    assert (end - begin) > 0
    margin = int(margin_seconds * SAMPLE_RATE)
    return (begin - margin, end + margin)


def Extract_Clip(input_audio, output_audio, locations, new_rate=1.0):
    """Write samples begin..end of input_audio to a new WAV file.

    locations is the (begin, end) sample range.  When new_rate is not 1.0
    the audio is passed through Resample first.
    """
    (begin, end) = locations
    size = end - begin
    assert size > 0

    # NOTE(review): the offset is applied from byte 0 of the input, i.e.
    # the input's own header is not skipped here - confirm this is meant.
    offair_in = open(input_audio, "rb")
    try:
        offair_in.seek(begin * BYTES_PER_SAMPLE, 0)
        data = offair_in.read(size * BYTES_PER_SAMPLE)
    finally:
        offair_in.close()

    if new_rate != 1.0:
        data = Resample(data, new_rate)

    offair_out = open(output_audio, "wb")
    try:
        patch2 = Gen_Header(offair_out)
        offair_out.write(data)
        Patch_Header(offair_out, patch2)
    finally:
        offair_out.close()


def Normal_Mode(argv):
    """Run the alignment mode from a command line; return the exit status.

    argv[1] is the input audio, argv[2] the matching (waypoint) file,
    argv[3] the output audio and the optional argv[4] a reference audio
    file to compare against.
    """
    assert ( 4 <= len(argv) <= 5 )
    dump = None
    if len(argv) > 4:
        dump = open(argv[4], "rb")
    try:
        return Do_Work(input_audio=argv[1],
                input_matching=argv[2],
                output_audio=argv[3],
                dump=dump)
    finally:
        if dump is not None:
            dump.close()


def Calc_Gradient(relation_list):
    """Return the overall off-air / CD length ratio of the waypoint data.

    Only segments longer than ten seconds on both sides, whose own ratio
    lies strictly between 0.96 and 1.04, contribute.  Raises ValueError
    if no segment qualifies.
    """
    points = list(relation_list)
    points.reverse()
    total_x = total_y = 0
    minimum = SAMPLE_RATE * 10
    for ((x1, y1), (x2, y2)) in zip(points, points[1:]):
        assert x2 > x1
        assert y2 > y1
        size_x = x2 - x1
        size_y = y2 - y1
        if (size_x > minimum) and (size_y > minimum):
            gradient = float(size_y) / float(size_x)
            if 0.96 < gradient < 1.04:
                total_x += size_x
                total_y += size_y
    if total_x == 0:
        raise ValueError("no usable segment in the waypoint data")
    return float(total_y) / float(total_x)


def Translate(relation_list, sample, reverse_direction):
    """Map a sample position through the waypoint data.

    With reverse_direction false, the first element of each waypoint is
    the input axis and the second the output axis; with it true the two
    are swapped.  The position is interpolated linearly between the two
    surrounding waypoints and rounded down to a multiple of 4.  Returns
    None when sample lies outside the waypoints.  relation_list itself is
    not modified.
    """
    if reverse_direction:
        points = [(y, x) for (x, y) in relation_list]
    else:
        points = list(relation_list)
    # NOTE(review): the original indentation was lost; reversing in both
    # branches is assumed, since the loop below needs ascending order.
    points.reverse()

    for ((x1, y1), (x2, y2)) in zip(points, points[1:]):
        assert x2 > x1
        assert y2 > y1
        if x1 <= sample < x2:
            interp = float(sample - x1) / float(x2 - x1)
            return (int(float(y2 - y1) * interp) + y1) & ~3
    return None # Not translatable


def Get_Relation_List(input_matching):
    """Read the waypoint file input_matching.

    Each non-blank line holds two integers, a CD position and an off-air
    position, both of which are halved.  A waypoint with a negative CD
    position is moved to CD position 0 by extrapolating from the waypoint
    on the previous line.  Returns the (cd, offair) pairs in file order.
    """
    cd1 = offair1 = 0
    relation_list = []
    handle = open(input_matching)
    try:
        for line in handle:
            if not line.strip():
                continue
            (cd, offair) = line.split()
            cd0 = int(cd) // 2
            offair0 = int(offair) // 2
            if cd0 < 0:
                offair0 = offair1 - (
                    (cd1 * (offair1 - offair0)) // (cd1 - cd0))
                cd0 = 0
            relation_list.append((cd0, offair0))
            cd1 = cd0
            offair1 = offair0
    finally:
        handle.close()
    return relation_list


def _Align_Audio(relation_list, offair_in, offair_out, dump):
    """Resample offair_in segment by segment into offair_out.

    relation_list is consumed from its end.  dump is an optional open
    reference audio file; when given, segments needing a rate change of
    3% or more are also saved as numbered WAV files for inspection.
    """
    offair_in.read(HEADER)
    patch2 = Gen_Header(offair_out)

    (cd0, offair0) = relation_list.pop()
    if offair0 > 0:
        offair_in.read(offair0 * BYTES_PER_SAMPLE)
    if cd0 > 0:
        # Pad the output with silence up to the first waypoint.
        offair_out.write(_Ascii('\0') * (cd0 * BYTES_PER_SAMPLE))
    if dump is not None:
        dump.read(HEADER)
        dump.read(cd0 * BYTES_PER_SAMPLE)

    serial = 1
    previous_rate = SAMPLE_RATE
    new_rate = 1.0
    while relation_list:
        tstamp = Get_Stamp(cd0).strip()
        cd1 = cd0
        offair1 = offair0
        (cd0, offair0) = relation_list.pop()
        sys.stdout.write('%7.2e %7.2e ' % (offair0 - offair1, cd0 - cd1))

        reference = None
        if dump is not None:
            reference = dump.read((cd0 - cd1) * BYTES_PER_SAMPLE)
        data = offair_in.read((offair0 - offair1) * BYTES_PER_SAMPLE)

        ratio = float(cd0 - cd1) / float(offair0 - offair1)
        new_rate = 1.0 / ratio
        edit = 100.0 * (new_rate - 1.0)
        sys.stdout.write(' %10.2f' % (new_rate * REF_RATE))

        if (dump is not None) and not (-3.0 < edit < 3.0):
            _Write_File("temp1.raw", data)
            _Write_File("temp3.raw", reference)
            _Run_Shell("sox -r %1.0f -c 2 -s -2 -t raw temp3.raw "
                "%03d_reference_%s.wav" % (SAMPLE_RATE, serial, tstamp))
            _Run_Shell("sox -r %1.0f -c 2 -s -2 -t raw temp1.raw -r %1.0f "
                "%03d_offair_%s.wav" % (previous_rate, SAMPLE_RATE,
                    serial, tstamp))
            serial += 1
        else:
            previous_rate = SAMPLE_RATE / ratio

        # NOTE(review): the original indentation was lost; the resample
        # and write are assumed to run for every segment - confirm.
        data = Resample(data, new_rate)
        cd0 = (len(data) // BYTES_PER_SAMPLE) + cd1
        offair_out.write(data)
        sys.stdout.write(' %s %4.1f%%\n' % (Get_Stamp(cd0), edit))

    # Whatever follows the last waypoint is resampled at the last rate.
    # The original ran os.system(cmd) here, but cmd was never set to the
    # resampling command in this function.
    data = offair_in.read()
    if data:
        data = Resample(data, new_rate)
    offair_out.write(data)
    print('%d extra bytes' % len(data))
    Patch_Header(offair_out, patch2)


def Do_Work(input_audio, input_matching, output_audio, dump):
    """Align input_audio to the CD timeline described by input_matching.

    The result is written to output_audio as a WAV file.  dump is an open
    reference audio file, or None.  Returns 0 on success and 1 if the
    output file already exists or the matching file holds no waypoints.
    """
    if os.path.exists(output_audio):
        print('Output file already exists - refusing to overwrite')
        return 1

    relation_list = Get_Relation_List(input_matching)
    if not relation_list:
        print('No waypoints found in %s' % input_matching)
        return 1

    offair_in = open(input_audio, "rb")
    try:
        offair_out = open(output_audio, "wb")
        try:
            _Align_Audio(relation_list, offair_in, offair_out, dump)
        finally:
            offair_out.close()
    finally:
        offair_in.close()
    return 0


def Resample(data, new_rate):
    """Resample raw audio data with sox and return the result.

    The data is declared to sox as having rate REF_RATE * new_rate and is
    converted to REF_RATE.  Uses temp1.raw and temp2.raw in the current
    directory as scratch files.
    """
    _Write_File("temp1.raw", data)
    _Run_Shell("sox -r %1.0f -c 2 -s -2 -t raw temp1.raw "
        "-r %1.0f temp2.raw" % (REF_RATE * new_rate, REF_RATE))
    return _Read_File("temp2.raw")


def Get_Stamp(cd0):
    """Format the sample position cd0 as "mmmmm:ss.mmm".

    Floats are truncated to whole samples first.  The minutes field is
    right-aligned in five characters, so callers strip the result when
    they do not want the padding.
    """
    if isinstance(cd0, float):
        cd0 = int(cd0)
    total_ms = (cd0 * 1000) // SAMPLE_RATE
    (total_s, ms) = divmod(total_ms, 1000)
    (m, s) = divmod(total_s, 60)
    return '%5d:%02d.%03d' % (m, s, ms)


def Gen_Header(offair_out):
    """Write a 44-byte PCM WAV header (2 channels, 16 bits) to offair_out.

    Both length fields are written as the placeholder "----"; call
    Patch_Header once all audio has been written.  Returns the file
    offset of the data-length field.
    """
    import struct
    offair_out.write(_Ascii("RIFF----WAVEfmt "))
    # fmt chunk: length 16, PCM, 2 channels, rate, bytes per second,
    # block alignment, bits per sample.
    offair_out.write(struct.pack("<IHHIIHH", 16, 1, 2,
        int(SAMPLE_RATE), int(SAMPLE_RATE * 4), 4, 16))
    offair_out.write(_Ascii("data----"))
    patch2 = offair_out.tell() - 4
    return patch2


def Patch_Header(offair_out, patch2):
    """Fill in the two length fields left open by Gen_Header.

    Must be called with offair_out positioned at the end of the audio.
    patch2 is the offset returned by Gen_Header.  The file position is
    left just after the data-length field.
    """
    import struct
    end = offair_out.tell()
    for (position, value) in [(4, end - 8), (patch2, end - 44)]:
        offair_out.seek(position, 0)
        # Mask to 32 bits, as the original byte-by-byte writer did.
        offair_out.write(struct.pack("<I", int(value) & 0xffffffff))


if ( __name__ == "__main__" ):
    sys.exit(Main())