Pbox2avi update!
Thanks to Phil’s advice on how to use exiftool more effectively, I’ve decided to update the script so it’s a little shorter.
Download: pbox2avi.py
# Python script to extract lecture slides and mp3's from UCSF lecture (.swf)
# files and convert them into a single AVI.
#
# Requires these tools on PATH: swfextract, jpeg2yuv, yuv2lav, mencoder and
# exiftool.
#
# Usage: pbox2avi.py lecture.swf [output.avi]
#
# Runs on Python 2.7 and Python 3: external tools are started through
# `subprocess` with argument lists (no shell), so file names containing
# spaces or shell metacharacters are passed through untouched.

import os
import re
import subprocess
import sys

# Frame rate handed to `jpeg2yuv -f`; also used to turn an mp3 duration into
# the number of still frames to generate for a slide.
FRAMES_PER_SECOND = 2


def _capture(argv):
    """Run *argv* without a shell and return its combined stdout/stderr text.

    One trailing newline is dropped so the result can be fed straight to
    ``float()`` or searched as a listing.

    Raises:
        OSError: if the program cannot be started (e.g. it is not installed).
    """
    proc = subprocess.Popen(
        argv,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        universal_newlines=True,
    )
    text = proc.communicate()[0]
    if text.endswith("\n"):
        text = text[:-1]
    return text


def _call_quietly(argv):
    """Run *argv* without a shell, discard its output, return the exit status.

    Raises:
        OSError: if the program cannot be started (e.g. it is not installed).
    """
    with open(os.devnull, "w") as sink:
        return subprocess.call(argv, stdout=sink, stderr=sink)


def _remove_files(paths):
    """Delete each path in *paths*, ignoring files that cannot be removed."""
    for path in paths:
        try:
            os.remove(path)
        except OSError:
            # Best-effort cleanup: the file may never have been created.
            pass


def _split_ids(ids):
    """Turn a comma/space separated ID string into a list of ID tokens."""
    if not ids:
        return []
    return ids.replace(",", " ").split()


def _render_still(picture, frames, target):
    """Pipe ``jpeg2yuv`` into ``yuv2lav`` to make a silent video of one image.

    Args:
        picture: path of the JPEG to show.
        frames: number of frames to generate (``jpeg2yuv -n``).
        target: path of the AVI written by ``yuv2lav -o``.
    """
    with open(os.devnull, "w") as sink:
        producer = subprocess.Popen(
            ["jpeg2yuv", "-n", str(frames), "-I", "p",
             "-f", str(FRAMES_PER_SECOND), "-j", picture],
            stdout=subprocess.PIPE,
            stderr=sink,
        )
        try:
            consumer = subprocess.Popen(
                ["yuv2lav", "-o", target],
                stdin=producer.stdout,
                stdout=sink,
                stderr=sink,
            )
        finally:
            # Drop our copy of the pipe so jpeg2yuv is not left blocked on a
            # reader that will never come if yuv2lav exits or failed to start.
            producer.stdout.close()
        consumer.wait()
        producer.wait()


def extract_ids(listing, label):
    """Return the ID list that follows *label* in a swfextract listing.

    The text after *label* up to the end of that line is returned with all
    spaces removed, e.g. ``"2,3,5,6"``.  An empty string is returned when
    *label* does not occur in *listing*.

    NOTE(review): assumes swfextract prints each ID list on a single line --
    confirm against the swfextract version in use.
    """
    match = re.search(re.escape(label) + r"([^\r\n]*)", listing)
    if match is None:
        return ""
    return match.group(1).replace(" ", "").strip()


def frame_count(seconds, fps=FRAMES_PER_SECOND):
    """Return how many frames a slide lasting *seconds* needs at *fps*.

    The product is rounded to a whole number of tenths of a frame and then
    truncated to an integer, matching the script's original arithmetic
    ``int(round(2*10*duration)/10)``.
    """
    return int(round(fps * 10 * seconds) / 10.0)


def parse(filename):
    """List the JPEGs and sounds inside *filename* and extract them all.

    Runs ``swfextract`` once to obtain the listing (which is printed), then
    again with ``-P -j <ids> -s <ids>`` to write the pictures and sounds into
    the current directory.

    Args:
        filename: path of the .swf lecture file.

    Returns:
        ``(slide_extract, sound_extract)`` -- two comma-separated ID strings
        (either may be empty when the listing has no such objects), or
        ``None`` when *filename* does not end in ``.swf``.

    Raises:
        OSError: if ``swfextract`` cannot be started.
    """
    # Not a comprehensive check: only the extension is looked at.
    if not filename.lower().endswith(".swf"):
        return None

    listing = _capture(["swfextract", filename])
    print(listing)

    slide_extract = extract_ids(listing, "JPEGs: ID(s)")
    sound_extract = extract_ids(listing, "Sounds: ID(s)")

    # Only pass -j / -s when there is something to extract; an option with an
    # empty ID list would be malformed.
    argv = ["swfextract", filename, "-P"]
    if slide_extract:
        argv += ["-j", slide_extract]
    if sound_extract:
        argv += ["-s", sound_extract]
    print(" ".join(argv))
    if slide_extract or sound_extract:
        _call_quietly(argv)

    return slide_extract, sound_extract


def create(slide_extract, sound_extract, outputfile):
    """Build *outputfile* from the pictures and sounds extracted by parse().

    Each slide picture is turned into a still video as long as its mp3, the
    mp3 is muxed in with ``mencoder``, and the per-slide clips are finally
    concatenated.  Only files this function knows it worked with are removed
    afterwards (``pic<ID>.jpg``, ``sound<ID>.mp3``, ``temp.avi`` and the
    ``slide<N>.avi`` clips it produced).

    Args:
        slide_extract: comma-separated picture IDs as returned by parse().
        sound_extract: comma-separated sound IDs as returned by parse().
        outputfile: path of the combined AVI to write.

    Raises:
        OSError: if one of the external tools cannot be started.
        ValueError: if exiftool does not report a duration for an mp3.
    """
    pictures = _split_ids(slide_extract)
    # Throw out every other picture because the second picture is always a
    # thumbnail.
    slides = pictures[::2]
    sounds = _split_ids(sound_extract)
    print("Number of slides: %s, number of mp3's: %s" % (len(slides), len(sounds)))
    if len(slides) != len(sounds):
        print("Warning: slide and mp3 counts differ; converting the first %d pairs only"
              % (min(len(slides), len(sounds)),))

    clips = []
    for index, (slide, sound) in enumerate(zip(slides, sounds)):
        picture = "pic%s.jpg" % (slide,)
        audio = "sound%s.mp3" % (sound,)
        clip = "slide%d.avi" % (index,)
        _render_still(picture, frame_count(duration(audio)), "temp.avi")
        _call_quietly([
            "mencoder", "temp.avi", "-o", clip,
            "-ovc", "lavc", "-lavcopts", "vcodec=msmpeg4",
            "-oac", "copy", "-audiofile", audio,
        ])
        clips.append(clip)
        print("Finished processing slide %d" % (index,))

    print("Cleaning up...")
    # Remove only what swfextract wrote for us (thumbnails included), never
    # unrelated *.jpg / *.mp3 files that happen to live in this directory.
    _remove_files(
        ["pic%s.jpg" % (pic,) for pic in pictures]
        + ["sound%s.mp3" % (snd,) for snd in sounds]
        + ["temp.avi"]
    )

    if not clips:
        return

    print("Combining slides...")
    _call_quietly(["mencoder", "-oac", "copy", "-ovc", "copy"] + clips + ["-o", outputfile])
    print("Cleaning up...")
    _remove_files(clips)


def duration(mp3):
    """Return the duration of *mp3* as a float, as reported by exiftool.

    Runs ``exiftool <mp3> -Duration -n -S -s`` and converts its output.

    Raises:
        OSError: if ``exiftool`` cannot be started.
        ValueError: if the output is not a single number.
    """
    output = _capture(["exiftool", mp3, "-Duration", "-n", "-S", "-s"])
    try:
        return float(output)
    except ValueError:
        raise ValueError(
            "exiftool reported no usable duration for %s: %r" % (mp3, output)
        )


def main(argv):
    """Command-line entry point; returns the process exit status.

    Args:
        argv: full argument vector, ``[program, swf_file[, output_file]]``.

    Returns:
        0 on success, 1 when the input is not a .swf file, 2 on wrong usage.
    """
    if len(argv) not in (2, 3):
        program = argv[0] if argv else "pbox2avi.py"
        sys.stderr.write("usage: %s lecture.swf [output.avi]\n" % (program,))
        return 2

    extracted = parse(argv[1])
    if extracted is None:
        sys.stderr.write("%s does not look like a .swf file\n" % (argv[1],))
        return 1

    slides, sounds = extracted
    outputfile = argv[2] if len(argv) == 3 else "output.avi"
    create(slides, sounds, outputfile)
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))