From 82b5d189ad384d46f5d343e850063c2514251bcd Mon Sep 17 00:00:00 2001
From: Dongmao Zhang <deanraccoon@gmail.com>
Date: Tue, 9 Jul 2013 09:52:35 +0800
Subject: [PATCH] Add new PostProcessor:FlvMergeFile, this is used for website
 such as http://www.youku.com which split video into several parts

---
 youtube_dl/PostProcessor.py   |  19 ++
 youtube_dl/__init__.py        |   8 +-
 youtube_dl/extractor/youku.py |   4 +
 youtube_dl/flvconcat.py       | 489 ++++++++++++++++++++++++++++++++++
 4 files changed, 518 insertions(+), 2 deletions(-)
 create mode 100644 youtube_dl/flvconcat.py

diff --git a/youtube_dl/PostProcessor.py b/youtube_dl/PostProcessor.py
index 8c5e53991..e2e0b1215 100644
--- a/youtube_dl/PostProcessor.py
+++ b/youtube_dl/PostProcessor.py
@@ -231,3 +231,22 @@ class FFmpegVideoConvertor(FFmpegPostProcessor):
         information['format'] = self._preferedformat
         information['ext'] = self._preferedformat
         return False,information
+
+
+class FlvFileMerge(PostProcessor):
+    def __init__(self):
+        self.input_files = []
+    def run(self, information):
+        self.input_files.append(information.get('filepath').encode('utf8'))
+        file_format =  information.get('format')
+        if information.get('last') and (file_format == "flv" or file_format == "f4v"):
+            try:
+                merge_title = information.get('title').encode('utf8') + "-MERGED.flv"
+            except:
+                merge_title = "merged.flv"
+            import flvconcat
+            self._downloader.to_screen(u'Start to merge files')
+            ret = flvconcat.concat_flv(self.input_files, merge_title)
+        return True, information
+
+
diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py
index db63d0adb..e9b23f9a6 100644
--- a/youtube_dl/__init__.py
+++ b/youtube_dl/__init__.py
@@ -320,6 +320,8 @@ def parseOpts(overrideArguments=None):
             help='keeps the video file on disk after the post-processing; the video is erased by default')
     postproc.add_option('--no-post-overwrites', action='store_true', dest='nopostoverwrites', default=False,
             help='do not overwrite post-processed files; the post-processed files are overwritten by default')
+    postproc.add_option('-m', '--merge-video', action='store_true', dest='mergevideo', default=False,
+            help='video files in some sites(such as www.youku.com) are split, use this option to merge files')
 
 
     parser.add_option_group(general)
@@ -343,7 +345,7 @@ def parseOpts(overrideArguments=None):
             userConfFile = os.path.join(os.path.expanduser('~'), '.config', 'youtube-dl.conf')
         systemConf = _readOptions('/etc/youtube-dl.conf')
         userConf = _readOptions(userConfFile)
-        commandLineConf = sys.argv[1:] 
+        commandLineConf = sys.argv[1:]
         argv = systemConf + userConf + commandLineConf
         opts, args = parser.parse_args(argv)
         if opts.verbose:
@@ -377,7 +379,7 @@ def _real_main(argv=None):
     # Set user agent
     if opts.user_agent is not None:
         std_headers['User-Agent'] = opts.user_agent
-    
+
     # Set referer
     if opts.referer is not None:
         std_headers['Referer'] = opts.referer
@@ -602,6 +604,8 @@ def _real_main(argv=None):
         ydl.add_post_processor(FFmpegExtractAudioPP(preferredcodec=opts.audioformat, preferredquality=opts.audioquality, nopostoverwrites=opts.nopostoverwrites))
     if opts.recodevideo:
         ydl.add_post_processor(FFmpegVideoConvertor(preferedformat=opts.recodevideo))
+    if opts.mergevideo:
+        ydl.add_post_processor(FlvFileMerge())
 
     # Update version
     if opts.update_self:
diff --git a/youtube_dl/extractor/youku.py b/youtube_dl/extractor/youku.py
index eb9829801..c553d14e6 100644
--- a/youtube_dl/extractor/youku.py
+++ b/youtube_dl/extractor/youku.py
@@ -113,4 +113,8 @@ class YoukuIE(InfoExtractor):
             }
             files_info.append(info)
 
+        #add an indicator to tell the last video
+        if len(files_info) > 1:
+            files_info[-1]['last'] = True
+
         return files_info
diff --git a/youtube_dl/flvconcat.py b/youtube_dl/flvconcat.py
new file mode 100644
index 000000000..5d41eb26f
--- /dev/null
+++ b/youtube_dl/flvconcat.py
@@ -0,0 +1,489 @@
+#!/bin/env python
+#-*- coding: utf-8 -*-
+
+
+#use code from https://github.com/shinohane/FLVCopyCat
+#http://www.apache.org/licenses/LICENSE-2.0
+
+
+
+#Code for Binary Data Read
+import struct
+
+class EndOfFile(Exception):
+    pass
+
+# UI32
+def get_ui32(f):
+    try:
+        ret = struct.unpack(">I", f.read(4))[0]
+    except struct.error:
+        raise EndOfFile
+    return ret
+
+def make_ui32(num):
+    return struct.pack(">I", num)
+
+# SI32 extended
+def get_si32_extended(f):
+    # The last 8 bits are the high 8 bits of the whole number
+    # That's how Adobe likes it. Go figure...
+    low_high = f.read(4)
+    if len(low_high) < 4:
+        raise EndOfFile
+    combined = low_high[3] + low_high[:3]
+    return struct.unpack(">i", combined)[0]
+
+def make_si32_extended(num):
+    ret = struct.pack(">i", num)
+    return ret[1:] + ret[0]
+
+# UI24
+def get_ui24(f):
+    try:
+        high, low = struct.unpack(">BH", f.read(3))
+    except struct.error:
+        raise EndOfFile
+    ret = (high << 16) + low
+    return ret
+
+def make_ui24(num):
+    ret = struct.pack(">I", num)
+    return ret[1:]
+
+# UI16
+def get_ui16(f):
+    try:
+        ret = struct.unpack(">H", f.read(2))[0]
+    except struct.error:
+        raise EndOfFile
+    return ret
+
+def make_ui16(num):
+    return struct.pack(">H", num)
+
+# SI16
+def get_si16(f):
+    try:
+        ret = struct.unpack(">h", f.read(2))[0]
+    except struct.error:
+        raise EndOfFile
+    return ret
+
+def make_si16(num):
+    return struct.pack(">h", num)
+
+# UI8
+def get_ui8(f):
+    try:
+        ret = struct.unpack("B", f.read(1))[0]
+    except struct.error:
+        raise EndOfFile
+    return ret
+
+def make_ui8(num):
+    return struct.pack("B", num)
+
+# DOUBLE
+def get_double(f):
+    data = f.read(8)
+    try:
+        ret = struct.unpack(">d", data)[0]
+    except struct.error:
+        raise EndOfFile
+    return ret
+
+def make_double(num):
+    return struct.pack(">d", num)
+
+#ScriptDataString
+def get_sd_string(f):
+    size = get_ui16(f)
+    return f.read(size)
+
+def make_sd_string(string):
+    data = make_ui16(len(string))
+    data += string.encode()
+    return data
+
+def get_sd_long_string(f):
+    size = get_ui32(f)
+    return f.read(size)
+
+def make_sd_long_string(string):
+    data = make_ui32(len(string))
+    data += string.encode()
+    return data
+
+#ScriptDataDate
+from datetime import datetime
+import time
+
+def get_sd_date(f):
+    date = get_double(f)
+    f.read(2)
+    return datetime.fromtimestamp(date)
+
+def make_sd_date(date):
+    data = make_double(time.mktime(date.timetuple()))
+    data += mk_ui16(8)
+    return data
+
+#Code for analyse and join flv file
+import argparse, os
+import pprint
+import struct
+import sys
+
+
+#Open Input Files & Check File Headers
+
+def get_header(f):
+    sign = f.read(3)
+    if not str(sign) == 'FLV':
+        return {'error':'Unrecognized FLV Header'}
+    if get_ui8(f) != 1:
+        return {'error':'Unsupported File Version'}
+    flags = get_ui8(f)
+    if not flags in [5,4,1]:
+        return {'error':'Neither Video Nor Audio stream flag is set'}
+    return {'signature':sign,
+            'offset':get_ui32(f),
+            'flags':flags,
+            'error':None
+            }
+
+
+#Load Metadata From files
+import io
+
+class ScriptObject(object):
+    def __init__(self, f, size):
+        self.data = f.read(size)
+        #Parse the metaData
+        script = io.BytesIO(self.data)
+        if script.read(1) != '\2':
+            return
+        self.name = get_sd_string(script)
+        #print self.name
+        if(self.name != r"onMetaData"):
+            return
+        self.valuetype = get_ui8(script)
+        if(self.valuetype != 8):
+            return
+        string = str(self.data)
+        #Metadata the script can be recognized
+        self.metadata = {
+                'creator'               : None, #Static
+                'metadatacreator'       : None, #Static
+                'hasKeyframes'          : None, #Static
+                'hasVideo'              : None, #Static
+                'hasAudio'              : None, #Static
+                'hasMetadata'           : None, #Static
+                'canSeekToEnd'          : None, #Static
+                'duration'              : None, #Sum_Up
+                'videosize'             : None, #Sum_Up
+                'framerate'             : None, #Static
+                'videodatarate'         : None, #Static
+                'videocodecid'          : None, #Static
+                'width'                 : None, #Static
+                'height'                : None, #Static
+                'audiosize'             : None, #Sum_Up
+                'audiodatarate'         : None, #Static
+                'audiocodecid'          : None, #Static
+                'audiosamplerate'       : None, #Static
+                'audiosamplesize'       : None, #Static
+                'stereo'                : None, #Static
+                'filesize'              : None, #Final Check
+                'datasize'              : None, #Accumulation
+                'lasttimestamp'         : None, #Accumulation
+                'lastkeyframetimestamp' : None, #Accumulation
+                'lastkeyframelocation'  : None  #Accumulation
+                }
+        funcs = {
+                0 : get_double,
+                1 : get_ui8,
+                2 : get_sd_string,
+                3 : None,
+                4 : get_sd_string,
+                7 : get_ui16,
+                8 : None,
+                10: None,
+                11: get_sd_date,
+                12: get_sd_long_string
+                }
+        for i in self.metadata.keys():
+            position = string.find(i)
+            if position == -1:
+                continue
+            position += len(i)
+            script.seek(position)
+            fieldtype = get_ui8(script)
+            func = funcs[fieldtype]
+            if callable(func):
+                self.metadata[i] = (fieldtype, func(script))
+
+        self.metadata['metadatacreator'] = \
+            (2,r"FLVCopyCat - Shinohane".encode())
+
+    def generate(self):
+        funcs = {
+                0 : make_double,
+                1 : make_ui8,
+                2 : make_sd_string,
+                3 : None,
+                4 : make_sd_string,
+                7 : make_ui16,
+                8 : None,
+                10: None,
+                11: make_sd_date,
+                12: make_sd_long_string
+                }
+        out = io.BytesIO()
+        out.write(make_ui8(2)) #Object Type: String
+        out.write(make_sd_string("onMetaData"))
+        out.write(make_ui8(8)) #Object Type: ECMA Array
+        out.write(make_ui32(len(self.metadata))) #Array Size
+        for k,v in self.metadata.items():
+            if not v: continue
+            out.write(make_sd_string(k))
+            out.write(make_ui8(v[0]))
+            out.write(funcs[v[0]](v[1]))
+        out.write(make_ui24(9)) #End flag for ECMAArray
+        out.write(make_ui24(9)) #End flag for DataObject
+        out.seek(0)
+        self.data = out.read()
+        out.close()
+        #pprint.PrettyPrinter().pprint(self.metadata)
+
+    def __ne__(self, other):
+        return not self.__eq__(other)
+
+    def __eq__(self, so):
+        chklst = [
+                'hasKeyframes',
+                'hasVideo',
+                'hasAudio',
+                'hasMetadata',
+                'canSeekToEnd',
+                'framerate',
+                'videodatarate',
+                'videocodecid',
+                'width',
+                'height',
+                'audiodatarate',
+                'audiocodecid',
+                'audiosamplerate',
+                'audiosamplesize',
+                'stereo'
+                ]
+        for i in chklst:
+            if self.metadata[i] != so.metadata[i]:
+                print ">>>>>>>%s mismatch.\n%s\n-------\n%s" % \
+                        (i,self.metadata[i],so.metadata[i])
+                return False
+        return True
+
+    def __add__(self, so):
+        acculst = {
+                'duration'  : 0,
+                'videosize' : 0,
+                'audiosize' : 0,
+                'datasize' : 0,
+                }
+        for i in acculst:
+            if not self.metadata[i]: continue
+            self.metadata[i] = (self.metadata[i][0],so.metadata[i][1] + self.metadata[i][1])
+        return self
+
+    def write(self, f):
+        #Do not write the script object to file
+        #Use generate then write
+        f.write(self.data)
+
+class VideoTag(object):
+    def __init__(self, f, size):
+        self.data = f.read(size)
+        self.frametype = struct.unpack("B",self.data[0])[0] >> 4
+    def write(self, f):
+        f.write(self.data)
+
+class AudioTag(object):
+    def __init__(self, f, size):
+        self.data = f.read(size)
+    def write(self, f):
+        f.write(self.data)
+
+class FLVTag(object):
+    def __init__(self, f):
+        #Read but not use this value
+        self.pts = get_ui32(f)
+        self.tagtype = get_ui8(f)
+        self.datasize = get_ui24(f)
+        self.timestamp = get_si32_extended(f)
+        self.streamid = get_ui24(f)
+        if self.tagtype == 18:
+            #Script Type Tag
+            self.data = ScriptObject(f, self.datasize)
+            self.data.generate()
+            #Recalc the datasize
+            self.datasize = len(self.data.data)
+        elif self.tagtype == 9:
+            #Video Type Tag
+            self.data = VideoTag(f, self.datasize)
+        elif self.tagtype == 8:
+            #Audio Type Tag
+            self.data = AudioTag(f, self.datasize)
+        self.tagsize = self.datasize + 14
+
+    def write(self, f):
+        #ignore previous tag size but write current size to file
+        #f.write(make_ui32(self.pts))
+        f.write(make_ui8(self.tagtype))
+        f.write(make_ui24(self.datasize))
+        f.write(make_si32_extended(self.timestamp))
+        f.write(make_ui24(self.streamid))
+        self.data.write(f)
+        f.write(make_ui32(self.tagsize))
+
+
+def concat_flv(inputs_files, output):
+
+  if len(inputs_files) == 0:
+    print "no file list"
+    return -1
+
+  header = None
+  metadata = None
+
+  print "Header Check =>",
+  fs = []
+
+  for i in inputs_files:
+      fs.append(open(i, 'rb'))
+      h = get_header(fs[len(fs)-1])
+      #print h
+      if not header:
+          header = h
+      if h['error'] != None:
+          print h['error']
+          return -1
+      if h != header:
+          print '%s : video type vary from others' % i
+          return -1
+
+  print "OK!"
+
+
+
+  if os.path.exists(output):
+        os.remove(output)
+
+  #Open output file
+  fo = open(output, 'wb')
+
+  #Build FLV Header For Output File
+  fo.write(header['signature'])
+  fo.write(make_ui8(1))
+  fo.write(make_ui8(header['flags']))
+  fo.write(make_ui32(header['offset']))
+  #PreviousTagSize 0
+  fo.write(make_ui32(0))
+
+
+  metadata = None
+  metaposition = 13 #Default metadata position
+  for f in fs:
+      tag = FLVTag(f)
+      if type(metadata) != FLVTag and type(tag.data) == ScriptObject:
+          metadata = tag
+          if metaposition != fo.tell():
+              metaposition = fo.tell()
+              print ("Warning: Metadata position %d in file is not default "+\
+                     "position") % metaposition
+          metadata.write(fo)
+      elif type(tag.data) == ScriptObject:
+          if tag.data != metadata.data:
+              print "<<<<<<<Media type mismatch"
+              print tag.data.metadata
+              exit(-1)
+          metadata.data += tag.data
+
+  datasize = float(metadata.datasize)
+  lasttimestamp = float(0.0)
+  lastkeyframetimestamp = float(0.0)
+  lastkeyframelocation = float(0.0)
+  timestampbase = float(0.0)
+  framegap = 1000 / metadata.data.metadata["framerate"][1]
+
+  for f in fs:
+      try:
+          while True:
+              tag = FLVTag(f)
+              datasize += tag.datasize
+              tag.timestamp += timestampbase
+              if lasttimestamp < tag.timestamp:
+                  lasttimestamp = tag.timestamp
+              if type(tag.data) == VideoTag:
+                  if tag.data.frametype == 5:
+                      continue
+                  if tag.data.frametype == 1 or tag.data.frametype == 4:
+                      #Is Keyframe
+                      if lastkeyframetimestamp < tag.timestamp:
+                          lastkeyframetimestamp = tag.timestamp
+                          lastkeyframelocation = float(fo.tell() + 1)
+              tag.write(fo)
+      except Exception,e:
+          if metadata.data.metadata['datasize']:
+              print >>sys.stderr,datasize,"/",metadata.data.metadata['datasize'][1],"\r",
+          else:
+              print >>sys.stderr,datasize,"/?","\r",
+          pass
+      finally:
+          f.close()
+      timestampbase = lasttimestamp + framegap
+
+  fo.flush()
+  #Finally Update the metadata
+  if type(metadata) == FLVTag:
+
+      #Update values accumulate while iterate files
+      if metadata.data.metadata['datasize']:
+          metadata.data.metadata['datasize'] = \
+              (0, datasize)
+      if metadata.data.metadata['lasttimestamp']:
+          metadata.data.metadata['lasttimestamp'] = \
+              (0, lasttimestamp)
+      if metadata.data.metadata['lastkeyframetimestamp']:
+          metadata.data.metadata['lastkeyframetimestamp'] = \
+              (0, lastkeyframetimestamp)
+      if metadata.data.metadata['lastkeyframelocation']:
+          metadata.data.metadata['lastkeyframelocation'] = \
+              (0, lastkeyframelocation)
+      #Finally check the `filesize' field
+      #Total file size minus the size of one additional prevTagSz field
+      if metadata.data.metadata['filesize']:
+          metadata.data.metadata['filesize'] = \
+              (0, float(os.path.getsize(fo.name) - 4))
+      fo.seek(metaposition)
+      metadata.data.generate()
+      metadata.write(fo)
+
+  for f in fs:
+      f.close()
+
+  fo.close()
+
+  return 0
+
+  #Open use ffmpeg fix a problem of original output
+  # os.system(
+  #     'ffmpeg -f flv -i "%s" -f flv -acodec copy -vcodec copy "%s" && rm "%s"' % \
+  #         (output,args.output,output))
+
+
+  #End the statics
+
+if __name__ == "__main__":
+  concat_flv(["00.flv", "01.flv"], "output.flv")
+