Merge remote-tracking branch 'naglis/sockshare'
author Philipp Hagemeister <phihag@phihag.de>
Mon, 21 Jul 2014 11:24:15 +0000 (13:24 +0200)
committer Philipp Hagemeister <phihag@phihag.de>
Mon, 21 Jul 2014 11:24:15 +0000 (13:24 +0200)
Conflicts:
youtube_dl/extractor/__init__.py

49 files changed:
README.md
test/helper.py
test/swftests/.gitignore [new file with mode: 0644]
test/swftests/ArrayAccess.as [new file with mode: 0644]
test/swftests/ClassCall.as [new file with mode: 0644]
test/swftests/ClassConstruction.as [new file with mode: 0644]
test/swftests/LocalVars.as [new file with mode: 0644]
test/swftests/PrivateCall.as [new file with mode: 0644]
test/swftests/StaticAssignment.as [new file with mode: 0644]
test/swftests/StaticRetrieval.as [new file with mode: 0644]
test/test_playlists.py
test/test_swfinterp.py [new file with mode: 0644]
test/test_youtube_signature.py
youtube_dl/YoutubeDL.py
youtube_dl/__init__.py
youtube_dl/extractor/__init__.py
youtube_dl/extractor/adultswim.py [new file with mode: 0644]
youtube_dl/extractor/allocine.py
youtube_dl/extractor/ard.py
youtube_dl/extractor/chilloutzone.py
youtube_dl/extractor/cnet.py
youtube_dl/extractor/comedycentral.py
youtube_dl/extractor/common.py
youtube_dl/extractor/cracked.py [new file with mode: 0644]
youtube_dl/extractor/dfb.py [new file with mode: 0644]
youtube_dl/extractor/dropbox.py
youtube_dl/extractor/firedrive.py
youtube_dl/extractor/francetv.py
youtube_dl/extractor/funnyordie.py
youtube_dl/extractor/generic.py
youtube_dl/extractor/livestream.py
youtube_dl/extractor/mlb.py [new file with mode: 0644]
youtube_dl/extractor/npo.py
youtube_dl/extractor/redtube.py
youtube_dl/extractor/rtbf.py
youtube_dl/extractor/rtve.py
youtube_dl/extractor/sapo.py [new file with mode: 0644]
youtube_dl/extractor/savefrom.py
youtube_dl/extractor/snotr.py [new file with mode: 0644]
youtube_dl/extractor/steam.py
youtube_dl/extractor/tagesschau.py
youtube_dl/extractor/teachertube.py
youtube_dl/extractor/tenplay.py
youtube_dl/extractor/vimeo.py
youtube_dl/extractor/wdr.py
youtube_dl/extractor/youtube.py
youtube_dl/swfinterp.py [new file with mode: 0644]
youtube_dl/utils.py
youtube_dl/version.py

diff --git a/README.md b/README.md
index bc5e0f76df2759b004be225d4419d021a5bcf3a0..fb2f776c9a9395a5aa9629ddcc347ebe95d74804 100644 (file)
--- a/README.md
+++ b/README.md
@@ -12,7 +12,7 @@ To install it right away for all UNIX users (Linux, OS X, etc.), type:
 
 If you do not have curl, you can alternatively use a recent wget:
 
-    sudo wget https://yt-dl.org/downloads/2014.05.13/youtube-dl -O /usr/local/bin/youtube-dl
+    sudo wget https://yt-dl.org/downloads/latest/youtube-dl -O /usr/local/bin/youtube-dl
     sudo chmod a+x /usr/local/bin/youtube-dl
 
 Windows users can [download a .exe file](https://yt-dl.org/latest/youtube-dl.exe) and place it in their home directory or any other location on their [PATH](http://en.wikipedia.org/wiki/PATH_%28variable%29).
diff --git a/test/helper.py b/test/helper.py
index 230d2bd67ab06b4db552bff30c5620f83673ca93..84b16f770edb3a41512504bee614ea158f4fcd4f 100644 (file)
--- a/test/helper.py
+++ b/test/helper.py
@@ -148,3 +148,10 @@ def assertRegexpMatches(self, text, regexp, msg=None):
             else:
                 msg = note + ', ' + msg
             self.assertTrue(m, msg)
+
+
+def assertGreaterEqual(self, got, expected, msg=None):
+    if not (got >= expected):
+        if msg is None:
+            msg = '%r not greater than or equal to %r' % (got, expected)
+        self.assertTrue(got >= expected, msg)
diff --git a/test/swftests/.gitignore b/test/swftests/.gitignore
new file mode 100644 (file)
index 0000000..da97ff7
--- /dev/null
+++ b/test/swftests/.gitignore
@@ -0,0 +1 @@
+*.swf
diff --git a/test/swftests/ArrayAccess.as b/test/swftests/ArrayAccess.as
new file mode 100644 (file)
index 0000000..e22caa3
--- /dev/null
+++ b/test/swftests/ArrayAccess.as
@@ -0,0 +1,19 @@
+// input: [["a", "b", "c", "d"]]
+// output: ["c", "b", "a", "d"]
+
+package {
+public class ArrayAccess {
+    public static function main(ar:Array):Array {
+       var aa:ArrayAccess = new ArrayAccess();
+       return aa.f(ar, 2);
+    }
+
+    private function f(ar:Array, num:Number):Array{
+        var x:String = ar[0];
+        var y:String = ar[num % ar.length];
+        ar[0] = y;
+        ar[num] = x;
+        return ar;
+    }
+}
+}
diff --git a/test/swftests/ClassCall.as b/test/swftests/ClassCall.as
new file mode 100644 (file)
index 0000000..aef58da
--- /dev/null
+++ b/test/swftests/ClassCall.as
@@ -0,0 +1,17 @@
+// input: []
+// output: 121
+
+package {
+public class ClassCall {
+    public static function main():int{
+       var f:OtherClass = new OtherClass();
+        return f.func(100,20);
+    }
+}
+}
+
+class OtherClass {
+       public function func(x: int, y: int):int {
+               return x+y+1;
+       }
+}
diff --git a/test/swftests/ClassConstruction.as b/test/swftests/ClassConstruction.as
new file mode 100644 (file)
index 0000000..436479f
--- /dev/null
+++ b/test/swftests/ClassConstruction.as
@@ -0,0 +1,15 @@
+// input: []
+// output: 0
+
+package {
+public class ClassConstruction {
+    public static function main():int{
+       var f:Foo = new Foo();
+        return 0;
+    }
+}
+}
+
+class Foo {
+
+}
diff --git a/test/swftests/LocalVars.as b/test/swftests/LocalVars.as
new file mode 100644 (file)
index 0000000..b2911a9
--- /dev/null
+++ b/test/swftests/LocalVars.as
@@ -0,0 +1,13 @@
+// input: [1, 2]
+// output: 3
+
+package {
+public class LocalVars {
+    public static function main(a:int, b:int):int{
+        var c:int = a + b + b;
+        var d:int = c - b;
+        var e:int = d;
+        return e;
+    }
+}
+}
diff --git a/test/swftests/PrivateCall.as b/test/swftests/PrivateCall.as
new file mode 100644 (file)
index 0000000..f1c110a
--- /dev/null
+++ b/test/swftests/PrivateCall.as
@@ -0,0 +1,21 @@
+// input: []
+// output: 9
+
+package {
+public class PrivateCall {
+    public static function main():int{
+       var f:OtherClass = new OtherClass();
+        return f.func();
+    }
+}
+}
+
+class OtherClass {
+       private function pf():int {
+               return 9;
+       }
+
+       public function func():int {
+               return this.pf();
+       }
+}
diff --git a/test/swftests/StaticAssignment.as b/test/swftests/StaticAssignment.as
new file mode 100644 (file)
index 0000000..b061c21
--- /dev/null
+++ b/test/swftests/StaticAssignment.as
@@ -0,0 +1,13 @@
+// input: [1]
+// output: 1
+
+package {
+public class StaticAssignment {
+       public static var v:int;
+
+    public static function main(a:int):int{
+        v = a;
+        return v;
+    }
+}
+}
diff --git a/test/swftests/StaticRetrieval.as b/test/swftests/StaticRetrieval.as
new file mode 100644 (file)
index 0000000..c8352d8
--- /dev/null
+++ b/test/swftests/StaticRetrieval.as
@@ -0,0 +1,16 @@
+// input: []
+// output: 1
+
+package {
+public class StaticRetrieval {
+       public static var v:int;
+
+    public static function main():int{
+        if (v) {
+               return 0;
+        } else {
+               return 1;
+        }
+    }
+}
+}
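diff --git a/test/test_playlists.py b/test/test_playlists.py
index 1a38a667b1391ab744fa88e91d79e4eea742bbf6..4789200e9f2450bc5cd10e4b53e292677b3ad1df 100644 (file)
--- a/test/test_playlists.py
+++ b/test/test_playlists.py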
@@ -11,6 +11,7 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 
 from test.helper import (
     assertRegexpMatches,
+    assertGreaterEqual,
     expect_info_dict,
     FakeYDL,
 )
@@ -71,8 +72,8 @@ class TestPlaylists(unittest.TestCase):
         ie = DailymotionUserIE(dl)
         result = ie.extract('https://www.dailymotion.com/user/nqtv')
         self.assertIsPlaylist(result)
+        assertGreaterEqual(self, len(result['entries']), 100)
         self.assertEqual(result['title'], 'Rémi Gaillard')
-        self.assertTrue(len(result['entries']) >= 100)
 
     def test_vimeo_channel(self):
         dl = FakeYDL()
@@ -111,7 +112,7 @@ class TestPlaylists(unittest.TestCase):
         ie = VineUserIE(dl)
         result = ie.extract('https://vine.co/Visa')
         self.assertIsPlaylist(result)
-        self.assertTrue(len(result['entries']) >= 47)
+        assertGreaterEqual(self, len(result['entries']), 47)
 
     def test_ustream_channel(self):
         dl = FakeYDL()
@@ -119,7 +120,7 @@ class TestPlaylists(unittest.TestCase):
         result = ie.extract('http://www.ustream.tv/channel/channeljapan')
         self.assertIsPlaylist(result)
         self.assertEqual(result['id'], '10874166')
-        self.assertTrue(len(result['entries']) >= 54)
+        assertGreaterEqual(self, len(result['entries']), 54)
 
     def test_soundcloud_set(self):
         dl = FakeYDL()
@@ -127,7 +128,7 @@ class TestPlaylists(unittest.TestCase):
         result = ie.extract('https://soundcloud.com/the-concept-band/sets/the-royal-concept-ep')
         self.assertIsPlaylist(result)
         self.assertEqual(result['title'], 'The Royal Concept EP')
-        self.assertTrue(len(result['entries']) >= 6)
+        assertGreaterEqual(self, len(result['entries']), 6)
 
     def test_soundcloud_user(self):
         dl = FakeYDL()
@@ -135,7 +136,7 @@ class TestPlaylists(unittest.TestCase):
         result = ie.extract('https://soundcloud.com/the-concept-band')
         self.assertIsPlaylist(result)
         self.assertEqual(result['id'], '9615865')
-        self.assertTrue(len(result['entries']) >= 12)
+        assertGreaterEqual(self, len(result['entries']), 12)
 
     def test_soundcloud_likes(self):
         dl = FakeYDL()
@@ -143,7 +144,7 @@ class TestPlaylists(unittest.TestCase):
         result = ie.extract('https://soundcloud.com/the-concept-band/likes')
         self.assertIsPlaylist(result)
         self.assertEqual(result['id'], '9615865')
-        self.assertTrue(len(result['entries']) >= 1)
+        assertGreaterEqual(self, len(result['entries']), 1)
 
     def test_soundcloud_playlist(self):
         dl = FakeYDL()
@@ -162,7 +163,7 @@ class TestPlaylists(unittest.TestCase):
         result = ie.extract('http://new.livestream.com/tedx/cityenglish')
         self.assertIsPlaylist(result)
         self.assertEqual(result['title'], 'TEDCity2.0 (English)')
-        self.assertTrue(len(result['entries']) >= 4)
+        assertGreaterEqual(self, len(result['entries']), 4)
 
     def test_livestreamoriginal_folder(self):
         dl = FakeYDL()
@@ -170,7 +171,7 @@ class TestPlaylists(unittest.TestCase):
         result = ie.extract('https://www.livestream.com/newplay/folder?dirId=a07bf706-d0e4-4e75-a747-b021d84f2fd3')
         self.assertIsPlaylist(result)
         self.assertEqual(result['id'], 'a07bf706-d0e4-4e75-a747-b021d84f2fd3')
-        self.assertTrue(len(result['entries']) >= 28)
+        assertGreaterEqual(self, len(result['entries']), 28)
 
     def test_nhl_videocenter(self):
         dl = FakeYDL()
@@ -187,7 +188,7 @@ class TestPlaylists(unittest.TestCase):
         result = ie.extract('http://bambuser.com/channel/pixelversity')
         self.assertIsPlaylist(result)
         self.assertEqual(result['title'], 'pixelversity')
-        self.assertTrue(len(result['entries']) >= 60)
+        assertGreaterEqual(self, len(result['entries']), 60)
 
     def test_bandcamp_album(self):
         dl = FakeYDL()
@@ -195,7 +196,7 @@ class TestPlaylists(unittest.TestCase):
         result = ie.extract('http://mpallante.bandcamp.com/album/nightmare-night-ep')
         self.assertIsPlaylist(result)
         self.assertEqual(result['title'], 'Nightmare Night EP')
-        self.assertTrue(len(result['entries']) >= 4)
+        assertGreaterEqual(self, len(result['entries']), 4)
         
     def test_smotri_community(self):
         dl = FakeYDL()
@@ -204,7 +205,7 @@ class TestPlaylists(unittest.TestCase):
         self.assertIsPlaylist(result)
         self.assertEqual(result['id'], 'kommuna')
         self.assertEqual(result['title'], 'КПРФ')
-        self.assertTrue(len(result['entries']) >= 4)
+        assertGreaterEqual(self, len(result['entries']), 4)
         
     def test_smotri_user(self):
         dl = FakeYDL()
@@ -213,7 +214,7 @@ class TestPlaylists(unittest.TestCase):
         self.assertIsPlaylist(result)
         self.assertEqual(result['id'], 'inspector')
         self.assertEqual(result['title'], 'Inspector')
-        self.assertTrue(len(result['entries']) >= 9)
+        assertGreaterEqual(self, len(result['entries']), 9)
 
     def test_AcademicEarthCourse(self):
         dl = FakeYDL()
@@ -232,7 +233,7 @@ class TestPlaylists(unittest.TestCase):
         self.assertIsPlaylist(result)
         self.assertEqual(result['id'], 'dvoe_iz_lartsa')
         self.assertEqual(result['title'], 'Двое из ларца (2006 - 2008)')
-        self.assertTrue(len(result['entries']) >= 24)
+        assertGreaterEqual(self, len(result['entries']), 24)
 
     def test_ivi_compilation_season(self):
         dl = FakeYDL()
@@ -241,7 +242,7 @@ class TestPlaylists(unittest.TestCase):
         self.assertIsPlaylist(result)
         self.assertEqual(result['id'], 'dvoe_iz_lartsa/season1')
         self.assertEqual(result['title'], 'Двое из ларца (2006 - 2008) 1 сезон')
-        self.assertTrue(len(result['entries']) >= 12)
+        assertGreaterEqual(self, len(result['entries']), 12)
         
     def test_imdb_list(self):
         dl = FakeYDL()
@@ -260,7 +261,7 @@ class TestPlaylists(unittest.TestCase):
         self.assertEqual(result['id'], 'cryptography')
         self.assertEqual(result['title'], 'Journey into cryptography')
         self.assertEqual(result['description'], 'How have humans protected their secret messages through history? What has changed today?')
-        self.assertTrue(len(result['entries']) >= 3)
+        assertGreaterEqual(self, len(result['entries']), 3)
 
     def test_EveryonesMixtape(self):
         dl = FakeYDL()
@@ -277,7 +278,7 @@ class TestPlaylists(unittest.TestCase):
         result = ie.extract('http://rutube.ru/tags/video/1800/')
         self.assertIsPlaylist(result)
         self.assertEqual(result['id'], '1800')
-        self.assertTrue(len(result['entries']) >= 68)
+        assertGreaterEqual(self, len(result['entries']), 68)
 
     def test_rutube_person(self):
         dl = FakeYDL()
@@ -285,7 +286,7 @@ class TestPlaylists(unittest.TestCase):
         result = ie.extract('http://rutube.ru/video/person/313878/')
         self.assertIsPlaylist(result)
         self.assertEqual(result['id'], '313878')
-        self.assertTrue(len(result['entries']) >= 37)
+        assertGreaterEqual(self, len(result['entries']), 37)
 
     def test_multiple_brightcove_videos(self):
         # https://github.com/rg3/youtube-dl/issues/2283
@@ -322,7 +323,7 @@ class TestPlaylists(unittest.TestCase):
         self.assertIsPlaylist(result)
         self.assertEqual(result['id'], '10')
         self.assertEqual(result['title'], 'Who are the hackers?')
-        self.assertTrue(len(result['entries']) >= 6)
+        assertGreaterEqual(self, len(result['entries']), 6)
 
     def test_toypics_user(self):
         dl = FakeYDL()
@@ -330,7 +331,7 @@ class TestPlaylists(unittest.TestCase):
         result = ie.extract('http://videos.toypics.net/Mikey')
         self.assertIsPlaylist(result)
         self.assertEqual(result['id'], 'Mikey')
-        self.assertTrue(len(result['entries']) >= 17)
+        assertGreaterEqual(self, len(result['entries']), 17)
 
     def test_xtube_user(self):
         dl = FakeYDL()
@@ -338,7 +339,7 @@ class TestPlaylists(unittest.TestCase):
         result = ie.extract('http://www.xtube.com/community/profile.php?user=greenshowers')
         self.assertIsPlaylist(result)
         self.assertEqual(result['id'], 'greenshowers')
-        self.assertTrue(len(result['entries']) >= 155)
+        assertGreaterEqual(self, len(result['entries']), 155)
 
     def test_InstagramUser(self):
         dl = FakeYDL()
@@ -346,7 +347,7 @@ class TestPlaylists(unittest.TestCase):
         result = ie.extract('http://instagram.com/porsche')
         self.assertIsPlaylist(result)
         self.assertEqual(result['id'], 'porsche')
-        self.assertTrue(len(result['entries']) >= 2)
+        assertGreaterEqual(self, len(result['entries']), 2)
         test_video = next(
             e for e in result['entries']
             if e['id'] == '614605558512799803_462752227')
@@ -385,7 +386,7 @@ class TestPlaylists(unittest.TestCase):
         self.assertEqual(result['id'], '152147')
         self.assertEqual(
             result['title'], 'Brace Yourself - Today\'s Weirdest News')
-        self.assertTrue(len(result['entries']) >= 10)
+        assertGreaterEqual(self, len(result['entries']), 10)
 
     def test_TeacherTubeUser(self):
         dl = FakeYDL()
@@ -393,7 +394,7 @@ class TestPlaylists(unittest.TestCase):
         result = ie.extract('http://www.teachertube.com/user/profile/rbhagwati2')
         self.assertIsPlaylist(result)
         self.assertEqual(result['id'], 'rbhagwati2')
-        self.assertTrue(len(result['entries']) >= 179)
+        assertGreaterEqual(self, len(result['entries']), 179)
 
 if __name__ == '__main__':
     unittest.main()
diff --git a/test/test_swfinterp.py b/test/test_swfinterp.py
new file mode 100644 (file)
index 0000000..b42cd74
--- /dev/null
+++ b/test/test_swfinterp.py
@@ -0,0 +1,77 @@
+#!/usr/bin/env python
+
+# Allow direct execution
+import os
+import sys
+import unittest
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+
+import errno
+import io
+import json
+import re
+import subprocess
+
+from youtube_dl.swfinterp import SWFInterpreter
+
+
+TEST_DIR = os.path.join(
+    os.path.dirname(os.path.abspath(__file__)), 'swftests')
+
+
+class TestSWFInterpreter(unittest.TestCase):
+    pass
+
+
+def _make_testfunc(testfile):
+    m = re.match(r'^(.*)\.(as)$', testfile)
+    if not m:
+        return
+    test_id = m.group(1)
+
+    def test_func(self):
+        as_file = os.path.join(TEST_DIR, testfile)
+        swf_file = os.path.join(TEST_DIR, test_id + '.swf')
+        if ((not os.path.exists(swf_file))
+                or os.path.getmtime(swf_file) < os.path.getmtime(as_file)):
+            # Recompile
+            try:
+                subprocess.check_call(['mxmlc', '-output', swf_file, as_file])
+            except OSError as ose:
+                if ose.errno == errno.ENOENT:
+                    print('mxmlc not found! Skipping test.')
+                    return
+                raise
+
+        with open(swf_file, 'rb') as swf_f:
+            swf_content = swf_f.read()
+        swfi = SWFInterpreter(swf_content)
+
+        with io.open(as_file, 'r', encoding='utf-8') as as_f:
+            as_content = as_f.read()
+
+        def _find_spec(key):
+            m = re.search(
+                r'(?m)^//\s*%s:\s*(.*?)\n' % re.escape(key), as_content)
+            if not m:
+                raise ValueError('Cannot find %s in %s' % (key, testfile))
+            return json.loads(m.group(1))
+
+        input_args = _find_spec('input')
+        output = _find_spec('output')
+
+        swf_class = swfi.extract_class(test_id)
+        func = swfi.extract_function(swf_class, 'main')
+        res = func(input_args)
+        self.assertEqual(res, output)
+
+    test_func.__name__ = str('test_swf_' + test_id)
+    setattr(TestSWFInterpreter, test_func.__name__, test_func)
+
+
+for testfile in os.listdir(TEST_DIR):
+    _make_testfunc(testfile)
+
+if __name__ == '__main__':
+    unittest.main()
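diff --git a/test/test_youtube_signature.py b/test/test_youtube_signature.py
index d95533959481df9b458f56c14d4857d3c5230252..e8a67c4c0141efad9d6475af7835a8ee2fd95697 100644 (file)
--- a/test/test_youtube_signature.py
+++ b/test/test_youtube_signature.py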
@@ -45,6 +45,18 @@ _TESTS = [
         u'2ACFC7A61CA478CD21425E5A57EBD73DDC78E22A.2094302436B2D377D14A3BBA23022D023B8BC25AA',
         u'A52CB8B320D22032ABB3A41D773D2B6342034902.A22E87CDD37DBE75A5E52412DC874AC16A7CFCA2',
     ),
+    (
+        u'http://s.ytimg.com/yts/swfbin/player-vfl5vIhK2/watch_as3.swf',
+        u'swf',
+        86,
+        u'O1I3456789abcde0ghijklmnopqrstuvwxyzABCDEFGHfJKLMN2PQRSTUVWXY\\!"#$%&\'()*+,-./:;<=>?'
+    ),
+    (
+        u'http://s.ytimg.com/yts/swfbin/player-vflmDyk47/watch_as3.swf',
+        u'swf',
+        u'F375F75BF2AFDAAF2666E43868D46816F83F13E81C46.3725A8218E446A0DECD33F79DC282994D6AA92C92C9',
+        u'9C29AA6D499282CD97F33DCED0A644E8128A5273.64C18E31F38361864D86834E6662FAADFA2FB57F'
+    ),
 ]
 
 
@@ -57,12 +69,12 @@ class TestSignature(unittest.TestCase):
 
 
 def make_tfunc(url, stype, sig_input, expected_sig):
-    basename = url.rpartition('/')[2]
-    m = re.match(r'.*-([a-zA-Z0-9_-]+)\.[a-z]+$', basename)
-    assert m, '%r should follow URL format' % basename
+    m = re.match(r'.*-([a-zA-Z0-9_-]+)(?:/watch_as3)?\.[a-z]+$', url)
+    assert m, '%r should follow URL format' % url
     test_id = m.group(1)
 
     def test_func(self):
+        basename = 'player-%s.%s' % (test_id, stype)
         fn = os.path.join(self.TESTDATA_DIR, basename)
 
         if not os.path.exists(fn):
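diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
index 3dff723b81fff6947ac8cf08c62a275843f359f9..686988fe5bdaa71f8dd346b3b90795d213b92b7c 100755 (executable)
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py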
@@ -1197,6 +1197,10 @@ class YoutubeDL(object):
             if res:
                 res += ', '
             res += format_bytes(fdict['filesize'])
+        elif fdict.get('filesize_approx') is not None:
+            if res:
+                res += ', '
+            res += '~' + format_bytes(fdict['filesize_approx'])
         return res
 
     def list_formats(self, info_dict):
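diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py
index 5e16a549177255a7bca62292d3ca87233835269c..0e7b9ddaff658fdeacaa0c5d45f85740f145d43a 100644 (file)
--- a/youtube_dl/__init__.py
+++ b/youtube_dl/__init__.py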
@@ -64,17 +64,17 @@ __authors__  = (
     'Adam Malcontenti-Wilson',
     'Tobias Bell',
     'Naglis Jonaitis',
+    'Charles Chen',
+    'Hassaan Ali',
 )
 
 __license__ = 'Public Domain'
 
 import codecs
 import io
-import locale
 import optparse
 import os
 import random
-import re
 import shlex
 import sys
 
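diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index f3575b6c9730fb20912dc409ec04855044925335..8d63d9281d68d496b65d025666fd4d8a2c863b06 100644 (file)
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py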
@@ -1,5 +1,6 @@
 from .academicearth import AcademicEarthCourseIE
 from .addanime import AddAnimeIE
+from .adultswim import AdultSwimIE
 from .aftonbladet import AftonbladetIE
 from .anitube import AnitubeIE
 from .aol import AolIE
@@ -52,6 +53,7 @@ from .cnn import (
 from .collegehumor import CollegeHumorIE
 from .comedycentral import ComedyCentralIE, ComedyCentralShowsIE
 from .condenast import CondeNastIE
+from .cracked import CrackedIE
 from .criterion import CriterionIE
 from .crunchyroll import CrunchyrollIE
 from .cspan import CSpanIE
@@ -62,6 +64,7 @@ from .dailymotion import (
     DailymotionUserIE,
 )
 from .daum import DaumIE
+from .dfb import DFBIE
 from .dotsub import DotsubIE
 from .dreisat import DreiSatIE
 from .drtv import DRTVIE
@@ -170,6 +173,7 @@ from .metacafe import MetacafeIE
 from .metacritic import MetacriticIE
 from .mit import TechTVMITIE, MITIE, OCWMITIE
 from .mixcloud import MixcloudIE
+from .mlb import MLBIE
 from .mpora import MporaIE
 from .mofosex import MofosexIE
 from .mooshare import MooshareIE
@@ -249,6 +253,7 @@ from .rutube import (
     RutubePersonIE,
 )
 from .rutv import RUTVIE
+from .sapo import SapoIE
 from .savefrom import SaveFromIE
 from .scivee import SciVeeIE
 from .screencast import ScreencastIE
@@ -262,6 +267,7 @@ from .smotri import (
     SmotriUserIE,
     SmotriBroadcastIE,
 )
+from .snotr import SnotrIE
 from .sockshare import SockshareIE
 from .sohu import SohuIE
 from .soundcloud import (
@@ -397,6 +403,7 @@ from .youtube import (
     YoutubeUserIE,
     YoutubeWatchLaterIE,
 )
+
 from .zdf import ZDFIE
 
 
diff --git a/youtube_dl/extractor/adultswim.py b/youtube_dl/extractor/adultswim.py
new file mode 100644 (file)
index 0000000..a00bfcb
--- /dev/null
+++ b/youtube_dl/extractor/adultswim.py
@@ -0,0 +1,139 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+
+class AdultSwimIE(InfoExtractor):
+    _VALID_URL = r'https?://video\.adultswim\.com/(?P<path>.+?)(?:\.html)?(?:\?.*)?(?:#.*)?$'
+    _TEST = {
+        'url': 'http://video.adultswim.com/rick-and-morty/close-rick-counters-of-the-rick-kind.html?x=y#title',
+        'playlist': [
+            {
+                'md5': '4da359ec73b58df4575cd01a610ba5dc',
+                'info_dict': {
+                    'id': '8a250ba1450996e901453d7f02ca02f5',
+                    'ext': 'flv',
+                    'title': 'Rick and Morty Close Rick-Counters of the Rick Kind part 1',
+                    'description': 'Rick has a run in with some old associates, resulting in a fallout with Morty. You got any chips, broh?',
+                    'uploader': 'Rick and Morty',
+                    'thumbnail': 'http://i.cdn.turner.com/asfix/repository/8a250ba13f865824013fc9db8b6b0400/thumbnail_267549017116827057.jpg'
+                }
+            },
+            {
+                'md5': 'ffbdf55af9331c509d95350bd0cc1819',
+                'info_dict': {
+                    'id': '8a250ba1450996e901453d7f4bd102f6',
+                    'ext': 'flv',
+                    'title': 'Rick and Morty Close Rick-Counters of the Rick Kind part 2',
+                    'description': 'Rick has a run in with some old associates, resulting in a fallout with Morty. You got any chips, broh?',
+                    'uploader': 'Rick and Morty',
+                    'thumbnail': 'http://i.cdn.turner.com/asfix/repository/8a250ba13f865824013fc9db8b6b0400/thumbnail_267549017116827057.jpg'
+                }
+            },
+            {
+                'md5': 'b92409635540304280b4b6c36bd14a0a',
+                'info_dict': {
+                    'id': '8a250ba1450996e901453d7fa73c02f7',
+                    'ext': 'flv',
+                    'title': 'Rick and Morty Close Rick-Counters of the Rick Kind part 3',
+                    'description': 'Rick has a run in with some old associates, resulting in a fallout with Morty. You got any chips, broh?',
+                    'uploader': 'Rick and Morty',
+                    'thumbnail': 'http://i.cdn.turner.com/asfix/repository/8a250ba13f865824013fc9db8b6b0400/thumbnail_267549017116827057.jpg'
+                }
+            },
+            {
+                'md5': 'e8818891d60e47b29cd89d7b0278156d',
+                'info_dict': {
+                    'id': '8a250ba1450996e901453d7fc8ba02f8',
+                    'ext': 'flv',
+                    'title': 'Rick and Morty Close Rick-Counters of the Rick Kind part 4',
+                    'description': 'Rick has a run in with some old associates, resulting in a fallout with Morty. You got any chips, broh?',
+                    'uploader': 'Rick and Morty',
+                    'thumbnail': 'http://i.cdn.turner.com/asfix/repository/8a250ba13f865824013fc9db8b6b0400/thumbnail_267549017116827057.jpg'
+                }
+            }
+        ]
+    }
+
+    _video_extensions = {
+        '3500': 'flv',
+        '640': 'mp4',
+        '150': 'mp4',
+        'ipad': 'm3u8',
+        'iphone': 'm3u8'
+    }
+    _video_dimensions = {
+        '3500': (1280, 720),
+        '640': (480, 270),
+        '150': (320, 180)
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_path = mobj.group('path')
+
+        webpage = self._download_webpage(url, video_path)
+        episode_id = self._html_search_regex(r'<link rel="video_src" href="http://i\.adultswim\.com/adultswim/adultswimtv/tools/swf/viralplayer.swf\?id=([0-9a-f]+?)"\s*/?\s*>', webpage, 'episode_id')
+        title = self._og_search_title(webpage)
+
+        index_url = 'http://asfix.adultswim.com/asfix-svc/episodeSearch/getEpisodesByIDs?networkName=AS&ids=%s' % episode_id
+        idoc = self._download_xml(index_url, title, 'Downloading episode index', 'Unable to download episode index')
+
+        episode_el = idoc.find('.//episode')
+        show_title = episode_el.attrib.get('collectionTitle')
+        episode_title = episode_el.attrib.get('title')
+        thumbnail = episode_el.attrib.get('thumbnailUrl')
+        description = episode_el.find('./description').text.strip()
+
+        entries = []
+        segment_els = episode_el.findall('./segments/segment')
+
+        for part_num, segment_el in enumerate(segment_els):
+            segment_id = segment_el.attrib.get('id')
+            segment_title = '%s %s part %d' % (show_title, episode_title, part_num + 1)
+            thumbnail = segment_el.attrib.get('thumbnailUrl')
+            duration = segment_el.attrib.get('duration')
+
+            segment_url = 'http://asfix.adultswim.com/asfix-svc/episodeservices/getCvpPlaylist?networkName=AS&id=%s' % segment_id
+            idoc = self._download_xml(segment_url, segment_title, 'Downloading segment information', 'Unable to download segment information')
+
+            formats = []
+            file_els = idoc.findall('.//files/file')
+
+            for file_el in file_els:
+                bitrate = file_el.attrib.get('bitrate')
+                type = file_el.attrib.get('type')
+                width, height = self._video_dimensions.get(bitrate, (None, None))
+                formats.append({
+                    'format_id': '%s-%s' % (bitrate, type),
+                    'url': file_el.text,
+                    'ext': self._video_extensions.get(bitrate, 'mp4'),
+                    # The bitrate may not be a number (for example: 'iphone')
+                    'tbr': int(bitrate) if bitrate.isdigit() else None,
+                    'height': height,
+                    'width': width
+                })
+
+            self._sort_formats(formats)
+
+            entries.append({
+                'id': segment_id,
+                'title': segment_title,
+                'formats': formats,
+                'uploader': show_title,
+                'thumbnail': thumbnail,
+                'duration': duration,
+                'description': description
+            })
+
+        return {
+            '_type': 'playlist',
+            'id': episode_id,
+            'display_id': video_path,
+            'entries': entries,
+            'title': '%s %s' % (show_title, episode_title),
+            'description': description,
+            'thumbnail': thumbnail
+        }
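diff --git a/youtube_dl/extractor/allocine.py b/youtube_dl/extractor/allocine.py
index 34f0cd49bafa104d3e3c175a4bd0ab6bf2494c1c..7bd7978841d06747145feeda56624de84747fcc1 100644 (file)
--- a/youtube_dl/extractor/allocine.py
+++ b/youtube_dl/extractor/allocine.py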
@@ -32,7 +32,7 @@ class AllocineIE(InfoExtractor):
             'id': '19540403',
             'ext': 'mp4',
             'title': 'Planes 2 Bande-annonce VF',
-            'description': 'md5:c4b1f7bd682a91de6491ada267ec0f4d',
+            'description': 'md5:eeaffe7c2d634525e21159b93acf3b1e',
             'thumbnail': 're:http://.*\.jpg',
         },
     }, {
@@ -42,7 +42,7 @@ class AllocineIE(InfoExtractor):
             'id': '19544709',
             'ext': 'mp4',
             'title': 'Dragons 2 - Bande annonce finale VF',
-            'description': 'md5:e74a4dc750894bac300ece46c7036490',
+            'description': 'md5:71742e3a74b0d692c7fce0dd2017a4ac',
             'thumbnail': 're:http://.*\.jpg',
         },
     }]
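diff --git a/youtube_dl/extractor/ard.py b/youtube_dl/extractor/ard.py
index b36a4d46a6dd435883eb911de2e3530604476c07..30a85c8c1c8d1b3a10a40ac55a577e3402cb487a 100644 (file)
--- a/youtube_dl/extractor/ard.py
+++ b/youtube_dl/extractor/ard.py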
@@ -7,23 +7,32 @@ from .common import InfoExtractor
 from ..utils import (
     determine_ext,
     ExtractorError,
+    qualities,
 )
 
 
 class ARDIE(InfoExtractor):
-    _VALID_URL = r'^https?://(?:(?:www\.)?ardmediathek\.de|mediathek\.daserste\.de)/(?:.*/)(?P<video_id>[^/\?]+)(?:\?.*)?'
+    _VALID_URL = r'^https?://(?:(?:www\.)?ardmediathek\.de|mediathek\.daserste\.de)/(?:.*/)(?P<video_id>[0-9]+|[^0-9][^/\?]+)[^/\?]*(?:\?.*)?'
 
-    _TEST = {
-        'url': 'http://www.ardmediathek.de/das-erste/guenther-jauch/edward-snowden-im-interview-held-oder-verraeter?documentId=19288786',
-        'file': '19288786.mp4',
-        'md5': '515bf47ce209fb3f5a61b7aad364634c',
+    _TESTS = [{
+        'url': 'http://mediathek.daserste.de/sendungen_a-z/328454_anne-will/22429276_vertrauen-ist-gut-spionieren-ist-besser-geht',
+        'file': '22429276.mp4',
+        'md5': '469751912f1de0816a9fc9df8336476c',
         'info_dict': {
-            'title': 'Edward Snowden im Interview - Held oder Verräter?',
-            'description': 'Edward Snowden hat alles aufs Spiel gesetzt, um die weltweite \xdcberwachung durch die Geheimdienste zu enttarnen. Nun stellt sich der ehemalige NSA-Mitarbeiter erstmals weltweit in einem TV-Interview den Fragen eines NDR-Journalisten. Die Sendung vom Sonntagabend.',
-            'thumbnail': 'http://www.ardmediathek.de/ard/servlet/contentblob/19/28/87/90/19288790/bild/2250037',
+            'title': 'Vertrauen ist gut, Spionieren ist besser - Geht so deutsch-amerikanische Freundschaft?',
+            'description': 'Das Erste Mediathek [ARD]: Vertrauen ist gut, Spionieren ist besser - Geht so deutsch-amerikanische Freundschaft?, Anne Will, Über die Spionage-Affäre diskutieren Clemens Binninger, Katrin Göring-Eckardt, Georg Mascolo, Andrew B. Denison und Constanze Kurz.. Das Video zur Sendung Anne Will am Mittwoch, 16.07.2014',
         },
         'skip': 'Blocked outside of Germany',
-    }
+    }, {
+        'url': 'http://www.ardmediathek.de/tv/Tatort/Das-Wunder-von-Wolbeck-Video-tgl-ab-20/Das-Erste/Video?documentId=22490580&bcastId=602916',
+        'info_dict': {
+            'id': '22490580',
+            'ext': 'mp4',
+            'title': 'Das Wunder von Wolbeck (Video tgl. ab 20 Uhr)',
+            'description': 'Auf einem restaurierten Hof bei Wolbeck wird der Heilpraktiker Raffael Lembeck eines morgens von seiner Frau Stella tot aufgefunden. Das Opfer war offensichtlich in seiner Praxis zu Fall gekommen und ist dann verblutet, erklärt Prof. Boerne am Tatort.',
+        },
+        'skip': 'Blocked outside of Germany',
+    }]
 
     def _real_extract(self, url):
         # determine video id from url
@@ -43,40 +52,64 @@ class ARDIE(InfoExtractor):
              r'<h4 class="headline">(.*?)</h4>'],
             webpage, 'title')
         description = self._html_search_meta(
-            'dcterms.abstract', webpage, 'description')
-        thumbnail = self._og_search_thumbnail(webpage)
-
-
-        media_info = self._download_json(
-            'http://www.ardmediathek.de/play/media/%s' % video_id, video_id)
-        # The second element of the _mediaArray contains the standard http urls
-        streams = media_info['_mediaArray'][1]['_mediaStreamArray']
-        if not streams:
-            if '"fsk"' in webpage:
-                raise ExtractorError('This video is only available after 20:00')
-
-        formats = []
-
-        for s in streams:
-            if type(s['_stream']) == list:
-                for index, url in enumerate(s['_stream'][::-1]):
-                    quality = s['_quality'] + index
-                    formats.append({
-                        'quality': quality,
-                        'url': url,
-                        'format_id': '%s-%s' % (determine_ext(url), quality)
+            'dcterms.abstract', webpage, 'description', default=None)
+        if description is None:
+            description = self._html_search_meta(
+                'description', webpage, 'meta description')
+
+        # Thumbnail is sometimes not present.
+        # It is in the mobile version, but that seems to use a different URL
+        # structure altogether.
+        thumbnail = self._og_search_thumbnail(webpage, default=None)
+
+        media_streams = re.findall(r'''(?x)
+            mediaCollection\.addMediaStream\([0-9]+,\s*[0-9]+,\s*"[^"]*",\s*
+            "([^"]+)"''', webpage)
+
+        if media_streams:
+            QUALITIES = qualities(['lo', 'hi', 'hq'])
+            formats = []
+            for furl in set(media_streams):
+                if furl.endswith('.f4m'):
+                    fid = 'f4m'
+                else:
+                    fid_m = re.match(r'.*\.([^.]+)\.[^.]+$', furl)
+                    fid = fid_m.group(1) if fid_m else None
+                formats.append({
+                    'quality': QUALITIES(fid),
+                    'format_id': fid,
+                    'url': furl,
+                })
+        else:  # request JSON file
+            media_info = self._download_json(
+                'http://www.ardmediathek.de/play/media/%s' % video_id, video_id)
+            # The second element of the _mediaArray contains the standard http urls
+            streams = media_info['_mediaArray'][1]['_mediaStreamArray']
+            if not streams:
+                if '"fsk"' in webpage:
+                    raise ExtractorError('This video is only available after 20:00')
+
+            formats = []
+            for s in streams:
+                if type(s['_stream']) == list:
+                    for index, url in enumerate(s['_stream'][::-1]):
+                        quality = s['_quality'] + index
+                        formats.append({
+                            'quality': quality,
+                            'url': url,
+                            'format_id': '%s-%s' % (determine_ext(url), quality)
                         })
-                continue
+                    continue
 
-            format = {
-                'quality': s['_quality'],
-                'url': s['_stream'],
-            }
+                format = {
+                    'quality': s['_quality'],
+                    'url': s['_stream'],
+                }
 
-            format['format_id'] = '%s-%s' % (
-                determine_ext(format['url']), format['quality'])
+                format['format_id'] = '%s-%s' % (
+                    determine_ext(format['url']), format['quality'])
 
-            formats.append(format)
+                formats.append(format)
 
         self._sort_formats(formats)
 
index 02d5ba52713f27412f85989749fde91970e48e36..a62395d4b727ce917f1ea946b63940b3f52b6bdd 100644 (file)
@@ -42,7 +42,7 @@ class ChilloutzoneIE(InfoExtractor):
             'id': '85523671',
             'ext': 'mp4',
             'title': 'The Sunday Times - Icons',
-            'description': 'md5:3e1c0dc6047498d6728dcdaad0891762',
+            'description': 'md5:a5f7ff82e2f7a9ed77473fe666954e84',
             'uploader': 'Us',
             'uploader_id': 'usfilms',
             'upload_date': '20140131'
index a94f42571746f21cc70ba2527e661952caca1c6f..710d5009b71aafe0da901771048b8c0ba68def04 100644 (file)
@@ -43,7 +43,11 @@ class CNETIE(InfoExtractor):
             raise ExtractorError('Cannot find video data')
 
         video_id = vdata['id']
-        title = vdata['headline']
+        title = vdata.get('headline')
+        if title is None:
+            title = vdata.get('title')
+        if title is None:
+            raise ExtractorError('Cannot find title!')
         description = vdata.get('dek')
         thumbnail = vdata.get('image', {}).get('path')
         author = vdata.get('author')
index 8af0abade8c88fea3fa7fc4e7329e10802b43a5a..c81ce5a96f03b539d2f5e98975218fcdd0ed861d 100644 (file)
@@ -14,13 +14,13 @@ from ..utils import (
 
 
 class ComedyCentralIE(MTVServicesInfoExtractor):
-    _VALID_URL = r'''(?x)https?://(?:www\.)?(comedycentral|cc)\.com/
-        (video-clips|episodes|cc-studios|video-collections)
+    _VALID_URL = r'''(?x)https?://(?:www\.)?cc\.com/
+        (video-clips|episodes|cc-studios|video-collections|full-episodes)
         /(?P<title>.*)'''
     _FEED_URL = 'http://comedycentral.com/feeds/mrss/'
 
     _TEST = {
-        'url': 'http://www.comedycentral.com/video-clips/kllhuv/stand-up-greg-fitzsimmons--uncensored---too-good-of-a-mother',
+        'url': 'http://www.cc.com/video-clips/kllhuv/stand-up-greg-fitzsimmons--uncensored---too-good-of-a-mother',
         'md5': 'c4f48e9eda1b16dd10add0744344b6d8',
         'info_dict': {
             'id': 'cef0cbb3-e776-4bc9-b62e-8016deccb354',
index e68657314ecde5406ec2d27fef005f899341daf1..9b36e07891524c627be2b20a1adbd5269d57930a 100644 (file)
@@ -69,6 +69,7 @@ class InfoExtractor(object):
                     * vcodec     Name of the video codec in use
                     * container  Name of the container format
                     * filesize   The number of bytes, if known in advance
+                    * filesize_approx  An estimate for the number of bytes
                     * player_url SWF Player URL (used for rtmpdump).
                     * protocol   The protocol that will be used for the actual
                                  download, lower-case.
@@ -468,7 +469,7 @@ class InfoExtractor(object):
             display_name = name
         return self._html_search_regex(
             r'''(?ix)<meta
-                    (?=[^>]+(?:itemprop|name|property)=["\']%s["\'])
+                    (?=[^>]+(?:itemprop|name|property)=["\']?%s["\']?)
                     [^>]+content=["\']([^"\']+)["\']''' % re.escape(name),
             html, display_name, fatal=fatal, **kwargs)
 
@@ -555,6 +556,7 @@ class InfoExtractor(object):
                 f.get('abr') if f.get('abr') is not None else -1,
                 audio_ext_preference,
                 f.get('filesize') if f.get('filesize') is not None else -1,
+                f.get('filesize_approx') if f.get('filesize_approx') is not None else -1,
                 f.get('format_id'),
             )
         formats.sort(key=_formats_key)
diff --git a/youtube_dl/extractor/cracked.py b/youtube_dl/extractor/cracked.py
new file mode 100644 (file)
index 0000000..74b880f
--- /dev/null
@@ -0,0 +1,79 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    parse_iso8601,
+    str_to_int,
+)
+
+
+class CrackedIE(InfoExtractor):
+    """Information extractor for video pages on cracked.com."""
+
+    _VALID_URL = r'https?://(?:www\.)?cracked\.com/video_(?P<id>\d+)_[\da-z-]+\.html'
+    _TEST = {
+        'url': 'http://www.cracked.com/video_19006_4-plot-holes-you-didnt-notice-in-your-favorite-movies.html',
+        'md5': '4b29a5eeec292cd5eca6388c7558db9e',
+        'info_dict': {
+            'id': '19006',
+            'ext': 'mp4',
+            'title': '4 Plot Holes You Didn\'t Notice in Your Favorite Movies',
+            'description': 'md5:3b909e752661db86007d10e5ec2df769',
+            'timestamp': 1405659600,
+            'upload_date': '20140718',
+        }
+    }
+
+    def _real_extract(self, url):
+        """Download the page behind *url* and build the video info dict.
+
+        The timestamp and the view/comment counts are searched for with
+        ``fatal=False``. Width and height are filled in only when the
+        media URL ends in ``_<width>X<height>.mp4``; otherwise both are
+        None.
+        """
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+
+        webpage = self._download_webpage(url, video_id)
+
+        # The URL is looked for in a CK_vidSrc JS variable or a <video> tag
+        video_url = self._html_search_regex(
+            [r'var\s+CK_vidSrc\s*=\s*"([^"]+)"', r'<video\s+src="([^"]+)"'], webpage, 'video URL')
+
+        title = self._og_search_title(webpage)
+        description = self._og_search_description(webpage)
+
+        timestamp = self._html_search_regex(r'<time datetime="([^"]+)"', webpage, 'upload date', fatal=False)
+        if timestamp:
+            # NOTE(review): the last six characters are cut off before
+            # parsing - presumably a "+HH:MM" UTC offset, so the time zone
+            # is ignored. Confirm this is intended.
+            timestamp = parse_iso8601(timestamp[:-6])
+
+        view_count = str_to_int(self._html_search_regex(
+            r'<span class="views" id="viewCounts">([\d,\.]+) Views</span>', webpage, 'view count', fatal=False))
+        comment_count = str_to_int(self._html_search_regex(
+            r'<span id="commentCounts">([\d,\.]+)</span>', webpage, 'comment count', fatal=False))
+
+        # The resolution, when present, is encoded in the media file name
+        m = re.search(r'_(?P<width>\d+)X(?P<height>\d+)\.mp4$', video_url)
+        if m:
+            width = int(m.group('width'))
+            height = int(m.group('height'))
+        else:
+            width = height = None
+
+        return {
+            'id': video_id,
+            'url': video_url,
+            'title': title,
+            'description': description,
+            'timestamp': timestamp,
+            'view_count': view_count,
+            'comment_count': comment_count,
+            'height': height,
+            'width': width,
+        }
diff --git a/youtube_dl/extractor/dfb.py b/youtube_dl/extractor/dfb.py
new file mode 100644 (file)
index 0000000..cb8e068
--- /dev/null
@@ -0,0 +1,53 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+
+
+class DFBIE(InfoExtractor):
+    """Information extractor for videos on tv.dfb.de."""
+
+    IE_NAME = 'tv.dfb.de'
+    _VALID_URL = r'https?://tv\.dfb\.de/video/[^/]+/(?P<id>\d+)'
+
+    _TEST = {
+        'url': 'http://tv.dfb.de/video/highlights-des-empfangs-in-berlin/9070/',
+        # The md5 is different each time
+        'info_dict': {
+            'id': '9070',
+            'ext': 'flv',
+            'title': 'Highlights des Empfangs in Berlin',
+            'upload_date': '20140716',
+        },
+    }
+
+    def _real_extract(self, url):
+        """Return the info dict for the video page at *url*.
+
+        Three documents are fetched: the page itself (used for the
+        thumbnail), the player XML (title, date, manifest location) and
+        the document it points to, whose <token> element supplies the
+        pieces of the final stream URL.
+        """
+        video_id = re.match(self._VALID_URL, url).group('id')
+
+        webpage = self._download_webpage(url, video_id)
+        video_info = self._download_xml(
+            'http://tv.dfb.de/server/hd_video.php?play=%s' % video_id,
+            video_id).find('video')
+
+        token = self._download_xml(
+            video_info.find('url').text, video_id).find('token').attrib
+        manifest_url = '%s?hdnea=%s&hdcore=3.2.0' % (token['url'], token['auth'])
+
+        return {
+            'id': video_id,
+            'title': video_info.find('title').text,
+            'url': manifest_url,
+            'ext': 'flv',
+            'thumbnail': self._og_search_thumbnail(webpage),
+            # time_date is dot-separated; its parts are joined in reverse
+            'upload_date': ''.join(reversed(
+                video_info.find('time_date').text.split('.'))),
+        }
index 41208c97691aafc1c2c96ed06d4a326bca8886a6..1711f0263bb3d2ee4656890ed09dd06c3d46f928 100644 (file)
@@ -5,24 +5,26 @@ import os.path
 import re
 
 from .common import InfoExtractor
+from ..utils import compat_urllib_parse
 
 
 class DropboxIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?dropbox[.]com/s/(?P<id>[a-zA-Z0-9]{15})/(?P<title>[^?#]*)'
     _TEST = {
-        'url': 'https://www.dropbox.com/s/0qr9sai2veej4f8/THE_DOCTOR_GAMES.mp4',
-        'md5': '8ae17c51172fb7f93bdd6a214cc8c896',
+        'url': 'https://www.dropbox.com/s/nelirfsxnmcfbfh/youtube-dl%20test%20video%20%27%C3%A4%22BaW_jenozKc.mp4',
+        'md5': '8a3d905427a6951ccb9eb292f154530b',
         'info_dict': {
-            'id': '0qr9sai2veej4f8',
+            'id': 'nelirfsxnmcfbfh',
             'ext': 'mp4',
-            'title': 'THE_DOCTOR_GAMES'
+            'title': 'youtube-dl test video \'ä"BaW_jenozKc'
         }
     }
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
         video_id = mobj.group('id')
-        title = os.path.splitext(mobj.group('title'))[0]
+        fn = compat_urllib_parse.unquote(mobj.group('title'))
+        title = os.path.splitext(fn)[0]
         video_url = url + '?dl=1'
 
         return {
index d26145db1cc399e1202ef5ca41ce5a167e4bdaeb..6d73c8a4a32f83975025a0b1ed932fc291176f8a 100644 (file)
@@ -8,7 +8,6 @@ from ..utils import (
     ExtractorError,
     compat_urllib_parse,
     compat_urllib_request,
-    determine_ext,
 )
 
 
index f3e0f38b7200a70c897dd561b45a275cf42f7193..1fbe6d1759b8900160b7bc94b0a2396406acc016 100644 (file)
@@ -48,7 +48,7 @@ class PluzzIE(FranceTVBaseInfoExtractor):
 
 class FranceTvInfoIE(FranceTVBaseInfoExtractor):
     IE_NAME = 'francetvinfo.fr'
-    _VALID_URL = r'https?://www\.francetvinfo\.fr/.*/(?P<title>.+)\.html'
+    _VALID_URL = r'https?://(?:www|mobile)\.francetvinfo\.fr/.*/(?P<title>.+)\.html'
 
     _TESTS = [{
         'url': 'http://www.francetvinfo.fr/replay-jt/france-3/soir-3/jt-grand-soir-3-lundi-26-aout-2013_393427.html',
@@ -211,7 +211,7 @@ class GenerationQuoiIE(InfoExtractor):
 
 class CultureboxIE(FranceTVBaseInfoExtractor):
     IE_NAME = 'culturebox.francetvinfo.fr'
-    _VALID_URL = r'https?://culturebox\.francetvinfo\.fr/(?P<name>.*?)(\?|$)'
+    _VALID_URL = r'https?://(?:m\.)?culturebox\.francetvinfo\.fr/(?P<name>.*?)(\?|$)'
 
     _TEST = {
         'url': 'http://culturebox.francetvinfo.fr/einstein-on-the-beach-au-theatre-du-chatelet-146813',
index 6e6b6666003d0837bffd6c25ddd12fe1ce892e50..721e5fce011e113bf8c413543df496fc3eeca17d 100644 (file)
@@ -26,7 +26,7 @@ class FunnyOrDieIE(InfoExtractor):
             'id': 'e402820827',
             'ext': 'mp4',
             'title': 'Please Use This Song (Jon Lajoie)',
-            'description': 'md5:2ed27d364f5a805a6dba199faaf6681d',
+            'description': 'Please use this to sell something.  www.jonlajoie.com',
             'thumbnail': 're:^http:.*\.jpg$',
         },
     }]
index f97b59845706b8e33d4438b5cab968a2251b1ad9..9db27f9aa32730460af728f690ec131014667185 100644 (file)
@@ -402,7 +402,7 @@ class GenericIE(InfoExtractor):
             elif default_search == 'error':
                 raise ExtractorError(
                     ('%r is not a valid URL. '
-                     'Set --default-search "ytseach" (or run  youtube-dl "ytsearch:%s" ) to search YouTube'
+                     'Set --default-search "ytsearch" (or run  youtube-dl "ytsearch:%s" ) to search YouTube'
                     ) % (url, url), expected=True)
             else:
                 assert ':' in default_search
index 2c100d424650fed5d98330b1e5124df117296c75..1ea1bbab4dc31123d8c45669315226bfe2fdda68 100644 (file)
@@ -28,11 +28,13 @@ class LivestreamIE(InfoExtractor):
     }
 
     def _extract_video_info(self, video_data):
-        video_url = video_data.get('progressive_url_hd') or video_data.get('progressive_url')
+        video_url = (
+            video_data.get('progressive_url_hd') or
+            video_data.get('progressive_url')
+        )
         return {
             'id': compat_str(video_data['id']),
             'url': video_url,
-            'ext': 'mp4',
             'title': video_data['caption'],
             'thumbnail': video_data['thumbnail_url'],
             'upload_date': video_data['updated_at'].replace('-', '')[:8],
@@ -50,7 +52,8 @@ class LivestreamIE(InfoExtractor):
                 r'window.config = ({.*?});', webpage, 'window config')
             info = json.loads(config_json)['event']
             videos = [self._extract_video_info(video_data['data'])
-                for video_data in info['feed']['data'] if video_data['type'] == 'video']
+                for video_data in info['feed']['data']
+                if video_data['type'] == 'video']
             return self.playlist_result(videos, info['id'], info['full_name'])
         else:
             og_video = self._og_search_video_url(webpage, 'player url')
diff --git a/youtube_dl/extractor/mlb.py b/youtube_dl/extractor/mlb.py
new file mode 100644 (file)
index 0000000..c28be3a
--- /dev/null
@@ -0,0 +1,119 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    parse_duration,
+    parse_iso8601,
+    find_xpath_attr,
+)
+
+
+class MLBIE(InfoExtractor):
+    """Information extractor for video pages on m.mlb.com."""
+
+    _VALID_URL = r'https?://m\.mlb\.com/video/(?:topic/[\da-z_-]+/)?v(?P<id>n?\d+)'
+    _TESTS = [
+        {
+            'url': 'http://m.mlb.com/video/topic/81536970/v34496663/mianym-stanton-practices-for-the-home-run-derby',
+            'md5': 'd9c022c10d21f849f49c05ae12a8a7e9',
+            'info_dict': {
+                'id': '34496663',
+                'ext': 'mp4',
+                'title': 'Stanton prepares for Derby',
+                'description': 'md5:d00ce1e5fd9c9069e9c13ab4faedfa57',
+                'duration': 46,
+                'timestamp': 1405105800,
+                'upload_date': '20140711',
+                'thumbnail': 're:^https?://.*\.jpg$',
+            },
+        },
+        {
+            'url': 'http://m.mlb.com/video/topic/vtp_hrd_sponsor/v34578115/hrd-cespedes-wins-2014-gillette-home-run-derby',
+            'md5': '0e6e73d509321e142409b695eadd541f',
+            'info_dict': {
+                'id': '34578115',
+                'ext': 'mp4',
+                'title': 'Cespedes repeats as Derby champ',
+                'description': 'md5:08df253ce265d4cf6fb09f581fafad07',
+                'duration': 488,
+                'timestamp': 1405399936,
+                'upload_date': '20140715',
+                'thumbnail': 're:^https?://.*\.jpg$',
+            },
+        },
+        {
+            'url': 'http://m.mlb.com/video/v34577915/bautista-on-derby-captaining-duties-his-performance',
+            'md5': 'b8fd237347b844365d74ea61d4245967',
+            'info_dict': {
+                'id': '34577915',
+                'ext': 'mp4',
+                'title': 'Bautista on Home Run Derby',
+                'description': 'md5:b80b34031143d0986dddc64a8839f0fb',
+                'duration': 52,
+                'timestamp': 1405390722,
+                'upload_date': '20140715',
+                'thumbnail': 're:^https?://.*\.jpg$',
+            },
+        },
+    ]
+
+    def _real_extract(self, url):
+        """Build the info dict from the per-video detail XML document.
+
+        Every <url> element of that document becomes one format whose
+        format_id is the element's playback_scenario attribute. Bitrate
+        and resolution are added when that attribute encodes them as
+        ``<vbr>K_<width>X<height>``.
+        """
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+
+        # The last three characters of the id form the directory path
+        detail = self._download_xml(
+            'http://m.mlb.com/gen/multimedia/detail/%s/%s/%s/%s.xml'
+            % (video_id[-3], video_id[-2], video_id[-1], video_id), video_id)
+
+        title = detail.find('./headline').text
+        description = detail.find('./big-blurb').text
+        duration = parse_duration(detail.find('./duration').text)
+        # NOTE(review): the last five characters are dropped before parsing -
+        # presumably a "+HHMM" UTC offset. Confirm ignoring it is intended.
+        timestamp = parse_iso8601(detail.attrib['date'][:-5])
+
+        # The thumbnail is optional metadata: a missing type-45 scenario
+        # must not abort the whole extraction.
+        thumbnail_el = find_xpath_attr(
+            detail, './thumbnailScenarios/thumbnailScenario', 'type', '45')
+        thumbnail = thumbnail_el.text if thumbnail_el is not None else None
+
+        formats = []
+        for media_url in detail.findall('./url'):
+            playback_scenario = media_url.attrib['playback_scenario']
+            fmt = {
+                'url': media_url.text,
+                'format_id': playback_scenario,
+            }
+            m = re.search(r'(?P<vbr>\d+)K_(?P<width>\d+)X(?P<height>\d+)', playback_scenario)
+            if m:
+                fmt.update({
+                    # The "<n>K" token is taken to be KBit/s already, the
+                    # unit of the vbr field, so it is stored unscaled.
+                    'vbr': int(m.group('vbr')),
+                    'width': int(m.group('width')),
+                    'height': int(m.group('height')),
+                })
+            formats.append(fmt)
+
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': description,
+            'duration': duration,
+            'timestamp': timestamp,
+            'formats': formats,
+            'thumbnail': thumbnail,
+        }
index fbcbe1f40c3c637c205686c09d7131c94f2771e6..12e85a716fec900cf01d72157ab4159bc69ae8f8 100644 (file)
@@ -32,7 +32,7 @@ class NPOIE(InfoExtractor):
             'http://e.omroep.nl/metadata/aflevering/%s' % video_id,
             video_id,
             # We have to remove the javascript callback
-            transform_source=lambda j: re.sub(r'parseMetadata\((.*?)\);\n//epc', r'\1', j)
+            transform_source=lambda j: re.sub(r'parseMetadata\((.*?)\);\n//.*$', r'\1', j)
         )
         token_page = self._download_webpage(
             'http://ida.omroep.nl/npoplayer/i.js',
index 4295cf93a75188844bfe0838789e2efcb4363229..d1e12dd8d5a6699ba3caa8d041c8bc039e996fc7 100644 (file)
@@ -35,9 +35,7 @@ class RedTubeIE(InfoExtractor):
             r'<h1 class="videoTitle[^"]*">(.+?)</h1>',
             webpage, u'title')
 
-        video_thumbnail = self._html_search_regex(
-            r'playerInnerHTML.+?<img\s+src="(.+?)"',
-            webpage, u'thumbnail', fatal=False)
+        video_thumbnail = self._og_search_thumbnail(webpage)
 
         # No self-labeling, but they describe themselves as
         # "Home of Videos Porno"
index 205f8a167601f9f7c6a20ccf721439a7b070516f..dce64e1517003015722db1097ac83b106cc91136 100644 (file)
@@ -30,7 +30,7 @@ class RTBFIE(InfoExtractor):
         page = self._download_webpage('https://www.rtbf.be/video/embed?id=%s' % video_id, video_id)
 
         data = json.loads(self._html_search_regex(
-            r'<div class="js-player-embed" data-video="([^"]+)"', page, 'data video'))['data']
+            r'<div class="js-player-embed(?: player-embed)?" data-video="([^"]+)"', page, 'data video'))['data']
 
         video_url = data.get('downloadUrl') or data.get('url')
 
index 77fd08ddec09c11d518fc502c509f321ffd525df..c2228b2f0f6a1fc9bba02cddcb5a1740cc85038d 100644 (file)
@@ -17,7 +17,7 @@ class RTVEALaCartaIE(InfoExtractor):
 
     _TEST = {
         'url': 'http://www.rtve.es/alacarta/videos/balonmano/o-swiss-cup-masculina-final-espana-suecia/2491869/',
-        'md5': '18fcd45965bdd076efdb12cd7f6d7b9e',
+        'md5': '1d49b7e1ca7a7502c56a4bf1b60f1b43',
         'info_dict': {
             'id': '2491869',
             'ext': 'mp4',
diff --git a/youtube_dl/extractor/sapo.py b/youtube_dl/extractor/sapo.py
new file mode 100644 (file)
index 0000000..172cc12
--- /dev/null
@@ -0,0 +1,132 @@
+# encoding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    parse_duration,
+    unified_strdate,
+)
+
+
+class SapoIE(InfoExtractor):
+    """Information extractor for the SAPO video sites (videos.sapo.*)."""
+
+    IE_DESC = 'SAPO Vídeos'
+    _VALID_URL = r'https?://(?:(?:v2|www)\.)?videos\.sapo\.(?:pt|cv|ao|mz|tl)/(?P<id>[\da-zA-Z]{20})'
+
+    _TESTS = [
+        {
+            'url': 'http://videos.sapo.pt/UBz95kOtiWYUMTA5Ghfi',
+            'md5': '79ee523f6ecb9233ac25075dee0eda83',
+            'note': 'SD video',
+            'info_dict': {
+                'id': 'UBz95kOtiWYUMTA5Ghfi',
+                'ext': 'mp4',
+                'title': 'Benfica - Marcas na Hitória',
+                'description': 'md5:c9082000a128c3fd57bf0299e1367f22',
+                'duration': 264,
+                'uploader': 'tiago_1988',
+                'upload_date': '20080229',
+                'categories': ['benfica', 'cabral', 'desporto', 'futebol', 'geovanni', 'hooijdonk', 'joao', 'karel', 'lisboa', 'miccoli'],
+            },
+        },
+        {
+            'url': 'http://videos.sapo.pt/IyusNAZ791ZdoCY5H5IF',
+            'md5': '90a2f283cfb49193fe06e861613a72aa',
+            'note': 'HD video',
+            'info_dict': {
+                'id': 'IyusNAZ791ZdoCY5H5IF',
+                'ext': 'mp4',
+                'title': 'Codebits VII - Report',
+                'description': 'md5:6448d6fd81ce86feac05321f354dbdc8',
+                'duration': 144,
+                'uploader': 'codebits',
+                'upload_date': '20140427',
+                'categories': ['codebits', 'codebits2014'],
+            },
+        },
+        {
+            'url': 'http://v2.videos.sapo.pt/yLqjzPtbTimsn2wWBKHz',
+            'md5': 'e5aa7cc0bdc6db9b33df1a48e49a15ac',
+            'note': 'v2 video',
+            'info_dict': {
+                'id': 'yLqjzPtbTimsn2wWBKHz',
+                'ext': 'mp4',
+                'title': 'Hipnose Condicionativa 4',
+                'description': 'md5:ef0481abf8fb4ae6f525088a6dadbc40',
+                'duration': 692,
+                'uploader': 'sapozen',
+                'upload_date': '20090609',
+                'categories': ['condicionativa', 'heloisa', 'hipnose', 'miranda', 'sapo', 'zen'],
+            },
+        },
+    ]
+
+    def _real_extract(self, url):
+        """Build the info dict from the video's RSS item.
+
+        All metadata comes from the <item> element of the feed fetched
+        from rd3.videos.sapo.pt. An "sd" format is always produced; an
+        "hd" format (1280x720) is added when the item's HD flag is 'true'.
+        """
+        video_id = re.match(self._VALID_URL, url).group('id')
+
+        item = self._download_xml(
+            'http://rd3.videos.sapo.pt/%s/rss2' % video_id, video_id).find('./channel/item')
+
+        def sapo_text(tag):
+            # Text of the item's child *tag* in the SAPO mrss namespace
+            return item.find('./{http://videos.sapo.pt/mrss/}%s' % tag).text
+
+        title = item.find('./title').text
+        description = sapo_text('synopse')
+        thumbnail = item.find('./{http://search.yahoo.com/mrss/}content').get('url')
+        duration = parse_duration(sapo_text('time'))
+        uploader = sapo_text('author')
+        upload_date = unified_strdate(item.find('./pubDate').text)
+        view_count = int(sapo_text('views'))
+        comment_count = int(sapo_text('comment_count'))
+        tags = sapo_text('tags')
+        # tags are whitespace-separated; an empty element yields no categories
+        categories = tags.split() if tags else []
+        age_limit = 18 if sapo_text('m18') == 'true' else 0
+
+        video_url = sapo_text('videoFile')
+        video_size = sapo_text('videoSize').split('x')
+
+        formats = [{
+            'url': video_url,
+            'ext': 'mp4',
+            'format_id': 'sd',
+            'width': int(video_size[0]),
+            'height': int(video_size[1]),
+        }]
+
+        # The HD rendition is the same URL with /mov/1 replaced by /mov/39
+        if sapo_text('HD') == 'true':
+            formats.append({
+                'url': re.sub(r'/mov/1$', '/mov/39', video_url),
+                'ext': 'mp4',
+                'format_id': 'hd',
+                'width': 1280,
+                'height': 720,
+            })
+
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'duration': duration,
+            'uploader': uploader,
+            'upload_date': upload_date,
+            'view_count': view_count,
+            'comment_count': comment_count,
+            'categories': categories,
+            'age_limit': age_limit,
+            'formats': formats,
+        }
index 198a08c1c9ea1032b130903cd1c013f6ed22c31e..ccd545971f2525749451c467a132511657b3d9fb 100644 (file)
@@ -20,7 +20,7 @@ class SaveFromIE(InfoExtractor):
             'upload_date': '20120816',
             'uploader': 'Howcast',
             'uploader_id': 'Howcast',
-            'description': 'md5:4f0aac94361a12e1ce57d74f85265175',
+            'description': 'md5:727900f130df3dc9a25e2721497c7910',
         },
         'params': {
             'skip_download': True
diff --git a/youtube_dl/extractor/snotr.py b/youtube_dl/extractor/snotr.py
new file mode 100644 (file)
index 0000000..da3b05a
--- /dev/null
@@ -0,0 +1,77 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    float_or_none,
+    str_to_int,
+    parse_duration,
+)
+
+
+class SnotrIE(InfoExtractor):
+    """Information extractor for video pages on snotr.com."""
+
+    _VALID_URL = r'https?://(?:www\.)?snotr\.com/video/(?P<id>\d+)/([\w]+)'
+    _TESTS = [{
+        'url': 'http://www.snotr.com/video/13708/Drone_flying_through_fireworks',
+        'info_dict': {
+            'id': '13708',
+            'ext': 'flv',
+            'title': 'Drone flying through fireworks!',
+            'duration': 247,
+            'filesize_approx': 98566144,
+            'description': 'A drone flying through Fourth of July Fireworks',
+        }
+    }, {
+        'url': 'http://www.snotr.com/video/530/David_Letteman_-_George_W_Bush_Top_10',
+        'info_dict': {
+            'id': '530',
+            'ext': 'flv',
+            'title': 'David Letteman - George W. Bush Top 10',
+            'duration': 126,
+            'filesize_approx': 8912896,
+            'description': 'The top 10 George W. Bush moments, brought to you by David Letterman!',
+        }
+    }]
+
+    def _real_extract(self, url):
+        """Scrape the video page at *url* and return its info dict.
+
+        The media URL is derived from the numeric id alone. View count,
+        duration and approximate file size are read from the page with
+        ``fatal=False``.
+        """
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+
+        webpage = self._download_webpage(url, video_id)
+        title = self._og_search_title(webpage)
+
+        description = self._og_search_description(webpage)
+        video_url = "http://cdn.videos.snotr.com/%s.flv" % video_id
+
+        view_count = str_to_int(self._html_search_regex(
+            r'<p>\n<strong>Views:</strong>\n([\d,\.]+)</p>',
+            webpage, 'view count', fatal=False))
+
+        duration = parse_duration(self._html_search_regex(
+            r'<p>\n<strong>Length:</strong>\n\s*([0-9:]+).*?</p>',
+            webpage, 'duration', fatal=False))
+
+        # The page gives the size in megabytes; invscale scales it to bytes
+        filesize_approx = float_or_none(self._html_search_regex(
+            r'<p>\n<strong>Filesize:</strong>\n\s*([0-9.]+)\s*megabyte</p>',
+            webpage, 'filesize', fatal=False), invscale=1024 * 1024)
+
+        return {
+            'id': video_id,
+            'description': description,
+            'title': title,
+            'url': video_url,
+            'view_count': view_count,
+            'duration': duration,
+            'filesize_approx': filesize_approx,
+        }
index af689e2c20411ef4e8ce1badc82a9d24f9a6da31..183dcb03cccb61a2f843d5c1b511050fc4bce75d 100644 (file)
@@ -53,7 +53,7 @@ class SteamIE(InfoExtractor):
             'ext': 'mp4',
             'upload_date': '20140329',
             'title': 'FRONTIERS - Final Greenlight Trailer',
-            'description': 'md5:6df4fe8dd494ae811869672b0767e025',
+            'description': 'md5:dc96a773669d0ca1b36c13c1f30250d9',
             'uploader': 'AAD Productions',
             'uploader_id': 'AtomicAgeDogGames',
         }
index 25b9864add9dc8422a5948111d25ea8243e10441..b870474515ba61ee33641c86554d53d68a6bf46d 100644 (file)
@@ -19,16 +19,6 @@ class TagesschauIE(InfoExtractor):
             'description': 'md5:69da3c61275b426426d711bde96463ab',
             'thumbnail': 're:^http:.*\.jpg$',
         },
-    }, {
-        'url': 'http://www.tagesschau.de/multimedia/video/video-5964.html',
-        'md5': '66652566900963a3f962333579eeffcf',
-        'info_dict': {
-            'id': '5964',
-            'ext': 'mp4',
-            'title': 'Nahost-Konflikt: Israel bombadiert Ziele im Gazastreifen und Westjordanland',
-            'description': 'md5:07bfc78c48eec3145ed4805299a1900a',
-            'thumbnail': 're:http://.*\.jpg',
-        },
     }]
 
     _FORMATS = {
index 2c2113b1404fb3631126636bdb38ba839008a404..46d727d1de6743edcb99109b77caa49ebc1bf0c6 100644 (file)
@@ -62,7 +62,7 @@ class TeacherTubeIE(InfoExtractor):
 
         webpage = self._download_webpage(url, video_id)
 
-        title = self._html_search_meta('title', webpage, 'title')
+        title = self._html_search_meta('title', webpage, 'title', fatal=True)
         TITLE_SUFFIX = ' - TeacherTube'
         if title.endswith(TITLE_SUFFIX):
             title = title[:-len(TITLE_SUFFIX)].strip()
@@ -101,7 +101,11 @@ class TeacherTubeUserIE(InfoExtractor):
 
     _VALID_URL = r'https?://(?:www\.)?teachertube\.com/(user/profile|collection)/(?P<user>[0-9a-zA-Z]+)/?'
 
-    _MEDIA_RE = r'(?s)"sidebar_thumb_time">[0-9:]+</div>.+?<a href="(https?://(?:www\.)?teachertube\.com/(?:video|audio)/[^"]+)">'
+    _MEDIA_RE = r'''(?sx)
+        class="?sidebar_thumb_time"?>[0-9:]+</div>
+        \s*
+        <a\s+href="(https?://(?:www\.)?teachertube\.com/(?:video|audio)/[^"]+)"
+    '''
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
@@ -111,14 +115,12 @@ class TeacherTubeUserIE(InfoExtractor):
         webpage = self._download_webpage(url, user_id)
         urls.extend(re.findall(self._MEDIA_RE, webpage))
         
-        pages = re.findall(r'/ajax-user/user-videos/%s\?page=([0-9]+)' % user_id, webpage)[1:-1]
+        pages = re.findall(r'/ajax-user/user-videos/%s\?page=([0-9]+)' % user_id, webpage)[:-1]
         for p in pages:
             more = 'http://www.teachertube.com/ajax-user/user-videos/%s?page=%s' % (user_id, p)
-            webpage = self._download_webpage(more, user_id, 'Downloading page %s/%s' % (p, len(pages) + 1))
-            urls.extend(re.findall(self._MEDIA_RE, webpage))
-
-        entries = []
-        for url in urls:
-            entries.append(self.url_result(url, 'TeacherTube'))
+            webpage = self._download_webpage(more, user_id, 'Downloading page %s/%s' % (p, len(pages)))
+            video_urls = re.findall(self._MEDIA_RE, webpage)
+            urls.extend(video_urls)
 
+        entries = [self.url_result(vurl, 'TeacherTube') for vurl in urls]
         return self.playlist_result(entries, user_id)
index 8477840fc65ad377f96219033fc656bb6676f52d..81ba169fbec68c9bd7fea395c8bb135d73b3e828 100644 (file)
@@ -1,8 +1,6 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
-import re
-
 from .common import InfoExtractor
 
 
index 255855558cf64ddfe847db56e00e029f4bbbdf22..a3c6e83b01194d37b683912d131c93664dbf0680 100644 (file)
@@ -98,7 +98,7 @@ class VimeoIE(VimeoBaseInfoExtractor, SubtitlesInfoExtractor):
             'info_dict': {
                 'id': '54469442',
                 'ext': 'mp4',
-                'title': 'Kathy Sierra: Building the minimum Badass User, Business of Software',
+                'title': 'Kathy Sierra: Building the minimum Badass User, Business of Software 2012',
                 'uploader': 'The BLN & Business of Software',
                 'uploader_id': 'theblnbusinessofsoftware',
                 'duration': 3610,
index f741ba54007737e132f327178be283a478b6527f..ab28ef6fe4d3fb16413e72718b80115e9e96ad85 100644 (file)
@@ -55,14 +55,14 @@ class WDRIE(InfoExtractor):
             },
         },
         {
-            'url': 'http://www.funkhauseuropa.de/av/audiosuepersongsoulbossanova100-audioplayer.html',
-            'md5': '24e83813e832badb0a8d7d1ef9ef0691',
+            'url': 'http://www.funkhauseuropa.de/av/audioflaviacoelhoamaramar100-audioplayer.html',
+            'md5': '99a1443ff29af19f6c52cf6f4dc1f4aa',
             'info_dict': {
-                'id': 'mdb-463528',
+                'id': 'mdb-478135',
                 'ext': 'mp3',
-                'title': 'Süpersong: Soul Bossa Nova',
+                'title': 'Flavia Coelho: Amar é Amar',
                 'description': 'md5:7b29e97e10dfb6e265238b32fa35b23a',
-                'upload_date': '20140630',
+                'upload_date': '20140717',
             },
         },
     ]
index 6123e12564b7934032ed619b672b6277a75bace0..072e711c2e0105c3a69908cff70bf3758542587d 100644 (file)
@@ -1,19 +1,17 @@
 # coding: utf-8
 
-import collections
 import errno
 import io
 import itertools
 import json
 import os.path
 import re
-import struct
 import traceback
-import zlib
 
 from .common import InfoExtractor, SearchInfoExtractor
 from .subtitles import SubtitlesInfoExtractor
 from ..jsinterp import JSInterpreter
+from ..swfinterp import SWFInterpreter
 from ..utils import (
     compat_chr,
     compat_parse_qs,
@@ -347,8 +345,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
         self.to_screen(u'RTMP download detected')
 
     def _extract_signature_function(self, video_id, player_url, slen):
-        id_m = re.match(r'.*-(?P<id>[a-zA-Z0-9_-]+)\.(?P<ext>[a-z]+)$',
-                        player_url)
+        id_m = re.match(
+            r'.*-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3)?\.(?P<ext>[a-z]+)$',
+            player_url)
         player_type = id_m.group('ext')
         player_id = id_m.group('id')
 
@@ -449,417 +448,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
         return lambda s: initial_function([s])
 
     def _parse_sig_swf(self, file_contents):
-        if file_contents[1:3] != b'WS':
-            raise ExtractorError(
-                u'Not an SWF file; header is %r' % file_contents[:3])
-        if file_contents[:1] == b'C':
-            content = zlib.decompress(file_contents[8:])
-        else:
-            raise NotImplementedError(u'Unsupported compression format %r' %
-                                      file_contents[:1])
-
-        def extract_tags(content):
-            pos = 0
-            while pos < len(content):
-                header16 = struct.unpack('<H', content[pos:pos+2])[0]
-                pos += 2
-                tag_code = header16 >> 6
-                tag_len = header16 & 0x3f
-                if tag_len == 0x3f:
-                    tag_len = struct.unpack('<I', content[pos:pos+4])[0]
-                    pos += 4
-                assert pos+tag_len <= len(content)
-                yield (tag_code, content[pos:pos+tag_len])
-                pos += tag_len
-
-        code_tag = next(tag
-                        for tag_code, tag in extract_tags(content)
-                        if tag_code == 82)
-        p = code_tag.index(b'\0', 4) + 1
-        code_reader = io.BytesIO(code_tag[p:])
-
-        # Parse ABC (AVM2 ByteCode)
-        def read_int(reader=None):
-            if reader is None:
-                reader = code_reader
-            res = 0
-            shift = 0
-            for _ in range(5):
-                buf = reader.read(1)
-                assert len(buf) == 1
-                b = struct.unpack('<B', buf)[0]
-                res = res | ((b & 0x7f) << shift)
-                if b & 0x80 == 0:
-                    break
-                shift += 7
-            return res
-
-        def u30(reader=None):
-            res = read_int(reader)
-            assert res & 0xf0000000 == 0
-            return res
-        u32 = read_int
-
-        def s32(reader=None):
-            v = read_int(reader)
-            if v & 0x80000000 != 0:
-                v = - ((v ^ 0xffffffff) + 1)
-            return v
-
-        def read_string(reader=None):
-            if reader is None:
-                reader = code_reader
-            slen = u30(reader)
-            resb = reader.read(slen)
-            assert len(resb) == slen
-            return resb.decode('utf-8')
-
-        def read_bytes(count, reader=None):
-            if reader is None:
-                reader = code_reader
-            resb = reader.read(count)
-            assert len(resb) == count
-            return resb
-
-        def read_byte(reader=None):
-            resb = read_bytes(1, reader=reader)
-            res = struct.unpack('<B', resb)[0]
-            return res
-
-        # minor_version + major_version
-        read_bytes(2 + 2)
-
-        # Constant pool
-        int_count = u30()
-        for _c in range(1, int_count):
-            s32()
-        uint_count = u30()
-        for _c in range(1, uint_count):
-            u32()
-        double_count = u30()
-        read_bytes((double_count-1) * 8)
-        string_count = u30()
-        constant_strings = [u'']
-        for _c in range(1, string_count):
-            s = read_string()
-            constant_strings.append(s)
-        namespace_count = u30()
-        for _c in range(1, namespace_count):
-            read_bytes(1)  # kind
-            u30()  # name
-        ns_set_count = u30()
-        for _c in range(1, ns_set_count):
-            count = u30()
-            for _c2 in range(count):
-                u30()
-        multiname_count = u30()
-        MULTINAME_SIZES = {
-            0x07: 2,  # QName
-            0x0d: 2,  # QNameA
-            0x0f: 1,  # RTQName
-            0x10: 1,  # RTQNameA
-            0x11: 0,  # RTQNameL
-            0x12: 0,  # RTQNameLA
-            0x09: 2,  # Multiname
-            0x0e: 2,  # MultinameA
-            0x1b: 1,  # MultinameL
-            0x1c: 1,  # MultinameLA
-        }
-        multinames = [u'']
-        for _c in range(1, multiname_count):
-            kind = u30()
-            assert kind in MULTINAME_SIZES, u'Invalid multiname kind %r' % kind
-            if kind == 0x07:
-                u30()  # namespace_idx
-                name_idx = u30()
-                multinames.append(constant_strings[name_idx])
-            else:
-                multinames.append('[MULTINAME kind: %d]' % kind)
-                for _c2 in range(MULTINAME_SIZES[kind]):
-                    u30()
-
-        # Methods
-        method_count = u30()
-        MethodInfo = collections.namedtuple(
-            'MethodInfo',
-            ['NEED_ARGUMENTS', 'NEED_REST'])
-        method_infos = []
-        for method_id in range(method_count):
-            param_count = u30()
-            u30()  # return type
-            for _ in range(param_count):
-                u30()  # param type
-            u30()  # name index (always 0 for youtube)
-            flags = read_byte()
-            if flags & 0x08 != 0:
-                # Options present
-                option_count = u30()
-                for c in range(option_count):
-                    u30()  # val
-                    read_bytes(1)  # kind
-            if flags & 0x80 != 0:
-                # Param names present
-                for _ in range(param_count):
-                    u30()  # param name
-            mi = MethodInfo(flags & 0x01 != 0, flags & 0x04 != 0)
-            method_infos.append(mi)
-
-        # Metadata
-        metadata_count = u30()
-        for _c in range(metadata_count):
-            u30()  # name
-            item_count = u30()
-            for _c2 in range(item_count):
-                u30()  # key
-                u30()  # value
-
-        def parse_traits_info():
-            trait_name_idx = u30()
-            kind_full = read_byte()
-            kind = kind_full & 0x0f
-            attrs = kind_full >> 4
-            methods = {}
-            if kind in [0x00, 0x06]:  # Slot or Const
-                u30()  # Slot id
-                u30()  # type_name_idx
-                vindex = u30()
-                if vindex != 0:
-                    read_byte()  # vkind
-            elif kind in [0x01, 0x02, 0x03]:  # Method / Getter / Setter
-                u30()  # disp_id
-                method_idx = u30()
-                methods[multinames[trait_name_idx]] = method_idx
-            elif kind == 0x04:  # Class
-                u30()  # slot_id
-                u30()  # classi
-            elif kind == 0x05:  # Function
-                u30()  # slot_id
-                function_idx = u30()
-                methods[function_idx] = multinames[trait_name_idx]
-            else:
-                raise ExtractorError(u'Unsupported trait kind %d' % kind)
-
-            if attrs & 0x4 != 0:  # Metadata present
-                metadata_count = u30()
-                for _c3 in range(metadata_count):
-                    u30()  # metadata index
-
-            return methods
-
-        # Classes
+        swfi = SWFInterpreter(file_contents)
         TARGET_CLASSNAME = u'SignatureDecipher'
-        searched_idx = multinames.index(TARGET_CLASSNAME)
-        searched_class_id = None
-        class_count = u30()
-        for class_id in range(class_count):
-            name_idx = u30()
-            if name_idx == searched_idx:
-                # We found the class we're looking for!
-                searched_class_id = class_id
-            u30()  # super_name idx
-            flags = read_byte()
-            if flags & 0x08 != 0:  # Protected namespace is present
-                u30()  # protected_ns_idx
-            intrf_count = u30()
-            for _c2 in range(intrf_count):
-                u30()
-            u30()  # iinit
-            trait_count = u30()
-            for _c2 in range(trait_count):
-                parse_traits_info()
-
-        if searched_class_id is None:
-            raise ExtractorError(u'Target class %r not found' %
-                                 TARGET_CLASSNAME)
-
-        method_names = {}
-        method_idxs = {}
-        for class_id in range(class_count):
-            u30()  # cinit
-            trait_count = u30()
-            for _c2 in range(trait_count):
-                trait_methods = parse_traits_info()
-                if class_id == searched_class_id:
-                    method_names.update(trait_methods.items())
-                    method_idxs.update(dict(
-                        (idx, name)
-                        for name, idx in trait_methods.items()))
-
-        # Scripts
-        script_count = u30()
-        for _c in range(script_count):
-            u30()  # init
-            trait_count = u30()
-            for _c2 in range(trait_count):
-                parse_traits_info()
-
-        # Method bodies
-        method_body_count = u30()
-        Method = collections.namedtuple('Method', ['code', 'local_count'])
-        methods = {}
-        for _c in range(method_body_count):
-            method_idx = u30()
-            u30()  # max_stack
-            local_count = u30()
-            u30()  # init_scope_depth
-            u30()  # max_scope_depth
-            code_length = u30()
-            code = read_bytes(code_length)
-            if method_idx in method_idxs:
-                m = Method(code, local_count)
-                methods[method_idxs[method_idx]] = m
-            exception_count = u30()
-            for _c2 in range(exception_count):
-                u30()  # from
-                u30()  # to
-                u30()  # target
-                u30()  # exc_type
-                u30()  # var_name
-            trait_count = u30()
-            for _c2 in range(trait_count):
-                parse_traits_info()
-
-        assert p + code_reader.tell() == len(code_tag)
-        assert len(methods) == len(method_idxs)
-
-        method_pyfunctions = {}
-
-        def extract_function(func_name):
-            if func_name in method_pyfunctions:
-                return method_pyfunctions[func_name]
-            if func_name not in methods:
-                raise ExtractorError(u'Cannot find function %r' % func_name)
-            m = methods[func_name]
-
-            def resfunc(args):
-                registers = ['(this)'] + list(args) + [None] * m.local_count
-                stack = []
-                coder = io.BytesIO(m.code)
-                while True:
-                    opcode = struct.unpack('!B', coder.read(1))[0]
-                    if opcode == 36:  # pushbyte
-                        v = struct.unpack('!B', coder.read(1))[0]
-                        stack.append(v)
-                    elif opcode == 44:  # pushstring
-                        idx = u30(coder)
-                        stack.append(constant_strings[idx])
-                    elif opcode == 48:  # pushscope
-                        # We don't implement the scope register, so we'll just
-                        # ignore the popped value
-                        stack.pop()
-                    elif opcode == 70:  # callproperty
-                        index = u30(coder)
-                        mname = multinames[index]
-                        arg_count = u30(coder)
-                        args = list(reversed(
-                            [stack.pop() for _ in range(arg_count)]))
-                        obj = stack.pop()
-                        if mname == u'split':
-                            assert len(args) == 1
-                            assert isinstance(args[0], compat_str)
-                            assert isinstance(obj, compat_str)
-                            if args[0] == u'':
-                                res = list(obj)
-                            else:
-                                res = obj.split(args[0])
-                            stack.append(res)
-                        elif mname == u'slice':
-                            assert len(args) == 1
-                            assert isinstance(args[0], int)
-                            assert isinstance(obj, list)
-                            res = obj[args[0]:]
-                            stack.append(res)
-                        elif mname == u'join':
-                            assert len(args) == 1
-                            assert isinstance(args[0], compat_str)
-                            assert isinstance(obj, list)
-                            res = args[0].join(obj)
-                            stack.append(res)
-                        elif mname in method_pyfunctions:
-                            stack.append(method_pyfunctions[mname](args))
-                        else:
-                            raise NotImplementedError(
-                                u'Unsupported property %r on %r'
-                                % (mname, obj))
-                    elif opcode == 72:  # returnvalue
-                        res = stack.pop()
-                        return res
-                    elif opcode == 79:  # callpropvoid
-                        index = u30(coder)
-                        mname = multinames[index]
-                        arg_count = u30(coder)
-                        args = list(reversed(
-                            [stack.pop() for _ in range(arg_count)]))
-                        obj = stack.pop()
-                        if mname == u'reverse':
-                            assert isinstance(obj, list)
-                            obj.reverse()
-                        else:
-                            raise NotImplementedError(
-                                u'Unsupported (void) property %r on %r'
-                                % (mname, obj))
-                    elif opcode == 93:  # findpropstrict
-                        index = u30(coder)
-                        mname = multinames[index]
-                        res = extract_function(mname)
-                        stack.append(res)
-                    elif opcode == 97:  # setproperty
-                        index = u30(coder)
-                        value = stack.pop()
-                        idx = stack.pop()
-                        obj = stack.pop()
-                        assert isinstance(obj, list)
-                        assert isinstance(idx, int)
-                        obj[idx] = value
-                    elif opcode == 98:  # getlocal
-                        index = u30(coder)
-                        stack.append(registers[index])
-                    elif opcode == 99:  # setlocal
-                        index = u30(coder)
-                        value = stack.pop()
-                        registers[index] = value
-                    elif opcode == 102:  # getproperty
-                        index = u30(coder)
-                        pname = multinames[index]
-                        if pname == u'length':
-                            obj = stack.pop()
-                            assert isinstance(obj, list)
-                            stack.append(len(obj))
-                        else:  # Assume attribute access
-                            idx = stack.pop()
-                            assert isinstance(idx, int)
-                            obj = stack.pop()
-                            assert isinstance(obj, list)
-                            stack.append(obj[idx])
-                    elif opcode == 128:  # coerce
-                        u30(coder)
-                    elif opcode == 133:  # coerce_s
-                        assert isinstance(stack[-1], (type(None), compat_str))
-                    elif opcode == 164:  # modulo
-                        value2 = stack.pop()
-                        value1 = stack.pop()
-                        res = value1 % value2
-                        stack.append(res)
-                    elif opcode == 208:  # getlocal_0
-                        stack.append(registers[0])
-                    elif opcode == 209:  # getlocal_1
-                        stack.append(registers[1])
-                    elif opcode == 210:  # getlocal_2
-                        stack.append(registers[2])
-                    elif opcode == 211:  # getlocal_3
-                        stack.append(registers[3])
-                    elif opcode == 214:  # setlocal_2
-                        registers[2] = stack.pop()
-                    elif opcode == 215:  # setlocal_3
-                        registers[3] = stack.pop()
-                    else:
-                        raise NotImplementedError(
-                            u'Unsupported opcode %d' % opcode)
-
-            method_pyfunctions[func_name] = resfunc
-            return resfunc
-
-        initial_function = extract_function(u'decipher')
+        searched_class = swfi.extract_class(TARGET_CLASSNAME)
+        initial_function = swfi.extract_function(searched_class, u'decipher')
         return lambda s: initial_function([s])
 
     def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
@@ -1014,14 +606,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
             age_gate = True
             # We simulate the access to the video from www.youtube.com/v/{video_id}
             # this can be viewed without login into Youtube
-            data = compat_urllib_parse.urlencode({'video_id': video_id,
-                                                  'el': 'player_embedded',
-                                                  'gl': 'US',
-                                                  'hl': 'en',
-                                                  'eurl': 'https://youtube.googleapis.com/v/' + video_id,
-                                                  'asv': 3,
-                                                  'sts':'1588',
-                                                  })
+            data = compat_urllib_parse.urlencode({
+                'video_id': video_id,
+                'eurl': 'https://youtube.googleapis.com/v/' + video_id,
+                'sts':'16268',
+            })
             video_info_url = proto + '://www.youtube.com/get_video_info?' + data
             video_info_webpage = self._download_webpage(video_info_url, video_id,
                                     note=False,
@@ -1220,31 +809,38 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
                         url += '&signature=' + url_data['sig'][0]
                     elif 's' in url_data:
                         encrypted_sig = url_data['s'][0]
+
+                        if not age_gate:
+                            jsplayer_url_json = self._search_regex(
+                                r'"assets":.+?"js":\s*("[^"]+")',
+                                video_webpage, u'JS player URL')
+                            player_url = json.loads(jsplayer_url_json)
+                        if player_url is None:
+                            player_url_json = self._search_regex(
+                                r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")',
+                                video_webpage, u'age gate player URL')
+                            player_url = json.loads(player_url_json)
+
                         if self._downloader.params.get('verbose'):
-                            if age_gate:
-                                if player_url is None:
-                                    player_version = 'unknown'
-                                else:
+                            if player_url is None:
+                                player_version = 'unknown'
+                                player_desc = 'unknown'
+                            else:
+                                if player_url.endswith('swf'):
                                     player_version = self._search_regex(
-                                        r'-(.+)\.swf$', player_url,
+                                        r'-(.+?)(?:/watch_as3)?\.swf$', player_url,
                                         u'flash player', fatal=False)
-                                player_desc = 'flash player %s' % player_version
-                            else:
-                                player_version = self._search_regex(
-                                    r'html5player-(.+?)\.js', video_webpage,
-                                    'html5 player', fatal=False)
-                                player_desc = u'html5 player %s' % player_version
+                                    player_desc = 'flash player %s' % player_version
+                                else:
+                                    player_version = self._search_regex(
+                                        r'html5player-(.+?)\.js', video_webpage,
+                                        'html5 player', fatal=False)
+                                    player_desc = u'html5 player %s' % player_version
 
                             parts_sizes = u'.'.join(compat_str(len(part)) for part in encrypted_sig.split('.'))
                             self.to_screen(u'encrypted signature length %d (%s), itag %s, %s' %
                                 (len(encrypted_sig), parts_sizes, url_data['itag'][0], player_desc))
 
-                        if not age_gate:
-                            jsplayer_url_json = self._search_regex(
-                                r'"assets":.+?"js":\s*("[^"]+")',
-                                video_webpage, u'JS player URL')
-                            player_url = json.loads(jsplayer_url_json)
-
                         signature = self._decrypt_signature(
                             encrypted_sig, video_id, player_url, age_gate)
                         url += '&signature=' + signature
diff --git a/youtube_dl/swfinterp.py b/youtube_dl/swfinterp.py
new file mode 100644 (file)
index 0000000..b63c65b
--- /dev/null
@@ -0,0 +1,609 @@
+from __future__ import unicode_literals
+
+import collections
+import io
+import zlib
+
+from .utils import (
+    compat_str,
+    ExtractorError,
+    struct_unpack,
+)
+
+
+def _extract_tags(file_contents):  # generator: yields (tag_code, tag_bytes) for every tag found in the SWF
+    if file_contents[1:3] != b'WS':  # bytes 1-2 of the signature must be "WS"
+        raise ExtractorError(
+            'Not an SWF file; header is %r' % file_contents[:3])
+    if file_contents[:1] == b'C':  # only a leading "C" is supported; the body is then zlib-inflated
+        content = zlib.decompress(file_contents[8:])  # payload starts after the first 8 bytes
+    else:
+        raise NotImplementedError(
+            'Unsupported compression format %r' %
+            file_contents[:1])
+
+    # Determine number of bits in framesize rectangle
+    framesize_nbits = struct_unpack('!B', content[:1])[0] >> 3  # top 5 bits of the first byte
+    framesize_len = (5 + 4 * framesize_nbits + 7) // 8  # (5 + 4*nbits) bits, rounded up to whole bytes
+
+    pos = framesize_len + 2 + 2  # skip the rectangle plus two 2-byte fields (presumably frame rate and frame count - confirm)
+    while pos < len(content):
+        header16 = struct_unpack('<H', content[pos:pos + 2])[0]  # 16-bit little-endian tag header
+        pos += 2
+        tag_code = header16 >> 6  # upper 10 bits
+        tag_len = header16 & 0x3f  # lower 6 bits
+        if tag_len == 0x3f:  # 0x3f means the real length follows as a 32-bit little-endian value
+            tag_len = struct_unpack('<I', content[pos:pos + 4])[0]
+            pos += 4
+        assert pos + tag_len <= len(content), \
+            ('Tag %d ends at %d+%d - that\'s longer than the file (%d)'
+                % (tag_code, pos, tag_len, len(content)))
+        yield (tag_code, content[pos:pos + tag_len])  # tag body only, header excluded
+        pos += tag_len
+
+
+class _AVMClass_Object(object):  # object created from an _AVMClass; it only records that class
+    def __init__(self, avm_class):
+        self.avm_class = avm_class  # the class this object was made from (read by __repr__)
+
+    def __repr__(self):
+        return '%s#%x' % (self.avm_class.name, id(self))  # "<class name>#<id(self) in hex>"
+
+
+class _ScopeDict(dict):  # plain dict that additionally remembers the class it belongs to
+    def __init__(self, avm_class):
+        super(_ScopeDict, self).__init__()  # always starts out empty
+        self.avm_class = avm_class  # only used to label the repr below
+
+    def __repr__(self):
+        return '%s__Scope(%s)' % (  # "<class name>__Scope(<regular dict repr>)"
+            self.avm_class.name,
+            super(_ScopeDict, self).__repr__())
+
+
+class _AVMClass(object):  # bookkeeping record for one class: its name plus per-class lookup tables
+    def __init__(self, name_idx, name):
+        self.name_idx = name_idx  # stored as given; presumably the index of the class name in the multiname table - confirm
+        self.name = name
+        self.method_names = {}  # filled by register_methods: name -> index
+        self.method_idxs = {}  # filled by register_methods: index -> name (inverse of method_names)
+        self.methods = {}  # not populated in this class; filled elsewhere
+        self.method_pyfunctions = {}  # not populated in this class; filled elsewhere
+
+        self.variables = _ScopeDict(self)  # empty scope dict labelled with this class
+
+    def make_object(self):
+        return _AVMClass_Object(self)  # new object that refers back to this class
+
+    def __repr__(self):
+        return '_AVMClass(%s)' % (self.name)
+
+    def register_methods(self, methods):  # methods: dict whose items are treated as (name, idx) pairs
+        self.method_names.update(methods.items())  # merge name -> idx
+        self.method_idxs.update(dict(  # merge the reversed mapping idx -> name
+            (idx, name)
+            for name, idx in methods.items()))
+
+
+class _Multiname(object):  # placeholder for a multiname entry; keeps only its kind code
+    def __init__(self, kind):
+        self.kind = kind  # numeric multiname kind (formatted as hex in the repr)
+
+    def __repr__(self):
+        return '[MULTINAME kind: 0x%x]' % self.kind
+
+
+def _read_int(reader):  # decode a variable-length integer: 7 payload bits per byte, least significant group first
+    res = 0
+    shift = 0
+    for _ in range(5):  # never consumes more than 5 bytes
+        buf = reader.read(1)
+        assert len(buf) == 1  # fails if the reader is exhausted
+        b = struct_unpack('<B', buf)[0]
+        res = res | ((b & 0x7f) << shift)  # low 7 bits carry the payload
+        if b & 0x80 == 0:  # high bit clear: this was the final byte
+            break
+        shift += 7
+    return res
+
+
+def _u30(reader):  # variable-length integer with an upper-bits check
+    res = _read_int(reader)
+    assert res & 0xf0000000 == 0  # NOTE(review): this mask rejects values >= 2**28, stricter than the 30 bits the name suggests - confirm intent
+    return res
+_u32 = _read_int  # alias: same decoding, no range check
+
+
+def _s32(reader):  # variable-length integer reinterpreted as a signed 32-bit value
+    v = _read_int(reader)
+    if v & 0x80000000 != 0:  # bit 31 set: treat as negative
+        v = - ((v ^ 0xffffffff) + 1)  # two's-complement negation over the low 32 bits
+    return v
+
+
+def _s24(reader):  # read a fixed 3-byte little-endian signed integer
+    bs = reader.read(3)
+    assert len(bs) == 3  # fails on a short read
+    last_byte = b'\xff' if (ord(bs[2:3]) >= 0x80) else b'\x00'  # sign-extend: pad with 0xff when the top byte is >= 0x80
+    return struct_unpack('<i', bs + last_byte)[0]  # unpack the padded 4 bytes with format '<i'
+
+
+def _read_string(reader):  # read a length-prefixed string and return it decoded
+    slen = _u30(reader)  # byte length of the string
+    resb = reader.read(slen)
+    assert len(resb) == slen  # fails on a short read
+    return resb.decode('utf-8')  # raises UnicodeDecodeError on invalid UTF-8
+
+
+def _read_bytes(count, reader):  # read exactly count bytes from reader and return them
+    assert count >= 0  # a negative count is rejected before reading
+    resb = reader.read(count)
+    assert len(resb) == count  # fails on a short read
+    return resb
+
+
+def _read_byte(reader):  # read a single byte and return its numeric value
+    resb = _read_bytes(1, reader=reader)
+    res = struct_unpack('<B', resb)[0]  # format '<B'; struct_unpack is presumably a thin wrapper around struct.unpack - confirm
+    return res
+
+
+class SWFInterpreter(object):  # Parses the ABC bytecode found in an SWF file and interprets a subset of AVM2 opcodes
+    def __init__(self, file_contents):  # file_contents is handed to _extract_tags(); presumably the raw SWF bytes - confirm against caller
+        code_tag = next(tag  # next() without a default: raises StopIteration if no matching tag exists
+                        for tag_code, tag in _extract_tags(file_contents)
+                        if tag_code == 82)  # first tag whose code is 82 (DoABC in the SWF specification)
+        p = code_tag.index(b'\0', 4) + 1  # skip 4 bytes, then everything up to and including the next NUL byte
+        code_reader = io.BytesIO(code_tag[p:])
+
+        # Parse ABC (AVM2 ByteCode)
+
+        # Define a couple of convenience methods bound to code_reader
+        u30 = lambda *args: _u30(*args, reader=code_reader)
+        s32 = lambda *args: _s32(*args, reader=code_reader)
+        u32 = lambda *args: _u32(*args, reader=code_reader)
+        read_bytes = lambda *args: _read_bytes(*args, reader=code_reader)
+        read_byte = lambda *args: _read_byte(*args, reader=code_reader)
+
+        # minor_version + major_version
+        read_bytes(2 + 2)
+
+        # Constant pool
+        int_count = u30()
+        for _c in range(1, int_count):  # reads int_count - 1 entries; values are discarded
+            s32()
+        uint_count = u30()
+        for _c in range(1, uint_count):  # reads uint_count - 1 entries; values are discarded
+            u32()
+        double_count = u30()
+        read_bytes(max(0, (double_count - 1)) * 8)  # skip double_count - 1 entries of 8 bytes each, unparsed
+        string_count = u30()
+        self.constant_strings = ['']  # index 0 is a placeholder; stored entries start at index 1
+        for _c in range(1, string_count):
+            s = _read_string(code_reader)
+            self.constant_strings.append(s)
+        namespace_count = u30()
+        for _c in range(1, namespace_count):
+            read_bytes(1)  # kind
+            u30()  # name
+        ns_set_count = u30()
+        for _c in range(1, ns_set_count):
+            count = u30()
+            for _c2 in range(count):
+                u30()
+        multiname_count = u30()
+        MULTINAME_SIZES = {  # multiname kind -> number of u30 fields that follow the kind
+            0x07: 2,  # QName
+            0x0d: 2,  # QNameA
+            0x0f: 1,  # RTQName
+            0x10: 1,  # RTQNameA
+            0x11: 0,  # RTQNameL
+            0x12: 0,  # RTQNameLA
+            0x09: 2,  # Multiname
+            0x0e: 2,  # MultinameA
+            0x1b: 1,  # MultinameL
+            0x1c: 1,  # MultinameLA
+        }
+        self.multinames = ['']  # index 0 is a placeholder, like constant_strings
+        for _c in range(1, multiname_count):
+            kind = u30()
+            assert kind in MULTINAME_SIZES, 'Invalid multiname kind %r' % kind
+            if kind == 0x07:  # QName: keep only its name string
+                u30()  # namespace_idx
+                name_idx = u30()
+                self.multinames.append(self.constant_strings[name_idx])
+            else:
+                self.multinames.append(_Multiname(kind))  # every other kind is recorded as a _Multiname marker
+                for _c2 in range(MULTINAME_SIZES[kind]):  # skip this kind's u30 fields
+                    u30()
+
+        # Methods
+        method_count = u30()
+        MethodInfo = collections.namedtuple(
+            'MethodInfo',
+            ['NEED_ARGUMENTS', 'NEED_REST'])
+        method_infos = []  # NOTE(review): filled below but never read again within this class
+        for method_id in range(method_count):
+            param_count = u30()
+            u30()  # return type
+            for _ in range(param_count):
+                u30()  # param type
+            u30()  # name index (always 0 for youtube)
+            flags = read_byte()
+            if flags & 0x08 != 0:
+                # Options present
+                option_count = u30()
+                for c in range(option_count):
+                    u30()  # val
+                    read_bytes(1)  # kind
+            if flags & 0x80 != 0:
+                # Param names present
+                for _ in range(param_count):
+                    u30()  # param name
+            mi = MethodInfo(flags & 0x01 != 0, flags & 0x04 != 0)  # flag bit 0x01 -> NEED_ARGUMENTS, 0x04 -> NEED_REST
+            method_infos.append(mi)
+
+        # Metadata
+        metadata_count = u30()
+        for _c in range(metadata_count):
+            u30()  # name
+            item_count = u30()
+            for _c2 in range(item_count):
+                u30()  # key
+                u30()  # value
+
+        def parse_traits_info():  # Consume one trait entry from code_reader; return the mapping it declares (often empty)
+            trait_name_idx = u30()
+            kind_full = read_byte()
+            kind = kind_full & 0x0f  # low nibble: trait kind
+            attrs = kind_full >> 4  # high nibble: attribute flags
+            methods = {}
+            if kind in [0x00, 0x06]:  # Slot or Const
+                u30()  # Slot id
+                u30()  # type_name_idx
+                vindex = u30()
+                if vindex != 0:
+                    read_byte()  # vkind
+            elif kind in [0x01, 0x02, 0x03]:  # Method / Getter / Setter
+                u30()  # disp_id
+                method_idx = u30()
+                methods[self.multinames[trait_name_idx]] = method_idx  # trait name -> method index
+            elif kind == 0x04:  # Class
+                u30()  # slot_id
+                u30()  # classi
+            elif kind == 0x05:  # Function
+                u30()  # slot_id
+                function_idx = u30()
+                methods[function_idx] = self.multinames[trait_name_idx]  # NOTE(review): index -> name, the reverse of the Method branch - confirm intended
+            else:
+                raise ExtractorError('Unsupported trait kind %d' % kind)
+
+            if attrs & 0x4 != 0:  # Metadata present
+                metadata_count = u30()
+                for _c3 in range(metadata_count):
+                    u30()  # metadata index
+
+            return methods
+
+        # Classes
+        class_count = u30()
+        classes = []
+        for class_id in range(class_count):  # first pass: one instance record per class
+            name_idx = u30()
+
+            cname = self.multinames[name_idx]
+            avm_class = _AVMClass(name_idx, cname)
+            classes.append(avm_class)
+
+            u30()  # super_name idx
+            flags = read_byte()
+            if flags & 0x08 != 0:  # Protected namespace is present
+                u30()  # protected_ns_idx
+            intrf_count = u30()
+            for _c2 in range(intrf_count):
+                u30()
+            u30()  # iinit
+            trait_count = u30()
+            for _c2 in range(trait_count):
+                trait_methods = parse_traits_info()
+                avm_class.register_methods(trait_methods)
+
+        assert len(classes) == class_count
+        self._classes_by_name = dict((c.name, c) for c in classes)  # lookup table used by extract_class / extract_function
+
+        for avm_class in classes:  # second pass: per-class cinit index and traits, in the same order
+            u30()  # cinit
+            trait_count = u30()
+            for _c2 in range(trait_count):
+                trait_methods = parse_traits_info()
+                avm_class.register_methods(trait_methods)
+
+        # Scripts
+        script_count = u30()
+        for _c in range(script_count):
+            u30()  # init
+            trait_count = u30()
+            for _c2 in range(trait_count):
+                parse_traits_info()  # result discarded; called only to advance the reader
+
+        # Method bodies
+        method_body_count = u30()
+        Method = collections.namedtuple('Method', ['code', 'local_count'])
+        for _c in range(method_body_count):
+            method_idx = u30()
+            u30()  # max_stack
+            local_count = u30()
+            u30()  # init_scope_depth
+            u30()  # max_scope_depth
+            code_length = u30()
+            code = read_bytes(code_length)
+            for avm_class in classes:  # attach the body to every class that knows this method index
+                if method_idx in avm_class.method_idxs:
+                    m = Method(code, local_count)
+                    avm_class.methods[avm_class.method_idxs[method_idx]] = m
+            exception_count = u30()
+            for _c2 in range(exception_count):
+                u30()  # from
+                u30()  # to
+                u30()  # target
+                u30()  # exc_type
+                u30()  # var_name
+            trait_count = u30()
+            for _c2 in range(trait_count):
+                parse_traits_info()  # result discarded; called only to advance the reader
+
+        assert p + code_reader.tell() == len(code_tag)  # sanity check: the whole tag must have been consumed
+
+    def extract_class(self, class_name):  # Return the parsed class named class_name; raises ExtractorError if it is unknown
+        try:
+            return self._classes_by_name[class_name]
+        except KeyError:
+            raise ExtractorError('Class %r not found' % class_name)
+
+    def extract_function(self, avm_class, func_name):  # Return a callable, taking one sequence of arguments, that interprets avm_class's method func_name
+        if func_name in avm_class.method_pyfunctions:  # already built by an earlier call
+            return avm_class.method_pyfunctions[func_name]
+        if func_name in self._classes_by_name:  # func_name names a class: return the result of its make_object() instead of a callable
+            return self._classes_by_name[func_name].make_object()
+        if func_name not in avm_class.methods:
+            raise ExtractorError('Cannot find function %s.%s' % (
+                avm_class.name, func_name))
+        m = avm_class.methods[func_name]
+
+        def resfunc(args):  # Interpret m.code once; runs until a returnvalue opcode, raises NotImplementedError on anything unsupported
+            # Helper functions
+            coder = io.BytesIO(m.code)
+            s24 = lambda: _s24(coder)
+            u30 = lambda: _u30(coder)
+
+            registers = [avm_class.variables] + list(args) + [None] * m.local_count  # register 0: class variables, then the arguments, then local_count empty slots
+            stack = []
+            scopes = collections.deque([
+                self._classes_by_name, avm_class.variables])  # searched from last to first by the find*/getlex opcodes
+            while True:
+                opcode = _read_byte(coder)
+                if opcode == 17:  # iftrue
+                    offset = s24()
+                    value = stack.pop()
+                    if value:
+                        coder.seek(coder.tell() + offset)  # jump relative to the position just after the operand
+                elif opcode == 18:  # iffalse
+                    offset = s24()
+                    value = stack.pop()
+                    if not value:
+                        coder.seek(coder.tell() + offset)  # jump relative to the position just after the operand
+                elif opcode == 36:  # pushbyte
+                    v = _read_byte(coder)  # NOTE: same unsigned single-byte read that is used for opcodes
+                    stack.append(v)
+                elif opcode == 42:  # dup
+                    value = stack[-1]
+                    stack.append(value)
+                elif opcode == 44:  # pushstring
+                    idx = u30()
+                    stack.append(self.constant_strings[idx])
+                elif opcode == 48:  # pushscope
+                    new_scope = stack.pop()
+                    scopes.append(new_scope)
+                elif opcode == 66:  # construct
+                    arg_count = u30()
+                    args = list(reversed(
+                        [stack.pop() for _ in range(arg_count)]))  # arguments are popped but not passed on
+                    obj = stack.pop()
+                    res = obj.avm_class.make_object()
+                    stack.append(res)
+                elif opcode == 70:  # callproperty
+                    index = u30()
+                    mname = self.multinames[index]
+                    arg_count = u30()
+                    args = list(reversed(
+                        [stack.pop() for _ in range(arg_count)]))  # restore call order: the last argument is on top of the stack
+                    obj = stack.pop()
+
+                    if isinstance(obj, _AVMClass_Object):
+                        func = self.extract_function(obj.avm_class, mname)
+                        res = func(args)
+                        stack.append(res)
+                        continue
+                    elif isinstance(obj, _ScopeDict):
+                        if mname in obj.avm_class.method_names:
+                            func = self.extract_function(obj.avm_class, mname)
+                            res = func(args)
+                        else:
+                            res = obj[mname]  # not a method name: plain lookup in the scope, arguments ignored
+                        stack.append(res)
+                        continue
+                    elif isinstance(obj, compat_str):
+                        if mname == 'split':
+                            assert len(args) == 1
+                            assert isinstance(args[0], compat_str)
+                            if args[0] == '':
+                                res = list(obj)  # str.split('') raises ValueError in Python, so split into characters by hand
+                            else:
+                                res = obj.split(args[0])
+                            stack.append(res)
+                            continue
+                    elif isinstance(obj, list):
+                        if mname == 'slice':
+                            assert len(args) == 1  # only the one-argument form (start index) is supported
+                            assert isinstance(args[0], int)
+                            res = obj[args[0]:]
+                            stack.append(res)
+                            continue
+                        elif mname == 'join':
+                            assert len(args) == 1
+                            assert isinstance(args[0], compat_str)
+                            res = args[0].join(obj)
+                            stack.append(res)
+                            continue
+                    raise NotImplementedError(  # reached when no branch above handled this receiver type / property name
+                        'Unsupported property %r on %r'
+                        % (mname, obj))
+                elif opcode == 72:  # returnvalue
+                    res = stack.pop()
+                    return res
+                elif opcode == 74:  # constructproperty
+                    index = u30()
+                    arg_count = u30()
+                    args = list(reversed(
+                        [stack.pop() for _ in range(arg_count)]))
+                    obj = stack.pop()
+
+                    mname = self.multinames[index]
+                    assert isinstance(obj, _AVMClass)
+
+                    # We do not actually call the constructor for now;
+                    # we just pretend it does nothing
+                    stack.append(obj.make_object())
+                elif opcode == 79:  # callpropvoid
+                    index = u30()
+                    mname = self.multinames[index]
+                    arg_count = u30()
+                    args = list(reversed(
+                        [stack.pop() for _ in range(arg_count)]))
+                    obj = stack.pop()
+                    if mname == 'reverse':
+                        assert isinstance(obj, list)
+                        obj.reverse()  # in place; nothing is pushed back
+                    else:
+                        raise NotImplementedError(
+                            'Unsupported (void) property %r on %r'
+                            % (mname, obj))
+                elif opcode == 86:  # newarray
+                    arg_count = u30()
+                    arr = []
+                    for i in range(arg_count):
+                        arr.append(stack.pop())
+                    arr = arr[::-1]  # popped last-first, so reverse to get element order
+                    stack.append(arr)
+                elif opcode == 93:  # findpropstrict
+                    index = u30()
+                    mname = self.multinames[index]
+                    for s in reversed(scopes):
+                        if mname in s:
+                            res = s
+                            break
+                    else:
+                        res = scopes[0]  # not found: fall back to the first scope (self._classes_by_name)
+                    stack.append(res[mname])  # NOTE(review): pushes the looked-up value, while findproperty pushes the scope itself - confirm intended
+                elif opcode == 94:  # findproperty
+                    index = u30()
+                    mname = self.multinames[index]
+                    for s in reversed(scopes):
+                        if mname in s:
+                            res = s
+                            break
+                    else:
+                        res = avm_class.variables  # not found: fall back to the class variables
+                    stack.append(res)
+                elif opcode == 96:  # getlex
+                    index = u30()
+                    mname = self.multinames[index]
+                    for s in reversed(scopes):
+                        if mname in s:
+                            scope = s
+                            break
+                    else:
+                        scope = avm_class.variables
+                    # I cannot find where static variables are initialized
+                    # so let's just return None
+                    res = scope.get(mname)
+                    stack.append(res)
+                elif opcode == 97:  # setproperty
+                    index = u30()
+                    value = stack.pop()
+                    idx = self.multinames[index]
+                    if isinstance(idx, _Multiname):  # not a plain name string: the actual key is taken from the stack
+                        idx = stack.pop()
+                    obj = stack.pop()
+                    obj[idx] = value
+                elif opcode == 98:  # getlocal
+                    index = u30()
+                    stack.append(registers[index])
+                elif opcode == 99:  # setlocal
+                    index = u30()
+                    value = stack.pop()
+                    registers[index] = value
+                elif opcode == 102:  # getproperty
+                    index = u30()
+                    pname = self.multinames[index]
+                    if pname == 'length':
+                        obj = stack.pop()
+                        assert isinstance(obj, list)
+                        stack.append(len(obj))
+                    else:  # Otherwise assume indexed access into a list
+                        idx = stack.pop()
+                        assert isinstance(idx, int)
+                        obj = stack.pop()
+                        assert isinstance(obj, list)
+                        stack.append(obj[idx])
+                elif opcode == 115:  # convert_i
+                    value = stack.pop()
+                    intvalue = int(value)
+                    stack.append(intvalue)
+                elif opcode == 128:  # coerce
+                    u30()  # operand is read and ignored; the stack is untouched
+                elif opcode == 133:  # coerce_s
+                    assert isinstance(stack[-1], (type(None), compat_str))  # type check only; the value stays on the stack unchanged
+                elif opcode == 160:  # add
+                    value2 = stack.pop()
+                    value1 = stack.pop()
+                    res = value1 + value2
+                    stack.append(res)
+                elif opcode == 161:  # subtract
+                    value2 = stack.pop()
+                    value1 = stack.pop()
+                    res = value1 - value2
+                    stack.append(res)
+                elif opcode == 164:  # modulo
+                    value2 = stack.pop()
+                    value1 = stack.pop()
+                    res = value1 % value2
+                    stack.append(res)
+                elif opcode == 175:  # greaterequals
+                    value2 = stack.pop()
+                    value1 = stack.pop()
+                    result = value1 >= value2
+                    stack.append(result)
+                elif opcode == 208:  # getlocal_0
+                    stack.append(registers[0])
+                elif opcode == 209:  # getlocal_1
+                    stack.append(registers[1])
+                elif opcode == 210:  # getlocal_2
+                    stack.append(registers[2])
+                elif opcode == 211:  # getlocal_3
+                    stack.append(registers[3])
+                elif opcode == 212:  # setlocal_0
+                    registers[0] = stack.pop()
+                elif opcode == 213:  # setlocal_1
+                    registers[1] = stack.pop()
+                elif opcode == 214:  # setlocal_2
+                    registers[2] = stack.pop()
+                elif opcode == 215:  # setlocal_3
+                    registers[3] = stack.pop()
+                else:
+                    raise NotImplementedError(
+                        'Unsupported opcode %d' % opcode)
+
+        avm_class.method_pyfunctions[func_name] = resfunc  # memoize so later calls hit the early return at the top
+        return resfunc
+
index 64a9618ca62493f893af16b31b3fbd331bbdc1e7..bf4d1112f9f62cf6a58a26df3d83bb4326470b1c 100644 (file)
@@ -1193,11 +1193,6 @@ def format_bytes(bytes):
     return u'%.2f%s' % (converted, suffix)
 
 
-def str_to_int(int_str):
-    int_str = re.sub(r'[,\.]', u'', int_str)
-    return int(int_str)
-
-
 def get_term_width():
     columns = os.environ.get('COLUMNS', None)
     if columns:
@@ -1265,15 +1260,22 @@ class HEADRequest(compat_urllib_request.Request):
         return "HEAD"
 
 
-def int_or_none(v, scale=1, default=None, get_attr=None):
+def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):  # Return int(v) * invscale // scale, or default when v is None
     if get_attr:
         if v is not None:
             v = getattr(v, get_attr, None)
-    return default if v is None else (int(v) // scale)
+    return default if v is None else (int(v) * invscale // scale)  # multiply by invscale first, then floor-divide by scale
+
+
+def str_to_int(int_str):  # Parse an integer written with ',' or '.' separators; None is passed through unchanged
+    if int_str is None:
+        return None
+    int_str = re.sub(r'[,\.]', u'', int_str)  # drop every comma and dot before converting
+    return int(int_str)  # int() raises ValueError if the remaining text is not a valid integer
 
 
-def float_or_none(v, scale=1, default=None):
-    return default if v is None else (float(v) / scale)
+def float_or_none(v, scale=1, invscale=1, default=None):  # Return float(v) * invscale / scale, or default when v is None
+    return default if v is None else (float(v) * invscale / scale)  # NOTE(review): invscale was inserted before default, so a positional third argument now means invscale - check callers
 
 
 def parse_duration(s):
index 4d606c3d2333ffbcdbfb64d55f4e8a2a8db3bf75..e2e0ee25c1d7de5ae9ac0e6a54b5020b8e771061 100644 (file)
@@ -1,2 +1,2 @@
 
-__version__ = '2014.07.15'
+__version__ = '2014.07.20.2'