Merge pull request #922 from JohnyMoSwag/master
author: Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com>
Tue, 25 Jun 2013 20:07:31 +0000 (22:07 +0200)
committer: Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com>
Tue, 25 Jun 2013 20:08:58 +0000 (22:08 +0200)
Added embedded youtube detection to WorldstarIE

13 files changed:
devscripts/release.sh
test/test_download.py
test/tests.json
youtube_dl/__init__.py
youtube_dl/extractor/__init__.py
youtube_dl/extractor/common.py
youtube_dl/extractor/googleplus.py
youtube_dl/extractor/jukebox.py [new file with mode: 0644]
youtube_dl/extractor/tudou.py [new file with mode: 0644]
youtube_dl/extractor/worldstarhiphop.py
youtube_dl/extractor/youtube.py
youtube_dl/utils.py
youtube_dl/version.py

diff --git a/devscripts/release.sh b/devscripts/release.sh
index d32ae47dd22d82be6567692e12ec0b31715f5e27..46c31e437558659d734d4f59009eadcd94ca62c0 100755 (executable)
@@ -69,7 +69,9 @@ git checkout HEAD -- youtube-dl youtube-dl.exe
 
 /bin/echo -e "\n### Signing and uploading the new binaries to youtube-dl.org..."
 for f in $RELEASE_FILES; do gpg --detach-sig "build/$version/$f"; done
-scp -r "build/$version" ytdl@youtube-dl.org:html/downloads/
+scp -r "build/$version" ytdl@yt-dl.org:html/tmp/
+ssh ytdl@yt-dl.org "mv html/tmp/$version html/downloads/"
+ssh ytdl@yt-dl.org "sh html/update_latest.sh $version"
 
 /bin/echo -e "\n### Now switching to gh-pages..."
 git clone --branch gh-pages --single-branch . build/gh-pages
diff --git a/test/test_download.py b/test/test_download.py
index 9af626dca0cfd5498aea48aef8825e740a87082a..067bde4bb9159f4d56ae34bbc4690a8425a11550 100644 (file)
@@ -153,9 +153,11 @@ def generator(test_case):
     return test_template
 
 ### And add them to TestDownload
-for test_case in defs:
+for n, test_case in enumerate(defs):
     test_method = generator(test_case)
     test_method.__name__ = "test_{0}".format(test_case["name"])
+    if getattr(TestDownload, test_method.__name__, False):
+        test_method.__name__ = "test_{0}_{1}".format(test_case["name"], n)
     setattr(TestDownload, test_method.__name__, test_method)
     del test_method
 
diff --git a/test/tests.json b/test/tests.json
index fd037d818a07d9feab99e00e4bb44a7880ed0ea9..5f4f642e8ad5036831f7548b1ebdec45d94a6988 100644 (file)
       "description": "test chars:  \"'/\\ä↭𝕐\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de ."
     }
   },
+  {
+    "name": "Youtube",
+    "url":  "http://www.youtube.com/watch?v=1ltcDfZMA3U",
+    "file":  "1ltcDfZMA3U.flv",
+    "note": "Test VEVO video (#897)",
+    "info_dict": {
+      "upload_date": "20070518",
+      "title": "Maps - It Will Find You",
+      "description": "Music video by Maps performing It Will Find You.",
+      "uploader": "MuteUSA",
+      "uploader_id": "MuteUSA"
+    }
+  },
+  {
+    "name": "Youtube",
+    "url":  "http://www.youtube.com/watch?v=UxxajLWwzqY",
+    "file":  "UxxajLWwzqY.mp4",
+    "note": "Test generic use_cipher_signature video (#897)",
+    "info_dict": {
+      "upload_date": "20120506",
+      "title": "Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]",
+      "description": "md5:b085c9804f5ab69f4adea963a2dceb3c",
+      "uploader": "IconaPop",
+      "uploader_id": "IconaPop"
+    }
+  },
   {
     "name": "Dailymotion",
     "md5":  "392c4b85a60a90dc4792da41ce3144eb",
         "upload_date": "20130624",
         "uploader": "Hurts"
     }
+  },
+  {
+    "name": "Tudou",
+    "url": "http://www.tudou.com/listplay/zzdE77v6Mmo/2xN2duXMxmw.html",
+    "file": "159447792.f4v",
+    "md5": "ad7c358a01541e926a1e413612c6b10a",
+    "info_dict": {
+        "title": "卡马乔国足开大脚长传冲吊集锦"
+    }
   }
 ]
diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py
index 70e7d53de71e125421cbbf053688e1798ca3feb9..cdc4a696233a5f941164466b36372af0c1ee6f89 100644 (file)
@@ -25,6 +25,7 @@ __authors__  = (
     'M. Yasoob Ullah Khalid',
     'Julien Fraichard',
     'Johny Mo Swag',
+    'Axel Noack',
     )
 
 __license__ = 'Public Domain'
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index 9878ad942e7a3969c8138b9ce4cbb7dfdf749ff0..0ea99086044c78857310d27d22c14510433171a6 100644 (file)
@@ -4,8 +4,8 @@ from .arte import ArteTvIE
 from .bandcamp import BandcampIE
 from .bliptv import BlipTVIE, BlipTVUserIE
 from .breakcom import BreakIE
-from .comedycentral import ComedyCentralIE
 from .collegehumor import CollegeHumorIE
+from .comedycentral import ComedyCentralIE
 from .dailymotion import DailymotionIE
 from .depositfiles import DepositFilesIE
 from .eighttracks import EightTracksIE
@@ -21,6 +21,7 @@ from .howcast import HowcastIE
 from .hypem import HypemIE
 from .ina import InaIE
 from .infoq import InfoQIE
+from .jukebox import JukeboxIE
 from .justintv import JustinTVIE
 from .keek import KeekIE
 from .liveleak import LiveLeakIE
@@ -30,7 +31,6 @@ from .mtv import MTVIE
 from .myspass import MySpassIE
 from .myvideo import MyVideoIE
 from .nba import NBAIE
-from .statigram import StatigramIE
 from .photobucket import PhotobucketIE
 from .pornotube import PornotubeIE
 from .rbmaradio import RBMARadioIE
@@ -38,9 +38,11 @@ from .redtube import RedTubeIE
 from .soundcloud import SoundcloudIE, SoundcloudSetIE
 from .spiegel import SpiegelIE
 from .stanfordoc import StanfordOpenClassroomIE
+from .statigram import StatigramIE
 from .steam import SteamIE
 from .teamcoco import TeamcocoIE
 from .ted import TEDIE
+from .tudou import TudouIE
 from .tumblr import TumblrIE
 from .ustream import UstreamIE
 from .vbox7 import Vbox7IE
@@ -48,8 +50,8 @@ from .vevo import VevoIE
 from .vimeo import VimeoIE
 from .vine import VineIE
 from .worldstarhiphop import WorldStarHipHopIE
-from .xnxx import XNXXIE
 from .xhamster import XHamsterIE
+from .xnxx import XNXXIE
 from .xvideos import XVideosIE
 from .yahoo import YahooIE, YahooSearchIE
 from .youjizz import YouJizzIE
@@ -58,6 +60,7 @@ from .youporn import YouPornIE
 from .youtube import YoutubeIE, YoutubePlaylistIE, YoutubeSearchIE, YoutubeUserIE, YoutubeChannelIE
 from .zdf import ZDFIE
 
+
 def gen_extractors():
     """ Return a list of an instance of every supported extractor.
     The order does matter; the first extractor matched is the one handling the URL.
@@ -127,6 +130,8 @@ def gen_extractors():
         StatigramIE(),
         BreakIE(),
         VevoIE(),
+        JukeboxIE(),
+        TudouIE(),
         GenericIE()
     ]
 
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 062f4cf1ebd5e73f40603789e792639d73ed9130..64d63e109d8b9382503d7e0585f4ed589ba72200 100644 (file)
@@ -211,7 +211,7 @@ class InfoExtractor(object):
             raise ExtractorError(u'Unable to extract %s' % _name)
         else:
             self._downloader.report_warning(u'unable to extract %s; '
-                u'please report this issue on GitHub.' % _name)
+                u'please report this issue on http://yt-dl.org/bug' % _name)
             return None
 
     def _html_search_regex(self, pattern, string, name, default=None, fatal=True, flags=0):
diff --git a/youtube_dl/extractor/googleplus.py b/youtube_dl/extractor/googleplus.py
index e922bd1405d57ba0c074af48f0192b90cf0eb5ef..ff2cdeebb4e9dde5f1ff8e3a7c7bb0c00b48f048 100644 (file)
@@ -46,14 +46,18 @@ class GooglePlusIE(InfoExtractor):
         video_title = self._html_search_regex(r'<meta name\=\"Description\" content\=\"(.*?)[\n<"]',
             webpage, 'title', default=u'NA')
 
-        # Step 2, Stimulate clicking the image box to launch video
-        video_page = self._search_regex('"(https\://plus\.google\.com/photos/.*?)",,"image/jpeg","video"\]',
+        # Step 2, Simulate clicking the image box to launch video
+        DOMAIN = 'https://plus.google.com'
+        video_page = self._search_regex(r'<a href="((?:%s)?/photos/.*?)"' % re.escape(DOMAIN),
             webpage, u'video page URL')
+        if not video_page.startswith(DOMAIN):
+            video_page = DOMAIN + video_page
+
         webpage = self._download_webpage(video_page, video_id, u'Downloading video page')
 
         # Extract video links on video page
         """Extract video links of all sizes"""
-        pattern = '\d+,\d+,(\d+),"(http\://redirector\.googlevideo\.com.*?)"'
+        pattern = r'\d+,\d+,(\d+),"(http\://redirector\.googlevideo\.com.*?)"'
         mobj = re.findall(pattern, webpage)
         if len(mobj) == 0:
             raise ExtractorError(u'Unable to extract video links')
diff --git a/youtube_dl/extractor/jukebox.py b/youtube_dl/extractor/jukebox.py
new file mode 100644 (file)
index 0000000..c7bb234
--- /dev/null
@@ -0,0 +1,56 @@
+# coding: utf-8
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    ExtractorError,
+    unescapeHTML,
+)
+
+class JukeboxIE(InfoExtractor):
+    _VALID_URL = r'^http://www\.jukebox?\..+?\/.+[,](?P<video_id>[a-z0-9\-]+).html'
+    _IFRAME = r'<iframe .*src="(?P<iframe>[^"]*)".*>'
+    _VIDEO_URL = r'"config":{"file":"(?P<video_url>http:[^"]+[.](?P<video_ext>[^.?]+)[?]mdtk=[0-9]+)"'
+    _TITLE = r'<h1 class="inline">(?P<title>[^<]+)</h1>.*<span id="infos_article_artist">(?P<artist>[^<]+)</span>'
+    _IS_YOUTUBE = r'config":{"file":"(?P<youtube_url>http:[\\][/][\\][/]www[.]youtube[.]com[\\][/]watch[?]v=[^"]+)"'
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('video_id')
+
+        html = self._download_webpage(url, video_id)
+
+        mobj = re.search(self._IFRAME, html)
+        if mobj is None:
+            raise ExtractorError(u'Cannot extract iframe url')
+        iframe_url = unescapeHTML(mobj.group('iframe'))
+
+        iframe_html = self._download_webpage(iframe_url, video_id, 'Downloading iframe')
+        mobj = re.search(r'class="jkb_waiting"', iframe_html)
+        if mobj is not None:
+            raise ExtractorError(u'Video is not available(in your country?)!')
+
+        self.report_extraction(video_id)
+
+        mobj = re.search(self._VIDEO_URL, iframe_html)
+        if mobj is None:
+            mobj = re.search(self._IS_YOUTUBE, iframe_html)
+            if mobj is None:
+                raise ExtractorError(u'Cannot extract video url')
+            youtube_url = unescapeHTML(mobj.group('youtube_url')).replace('\/','/')
+            self.to_screen(u'Youtube video detected')
+            return self.url_result(youtube_url,ie='Youtube')
+        video_url = unescapeHTML(mobj.group('video_url')).replace('\/','/')
+        video_ext = unescapeHTML(mobj.group('video_ext'))
+
+        mobj = re.search(self._TITLE, html)
+        if mobj is None:
+            raise ExtractorError(u'Cannot extract title')
+        title = unescapeHTML(mobj.group('title'))
+        artist = unescapeHTML(mobj.group('artist'))
+
+        return [{'id': video_id,
+                 'url': video_url,
+                 'title': artist + '-' + title,
+                 'ext': video_ext
+                 }]
diff --git a/youtube_dl/extractor/tudou.py b/youtube_dl/extractor/tudou.py
new file mode 100644 (file)
index 0000000..9ca860a
--- /dev/null
@@ -0,0 +1,32 @@
+import re
+
+from .common import InfoExtractor
+
+
+class TudouIE(InfoExtractor):
+    _VALID_URL = r'(?:http://)?(?:www\.)?tudou\.com/(?:listplay|programs)/(?:view|(.+?))/(?:([^/]+)|([^/]+)\.html)'
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group(2).replace('.html','')
+        webpage = self._download_webpage(url, video_id)
+        video_id = re.search('"k":(.+?),',webpage).group(1)
+        title = re.search(",kw:\"(.+)\"",webpage)
+        if title is None:
+            title = re.search(",kw: \'(.+)\'",webpage)
+        title = title.group(1)
+        thumbnail_url = re.search(",pic: \'(.+?)\'",webpage)
+        if thumbnail_url is None:
+            thumbnail_url = re.search(",pic:\"(.+?)\"",webpage)
+        thumbnail_url = thumbnail_url.group(1)
+        info_url = "http://v2.tudou.com/f?id="+str(video_id)
+        webpage = self._download_webpage(info_url, video_id, "Opening the info webpage")
+        final_url = re.search('\>(.+?)\<\/f\>',webpage).group(1)
+        ext = (final_url.split('?')[0]).split('.')[-1]
+        return [{
+            'id':        video_id,
+            'url':       final_url,
+            'ext':       ext,
+            'title':     title,
+            'thumbnail': thumbnail_url,
+        }]
diff --git a/youtube_dl/extractor/worldstarhiphop.py b/youtube_dl/extractor/worldstarhiphop.py
index f628e4fb1e6dfb6ba6293c29e956e34c37d46a35..531d0889fbc06273eb8a5f3a7ea7a77cc993438f 100644 (file)
@@ -18,7 +18,7 @@ class WorldStarHipHopIE(InfoExtractor):
 
         if 'youtube' in video_url:
             self.to_screen(u'Youtube video detected:')
-            return self.url_result('%s' % video_url, ie='Youtube')
+            return self.url_result(video_url, ie='Youtube')
 
         if 'mp4' in video_url:
             ext = 'mp4'
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index b34c1a7b9a494ca719f2a7ec3f1a128837efbf14..de653cb3d24e6318f157d5177b87c6b09b913bef 100644 (file)
@@ -129,12 +129,13 @@ class YoutubeIE(InfoExtractor):
         """Indicate the download will use the RTMP protocol."""
         self.to_screen(u'RTMP download detected')
 
-    @staticmethod
-    def _decrypt_signature(s):
+    def _decrypt_signature(self, s):
         """Decrypt the key the two subkeys must have a length of 43"""
         (a,b) = s.split('.')
         if len(a) != 43 or len(b) != 43:
-            raise ExtractorError(u'Unable to decrypt signature, subkeys lengths not valid')
+            raise ExtractorError(u'Unable to decrypt signature, subkeys lengths %d.%d not supported; retrying might work' % (len(a), len(b)))
+        if self._downloader.params.get('verbose'):
+            self.to_screen('encrypted signature length %d.%d' % (len(a), len(b)))
         b = ''.join([b[:8],a[0],b[9:18],b[-4],b[19:39], b[18]])[0:40]
         a = a[-40:]
         s_dec = '.'.join((a,b))[::-1]
@@ -484,6 +485,8 @@ class YoutubeIE(InfoExtractor):
 
         try:
             mobj = re.search(r';ytplayer.config = ({.*?});', video_webpage)
+            if not mobj:
+                raise ValueError('Could not find vevo ID')
             info = json.loads(mobj.group(1))
             args = info['args']
             # Easy way to know if the 's' value is in url_encoded_fmt_stream_map
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index 66ae41e319e39d7c75597a31034315f996f00797..4d415bd611ec4020052a3c18cfe5fb6dbd6309fc 100644 (file)
@@ -474,7 +474,7 @@ class ExtractorError(Exception):
         """ tb, if given, is the original traceback (so that it can be printed out). """
 
         if not sys.exc_info()[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError):
-            msg = msg + u'; please report this issue on GitHub.'
+            msg = msg + u'; please report this issue on http://yt-dl.org/bug'
         super(ExtractorError, self).__init__(msg)
 
         self.traceback = tb
diff --git a/youtube_dl/version.py b/youtube_dl/version.py
index ba5d5b9199cf5bc35fe329d9730fbf30181eaf7b..15003cccd2a5205bc210edaee3a88f9beabd3b7f 100644 (file)
@@ -1,2 +1,2 @@
 
-__version__ = '2013.06.29'
+__version__ = '2013.06.32'