From: Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com>
Date: Sat, 8 Jun 2013 09:11:49 +0000 (+0200)
Subject: Merge pull request #853 from mc2avr/master
X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=commitdiff_plain;h=3054ff0cbebc7969d918f0deee76cd37ea6f5b40;hp=f2cd958c0a09bca580a955c60c19e177f6ff45b8;p=youtube-dl

Merge pull request #853 from mc2avr/master

add ZDFIE
---

diff --git a/test/test_all_urls.py b/test/test_all_urls.py
index a40360122..dd67286a7 100644
--- a/test/test_all_urls.py
+++ b/test/test_all_urls.py
@@ -7,7 +7,7 @@ import unittest
 import os
 sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 
-from youtube_dl.InfoExtractors import YoutubeIE, YoutubePlaylistIE, YoutubeChannelIE
+from youtube_dl.InfoExtractors import YoutubeIE, YoutubePlaylistIE, YoutubeChannelIE, JustinTVIE
 
 class TestAllURLsMatching(unittest.TestCase):
     def test_youtube_playlist_matching(self):
@@ -29,6 +29,22 @@ class TestAllURLsMatching(unittest.TestCase):
         self.assertTrue(YoutubeChannelIE.suitable('https://www.youtube.com/channel/HCtnHdj3df7iM?feature=gb_ch_rec'))
         self.assertTrue(YoutubeChannelIE.suitable('https://www.youtube.com/channel/HCtnHdj3df7iM/videos'))
 
+    def test_justin_tv_channelid_matching(self):
+        self.assertTrue(JustinTVIE.suitable(u"justin.tv/vanillatv"))
+        self.assertTrue(JustinTVIE.suitable(u"twitch.tv/vanillatv"))
+        self.assertTrue(JustinTVIE.suitable(u"www.justin.tv/vanillatv"))
+        self.assertTrue(JustinTVIE.suitable(u"www.twitch.tv/vanillatv"))
+        self.assertTrue(JustinTVIE.suitable(u"http://www.justin.tv/vanillatv"))
+        self.assertTrue(JustinTVIE.suitable(u"http://www.twitch.tv/vanillatv"))
+        self.assertTrue(JustinTVIE.suitable(u"http://www.justin.tv/vanillatv/"))
+        self.assertTrue(JustinTVIE.suitable(u"http://www.twitch.tv/vanillatv/"))
+
+    def test_justintv_videoid_matching(self):
+        self.assertTrue(JustinTVIE.suitable(u"http://www.twitch.tv/vanillatv/b/328087483"))
+
+    def test_justin_tv_chapterid_matching(self):
+        self.assertTrue(JustinTVIE.suitable(u"http://www.twitch.tv/tsm_theoddone/c/2349361"))
+
     def test_youtube_extract(self):
         self.assertEqual(YoutubeIE()._extract_id('http://www.youtube.com/watch?&v=BaW_jenozKc'), 'BaW_jenozKc')
         self.assertEqual(YoutubeIE()._extract_id('https://www.youtube.com/watch?&v=BaW_jenozKc'), 'BaW_jenozKc')
diff --git a/test/test_youtube_subtitles.py b/test/test_youtube_subtitles.py
index a123e6d72..c80c90cbe 100644
--- a/test/test_youtube_subtitles.py
+++ b/test/test_youtube_subtitles.py
@@ -28,7 +28,9 @@ compat_urllib_request.install_opener(opener)
 class FakeDownloader(FileDownloader):
     def __init__(self):
         self.result = []
-        self.params = parameters
+        # Different instances of the downloader can't share the same dictionary
+        # some test set the "sublang" parameter, which would break the md5 checks.
+        self.params = dict(parameters)
     def to_screen(self, s):
         print(s)
     def trouble(self, s, tb=None):
@@ -96,6 +98,14 @@ class TestYoutubeSubtitles(unittest.TestCase):
         IE = YoutubeIE(DL)
         info_dict = IE.extract('QRS8MkLhQmM')
         self.assertEqual(info_dict, None)
+    def test_youtube_automatic_captions(self):
+        DL = FakeDownloader()
+        DL.params['writesubtitles'] = True
+        DL.params['subtitleslang'] = 'it'
+        IE = YoutubeIE(DL)
+        info_dict = IE.extract('8YoUxe5ncPo')
+        sub = info_dict[0]['subtitles'][0]
+        self.assertTrue(sub[2] is not None)
 
 if __name__ == '__main__':
     unittest.main()
diff --git a/test/tests.json b/test/tests.json
index f57ebf1c9..04be912ce 100644
--- a/test/tests.json
+++ b/test/tests.json
@@ -482,5 +482,32 @@
       "title": "Louis C.K. Interview Pt. 1 11/3/11",
       "description": "Louis C.K. got starstruck by George W. Bush, so what? Part one."
     }
+  },
+  {
+    "name": "XHamster",
+    "url": "http://xhamster.com/movies/1509445/femaleagent_shy_beauty_takes_the_bait.html",
+    "file": "1509445.flv",
+    "md5": "9f48e0e8d58e3076bb236ff412ab62fa",
+    "info_dict":{
+      "title":"FemaleAgent Shy beauty takes the bait"
+    }
+  },
+  {
+    "name": "Hypem",
+    "url": "http://hypem.com/track/1v6ga/BODYWORK+-+TAME",
+    "file": "1v6ga.mp3",
+    "md5": "b9cc91b5af8995e9f0c1cee04c575828",
+    "info_dict":{
+      "title":"TAME"
+    }
+  },
+  {
+    "name": "Vbox7",
+    "url": "http://vbox7.com/play:249bb972c2",
+    "file": "249bb972c2.flv",
+    "md5": "9c70d6d956f888bdc08c124acc120cfe",
+    "info_dict":{
+      "title":"Ð¡Ð¼ÑÑ! Ð§ÑÐ´Ð¾ - ÑÐ¸ÑÑ Ð·Ð° ÑÐµÐºÑÐ½Ð´Ð¸ - Ð¡ÐºÑÐ¸ÑÐ° ÐºÐ°Ð¼ÐµÑÐ°"
+    }
   }
 ]
diff --git a/youtube_dl/FileDownloader.py b/youtube_dl/FileDownloader.py
index 2c35a05d8..bf0f5bb9e 100644
--- a/youtube_dl/FileDownloader.py
+++ b/youtube_dl/FileDownloader.py
@@ -814,12 +814,10 @@ class FileDownloader(object):
         self.report_destination(filename)
         tmpfilename = self.temp_name(filename)
 
-#        args = ['mmsclient', url]                                     # doesn't work anymore
-#        args = ['wpro', url, '-O', tmpfilename]                       # dont work
         args = ['mplayer', '-really-quiet', '-vo', 'null', '-vc', 'dummy', '-dumpstream', '-dumpfile', tmpfilename, url]
         # Check for mplayer first
         try:
-            subprocess.call(args[0], stdout=(open(os.path.devnull, 'w')), stderr=subprocess.STDOUT)
+            subprocess.call(['mplayer', '-h'], stdout=(open(os.path.devnull, 'w')), stderr=subprocess.STDOUT)
         except (OSError, IOError):
             self.report_error(u'MMS or RTSP download detected but "%s" could not be run' % args[0] )
             return False
@@ -839,7 +837,7 @@ class FileDownloader(object):
             return True
         else:
             self.to_stderr(u"\n")
-            self.report_error(u'%s exited with code %d' % (args[0], retval))
+            self.report_error(u'mplayer exited with code %d' % retval)
             return False
 
 
diff --git a/youtube_dl/InfoExtractors.py b/youtube_dl/InfoExtractors.py
index 4ca744daf..24a77a1ab 100755
--- a/youtube_dl/InfoExtractors.py
+++ b/youtube_dl/InfoExtractors.py
@@ -376,6 +376,34 @@ class YoutubeIE(InfoExtractor):
             return (u'Did not fetch video subtitles', None, None)
         return (None, sub_lang, sub)
 
+    def _request_automatic_caption(self, video_id, webpage):
+        """We need the webpage for getting the captions url, pass it as an
+           argument to speed up the process."""
+        sub_lang = self._downloader.params.get('subtitleslang')
+        sub_format = self._downloader.params.get('subtitlesformat')
+        self.to_screen(u'%s: Looking for automatic captions' % video_id)
+        mobj = re.search(r';ytplayer.config = ({.*?});', webpage)
+        err_msg = u'Couldn\'t find automatic captions for "%s"' % sub_lang
+        if mobj is None:
+            return [(err_msg, None, None)]
+        player_config = json.loads(mobj.group(1))
+        try:
+            args = player_config[u'args']
+            caption_url = args[u'ttsurl']
+            timestamp = args[u'timestamp']
+            params = compat_urllib_parse.urlencode({
+                'lang': 'en',
+                'tlang': sub_lang,
+                'fmt': sub_format,
+                'ts': timestamp,
+                'kind': 'asr',
+            })
+            subtitles_url = caption_url + '&' + params
+            sub = self._download_webpage(subtitles_url, video_id, u'Downloading automatic captions')
+            return [(None, sub_lang, sub)]
+        except KeyError:
+            return [(err_msg, None, None)]
+
     def _extract_subtitle(self, video_id):
         """
         Return a list with a tuple:
@@ -623,7 +651,14 @@ class YoutubeIE(InfoExtractor):
             if video_subtitles:
                 (sub_error, sub_lang, sub) = video_subtitles[0]
                 if sub_error:
-                    self._downloader.report_error(sub_error)
+                    # We try with the automatic captions
+                    video_subtitles = self._request_automatic_caption(video_id, video_webpage)
+                    (sub_error_auto, sub_lang, sub) = video_subtitles[0]
+                    if sub is not None:
+                        pass
+                    else:
+                        # We report the original error
+                        self._downloader.report_error(sub_error)
 
         if self._downloader.params.get('allsubtitles', False):
             video_subtitles = self._extract_all_subtitles(video_id)
@@ -1025,7 +1060,7 @@ class VimeoIE(InfoExtractor):
     """Information extractor for vimeo.com."""
 
     # _VALID_URL matches Vimeo URLs
-    _VALID_URL = r'(?P<proto>https?://)?(?:(?:www|player)\.)?vimeo\.com/(?:(?:groups|album)/[^/]+/)?(?P<direct_link>play_redirect_hls\?clip_id=)?(?:videos?/)?(?P<id>[0-9]+)'
+    _VALID_URL = r'(?P<proto>https?://)?(?:(?:www|player)\.)?vimeo(?P<pro>pro)?\.com/(?:(?:(?:groups|album)/[^/]+)|(?:.*?)/)?(?P<direct_link>play_redirect_hls\?clip_id=)?(?:videos?/)?(?P<id>[0-9]+)'
     IE_NAME = u'vimeo'
 
     def _real_extract(self, url, new_video=True):
@@ -1037,7 +1072,7 @@ class VimeoIE(InfoExtractor):
         video_id = mobj.group('id')
         if not mobj.group('proto'):
             url = 'https://' + url
-        if mobj.group('direct_link'):
+        if mobj.group('direct_link') or mobj.group('pro'):
             url = 'https://vimeo.com/' + video_id
 
         # Retrieve video webpage to extract further information
@@ -1064,7 +1099,7 @@ class VimeoIE(InfoExtractor):
 
         # Extract uploader and uploader_id
         video_uploader = config["video"]["owner"]["name"]
-        video_uploader_id = config["video"]["owner"]["url"].split('/')[-1]
+        video_uploader_id = config["video"]["owner"]["url"].split('/')[-1] if config["video"]["owner"]["url"] else None
 
         # Extract video thumbnail
         video_thumbnail = config["video"]["thumbnail"]
@@ -1884,7 +1919,7 @@ class FacebookIE(InfoExtractor):
 class BlipTVIE(InfoExtractor):
     """Information extractor for blip.tv"""
 
-    _VALID_URL = r'^(?:https?://)?(?:\w+\.)?blip\.tv(/.+)$'
+    _VALID_URL = r'^(?:https?://)?(?:\w+\.)?blip\.tv/((.+/)|(play/)|(api\.swf#))(.+)$'
     _URL_EXT = r'^.*\.([a-z0-9]+)$'
     IE_NAME = u'blip.tv'
 
@@ -1897,6 +1932,10 @@ class BlipTVIE(InfoExtractor):
         if mobj is None:
             raise ExtractorError(u'Invalid URL: %s' % url)
 
+        # See https://github.com/rg3/youtube-dl/issues/857
+        api_mobj = re.match(r'http://a\.blip\.tv/api\.swf#(?P<video_id>[\d\w]+)', url)
+        if api_mobj is not None:
+            url = 'http://blip.tv/play/g_%s' % api_mobj.group('video_id')
         urlp = compat_urllib_parse_urlparse(url)
         if urlp.path.startswith('/play/'):
             request = compat_urllib_request.Request(url)
@@ -3941,7 +3980,7 @@ class SpiegelIE(InfoExtractor):
         video_id = m.group('videoID')
 
         webpage = self._download_webpage(url, video_id)
-        m = re.search(r'<div class="spVideoTitle">(.*?)</div>', webpage)
+        m = re.search(r'<div class="module-title">(.*?)</div>', webpage)
         if not m:
             raise ExtractorError(u'Cannot find title')
         video_title = unescapeHTML(m.group(1))
@@ -4070,8 +4109,8 @@ class ZDFIE(InfoExtractor):
         if streams is None:
             raise ExtractorError(u'No media url found.')
 
-        # s['media_type'] == 'wstreaming' -> use 'Windows Media Player' und mms url
-        # s['media_type'] == 'hstreaming' -> use 'Quicktime' und rtsp url
+        # s['media_type'] == 'wstreaming' -> use 'Windows Media Player' and mms url
+        # s['media_type'] == 'hstreaming' -> use 'Quicktime' and rtsp url
         # choose first/default media type and highest quality for now
         for s in streams:        #find 300 - dsl1000mbit
             if s['quality'] == '300' and s['media_type'] == 'wstreaming':
@@ -4263,7 +4302,7 @@ class HowcastIE(InfoExtractor):
 
         self.report_extraction(video_id)
 
-        mobj = re.search(r'\'file\': "(http://mobile-media\.howcast\.com/\d+\.mp4)"', webpage)
+        mobj = re.search(r'\'?file\'?: "(http://mobile-media\.howcast\.com/[0-9]+\.mp4)"', webpage)
         if mobj is None:
             raise ExtractorError(u'Unable to extract video URL')
         video_url = mobj.group(1)
@@ -4444,6 +4483,149 @@ class TeamcocoIE(InfoExtractor):
             'thumbnail':   thumbnail,
             'description': description,
         }]
+        
+class XHamsterIE(InfoExtractor):
+    """Information Extractor for xHamster"""
+    _VALID_URL = r'(?:http://)?(?:www.)?xhamster\.com/movies/(?P<id>[0-9]+)/.*\.html'
+
+    def _real_extract(self,url):
+        mobj = re.match(self._VALID_URL, url)
+
+        video_id = mobj.group('id')
+        mrss_url='http://xhamster.com/movies/%s/.html' % video_id
+        webpage = self._download_webpage(mrss_url, video_id)
+        mobj = re.search(r'\'srv\': \'(?P<server>[^\']*)\',\s*\'file\': \'(?P<file>[^\']+)\',', webpage)
+        if mobj is None:
+            raise ExtractorError(u'Unable to extract media URL')
+        if len(mobj.group('server')) == 0:
+            video_url = compat_urllib_parse.unquote(mobj.group('file'))
+        else:
+            video_url = mobj.group('server')+'/key='+mobj.group('file')
+        video_extension = video_url.split('.')[-1]
+
+        mobj = re.search(r'<title>(?P<title>.+?) - xHamster\.com</title>', webpage)
+        if mobj is None:
+            raise ExtractorError(u'Unable to extract title')
+        video_title = unescapeHTML(mobj.group('title'))
+
+        mobj = re.search(r'<span>Description: </span>(?P<description>[^<]+)', webpage)
+        if mobj is None:
+            video_description = u''
+        else:
+            video_description = unescapeHTML(mobj.group('description'))
+
+        mobj = re.search(r'hint=\'(?P<upload_date_Y>[0-9]{4})-(?P<upload_date_m>[0-9]{2})-(?P<upload_date_d>[0-9]{2}) [0-9]{2}:[0-9]{2}:[0-9]{2} [A-Z]{3,4}\'', webpage)
+        if mobj is None:
+            raise ExtractorError(u'Unable to extract upload date')
+        video_upload_date = mobj.group('upload_date_Y')+mobj.group('upload_date_m')+mobj.group('upload_date_d')
+
+        mobj = re.search(r'<a href=\'/user/[^>]+>(?P<uploader_id>[^>]+)', webpage)
+        if mobj is None:
+            video_uploader_id = u'anonymous'
+        else:
+            video_uploader_id = mobj.group('uploader_id')
+
+        mobj = re.search(r'\'image\':\'(?P<thumbnail>[^\']+)\'', webpage)
+        if mobj is None:
+            raise ExtractorError(u'Unable to extract thumbnail URL')
+        video_thumbnail = mobj.group('thumbnail')
+
+        return [{
+            'id':       video_id,
+            'url':      video_url,
+            'ext':      video_extension,
+            'title':    video_title,
+            'description': video_description,
+            'upload_date': video_upload_date,
+            'uploader_id': video_uploader_id,
+            'thumbnail': video_thumbnail
+        }]
+
+class HypemIE(InfoExtractor):
+    """Information Extractor for hypem"""
+    _VALID_URL = r'(?:http://)?(?:www\.)?hypem\.com/track/([^/]+)/([^/]+)'
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        if mobj is None:
+            raise ExtractorError(u'Invalid URL: %s' % url)
+        track_id = mobj.group(1)
+
+        data = { 'ax': 1, 'ts': time.time() }
+        data_encoded = compat_urllib_parse.urlencode(data)
+        complete_url = url + "?" + data_encoded
+        request = compat_urllib_request.Request(complete_url)
+        response, urlh = self._download_webpage_handle(request, track_id, u'Downloading webpage with the url')
+        cookie = urlh.headers.get('Set-Cookie', '')
+
+        self.report_extraction(track_id)
+        mobj = re.search(r'<script type="application/json" id="displayList-data">(.*?)</script>', response, flags=re.MULTILINE|re.DOTALL)
+        if mobj is None:
+            raise ExtractorError(u'Unable to extrack tracks')
+        html_tracks = mobj.group(1).strip()
+        try:
+            track_list = json.loads(html_tracks)
+            track = track_list[u'tracks'][0]
+        except ValueError:
+            raise ExtractorError(u'Hypemachine contained invalid JSON.')
+
+        key = track[u"key"]
+        track_id = track[u"id"]
+        artist = track[u"artist"]
+        title = track[u"song"]
+
+        serve_url = "http://hypem.com/serve/source/%s/%s" % (compat_str(track_id), compat_str(key))
+        request = compat_urllib_request.Request(serve_url, "" , {'Content-Type': 'application/json'})
+        request.add_header('cookie', cookie)
+        song_data_json = self._download_webpage(request, track_id, u'Downloading metadata')
+        try:
+            song_data = json.loads(song_data_json)
+        except ValueError:
+            raise ExtractorError(u'Hypemachine contained invalid JSON.')
+        final_url = song_data[u"url"]
+
+        return [{
+            'id':       track_id,
+            'url':      final_url,
+            'ext':      "mp3",
+            'title':    title,
+            'artist':   artist,
+        }]
+
+class Vbox7IE(InfoExtractor):
+    """Information Extractor for Vbox7"""
+    _VALID_URL = r'(?:http://)?(?:www\.)?vbox7\.com/play:([^/]+)'
+
+    def _real_extract(self,url):
+        mobj = re.match(self._VALID_URL, url)
+        if mobj is None:
+            raise ExtractorError(u'Invalid URL: %s' % url)
+        video_id = mobj.group(1)
+
+        redirect_page, urlh = self._download_webpage_handle(url, video_id)
+        redirect_url = urlh.geturl() + re.search(r'window\.location = \'(.*)\';', redirect_page).group(1)
+        webpage = self._download_webpage(redirect_url, video_id, u'Downloading redirect page')
+
+        title = re.search(r'<title>(.*)</title>', webpage)
+        title = (title.group(1)).split('/')[0].strip()
+
+        ext = "flv"
+        info_url = "http://vbox7.com/play/magare.do"
+        data = compat_urllib_parse.urlencode({'as3':'1','vid':video_id})
+        info_request = compat_urllib_request.Request(info_url, data)
+        info_request.add_header('Content-Type', 'application/x-www-form-urlencoded')
+        info_response = self._download_webpage(info_request, video_id, u'Downloading info webpage')
+        if info_response is None:
+            raise ExtractorError(u'Unable to extract the media url')
+        (final_url, thumbnail_url) = map(lambda x: x.split('=')[1], info_response.split('&'))
+
+        return [{
+            'id':        video_id,
+            'url':       final_url,
+            'ext':       ext,
+            'title':     title,
+            'thumbnail': thumbnail_url,
+        }]
 
 def gen_extractors():
     """ Return a list of an instance of every supported extractor.
@@ -4463,8 +4645,8 @@ def gen_extractors():
         YahooSearchIE(),
         DepositFilesIE(),
         FacebookIE(),
-        BlipTVUserIE(),
         BlipTVIE(),
+        BlipTVUserIE(),
         VimeoIE(),
         MyVideoIE(),
         ComedyCentralIE(),
@@ -4507,6 +4689,9 @@ def gen_extractors():
         VineIE(),
         FlickrIE(),
         TeamcocoIE(),
+        XHamsterIE(),
+        HypemIE(),
+        Vbox7IE(),
         GenericIE()
     ]
 
diff --git a/youtube_dl/version.py b/youtube_dl/version.py
index dbc928394..1cda7fa74 100644
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,2 +1,2 @@
 
-__version__ = '2013.05.14'
+__version__ = '2013.05.23'