Merge branch 'qqmusic-more-formats' of https://github.com/ping/youtube-dl into ping...
author: Yen Chi Hsuan <yan12125@gmail.com>
Fri, 5 Jun 2015 15:14:44 +0000 (23:14 +0800)
committer: Yen Chi Hsuan <yan12125@gmail.com>
Fri, 5 Jun 2015 15:14:44 +0000 (23:14 +0800)
docs/supportedsites.md
youtube_dl/extractor/empflix.py
youtube_dl/extractor/qqmusic.py
youtube_dl/extractor/teamcoco.py
youtube_dl/extractor/tnaflix.py
youtube_dl/version.py

index a421ae62bf95bd8173c0f6427163b25dd150ae18..d147b53fe60a5bba6f9d33d1fba684014513f83b 100644 (file)
@@ -10,6 +10,7 @@
  - **56.com**
  - **5min**
  - **8tracks**
+ - **91porn**
  - **9gag**
  - **abc.net.au**
  - **Abc7News**
  - **Noco**
  - **Normalboots**
  - **NosVideo**
+ - **Nova**: TN.cz, Prásk.tv, Nova.cz, Novaplus.cz, FANDA.tv, Krásná.cz and Doma.cz
  - **novamov**: NovaMov
  - **Nowness**
  - **NowTV**
  - **smotri:user**: Smotri.com user videos
  - **Snotr**
  - **Sohu**
+ - **soompi**
+ - **soompi:show**
  - **soundcloud**
  - **soundcloud:playlist**
  - **soundcloud:set**
  - **Trilulilu**
  - **TruTube**
  - **Tube8**
+ - **TubiTv**
  - **Tudou**
  - **Tumblr**
  - **TuneIn**
index 9a5a8f4bb44039e6c52968801033a3d12a73d835..4827022e088cf33d064acf7ab8cebdadc0d743a0 100644 (file)
@@ -26,6 +26,6 @@ class EMPFlixIE(TNAFlixIE):
         },
         {
             'url': 'http://www.empflix.com/videos/[AROMA][ARMD-718]-Aoi-Yoshino-Sawa-25826.html',
-            'matching_only': True,
+            'only_matching': True,
         }
     ]
index dc300e1896ce75c5f2494d7b9ca7c8fc6fb9db35..e24ddaefe6785b17d28e735c99227dcee0f1ec0d 100644 (file)
@@ -9,7 +9,6 @@ from .common import InfoExtractor
 from ..utils import (
     strip_jsonp,
     unescapeHTML,
-    js_to_json,
 )
 from ..compat import compat_urllib_request
 
@@ -196,60 +195,49 @@ class QQMusicToplistIE(QQPlaylistBaseIE):
     _VALID_URL = r'http://y\.qq\.com/#type=toplist&p=(?P<id>(top|global)_[0-9]+)'
 
     _TESTS = [{
-        'url': 'http://y.qq.com/#type=toplist&p=global_12',
+        'url': 'http://y.qq.com/#type=toplist&p=global_123',
         'info_dict': {
-            'id': 'global_12',
-            'title': 'itunes榜',
+            'id': 'global_123',
+            'title': '美国iTunes榜',
         },
         'playlist_count': 10,
     }, {
-        'url': 'http://y.qq.com/#type=toplist&p=top_6',
+        'url': 'http://y.qq.com/#type=toplist&p=top_3',
         'info_dict': {
-            'id': 'top_6',
+            'id': 'top_3',
             'title': 'QQ音乐巅峰榜·欧美',
+            'description': 'QQ音乐巅峰榜·欧美根据用户收听行为自动生成,集结当下最流行的欧美新歌!:更新时间:每周四22点|统'
+                           '计周期:一周(上周四至本周三)|统计对象:三个月内发行的欧美歌曲|统计数量:100首|统计算法:根据'
+                           '歌曲在一周内的有效播放次数,由高到低取前100名(同一歌手最多允许5首歌曲同时上榜)|有效播放次数:'
+                           '登录用户完整播放一首歌曲,记为一次有效播放;同一用户收听同一首歌曲,每天记录为1次有效播放'
         },
         'playlist_count': 100,
     }, {
-        'url': 'http://y.qq.com/#type=toplist&p=global_5',
+        'url': 'http://y.qq.com/#type=toplist&p=global_106',
         'info_dict': {
-            'id': 'global_5',
-            'title': '韩国mnet排行榜',
+            'id': 'global_106',
+            'title': '韩国Mnet榜',
         },
         'playlist_count': 50,
     }]
 
-    @staticmethod
-    def strip_qq_jsonp(code):
-        return js_to_json(re.sub(r'^MusicJsonCallback\((.*?)\)/\*.+?\*/$', r'\1', code))
-
     def _real_extract(self, url):
         list_id = self._match_id(url)
 
         list_type, num_id = list_id.split("_")
 
-        list_page = self._download_webpage(
-            "http://y.qq.com/y/static/toplist/index/%s.html" % list_id,
-            list_id, 'Download toplist page')
-
-        entries = []
-        if list_type == 'top':
-            jsonp_url = "http://y.qq.com/y/static/toplist/json/top/%s/1.js" % num_id
-        else:
-            jsonp_url = "http://y.qq.com/y/static/toplist/json/global/%s/1_1.js" % num_id
-
         toplist_json = self._download_json(
-            jsonp_url, list_id, note='Retrieve toplist json',
-            errnote='Unable to get toplist json', transform_source=self.strip_qq_jsonp)
-
-        for song in toplist_json['l']:
-            s = song['s']
-            song_mid = s.split("|")[20]
-            entries.append(self.url_result(
-                'http://y.qq.com/#type=song&mid=' + song_mid, 'QQMusic',
-                song_mid))
+            'http://i.y.qq.com/v8/fcg-bin/fcg_v8_toplist_cp.fcg?type=%s&topid=%s&format=json'
+            % (list_type, num_id),
+            list_id, 'Download toplist page')
 
-        list_name = self._html_search_regex(
-            r'<h2 id="top_name">([^\']+)</h2>', list_page, 'top list name',
-            default=None)
+        entries = [
+            self.url_result(
+                'http://y.qq.com/#type=song&mid=' + song['data']['songmid'], 'QQMusic', song['data']['songmid']
+            ) for song in toplist_json['songlist']
+        ]
 
-        return self.playlist_result(entries, list_id, list_name)
+        topinfo = toplist_json.get('topinfo', {})
+        list_name = topinfo.get('ListName')
+        list_description = topinfo.get('info')
+        return self.playlist_result(entries, list_id, list_name, list_description)
index b2a4b1fc05430558ad9b33a9aa3ce834107dc6e3..d1b7264b4ca4a0cb72e491da26d7f5bbc1cc66b7 100644 (file)
@@ -51,6 +51,17 @@ class TeamcocoIE(InfoExtractor):
             'params': {
                 'skip_download': True,  # m3u8 downloads
             }
+        }, {
+            'url': 'http://teamcoco.com/video/full-episode-mon-6-1-joel-mchale-jake-tapper-and-musical-guest-courtney-barnett?playlist=x;eyJ0eXBlIjoidGFnIiwiaWQiOjl9',
+            'info_dict': {
+                'id': '89341',
+                'ext': 'mp4',
+                'title': 'Full Episode - Mon. 6/1 - Joel McHale, Jake Tapper, And Musical Guest Courtney Barnett',
+                'description': 'Guests: Joel McHale, Jake Tapper, And Musical Guest Courtney Barnett',
+            },
+            'params': {
+                'skip_download': True,  # m3u8 downloads
+            }
         }
     ]
     _VIDEO_ID_REGEXES = (
@@ -110,9 +121,23 @@ class TeamcocoIE(InfoExtractor):
         get_quality = qualities(['500k', '480p', '1000k', '720p', '1080p'])
         for filed in data['files']:
             if determine_ext(filed['url']) == 'm3u8':
-                formats.extend(self._extract_m3u8_formats(
-                    filed['url'], video_id, ext='mp4'))
+                # compat_urllib_parse.urljoin does not work here
+                if filed['url'].startswith('/'):
+                    m3u8_url = 'http://ht.cdn.turner.com/tbs/big/teamcoco' + filed['url']
+                else:
+                    m3u8_url = filed['url']
+                m3u8_formats = self._extract_m3u8_formats(
+                    m3u8_url, video_id, ext='mp4')
+                for m3u8_format in m3u8_formats:
+                    if m3u8_format not in formats:
+                        formats.append(m3u8_format)
+            elif determine_ext(filed['url']) == 'f4m':
+                # TODO Correct f4m extraction
+                continue
             else:
+                if filed['url'].startswith('/mp4:protected/'):
+                    # TODO Correct extraction for these files
+                    continue
                 m_format = re.search(r'(\d+(k|p))\.mp4', filed['url'])
                 if m_format is not None:
                     format_id = m_format.group(1)
index 59af9aba06399cefcc6c2049c958dfb3819bb20a..c282865b2517d8cbd62df6f2dee0540146baae48 100644 (file)
@@ -33,7 +33,7 @@ class TNAFlixIE(InfoExtractor):
         },
         {
             'url': 'https://www.tnaflix.com/amateur-porn/bunzHD-Ms.Donk/video358632',
-            'matching_only': True,
+            'only_matching': True,
         }
     ]
 
@@ -51,9 +51,8 @@ class TNAFlixIE(InfoExtractor):
 
         age_limit = self._rta_search(webpage)
 
-        duration = self._html_search_meta('duration', webpage, 'duration', default=None)
-        if duration:
-            duration = parse_duration(duration[1:])
+        duration = parse_duration(self._html_search_meta(
+            'duration', webpage, 'duration', default=None))
 
         cfg_url = self._proto_relative_url(self._html_search_regex(
             self._CONFIG_REGEX, webpage, 'flashvars.config'), 'http:')
@@ -62,14 +61,15 @@ class TNAFlixIE(InfoExtractor):
             cfg_url, display_id, note='Downloading metadata',
             transform_source=fix_xml_ampersands)
 
-        thumbnail = cfg_xml.find('./startThumb').text
+        thumbnail = self._proto_relative_url(
+            cfg_xml.find('./startThumb').text, 'http:')
 
         formats = []
         for item in cfg_xml.findall('./quality/item'):
             video_url = re.sub('speed=\d+', 'speed=', item.find('videoLink').text)
             format_id = item.find('res').text
             fmt = {
-                'url': video_url,
+                'url': self._proto_relative_url(video_url, 'http:'),
                 'format_id': format_id,
             }
             m = re.search(r'^(\d+)', format_id)
index 6537101310684fe09d5d6327bab9d46106112cf4..9cf84ff712da103b9c3ce0b3e2f0c9386082e574 100644 (file)
@@ -1,3 +1,3 @@
 from __future__ import unicode_literals
 
-__version__ = '2015.05.29'
+__version__ = '2015.06.04.1'