Merge pull request #8754 from remitamine/5min
author remitamine <remitamine@gmail.com>
Sat, 5 Mar 2016 17:35:48 +0000 (18:35 +0100)
committer remitamine <remitamine@gmail.com>
Sat, 5 Mar 2016 17:35:48 +0000 (18:35 +0100)
update 5min related web sites info extraction and add support for Aol features.

16 files changed:
youtube_dl/YoutubeDL.py
youtube_dl/extractor/__init__.py
youtube_dl/extractor/audimedia.py
youtube_dl/extractor/bleacherreport.py
youtube_dl/extractor/douyutv.py
youtube_dl/extractor/elpais.py
youtube_dl/extractor/foxnews.py
youtube_dl/extractor/jeuxvideo.py
youtube_dl/extractor/kusi.py [new file with mode: 0644]
youtube_dl/extractor/leeco.py
youtube_dl/extractor/revision3.py
youtube_dl/extractor/twitter.py
youtube_dl/extractor/vgtv.py
youtube_dl/extractor/yandexmusic.py
youtube_dl/extractor/youporn.py
youtube_dl/utils.py

index dcc867e456db6b0131ed797ae6bfd78f327834dc..f91851df91ee2a7bf49910ac2d8ec12f2bf94469 100755 (executable)
@@ -567,7 +567,7 @@ class YoutubeDL(object):
                 elif template_dict.get('height'):
                     template_dict['resolution'] = '%sp' % template_dict['height']
                 elif template_dict.get('width'):
-                    template_dict['resolution'] = '?x%d' % template_dict['width']
+                    template_dict['resolution'] = '%dx?' % template_dict['width']
 
             sanitize = lambda k, v: sanitize_filename(
                 compat_str(v),
index db182766668e2cf33e49e8bf62fe774d61890476..899bf8114f3e7711ef145b373cc9b1d59fec9089 100644 (file)
@@ -343,6 +343,7 @@ from .konserthusetplay import KonserthusetPlayIE
 from .kontrtube import KontrTubeIE
 from .krasview import KrasViewIE
 from .ku6 import Ku6IE
+from .kusi import KUSIIE
 from .kuwo import (
     KuwoIE,
     KuwoAlbumIE,
index 3b2effa15fe15a5527644349d785b452540c7568..aa6925623140f08090515fda2f42a7debd5545ac 100644 (file)
@@ -10,9 +10,9 @@ from ..utils import (
 
 
 class AudiMediaIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?audimedia\.tv/(?:en|de)/vid/(?P<id>[^/?#]+)'
+    _VALID_URL = r'https?://(?:www\.)?audi-mediacenter\.com/(?:en|de)/audimediatv/(?P<id>[^/?#]+)'
     _TEST = {
-        'url': 'https://audimedia.tv/en/vid/60-seconds-of-audi-sport-104-2015-wec-bahrain-rookie-test',
+        'url': 'https://www.audi-mediacenter.com/en/audimediatv/60-seconds-of-audi-sport-104-2015-wec-bahrain-rookie-test-1467',
         'md5': '79a8b71c46d49042609795ab59779b66',
         'info_dict': {
             'id': '1565',
@@ -32,7 +32,10 @@ class AudiMediaIE(InfoExtractor):
         display_id = self._match_id(url)
         webpage = self._download_webpage(url, display_id)
 
-        raw_payload = self._search_regex(r'<script[^>]+class="amtv-embed"[^>]+id="([^"]+)"', webpage, 'raw payload')
+        raw_payload = self._search_regex([
+            r'class="amtv-embed"[^>]+id="([^"]+)"',
+            r'class=\\"amtv-embed\\"[^>]+id=\\"([^"]+)\\"',
+        ], webpage, 'raw payload')
         _, stage_mode, video_id, lang = raw_payload.split('-')
 
         # TODO: handle s and e stage_mode (live streams and ended live streams)
@@ -59,13 +62,19 @@ class AudiMediaIE(InfoExtractor):
                 video_version_url = video_version.get('download_url') or video_version.get('stream_url')
                 if not video_version_url:
                     continue
-                formats.append({
+                f = {
                     'url': video_version_url,
                     'width': int_or_none(video_version.get('width')),
                     'height': int_or_none(video_version.get('height')),
                     'abr': int_or_none(video_version.get('audio_bitrate')),
                     'vbr': int_or_none(video_version.get('video_bitrate')),
-                })
+                }
+                bitrate = self._search_regex(r'(\d+)k', video_version_url, 'bitrate', default=None)
+                if bitrate:
+                    f.update({
+                        'format_id': 'http-%s' % bitrate,
+                    })
+                formats.append(f)
             self._sort_formats(formats)
 
             return {
index 38bda3af5a189cc7a2c8d65937a7d710edd0211f..7a8e1f60b82923b643918e43924fa64a5250cb83 100644 (file)
@@ -28,10 +28,10 @@ class BleacherReportIE(InfoExtractor):
         'add_ie': ['Ooyala'],
     }, {
         'url': 'http://bleacherreport.com/articles/2586817-aussie-golfers-get-fright-of-their-lives-after-being-chased-by-angry-kangaroo',
-        'md5': 'af5f90dc9c7ba1c19d0a3eac806bbf50',
+        'md5': '6a5cd403418c7b01719248ca97fb0692',
         'info_dict': {
             'id': '2586817',
-            'ext': 'mp4',
+            'ext': 'webm',
             'title': 'Aussie Golfers Get Fright of Their Lives After Being Chased by Angry Kangaroo',
             'timestamp': 1446839961,
             'uploader': 'Sean Fay',
@@ -93,10 +93,14 @@ class BleacherReportCMSIE(AMPIE):
         'md5': '8c2c12e3af7805152675446c905d159b',
         'info_dict': {
             'id': '8fd44c2f-3dc5-4821-9118-2c825a98c0e1',
-            'ext': 'flv',
+            'ext': 'mp4',
             'title': 'Cena vs. Rollins Would Expose the Heavyweight Division',
             'description': 'md5:984afb4ade2f9c0db35f3267ed88b36e',
         },
+        'params': {
+            # m3u8 download
+            'skip_download': True,
+        },
     }]
 
     def _real_extract(self, url):
index 373b3b4b4735d8544128c48a10037eed3c570e5d..bdc768c783b9b3213badc5cf4b354f6159142f9f 100644 (file)
@@ -18,7 +18,7 @@ class DouyuTVIE(InfoExtractor):
             'display_id': 'iseven',
             'ext': 'flv',
             'title': 're:^清晨醒脑!T-ara根本停不下来! [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
-            'description': 'md5:c93d6692dde6fe33809a46edcbecca44',
+            'description': 'md5:f34981259a03e980a3c6404190a3ed61',
             'thumbnail': 're:^https?://.*\.jpg$',
             'uploader': '7师傅',
             'uploader_id': '431925',
@@ -26,7 +26,7 @@ class DouyuTVIE(InfoExtractor):
         },
         'params': {
             'skip_download': True,
-        }
+        },
     }, {
         'url': 'http://www.douyutv.com/85982',
         'info_dict': {
@@ -42,7 +42,24 @@ class DouyuTVIE(InfoExtractor):
         },
         'params': {
             'skip_download': True,
-        }
+        },
+        'skip': 'Romm not found',
+    }, {
+        'url': 'http://www.douyutv.com/17732',
+        'info_dict': {
+            'id': '17732',
+            'display_id': '17732',
+            'ext': 'flv',
+            'title': 're:^清晨醒脑!T-ara根本停不下来! [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
+            'description': 'md5:f34981259a03e980a3c6404190a3ed61',
+            'thumbnail': 're:^https?://.*\.jpg$',
+            'uploader': '7师傅',
+            'uploader_id': '431925',
+            'is_live': True,
+        },
+        'params': {
+            'skip_download': True,
+        },
     }]
 
     def _real_extract(self, url):
index 00a69e6312aede6069e062c6abff29137939daa9..8c725a4e631860584781b116e72b02dd05813fc2 100644 (file)
@@ -9,7 +9,7 @@ class ElPaisIE(InfoExtractor):
     _VALID_URL = r'https?://(?:[^.]+\.)?elpais\.com/.*/(?P<id>[^/#?]+)\.html(?:$|[?#])'
     IE_DESC = 'El País'
 
-    _TEST = {
+    _TESTS = [{
         'url': 'http://blogs.elpais.com/la-voz-de-inaki/2014/02/tiempo-nuevo-recetas-viejas.html',
         'md5': '98406f301f19562170ec071b83433d55',
         'info_dict': {
@@ -19,30 +19,41 @@ class ElPaisIE(InfoExtractor):
             'description': 'De lunes a viernes, a partir de las ocho de la mañana, Iñaki Gabilondo nos cuenta su visión de la actualidad nacional e internacional.',
             'upload_date': '20140206',
         }
-    }
+    }, {
+        'url': 'http://elcomidista.elpais.com/elcomidista/2016/02/24/articulo/1456340311_668921.html#?id_externo_nwl=newsletter_diaria20160303t',
+        'md5': '3bd5b09509f3519d7d9e763179b013de',
+        'info_dict': {
+            'id': '1456340311_668921',
+            'ext': 'mp4',
+            'title': 'Cómo hacer el mejor café con cafetera italiana',
+            'description': 'Que sí, que las cápsulas son cómodas. Pero si le pides algo más a la vida, quizá deberías aprender a usar bien la cafetera italiana. No tienes más que ver este vídeo y seguir sus siete normas básicas.',
+            'upload_date': '20160303',
+        }
+    }]
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
 
         prefix = self._html_search_regex(
-            r'var url_cache = "([^"]+)";', webpage, 'URL prefix')
+            r'var\s+url_cache\s*=\s*"([^"]+)";', webpage, 'URL prefix')
         video_suffix = self._search_regex(
-            r"URLMediaFile = url_cache \+ '([^']+)'", webpage, 'video URL')
+            r"(?:URLMediaFile|urlVideo_\d+)\s*=\s*url_cache\s*\+\s*'([^']+)'", webpage, 'video URL')
         video_url = prefix + video_suffix
         thumbnail_suffix = self._search_regex(
-            r"URLMediaStill = url_cache \+ '([^']+)'", webpage, 'thumbnail URL',
-            fatal=False)
+            r"(?:URLMediaStill|urlFotogramaFijo_\d+)\s*=\s*url_cache\s*\+\s*'([^']+)'",
+            webpage, 'thumbnail URL', fatal=False)
         thumbnail = (
             None if thumbnail_suffix is None
             else prefix + thumbnail_suffix)
         title = self._html_search_regex(
-            '<h2 class="entry-header entry-title.*?>(.*?)</h2>',
+            (r"tituloVideo\s*=\s*'([^']+)'", webpage, 'title',
+             r'<h2 class="entry-header entry-title.*?>(.*?)</h2>'),
             webpage, 'title')
-        date_str = self._search_regex(
+        upload_date = unified_strdate(self._search_regex(
             r'<p class="date-header date-int updated"\s+title="([^"]+)">',
-            webpage, 'upload date', fatal=False)
-        upload_date = (None if date_str is None else unified_strdate(date_str))
+            webpage, 'upload date', default=None) or self._html_search_meta(
+            'datePublished', webpage, 'timestamp'))
 
         return {
             'id': video_id,
index 318ac013d44b9ca8ce9de5c77d67b2cd3c9bb1e1..1dc50318ce81feb2604cfef943ddfb90e0a7641b 100644 (file)
@@ -36,6 +36,10 @@ class FoxNewsIE(AMPIE):
                 # 'upload_date': '20141204',
                 'thumbnail': 're:^https?://.*\.jpg$',
             },
+            'params': {
+                # m3u8 download
+                'skip_download': True,
+            },
         },
         {
             'url': 'http://video.foxnews.com/v/video-embed.html?video_id=3937480&d=video.foxnews.com',
index eef7daa299813219c5211aefe2051a1160238319..137db873cc09f7e57b258bcf65b8331d8b36b8c0 100644 (file)
@@ -30,7 +30,7 @@ class JeuxVideoIE(InfoExtractor):
         webpage = self._download_webpage(url, title)
         title = self._html_search_meta('name', webpage) or self._og_search_title(webpage)
         config_url = self._html_search_regex(
-            r'data-src="(/contenu/medias/video.php.*?)"',
+            r'data-src(?:set-video)?="(/contenu/medias/video.php.*?)"',
             webpage, 'config URL')
         config_url = 'http://www.jeuxvideo.com' + config_url
 
diff --git a/youtube_dl/extractor/kusi.py b/youtube_dl/extractor/kusi.py
new file mode 100644 (file)
index 0000000..931f34c
--- /dev/null
@@ -0,0 +1,99 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import random
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_urllib_parse_unquote_plus
+from ..utils import (
+    int_or_none,
+    float_or_none,
+    timeconvert,
+    update_url_query,
+    xpath_text,
+)
+
+
+class KUSIIE(InfoExtractor):
+    _VALID_URL = r'http://(?:www\.)?kusi\.com/(?P<path>story/.+|video\?clipId=(?P<clipId>\d+))'
+    _TESTS = [{
+        'url': 'http://www.kusi.com/story/31183873/turko-files-case-closed-put-on-hold',
+        'md5': 'f926e7684294cf8cb7bdf8858e1b3988',
+        'info_dict': {
+            'id': '12203019',
+            'ext': 'mp4',
+            'title': 'Turko Files: Case Closed! & Put On Hold!',
+            'duration': 231.0,
+            'upload_date': '20160210',
+            'timestamp': 1455087571,
+            'thumbnail': 're:^https?://.*\.jpg$'
+        },
+    }, {
+        'url': 'http://kusi.com/video?clipId=12203019',
+        'info_dict': {
+            'id': '12203019',
+            'ext': 'mp4',
+            'title': 'Turko Files: Case Closed! & Put On Hold!',
+            'duration': 231.0,
+            'upload_date': '20160210',
+            'timestamp': 1455087571,
+            'thumbnail': 're:^https?://.*\.jpg$'
+        },
+        'params': {
+            'skip_download': True,  # Same as previous one
+        },
+    }]
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        clip_id = mobj.group('clipId')
+        video_id = clip_id or mobj.group('path')
+
+        webpage = self._download_webpage(url, video_id)
+
+        if clip_id is None:
+            video_id = clip_id = self._html_search_regex(
+                r'"clipId"\s*,\s*"(\d+)"', webpage, 'clip id')
+
+        affiliate_id = self._search_regex(
+            r'affiliateId\s*:\s*\'([^\']+)\'', webpage, 'affiliate id')
+
+        # See __Packages/worldnow/model/GalleryModel.as of WNGallery.swf
+        xml_url = update_url_query('http://www.kusi.com/build.asp', {
+            'buildtype': 'buildfeaturexmlrequest',
+            'featureType': 'Clip',
+            'featureid': clip_id,
+            'affiliateno': affiliate_id,
+            'clientgroupid': '1',
+            'rnd': int(round(random.random() * 1000000)),
+        })
+
+        doc = self._download_xml(xml_url, video_id)
+
+        video_title = xpath_text(doc, 'HEADLINE', fatal=True)
+        duration = float_or_none(xpath_text(doc, 'DURATION'), scale=1000)
+        description = xpath_text(doc, 'ABSTRACT')
+        thumbnail = xpath_text(doc, './THUMBNAILIMAGE/FILENAME')
+        createtion_time = timeconvert(xpath_text(doc, 'rfc822creationdate'))
+
+        quality_options = doc.find('{http://search.yahoo.com/mrss/}group').findall('{http://search.yahoo.com/mrss/}content')
+        formats = []
+        for quality in quality_options:
+            formats.append({
+                'url': compat_urllib_parse_unquote_plus(quality.attrib['url']),
+                'height': int_or_none(quality.attrib.get('height')),
+                'width': int_or_none(quality.attrib.get('width')),
+                'vbr': float_or_none(quality.attrib.get('bitratebits'), scale=1000),
+            })
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': video_title,
+            'description': description,
+            'duration': duration,
+            'formats': formats,
+            'thumbnail': thumbnail,
+            'timestamp': createtion_time,
+        }
index d0cd3f5919350117a1babe44df1e1ea6ff4bb3c5..df47e88ba98a8d69d2dca43cf20ccb7366232f57 100644 (file)
@@ -217,14 +217,8 @@ class LePlaylistIE(InfoExtractor):
         'playlist_mincount': 96
     }, {
         'url': 'http://tv.le.com/pzt/lswjzzjc/index.shtml',
-        'info_dict': {
-            'id': 'lswjzzjc',
-            # The title should be "劲舞青春", but I can't find a simple way to
-            # determine the playlist title
-            'title': '乐视午间自制剧场',
-            'description': 'md5:b1eef244f45589a7b5b1af9ff25a4489'
-        },
-        'playlist_mincount': 7
+        # This series is moved to http://www.le.com/tv/10005297.html
+        'only_matching': True,
     }, {
         'url': 'http://www.le.com/comic/92063.html',
         'only_matching': True,
@@ -338,7 +332,7 @@ class LetvCloudIE(InfoExtractor):
             formats.append({
                 'url': url,
                 'ext': determine_ext(decoded_url),
-                'format_id': int_or_none(play_url.get('vtype')),
+                'format_id': str_or_none(play_url.get('vtype')),
                 'format_note': str_or_none(play_url.get('definition')),
                 'width': int_or_none(play_url.get('vwidth')),
                 'height': int_or_none(play_url.get('vheight')),
index b1b8800b97c9eb8caad2c03f999f1bc8f304c4da..99979ebe1a9fe82099076b46b576ef38a58bca8c 100644 (file)
@@ -19,7 +19,7 @@ class Revision3IE(InfoExtractor):
         'url': 'http://www.revision3.com/technobuffalo/5-google-predictions-for-2016',
         'md5': 'd94a72d85d0a829766de4deb8daaf7df',
         'info_dict': {
-            'id': '73034',
+            'id': '71089',
             'display_id': 'technobuffalo/5-google-predictions-for-2016',
             'ext': 'webm',
             'title': '5 Google Predictions for 2016',
@@ -31,6 +31,7 @@ class Revision3IE(InfoExtractor):
             'uploader_id': 'technobuffalo',
         }
     }, {
+        # Show
         'url': 'http://testtube.com/brainstuff',
         'info_dict': {
             'id': '251',
@@ -41,7 +42,7 @@ class Revision3IE(InfoExtractor):
     }, {
         'url': 'https://testtube.com/dnews/5-weird-ways-plants-can-eat-animals?utm_source=FB&utm_medium=DNews&utm_campaign=DNewsSocial',
         'info_dict': {
-            'id': '60163',
+            'id': '58227',
             'display_id': 'dnews/5-weird-ways-plants-can-eat-animals',
             'duration': 275,
             'ext': 'webm',
@@ -52,18 +53,72 @@ class Revision3IE(InfoExtractor):
             'uploader': 'DNews',
             'uploader_id': 'dnews',
         },
+    }, {
+        'url': 'http://testtube.com/tt-editors-picks/the-israel-palestine-conflict-explained-in-ten-min',
+        'info_dict': {
+            'id': '71618',
+            'ext': 'mp4',
+            'display_id': 'tt-editors-picks/the-israel-palestine-conflict-explained-in-ten-min',
+            'title': 'The Israel-Palestine Conflict Explained in Ten Minutes',
+            'description': 'If you\'d like to learn about the struggle between Israelis and Palestinians, this video is a great place to start',
+            'uploader': 'Editors\' Picks',
+            'uploader_id': 'tt-editors-picks',
+            'timestamp': 1453309200,
+            'upload_date': '20160120',
+        },
+        'add_ie': ['Youtube'],
+    }, {
+        # Tag
+        'url': 'http://testtube.com/tech-news',
+        'info_dict': {
+            'id': '21018',
+            'title': 'tech news',
+        },
+        'playlist_mincount': 9,
     }]
     _PAGE_DATA_TEMPLATE = 'http://www.%s/apiProxy/ddn/%s?domain=%s'
     _API_KEY = 'ba9c741bce1b9d8e3defcc22193f3651b8867e62'
 
     def _real_extract(self, url):
         domain, display_id = re.match(self._VALID_URL, url).groups()
+        site = domain.split('.')[0]
         page_info = self._download_json(
             self._PAGE_DATA_TEMPLATE % (domain, display_id, domain), display_id)
 
-        if page_info['data']['type'] == 'episode':
-            episode_data = page_info['data']
-            video_id = compat_str(episode_data['video']['data']['id'])
+        page_data = page_info['data']
+        page_type = page_data['type']
+        if page_type in ('episode', 'embed'):
+            show_data = page_data['show']['data']
+            page_id = compat_str(page_data['id'])
+            video_id = compat_str(page_data['video']['data']['id'])
+
+            preference = qualities(['mini', 'small', 'medium', 'large'])
+            thumbnails = [{
+                'url': image_url,
+                'id': image_id,
+                'preference': preference(image_id)
+            } for image_id, image_url in page_data.get('images', {}).items()]
+
+            info = {
+                'id': page_id,
+                'display_id': display_id,
+                'title': unescapeHTML(page_data['name']),
+                'description': unescapeHTML(page_data.get('summary')),
+                'timestamp': parse_iso8601(page_data.get('publishTime'), ' '),
+                'author': page_data.get('author'),
+                'uploader': show_data.get('name'),
+                'uploader_id': show_data.get('slug'),
+                'thumbnails': thumbnails,
+                'extractor_key': site,
+            }
+
+            if page_type == 'embed':
+                info.update({
+                    '_type': 'url_transparent',
+                    'url': page_data['video']['data']['embed'],
+                })
+                return info
+
             video_data = self._download_json(
                 'http://revision3.com/api/getPlaylist.json?api_key=%s&codecs=h264,vp8,theora&video_id=%s' % (self._API_KEY, video_id),
                 video_id)['items'][0]
@@ -84,36 +139,30 @@ class Revision3IE(InfoExtractor):
                         })
             self._sort_formats(formats)
 
-            preference = qualities(['mini', 'small', 'medium', 'large'])
-            thumbnails = [{
-                'url': image_url,
-                'id': image_id,
-                'preference': preference(image_id)
-            } for image_id, image_url in video_data.get('images', {}).items()]
-
-            return {
-                'id': video_id,
-                'display_id': display_id,
+            info.update({
                 'title': unescapeHTML(video_data['title']),
                 'description': unescapeHTML(video_data.get('summary')),
-                'timestamp': parse_iso8601(episode_data.get('publishTime'), ' '),
-                'author': episode_data.get('author'),
                 'uploader': video_data.get('show', {}).get('name'),
                 'uploader_id': video_data.get('show', {}).get('slug'),
                 'duration': int_or_none(video_data.get('duration')),
-                'thumbnails': thumbnails,
                 'formats': formats,
-            }
+            })
+            return info
         else:
-            show_data = page_info['show']['data']
+            list_data = page_info[page_type]['data']
             episodes_data = page_info['episodes']['data']
             num_episodes = page_info['meta']['totalEpisodes']
             processed_episodes = 0
             entries = []
             page_num = 1
             while True:
-                entries.extend([self.url_result(
-                    'http://%s/%s/%s' % (domain, display_id, episode['slug'])) for episode in episodes_data])
+                entries.extend([{
+                    '_type': 'url',
+                    'url': 'http://%s%s' % (domain, episode['path']),
+                    'id': compat_str(episode['id']),
+                    'ie_key': 'Revision3',
+                    'extractor_key': site,
+                } for episode in episodes_data])
                 processed_episodes += len(episodes_data)
                 if processed_episodes == num_episodes:
                     break
@@ -123,5 +172,5 @@ class Revision3IE(InfoExtractor):
                     display_id)['episodes']['data']
 
             return self.playlist_result(
-                entries, compat_str(show_data['id']),
-                show_data.get('name'), show_data.get('summary'))
+                entries, compat_str(list_data['id']),
+                list_data.get('name'), list_data.get('summary'))
index 67762a003075c71c8067bd763772642d77888d93..e70b2ab3c8d564cd907e8763fd8136e1b3827ac5 100644 (file)
@@ -102,6 +102,14 @@ class TwitterCardIE(TwitterBaseIE):
             r'data-(?:player-)?config="([^"]+)"', webpage, 'data player config'),
             video_id)
 
+        def _search_dimensions_in_video_url(a_format, video_url):
+            m = re.search(r'/(?P<width>\d+)x(?P<height>\d+)/', video_url)
+            if m:
+                a_format.update({
+                    'width': int(m.group('width')),
+                    'height': int(m.group('height')),
+                })
+
         playlist = config.get('playlist')
         if playlist:
             video_url = playlist[0]['source']
@@ -110,12 +118,8 @@ class TwitterCardIE(TwitterBaseIE):
                 'url': video_url,
             }
 
-            m = re.search(r'/(?P<width>\d+)x(?P<height>\d+)/', video_url)
-            if m:
-                f.update({
-                    'width': int(m.group('width')),
-                    'height': int(m.group('height')),
-                })
+            _search_dimensions_in_video_url(f, video_url)
+
             formats.append(f)
 
         vmap_url = config.get('vmapUrl') or config.get('vmap_url')
@@ -148,6 +152,8 @@ class TwitterCardIE(TwitterBaseIE):
                     if not a_format['vbr']:
                         del a_format['vbr']
 
+                    _search_dimensions_in_video_url(a_format, media_url)
+
                     formats.append(a_format)
 
             duration = float_or_none(media_info.get('duration', {}).get('nanos'), scale=1e9)
index 14e945d494cd2f6e5f3b3e6a03ff6ebb076826dd..e148b1ef513321376efe1795056503ea2a8bcad8 100644 (file)
@@ -20,6 +20,7 @@ class VGTVIE(XstreamIE):
         'aftenbladet.no/tv': 'satv',
         'fvn.no/fvntv': 'fvntv',
         'aftenposten.no/webtv': 'aptv',
+        'ap.vgtv.no/webtv': 'aptv',
     }
 
     _APP_NAME_TO_VENDOR = {
@@ -35,7 +36,7 @@ class VGTVIE(XstreamIE):
                     (?P<host>
                         %s
                     )
-                    /
+                    /?
                     (?:
                         \#!/(?:video|live)/|
                         embed?.*id=
@@ -107,19 +108,27 @@ class VGTVIE(XstreamIE):
             'md5': 'fd828cd29774a729bf4d4425fe192972',
             'info_dict': {
                 'id': '21039',
-                'ext': 'mov',
+                'ext': 'mp4',
                 'title': 'TRAILER: «SWEATSHOP» - I can´t take any more',
                 'description': 'md5:21891f2b0dd7ec2f78d84a50e54f8238',
                 'duration': 66,
                 'timestamp': 1417002452,
                 'upload_date': '20141126',
                 'view_count': int,
-            }
+            },
+            'params': {
+                # m3u8 download
+                'skip_download': True,
+            },
         },
         {
             'url': 'http://www.bt.no/tv/#!/video/100250/norling-dette-er-forskjellen-paa-1-divisjon-og-eliteserien',
             'only_matching': True,
         },
+        {
+            'url': 'http://ap.vgtv.no/webtv#!/video/111084/de-nye-bysyklene-lettere-bedre-gir-stoerre-hjul-og-feste-til-mobil',
+            'only_matching': True,
+        },
     ]
 
     def _real_extract(self, url):
@@ -144,8 +153,6 @@ class VGTVIE(XstreamIE):
         if len(video_id) == 5:
             if appname == 'bttv':
                 info = self._extract_video_info('btno', video_id)
-            elif appname == 'aptv':
-                info = self._extract_video_info('ap', video_id)
 
         streams = data['streamUrls']
         stream_type = data.get('streamType')
index d3cc1a29fa473fee2f58e91323774633be00fc4b..e699e663f60818b090bb6bf0ccdf24802c3c14c4 100644 (file)
@@ -10,13 +10,27 @@ from ..compat import (
     compat_urllib_parse,
 )
 from ..utils import (
+    ExtractorError,
     int_or_none,
     float_or_none,
     sanitized_Request,
 )
 
 
-class YandexMusicTrackIE(InfoExtractor):
+class YandexMusicBaseIE(InfoExtractor):
+    @staticmethod
+    def _handle_error(response):
+        error = response.get('error')
+        if error:
+            raise ExtractorError(error, expected=True)
+
+    def _download_json(self, *args, **kwargs):
+        response = super(YandexMusicBaseIE, self)._download_json(*args, **kwargs)
+        self._handle_error(response)
+        return response
+
+
+class YandexMusicTrackIE(YandexMusicBaseIE):
     IE_NAME = 'yandexmusic:track'
     IE_DESC = 'Яндекс.Музыка - Трек'
     _VALID_URL = r'https?://music\.yandex\.(?:ru|kz|ua|by)/album/(?P<album_id>\d+)/track/(?P<id>\d+)'
@@ -73,7 +87,7 @@ class YandexMusicTrackIE(InfoExtractor):
         return self._get_track_info(track)
 
 
-class YandexMusicPlaylistBaseIE(InfoExtractor):
+class YandexMusicPlaylistBaseIE(YandexMusicBaseIE):
     def _build_playlist(self, tracks):
         return [
             self.url_result(
index b29baafc441c220b4128c9363f341f7159b8df93..1124fe6c280cb0e23bee3a41ea323165ec714dce 100644 (file)
@@ -75,7 +75,7 @@ class YouPornIE(InfoExtractor):
         links = []
 
         sources = self._search_regex(
-            r'sources\s*:\s*({.+?})', webpage, 'sources', default=None)
+            r'(?s)sources\s*:\s*({.+?})', webpage, 'sources', default=None)
         if sources:
             for _, link in re.findall(r'[^:]+\s*:\s*(["\'])(http.+?)\1', sources):
                 links.append(link)
@@ -101,8 +101,9 @@ class YouPornIE(InfoExtractor):
             }
             # Video URL's path looks like this:
             #  /201012/17/505835/720p_1500k_505835/YouPorn%20-%20Sex%20Ed%20Is%20It%20Safe%20To%20Masturbate%20Daily.mp4
+            #  /201012/17/505835/vl_240p_240k_505835/YouPorn%20-%20Sex%20Ed%20Is%20It%20Safe%20To%20Masturbate%20Daily.mp4
             # We will benefit from it by extracting some metadata
-            mobj = re.search(r'/(?P<height>\d{3,4})[pP]_(?P<bitrate>\d+)[kK]_\d+/', video_url)
+            mobj = re.search(r'(?P<height>\d{3,4})[pP]_(?P<bitrate>\d+)[kK]_\d+/', video_url)
             if mobj:
                 height = int(mobj.group('height'))
                 bitrate = int(mobj.group('bitrate'))
index d431aa6b726c59b40a9c48b3e3144f1d7a2c8db0..22a39a0ab7af191095242e6b2a3f580e59e17c37 100644 (file)
@@ -1743,6 +1743,7 @@ def update_url_query(url, query):
     parsed_url = compat_urlparse.urlparse(url)
     qs = compat_parse_qs(parsed_url.query)
     qs.update(query)
+    qs = encode_dict(qs)
     return compat_urlparse.urlunparse(parsed_url._replace(
         query=compat_urllib_parse.urlencode(qs, True)))