Merge pull request #10817 from TRox1972/clubic_match_id
authorYen Chi Hsuan <yan12125@gmail.com>
Sat, 1 Oct 2016 08:20:12 +0000 (16:20 +0800)
committerGitHub <noreply@github.com>
Sat, 1 Oct 2016 08:20:12 +0000 (16:20 +0800)
[clubic] Rely on _match_id and _parse_json

16 files changed:
ChangeLog
test/test_utils.py
youtube_dl/__init__.py
youtube_dl/downloader/http.py
youtube_dl/extractor/aftonbladet.py [deleted file]
youtube_dl/extractor/dctp.py
youtube_dl/extractor/extractors.py
youtube_dl/extractor/instagram.py
youtube_dl/extractor/ketnet.py
youtube_dl/extractor/leeco.py
youtube_dl/extractor/limelight.py
youtube_dl/extractor/tvland.py
youtube_dl/extractor/vgtv.py
youtube_dl/extractor/vk.py
youtube_dl/postprocessor/xattrpp.py
youtube_dl/utils.py

index f8149cc3098d2cbf45f9aaefae13b2f290dfbc85..efc3e494e18ddd784d6306bd7ef9ed8ed99ac2e7 100644 (file)
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,4 +1,11 @@
-vesion 2016.09.27
+version <unreleased>
+
+Extractors
+* [dctp] Fix extraction (#10734)
++ [leeco] Recognize more Le Sports URLs (#10794)
+
+
+version 2016.09.27
 
 Core
 + Add hdcore query parameter to akamai f4m formats
index 9789d86119f81a0f2d009a707c81565cd7bdfa55..b1b2effcab0af84ced8aabf926b7f8a4b76a6e1b 100644 (file)
@@ -292,6 +292,7 @@ class TestUtil(unittest.TestCase):
         self.assertEqual(unified_strdate('25-09-2014'), '20140925')
         self.assertEqual(unified_strdate('27.02.2016 17:30'), '20160227')
         self.assertEqual(unified_strdate('UNKNOWN DATE FORMAT'), None)
+        self.assertEqual(unified_strdate('Feb 7, 2016 at 6:35 pm'), '20160207')
 
     def test_unified_timestamps(self):
         self.assertEqual(unified_timestamp('December 21, 2010'), 1292889600)
@@ -312,6 +313,7 @@ class TestUtil(unittest.TestCase):
         self.assertEqual(unified_timestamp('27.02.2016 17:30'), 1456594200)
         self.assertEqual(unified_timestamp('UNKNOWN DATE FORMAT'), None)
         self.assertEqual(unified_timestamp('May 16, 2016 11:15 PM'), 1463440500)
+        self.assertEqual(unified_timestamp('Feb 7, 2016 at 6:35 pm'), 1454870100)
 
     def test_determine_ext(self):
         self.assertEqual(determine_ext('http://example.com/foo/bar.mp4/?download'), 'mp4')
index 1cf3140a038cdcc27737c6ae018cb0db3eb9014b..72141b983d6ec1c62f58eff3920d80e77a9eee39 100644 (file)
@@ -283,12 +283,6 @@ def _real_main(argv=None):
             'key': 'ExecAfterDownload',
             'exec_cmd': opts.exec_cmd,
         })
-    if opts.xattr_set_filesize:
-        try:
-            import xattr
-            xattr  # Confuse flake8
-        except ImportError:
-            parser.error('setting filesize xattr requested but python-xattr is not available')
     external_downloader_args = None
     if opts.external_downloader_args:
         external_downloader_args = compat_shlex_split(opts.external_downloader_args)
index f8b69d186ac5ee93c8402f85bc66e7ed59570118..11294d106064414e2fe538b3c53327fb617b08f5 100644 (file)
@@ -13,6 +13,9 @@ from ..utils import (
     encodeFilename,
     sanitize_open,
     sanitized_Request,
+    write_xattr,
+    XAttrMetadataError,
+    XAttrUnavailableError,
 )
 
 
@@ -179,9 +182,8 @@ class HttpFD(FileDownloader):
 
                 if self.params.get('xattr_set_filesize', False) and data_len is not None:
                     try:
-                        import xattr
-                        xattr.setxattr(tmpfilename, 'user.ytdl.filesize', str(data_len))
-                    except(OSError, IOError, ImportError) as err:
+                        write_xattr(tmpfilename, 'user.ytdl.filesize', str(data_len))
+                    except (XAttrUnavailableError, XAttrMetadataError) as err:
                         self.report_error('unable to set filesize xattr: %s' % str(err))
 
             try:
diff --git a/youtube_dl/extractor/aftonbladet.py b/youtube_dl/extractor/aftonbladet.py
deleted file mode 100644 (file)
index 5766b4f..0000000
+++ /dev/null
@@ -1,64 +0,0 @@
-# encoding: utf-8
-from __future__ import unicode_literals
-
-from .common import InfoExtractor
-from ..utils import int_or_none
-
-
-class AftonbladetIE(InfoExtractor):
-    _VALID_URL = r'https?://tv\.aftonbladet\.se/abtv/articles/(?P<id>[0-9]+)'
-    _TEST = {
-        'url': 'http://tv.aftonbladet.se/abtv/articles/36015',
-        'info_dict': {
-            'id': '36015',
-            'ext': 'mp4',
-            'title': 'Vulkanutbrott i rymden - nu släpper NASA bilderna',
-            'description': 'Jupiters måne mest aktiv av alla himlakroppar',
-            'timestamp': 1394142732,
-            'upload_date': '20140306',
-        },
-    }
-
-    def _real_extract(self, url):
-        video_id = self._match_id(url)
-        webpage = self._download_webpage(url, video_id)
-
-        # find internal video meta data
-        meta_url = 'http://aftonbladet-play-metadata.cdn.drvideo.aptoma.no/video/%s.json'
-        player_config = self._parse_json(self._html_search_regex(
-            r'data-player-config="([^"]+)"', webpage, 'player config'), video_id)
-        internal_meta_id = player_config['aptomaVideoId']
-        internal_meta_url = meta_url % internal_meta_id
-        internal_meta_json = self._download_json(
-            internal_meta_url, video_id, 'Downloading video meta data')
-
-        # find internal video formats
-        format_url = 'http://aftonbladet-play.videodata.drvideo.aptoma.no/actions/video/?id=%s'
-        internal_video_id = internal_meta_json['videoId']
-        internal_formats_url = format_url % internal_video_id
-        internal_formats_json = self._download_json(
-            internal_formats_url, video_id, 'Downloading video formats')
-
-        formats = []
-        for fmt in internal_formats_json['formats']['http']['pseudostreaming']['mp4']:
-            p = fmt['paths'][0]
-            formats.append({
-                'url': 'http://%s:%d/%s/%s' % (p['address'], p['port'], p['path'], p['filename']),
-                'ext': 'mp4',
-                'width': int_or_none(fmt.get('width')),
-                'height': int_or_none(fmt.get('height')),
-                'tbr': int_or_none(fmt.get('bitrate')),
-                'protocol': 'http',
-            })
-        self._sort_formats(formats)
-
-        return {
-            'id': video_id,
-            'title': internal_meta_json['title'],
-            'formats': formats,
-            'thumbnail': internal_meta_json.get('imageUrl'),
-            'description': internal_meta_json.get('shortPreamble'),
-            'timestamp': int_or_none(internal_meta_json.get('timePublished')),
-            'duration': int_or_none(internal_meta_json.get('duration')),
-            'view_count': int_or_none(internal_meta_json.get('views')),
-        }
index a47e0499346b978aeb9172c8353c18c94ecf867c..14ba88715887caeb9144e68384417b2e7b518b07 100644 (file)
@@ -1,61 +1,54 @@
-# encoding: utf-8
+# coding: utf-8
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
-from ..compat import compat_str
+from ..utils import unified_strdate
 
 
 class DctpTvIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?dctp\.tv/(#/)?filme/(?P<id>.+?)/$'
     _TEST = {
         'url': 'http://www.dctp.tv/filme/videoinstallation-fuer-eine-kaufhausfassade/',
+        'md5': '174dd4a8a6225cf5655952f969cfbe24',
         'info_dict': {
-            'id': '1324',
+            'id': '95eaa4f33dad413aa17b4ee613cccc6c',
             'display_id': 'videoinstallation-fuer-eine-kaufhausfassade',
-            'ext': 'flv',
-            'title': 'Videoinstallation für eine Kaufhausfassade'
+            'ext': 'mp4',
+            'title': 'Videoinstallation für eine Kaufhausfassade',
+            'description': 'Kurzfilm',
+            'upload_date': '20110407',
+            'thumbnail': 're:^https?://.*\.jpg$',
         },
-        'params': {
-            # rtmp download
-            'skip_download': True,
-        }
     }
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
-        base_url = 'http://dctp-ivms2-restapi.s3.amazonaws.com/'
-        version_json = self._download_json(
-            base_url + 'version.json',
-            video_id, note='Determining file version')
-        version = version_json['version_name']
-        info_json = self._download_json(
-            '{0}{1}/restapi/slugs/{2}.json'.format(base_url, version, video_id),
-            video_id, note='Fetching object ID')
-        object_id = compat_str(info_json['object_id'])
-        meta_json = self._download_json(
-            '{0}{1}/restapi/media/{2}.json'.format(base_url, version, object_id),
-            video_id, note='Downloading metadata')
-        uuid = meta_json['uuid']
-        title = meta_json['title']
-        wide = meta_json['is_wide']
-        if wide:
-            ratio = '16x9'
-        else:
-            ratio = '4x3'
-        play_path = 'mp4:{0}_dctp_0500_{1}.m4v'.format(uuid, ratio)
+        webpage = self._download_webpage(url, video_id)
+
+        object_id = self._html_search_meta('DC.identifier', webpage)
 
         servers_json = self._download_json(
-            'http://www.dctp.tv/streaming_servers/',
+            'http://www.dctp.tv/elastic_streaming_client/get_streaming_server/',
             video_id, note='Downloading server list')
-        url = servers_json[0]['endpoint']
+        server = servers_json[0]['server']
+        m3u8_path = self._search_regex(
+            r'\'([^\'"]+/playlist\.m3u8)"', webpage, 'm3u8 path')
+        formats = self._extract_m3u8_formats(
+            'http://%s%s' % (server, m3u8_path), video_id, ext='mp4',
+            entry_protocol='m3u8_native')
+
+        title = self._og_search_title(webpage)
+        description = self._html_search_meta('DC.description', webpage)
+        upload_date = unified_strdate(
+            self._html_search_meta('DC.date.created', webpage))
+        thumbnail = self._og_search_thumbnail(webpage)
 
         return {
             'id': object_id,
             'title': title,
-            'format': 'rtmp',
-            'url': url,
-            'play_path': play_path,
-            'rtmp_real_time': True,
-            'ext': 'flv',
-            'display_id': video_id
+            'formats': formats,
+            'display_id': video_id,
+            'description': description,
+            'upload_date': upload_date,
+            'thumbnail': thumbnail,
         }
index 23fd2a3083dcafbd2ce17c0859b48578b470228d..09b3b49420307f3d9a2febf6be97bcff74a5af3e 100644 (file)
@@ -31,7 +31,6 @@ from .aenetworks import (
     HistoryTopicIE,
 )
 from .afreecatv import AfreecaTVIE
-from .aftonbladet import AftonbladetIE
 from .airmozilla import AirMozillaIE
 from .aljazeera import AlJazeeraIE
 from .alphaporno import AlphaPornoIE
index 8f7f232bea720ce0cfbf3c8e6aa9b38bddb93658..196407b063a9393b94c759be6c8080de9a494277 100644 (file)
@@ -29,6 +29,7 @@ class InstagramIE(InfoExtractor):
             'uploader': 'Naomi Leonor Phan-Quang',
             'like_count': int,
             'comment_count': int,
+            'comments': list,
         },
     }, {
         # missing description
@@ -44,6 +45,7 @@ class InstagramIE(InfoExtractor):
             'uploader': 'Britney Spears',
             'like_count': int,
             'comment_count': int,
+            'comments': list,
         },
         'params': {
             'skip_download': True,
@@ -82,7 +84,7 @@ class InstagramIE(InfoExtractor):
         webpage = self._download_webpage(url, video_id)
 
         (video_url, description, thumbnail, timestamp, uploader,
-         uploader_id, like_count, comment_count) = [None] * 8
+         uploader_id, like_count, comment_count, height, width) = [None] * 10
 
         shared_data = self._parse_json(
             self._search_regex(
@@ -94,6 +96,8 @@ class InstagramIE(InfoExtractor):
                 shared_data, lambda x: x['entry_data']['PostPage'][0]['media'], dict)
             if media:
                 video_url = media.get('video_url')
+                height = int_or_none(media.get('dimensions', {}).get('height'))
+                width = int_or_none(media.get('dimensions', {}).get('width'))
                 description = media.get('caption')
                 thumbnail = media.get('display_src')
                 timestamp = int_or_none(media.get('date'))
@@ -101,10 +105,24 @@ class InstagramIE(InfoExtractor):
                 uploader_id = media.get('owner', {}).get('username')
                 like_count = int_or_none(media.get('likes', {}).get('count'))
                 comment_count = int_or_none(media.get('comments', {}).get('count'))
+                comments = [{
+                    'author': comment.get('user', {}).get('username'),
+                    'author_id': comment.get('user', {}).get('id'),
+                    'id': comment.get('id'),
+                    'text': comment.get('text'),
+                    'timestamp': int_or_none(comment.get('created_at')),
+                } for comment in media.get(
+                    'comments', {}).get('nodes', []) if comment.get('text')]
 
         if not video_url:
             video_url = self._og_search_video_url(webpage, secure=False)
 
+        formats = [{
+            'url': video_url,
+            'width': width,
+            'height': height,
+        }]
+
         if not uploader_id:
             uploader_id = self._search_regex(
                 r'"owner"\s*:\s*{\s*"username"\s*:\s*"(.+?)"',
@@ -121,7 +139,7 @@ class InstagramIE(InfoExtractor):
 
         return {
             'id': video_id,
-            'url': video_url,
+            'formats': formats,
             'ext': 'mp4',
             'title': 'Video by %s' % uploader_id,
             'description': description,
@@ -131,6 +149,7 @@ class InstagramIE(InfoExtractor):
             'uploader': uploader,
             'like_count': like_count,
             'comment_count': comment_count,
+            'comments': comments,
         }
 
 
index aaf3f807a9217b2b3ce50f269dcdc6ebc3d29656..eb0a160089b395736a1370171ca7460e32f4e7e2 100644 (file)
@@ -21,6 +21,10 @@ class KetnetIE(InfoExtractor):
     }, {
         'url': 'https://www.ketnet.be/achter-de-schermen/sien-repeteert-voor-stars-for-life',
         'only_matching': True,
+    }, {
+        # mzsource, geo restricted to Belgium
+        'url': 'https://www.ketnet.be/kijken/nachtwacht/de-bermadoe',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
@@ -36,9 +40,25 @@ class KetnetIE(InfoExtractor):
 
         title = config['title']
 
-        formats = self._extract_m3u8_formats(
-            config['source']['hls'], video_id, 'mp4',
-            entry_protocol='m3u8_native', m3u8_id='hls')
+        formats = []
+        for source_key in ('', 'mz'):
+            source = config.get('%ssource' % source_key)
+            if not isinstance(source, dict):
+                continue
+            for format_id, format_url in source.items():
+                if format_id == 'hls':
+                    formats.extend(self._extract_m3u8_formats(
+                        format_url, video_id, 'mp4',
+                        entry_protocol='m3u8_native', m3u8_id=format_id,
+                        fatal=False))
+                elif format_id == 'hds':
+                    formats.extend(self._extract_f4m_formats(
+                        format_url, video_id, f4m_id=format_id, fatal=False))
+                else:
+                    formats.append({
+                        'url': format_url,
+                        'format_id': format_id,
+                    })
         self._sort_formats(formats)
 
         return {
index e9cc9aa5983967861b08a2d9ee79297ae3a1726e..c48a5aad17ad36324b3cf70956d0ed234ffa522b 100644 (file)
@@ -29,7 +29,7 @@ from ..utils import (
 
 class LeIE(InfoExtractor):
     IE_DESC = '乐视网'
-    _VALID_URL = r'https?://(?:www\.le\.com/ptv/vplay|sports\.le\.com/video)/(?P<id>\d+)\.html'
+    _VALID_URL = r'https?://(?:www\.le\.com/ptv/vplay|(?:sports\.le|(?:www\.)?lesports)\.com/(?:match|video))/(?P<id>\d+)\.html'
 
     _URL_TEMPLATE = 'http://www.le.com/ptv/vplay/%s.html'
 
@@ -73,6 +73,12 @@ class LeIE(InfoExtractor):
     }, {
         'url': 'http://sports.le.com/video/25737697.html',
         'only_matching': True,
+    }, {
+        'url': 'http://www.lesports.com/match/1023203003.html',
+        'only_matching': True,
+    }, {
+        'url': 'http://sports.le.com/match/1023203003.html',
+        'only_matching': True,
     }]
 
     # ror() and calc_time_key() are reversed from a embedded swf file in KLetvPlayer.swf
index 6752ffee23140b17389be127ae2a4e3c11ee5582..b7bfa7a6d524e4a5ebd190947b52a369a211e753 100644 (file)
@@ -59,7 +59,7 @@ class LimelightBaseIE(InfoExtractor):
                     format_id = 'rtmp'
                     if stream.get('videoBitRate'):
                         format_id += '-%d' % int_or_none(stream['videoBitRate'])
-                    http_url = 'http://%s/%s' % (rtmp.group('host').replace('csl.', 'cpl.'), rtmp.group('playpath')[4:])
+                    http_url = 'http://cpl.delvenetworks.com/' + rtmp.group('playpath')[4:]
                     urls.append(http_url)
                     http_fmt = fmt.copy()
                     http_fmt.update({
index cb76a2a583912d120faa81bcbcb17fa136a95eeb..957cf1ea2666ace07087ffd7d9e94810e87fe1e8 100644 (file)
@@ -6,7 +6,7 @@ from .mtv import MTVServicesInfoExtractor
 
 class TVLandIE(MTVServicesInfoExtractor):
     IE_NAME = 'tvland.com'
-    _VALID_URL = r'https?://(?:www\.)?tvland\.com/(?:video-clips|episodes)/(?P<id>[^/?#.]+)'
+    _VALID_URL = r'https?://(?:www\.)?tvland\.com/(?:video-clips|(?:full-)?episodes)/(?P<id>[^/?#.]+)'
     _FEED_URL = 'http://www.tvland.com/feeds/mrss/'
     _TESTS = [{
         # Geo-restricted. Without a proxy metadata are still there. With a
@@ -28,4 +28,7 @@ class TVLandIE(MTVServicesInfoExtractor):
             'upload_date': '20151228',
             'timestamp': 1451289600,
         },
+    }, {
+        'url': 'http://www.tvland.com/full-episodes/iu0hz6/younger-a-kiss-is-just-a-kiss-season-3-ep-301',
+        'only_matching': True,
     }]
index 185756301c3a9b0afe440254d3d2051ca97730d7..3b38ac700296a2eef8c12f0b45406f54785d7684 100644 (file)
@@ -22,6 +22,7 @@ class VGTVIE(XstreamIE):
         'fvn.no/fvntv': 'fvntv',
         'aftenposten.no/webtv': 'aptv',
         'ap.vgtv.no/webtv': 'aptv',
+        'tv.aftonbladet.se/abtv': 'abtv',
     }
 
     _APP_NAME_TO_VENDOR = {
@@ -30,6 +31,7 @@ class VGTVIE(XstreamIE):
         'satv': 'sa',
         'fvntv': 'fvn',
         'aptv': 'ap',
+        'abtv': 'ab',
     }
 
     _VALID_URL = r'''(?x)
@@ -40,7 +42,8 @@ class VGTVIE(XstreamIE):
                     /?
                     (?:
                         \#!/(?:video|live)/|
-                        embed?.*id=
+                        embed?.*id=|
+                        articles/
                     )|
                     (?P<appname>
                         %s
@@ -135,6 +138,14 @@ class VGTVIE(XstreamIE):
             'url': 'http://www.vgtv.no/#!/video/127205/inside-the-mind-of-favela-funk',
             'only_matching': True,
         },
+        {
+            'url': 'http://tv.aftonbladet.se/abtv/articles/36015',
+            'only_matching': True,
+        },
+        {
+            'url': 'abtv:140026',
+            'only_matching': True,
+        }
     ]
 
     def _real_extract(self, url):
index f26e0732c2b0693456acec3e9fb2390b36016d97..58799d413715d93d31f959a815ca5b8a92d835ed 100644 (file)
@@ -20,7 +20,7 @@ from ..utils import (
     remove_start,
     str_to_int,
     unescapeHTML,
-    unified_strdate,
+    unified_timestamp,
     urlencode_postdata,
 )
 from .dailymotion import DailymotionIE
@@ -106,6 +106,7 @@ class VKIE(VKBaseIE):
                 'title': 'ProtivoGunz - Хуёвая песня',
                 'uploader': 're:(?:Noize MC|Alexander Ilyashenko).*',
                 'duration': 195,
+                'timestamp': 1329060660,
                 'upload_date': '20120212',
                 'view_count': int,
             },
@@ -119,6 +120,7 @@ class VKIE(VKBaseIE):
                 'uploader': 'Tom Cruise',
                 'title': 'No name',
                 'duration': 9,
+                'timestamp': 1374374880,
                 'upload_date': '20130721',
                 'view_count': int,
             }
@@ -195,6 +197,7 @@ class VKIE(VKBaseIE):
                 'upload_date': '20150709',
                 'view_count': int,
             },
+            'skip': 'Removed',
         },
         {
             # youtube embed
@@ -226,7 +229,7 @@ class VKIE(VKBaseIE):
             },
             'params': {
                 'skip_download': True,
-            }
+            },
         },
         {
             # video key is extra_data not url\d+
@@ -237,10 +240,30 @@ class VKIE(VKBaseIE):
                 'ext': 'mp4',
                 'title': 'S-Dance, репетиции к The way show',
                 'uploader': 'THE WAY SHOW | 17 апреля',
+                'timestamp': 1454870100,
                 'upload_date': '20160207',
                 'view_count': int,
             },
         },
+        {
+            # finished live stream, live_mp4
+            'url': 'https://vk.com/videos-387766?z=video-387766_456242764%2Fpl_-387766_-2',
+            'md5': '90d22d051fccbbe9becfccc615be6791',
+            'info_dict': {
+                'id': '456242764',
+                'ext': 'mp4',
+                'title': 'ИгроМир 2016 — день 1',
+                'uploader': 'Игромания',
+                'duration': 5239,
+                'view_count': int,
+            },
+        },
+        {
+            # live stream, hls and rtmp links,most likely already finished live
+            # stream by the time you are reading this comment
+            'url': 'https://vk.com/video-140332_456239111',
+            'only_matching': True,
+        },
         {
             # removed video, just testing that we match the pattern
             'url': 'http://vk.com/feed?z=video-43215063_166094326%2Fbb50cacd3177146d7a',
@@ -349,42 +372,51 @@ class VKIE(VKBaseIE):
         data_json = self._search_regex(r'var\s+vars\s*=\s*({.+?});', info_page, 'vars')
         data = json.loads(data_json)
 
-        # Extract upload date
-        upload_date = None
-        mobj = re.search(r'id="mv_date(?:_views)?_wrap"[^>]*>([a-zA-Z]+ [0-9]+), ([0-9]+) at', info_page)
-        if mobj is not None:
-            mobj.group(1) + ' ' + mobj.group(2)
-            upload_date = unified_strdate(mobj.group(1) + ' ' + mobj.group(2))
-
-        view_count = None
-        views = self._html_search_regex(
-            r'"mv_views_count_number"[^>]*>(.+?\bviews?)<',
-            info_page, 'view count', default=None)
-        if views:
-            view_count = str_to_int(self._search_regex(
-                r'([\d,.]+)', views, 'view count', fatal=False))
+        title = unescapeHTML(data['md_title'])
+
+        if data.get('live') == 2:
+            title = self._live_title(title)
+
+        timestamp = unified_timestamp(self._html_search_regex(
+            r'class=["\']mv_info_date[^>]+>([^<]+)(?:<|from)', info_page,
+            'upload date', fatal=False))
+
+        view_count = str_to_int(self._search_regex(
+            r'class=["\']mv_views_count[^>]+>\s*([\d,.]+)',
+            info_page, 'view count', fatal=False))
 
         formats = []
-        for k, v in data.items():
-            if not k.startswith('url') and not k.startswith('cache') and k != 'extra_data' or not v:
+        for format_id, format_url in data.items():
+            if not isinstance(format_url, compat_str) or not format_url.startswith(('http', '//', 'rtmp')):
                 continue
-            height = int_or_none(self._search_regex(
-                r'^(?:url|cache)(\d+)', k, 'height', default=None))
-            formats.append({
-                'format_id': k,
-                'url': v,
-                'height': height,
-            })
+            if format_id.startswith(('url', 'cache')) or format_id in ('extra_data', 'live_mp4'):
+                height = int_or_none(self._search_regex(
+                    r'^(?:url|cache)(\d+)', format_id, 'height', default=None))
+                formats.append({
+                    'format_id': format_id,
+                    'url': format_url,
+                    'height': height,
+                })
+            elif format_id == 'hls':
+                formats.extend(self._extract_m3u8_formats(
+                    format_url, video_id, 'mp4', m3u8_id=format_id,
+                    fatal=False, live=True))
+            elif format_id == 'rtmp':
+                formats.append({
+                    'format_id': format_id,
+                    'url': format_url,
+                    'ext': 'flv',
+                })
         self._sort_formats(formats)
 
         return {
-            'id': compat_str(data['vid']),
+            'id': compat_str(data.get('vid') or video_id),
             'formats': formats,
-            'title': unescapeHTML(data['md_title']),
+            'title': title,
             'thumbnail': data.get('jpg'),
             'uploader': data.get('md_author'),
             'duration': data.get('duration'),
-            'upload_date': upload_date,
+            'timestamp': timestamp,
             'view_count': view_count,
         }
 
index e39ca60aa08326b6f05814ff800bb09c75755e48..fbdfa02acc88ff8ba82684a2e5545aebe3fce5da 100644 (file)
@@ -1,37 +1,15 @@
 from __future__ import unicode_literals
 
-import os
-import subprocess
-import sys
-import errno
-
 from .common import PostProcessor
 from ..compat import compat_os_name
 from ..utils import (
-    check_executable,
     hyphenate_date,
-    version_tuple,
-    PostProcessingError,
-    encodeArgument,
-    encodeFilename,
+    write_xattr,
+    XAttrMetadataError,
+    XAttrUnavailableError,
 )
 
 
-class XAttrMetadataError(PostProcessingError):
-    def __init__(self, code=None, msg='Unknown error'):
-        super(XAttrMetadataError, self).__init__(msg)
-        self.code = code
-
-        # Parsing code and msg
-        if (self.code in (errno.ENOSPC, errno.EDQUOT) or
-                'No space left' in self.msg or 'Disk quota excedded' in self.msg):
-            self.reason = 'NO_SPACE'
-        elif self.code == errno.E2BIG or 'Argument list too long' in self.msg:
-            self.reason = 'VALUE_TOO_LONG'
-        else:
-            self.reason = 'NOT_SUPPORTED'
-
-
 class XAttrMetadataPP(PostProcessor):
 
     #
@@ -48,88 +26,6 @@ class XAttrMetadataPP(PostProcessor):
     def run(self, info):
         """ Set extended attributes on downloaded file (if xattr support is found). """
 
-        # This mess below finds the best xattr tool for the job and creates a
-        # "write_xattr" function.
-        try:
-            # try the pyxattr module...
-            import xattr
-
-            # Unicode arguments are not supported in python-pyxattr until
-            # version 0.5.0
-            # See https://github.com/rg3/youtube-dl/issues/5498
-            pyxattr_required_version = '0.5.0'
-            if version_tuple(xattr.__version__) < version_tuple(pyxattr_required_version):
-                self._downloader.report_warning(
-                    'python-pyxattr is detected but is too old. '
-                    'youtube-dl requires %s or above while your version is %s. '
-                    'Falling back to other xattr implementations' % (
-                        pyxattr_required_version, xattr.__version__))
-
-                raise ImportError
-
-            def write_xattr(path, key, value):
-                try:
-                    xattr.set(path, key, value)
-                except EnvironmentError as e:
-                    raise XAttrMetadataError(e.errno, e.strerror)
-
-        except ImportError:
-            if compat_os_name == 'nt':
-                # Write xattrs to NTFS Alternate Data Streams:
-                # http://en.wikipedia.org/wiki/NTFS#Alternate_data_streams_.28ADS.29
-                def write_xattr(path, key, value):
-                    assert ':' not in key
-                    assert os.path.exists(path)
-
-                    ads_fn = path + ':' + key
-                    try:
-                        with open(ads_fn, 'wb') as f:
-                            f.write(value)
-                    except EnvironmentError as e:
-                        raise XAttrMetadataError(e.errno, e.strerror)
-            else:
-                user_has_setfattr = check_executable('setfattr', ['--version'])
-                user_has_xattr = check_executable('xattr', ['-h'])
-
-                if user_has_setfattr or user_has_xattr:
-
-                    def write_xattr(path, key, value):
-                        value = value.decode('utf-8')
-                        if user_has_setfattr:
-                            executable = 'setfattr'
-                            opts = ['-n', key, '-v', value]
-                        elif user_has_xattr:
-                            executable = 'xattr'
-                            opts = ['-w', key, value]
-
-                        cmd = ([encodeFilename(executable, True)] +
-                               [encodeArgument(o) for o in opts] +
-                               [encodeFilename(path, True)])
-
-                        try:
-                            p = subprocess.Popen(
-                                cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
-                        except EnvironmentError as e:
-                            raise XAttrMetadataError(e.errno, e.strerror)
-                        stdout, stderr = p.communicate()
-                        stderr = stderr.decode('utf-8', 'replace')
-                        if p.returncode != 0:
-                            raise XAttrMetadataError(p.returncode, stderr)
-
-                else:
-                    # On Unix, and can't find pyxattr, setfattr, or xattr.
-                    if sys.platform.startswith('linux'):
-                        self._downloader.report_error(
-                            "Couldn't find a tool to set the xattrs. "
-                            "Install either the python 'pyxattr' or 'xattr' "
-                            "modules, or the GNU 'attr' package "
-                            "(which contains the 'setfattr' tool).")
-                    else:
-                        self._downloader.report_error(
-                            "Couldn't find a tool to set the xattrs. "
-                            "Install either the python 'xattr' module, "
-                            "or the 'xattr' binary.")
-
         # Write the metadata to the file's xattrs
         self._downloader.to_screen('[metadata] Writing metadata to file\'s xattrs')
 
@@ -159,6 +55,10 @@ class XAttrMetadataPP(PostProcessor):
 
             return [], info
 
+        except XAttrUnavailableError as e:
+            self._downloader.report_error(str(e))
+            return [], info
+
         except XAttrMetadataError as e:
             if e.reason == 'NO_SPACE':
                 self._downloader.report_warning(
index 69ca88c8520fa20681832b04d43454bbbd9669d3..d2dfa80139e25babab7fef073dc4cfe670ce7c50 100644 (file)
@@ -42,6 +42,7 @@ from .compat import (
     compat_html_entities_html5,
     compat_http_client,
     compat_kwargs,
+    compat_os_name,
     compat_parse_qs,
     compat_shlex_quote,
     compat_socket_create_connection,
@@ -141,6 +142,8 @@ DATE_FORMATS = (
     '%Y-%m-%dT%H:%M:%S',
     '%Y-%m-%dT%H:%M:%S.%f',
     '%Y-%m-%dT%H:%M',
+    '%b %d %Y at %H:%M',
+    '%b %d %Y at %H:%M:%S',
 )
 
 DATE_FORMATS_DAY_FIRST = list(DATE_FORMATS)
@@ -775,6 +778,25 @@ class ContentTooShortError(Exception):
         self.expected = expected
 
 
class XAttrMetadataError(Exception):
    """Raised when writing an extended attribute fails.

    Attributes:
        code:   errno-style integer (or a subprocess return code) when known.
        msg:    human-readable error text.
        reason: coarse classification derived from code/msg — one of
                'NO_SPACE', 'VALUE_TOO_LONG' or 'NOT_SUPPORTED'.  Callers
                (e.g. the xattr postprocessor) switch on this to decide
                between a warning and a hard error.
    """
    def __init__(self, code=None, msg='Unknown error'):
        super(XAttrMetadataError, self).__init__(msg)
        self.code = code
        # Fix: self.msg was never assigned, so the classification below
        # (and callers reading e.msg) raised AttributeError.
        self.msg = msg

        # Parsing code and msg
        if (self.code in (errno.ENOSPC, errno.EDQUOT) or
                # Fix: the OS message is "Disk quota exceeded";
                # the previous misspelling ("excedded") never matched.
                'No space left' in self.msg or 'Disk quota exceeded' in self.msg):
            self.reason = 'NO_SPACE'
        elif self.code == errno.E2BIG or 'Argument list too long' in self.msg:
            self.reason = 'VALUE_TOO_LONG'
        else:
            self.reason = 'NOT_SUPPORTED'
+
+
class XAttrUnavailableError(Exception):
    """Raised when no working xattr backend (Python module or CLI tool)
    can be found on this system."""
+
+
 def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs):
     # Working around python 2 bug (see http://bugs.python.org/issue17849) by limiting
     # expected HTTP responses to meet HTTP/1.0 or later (see also
@@ -3131,3 +3153,82 @@ def decode_png(png_data):
             current_row.append(color)
 
     return width, height, pixels
+
+
def write_xattr(path, key, value):
    """Set extended attribute `key` to `value` (bytes) on file `path`.

    Backend order: the pyxattr Python module, NTFS Alternate Data Streams
    on Windows, then the `setfattr`/`xattr` command-line tools.

    Raises:
        XAttrUnavailableError: no usable xattr backend exists on this system
            (or pyxattr is too old).
        XAttrMetadataError: a backend was found but the write itself failed.
    """
    # This mess below finds the best xattr tool for the job
    try:
        # try the pyxattr module...
        import xattr

        # Unicode arguments are not supported in python-pyxattr until
        # version 0.5.0
        # See https://github.com/rg3/youtube-dl/issues/5498
        pyxattr_required_version = '0.5.0'
        if version_tuple(xattr.__version__) < version_tuple(pyxattr_required_version):
            # TODO: fallback to CLI tools
            raise XAttrUnavailableError(
                'python-pyxattr is detected but is too old. '
                'youtube-dl requires %s or above while your version is %s. '
                'Falling back to other xattr implementations' % (
                    pyxattr_required_version, xattr.__version__))

        # NOTE(review): "import xattr" may also resolve to the unrelated
        # 'xattr' PyPI package, which exposes setxattr() rather than set()
        # — confirm pyxattr is the intended module here.
        try:
            xattr.set(path, key, value)
        except EnvironmentError as e:
            # Translate the OS-level failure into our classified error
            raise XAttrMetadataError(e.errno, e.strerror)

    except ImportError:
        if compat_os_name == 'nt':
            # Write xattrs to NTFS Alternate Data Streams:
            # http://en.wikipedia.org/wiki/NTFS#Alternate_data_streams_.28ADS.29
            assert ':' not in key
            assert os.path.exists(path)

            ads_fn = path + ':' + key
            try:
                with open(ads_fn, 'wb') as f:
                    f.write(value)
            except EnvironmentError as e:
                raise XAttrMetadataError(e.errno, e.strerror)
        else:
            # Probe for the CLI tools (GNU attr's setfattr, or macOS xattr)
            user_has_setfattr = check_executable('setfattr', ['--version'])
            user_has_xattr = check_executable('xattr', ['-h'])

            if user_has_setfattr or user_has_xattr:

                # The CLI tools take the value as a text argument
                value = value.decode('utf-8')
                if user_has_setfattr:
                    executable = 'setfattr'
                    opts = ['-n', key, '-v', value]
                elif user_has_xattr:
                    executable = 'xattr'
                    opts = ['-w', key, value]

                cmd = ([encodeFilename(executable, True)] +
                       [encodeArgument(o) for o in opts] +
                       [encodeFilename(path, True)])

                try:
                    p = subprocess.Popen(
                        cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
                except EnvironmentError as e:
                    raise XAttrMetadataError(e.errno, e.strerror)
                stdout, stderr = p.communicate()
                stderr = stderr.decode('utf-8', 'replace')
                if p.returncode != 0:
                    # Non-zero exit: surface the tool's stderr to the caller
                    raise XAttrMetadataError(p.returncode, stderr)

            else:
                # On Unix, and can't find pyxattr, setfattr, or xattr.
                if sys.platform.startswith('linux'):
                    raise XAttrUnavailableError(
                        "Couldn't find a tool to set the xattrs. "
                        "Install either the python 'pyxattr' or 'xattr' "
                        "modules, or the GNU 'attr' package "
                        "(which contains the 'setfattr' tool).")
                else:
                    raise XAttrUnavailableError(
                        "Couldn't find a tool to set the xattrs. "
                        "Install either the python 'xattr' module, "
                        "or the 'xattr' binary.")