Merge pull request #10817 from TRox1972/clubic_match_id
authorYen Chi Hsuan <yan12125@gmail.com>
Sat, 1 Oct 2016 08:20:12 +0000 (16:20 +0800)
committerGitHub <noreply@github.com>
Sat, 1 Oct 2016 08:20:12 +0000 (16:20 +0800)
[clubic] Rely on _match_id and _parse_json

16 files changed:
ChangeLog
test/test_utils.py
youtube_dl/__init__.py
youtube_dl/downloader/http.py
youtube_dl/extractor/aftonbladet.py [deleted file]
youtube_dl/extractor/dctp.py
youtube_dl/extractor/extractors.py
youtube_dl/extractor/instagram.py
youtube_dl/extractor/ketnet.py
youtube_dl/extractor/leeco.py
youtube_dl/extractor/limelight.py
youtube_dl/extractor/tvland.py
youtube_dl/extractor/vgtv.py
youtube_dl/extractor/vk.py
youtube_dl/postprocessor/xattrpp.py
youtube_dl/utils.py

index f8149cc3098d2cbf45f9aaefae13b2f290dfbc85..efc3e494e18ddd784d6306bd7ef9ed8ed99ac2e7 100644 (file)
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,4 +1,11 @@
-vesion 2016.09.27
+version <unreleased>
+
+Extractors
+* [dctp] Fix extraction (#10734)
++ [leeco] Recognize more Le Sports URLs (#10794)
+
+
+version 2016.09.27
 
 Core
 + Add hdcore query parameter to akamai f4m formats
index 9789d86119f81a0f2d009a707c81565cd7bdfa55..b1b2effcab0af84ced8aabf926b7f8a4b76a6e1b 100644 (file)
@@ -292,6 +292,7 @@ class TestUtil(unittest.TestCase):
         self.assertEqual(unified_strdate('25-09-2014'), '20140925')
         self.assertEqual(unified_strdate('27.02.2016 17:30'), '20160227')
         self.assertEqual(unified_strdate('UNKNOWN DATE FORMAT'), None)
+        self.assertEqual(unified_strdate('Feb 7, 2016 at 6:35 pm'), '20160207')
 
     def test_unified_timestamps(self):
         self.assertEqual(unified_timestamp('December 21, 2010'), 1292889600)
@@ -312,6 +313,7 @@ class TestUtil(unittest.TestCase):
         self.assertEqual(unified_timestamp('27.02.2016 17:30'), 1456594200)
         self.assertEqual(unified_timestamp('UNKNOWN DATE FORMAT'), None)
         self.assertEqual(unified_timestamp('May 16, 2016 11:15 PM'), 1463440500)
+        self.assertEqual(unified_timestamp('Feb 7, 2016 at 6:35 pm'), 1454870100)
 
     def test_determine_ext(self):
         self.assertEqual(determine_ext('http://example.com/foo/bar.mp4/?download'), 'mp4')
index 1cf3140a038cdcc27737c6ae018cb0db3eb9014b..72141b983d6ec1c62f58eff3920d80e77a9eee39 100644 (file)
@@ -283,12 +283,6 @@ def _real_main(argv=None):
             'key': 'ExecAfterDownload',
             'exec_cmd': opts.exec_cmd,
         })
-    if opts.xattr_set_filesize:
-        try:
-            import xattr
-            xattr  # Confuse flake8
-        except ImportError:
-            parser.error('setting filesize xattr requested but python-xattr is not available')
     external_downloader_args = None
     if opts.external_downloader_args:
         external_downloader_args = compat_shlex_split(opts.external_downloader_args)
index f8b69d186ac5ee93c8402f85bc66e7ed59570118..11294d106064414e2fe538b3c53327fb617b08f5 100644 (file)
@@ -13,6 +13,9 @@ from ..utils import (
     encodeFilename,
     sanitize_open,
     sanitized_Request,
+    write_xattr,
+    XAttrMetadataError,
+    XAttrUnavailableError,
 )
 
 
@@ -179,9 +182,8 @@ class HttpFD(FileDownloader):
 
                 if self.params.get('xattr_set_filesize', False) and data_len is not None:
                     try:
-                        import xattr
-                        xattr.setxattr(tmpfilename, 'user.ytdl.filesize', str(data_len))
-                    except(OSError, IOError, ImportError) as err:
+                        write_xattr(tmpfilename, 'user.ytdl.filesize', str(data_len))
+                    except (XAttrUnavailableError, XAttrMetadataError) as err:
                         self.report_error('unable to set filesize xattr: %s' % str(err))
 
             try:
diff --git a/youtube_dl/extractor/aftonbladet.py b/youtube_dl/extractor/aftonbladet.py
deleted file mode 100644 (file)
index 5766b4f..0000000
+++ /dev/null
@@ -1,64 +0,0 @@
-# encoding: utf-8
-from __future__ import unicode_literals
-
-from .common import InfoExtractor
-from ..utils import int_or_none
-
-
-class AftonbladetIE(InfoExtractor):
-    _VALID_URL = r'https?://tv\.aftonbladet\.se/abtv/articles/(?P<id>[0-9]+)'
-    _TEST = {
-        'url': 'http://tv.aftonbladet.se/abtv/articles/36015',
-        'info_dict': {
-            'id': '36015',
-            'ext': 'mp4',
-            'title': 'Vulkanutbrott i rymden - nu släpper NASA bilderna',
-            'description': 'Jupiters måne mest aktiv av alla himlakroppar',
-            'timestamp': 1394142732,
-            'upload_date': '20140306',
-        },
-    }
-
-    def _real_extract(self, url):
-        video_id = self._match_id(url)
-        webpage = self._download_webpage(url, video_id)
-
-        # find internal video meta data
-        meta_url = 'http://aftonbladet-play-metadata.cdn.drvideo.aptoma.no/video/%s.json'
-        player_config = self._parse_json(self._html_search_regex(
-            r'data-player-config="([^"]+)"', webpage, 'player config'), video_id)
-        internal_meta_id = player_config['aptomaVideoId']
-        internal_meta_url = meta_url % internal_meta_id
-        internal_meta_json = self._download_json(
-            internal_meta_url, video_id, 'Downloading video meta data')
-
-        # find internal video formats
-        format_url = 'http://aftonbladet-play.videodata.drvideo.aptoma.no/actions/video/?id=%s'
-        internal_video_id = internal_meta_json['videoId']
-        internal_formats_url = format_url % internal_video_id
-        internal_formats_json = self._download_json(
-            internal_formats_url, video_id, 'Downloading video formats')
-
-        formats = []
-        for fmt in internal_formats_json['formats']['http']['pseudostreaming']['mp4']:
-            p = fmt['paths'][0]
-            formats.append({
-                'url': 'http://%s:%d/%s/%s' % (p['address'], p['port'], p['path'], p['filename']),
-                'ext': 'mp4',
-                'width': int_or_none(fmt.get('width')),
-                'height': int_or_none(fmt.get('height')),
-                'tbr': int_or_none(fmt.get('bitrate')),
-                'protocol': 'http',
-            })
-        self._sort_formats(formats)
-
-        return {
-            'id': video_id,
-            'title': internal_meta_json['title'],
-            'formats': formats,
-            'thumbnail': internal_meta_json.get('imageUrl'),
-            'description': internal_meta_json.get('shortPreamble'),
-            'timestamp': int_or_none(internal_meta_json.get('timePublished')),
-            'duration': int_or_none(internal_meta_json.get('duration')),
-            'view_count': int_or_none(internal_meta_json.get('views')),
-        }
index a47e0499346b978aeb9172c8353c18c94ecf867c..14ba88715887caeb9144e68384417b2e7b518b07 100644 (file)
@@ -1,61 +1,54 @@
-# encoding: utf-8
+# coding: utf-8
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
-from ..compat import compat_str
+from ..utils import unified_strdate
 
 
 class DctpTvIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?dctp\.tv/(#/)?filme/(?P<id>.+?)/$'
     _TEST = {
         'url': 'http://www.dctp.tv/filme/videoinstallation-fuer-eine-kaufhausfassade/',
+        'md5': '174dd4a8a6225cf5655952f969cfbe24',
         'info_dict': {
-            'id': '1324',
+            'id': '95eaa4f33dad413aa17b4ee613cccc6c',
             'display_id': 'videoinstallation-fuer-eine-kaufhausfassade',
-            'ext': 'flv',
-            'title': 'Videoinstallation für eine Kaufhausfassade'
+            'ext': 'mp4',
+            'title': 'Videoinstallation für eine Kaufhausfassade',
+            'description': 'Kurzfilm',
+            'upload_date': '20110407',
+            'thumbnail': 're:^https?://.*\.jpg$',
         },
-        'params': {
-            # rtmp download
-            'skip_download': True,
-        }
     }
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
-        base_url = 'http://dctp-ivms2-restapi.s3.amazonaws.com/'
-        version_json = self._download_json(
-            base_url + 'version.json',
-            video_id, note='Determining file version')
-        version = version_json['version_name']
-        info_json = self._download_json(
-            '{0}{1}/restapi/slugs/{2}.json'.format(base_url, version, video_id),
-            video_id, note='Fetching object ID')
-        object_id = compat_str(info_json['object_id'])
-        meta_json = self._download_json(
-            '{0}{1}/restapi/media/{2}.json'.format(base_url, version, object_id),
-            video_id, note='Downloading metadata')
-        uuid = meta_json['uuid']
-        title = meta_json['title']
-        wide = meta_json['is_wide']
-        if wide:
-            ratio = '16x9'
-        else:
-            ratio = '4x3'
-        play_path = 'mp4:{0}_dctp_0500_{1}.m4v'.format(uuid, ratio)
+        webpage = self._download_webpage(url, video_id)
+
+        object_id = self._html_search_meta('DC.identifier', webpage)
 
         servers_json = self._download_json(
-            'http://www.dctp.tv/streaming_servers/',
+            'http://www.dctp.tv/elastic_streaming_client/get_streaming_server/',
             video_id, note='Downloading server list')
-        url = servers_json[0]['endpoint']
+        server = servers_json[0]['server']
+        m3u8_path = self._search_regex(
+            r'\'([^\'"]+/playlist\.m3u8)"', webpage, 'm3u8 path')
+        formats = self._extract_m3u8_formats(
+            'http://%s%s' % (server, m3u8_path), video_id, ext='mp4',
+            entry_protocol='m3u8_native')
+
+        title = self._og_search_title(webpage)
+        description = self._html_search_meta('DC.description', webpage)
+        upload_date = unified_strdate(
+            self._html_search_meta('DC.date.created', webpage))
+        thumbnail = self._og_search_thumbnail(webpage)
 
         return {
             'id': object_id,
             'title': title,
-            'format': 'rtmp',
-            'url': url,
-            'play_path': play_path,
-            'rtmp_real_time': True,
-            'ext': 'flv',
-            'display_id': video_id
+            'formats': formats,
+            'display_id': video_id,
+            'description': description,
+            'upload_date': upload_date,
+            'thumbnail': thumbnail,
         }
index 23fd2a3083dcafbd2ce17c0859b48578b470228d..09b3b49420307f3d9a2febf6be97bcff74a5af3e 100644 (file)
@@ -31,7 +31,6 @@ from .aenetworks import (
     HistoryTopicIE,
 )
 from .afreecatv import AfreecaTVIE
-from .aftonbladet import AftonbladetIE
 from .airmozilla import AirMozillaIE
 from .aljazeera import AlJazeeraIE
 from .alphaporno import AlphaPornoIE
index 8f7f232bea720ce0cfbf3c8e6aa9b38bddb93658..196407b063a9393b94c759be6c8080de9a494277 100644 (file)
@@ -29,6 +29,7 @@ class InstagramIE(InfoExtractor):
             'uploader': 'Naomi Leonor Phan-Quang',
             'like_count': int,
             'comment_count': int,
+            'comments': list,
         },
     }, {
         # missing description
@@ -44,6 +45,7 @@ class InstagramIE(InfoExtractor):
             'uploader': 'Britney Spears',
             'like_count': int,
             'comment_count': int,
+            'comments': list,
         },
         'params': {
             'skip_download': True,
@@ -82,7 +84,7 @@ class InstagramIE(InfoExtractor):
         webpage = self._download_webpage(url, video_id)
 
         (video_url, description, thumbnail, timestamp, uploader,
-         uploader_id, like_count, comment_count) = [None] * 8
+         uploader_id, like_count, comment_count, height, width) = [None] * 10
 
         shared_data = self._parse_json(
             self._search_regex(
@@ -94,6 +96,8 @@ class InstagramIE(InfoExtractor):
                 shared_data, lambda x: x['entry_data']['PostPage'][0]['media'], dict)
             if media:
                 video_url = media.get('video_url')
+                height = int_or_none(media.get('dimensions', {}).get('height'))
+                width = int_or_none(media.get('dimensions', {}).get('width'))
                 description = media.get('caption')
                 thumbnail = media.get('display_src')
                 timestamp = int_or_none(media.get('date'))
@@ -101,10 +105,24 @@ class InstagramIE(InfoExtractor):
                 uploader_id = media.get('owner', {}).get('username')
                 like_count = int_or_none(media.get('likes', {}).get('count'))
                 comment_count = int_or_none(media.get('comments', {}).get('count'))
+                comments = [{
+                    'author': comment.get('user', {}).get('username'),
+                    'author_id': comment.get('user', {}).get('id'),
+                    'id': comment.get('id'),
+                    'text': comment.get('text'),
+                    'timestamp': int_or_none(comment.get('created_at')),
+                } for comment in media.get(
+                    'comments', {}).get('nodes', []) if comment.get('text')]
 
         if not video_url:
             video_url = self._og_search_video_url(webpage, secure=False)
 
+        formats = [{
+            'url': video_url,
+            'width': width,
+            'height': height,
+        }]
+
         if not uploader_id:
             uploader_id = self._search_regex(
                 r'"owner"\s*:\s*{\s*"username"\s*:\s*"(.+?)"',
@@ -121,7 +139,7 @@ class InstagramIE(InfoExtractor):
 
         return {
             'id': video_id,
-            'url': video_url,
+            'formats': formats,
             'ext': 'mp4',
             'title': 'Video by %s' % uploader_id,
             'description': description,
@@ -131,6 +149,7 @@ class InstagramIE(InfoExtractor):
             'uploader': uploader,
             'like_count': like_count,
             'comment_count': comment_count,
+            'comments': comments,
         }
 
 
index aaf3f807a9217b2b3ce50f269dcdc6ebc3d29656..eb0a160089b395736a1370171ca7460e32f4e7e2 100644 (file)
@@ -21,6 +21,10 @@ class KetnetIE(InfoExtractor):
     }, {
         'url': 'https://www.ketnet.be/achter-de-schermen/sien-repeteert-voor-stars-for-life',
         'only_matching': True,
+    }, {
+        # mzsource, geo restricted to Belgium
+        'url': 'https://www.ketnet.be/kijken/nachtwacht/de-bermadoe',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
@@ -36,9 +40,25 @@ class KetnetIE(InfoExtractor):
 
         title = config['title']
 
-        formats = self._extract_m3u8_formats(
-            config['source']['hls'], video_id, 'mp4',
-            entry_protocol='m3u8_native', m3u8_id='hls')
+        formats = []
+        for source_key in ('', 'mz'):
+            source = config.get('%ssource' % source_key)
+            if not isinstance(source, dict):
+                continue
+            for format_id, format_url in source.items():
+                if format_id == 'hls':
+                    formats.extend(self._extract_m3u8_formats(
+                        format_url, video_id, 'mp4',
+                        entry_protocol='m3u8_native', m3u8_id=format_id,
+                        fatal=False))
+                elif format_id == 'hds':
+                    formats.extend(self._extract_f4m_formats(
+                        format_url, video_id, f4m_id=format_id, fatal=False))
+                else:
+                    formats.append({
+                        'url': format_url,
+                        'format_id': format_id,
+                    })
         self._sort_formats(formats)
 
         return {
index e9cc9aa5983967861b08a2d9ee79297ae3a1726e..c48a5aad17ad36324b3cf70956d0ed234ffa522b 100644 (file)
@@ -29,7 +29,7 @@ from ..utils import (
 
 class LeIE(InfoExtractor):
     IE_DESC = '乐视网'
-    _VALID_URL = r'https?://(?:www\.le\.com/ptv/vplay|sports\.le\.com/video)/(?P<id>\d+)\.html'
+    _VALID_URL = r'https?://(?:www\.le\.com/ptv/vplay|(?:sports\.le|(?:www\.)?lesports)\.com/(?:match|video))/(?P<id>\d+)\.html'
 
     _URL_TEMPLATE = 'http://www.le.com/ptv/vplay/%s.html'
 
@@ -73,6 +73,12 @@ class LeIE(InfoExtractor):
     }, {
         'url': 'http://sports.le.com/video/25737697.html',
         'only_matching': True,
+    }, {
+        'url': 'http://www.lesports.com/match/1023203003.html',
+        'only_matching': True,
+    }, {
+        'url': 'http://sports.le.com/match/1023203003.html',
+        'only_matching': True,
     }]
 
     # ror() and calc_time_key() are reversed from a embedded swf file in KLetvPlayer.swf
index 6752ffee23140b17389be127ae2a4e3c11ee5582..b7bfa7a6d524e4a5ebd190947b52a369a211e753 100644 (file)
@@ -59,7 +59,7 @@ class LimelightBaseIE(InfoExtractor):
                     format_id = 'rtmp'
                     if stream.get('videoBitRate'):
                         format_id += '-%d' % int_or_none(stream['videoBitRate'])
-                    http_url = 'http://%s/%s' % (rtmp.group('host').replace('csl.', 'cpl.'), rtmp.group('playpath')[4:])
+                    http_url = 'http://cpl.delvenetworks.com/' + rtmp.group('playpath')[4:]
                     urls.append(http_url)
                     http_fmt = fmt.copy()
                     http_fmt.update({
index cb76a2a583912d120faa81bcbcb17fa136a95eeb..957cf1ea2666ace07087ffd7d9e94810e87fe1e8 100644 (file)
@@ -6,7 +6,7 @@ from .mtv import MTVServicesInfoExtractor
 
 class TVLandIE(MTVServicesInfoExtractor):
     IE_NAME = 'tvland.com'
-    _VALID_URL = r'https?://(?:www\.)?tvland\.com/(?:video-clips|episodes)/(?P<id>[^/?#.]+)'
+    _VALID_URL = r'https?://(?:www\.)?tvland\.com/(?:video-clips|(?:full-)?episodes)/(?P<id>[^/?#.]+)'
     _FEED_URL = 'http://www.tvland.com/feeds/mrss/'
     _TESTS = [{
         # Geo-restricted. Without a proxy metadata are still there. With a
@@ -28,4 +28,7 @@ class TVLandIE(MTVServicesInfoExtractor):
             'upload_date': '20151228',
             'timestamp': 1451289600,
         },
+    }, {
+        'url': 'http://www.tvland.com/full-episodes/iu0hz6/younger-a-kiss-is-just-a-kiss-season-3-ep-301',
+        'only_matching': True,
     }]
index 185756301c3a9b0afe440254d3d2051ca97730d7..3b38ac700296a2eef8c12f0b45406f54785d7684 100644 (file)
@@ -22,6 +22,7 @@ class VGTVIE(XstreamIE):
         'fvn.no/fvntv': 'fvntv',
         'aftenposten.no/webtv': 'aptv',
         'ap.vgtv.no/webtv': 'aptv',
+        'tv.aftonbladet.se/abtv': 'abtv',
     }
 
     _APP_NAME_TO_VENDOR = {
@@ -30,6 +31,7 @@ class VGTVIE(XstreamIE):
         'satv': 'sa',
         'fvntv': 'fvn',
         'aptv': 'ap',
+        'abtv': 'ab',
     }
 
     _VALID_URL = r'''(?x)
@@ -40,7 +42,8 @@ class VGTVIE(XstreamIE):
                     /?
                     (?:
                         \#!/(?:video|live)/|
-                        embed?.*id=
+                        embed?.*id=|
+                        articles/
                     )|
                     (?P<appname>
                         %s
@@ -135,6 +138,14 @@ class VGTVIE(XstreamIE):
             'url': 'http://www.vgtv.no/#!/video/127205/inside-the-mind-of-favela-funk',
             'only_matching': True,
         },
+        {
+            'url': 'http://tv.aftonbladet.se/abtv/articles/36015',
+            'only_matching': True,
+        },
+        {
+            'url': 'abtv:140026',
+            'only_matching': True,
+        }
     ]
 
     def _real_extract(self, url):
index f26e0732c2b0693456acec3e9fb2390b36016d97..58799d413715d93d31f959a815ca5b8a92d835ed 100644 (file)
@@ -20,7 +20,7 @@ from ..utils import (
     remove_start,
     str_to_int,
     unescapeHTML,
-    unified_strdate,
+    unified_timestamp,
     urlencode_postdata,
 )
 from .dailymotion import DailymotionIE
@@ -106,6 +106,7 @@ class VKIE(VKBaseIE):
                 'title': 'ProtivoGunz - Хуёвая песня',
                 'uploader': 're:(?:Noize MC|Alexander Ilyashenko).*',
                 'duration': 195,
+                'timestamp': 1329060660,
                 'upload_date': '20120212',
                 'view_count': int,
             },
@@ -119,6 +120,7 @@ class VKIE(VKBaseIE):
                 'uploader': 'Tom Cruise',
                 'title': 'No name',
                 'duration': 9,
+                'timestamp': 1374374880,
                 'upload_date': '20130721',
                 'view_count': int,
             }
@@ -195,6 +197,7 @@ class VKIE(VKBaseIE):
                 'upload_date': '20150709',
                 'view_count': int,
             },
+            'skip': 'Removed',
         },
         {
             # youtube embed
@@ -226,7 +229,7 @@ class VKIE(VKBaseIE):
             },
             'params': {
                 'skip_download': True,
-            }
+            },
         },
         {
             # video key is extra_data not url\d+
@@ -237,10 +240,30 @@ class VKIE(VKBaseIE):
                 'ext': 'mp4',
                 'title': 'S-Dance, репетиции к The way show',
                 'uploader': 'THE WAY SHOW | 17 апреля',
+                'timestamp': 1454870100,
                 'upload_date': '20160207',
                 'view_count': int,
             },
         },
+        {
+            # finished live stream, live_mp4
+            'url': 'https://vk.com/videos-387766?z=video-387766_456242764%2Fpl_-387766_-2',
+            'md5': '90d22d051fccbbe9becfccc615be6791',
+            'info_dict': {
+                'id': '456242764',
+                'ext': 'mp4',
+                'title': 'ИгроМир 2016 — день 1',
+                'uploader': 'Игромания',
+                'duration': 5239,
+                'view_count': int,
+            },
+        },
+        {
+            # live stream, hls and rtmp links,most likely already finished live
+            # stream by the time you are reading this comment
+            'url': 'https://vk.com/video-140332_456239111',
+            'only_matching': True,
+        },
         {
             # removed video, just testing that we match the pattern
             'url': 'http://vk.com/feed?z=video-43215063_166094326%2Fbb50cacd3177146d7a',
@@ -349,42 +372,51 @@ class VKIE(VKBaseIE):
         data_json = self._search_regex(r'var\s+vars\s*=\s*({.+?});', info_page, 'vars')
         data = json.loads(data_json)
 
-        # Extract upload date
-        upload_date = None
-        mobj = re.search(r'id="mv_date(?:_views)?_wrap"[^>]*>([a-zA-Z]+ [0-9]+), ([0-9]+) at', info_page)
-        if mobj is not None:
-            mobj.group(1) + ' ' + mobj.group(2)
-            upload_date = unified_strdate(mobj.group(1) + ' ' + mobj.group(2))
-
-        view_count = None
-        views = self._html_search_regex(
-            r'"mv_views_count_number"[^>]*>(.+?\bviews?)<',
-            info_page, 'view count', default=None)
-        if views:
-            view_count = str_to_int(self._search_regex(
-                r'([\d,.]+)', views, 'view count', fatal=False))
+        title = unescapeHTML(data['md_title'])
+
+        if data.get('live') == 2:
+            title = self._live_title(title)
+
+        timestamp = unified_timestamp(self._html_search_regex(
+            r'class=["\']mv_info_date[^>]+>([^<]+)(?:<|from)', info_page,
+            'upload date', fatal=False))
+
+        view_count = str_to_int(self._search_regex(
+            r'class=["\']mv_views_count[^>]+>\s*([\d,.]+)',
+            info_page, 'view count', fatal=False))
 
         formats = []
-        for k, v in data.items():
-            if not k.startswith('url') and not k.startswith('cache') and k != 'extra_data' or not v:
+        for format_id, format_url in data.items():
+            if not isinstance(format_url, compat_str) or not format_url.startswith(('http', '//', 'rtmp')):
                 continue
-            height = int_or_none(self._search_regex(
-                r'^(?:url|cache)(\d+)', k, 'height', default=None))
-            formats.append({
-                'format_id': k,
-                'url': v,
-                'height': height,
-            })
+            if format_id.startswith(('url', 'cache')) or format_id in ('extra_data', 'live_mp4'):
+                height = int_or_none(self._search_regex(
+                    r'^(?:url|cache)(\d+)', format_id, 'height', default=None))
+                formats.append({
+                    'format_id': format_id,
+                    'url': format_url,
+                    'height': height,
+                })
+            elif format_id == 'hls':
+                formats.extend(self._extract_m3u8_formats(
+                    format_url, video_id, 'mp4', m3u8_id=format_id,
+                    fatal=False, live=True))
+            elif format_id == 'rtmp':
+                formats.append({
+                    'format_id': format_id,
+                    'url': format_url,
+                    'ext': 'flv',
+                })
         self._sort_formats(formats)
 
         return {
-            'id': compat_str(data['vid']),
+            'id': compat_str(data.get('vid') or video_id),
             'formats': formats,
-            'title': unescapeHTML(data['md_title']),
+            'title': title,
             'thumbnail': data.get('jpg'),
             'uploader': data.get('md_author'),
             'duration': data.get('duration'),
-            'upload_date': upload_date,
+            'timestamp': timestamp,
             'view_count': view_count,
         }
 
index e39ca60aa08326b6f05814ff800bb09c75755e48..fbdfa02acc88ff8ba82684a2e5545aebe3fce5da 100644 (file)
@@ -1,37 +1,15 @@
 from __future__ import unicode_literals
 
-import os
-import subprocess
-import sys
-import errno
-
 from .common import PostProcessor
 from ..compat import compat_os_name
 from ..utils import (
-    check_executable,
     hyphenate_date,
-    version_tuple,
-    PostProcessingError,
-    encodeArgument,
-    encodeFilename,
+    write_xattr,
+    XAttrMetadataError,
+    XAttrUnavailableError,
 )
 
 
-class XAttrMetadataError(PostProcessingError):
-    def __init__(self, code=None, msg='Unknown error'):
-        super(XAttrMetadataError, self).__init__(msg)
-        self.code = code
-
-        # Parsing code and msg
-        if (self.code in (errno.ENOSPC, errno.EDQUOT) or
-                'No space left' in self.msg or 'Disk quota excedded' in self.msg):
-            self.reason = 'NO_SPACE'
-        elif self.code == errno.E2BIG or 'Argument list too long' in self.msg:
-            self.reason = 'VALUE_TOO_LONG'
-        else:
-            self.reason = 'NOT_SUPPORTED'
-
-
 class XAttrMetadataPP(PostProcessor):
 
     #
@@ -48,88 +26,6 @@ class XAttrMetadataPP(PostProcessor):
     def run(self, info):
         """ Set extended attributes on downloaded file (if xattr support is found). """
 
-        # This mess below finds the best xattr tool for the job and creates a
-        # "write_xattr" function.
-        try:
-            # try the pyxattr module...
-            import xattr
-
-            # Unicode arguments are not supported in python-pyxattr until
-            # version 0.5.0
-            # See https://github.com/rg3/youtube-dl/issues/5498
-            pyxattr_required_version = '0.5.0'
-            if version_tuple(xattr.__version__) < version_tuple(pyxattr_required_version):
-                self._downloader.report_warning(
-                    'python-pyxattr is detected but is too old. '
-                    'youtube-dl requires %s or above while your version is %s. '
-                    'Falling back to other xattr implementations' % (
-                        pyxattr_required_version, xattr.__version__))
-
-                raise ImportError
-
-            def write_xattr(path, key, value):
-                try:
-                    xattr.set(path, key, value)
-                except EnvironmentError as e:
-                    raise XAttrMetadataError(e.errno, e.strerror)
-
-        except ImportError:
-            if compat_os_name == 'nt':
-                # Write xattrs to NTFS Alternate Data Streams:
-                # http://en.wikipedia.org/wiki/NTFS#Alternate_data_streams_.28ADS.29
-                def write_xattr(path, key, value):
-                    assert ':' not in key
-                    assert os.path.exists(path)
-
-                    ads_fn = path + ':' + key
-                    try:
-                        with open(ads_fn, 'wb') as f:
-                            f.write(value)
-                    except EnvironmentError as e:
-                        raise XAttrMetadataError(e.errno, e.strerror)
-            else:
-                user_has_setfattr = check_executable('setfattr', ['--version'])
-                user_has_xattr = check_executable('xattr', ['-h'])
-
-                if user_has_setfattr or user_has_xattr:
-
-                    def write_xattr(path, key, value):
-                        value = value.decode('utf-8')
-                        if user_has_setfattr:
-                            executable = 'setfattr'
-                            opts = ['-n', key, '-v', value]
-                        elif user_has_xattr:
-                            executable = 'xattr'
-                            opts = ['-w', key, value]
-
-                        cmd = ([encodeFilename(executable, True)] +
-                               [encodeArgument(o) for o in opts] +
-                               [encodeFilename(path, True)])
-
-                        try:
-                            p = subprocess.Popen(
-                                cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
-                        except EnvironmentError as e:
-                            raise XAttrMetadataError(e.errno, e.strerror)
-                        stdout, stderr = p.communicate()
-                        stderr = stderr.decode('utf-8', 'replace')
-                        if p.returncode != 0:
-                            raise XAttrMetadataError(p.returncode, stderr)
-
-                else:
-                    # On Unix, and can't find pyxattr, setfattr, or xattr.
-                    if sys.platform.startswith('linux'):
-                        self._downloader.report_error(
-                            "Couldn't find a tool to set the xattrs. "
-                            "Install either the python 'pyxattr' or 'xattr' "
-                            "modules, or the GNU 'attr' package "
-                            "(which contains the 'setfattr' tool).")
-                    else:
-                        self._downloader.report_error(
-                            "Couldn't find a tool to set the xattrs. "
-                            "Install either the python 'xattr' module, "
-                            "or the 'xattr' binary.")
-
         # Write the metadata to the file's xattrs
         self._downloader.to_screen('[metadata] Writing metadata to file\'s xattrs')
 
@@ -159,6 +55,10 @@ class XAttrMetadataPP(PostProcessor):
 
             return [], info
 
+        except XAttrUnavailableError as e:
+            self._downloader.report_error(str(e))
+            return [], info
+
         except XAttrMetadataError as e:
             if e.reason == 'NO_SPACE':
                 self._downloader.report_warning(
index 69ca88c8520fa20681832b04d43454bbbd9669d3..d2dfa80139e25babab7fef073dc4cfe670ce7c50 100644 (file)
@@ -42,6 +42,7 @@ from .compat import (
     compat_html_entities_html5,
     compat_http_client,
     compat_kwargs,
+    compat_os_name,
     compat_parse_qs,
     compat_shlex_quote,
     compat_socket_create_connection,
@@ -141,6 +142,8 @@ DATE_FORMATS = (
     '%Y-%m-%dT%H:%M:%S',
     '%Y-%m-%dT%H:%M:%S.%f',
     '%Y-%m-%dT%H:%M',
+    '%b %d %Y at %H:%M',
+    '%b %d %Y at %H:%M:%S',
 )
 
 DATE_FORMATS_DAY_FIRST = list(DATE_FORMATS)
@@ -775,6 +778,25 @@ class ContentTooShortError(Exception):
         self.expected = expected
 
 
class XAttrMetadataError(Exception):
    """Raised when writing an extended attribute fails.

    Attributes:
        code:   errno-style integer (or a subprocess return code) when known.
        msg:    human-readable error text.
        reason: coarse classification derived from code/msg — one of
                'NO_SPACE', 'VALUE_TOO_LONG' or 'NOT_SUPPORTED'.  Callers
                (e.g. the xattr postprocessor) switch on this to decide
                between a warning and a hard error.
    """
    def __init__(self, code=None, msg='Unknown error'):
        super(XAttrMetadataError, self).__init__(msg)
        self.code = code
        # Fix: self.msg was never assigned, so the classification below
        # (and callers reading e.msg) raised AttributeError.
        self.msg = msg

        # Parsing code and msg
        if (self.code in (errno.ENOSPC, errno.EDQUOT) or
                # Fix: the OS message is "Disk quota exceeded";
                # the previous misspelling ("excedded") never matched.
                'No space left' in self.msg or 'Disk quota exceeded' in self.msg):
            self.reason = 'NO_SPACE'
        elif self.code == errno.E2BIG or 'Argument list too long' in self.msg:
            self.reason = 'VALUE_TOO_LONG'
        else:
            self.reason = 'NOT_SUPPORTED'
+
+
class XAttrUnavailableError(Exception):
    """Raised when no working xattr backend (Python module or CLI tool)
    can be found on this system."""
+
+
 def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs):
     # Working around python 2 bug (see http://bugs.python.org/issue17849) by limiting
     # expected HTTP responses to meet HTTP/1.0 or later (see also
@@ -3131,3 +3153,82 @@ def decode_png(png_data):
             current_row.append(color)
 
     return width, height, pixels
+
+
def write_xattr(path, key, value):
    """Set extended attribute `key` to `value` (bytes) on file `path`.

    Backend order: the pyxattr Python module, NTFS Alternate Data Streams
    on Windows, then the `setfattr`/`xattr` command-line tools.

    Raises:
        XAttrUnavailableError: no usable xattr backend exists on this system
            (or pyxattr is too old).
        XAttrMetadataError: a backend was found but the write itself failed.
    """
    # This mess below finds the best xattr tool for the job
    try:
        # try the pyxattr module...
        import xattr

        # Unicode arguments are not supported in python-pyxattr until
        # version 0.5.0
        # See https://github.com/rg3/youtube-dl/issues/5498
        pyxattr_required_version = '0.5.0'
        if version_tuple(xattr.__version__) < version_tuple(pyxattr_required_version):
            # TODO: fallback to CLI tools
            raise XAttrUnavailableError(
                'python-pyxattr is detected but is too old. '
                'youtube-dl requires %s or above while your version is %s. '
                'Falling back to other xattr implementations' % (
                    pyxattr_required_version, xattr.__version__))

        # NOTE(review): "import xattr" may also resolve to the unrelated
        # 'xattr' PyPI package, which exposes setxattr() rather than set()
        # — confirm pyxattr is the intended module here.
        try:
            xattr.set(path, key, value)
        except EnvironmentError as e:
            # Translate the OS-level failure into our classified error
            raise XAttrMetadataError(e.errno, e.strerror)

    except ImportError:
        if compat_os_name == 'nt':
            # Write xattrs to NTFS Alternate Data Streams:
            # http://en.wikipedia.org/wiki/NTFS#Alternate_data_streams_.28ADS.29
            assert ':' not in key
            assert os.path.exists(path)

            ads_fn = path + ':' + key
            try:
                with open(ads_fn, 'wb') as f:
                    f.write(value)
            except EnvironmentError as e:
                raise XAttrMetadataError(e.errno, e.strerror)
        else:
            # Probe for the CLI tools (GNU attr's setfattr, or macOS xattr)
            user_has_setfattr = check_executable('setfattr', ['--version'])
            user_has_xattr = check_executable('xattr', ['-h'])

            if user_has_setfattr or user_has_xattr:

                # The CLI tools take the value as a text argument
                value = value.decode('utf-8')
                if user_has_setfattr:
                    executable = 'setfattr'
                    opts = ['-n', key, '-v', value]
                elif user_has_xattr:
                    executable = 'xattr'
                    opts = ['-w', key, value]

                cmd = ([encodeFilename(executable, True)] +
                       [encodeArgument(o) for o in opts] +
                       [encodeFilename(path, True)])

                try:
                    p = subprocess.Popen(
                        cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
                except EnvironmentError as e:
                    raise XAttrMetadataError(e.errno, e.strerror)
                stdout, stderr = p.communicate()
                stderr = stderr.decode('utf-8', 'replace')
                if p.returncode != 0:
                    # Non-zero exit: surface the tool's stderr to the caller
                    raise XAttrMetadataError(p.returncode, stderr)

            else:
                # On Unix, and can't find pyxattr, setfattr, or xattr.
                if sys.platform.startswith('linux'):
                    raise XAttrUnavailableError(
                        "Couldn't find a tool to set the xattrs. "
                        "Install either the python 'pyxattr' or 'xattr' "
                        "modules, or the GNU 'attr' package "
                        "(which contains the 'setfattr' tool).")
                else:
                    raise XAttrUnavailableError(
                        "Couldn't find a tool to set the xattrs. "
                        "Install either the python 'xattr' module, "
                        "or the 'xattr' binary.")