Merge pull request #7296 from jaimeMF/xml_attrib_unicode
authorSergey M <dstftw@gmail.com>
Sat, 31 Oct 2015 18:15:21 +0000 (18:15 +0000)
committerSergey M <dstftw@gmail.com>
Sat, 31 Oct 2015 18:15:21 +0000 (18:15 +0000)
Use a wrapper around xml.etree.ElementTree.fromstring in python 2.x (…

test/test_utils.py
youtube_dl/extractor/__init__.py
youtube_dl/extractor/anitube.py
youtube_dl/extractor/clyp.py [new file with mode: 0644]
youtube_dl/extractor/common.py
youtube_dl/extractor/francetv.py
youtube_dl/extractor/generic.py
youtube_dl/extractor/mdr.py
youtube_dl/extractor/moniker.py
youtube_dl/extractor/vidme.py
youtube_dl/utils.py

index a9e0fed7e90287270d2909315efdcf33fdd234ae..3298315d2458903da67187b2d2bddeccd5e125a7 100644 (file)
@@ -278,9 +278,16 @@ class TestUtil(unittest.TestCase):
         p = xml.etree.ElementTree.SubElement(div, 'p')
         p.text = 'Foo'
         self.assertEqual(xpath_element(doc, 'div/p'), p)
+        self.assertEqual(xpath_element(doc, ['div/p']), p)
+        self.assertEqual(xpath_element(doc, ['div/bar', 'div/p']), p)
         self.assertEqual(xpath_element(doc, 'div/bar', default='default'), 'default')
+        self.assertEqual(xpath_element(doc, ['div/bar'], default='default'), 'default')
         self.assertTrue(xpath_element(doc, 'div/bar') is None)
+        self.assertTrue(xpath_element(doc, ['div/bar']) is None)
+        self.assertTrue(xpath_element(doc, ['div/bar'], 'div/baz') is None)
         self.assertRaises(ExtractorError, xpath_element, doc, 'div/bar', fatal=True)
+        self.assertRaises(ExtractorError, xpath_element, doc, ['div/bar'], fatal=True)
+        self.assertRaises(ExtractorError, xpath_element, doc, ['div/bar', 'div/baz'], fatal=True)
 
     def test_xpath_text(self):
         testxml = '''<root>
@@ -428,6 +435,8 @@ class TestUtil(unittest.TestCase):
         self.assertEqual(parse_iso8601('2014-03-23T22:04:26+0000'), 1395612266)
         self.assertEqual(parse_iso8601('2014-03-23T22:04:26Z'), 1395612266)
         self.assertEqual(parse_iso8601('2014-03-23T22:04:26.1234Z'), 1395612266)
+        self.assertEqual(parse_iso8601('2015-09-29T08:27:31.727'), 1443515251)
+        self.assertEqual(parse_iso8601('2015-09-29T08-27-31.727'), None)
 
     def test_strip_jsonp(self):
         stripped = strip_jsonp('cb ([ {"id":"532cb",\n\n\n"x":\n3}\n]\n);')
index 6318ac4a26d0c0d5afd024a0fae9ff190838b054..f98e6487e467f63c5476bd7682723e8f6770773b 100644 (file)
@@ -90,6 +90,7 @@ from .cliphunter import CliphunterIE
 from .clipsyndicate import ClipsyndicateIE
 from .cloudy import CloudyIE
 from .clubic import ClubicIE
+from .clyp import ClypIE
 from .cmt import CMTIE
 from .cnet import CNETIE
 from .cnn import (
index 31f0d417ce420de69f9e06ac0498242dc913bf73..23f942ae2d039cf92c9a67934a07d3062f744b0d 100644 (file)
@@ -26,8 +26,8 @@ class AnitubeIE(InfoExtractor):
         video_id = mobj.group('id')
 
         webpage = self._download_webpage(url, video_id)
-        key = self._html_search_regex(
-            r'http://www\.anitube\.se/embed/([A-Za-z0-9_-]*)', webpage, 'key')
+        key = self._search_regex(
+            r'src=["\']https?://[^/]+/embed/([A-Za-z0-9_-]+)', webpage, 'key')
 
         config_xml = self._download_xml(
             'http://www.anitube.se/nuevo/econfig.php?key=%s' % key, key)
diff --git a/youtube_dl/extractor/clyp.py b/youtube_dl/extractor/clyp.py
new file mode 100644 (file)
index 0000000..57e6437
--- /dev/null
@@ -0,0 +1,57 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+    float_or_none,
+    parse_iso8601,
+)
+
+
+class ClypIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?clyp\.it/(?P<id>[a-z0-9]+)'
+    _TEST = {
+        'url': 'https://clyp.it/ojz2wfah',
+        'md5': '1d4961036c41247ecfdcc439c0cddcbb',
+        'info_dict': {
+            'id': 'ojz2wfah',
+            'ext': 'mp3',
+            'title': 'Krisson80 - bits wip wip',
+            'description': '#Krisson80BitsWipWip #chiptune\n#wip',
+            'duration': 263.21,
+            'timestamp': 1443515251,
+            'upload_date': '20150929',
+        },
+    }
+
+    def _real_extract(self, url):
+        audio_id = self._match_id(url)
+
+        metadata = self._download_json(
+            'https://api.clyp.it/%s' % audio_id, audio_id)
+
+        formats = []
+        for secure in ('', 'Secure'):
+            for ext in ('Ogg', 'Mp3'):
+                format_id = '%s%s' % (secure, ext)
+                format_url = metadata.get('%sUrl' % format_id)
+                if format_url:
+                    formats.append({
+                        'url': format_url,
+                        'format_id': format_id,
+                        'vcodec': 'none',
+                    })
+        self._sort_formats(formats)
+
+        title = metadata['Title']
+        description = metadata.get('Description')
+        duration = float_or_none(metadata.get('Duration'))
+        timestamp = parse_iso8601(metadata.get('DateCreated'))
+
+        return {
+            'id': audio_id,
+            'title': title,
+            'description': description,
+            'duration': duration,
+            'timestamp': timestamp,
+            'formats': formats,
+        }
index 52523d7b249210eed88d334c2fefe28cabe78706..5e263f8b5a2cf46fbb26e928f5df85c87c42dfde 100644 (file)
@@ -310,11 +310,11 @@ class InfoExtractor(object):
     @classmethod
     def ie_key(cls):
         """A string for getting the InfoExtractor with get_info_extractor"""
-        return cls.__name__[:-2]
+        return compat_str(cls.__name__[:-2])
 
     @property
     def IE_NAME(self):
-        return type(self).__name__[:-2]
+        return compat_str(type(self).__name__[:-2])
 
     def _request_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True):
         """ Returns the response handle """
@@ -943,13 +943,14 @@ class InfoExtractor(object):
             if re.match(r'^https?://', u)
             else compat_urlparse.urljoin(m3u8_url, u))
 
-        m3u8_doc, urlh = self._download_webpage_handle(
+        res = self._download_webpage_handle(
             m3u8_url, video_id,
             note=note or 'Downloading m3u8 information',
             errnote=errnote or 'Failed to download m3u8 information',
             fatal=fatal)
-        if m3u8_doc is False:
-            return m3u8_doc
+        if res is False:
+            return res
+        m3u8_doc, urlh = res
         m3u8_url = urlh.geturl()
         last_info = None
         last_media = None
index 129984a5fb1ea793d49937dd251d460b509c60c7..8e60cf60f7f7be74c751c037bcae31fb0e1f6510 100644 (file)
@@ -83,6 +83,14 @@ class FranceTVBaseInfoExtractor(InfoExtractor):
         if subtitle:
             title += ' - %s' % subtitle
 
+        subtitles = {}
+        subtitles_list = [{
+            'url': subformat['url'],
+            'ext': subformat.get('format'),
+        } for subformat in info.get('subtitles', []) if subformat.get('url')]
+        if subtitles_list:
+            subtitles['fr'] = subtitles_list
+
         return {
             'id': video_id,
             'title': title,
@@ -91,20 +99,27 @@ class FranceTVBaseInfoExtractor(InfoExtractor):
             'duration': int_or_none(info.get('real_duration')) or parse_duration(info['duree']),
             'timestamp': int_or_none(info['diffusion']['timestamp']),
             'formats': formats,
+            'subtitles': subtitles,
         }
 
 
 class PluzzIE(FranceTVBaseInfoExtractor):
     IE_NAME = 'pluzz.francetv.fr'
-    _VALID_URL = r'https?://pluzz\.francetv\.fr/videos/(.*?)\.html'
+    _VALID_URL = r'https?://(?:m\.)?pluzz\.francetv\.fr/videos/(?P<id>.+?)\.html'
 
     # Can't use tests, videos expire in 7 days
 
     def _real_extract(self, url):
-        title = re.match(self._VALID_URL, url).group(1)
-        webpage = self._download_webpage(url, title)
-        video_id = self._search_regex(
-            r'data-diffusion="(\d+)"', webpage, 'ID')
+        display_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, display_id)
+
+        video_id = self._html_search_meta(
+            'id_video', webpage, 'video id', default=None)
+        if not video_id:
+            video_id = self._search_regex(
+                r'data-diffusion=["\'](\d+)', webpage, 'video id')
+
         return self._extract_video(video_id, 'Pluzz')
 
 
@@ -120,6 +135,9 @@ class FranceTvInfoIE(FranceTVBaseInfoExtractor):
             'title': 'Soir 3',
             'upload_date': '20130826',
             'timestamp': 1377548400,
+            'subtitles': {
+                'fr': 'mincount:2',
+            },
         },
     }, {
         'url': 'http://www.francetvinfo.fr/elections/europeennes/direct-europeennes-regardez-le-debat-entre-les-candidats-a-la-presidence-de-la-commission_600639.html',
index 1de96b268c959490b17c966b7b334b8ab22b7fae..ee5419f51017a5f3a5ce01018eaaa65f4ab979ab 100644 (file)
@@ -141,6 +141,7 @@ class GenericIE(InfoExtractor):
                 'ext': 'mp4',
                 'title': 'Automatics, robotics and biocybernetics',
                 'description': 'md5:815fc1deb6b3a2bff99de2d5325be482',
+                'upload_date': '20130627',
                 'formats': 'mincount:16',
                 'subtitles': 'mincount:1',
             },
index fc7499958d750338116f5e14546e15c52377c0b9..88334889e950915304bca35c8c42bf104c143307 100644 (file)
+# coding: utf-8
 from __future__ import unicode_literals
 
-import re
-
 from .common import InfoExtractor
+from ..compat import compat_urlparse
+from ..utils import (
+    determine_ext,
+    int_or_none,
+    parse_duration,
+    parse_iso8601,
+    xpath_text,
+)
 
 
 class MDRIE(InfoExtractor):
-    _VALID_URL = r'^(?P<domain>https?://(?:www\.)?mdr\.de)/(?:.*)/(?P<type>video|audio)(?P<video_id>[^/_]+)(?:_|\.html)'
+    IE_DESC = 'MDR.DE and KiKA'
+    _VALID_URL = r'https?://(?:www\.)?(?:mdr|kika)\.de/(?:.*)/[a-z]+(?P<id>\d+)(?:_.+?)?\.html'
 
-    # No tests, MDR regularily deletes its videos
-    _TEST = {
+    _TESTS = [{
+        # MDR regularly deletes its videos
         'url': 'http://www.mdr.de/fakt/video189002.html',
         'only_matching': True,
-    }
+    }, {
+        # audio
+        'url': 'http://www.mdr.de/kultur/audio1312272_zc-15948bad_zs-86171fdd.html',
+        'md5': '64c4ee50f0a791deb9479cd7bbe9d2fa',
+        'info_dict': {
+            'id': '1312272',
+            'ext': 'mp3',
+            'title': 'Feuilleton vom 30. Oktober 2015',
+            'duration': 250,
+            'uploader': 'MITTELDEUTSCHER RUNDFUNK',
+        },
+    }, {
+        'url': 'http://www.kika.de/baumhaus/videos/video19636.html',
+        'md5': '4930515e36b06c111213e80d1e4aad0e',
+        'info_dict': {
+            'id': '19636',
+            'ext': 'mp4',
+            'title': 'Baumhaus vom 30. Oktober 2015',
+            'duration': 134,
+            'uploader': 'KIKA',
+        },
+    }, {
+        'url': 'http://www.kika.de/sendungen/einzelsendungen/weihnachtsprogramm/videos/video8182.html',
+        'md5': '5fe9c4dd7d71e3b238f04b8fdd588357',
+        'info_dict': {
+            'id': '8182',
+            'ext': 'mp4',
+            'title': 'Beutolomäus und der geheime Weihnachtswunsch',
+            'description': 'md5:b69d32d7b2c55cbe86945ab309d39bbd',
+            'timestamp': 1419047100,
+            'upload_date': '20141220',
+            'duration': 4628,
+            'uploader': 'KIKA',
+        },
+    }, {
+        'url': 'http://www.kika.de/baumhaus/sendungen/video19636_zc-fea7f8a0_zs-4bf89c60.html',
+        'only_matching': True,
+    }, {
+        'url': 'http://www.kika.de/sendungen/einzelsendungen/weihnachtsprogramm/einzelsendung2534.html',
+        'only_matching': True,
+    }]
 
     def _real_extract(self, url):
-        m = re.match(self._VALID_URL, url)
-        video_id = m.group('video_id')
-        domain = m.group('domain')
+        video_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, video_id)
+
+        data_url = self._search_regex(
+            r'dataURL\s*:\s*(["\'])(?P<url>/.+/(?:video|audio)[0-9]+-avCustom\.xml)\1',
+            webpage, 'data url', group='url')
 
-        # determine title and media streams from webpage
-        html = self._download_webpage(url, video_id)
+        doc = self._download_xml(
+            compat_urlparse.urljoin(url, data_url), video_id)
 
-        title = self._html_search_regex(r'<h[12]>(.*?)</h[12]>', html, 'title')
-        xmlurl = self._search_regex(
-            r'dataURL:\'(/(?:.+)/(?:video|audio)[0-9]+-avCustom.xml)', html, 'XML URL')
+        title = xpath_text(doc, ['./title', './broadcast/broadcastName'], 'title', fatal=True)
 
-        doc = self._download_xml(domain + xmlurl, video_id)
         formats = []
-        for a in doc.findall('./assets/asset'):
-            url_el = a.find('./progressiveDownloadUrl')
-            if url_el is None:
-                continue
-            abr = int(a.find('bitrateAudio').text) // 1000
-            media_type = a.find('mediaType').text
-            format = {
-                'abr': abr,
-                'filesize': int(a.find('fileSize').text),
-                'url': url_el.text,
-            }
-
-            vbr_el = a.find('bitrateVideo')
-            if vbr_el is None:
-                format.update({
-                    'vcodec': 'none',
-                    'format_id': '%s-%d' % (media_type, abr),
-                })
-            else:
-                vbr = int(vbr_el.text) // 1000
-                format.update({
-                    'vbr': vbr,
-                    'width': int(a.find('frameWidth').text),
-                    'height': int(a.find('frameHeight').text),
-                    'format_id': '%s-%d' % (media_type, vbr),
-                })
-            formats.append(format)
+        processed_urls = []
+        for asset in doc.findall('./assets/asset'):
+            for source in (
+                    'progressiveDownload',
+                    'dynamicHttpStreamingRedirector',
+                    'adaptiveHttpStreamingRedirector'):
+                url_el = asset.find('./%sUrl' % source)
+                if url_el is None:
+                    continue
+
+                video_url = url_el.text
+                if video_url in processed_urls:
+                    continue
+
+                processed_urls.append(video_url)
+
+                vbr = int_or_none(xpath_text(asset, './bitrateVideo', 'vbr'), 1000)
+                abr = int_or_none(xpath_text(asset, './bitrateAudio', 'abr'), 1000)
+
+                ext = determine_ext(url_el.text)
+                if ext == 'm3u8':
+                    url_formats = self._extract_m3u8_formats(
+                        video_url, video_id, 'mp4', entry_protocol='m3u8_native',
+                        preference=0, m3u8_id='HLS', fatal=False)
+                elif ext == 'f4m':
+                    url_formats = self._extract_f4m_formats(
+                        video_url + '?hdcore=3.7.0&plugin=aasp-3.7.0.39.44', video_id,
+                        preference=0, f4m_id='HDS', fatal=False)
+                else:
+                    media_type = xpath_text(asset, './mediaType', 'media type', default='MP4')
+                    vbr = int_or_none(xpath_text(asset, './bitrateVideo', 'vbr'), 1000)
+                    abr = int_or_none(xpath_text(asset, './bitrateAudio', 'abr'), 1000)
+                    filesize = int_or_none(xpath_text(asset, './fileSize', 'file size'))
+
+                    f = {
+                        'url': video_url,
+                        'format_id': '%s-%d' % (media_type, vbr or abr),
+                        'filesize': filesize,
+                        'abr': abr,
+                        'preference': 1,
+                    }
+
+                    if vbr:
+                        width = int_or_none(xpath_text(asset, './frameWidth', 'width'))
+                        height = int_or_none(xpath_text(asset, './frameHeight', 'height'))
+                        f.update({
+                            'vbr': vbr,
+                            'width': width,
+                            'height': height,
+                        })
+
+                    url_formats = [f]
+
+                if not url_formats:
+                    continue
+
+                if not vbr:
+                    for f in url_formats:
+                        abr = f.get('tbr') or abr
+                        if 'tbr' in f:
+                            del f['tbr']
+                        f.update({
+                            'abr': abr,
+                            'vcodec': 'none',
+                        })
+
+                formats.extend(url_formats)
+
         self._sort_formats(formats)
 
+        description = xpath_text(doc, './broadcast/broadcastDescription', 'description')
+        timestamp = parse_iso8601(
+            xpath_text(
+                doc, [
+                    './broadcast/broadcastDate',
+                    './broadcast/broadcastStartDate',
+                    './broadcast/broadcastEndDate'],
+                'timestamp', default=None))
+        duration = parse_duration(xpath_text(doc, './duration', 'duration'))
+        uploader = xpath_text(doc, './rights', 'uploader')
+
         return {
             'id': video_id,
             'title': title,
+            'description': description,
+            'timestamp': timestamp,
+            'duration': duration,
+            'uploader': uploader,
             'formats': formats,
         }
index 69e4bcd1a28f7be589fd26ca448740bd16794cfc..7c0c4e50e7c00d5bf66b30ee77deabc562fe2f07 100644 (file)
@@ -17,7 +17,7 @@ from ..utils import (
 
 class MonikerIE(InfoExtractor):
     IE_DESC = 'allmyvideos.net and vidspot.net'
-    _VALID_URL = r'https?://(?:www\.)?(?:allmyvideos|vidspot)\.net/(?P<id>[a-zA-Z0-9_-]+)'
+    _VALID_URL = r'https?://(?:www\.)?(?:allmyvideos|vidspot)\.net/(?:(?:2|v)/v-)?(?P<id>[a-zA-Z0-9_-]+)'
 
     _TESTS = [{
         'url': 'http://allmyvideos.net/jih3nce3x6wn',
@@ -46,6 +46,18 @@ class MonikerIE(InfoExtractor):
     }, {
         'url': 'https://www.vidspot.net/l2ngsmhs8ci5',
         'only_matching': True,
+    }, {
+        'url': 'http://vidspot.net/2/v-ywDf99',
+        'md5': '5f8254ce12df30479428b0152fb8e7ba',
+        'info_dict': {
+            'id': 'ywDf99',
+            'ext': 'mp4',
+            'title': 'IL FAIT LE MALIN EN PORSHE CAYENNE ( mais pas pour longtemps)',
+            'description': 'IL FAIT LE MALIN EN PORSHE CAYENNE.',
+        },
+    }, {
+        'url': 'http://allmyvideos.net/v/v-HXZm5t',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
@@ -64,18 +76,30 @@ class MonikerIE(InfoExtractor):
             raise ExtractorError(
                 '%s returned error: %s' % (self.IE_NAME, error), expected=True)
 
-        fields = re.findall(r'type="hidden" name="(.+?)"\s* value="?(.+?)">', orig_webpage)
-        data = dict(fields)
+        builtin_url = self._search_regex(
+            r'<iframe[^>]+src=(["\'])(?P<url>.+?/builtin-.+?)\1',
+            orig_webpage, 'builtin URL', default=None, group='url')
 
-        post = compat_urllib_parse.urlencode(data)
-        headers = {
-            b'Content-Type': b'application/x-www-form-urlencoded',
-        }
-        req = compat_urllib_request.Request(url, post, headers)
-        webpage = self._download_webpage(
-            req, video_id, note='Downloading video page ...')
+        if builtin_url:
+            req = compat_urllib_request.Request(builtin_url)
+            req.add_header('Referer', url)
+            webpage = self._download_webpage(req, video_id, 'Downloading builtin page')
+            title = self._og_search_title(orig_webpage).strip()
+            description = self._og_search_description(orig_webpage).strip()
+        else:
+            fields = re.findall(r'type="hidden" name="(.+?)"\s* value="?(.+?)">', orig_webpage)
+            data = dict(fields)
+
+            post = compat_urllib_parse.urlencode(data)
+            headers = {
+                b'Content-Type': b'application/x-www-form-urlencoded',
+            }
+            req = compat_urllib_request.Request(url, post, headers)
+            webpage = self._download_webpage(
+                req, video_id, note='Downloading video page ...')
 
-        title = os.path.splitext(data['fname'])[0]
+            title = os.path.splitext(data['fname'])[0]
+            description = None
 
         # Could be several links with different quality
         links = re.findall(r'"file" : "?(.+?)",', webpage)
@@ -89,5 +113,6 @@ class MonikerIE(InfoExtractor):
         return {
             'id': video_id,
             'title': title,
+            'description': description,
             'formats': formats,
         }
index eb5cde761af54d0d33b6ca99e746963afbc756de..3d63ed4f08930275ee725c68a520368721e1ed10 100644 (file)
@@ -101,6 +101,10 @@ class VidmeIE(InfoExtractor):
         # suspended
         'url': 'https://vid.me/Ox3G',
         'only_matching': True,
+    }, {
+        # deleted
+        'url': 'https://vid.me/KTPm',
+        'only_matching': True,
     }, {
         # no formats in the API response
         'url': 'https://vid.me/e5g',
@@ -143,6 +147,11 @@ class VidmeIE(InfoExtractor):
 
         video = response['video']
 
+        if video.get('state') == 'deleted':
+            raise ExtractorError(
+                'Vidme said: Sorry, this video has been deleted.',
+                expected=True)
+
         if video.get('state') in ('user-disabled', 'suspended'):
             raise ExtractorError(
                 'Vidme said: This video has been suspended either due to a copyright claim, '
index c761ea22a4580216bb2484bd08ec7e8de1d724e4..efd5f4ae111448ca9933136b151ca1bf6a0505a8 100644 (file)
@@ -179,10 +179,19 @@ def xpath_with_ns(path, ns_map):
 
 
 def xpath_element(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
-    if sys.version_info < (2, 7):  # Crazy 2.6
-        xpath = xpath.encode('ascii')
+    def _find_xpath(xpath):
+        if sys.version_info < (2, 7):  # Crazy 2.6
+            xpath = xpath.encode('ascii')
+        return node.find(xpath)
+
+    if isinstance(xpath, (str, compat_str)):
+        n = _find_xpath(xpath)
+    else:
+        for xp in xpath:
+            n = _find_xpath(xp)
+            if n is not None:
+                break
 
-    n = node.find(xpath)
     if n is None:
         if default is not NO_DEFAULT:
             return default
@@ -815,9 +824,11 @@ def parse_iso8601(date_str, delimiter='T', timezone=None):
     if date_str is None:
         return None
 
+    date_str = re.sub(r'\.[0-9]+', '', date_str)
+
     if timezone is None:
         m = re.search(
-            r'(\.[0-9]+)?(?:Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$)',
+            r'(?:Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$)',
             date_str)
         if not m:
             timezone = datetime.timedelta()
@@ -830,9 +841,12 @@ def parse_iso8601(date_str, delimiter='T', timezone=None):
                 timezone = datetime.timedelta(
                     hours=sign * int(m.group('hours')),
                     minutes=sign * int(m.group('minutes')))
-    date_format = '%Y-%m-%d{0}%H:%M:%S'.format(delimiter)
-    dt = datetime.datetime.strptime(date_str, date_format) - timezone
-    return calendar.timegm(dt.timetuple())
+    try:
+        date_format = '%Y-%m-%d{0}%H:%M:%S'.format(delimiter)
+        dt = datetime.datetime.strptime(date_str, date_format) - timezone
+        return calendar.timegm(dt.timetuple())
+    except ValueError:
+        pass
 
 
 def unified_strdate(date_str, day_first=True):
@@ -897,7 +911,7 @@ def unified_strdate(date_str, day_first=True):
         timetuple = email.utils.parsedate_tz(date_str)
         if timetuple:
             upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d')
-    return upload_date
+    return compat_str(upload_date)
 
 
 def determine_ext(url, default_ext='unknown_video'):