Merge branch 'brightcove_in_page_embed' of https://github.com/remitamine/youtube...

author Sergey M․ <dstftw@gmail.com>

Fri, 13 Nov 2015 19:23:15 +0000 (01:23 +0600)

committer Sergey M․ <dstftw@gmail.com>

Fri, 13 Nov 2015 19:23:15 +0000 (01:23 +0600)
author Sergey M․ <dstftw@gmail.com>
Fri, 13 Nov 2015 19:23:15 +0000 (01:23 +0600)
committer Sergey M․ <dstftw@gmail.com>
Fri, 13 Nov 2015 19:23:15 +0000 (01:23 +0600)
diff --combined youtube_dl/extractor/__init__.py

index 06d25ef405a1e07d30ddaab4424c1dc4dd192e5d,fcd9edec39398c347b478cd5480689a082f52104..08cb93d7689d3fcd55a47c33ced931fce03d4a8c
--- 1/youtube_dl/extractor/__init__.py
--- 2/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@@ -45,7 -45,6 +45,7 @@@ from .bambuser import BambuserIE, Bambu
   from .bandcamp import BandcampIE, BandcampAlbumIE
   from .bbc import (
       BBCCoUkIE,
+ +    BBCCoUkArticleIE,
       BBCIE,
   )
   from .beeg import BeegIE
@@@ -60,7 -59,10 +60,10 @@@ from .bloomberg import BloombergI
   from .bpb import BpbIE
   from .br import BRIE
   from .breakcom import BreakIE
- from .brightcove import BrightcoveIE
+ from .brightcove import (
+     BrightcoveIE,
+     BrightcoveInPageEmbedIE,
+ )
   from .buzzfeed import BuzzFeedIE
   from .byutv import BYUtvIE
   from .c56 import C56IE
@@@ -77,7 -79,6 +80,7 @@@ from .cbssports import CBSSportsI
   from .ccc import CCCIE
   from .ceskatelevize import CeskaTelevizeIE
   from .channel9 import Channel9IE
+ +from .chaturbate import ChaturbateIE
   from .chilloutzone import ChilloutzoneIE
   from .chirbit import (
       ChirbitIE,
@@@ -90,7 -91,6 +93,7 @@@ from .cliphunter import CliphunterI
   from .clipsyndicate import ClipsyndicateIE
   from .cloudy import CloudyIE
   from .clubic import ClubicIE
+ +from .clyp import ClypIE
   from .cmt import CMTIE
   from .cnet import CNETIE
   from .cnn import (
@@@ -124,7 -124,6 +127,7 @@@ from .dbtv import DBTVI
   from .dcn import DCNIE
   from .dctp import DctpTvIE
   from .deezer import DeezerPlaylistIE
+ +from .democracynow import DemocracynowIE
   from .dfb import DFBIE
   from .dhm import DHMIE
   from .dotsub import DotsubIE
@@@ -142,6 -141,7 +145,6 @@@ from .dump import DumpI
   from .dumpert import DumpertIE
   from .defense import DefenseGouvFrIE
   from .discovery import DiscoveryIE
- -from .divxstage import DivxStageIE
   from .dropbox import DropboxIE
   from .eagleplatform import EaglePlatformIE
   from .ebaumsworld import EbaumsWorldIE
@@@ -162,7 -162,6 +165,7 @@@ from .eroprofile import EroProfileI
   from .escapist import EscapistIE
   from .espn import ESPNIE
   from .esri import EsriVideoIE
+ +from .europa import EuropaIE
   from .everyonesmixtape import EveryonesMixtapeIE
   from .exfm import ExfmIE
   from .expotv import ExpoTVIE
@@@ -170,12 -169,14 +173,12 @@@ from .extremetube import ExtremeTubeI
   from .facebook import FacebookIE
   from .faz import FazIE
   from .fc2 import FC2IE
+ +from .fczenit import FczenitIE
   from .firstpost import FirstpostIE
   from .firsttv import FirstTVIE
   from .fivemin import FiveMinIE
   from .fivetv import FiveTVIE
- -from .fktv import (
- -    FKTVIE,
- -    FKTVPosteckeIE,
- -)
+ +from .fktv import FKTVIE
   from .flickr import FlickrIE
   from .folketinget import FolketingetIE
   from .footyroom import FootyRoomIE
@@@ -212,15 -213,13 +215,15 @@@ from .gfycat import GfycatI
   from .giantbomb import GiantBombIE
   from .giga import GigaIE
   from .glide import GlideIE
- -from .globo import GloboIE
+ +from .globo import (
+ +    GloboIE,
+ +    GloboArticleIE,
+ +)
   from .godtube import GodTubeIE
   from .goldenmoustache import GoldenMoustacheIE
   from .golem import GolemIE
   from .googleplus import GooglePlusIE
   from .googlesearch import GoogleSearchIE
- -from .gorillavid import GorillaVidIE
   from .goshgay import GoshgayIE
   from .groupon import GrouponIE
   from .hark import HarkIE
@@@ -233,6 -232,7 +236,6 @@@ from .historicfilms import HistoricFilm
   from .history import HistoryIE
   from .hitbox import HitboxIE, HitboxLiveIE
   from .hornbunny import HornBunnyIE
- -from .hostingbulk import HostingBulkIE
   from .hotnewhiphop import HotNewHipHopIE
   from .howcast import HowcastIE
   from .howstuffworks import HowStuffWorksIE
@@@ -302,11 -302,6 +305,11 @@@ from .lifenews import 
       LifeNewsIE,
       LifeEmbedIE,
   )
+ +from .limelight import (
+ +    LimelightMediaIE,
+ +    LimelightChannelIE,
+ +    LimelightChannelListIE,
+ +)
   from .liveleak import LiveLeakIE
   from .livestream import (
       LivestreamIE,
@@@ -324,6 -319,7 +327,6 @@@ from .macgamestore import MacGameStoreI
   from .mailru import MailRuIE
   from .malemotion import MalemotionIE
   from .mdr import MDRIE
- -from .megavideoz import MegaVideozIE
   from .metacafe import MetacafeIE
   from .metacritic import MetacriticIE
   from .mgoon import MgoonIE
@@@ -374,9 -370,6 +377,9 @@@ from .nbc import 
   from .ndr import (
       NDRIE,
       NJoyIE,
+ +    NDREmbedBaseIE,
+ +    NDREmbedIE,
+ +    NJoyEmbedIE,
   )
   from .ndtv import NDTVIE
   from .netzkino import NetzkinoIE
@@@ -412,11 -405,7 +415,11 @@@ from .normalboots import NormalbootsI
   from .nosvideo import NosVideoIE
   from .nova import NovaIE
   from .novamov import NovaMovIE
- -from .nowness import NownessIE
+ +from .nowness import (
+ +    NownessIE,
+ +    NownessPlaylistIE,
+ +    NownessSeriesIE,
+ +)
   from .nowtv import NowTVIE
   from .nowvideo import NowVideoIE
   from .npo import (
@@@ -446,6 -435,7 +449,6 @@@ from .ooyala import 
       OoyalaIE,
       OoyalaExternalIE,
   )
- -from .openfilm import OpenFilmIE
   from .orf import (
       ORFTVthekIE,
       ORFOE1IE,
@@@ -591,7 -581,6 +594,7 @@@ from .spankwire import SpankwireI
   from .spiegel import SpiegelIE, SpiegelArticleIE
   from .spiegeltv import SpiegeltvIE
   from .spike import SpikeIE
+ +from .stitcher import StitcherIE
   from .sport5 import Sport5IE
   from .sportbox import (
       SportBoxIE,
@@@ -696,7 -685,7 +699,7 @@@ from .twitch import 
       TwitchBookmarksIE,
       TwitchStreamIE,
   )
- -from .twitter import TwitterCardIE
+ +from .twitter import TwitterCardIE, TwitterIE
   from .ubu import UbuIE
   from .udemy import (
       UdemyIE,
@@@ -723,6 -712,7 +726,6 @@@ from .vh1 import VH1I
   from .vice import ViceIE
   from .viddler import ViddlerIE
   from .videodetective import VideoDetectiveIE
- -from .videolecturesnet import VideoLecturesNetIE
   from .videofyme import VideofyMeIE
   from .videomega import VideoMegaIE
   from .videopremium import VideoPremiumIE
@@@ -732,7 -722,6 +735,7 @@@ from .vidme import VidmeI
   from .vidzi import VidziIE
   from .vier import VierIE, VierVideosIE
   from .viewster import ViewsterIE
+ +from .viidea import ViideaIE
   from .vimeo import (
       VimeoIE,
       VimeoAlbumIE,
@@@ -785,7 -774,6 +788,7 @@@ from .wrzuta import WrzutaI
   from .wsj import WSJIE
   from .xbef import XBefIE
   from .xboxclips import XboxClipsIE
+ +from .xfileshare import XFileShareIE
   from .xhamster import (
       XHamsterIE,
       XHamsterEmbedIE,
diff --combined youtube_dl/extractor/brightcove.py

index 1686cdde14fcc7383f91bf52a6723d4831d6311d,c6ad1d065512a138b6f4535cce577eb81238debf..2c7d968a848a2c9d4dbad8960aeac2e6f0b55cfd
--- 1/youtube_dl/extractor/brightcove.py
--- 2/youtube_dl/extractor/brightcove.py
+++ b/youtube_dl/extractor/brightcove.py
@@@ -3,10 -3,10 +3,10 @@@ from __future__ import unicode_literal
   
   import re
   import json
- -import xml.etree.ElementTree
   
   from .common import InfoExtractor
   from ..compat import (
+ +    compat_etree_fromstring,
       compat_parse_qs,
       compat_str,
       compat_urllib_parse,
@@@ -22,6 -22,10 +22,10 @@@ from ..utils import 
       fix_xml_ampersands,
       unescapeHTML,
       unsmuggle_url,
+     js_to_json,
+     int_or_none,
+     parse_iso8601,
+     extract_attributes,
   )
   
   
@@@ -119,7 -123,7 +123,7 @@@ class BrightcoveIE(InfoExtractor)
           object_str = fix_xml_ampersands(object_str)
   
           try:
- -            object_doc = xml.etree.ElementTree.fromstring(object_str.encode('utf-8'))
+ +            object_doc = compat_etree_fromstring(object_str.encode('utf-8'))
           except compat_xml_parse_error:
               return
   
@@@ -346,3 -350,94 +350,94 @@@
           if 'url' not in info and not info.get('formats'):
               raise ExtractorError('Unable to extract video url for %s' % info['id'])
           return info
+ 
+ 
+ class BrightcoveInPageEmbedIE(InfoExtractor):
+     """Extractor for Brightcove in-page embed players on players.brightcove.net."""
+ 
+     # '.' and '?' in "index.html?" are escaped so that they only match literally.
+     _VALID_URL = r'https?://players\.brightcove\.net/(?P<account_id>\d+)/([a-z0-9-]+)_([a-z]+)/index\.html\?.*videoId=(?P<video_id>\d+)'
+     _TEST = {
+         'url': 'http://players.brightcove.net/929656772001/e41d32dc-ec74-459e-a845-6c69f7b724ea_default/index.html?videoId=4463358922001',
+         'md5': 'c8100925723840d4b0d243f7025703be',
+         'info_dict': {
+             'id': '4463358922001',
+             'ext': 'mp4',
+             'title': 'Meet the man behind Popcorn Time',
+             'description': 'md5:eac376a4fe366edc70279bfb681aea16',
+             'timestamp': 1441391203,
+             'upload_date': '20150904',
+             'duration': 165768,
+             'uploader_id': '929656772001',
+         }
+     }
+ 
+     @staticmethod
+     def _extract_url(webpage):
+         """Build a player URL from the first <video> element found in webpage.
+ 
+         Returns None unless that element yields all of the data-account,
+         data-player, data-embed and data-video-id attributes.
+         """
+         video_element = re.search(r'(?s)<video([^>]*)>.*?</(?:video|audio)>', webpage)
+         if not video_element:
+             return None
+         attrs = extract_attributes(video_element.group(), r'(?s)\s*data-(account|video-id|playlist-id|policy-key|player|embed)\s*=\s*["\']([^"\']+)["\']')
+         account_id = attrs.get('account')
+         player_id = attrs.get('player')
+         embed = attrs.get('embed')
+         video_id = attrs.get('video-id')
+         if account_id and player_id and embed and video_id:
+             return 'http://players.brightcove.net/%s/%s_%s/index.html?videoId=%s' % (account_id, player_id, embed, video_id)
+         return None
+ 
+     def _real_extract(self, url):
+         """Resolve a player URL into an info dict using the playback API response."""
+         # _VALID_URL defines exactly four groups, in this order.
+         account_id, player_id, embed, video_id = re.match(self._VALID_URL, url).groups()
+ 
+         # index.min.js contains a catalog({...}) call; its policyKey is sent in
+         # the Accept header of the API request below.
+         webpage = self._download_webpage('http://players.brightcove.net/%s/%s_%s/index.min.js' % (account_id, player_id, embed), video_id)
+         catalog = self._parse_json(
+             js_to_json(self._search_regex(r'catalog\(({[^}]+})\);', webpage, 'catalog')),
+             video_id)
+         policy_key = catalog['policyKey']
+ 
+         req = compat_urllib_request.Request(
+             'https://edge.api.brightcove.com/playback/v1/accounts/%s/videos/%s' % (account_id, video_id),
+             headers={'Accept': 'application/json;pk=%s' % policy_key})
+         json_data = self._download_json(req, video_id)
+ 
+         title = json_data['name']
+         description = json_data.get('description')
+         thumbnail = json_data.get('thumbnail')
+         timestamp = parse_iso8601(json_data.get('published_at'))
+         # NOTE(review): _TEST expects 165768 here, which looks like milliseconds
+         # rather than seconds - confirm the unit before relying on it.
+         duration = int_or_none(json_data.get('duration'))
+ 
+         formats = []
+         # json_data.get('sources') may return None; treat that as "no sources".
+         for source in json_data.get('sources') or []:
+             if source.get('type') == 'application/x-mpegURL':
+                 m3u8_url = source.get('src')
+                 if m3u8_url:
+                     formats.extend(self._extract_m3u8_formats(m3u8_url, video_id))
+                 continue
+             src = source.get('src') or source.get('streaming_src')
+             if not src:
+                 continue
+             container = source.get('container')
+             fmt = {
+                 'url': src,
+                 'tbr': source.get('avg_bitrate'),
+                 'width': int_or_none(source.get('width')),
+                 'height': int_or_none(source.get('height')),
+                 'filesize': source.get('size'),
+                 'container': container,
+                 'vcodec': source.get('codec'),
+             }
+             # source.get('container') may return None, so only derive 'ext'
+             # when a container name is actually present.
+             if container:
+                 fmt['ext'] = container.lower()
+             formats.append(fmt)
+ 
+         self._sort_formats(formats)
+ 
+         return {
+             'id': video_id,
+             'title': title,
+             'description': description,
+             'thumbnail': thumbnail,
+             'timestamp': timestamp,
+             'duration': duration,
+             'formats': formats,
+             'uploader_id': account_id,
+         }
diff --combined youtube_dl/extractor/generic.py

index d0b486d2a03a5e1dcd0ae0f0703246907d1550a6,7a3a7f66b66b7b499bc4b66328161b6b726c10b7..34d930a2db82fe340d55efe681bd43ff00b32aa1
--- 1/youtube_dl/extractor/generic.py
--- 2/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@@ -4,12 -4,10 +4,12 @@@ from __future__ import unicode_literal
   
   import os
   import re
+ +import sys
   
   from .common import InfoExtractor
   from .youtube import YoutubeIE
   from ..compat import (
+ +    compat_etree_fromstring,
       compat_urllib_parse_unquote,
       compat_urllib_request,
       compat_urlparse,
@@@ -22,6 -20,7 +22,6 @@@ from ..utils import 
       HEADRequest,
       is_html,
       orderedSet,
- -    parse_xml,
       smuggle_url,
       unescapeHTML,
       unified_strdate,
@@@ -30,7 -29,10 +30,10 @@@
       url_basename,
       xpath_text,
   )
- from .brightcove import BrightcoveIE
+ from .brightcove import (
+     BrightcoveIE,
+     BrightcoveInPageEmbedIE,
+ )
   from .nbc import NBCSportsVPlayerIE
   from .ooyala import OoyalaIE
   from .rutv import RUTVIE
@@@ -50,7 -52,6 +53,7 @@@ from .dailymotion import DailymotionClo
   from .onionstudios import OnionStudiosIE
   from .snagfilms import SnagFilmsEmbedIE
   from .screenwavemedia import ScreenwaveMediaIE
+ +from .mtv import MTVServicesEmbeddedIE
   
   
   class GenericIE(InfoExtractor):
@@@ -141,7 -142,6 +144,7 @@@
                   'ext': 'mp4',
                   'title': 'Automatics, robotics and biocybernetics',
                   'description': 'md5:815fc1deb6b3a2bff99de2d5325be482',
+ +                'upload_date': '20130627',
                   'formats': 'mincount:16',
                   'subtitles': 'mincount:1',
               },
@@@ -233,22 -233,6 +236,22 @@@
                   'skip_download': False,
               }
           },
+ +        {
+ +            # redirect in Refresh HTTP header
+ +            'url': 'https://www.facebook.com/l.php?u=https%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DpO8h3EaFRdo&h=TAQHsoToz&enc=AZN16h-b6o4Zq9pZkCCdOLNKMN96BbGMNtcFwHSaazus4JHT_MFYkAA-WARTX2kvsCIdlAIyHZjl6d33ILIJU7Jzwk_K3mcenAXoAzBNoZDI_Q7EXGDJnIhrGkLXo_LJ_pAa2Jzbx17UHMd3jAs--6j2zaeto5w9RTn8T_1kKg3fdC5WPX9Dbb18vzH7YFX0eSJmoa6SP114rvlkw6pkS1-T&s=1',
+ +            'info_dict': {
+ +                'id': 'pO8h3EaFRdo',
+ +                'ext': 'mp4',
+ +                'title': 'Tripeo Boiler Room x Dekmantel Festival DJ Set',
+ +                'description': 'md5:6294cc1af09c4049e0652b51a2df10d5',
+ +                'upload_date': '20150917',
+ +                'uploader_id': 'brtvofficial',
+ +                'uploader': 'Boiler Room',
+ +            },
+ +            'params': {
+ +                'skip_download': False,
+ +            },
+ +        },
           {
               'url': 'http://www.hodiho.fr/2013/02/regis-plante-sa-jeep.html',
               'md5': '85b90ccc9d73b4acd9138d3af4c27f89',
@@@ -1031,6 -1015,17 +1034,17 @@@
                   'ext': 'mp4',
                   'title': 'cinemasnob',
               },
+         },
+         # BrightcoveInPageEmbed embed
+         {
+             'url': 'http://www.geekandsundry.com/tabletop-bonus-wils-final-thoughts-on-dread/',
+             'info_dict': {
+                 'id': '4238694884001',
+                 'ext': 'flv',
+                 'title': 'Tabletop: Dread, Last Thoughts',
+                 'description': 'Tabletop: Dread, Last Thoughts',
+                 'duration': 51690,
+             },
           }
       ]
   
@@@ -1238,7 -1233,7 +1252,7 @@@
   
           # Is it an RSS feed, a SMIL file or a XSPF playlist?
           try:
- -            doc = parse_xml(webpage)
+ +            doc = compat_etree_fromstring(webpage.encode('utf-8'))
               if doc.tag == 'rss':
                   return self._extract_rss(url, video_id, doc)
               elif re.match(r'^(?:{[^}]+})?smil$', doc.tag):
@@@ -1307,6 -1302,11 +1321,11 @@@
                   'entries': entries,
               }
   
+         # Look for Brightcove In Page Embed:
+         brightcove_in_page_embed_url = BrightcoveInPageEmbedIE._extract_url(webpage)
+         if brightcove_in_page_embed_url:
+             return self.url_result(brightcove_in_page_embed_url, 'BrightcoveInPageEmbed')
+ 
           # Look for embedded rtl.nl player
           matches = re.findall(
               r'<iframe[^>]+?src="((?:https?:)?//(?:www\.)?rtl\.nl/system/videoplayer/[^"]+(?:video_)?embed[^"]+)"',
@@@ -1613,9 -1613,12 +1632,9 @@@
               return self.url_result(url, ie='Vulture')
   
           # Look for embedded mtvservices player
- -        mobj = re.search(
- -            r'<iframe src="(?P<url>https?://media\.mtvnservices\.com/embed/[^"]+)"',
- -            webpage)
- -        if mobj is not None:
- -            url = unescapeHTML(mobj.group('url'))
- -            return self.url_result(url, ie='MTVServicesEmbedded')
+ +        mtvservices_url = MTVServicesEmbeddedIE._extract_url(webpage)
+ +        if mtvservices_url:
+ +            return self.url_result(mtvservices_url, ie='MTVServicesEmbedded')
   
           # Look for embedded yahoo player
           mobj = re.search(
@@@ -1654,7 -1657,7 +1673,7 @@@
               return self.url_result(mobj.group('url'), 'MLB')
   
           mobj = re.search(
- -            r'<iframe[^>]+?src=(["\'])(?P<url>%s)\1' % CondeNastIE.EMBED_URL,
+ +            r'<(?:iframe|script)[^>]+?src=(["\'])(?P<url>%s)\1' % CondeNastIE.EMBED_URL,
               webpage)
           if mobj is not None:
               return self.url_result(self._proto_relative_url(mobj.group('url'), scheme='http:'), 'CondeNast')
@@@ -1672,8 -1675,8 +1691,8 @@@
               return self.url_result(mobj.group('url'), 'Zapiks')
   
           # Look for Kaltura embeds
- -        mobj = (re.search(r"(?s)kWidget\.(?:thumb)?[Ee]mbed\(\{.*?'wid'\s*:\s*'_?(?P<partner_id>[^']+)',.*?'entry_id'\s*:\s*'(?P<id>[^']+)',", webpage) or
- -                re.search(r'(?s)(["\'])(?:https?:)?//cdnapisec\.kaltura\.com/.*?(?:p|partner_id)/(?P<partner_id>\d+).*?\1.*?entry_id\s*:\s*(["\'])(?P<id>[^\2]+?)\2', webpage))
+ +        mobj = (re.search(r"(?s)kWidget\.(?:thumb)?[Ee]mbed\(\{.*?'wid'\s*:\s*'_?(?P<partner_id>[^']+)',.*?'entry_?[Ii]d'\s*:\s*'(?P<id>[^']+)',", webpage) or
+ +                re.search(r'(?s)(?P<q1>["\'])(?:https?:)?//cdnapi(?:sec)?\.kaltura\.com/.*?(?:p|partner_id)/(?P<partner_id>\d+).*?(?P=q1).*?entry_?[Ii]d\s*:\s*(?P<q2>["\'])(?P<id>.+?)(?P=q2)', webpage))
           if mobj is not None:
               return self.url_result('kaltura:%(partner_id)s:%(id)s' % mobj.groupdict(), 'Kaltura')
   
@@@ -1824,9 -1827,6 +1843,9 @@@
                   # Look also in Refresh HTTP header
                   refresh_header = head_response.headers.get('Refresh')
                   if refresh_header:
+ +                    # In python 2 response HTTP headers are bytestrings
+ +                    if sys.version_info < (3, 0) and isinstance(refresh_header, str):
+ +                        refresh_header = refresh_header.decode('iso-8859-1')
                       found = re.search(REDIRECT_REGEX, refresh_header)
               if found:
                   new_url = compat_urlparse.urljoin(url, unescapeHTML(found.group(1)))
diff --combined youtube_dl/utils.py

index d39f313a41802f38f91418e12cc89dcd798b0286,518cea98bd52639403caaf16dc7a5137651e5e2f..65556d056a7edfe12b94e82d0f55baa93360aa70
--- 1/youtube_dl/utils.py
--- 2/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@@ -3,7 -3,6 +3,7 @@@
   
   from __future__ import unicode_literals
   
+ +import base64
   import calendar
   import codecs
   import contextlib
@@@ -36,7 -35,6 +36,7 @@@ import zli
   from .compat import (
       compat_basestring,
       compat_chr,
+ +    compat_etree_fromstring,
       compat_html_entities,
       compat_http_client,
       compat_kwargs,
@@@ -179,19 -177,10 +179,19 @@@ def xpath_with_ns(path, ns_map)
   
   
   def xpath_element(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
- -    if sys.version_info < (2, 7):  # Crazy 2.6
- -        xpath = xpath.encode('ascii')
+ +    def _find_xpath(xpath):
+ +        if sys.version_info < (2, 7):  # Crazy 2.6
+ +            xpath = xpath.encode('ascii')
+ +        return node.find(xpath)
+ +
+ +    if isinstance(xpath, (str, compat_str)):
+ +        n = _find_xpath(xpath)
+ +    else:
+ +        for xp in xpath:
+ +            n = _find_xpath(xp)
+ +            if n is not None:
+ +                break
   
- -    n = node.find(xpath)
       if n is None:
           if default is not NO_DEFAULT:
               return default
@@@ -259,6 -248,15 +259,15 @@@ def get_element_by_attribute(attribute
       return unescapeHTML(res)
   
   
+ def extract_attributes(attributes_str, attributes_regex=r'(?s)\s*([^\s=]+)\s*=\s*["\']([^"\']+)["\']'):
+     """Collect the name/value pairs matched by attributes_regex into a dict.
+ 
+     attributes_regex must capture exactly two groups (name, value); when a
+     name is matched more than once, the last value wins.
+     """
+     # dict() consumes the (name, value) tuples from re.findall in match order.
+     return dict(re.findall(attributes_regex, attributes_str))
+ 
+ 
   def clean_html(html):
       """Clean an HTML snippet into a readable string"""
   
@@@ -366,7 -364,7 +375,7 @@@ def sanitize_path(s)
       if drive_or_unc:
           norm_path.pop(0)
       sanitized_path = [
- -        path_part if path_part in ['.', '..'] else re.sub('(?:[/<>:"\\|\\\\?\\*]|\.$)', '#', path_part)
+ +        path_part if path_part in ['.', '..'] else re.sub('(?:[/<>:"\\|\\\\?\\*]|[\s.]$)', '#', path_part)
           for path_part in norm_path]
       if drive_or_unc:
           sanitized_path.insert(0, drive_or_unc + os.path.sep)
@@@ -630,7 -628,7 +639,7 @@@ def _create_http_connection(ydl_handler
       # expected HTTP responses to meet HTTP/1.0 or later (see also
       # https://github.com/rg3/youtube-dl/issues/6727)
       if sys.version_info < (3, 0):
- -        kwargs['strict'] = True
+ +        kwargs[b'strict'] = True
       hc = http_class(*args, **kwargs)
       source_address = ydl_handler._params.get('source_address')
       if source_address is not None:
@@@ -824,11 -822,9 +833,11 @@@ def parse_iso8601(date_str, delimiter='
       if date_str is None:
           return None
   
+ +    date_str = re.sub(r'\.[0-9]+', '', date_str)
+ +
       if timezone is None:
           m = re.search(
- -            r'(\.[0-9]+)?(?:Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$)',
+ +            r'(?:Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$)',
               date_str)
           if not m:
               timezone = datetime.timedelta()
@@@ -841,12 -837,9 +850,12 @@@
                   timezone = datetime.timedelta(
                       hours=sign * int(m.group('hours')),
                       minutes=sign * int(m.group('minutes')))
- -    date_format = '%Y-%m-%d{0}%H:%M:%S'.format(delimiter)
- -    dt = datetime.datetime.strptime(date_str, date_format) - timezone
- -    return calendar.timegm(dt.timetuple())
+ +    try:
+ +        date_format = '%Y-%m-%d{0}%H:%M:%S'.format(delimiter)
+ +        dt = datetime.datetime.strptime(date_str, date_format) - timezone
+ +        return calendar.timegm(dt.timetuple())
+ +    except ValueError:
+ +        pass
   
   
   def unified_strdate(date_str, day_first=True):
@@@ -911,8 -904,7 +920,8 @@@
           timetuple = email.utils.parsedate_tz(date_str)
           if timetuple:
               upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d')
- -    return upload_date
+ +    if upload_date is not None:
+ +        return compat_str(upload_date)
   
   
   def determine_ext(url, default_ext='unknown_video'):
@@@ -1388,12 -1380,7 +1397,12 @@@ def int_or_none(v, scale=1, default=Non
               v = getattr(v, get_attr, None)
       if v == '':
           v = None
- -    return default if v is None else (int(v) * invscale // scale)
+ +    if v is None:
+ +        return default
+ +    try:
+ +        return int(v) * invscale // scale
+ +    except ValueError:
+ +        return default
   
   
   def str_or_none(v, default=None):
@@@ -1409,12 -1396,7 +1418,12 @@@ def str_to_int(int_str)
   
   
   def float_or_none(v, scale=1, invscale=1, default=None):
- -    return default if v is None else (float(v) * invscale / scale)
+ +    if v is None:
+ +        return default
+ +    try:
+ +        return float(v) * invscale / scale
+ +    except ValueError:
+ +        return default
   
   
   def parse_duration(s):
@@@ -1667,6 -1649,29 +1676,6 @@@ def encode_dict(d, encoding='utf-8')
       return dict((k.encode(encoding), v.encode(encoding)) for k, v in d.items())
   
   
- -try:
- -    etree_iter = xml.etree.ElementTree.Element.iter
- -except AttributeError:  # Python <=2.6
- -    etree_iter = lambda n: n.findall('.//*')
- -
- -
- -def parse_xml(s):
- -    class TreeBuilder(xml.etree.ElementTree.TreeBuilder):
- -        def doctype(self, name, pubid, system):
- -            pass  # Ignore doctypes
- -
- -    parser = xml.etree.ElementTree.XMLParser(target=TreeBuilder())
- -    kwargs = {'parser': parser} if sys.version_info >= (2, 7) else {}
- -    tree = xml.etree.ElementTree.XML(s.encode('utf-8'), **kwargs)
- -    # Fix up XML parser in Python 2.x
- -    if sys.version_info < (3, 0):
- -        for n in etree_iter(tree):
- -            if n.text is not None:
- -                if not isinstance(n.text, compat_str):
- -                    n.text = n.text.decode('utf-8')
- -    return tree
- -
- -
   US_RATINGS = {
       'G': 0,
       'PG': 10,
@@@ -1694,8 -1699,8 +1703,8 @@@ def js_to_json(code)
           if v in ('true', 'false', 'null'):
               return v
           if v.startswith('"'):
- -            return v
- -        if v.startswith("'"):
+ +            v = re.sub(r"\\'", "'", v[1:-1])
+ +        elif v.startswith("'"):
               v = v[1:-1]
               v = re.sub(r"\\\\|\\'|\"", lambda m: {
                   '\\\\': '\\\\',
@@@ -1789,10 -1794,6 +1798,10 @@@ def urlhandle_detect_ext(url_handle)
       return mimetype2ext(getheader('Content-Type'))
   
   
+ +def encode_data_uri(data, mime_type):
+ +    """Return data encoded as a base64 ``data:`` URI with the given MIME type."""
+ +    payload = base64.b64encode(data).decode('ascii')
+ +    return 'data:%s;base64,%s' % (mime_type, payload)
+ +
+ +
   def age_restricted(content_limit, age_limit):
       """ Returns True iff the content should be blocked """
   
@@@ -1967,7 -1968,7 +1976,7 @@@ def dfxp2srt(dfxp_data)
   
           return out
   
- -    dfxp = xml.etree.ElementTree.fromstring(dfxp_data.encode('utf-8'))
+ +    dfxp = compat_etree_fromstring(dfxp_data.encode('utf-8'))
       out = []
       paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall(_x('.//ttaf1:p')) or dfxp.findall('.//p')
author	Sergey M․ <dstftw@gmail.com>
	Fri, 13 Nov 2015 19:23:15 +0000 (01:23 +0600)
committer	Sergey M․ <dstftw@gmail.com>
	Fri, 13 Nov 2015 19:23:15 +0000 (01:23 +0600)
		1	2
youtube_dl/extractor/__init__.py	patch \|	diff1 \|	diff2 \|	blob \| history
youtube_dl/extractor/brightcove.py	patch \|	diff1 \|	diff2 \|	blob \| history
youtube_dl/extractor/generic.py	patch \|	diff1 \|	diff2 \|	blob \| history
youtube_dl/utils.py	patch \|	diff1 \|	diff2 \|	blob \| history