Switch codebase to use sanitized_Request instead of compat_urllib_request.Request
[youtube-dl] / youtube_dl / extractor / mtv.py
index d160f4a220aa0e264b1d05a4154cd83d049b4416..d887583e6600806dcb8d21bde241b1169a53e6b0 100644 (file)
@@ -5,7 +5,6 @@ import re
 from .common import InfoExtractor
 from ..compat import (
     compat_urllib_parse,
-    compat_urllib_request,
     compat_str,
 )
 from ..utils import (
@@ -13,6 +12,7 @@ from ..utils import (
     find_xpath_attr,
     fix_xml_ampersands,
     HEADRequest,
+    sanitized_Request,
     unescapeHTML,
     url_basename,
     RegexNotFoundError,
@@ -53,7 +53,7 @@ class MTVServicesInfoExtractor(InfoExtractor):
 
     def _extract_mobile_video_formats(self, mtvn_id):
         webpage_url = self._MOBILE_TEMPLATE % mtvn_id
-        req = compat_urllib_request.Request(webpage_url)
+        req = sanitized_Request(webpage_url)
         # Otherwise we get a webpage that would execute some javascript
         req.add_header('User-Agent', 'curl/7')
         webpage = self._download_webpage(req, mtvn_id,
@@ -67,7 +67,7 @@ class MTVServicesInfoExtractor(InfoExtractor):
         return [{'url': url, 'ext': 'mp4'}]
 
     def _extract_video_formats(self, mdoc, mtvn_id):
-        if re.match(r'.*/(error_country_block\.swf|geoblock\.mp4)$', mdoc.find('.//src').text) is not None:
+        if re.match(r'.*/(error_country_block\.swf|geoblock\.mp4|copyright_error\.flv(?:\?geo\b.+?)?)$', mdoc.find('.//src').text) is not None:
             if mtvn_id is not None and self._MOBILE_TEMPLATE is not None:
                 self.to_screen('The normal version is not available from your '
                                'country, trying with the mobile version')
@@ -142,7 +142,7 @@ class MTVServicesInfoExtractor(InfoExtractor):
         if title_el is None:
             title_el = itemdoc.find('.//{http://search.yahoo.com/mrss/}title')
         if title_el is None:
-            title_el = itemdoc.find('.//title')
+            title_el = itemdoc.find('.//title') or itemdoc.find('./title')
             if title_el.text is None:
                 title_el = None
 
@@ -200,7 +200,13 @@ class MTVServicesInfoExtractor(InfoExtractor):
         if mgid is None or ':' not in mgid:
             mgid = self._search_regex(
                 [r'data-mgid="(.*?)"', r'swfobject.embedSWF\(".*?(mgid:.*?)"'],
-                webpage, 'mgid')
+                webpage, 'mgid', default=None)
+
+        if not mgid:
+            sm4_embed = self._html_search_meta(
+                'sm4:video:embed', webpage, 'sm4 embed', default='')
+            mgid = self._search_regex(
+                r'embed/(mgid:.+?)["\'&?/]', sm4_embed, 'mgid')
 
         videos_info = self._get_videos_info(mgid)
         return videos_info
@@ -222,6 +228,13 @@ class MTVServicesEmbeddedIE(MTVServicesInfoExtractor):
         },
     }
 
+    @staticmethod
+    def _extract_url(webpage):
+        mobj = re.search(
+            r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//media.mtvnservices.com/embed/.+?)\1', webpage)
+        if mobj:
+            return mobj.group('url')
+
     def _get_feed_url(self, uri):
         video_id = self._id_from_uri(uri)
         site_id = uri.replace(video_id, '')