[vevo] Centralize timestamp handling

[youtube-dl] / youtube_dl / extractor / generic.py
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py

index 7a2e5dee0469e4e03ea0f97c7657f44c104cb266..6e632477921c17bfd0dff837074b72d4ae83abed 100644 (file)
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -4,7 +4,6 @@ from __future__ import unicode_literals
  
  import os
  import re
-import xml.etree.ElementTree
  
  from .common import InfoExtractor
  from .youtube import YoutubeIE
@@ -17,6 +16,7 @@ from ..utils import (
  
      ExtractorError,
      HEADRequest,
+    parse_xml,
      smuggle_url,
      unescapeHTML,
      unified_strdate,
@@ -134,6 +134,17 @@ class GenericIE(InfoExtractor):
                  'skip_download': True,
              },
          },
+        # funnyordie embed
+        {
+            'url': 'http://www.theguardian.com/world/2014/mar/11/obama-zach-galifianakis-between-two-ferns',
+            'md5': '7cf780be104d40fea7bae52eed4a470e',
+            'info_dict': {
+                'id': '18e820ec3f',
+                'ext': 'mp4',
+                'title': 'Between Two Ferns with Zach Galifianakis: President Barack Obama',
+                'description': 'Episode 18: President Barack Obama sits down with Zach Galifianakis for his most memorable interview yet.',
+            }
+        },
      ]
  
      def report_download_webpage(self, video_id):
@@ -274,7 +285,7 @@ class GenericIE(InfoExtractor):
  
          # Is it an RSS feed?
          try:
-            doc = xml.etree.ElementTree.fromstring(webpage.encode('utf-8'))
+            doc = parse_xml(webpage)
              if doc.tag == 'rss':
                  return self._extract_rss(url, video_id, doc)
          except compat_xml_parse_error:
@@ -432,6 +443,14 @@ class GenericIE(InfoExtractor):
          if mobj is not None:
              return self.url_result(compat_urllib_parse.unquote(mobj.group('url')))
  
+        # Look for funnyordie embed
+        matches = re.findall(r'<iframe[^>]+?src="(https?://(?:www\.)?funnyordie\.com/embed/[^"]+)"', webpage)
+        if matches:
+            urlrs = [self.url_result(unescapeHTML(eurl), 'FunnyOrDie')
+                     for eurl in matches]
+            return self.playlist_result(
+                urlrs, playlist_id=video_id, playlist_title=video_title)
+
          # Start with something easy: JW Player in SWFObject
          mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
          if mobj is None: