From: Philipp Hagemeister <phihag@phihag.de>
Date: Mon, 24 Mar 2014 00:42:11 +0000 (+0100)
Subject: Merge remote-tracking branch 'origin/master'
X-Git-Url: http://git.bitcoin.ninja/index.cgi?p=youtube-dl;a=commitdiff_plain;h=ce328530a9009ffb5f0f486fc5178520d972f07a;hp=6e25c58ed74505f69770ee01fd762f416d7405d3

Merge remote-tracking branch 'origin/master'
---

diff --git a/test/test_playlists.py b/test/test_playlists.py
index b1e38e7e9..4af38632e 100644
--- a/test/test_playlists.py
+++ b/test/test_playlists.py
@@ -309,6 +309,8 @@ class TestPlaylists(unittest.TestCase):
             'thumbnail': 're:^https?://.*\.jpg',
             'uploader': 'Porsche',
             'uploader_id': 'porsche',
+            'timestamp': 1387486713,
+            'upload_date': '20131219',
         }
         expect_info_dict(self, EXPECTED, test_video)
 
diff --git a/test/test_utils.py b/test/test_utils.py
index 7ee74e36c..e920d661f 100644
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -35,6 +35,7 @@ from youtube_dl.utils import (
     url_basename,
     urlencode_postdata,
     xpath_with_ns,
+    parse_iso8601,
 )
 
 if sys.version_info < (3, 0):
@@ -266,5 +267,10 @@ class TestUtil(unittest.TestCase):
         data = urlencode_postdata({'username': 'foo@bar.com', 'password': '1234'})
         self.assertTrue(isinstance(data, bytes))
 
+    def test_parse_iso8601(self):
+        self.assertEqual(parse_iso8601('2014-03-23T23:04:26+0100'), 1395612266)
+        self.assertEqual(parse_iso8601('2014-03-23T22:04:26+0000'), 1395612266)
+        self.assertEqual(parse_iso8601('2014-03-23T22:04:26Z'), 1395612266)
+
 if __name__ == '__main__':
     unittest.main()
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index 3e728e876..245860140 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -195,6 +195,7 @@ from .ro220 import Ro220IE
 from .rottentomatoes import RottenTomatoesIE
 from .roxwel import RoxwelIE
 from .rtlnow import RTLnowIE
+from .rts import RTSIE
 from .rutube import (
     RutubeIE,
     RutubeChannelIE,
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 647720c8a..78f238f84 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -74,7 +74,7 @@ class InfoExtractor(object):
                                  "http", "https", "rtsp", "rtmp", "m3u8" or so.
                     * preference Order number of this format. If this field is
                                  present and not None, the formats get sorted
-                                 by this field.
+                                 by this field, regardless of all other values.
                                  -1 for default (order by other properties),
                                  -2 or smaller for less than default.
                     * quality    Order number of the video quality of this
diff --git a/youtube_dl/extractor/instagram.py b/youtube_dl/extractor/instagram.py
index 994f0e4ae..b5372bf7a 100644
--- a/youtube_dl/extractor/instagram.py
+++ b/youtube_dl/extractor/instagram.py
@@ -89,7 +89,7 @@ class InstagramUserIE(InfoExtractor):
                     'uploader': user.get('full_name'),
                     'uploader_id': user.get('username'),
                     'like_count': like_count,
-                    'upload_timestamp': int_or_none(it.get('created_time')),
+                    'timestamp': int_or_none(it.get('created_time')),
                 })
 
             if not page['items']:
diff --git a/youtube_dl/extractor/radiofrance.py b/youtube_dl/extractor/radiofrance.py
index 34652f6c1..09352ed82 100644
--- a/youtube_dl/extractor/radiofrance.py
+++ b/youtube_dl/extractor/radiofrance.py
@@ -1,4 +1,6 @@
 # coding: utf-8
+from __future__ import unicode_literals
+
 import re
 
 from .common import InfoExtractor
@@ -6,16 +8,17 @@ from .common import InfoExtractor
 
 class RadioFranceIE(InfoExtractor):
     _VALID_URL = r'^https?://maison\.radiofrance\.fr/radiovisions/(?P<id>[^?#]+)'
-    IE_NAME = u'radiofrance'
+    IE_NAME = 'radiofrance'
 
     _TEST = {
-        u'url': u'http://maison.radiofrance.fr/radiovisions/one-one',
-        u'file': u'one-one.ogg',
-        u'md5': u'bdbb28ace95ed0e04faab32ba3160daf',
-        u'info_dict': {
-            u"title": u"One to one",
-            u"description": u"Plutôt que d'imaginer la radio de demain comme technologie ou comme création de contenu, je veux montrer que quelles que soient ses évolutions, j'ai l'intime conviction que la radio continuera d'être un grand média de proximité pour les auditeurs.",
-            u"uploader": u"Thomas Hercouët",
+        'url': 'http://maison.radiofrance.fr/radiovisions/one-one',
+        'md5': 'bdbb28ace95ed0e04faab32ba3160daf',
+        'info_dict': {
+            'id': 'one-one',
+            'ext': 'ogg',
+            "title": "One to one",
+            "description": "Plutôt que d'imaginer la radio de demain comme technologie ou comme création de contenu, je veux montrer que quelles que soient ses évolutions, j'ai l'intime conviction que la radio continuera d'être un grand média de proximité pour les auditeurs.",
+            "uploader": "Thomas Hercouët",
         },
     }
 
@@ -24,27 +27,28 @@ class RadioFranceIE(InfoExtractor):
         video_id = m.group('id')
 
         webpage = self._download_webpage(url, video_id)
-        title = self._html_search_regex(r'<h1>(.*?)</h1>', webpage, u'title')
+        title = self._html_search_regex(r'<h1>(.*?)</h1>', webpage, 'title')
         description = self._html_search_regex(
             r'<div class="bloc_page_wrapper"><div class="text">(.*?)</div>',
-            webpage, u'description', fatal=False)
+            webpage, 'description', fatal=False)
         uploader = self._html_search_regex(
             r'<div class="credit">&nbsp;&nbsp;&copy;&nbsp;(.*?)</div>',
-            webpage, u'uploader', fatal=False)
+            webpage, 'uploader', fatal=False)
 
         formats_str = self._html_search_regex(
             r'class="jp-jplayer[^"]*" data-source="([^"]+)">',
-            webpage, u'audio URLs')
+            webpage, 'audio URLs')
         formats = [
             {
                 'format_id': fm[0],
                 'url': fm[1],
                 'vcodec': 'none',
+                'preference': i,
             }
-            for fm in
-            re.findall(r"([a-z0-9]+)\s*:\s*'([^']+)'", formats_str)
+            for i, fm in
+            enumerate(re.findall(r"([a-z0-9]+)\s*:\s*'([^']+)'", formats_str))
         ]
-        # No sorting, we don't know any more about these formats
+        self._sort_formats(formats)
 
         return {
             'id': video_id,
diff --git a/youtube_dl/extractor/rts.py b/youtube_dl/extractor/rts.py
new file mode 100644
index 000000000..f211637a7
--- /dev/null
+++ b/youtube_dl/extractor/rts.py
@@ -0,0 +1,61 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    int_or_none,
+    parse_duration,
+    parse_iso8601,
+    unescapeHTML,
+)
+
+
+class RTSIE(InfoExtractor):
+    IE_DESC = 'RTS.ch'
+    _VALID_URL = r'^https?://(?:www\.)?rts\.ch/archives/tv/[^/]+/(?P<id>[0-9]+)-.*?\.html'
+
+    _TEST = {
+        'url': 'http://www.rts.ch/archives/tv/divers/3449373-les-enfants-terribles.html',
+        'md5': '753b877968ad8afaeddccc374d4256a5',
+        'info_dict': {
+            'id': '3449373',
+            'ext': 'mp4',
+            'duration': 1488,
+            'title': 'Les Enfants Terribles',
+            'description': 'France Pommier et sa soeur Luce Feral, les deux filles de ce groupe de 5.',
+            'uploader': 'Divers',
+            'upload_date': '19680921',
+            'timestamp': -40280400,
+        },
+    }
+
+    def _real_extract(self, url):
+        m = re.match(self._VALID_URL, url)
+        video_id = m.group('id')
+
+        all_info = self._download_json(
+            'http://www.rts.ch/a/%s.html?f=json/article' % video_id, video_id)
+        info = all_info['video']['JSONinfo']
+
+        upload_timestamp = parse_iso8601(info.get('broadcast_date'))
+        duration = parse_duration(info.get('duration'))
+        thumbnail = unescapeHTML(info.get('preview_image_url'))
+        formats = [{
+            'format_id': fid,
+            'url': furl,
+            'tbr': int_or_none(self._search_regex(
+                r'-([0-9]+)k\.', furl, 'bitrate', default=None)),
+        } for fid, furl in info['streams'].items()]
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'formats': formats,
+            'title': info['title'],
+            'description': info.get('intro'),
+            'duration': duration,
+            'uploader': info.get('programName'),
+            'timestamp': upload_timestamp,
+        }
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index 8b359cb77..68d590ba2 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -1,6 +1,7 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 
+import calendar
 import contextlib
 import ctypes
 import datetime
@@ -501,13 +502,13 @@ def orderedSet(iterable):
             res.append(el)
     return res
 
+
 def unescapeHTML(s):
-    """
-    @param s a string
-    """
-    assert type(s) == type(u'')
+    if s is None:
+        return None
+    assert type(s) == compat_str
 
-    result = re.sub(u'(?u)&(.+?);', htmlentity_transform, s)
+    result = re.sub(r'(?u)&(.+?);', htmlentity_transform, s)
     return result
 
 
@@ -761,6 +762,31 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
     https_response = http_response
 
 
+def parse_iso8601(date_str):
+    """ Return a UNIX timestamp from the given date """
+
+    if date_str is None:
+        return None
+
+    m = re.search(
+        r'Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$',
+        date_str)
+    if not m:
+        timezone = datetime.timedelta()
+    else:
+        date_str = date_str[:-len(m.group(0))]
+        if not m.group('sign'):
+            timezone = datetime.timedelta()
+        else:
+            sign = 1 if m.group('sign') == '+' else -1
+            timezone = datetime.timedelta(
+                hours=sign * int(m.group('hours')),
+                minutes=sign * int(m.group('minutes')))
+
+    dt = datetime.datetime.strptime(date_str, '%Y-%m-%dT%H:%M:%S') - timezone
+    return calendar.timegm(dt.timetuple())
+
+
 def unified_strdate(date_str):
     """Return a string with the date in the format YYYYMMDD"""