[ninegag] Add support for p/ URLs
[youtube-dl] / youtube_dl / extractor / ninegag.py
1 from __future__ import unicode_literals
2
3 import re
4
5 from .common import InfoExtractor
6
7
class NineGagIE(InfoExtractor):
    """Extractor for 9gag.tv video pages.

    The page is scraped for the ID of the externally hosted video plus some
    metadata (title, description, view count, thumbnail).  The result is
    returned as a ``url_transparent`` entry addressed to the Youtube
    extractor, with the metadata found on the 9gag.tv page attached.
    """

    IE_NAME = '9gag'
    # Two URL flavours are recognised:
    #   v/<numid>              numeric video ID
    #   p/<id>[/<display_id>]  alphanumeric ID, optionally followed by a slug
    # The slug is optional so that bare p/<id> links are accepted as well;
    # _real_extract falls back to the ID when no slug is present.
    _VALID_URL = r'''(?x)^https?://(?:www\.)?9gag\.tv/
        (?:
            v/(?P<numid>[0-9]+)|
            p/(?P<id>[a-zA-Z0-9]+)(?:/(?P<display_id>[^?#/]+))?
        )
    '''

    _TESTS = [{
        "url": "http://9gag.tv/v/1912",
        "info_dict": {
            "id": "1912",
            "ext": "mp4",
            "description": "This 3-minute video will make you smile and then make you feel untalented and insignificant. Anyway, you should share this awesomeness. (Thanks, Dino!)",
            "title": "\"People Are Awesome 2013\" Is Absolutely Awesome",
            "view_count": int,
            "thumbnail": "re:^https?://",
        },
        'add_ie': ['Youtube']
    },
    {
        'url': 'http://9gag.tv/p/KklwM/alternate-banned-opening-scene-of-gravity?ref=fsidebar',
        'info_dict': {
            'id': 'KklwM',
            'ext': 'mp4',
            'display_id': 'alternate-banned-opening-scene-of-gravity',
            "description": "While Gravity was a pretty awesome movie already, YouTuber Krishna Shenoi came up with a way to improve upon it, introducing a much better solution to Sandra Bullock's seemingly endless tumble in space. The ending is priceless.",
            'title': "Banned Opening Scene Of \"Gravity\" That Changes The Whole Movie",
        },
    }]

    def _real_extract(self, url):
        """Build the info dict for the 9gag.tv page at *url*.

        The page is downloaded and searched for the external video ID
        (required), the title (container attribute first, Open Graph title as
        fallback), the caption text and the view count (both optional).

        Returns a ``url_transparent`` dict whose ``url`` is the external
        video ID and whose ``ie_key`` is ``Youtube``.
        """
        mobj = re.match(self._VALID_URL, url)
        # Only one of the two ID groups takes part in a match, depending on
        # whether the URL is of the v/ or the p/ form.
        video_id = mobj.group('numid') or mobj.group('id')
        # v/ URLs and slug-less p/ URLs carry no display ID; reuse the ID.
        display_id = mobj.group('display_id') or video_id

        webpage = self._download_webpage(url, display_id)

        youtube_id = self._html_search_regex(
            r'(?s)id="jsid-video-post-container".*?data-external-id="([^"]+)"',
            webpage, 'video ID')

        # Prefer the title stored on the video container; fall back to the
        # Open Graph title when that attribute is missing or empty.
        title = self._html_search_regex(
            r'(?s)id="jsid-video-post-container".*?data-title="([^"]+)"',
            webpage, 'title', default=None)
        if not title:
            title = self._og_search_title(webpage)

        description = self._html_search_regex(
            r'(?s)<div class="video-caption">.*?<p>(.*?)</p>', webpage,
            'description', fatal=False)

        # The count is rendered with thousands separators (e.g. "1,234");
        # strip them before converting.  Leave it unset when nothing matched.
        view_count_str = self._html_search_regex(
            r'<p><b>([0-9][0-9,]*)</b> views</p>', webpage, 'view count',
            fatal=False)
        view_count = (
            None if view_count_str is None
            else int(view_count_str.replace(',', '')))

        return {
            '_type': 'url_transparent',
            'url': youtube_id,
            'ie_key': 'Youtube',
            'id': video_id,
            'display_id': display_id,
            'title': title,
            'description': description,
            'view_count': view_count,
            'thumbnail': self._og_search_thumbnail(webpage),
        }