From: remitamine Date: Tue, 29 Dec 2015 09:40:32 +0000 (+0100) Subject: Merge branch 'daum' of https://github.com/remitamine/youtube-dl into remitamine-daum X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=commitdiff_plain;h=126d7701b0f88c33b5fbda6fd07ef056b96c9367;hp=-c;p=youtube-dl Merge branch 'daum' of https://github.com/remitamine/youtube-dl into remitamine-daum --- 126d7701b0f88c33b5fbda6fd07ef056b96c9367 diff --combined youtube_dl/extractor/__init__.py index 68c9c1288,99b4be002..ec1f80155 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@@ -3,18 -3,13 +3,18 @@@ from __future__ import unicode_literal from .abc import ABCIE from .abc7news import Abc7NewsIE from .academicearth import AcademicEarthCourseIE +from .acast import ( + ACastIE, + ACastChannelIE, +) from .addanime import AddAnimeIE from .adobetv import ( AdobeTVIE, + AdobeTVShowIE, + AdobeTVChannelIE, AdobeTVVideoIE, ) from .adultswim import AdultSwimIE -from .aftenposten import AftenpostenIE from .aftonbladet import AftonbladetIE from .airmozilla import AirMozillaIE from .aljazeera import AlJazeeraIE @@@ -25,10 -20,7 +25,10 @@@ from .aol import AolI from .allocine import AllocineIE from .aparat import AparatIE from .appleconnect import AppleConnectIE -from .appletrailers import AppleTrailersIE +from .appletrailers import ( + AppleTrailersIE, + AppleTrailersSectionIE, +) from .archiveorg import ArchiveOrgIE from .ard import ( ARDIE, @@@ -46,7 -38,6 +46,7 @@@ from .arte import ) from .atresplayer import AtresPlayerIE from .atttechchannel import ATTTechChannelIE +from .audimedia import AudiMediaIE from .audiomack import AudiomackIE, AudiomackAlbumIE from .azubu import AzubuIE from .baidu import BaiduVideoIE @@@ -63,19 -54,13 +63,19 @@@ from .beatportpro import BeatportProI from .bet import BetIE from .bild import BildIE from .bilibili import BiliBiliIE +from .bleacherreport import ( + BleacherReportIE, + BleacherReportCMSIE, +) from .blinkx import BlinkxIE -from .bliptv import BlipTVIE, BlipTVUserIE from .bloomberg import BloombergIE from .bpb import BpbIE from .br import BRIE from .breakcom import BreakIE -from .brightcove import BrightcoveIE +from .brightcove import ( + BrightcoveLegacyIE, + BrightcoveNewIE, +) from .buzzfeed import BuzzFeedIE from .byutv import BYUtvIE from .c56 import C56IE @@@ -83,6 -68,7 +83,6 @@@ from .camdemy import CamdemyIE, CamdemyFolderIE ) -from .canal13cl import Canal13clIE from .canalplus import CanalplusIE from .canalc2 import Canalc2IE from .cbs import CBSIE @@@ -133,22 -119,18 +133,25 @@@ from .dailymotion import DailymotionUserIE, DailymotionCloudIE, ) - from .daum import DaumIE + from .daum import ( + DaumIE, + DaumClipIE, + ) from .dbtv import DBTVIE -from .dcn import DCNIE +from .dcn import ( + DCNIE, + DCNVideoIE, + DCNLiveIE, + DCNSeasonIE, +) from .dctp import DctpTvIE from .deezer import DeezerPlaylistIE +from .democracynow import DemocracynowIE from .dfb import DFBIE from .dhm import DHMIE from .dotsub import DotsubIE from .douyutv import DouyuTVIE +from .dplay import DPlayIE from .dramafever import ( DramaFeverIE, DramaFeverSeriesIE, @@@ -203,10 -185,7 +206,10 @@@ from .fourtube import FourTubeI from .foxgay import FoxgayIE from .foxnews import FoxNewsIE from .foxsports import FoxSportsIE -from .franceculture import FranceCultureIE +from .franceculture import ( + FranceCultureIE, + FranceCultureEmissionIE, +) from .franceinter import FranceInterIE from .francetv import ( PluzzIE, @@@ -218,9 -197,7 +221,9 @@@ from .freesound import FreesoundIE from .freespeech import FreespeechIE from .freevideo import FreeVideoIE +from .funimation import FunimationIE from .funnyordie import FunnyOrDieIE +from .gameinformer import GameInformerIE from .gamekings import GamekingsIE from .gameone import ( GameOneIE, @@@ -237,18 -214,14 +240,18 @@@ from .gfycat import GfycatI from .giantbomb import GiantBombIE from .giga import GigaIE from .glide import GlideIE -from .globo import GloboIE +from .globo import ( + GloboIE, + GloboArticleIE, +) from .godtube import GodTubeIE from .goldenmoustache import GoldenMoustacheIE from .golem import GolemIE +from .googledrive import GoogleDriveIE from .googleplus import GooglePlusIE from .googlesearch import GoogleSearchIE -from .gorillavid import GorillaVidIE from .goshgay import GoshgayIE +from .gputechconf import GPUTechConfIE from .groupon import GrouponIE from .hark import HarkIE from .hearthisat import HearThisAtIE @@@ -261,17 -234,12 +264,17 @@@ from .history import HistoryI from .hitbox import HitboxIE, HitboxLiveIE from .hornbunny import HornBunnyIE from .hotnewhiphop import HotNewHipHopIE +from .hotstar import HotStarIE from .howcast import HowcastIE from .howstuffworks import HowStuffWorksIE from .huffpost import HuffPostIE from .hypem import HypemIE from .iconosquare import IconosquareIE -from .ign import IGNIE, OneUPIE +from .ign import ( + IGNIE, + OneUPIE, + PCMagIE, +) from .imdb import ( ImdbIE, ImdbListIE @@@ -300,7 -268,6 +303,7 @@@ from .jadorecettepub import JadoreCette from .jeuxvideo import JeuxVideoIE from .jove import JoveIE from .jukebox import JukeboxIE +from .jwplatform import JWPlatformIE from .jpopsukitv import JpopsukiIE from .kaltura import KalturaIE from .kanalplay import KanalPlayIE @@@ -355,7 -322,6 +358,7 @@@ from .lynda import from .m6 import M6IE from .macgamestore import MacGameStoreIE from .mailru import MailRuIE +from .makertv import MakerTVIE from .malemotion import MalemotionIE from .mdr import MDRIE from .metacafe import MetacafeIE @@@ -379,6 -345,7 +382,6 @@@ from .motherless import MotherlessI from .motorsport import MotorsportIE from .movieclips import MovieClipsIE from .moviezine import MoviezineIE -from .movshare import MovShareIE from .mtv import ( MTVIE, MTVServicesEmbeddedIE, @@@ -444,22 -411,14 +447,22 @@@ from .noco import NocoI from .normalboots import NormalbootsIE from .nosvideo import NosVideoIE from .nova import NovaIE -from .novamov import NovaMovIE +from .novamov import ( + NovaMovIE, + WholeCloudIE, + NowVideoIE, + VideoWeedIE, + CloudTimeIE, +) from .nowness import ( NownessIE, NownessPlaylistIE, NownessSeriesIE, ) -from .nowtv import NowTVIE -from .nowvideo import NowVideoIE +from .nowtv import ( + NowTVIE, + NowTVListIE, +) from .npo import ( NPOIE, NPOLiveIE, @@@ -496,7 -455,10 +499,7 @@@ from .orf import from .parliamentliveuk import ParliamentLiveUKIE from .patreon import PatreonIE from .pbs import PBSIE -from .periscope import ( - PeriscopeIE, - QuickscopeIE, -) +from .periscope import PeriscopeIE from .philharmoniedeparis import PhilharmonieDeParisIE from .phoenix import PhoenixIE from .photobucket import PhotobucketIE @@@ -540,10 -502,7 +543,10 @@@ from .radiode import RadioDeI from .radiojavan import RadioJavanIE from .radiobremen import RadioBremenIE from .radiofrance import RadioFranceIE -from .rai import RaiIE +from .rai import ( + RaiTVIE, + RaiIE, +) from .rbmaradio import RBMARadioIE from .rds import RDSIE from .redtube import RedTubeIE @@@ -591,10 -550,6 +594,10 @@@ from .shahid import ShahidI from .shared import SharedIE from .sharesix import ShareSixIE from .sina import SinaIE +from .skynewsarabia import ( + SkyNewsArabiaIE, + SkyNewsArabiaArticleIE, +) from .slideshare import SlideshareIE from .slutload import SlutloadIE from .smotri import ( @@@ -609,12 -564,15 +612,12 @@@ from .snagfilms import ) from .snotr import SnotrIE from .sohu import SohuIE -from .soompi import ( - SoompiIE, - SoompiShowIE, -) from .soundcloud import ( SoundcloudIE, SoundcloudSetIE, SoundcloudUserIE, - SoundcloudPlaylistIE + SoundcloudPlaylistIE, + SoundcloudSearchIE ) from .soundgasm import ( SoundgasmIE, @@@ -667,7 -625,6 +670,7 @@@ from .teachingchannel import TeachingCh from .teamcoco import TeamcocoIE from .techtalks import TechTalksIE from .ted import TEDIE +from .tele13 import Tele13IE from .telebruxelles import TeleBruxellesIE from .telecinco import TelecincoIE from .telegraaf import TelegraafIE @@@ -677,7 -634,6 +680,7 @@@ from .tenplay import TenPlayI from .testurl import TestURLIE from .testtube import TestTubeIE from .tf1 import TF1IE +from .theintercept import TheInterceptIE from .theonion import TheOnionIE from .theplatform import ( ThePlatformIE, @@@ -697,7 -653,6 +700,7 @@@ from .tnaflix import EMPFlixIE, MovieFapIE, ) +from .toggle import ToggleIE from .thvideo import ( THVideoIE, THVideoPlaylistIE @@@ -711,13 -666,7 +714,13 @@@ from .tube8 import Tube8I from .tubitv import TubiTvIE from .tudou import TudouIE from .tumblr import TumblrIE -from .tunein import TuneInIE +from .tunein import ( + TuneInClipIE, + TuneInStationIE, + TuneInProgramIE, + TuneInTopicIE, + TuneInShortenerIE, +) from .turbo import TurboIE from .tutv import TutvIE from .tv2 import ( @@@ -774,15 -723,16 +777,15 @@@ from .vh1 import VH1I from .vice import ViceIE from .viddler import ViddlerIE from .videodetective import VideoDetectiveIE -from .videolecturesnet import VideoLecturesNetIE from .videofyme import VideofyMeIE from .videomega import VideoMegaIE from .videopremium import VideoPremiumIE from .videott import VideoTtIE -from .videoweed import VideoWeedIE from .vidme import VidmeIE from .vidzi import VidziIE from .vier import VierIE, VierVideosIE from .viewster import ViewsterIE +from .viidea import ViideaIE from .vimeo import ( VimeoIE, VimeoAlbumIE, @@@ -835,7 -785,6 +838,7 @@@ from .wrzuta import WrzutaI from .wsj import WSJIE from .xbef import XBefIE from .xboxclips import XboxClipsIE +from .xfileshare import XFileShareIE from .xhamster import ( XHamsterIE, XHamsterEmbedIE, @@@ -879,7 -828,6 +882,7 @@@ from .youtube import YoutubeTruncatedIDIE, YoutubeTruncatedURLIE, YoutubeUserIE, + YoutubePlaylistsIE, YoutubeWatchLaterIE, ) from .zapiks import ZapiksIE diff --combined youtube_dl/extractor/daum.py index 9a94cf361,0f5686e07..a083cc0dc --- a/youtube_dl/extractor/daum.py +++ b/youtube_dl/extractor/daum.py @@@ -2,71 -2,45 +2,51 @@@ from __future__ import unicode_literals - import re - from .common import InfoExtractor - from ..compat import ( - compat_urllib_parse, + from ..compat import compat_urllib_parse -from ..utils import int_or_none ++from ..utils import ( ++ int_or_none, ++ str_to_int, ++ xpath_text, +) class DaumIE(InfoExtractor): - _VALID_URL = r'https?://(?:m\.)?tvpot\.daum\.net/(?:v/|.*?clipid=)(?P[^?#&]+)' + _VALID_URL = r'https?://(?:m\.)?tvpot\.daum\.net/v/(?P[^?#&]+)' IE_NAME = 'daum.net' _TESTS = [{ - 'url': 'http://tvpot.daum.net/clip/ClipView.do?clipid=52554690', - 'info_dict': { - 'id': '52554690', - 'ext': 'mp4', - 'title': 'DOTA 2GETHER 시즌2 6회 - 2부', - 'description': 'DOTA 2GETHER 시즌2 6회 - 2부', - 'upload_date': '20130831', - 'duration': 3868, - }, - }, { - # Test for https://github.com/rg3/youtube-dl/issues/7949 - 'url': 'http://tvpot.daum.net/mypot/View.do?ownerid=M1O35s8HPOo0&clipid=73147290', - 'md5': 'c92d78bcee4424451f1667f275c1dc97', + 'url': 'http://tvpot.daum.net/v/vab4dyeDBysyBssyukBUjBz', 'info_dict': { - 'id': '73147290', + 'id': 'vab4dyeDBysyBssyukBUjBz', 'ext': 'mp4', - 'title': '싸이 - 나팔바지 [유희열의 스케치북] 299회 20151218', - 'description': '싸이 - 나팔바지', - 'upload_date': '20151219', - 'duration': 232, + 'title': '마크 헌트 vs 안토니오 실바', + 'description': 'Mark Hunt vs Antonio Silva', + 'upload_date': '20131217', + 'duration': 2117, ++ 'view_count': int, ++ 'comment_count': int, }, - }, { - 'url': 'http://tvpot.daum.net/v/vab4dyeDBysyBssyukBUjBz', - 'only_matching': True, }, { 'url': 'http://tvpot.daum.net/v/07dXWRka62Y%24', 'only_matching': True, }] def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - canonical_url = 'http://tvpot.daum.net/v/%s' % video_id - webpage = self._download_webpage(canonical_url, video_id) - og_url = self._og_search_url(webpage, default=None) or self._search_regex( - r']+rel=(["\'])canonical\1[^>]+href=(["\'])(?P.+?)\2', - webpage, 'canonical url', group='url') - full_id = self._search_regex( - r'tvpot\.daum\.net/v/([^/]+)', og_url, 'full id') - query = compat_urllib_parse.urlencode({'vid': full_id}) + video_id = self._match_id(url) + query = compat_urllib_parse.urlencode({'vid': video_id}) info = self._download_xml( 'http://tvpot.daum.net/clip/ClipInfoXml.do?' + query, video_id, 'Downloading video info') - urls = self._download_xml( - 'http://videofarm.daum.net/controller/api/open/v1_2/MovieData.apixml?' + query, + movie_data = self._download_json( + 'http://videofarm.daum.net/controller/api/closed/v1_2/IntegratedMovieData.json?' + query, video_id, 'Downloading video formats info') formats = [] - for format_el in urls.findall('result/output_list/output_list'): - profile = format_el.attrib['profile'] + for format_el in movie_data['output_list']['output_list']: + profile = format_el['profile'] format_query = compat_urllib_parse.urlencode({ - 'vid': full_id, + 'vid': video_id, 'profile': profile, }) url_doc = self._download_xml( @@@ -76,14 -50,53 +56,57 @@@ formats.append({ 'url': format_url, 'format_id': profile, + 'width': int_or_none(format_el.get('width')), + 'height': int_or_none(format_el.get('height')), + 'filesize': int_or_none(format_el.get('filesize')), }) + self._sort_formats(formats) return { 'id': video_id, 'title': info.find('TITLE').text, 'formats': formats, - 'thumbnail': self._og_search_thumbnail(webpage), - 'thumbnail': info.find('THUMB_URL').text, -- 'description': info.find('CONTENTS').text, - 'duration': int(info.find('DURATION').text), - 'duration': int_or_none(info.find('DURATION').text), ++ 'thumbnail': xpath_text(info, 'THUMB_URL'), ++ 'description': xpath_text(info, 'CONTENTS'), ++ 'duration': int_or_none(xpath_text(info, 'DURATION')), 'upload_date': info.find('REGDTTM').text[:8], ++ 'view_count': str_to_int(xpath_text(info, 'PLAY_CNT')), ++ 'comment_count': str_to_int(xpath_text(info, 'COMMENT_CNT')), + } + + + class DaumClipIE(InfoExtractor): - _VALID_URL = r'https?://(?:m\.)?tvpot\.daum\.net/.*?clipid=(?P\d+)' ++ _VALID_URL = r'https?://(?:m\.)?tvpot\.daum\.net/(?:clip/ClipView.do|mypot/View.do)\?.*?clipid=(?P\d+)' + IE_NAME = 'daum.net' + + _TESTS = [{ + 'url': 'http://tvpot.daum.net/clip/ClipView.do?clipid=52554690', + 'info_dict': { + 'id': '52554690', + 'ext': 'mp4', + 'title': 'DOTA 2GETHER 시즌2 6회 - 2부', + 'description': 'DOTA 2GETHER 시즌2 6회 - 2부', + 'upload_date': '20130831', + 'duration': 3868, + 'view_count': int, + }, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) - clip_info = self._download_json('http://tvpot.daum.net/mypot/json/GetClipInfo.do?clipid=%s' % video_id, video_id)['clip_bean'] ++ clip_info = self._download_json( ++ 'http://tvpot.daum.net/mypot/json/GetClipInfo.do?clipid=%s' % video_id, ++ video_id, 'Downloading clip info')['clip_bean'] + + return { + '_type': 'url_transparent', + 'id': video_id, + 'url': 'http://tvpot.daum.net/v/%s' % clip_info['vid'], + 'title': clip_info['title'], + 'thumbnail': clip_info.get('thumb_url'), + 'description': clip_info.get('contents'), + 'duration': int_or_none(clip_info.get('duration')), + 'upload_date': clip_info.get('up_date')[:8], + 'view_count': int_or_none(clip_info.get('play_count')), + 'ie_key': 'Daum', }