git.bitcoin.ninja Git - youtube-dl/blob - youtube_dl/utils.py

   1 #!/usr/bin/env python
   2 # coding: utf-8
   3
   4 from __future__ import unicode_literals
   5
   6 import base64
   7 import binascii
   8 import calendar
   9 import codecs
  10 import contextlib
  11 import ctypes
  12 import datetime
  13 import email.utils
  14 import email.header
  15 import errno
  16 import functools
  17 import gzip
  18 import io
  19 import itertools
  20 import json
  21 import locale
  22 import math
  23 import operator
  24 import os
  25 import platform
  26 import random
  27 import re
  28 import socket
  29 import ssl
  30 import subprocess
  31 import sys
  32 import tempfile
  33 import traceback
  34 import xml.etree.ElementTree
  35 import zlib
  36
  37 from .compat import (
  38     compat_HTMLParseError,
  39     compat_HTMLParser,
  40     compat_basestring,
  41     compat_chr,
  42     compat_cookiejar,
  43     compat_ctypes_WINFUNCTYPE,
  44     compat_etree_fromstring,
  45     compat_expanduser,
  46     compat_html_entities,
  47     compat_html_entities_html5,
  48     compat_http_client,
  49     compat_kwargs,
  50     compat_os_name,
  51     compat_parse_qs,
  52     compat_shlex_quote,
  53     compat_str,
  54     compat_struct_pack,
  55     compat_struct_unpack,
  56     compat_urllib_error,
  57     compat_urllib_parse,
  58     compat_urllib_parse_urlencode,
  59     compat_urllib_parse_urlparse,
  60     compat_urllib_parse_unquote_plus,
  61     compat_urllib_request,
  62     compat_urlparse,
  63     compat_xpath,
  64 )
  65
  66 from .socks import (
  67     ProxyType,
  68     sockssocket,
  69 )
  70
  71
  72 def register_socks_protocols():
  73     # "Register" SOCKS protocols
  74     # In Python < 2.6.5, urlsplit() suffers from bug https://bugs.python.org/issue7904
  75     # URLs with protocols not in urlparse.uses_netloc are not handled correctly
  76     for scheme in ('socks', 'socks4', 'socks4a', 'socks5'):
  77         if scheme not in compat_urlparse.uses_netloc:
  78             compat_urlparse.uses_netloc.append(scheme)
  79
  80
  81 # This is not clearly defined otherwise
  82 compiled_regex_type = type(re.compile(''))
  83
  84
  85 def random_user_agent():
  86     _USER_AGENT_TPL = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s Safari/537.36'
  87     _CHROME_VERSIONS = (
  88         '74.0.3729.129',
  89         '76.0.3780.3',
  90         '76.0.3780.2',
  91         '74.0.3729.128',
  92         '76.0.3780.1',
  93         '76.0.3780.0',
  94         '75.0.3770.15',
  95         '74.0.3729.127',
  96         '74.0.3729.126',
  97         '76.0.3779.1',
  98         '76.0.3779.0',
  99         '75.0.3770.14',
 100         '74.0.3729.125',
 101         '76.0.3778.1',
 102         '76.0.3778.0',
 103         '75.0.3770.13',
 104         '74.0.3729.124',
 105         '74.0.3729.123',
 106         '73.0.3683.121',
 107         '76.0.3777.1',
 108         '76.0.3777.0',
 109         '75.0.3770.12',
 110         '74.0.3729.122',
 111         '76.0.3776.4',
 112         '75.0.3770.11',
 113         '74.0.3729.121',
 114         '76.0.3776.3',
 115         '76.0.3776.2',
 116         '73.0.3683.120',
 117         '74.0.3729.120',
 118         '74.0.3729.119',
 119         '74.0.3729.118',
 120         '76.0.3776.1',
 121         '76.0.3776.0',
 122         '76.0.3775.5',
 123         '75.0.3770.10',
 124         '74.0.3729.117',
 125         '76.0.3775.4',
 126         '76.0.3775.3',
 127         '74.0.3729.116',
 128         '75.0.3770.9',
 129         '76.0.3775.2',
 130         '76.0.3775.1',
 131         '76.0.3775.0',
 132         '75.0.3770.8',
 133         '74.0.3729.115',
 134         '74.0.3729.114',
 135         '76.0.3774.1',
 136         '76.0.3774.0',
 137         '75.0.3770.7',
 138         '74.0.3729.113',
 139         '74.0.3729.112',
 140         '74.0.3729.111',
 141         '76.0.3773.1',
 142         '76.0.3773.0',
 143         '75.0.3770.6',
 144         '74.0.3729.110',
 145         '74.0.3729.109',
 146         '76.0.3772.1',
 147         '76.0.3772.0',
 148         '75.0.3770.5',
 149         '74.0.3729.108',
 150         '74.0.3729.107',
 151         '76.0.3771.1',
 152         '76.0.3771.0',
 153         '75.0.3770.4',
 154         '74.0.3729.106',
 155         '74.0.3729.105',
 156         '75.0.3770.3',
 157         '74.0.3729.104',
 158         '74.0.3729.103',
 159         '74.0.3729.102',
 160         '75.0.3770.2',
 161         '74.0.3729.101',
 162         '75.0.3770.1',
 163         '75.0.3770.0',
 164         '74.0.3729.100',
 165         '75.0.3769.5',
 166         '75.0.3769.4',
 167         '74.0.3729.99',
 168         '75.0.3769.3',
 169         '75.0.3769.2',
 170         '75.0.3768.6',
 171         '74.0.3729.98',
 172         '75.0.3769.1',
 173         '75.0.3769.0',
 174         '74.0.3729.97',
 175         '73.0.3683.119',
 176         '73.0.3683.118',
 177         '74.0.3729.96',
 178         '75.0.3768.5',
 179         '75.0.3768.4',
 180         '75.0.3768.3',
 181         '75.0.3768.2',
 182         '74.0.3729.95',
 183         '74.0.3729.94',
 184         '75.0.3768.1',
 185         '75.0.3768.0',
 186         '74.0.3729.93',
 187         '74.0.3729.92',
 188         '73.0.3683.117',
 189         '74.0.3729.91',
 190         '75.0.3766.3',
 191         '74.0.3729.90',
 192         '75.0.3767.2',
 193         '75.0.3767.1',
 194         '75.0.3767.0',
 195         '74.0.3729.89',
 196         '73.0.3683.116',
 197         '75.0.3766.2',
 198         '74.0.3729.88',
 199         '75.0.3766.1',
 200         '75.0.3766.0',
 201         '74.0.3729.87',
 202         '73.0.3683.115',
 203         '74.0.3729.86',
 204         '75.0.3765.1',
 205         '75.0.3765.0',
 206         '74.0.3729.85',
 207         '73.0.3683.114',
 208         '74.0.3729.84',
 209         '75.0.3764.1',
 210         '75.0.3764.0',
 211         '74.0.3729.83',
 212         '73.0.3683.113',
 213         '75.0.3763.2',
 214         '75.0.3761.4',
 215         '74.0.3729.82',
 216         '75.0.3763.1',
 217         '75.0.3763.0',
 218         '74.0.3729.81',
 219         '73.0.3683.112',
 220         '75.0.3762.1',
 221         '75.0.3762.0',
 222         '74.0.3729.80',
 223         '75.0.3761.3',
 224         '74.0.3729.79',
 225         '73.0.3683.111',
 226         '75.0.3761.2',
 227         '74.0.3729.78',
 228         '74.0.3729.77',
 229         '75.0.3761.1',
 230         '75.0.3761.0',
 231         '73.0.3683.110',
 232         '74.0.3729.76',
 233         '74.0.3729.75',
 234         '75.0.3760.0',
 235         '74.0.3729.74',
 236         '75.0.3759.8',
 237         '75.0.3759.7',
 238         '75.0.3759.6',
 239         '74.0.3729.73',
 240         '75.0.3759.5',
 241         '74.0.3729.72',
 242         '73.0.3683.109',
 243         '75.0.3759.4',
 244         '75.0.3759.3',
 245         '74.0.3729.71',
 246         '75.0.3759.2',
 247         '74.0.3729.70',
 248         '73.0.3683.108',
 249         '74.0.3729.69',
 250         '75.0.3759.1',
 251         '75.0.3759.0',
 252         '74.0.3729.68',
 253         '73.0.3683.107',
 254         '74.0.3729.67',
 255         '75.0.3758.1',
 256         '75.0.3758.0',
 257         '74.0.3729.66',
 258         '73.0.3683.106',
 259         '74.0.3729.65',
 260         '75.0.3757.1',
 261         '75.0.3757.0',
 262         '74.0.3729.64',
 263         '73.0.3683.105',
 264         '74.0.3729.63',
 265         '75.0.3756.1',
 266         '75.0.3756.0',
 267         '74.0.3729.62',
 268         '73.0.3683.104',
 269         '75.0.3755.3',
 270         '75.0.3755.2',
 271         '73.0.3683.103',
 272         '75.0.3755.1',
 273         '75.0.3755.0',
 274         '74.0.3729.61',
 275         '73.0.3683.102',
 276         '74.0.3729.60',
 277         '75.0.3754.2',
 278         '74.0.3729.59',
 279         '75.0.3753.4',
 280         '74.0.3729.58',
 281         '75.0.3754.1',
 282         '75.0.3754.0',
 283         '74.0.3729.57',
 284         '73.0.3683.101',
 285         '75.0.3753.3',
 286         '75.0.3752.2',
 287         '75.0.3753.2',
 288         '74.0.3729.56',
 289         '75.0.3753.1',
 290         '75.0.3753.0',
 291         '74.0.3729.55',
 292         '73.0.3683.100',
 293         '74.0.3729.54',
 294         '75.0.3752.1',
 295         '75.0.3752.0',
 296         '74.0.3729.53',
 297         '73.0.3683.99',
 298         '74.0.3729.52',
 299         '75.0.3751.1',
 300         '75.0.3751.0',
 301         '74.0.3729.51',
 302         '73.0.3683.98',
 303         '74.0.3729.50',
 304         '75.0.3750.0',
 305         '74.0.3729.49',
 306         '74.0.3729.48',
 307         '74.0.3729.47',
 308         '75.0.3749.3',
 309         '74.0.3729.46',
 310         '73.0.3683.97',
 311         '75.0.3749.2',
 312         '74.0.3729.45',
 313         '75.0.3749.1',
 314         '75.0.3749.0',
 315         '74.0.3729.44',
 316         '73.0.3683.96',
 317         '74.0.3729.43',
 318         '74.0.3729.42',
 319         '75.0.3748.1',
 320         '75.0.3748.0',
 321         '74.0.3729.41',
 322         '75.0.3747.1',
 323         '73.0.3683.95',
 324         '75.0.3746.4',
 325         '74.0.3729.40',
 326         '74.0.3729.39',
 327         '75.0.3747.0',
 328         '75.0.3746.3',
 329         '75.0.3746.2',
 330         '74.0.3729.38',
 331         '75.0.3746.1',
 332         '75.0.3746.0',
 333         '74.0.3729.37',
 334         '73.0.3683.94',
 335         '75.0.3745.5',
 336         '75.0.3745.4',
 337         '75.0.3745.3',
 338         '75.0.3745.2',
 339         '74.0.3729.36',
 340         '75.0.3745.1',
 341         '75.0.3745.0',
 342         '75.0.3744.2',
 343         '74.0.3729.35',
 344         '73.0.3683.93',
 345         '74.0.3729.34',
 346         '75.0.3744.1',
 347         '75.0.3744.0',
 348         '74.0.3729.33',
 349         '73.0.3683.92',
 350         '74.0.3729.32',
 351         '74.0.3729.31',
 352         '73.0.3683.91',
 353         '75.0.3741.2',
 354         '75.0.3740.5',
 355         '74.0.3729.30',
 356         '75.0.3741.1',
 357         '75.0.3741.0',
 358         '74.0.3729.29',
 359         '75.0.3740.4',
 360         '73.0.3683.90',
 361         '74.0.3729.28',
 362         '75.0.3740.3',
 363         '73.0.3683.89',
 364         '75.0.3740.2',
 365         '74.0.3729.27',
 366         '75.0.3740.1',
 367         '75.0.3740.0',
 368         '74.0.3729.26',
 369         '73.0.3683.88',
 370         '73.0.3683.87',
 371         '74.0.3729.25',
 372         '75.0.3739.1',
 373         '75.0.3739.0',
 374         '73.0.3683.86',
 375         '74.0.3729.24',
 376         '73.0.3683.85',
 377         '75.0.3738.4',
 378         '75.0.3738.3',
 379         '75.0.3738.2',
 380         '75.0.3738.1',
 381         '75.0.3738.0',
 382         '74.0.3729.23',
 383         '73.0.3683.84',
 384         '74.0.3729.22',
 385         '74.0.3729.21',
 386         '75.0.3737.1',
 387         '75.0.3737.0',
 388         '74.0.3729.20',
 389         '73.0.3683.83',
 390         '74.0.3729.19',
 391         '75.0.3736.1',
 392         '75.0.3736.0',
 393         '74.0.3729.18',
 394         '73.0.3683.82',
 395         '74.0.3729.17',
 396         '75.0.3735.1',
 397         '75.0.3735.0',
 398         '74.0.3729.16',
 399         '73.0.3683.81',
 400         '75.0.3734.1',
 401         '75.0.3734.0',
 402         '74.0.3729.15',
 403         '73.0.3683.80',
 404         '74.0.3729.14',
 405         '75.0.3733.1',
 406         '75.0.3733.0',
 407         '75.0.3732.1',
 408         '74.0.3729.13',
 409         '74.0.3729.12',
 410         '73.0.3683.79',
 411         '74.0.3729.11',
 412         '75.0.3732.0',
 413         '74.0.3729.10',
 414         '73.0.3683.78',
 415         '74.0.3729.9',
 416         '74.0.3729.8',
 417         '74.0.3729.7',
 418         '75.0.3731.3',
 419         '75.0.3731.2',
 420         '75.0.3731.0',
 421         '74.0.3729.6',
 422         '73.0.3683.77',
 423         '73.0.3683.76',
 424         '75.0.3730.5',
 425         '75.0.3730.4',
 426         '73.0.3683.75',
 427         '74.0.3729.5',
 428         '73.0.3683.74',
 429         '75.0.3730.3',
 430         '75.0.3730.2',
 431         '74.0.3729.4',
 432         '73.0.3683.73',
 433         '73.0.3683.72',
 434         '75.0.3730.1',
 435         '75.0.3730.0',
 436         '74.0.3729.3',
 437         '73.0.3683.71',
 438         '74.0.3729.2',
 439         '73.0.3683.70',
 440         '74.0.3729.1',
 441         '74.0.3729.0',
 442         '74.0.3726.4',
 443         '73.0.3683.69',
 444         '74.0.3726.3',
 445         '74.0.3728.0',
 446         '74.0.3726.2',
 447         '73.0.3683.68',
 448         '74.0.3726.1',
 449         '74.0.3726.0',
 450         '74.0.3725.4',
 451         '73.0.3683.67',
 452         '73.0.3683.66',
 453         '74.0.3725.3',
 454         '74.0.3725.2',
 455         '74.0.3725.1',
 456         '74.0.3724.8',
 457         '74.0.3725.0',
 458         '73.0.3683.65',
 459         '74.0.3724.7',
 460         '74.0.3724.6',
 461         '74.0.3724.5',
 462         '74.0.3724.4',
 463         '74.0.3724.3',
 464         '74.0.3724.2',
 465         '74.0.3724.1',
 466         '74.0.3724.0',
 467         '73.0.3683.64',
 468         '74.0.3723.1',
 469         '74.0.3723.0',
 470         '73.0.3683.63',
 471         '74.0.3722.1',
 472         '74.0.3722.0',
 473         '73.0.3683.62',
 474         '74.0.3718.9',
 475         '74.0.3702.3',
 476         '74.0.3721.3',
 477         '74.0.3721.2',
 478         '74.0.3721.1',
 479         '74.0.3721.0',
 480         '74.0.3720.6',
 481         '73.0.3683.61',
 482         '72.0.3626.122',
 483         '73.0.3683.60',
 484         '74.0.3720.5',
 485         '72.0.3626.121',
 486         '74.0.3718.8',
 487         '74.0.3720.4',
 488         '74.0.3720.3',
 489         '74.0.3718.7',
 490         '74.0.3720.2',
 491         '74.0.3720.1',
 492         '74.0.3720.0',
 493         '74.0.3718.6',
 494         '74.0.3719.5',
 495         '73.0.3683.59',
 496         '74.0.3718.5',
 497         '74.0.3718.4',
 498         '74.0.3719.4',
 499         '74.0.3719.3',
 500         '74.0.3719.2',
 501         '74.0.3719.1',
 502         '73.0.3683.58',
 503         '74.0.3719.0',
 504         '73.0.3683.57',
 505         '73.0.3683.56',
 506         '74.0.3718.3',
 507         '73.0.3683.55',
 508         '74.0.3718.2',
 509         '74.0.3718.1',
 510         '74.0.3718.0',
 511         '73.0.3683.54',
 512         '74.0.3717.2',
 513         '73.0.3683.53',
 514         '74.0.3717.1',
 515         '74.0.3717.0',
 516         '73.0.3683.52',
 517         '74.0.3716.1',
 518         '74.0.3716.0',
 519         '73.0.3683.51',
 520         '74.0.3715.1',
 521         '74.0.3715.0',
 522         '73.0.3683.50',
 523         '74.0.3711.2',
 524         '74.0.3714.2',
 525         '74.0.3713.3',
 526         '74.0.3714.1',
 527         '74.0.3714.0',
 528         '73.0.3683.49',
 529         '74.0.3713.1',
 530         '74.0.3713.0',
 531         '72.0.3626.120',
 532         '73.0.3683.48',
 533         '74.0.3712.2',
 534         '74.0.3712.1',
 535         '74.0.3712.0',
 536         '73.0.3683.47',
 537         '72.0.3626.119',
 538         '73.0.3683.46',
 539         '74.0.3710.2',
 540         '72.0.3626.118',
 541         '74.0.3711.1',
 542         '74.0.3711.0',
 543         '73.0.3683.45',
 544         '72.0.3626.117',
 545         '74.0.3710.1',
 546         '74.0.3710.0',
 547         '73.0.3683.44',
 548         '72.0.3626.116',
 549         '74.0.3709.1',
 550         '74.0.3709.0',
 551         '74.0.3704.9',
 552         '73.0.3683.43',
 553         '72.0.3626.115',
 554         '74.0.3704.8',
 555         '74.0.3704.7',
 556         '74.0.3708.0',
 557         '74.0.3706.7',
 558         '74.0.3704.6',
 559         '73.0.3683.42',
 560         '72.0.3626.114',
 561         '74.0.3706.6',
 562         '72.0.3626.113',
 563         '74.0.3704.5',
 564         '74.0.3706.5',
 565         '74.0.3706.4',
 566         '74.0.3706.3',
 567         '74.0.3706.2',
 568         '74.0.3706.1',
 569         '74.0.3706.0',
 570         '73.0.3683.41',
 571         '72.0.3626.112',
 572         '74.0.3705.1',
 573         '74.0.3705.0',
 574         '73.0.3683.40',
 575         '72.0.3626.111',
 576         '73.0.3683.39',
 577         '74.0.3704.4',
 578         '73.0.3683.38',
 579         '74.0.3704.3',
 580         '74.0.3704.2',
 581         '74.0.3704.1',
 582         '74.0.3704.0',
 583         '73.0.3683.37',
 584         '72.0.3626.110',
 585         '72.0.3626.109',
 586         '74.0.3703.3',
 587         '74.0.3703.2',
 588         '73.0.3683.36',
 589         '74.0.3703.1',
 590         '74.0.3703.0',
 591         '73.0.3683.35',
 592         '72.0.3626.108',
 593         '74.0.3702.2',
 594         '74.0.3699.3',
 595         '74.0.3702.1',
 596         '74.0.3702.0',
 597         '73.0.3683.34',
 598         '72.0.3626.107',
 599         '73.0.3683.33',
 600         '74.0.3701.1',
 601         '74.0.3701.0',
 602         '73.0.3683.32',
 603         '73.0.3683.31',
 604         '72.0.3626.105',
 605         '74.0.3700.1',
 606         '74.0.3700.0',
 607         '73.0.3683.29',
 608         '72.0.3626.103',
 609         '74.0.3699.2',
 610         '74.0.3699.1',
 611         '74.0.3699.0',
 612         '73.0.3683.28',
 613         '72.0.3626.102',
 614         '73.0.3683.27',
 615         '73.0.3683.26',
 616         '74.0.3698.0',
 617         '74.0.3696.2',
 618         '72.0.3626.101',
 619         '73.0.3683.25',
 620         '74.0.3696.1',
 621         '74.0.3696.0',
 622         '74.0.3694.8',
 623         '72.0.3626.100',
 624         '74.0.3694.7',
 625         '74.0.3694.6',
 626         '74.0.3694.5',
 627         '74.0.3694.4',
 628         '72.0.3626.99',
 629         '72.0.3626.98',
 630         '74.0.3694.3',
 631         '73.0.3683.24',
 632         '72.0.3626.97',
 633         '72.0.3626.96',
 634         '72.0.3626.95',
 635         '73.0.3683.23',
 636         '72.0.3626.94',
 637         '73.0.3683.22',
 638         '73.0.3683.21',
 639         '72.0.3626.93',
 640         '74.0.3694.2',
 641         '72.0.3626.92',
 642         '74.0.3694.1',
 643         '74.0.3694.0',
 644         '74.0.3693.6',
 645         '73.0.3683.20',
 646         '72.0.3626.91',
 647         '74.0.3693.5',
 648         '74.0.3693.4',
 649         '74.0.3693.3',
 650         '74.0.3693.2',
 651         '73.0.3683.19',
 652         '74.0.3693.1',
 653         '74.0.3693.0',
 654         '73.0.3683.18',
 655         '72.0.3626.90',
 656         '74.0.3692.1',
 657         '74.0.3692.0',
 658         '73.0.3683.17',
 659         '72.0.3626.89',
 660         '74.0.3687.3',
 661         '74.0.3691.1',
 662         '74.0.3691.0',
 663         '73.0.3683.16',
 664         '72.0.3626.88',
 665         '72.0.3626.87',
 666         '73.0.3683.15',
 667         '74.0.3690.1',
 668         '74.0.3690.0',
 669         '73.0.3683.14',
 670         '72.0.3626.86',
 671         '73.0.3683.13',
 672         '73.0.3683.12',
 673         '74.0.3689.1',
 674         '74.0.3689.0',
 675         '73.0.3683.11',
 676         '72.0.3626.85',
 677         '73.0.3683.10',
 678         '72.0.3626.84',
 679         '73.0.3683.9',
 680         '74.0.3688.1',
 681         '74.0.3688.0',
 682         '73.0.3683.8',
 683         '72.0.3626.83',
 684         '74.0.3687.2',
 685         '74.0.3687.1',
 686         '74.0.3687.0',
 687         '73.0.3683.7',
 688         '72.0.3626.82',
 689         '74.0.3686.4',
 690         '72.0.3626.81',
 691         '74.0.3686.3',
 692         '74.0.3686.2',
 693         '74.0.3686.1',
 694         '74.0.3686.0',
 695         '73.0.3683.6',
 696         '72.0.3626.80',
 697         '74.0.3685.1',
 698         '74.0.3685.0',
 699         '73.0.3683.5',
 700         '72.0.3626.79',
 701         '74.0.3684.1',
 702         '74.0.3684.0',
 703         '73.0.3683.4',
 704         '72.0.3626.78',
 705         '72.0.3626.77',
 706         '73.0.3683.3',
 707         '73.0.3683.2',
 708         '72.0.3626.76',
 709         '73.0.3683.1',
 710         '73.0.3683.0',
 711         '72.0.3626.75',
 712         '71.0.3578.141',
 713         '73.0.3682.1',
 714         '73.0.3682.0',
 715         '72.0.3626.74',
 716         '71.0.3578.140',
 717         '73.0.3681.4',
 718         '73.0.3681.3',
 719         '73.0.3681.2',
 720         '73.0.3681.1',
 721         '73.0.3681.0',
 722         '72.0.3626.73',
 723         '71.0.3578.139',
 724         '72.0.3626.72',
 725         '72.0.3626.71',
 726         '73.0.3680.1',
 727         '73.0.3680.0',
 728         '72.0.3626.70',
 729         '71.0.3578.138',
 730         '73.0.3678.2',
 731         '73.0.3679.1',
 732         '73.0.3679.0',
 733         '72.0.3626.69',
 734         '71.0.3578.137',
 735         '73.0.3678.1',
 736         '73.0.3678.0',
 737         '71.0.3578.136',
 738         '73.0.3677.1',
 739         '73.0.3677.0',
 740         '72.0.3626.68',
 741         '72.0.3626.67',
 742         '71.0.3578.135',
 743         '73.0.3676.1',
 744         '73.0.3676.0',
 745         '73.0.3674.2',
 746         '72.0.3626.66',
 747         '71.0.3578.134',
 748         '73.0.3674.1',
 749         '73.0.3674.0',
 750         '72.0.3626.65',
 751         '71.0.3578.133',
 752         '73.0.3673.2',
 753         '73.0.3673.1',
 754         '73.0.3673.0',
 755         '72.0.3626.64',
 756         '71.0.3578.132',
 757         '72.0.3626.63',
 758         '72.0.3626.62',
 759         '72.0.3626.61',
 760         '72.0.3626.60',
 761         '73.0.3672.1',
 762         '73.0.3672.0',
 763         '72.0.3626.59',
 764         '71.0.3578.131',
 765         '73.0.3671.3',
 766         '73.0.3671.2',
 767         '73.0.3671.1',
 768         '73.0.3671.0',
 769         '72.0.3626.58',
 770         '71.0.3578.130',
 771         '73.0.3670.1',
 772         '73.0.3670.0',
 773         '72.0.3626.57',
 774         '71.0.3578.129',
 775         '73.0.3669.1',
 776         '73.0.3669.0',
 777         '72.0.3626.56',
 778         '71.0.3578.128',
 779         '73.0.3668.2',
 780         '73.0.3668.1',
 781         '73.0.3668.0',
 782         '72.0.3626.55',
 783         '71.0.3578.127',
 784         '73.0.3667.2',
 785         '73.0.3667.1',
 786         '73.0.3667.0',
 787         '72.0.3626.54',
 788         '71.0.3578.126',
 789         '73.0.3666.1',
 790         '73.0.3666.0',
 791         '72.0.3626.53',
 792         '71.0.3578.125',
 793         '73.0.3665.4',
 794         '73.0.3665.3',
 795         '72.0.3626.52',
 796         '73.0.3665.2',
 797         '73.0.3664.4',
 798         '73.0.3665.1',
 799         '73.0.3665.0',
 800         '72.0.3626.51',
 801         '71.0.3578.124',
 802         '72.0.3626.50',
 803         '73.0.3664.3',
 804         '73.0.3664.2',
 805         '73.0.3664.1',
 806         '73.0.3664.0',
 807         '73.0.3663.2',
 808         '72.0.3626.49',
 809         '71.0.3578.123',
 810         '73.0.3663.1',
 811         '73.0.3663.0',
 812         '72.0.3626.48',
 813         '71.0.3578.122',
 814         '73.0.3662.1',
 815         '73.0.3662.0',
 816         '72.0.3626.47',
 817         '71.0.3578.121',
 818         '73.0.3661.1',
 819         '72.0.3626.46',
 820         '73.0.3661.0',
 821         '72.0.3626.45',
 822         '71.0.3578.120',
 823         '73.0.3660.2',
 824         '73.0.3660.1',
 825         '73.0.3660.0',
 826         '72.0.3626.44',
 827         '71.0.3578.119',
 828         '73.0.3659.1',
 829         '73.0.3659.0',
 830         '72.0.3626.43',
 831         '71.0.3578.118',
 832         '73.0.3658.1',
 833         '73.0.3658.0',
 834         '72.0.3626.42',
 835         '71.0.3578.117',
 836         '73.0.3657.1',
 837         '73.0.3657.0',
 838         '72.0.3626.41',
 839         '71.0.3578.116',
 840         '73.0.3656.1',
 841         '73.0.3656.0',
 842         '72.0.3626.40',
 843         '71.0.3578.115',
 844         '73.0.3655.1',
 845         '73.0.3655.0',
 846         '72.0.3626.39',
 847         '71.0.3578.114',
 848         '73.0.3654.1',
 849         '73.0.3654.0',
 850         '72.0.3626.38',
 851         '71.0.3578.113',
 852         '73.0.3653.1',
 853         '73.0.3653.0',
 854         '72.0.3626.37',
 855         '71.0.3578.112',
 856         '73.0.3652.1',
 857         '73.0.3652.0',
 858         '72.0.3626.36',
 859         '71.0.3578.111',
 860         '73.0.3651.1',
 861         '73.0.3651.0',
 862         '72.0.3626.35',
 863         '71.0.3578.110',
 864         '73.0.3650.1',
 865         '73.0.3650.0',
 866         '72.0.3626.34',
 867         '71.0.3578.109',
 868         '73.0.3649.1',
 869         '73.0.3649.0',
 870         '72.0.3626.33',
 871         '71.0.3578.108',
 872         '73.0.3648.2',
 873         '73.0.3648.1',
 874         '73.0.3648.0',
 875         '72.0.3626.32',
 876         '71.0.3578.107',
 877         '73.0.3647.2',
 878         '73.0.3647.1',
 879         '73.0.3647.0',
 880         '72.0.3626.31',
 881         '71.0.3578.106',
 882         '73.0.3635.3',
 883         '73.0.3646.2',
 884         '73.0.3646.1',
 885         '73.0.3646.0',
 886         '72.0.3626.30',
 887         '71.0.3578.105',
 888         '72.0.3626.29',
 889         '73.0.3645.2',
 890         '73.0.3645.1',
 891         '73.0.3645.0',
 892         '72.0.3626.28',
 893         '71.0.3578.104',
 894         '72.0.3626.27',
 895         '72.0.3626.26',
 896         '72.0.3626.25',
 897         '72.0.3626.24',
 898         '73.0.3644.0',
 899         '73.0.3643.2',
 900         '72.0.3626.23',
 901         '71.0.3578.103',
 902         '73.0.3643.1',
 903         '73.0.3643.0',
 904         '72.0.3626.22',
 905         '71.0.3578.102',
 906         '73.0.3642.1',
 907         '73.0.3642.0',
 908         '72.0.3626.21',
 909         '71.0.3578.101',
 910         '73.0.3641.1',
 911         '73.0.3641.0',
 912         '72.0.3626.20',
 913         '71.0.3578.100',
 914         '72.0.3626.19',
 915         '73.0.3640.1',
 916         '73.0.3640.0',
 917         '72.0.3626.18',
 918         '73.0.3639.1',
 919         '71.0.3578.99',
 920         '73.0.3639.0',
 921         '72.0.3626.17',
 922         '73.0.3638.2',
 923         '72.0.3626.16',
 924         '73.0.3638.1',
 925         '73.0.3638.0',
 926         '72.0.3626.15',
 927         '71.0.3578.98',
 928         '73.0.3635.2',
 929         '71.0.3578.97',
 930         '73.0.3637.1',
 931         '73.0.3637.0',
 932         '72.0.3626.14',
 933         '71.0.3578.96',
 934         '71.0.3578.95',
 935         '72.0.3626.13',
 936         '71.0.3578.94',
 937         '73.0.3636.2',
 938         '71.0.3578.93',
 939         '73.0.3636.1',
 940         '73.0.3636.0',
 941         '72.0.3626.12',
 942         '71.0.3578.92',
 943         '73.0.3635.1',
 944         '73.0.3635.0',
 945         '72.0.3626.11',
 946         '71.0.3578.91',
 947         '73.0.3634.2',
 948         '73.0.3634.1',
 949         '73.0.3634.0',
 950         '72.0.3626.10',
 951         '71.0.3578.90',
 952         '71.0.3578.89',
 953         '73.0.3633.2',
 954         '73.0.3633.1',
 955         '73.0.3633.0',
 956         '72.0.3610.4',
 957         '72.0.3626.9',
 958         '71.0.3578.88',
 959         '73.0.3632.5',
 960         '73.0.3632.4',
 961         '73.0.3632.3',
 962         '73.0.3632.2',
 963         '73.0.3632.1',
 964         '73.0.3632.0',
 965         '72.0.3626.8',
 966         '71.0.3578.87',
 967         '73.0.3631.2',
 968         '73.0.3631.1',
 969         '73.0.3631.0',
 970         '72.0.3626.7',
 971         '71.0.3578.86',
 972         '72.0.3626.6',
 973         '73.0.3630.1',
 974         '73.0.3630.0',
 975         '72.0.3626.5',
 976         '71.0.3578.85',
 977         '72.0.3626.4',
 978         '73.0.3628.3',
 979         '73.0.3628.2',
 980         '73.0.3629.1',
 981         '73.0.3629.0',
 982         '72.0.3626.3',
 983         '71.0.3578.84',
 984         '73.0.3628.1',
 985         '73.0.3628.0',
 986         '71.0.3578.83',
 987         '73.0.3627.1',
 988         '73.0.3627.0',
 989         '72.0.3626.2',
 990         '71.0.3578.82',
 991         '71.0.3578.81',
 992         '71.0.3578.80',
 993         '72.0.3626.1',
 994         '72.0.3626.0',
 995         '71.0.3578.79',
 996         '70.0.3538.124',
 997         '71.0.3578.78',
 998         '72.0.3623.4',
 999         '72.0.3625.2',
1000         '72.0.3625.1',
1001         '72.0.3625.0',
1002         '71.0.3578.77',
1003         '70.0.3538.123',
1004         '72.0.3624.4',
1005         '72.0.3624.3',
1006         '72.0.3624.2',
1007         '71.0.3578.76',
1008         '72.0.3624.1',
1009         '72.0.3624.0',
1010         '72.0.3623.3',
1011         '71.0.3578.75',
1012         '70.0.3538.122',
1013         '71.0.3578.74',
1014         '72.0.3623.2',
1015         '72.0.3610.3',
1016         '72.0.3623.1',
1017         '72.0.3623.0',
1018         '72.0.3622.3',
1019         '72.0.3622.2',
1020         '71.0.3578.73',
1021         '70.0.3538.121',
1022         '72.0.3622.1',
1023         '72.0.3622.0',
1024         '71.0.3578.72',
1025         '70.0.3538.120',
1026         '72.0.3621.1',
1027         '72.0.3621.0',
1028         '71.0.3578.71',
1029         '70.0.3538.119',
1030         '72.0.3620.1',
1031         '72.0.3620.0',
1032         '71.0.3578.70',
1033         '70.0.3538.118',
1034         '71.0.3578.69',
1035         '72.0.3619.1',
1036         '72.0.3619.0',
1037         '71.0.3578.68',
1038         '70.0.3538.117',
1039         '71.0.3578.67',
1040         '72.0.3618.1',
1041         '72.0.3618.0',
1042         '71.0.3578.66',
1043         '70.0.3538.116',
1044         '72.0.3617.1',
1045         '72.0.3617.0',
1046         '71.0.3578.65',
1047         '70.0.3538.115',
1048         '72.0.3602.3',
1049         '71.0.3578.64',
1050         '72.0.3616.1',
1051         '72.0.3616.0',
1052         '71.0.3578.63',
1053         '70.0.3538.114',
1054         '71.0.3578.62',
1055         '72.0.3615.1',
1056         '72.0.3615.0',
1057         '71.0.3578.61',
1058         '70.0.3538.113',
1059         '72.0.3614.1',
1060         '72.0.3614.0',
1061         '71.0.3578.60',
1062         '70.0.3538.112',
1063         '72.0.3613.1',
1064         '72.0.3613.0',
1065         '71.0.3578.59',
1066         '70.0.3538.111',
1067         '72.0.3612.2',
1068         '72.0.3612.1',
1069         '72.0.3612.0',
1070         '70.0.3538.110',
1071         '71.0.3578.58',
1072         '70.0.3538.109',
1073         '72.0.3611.2',
1074         '72.0.3611.1',
1075         '72.0.3611.0',
1076         '71.0.3578.57',
1077         '70.0.3538.108',
1078         '72.0.3610.2',
1079         '71.0.3578.56',
1080         '71.0.3578.55',
1081         '72.0.3610.1',
1082         '72.0.3610.0',
1083         '71.0.3578.54',
1084         '70.0.3538.107',
1085         '71.0.3578.53',
1086         '72.0.3609.3',
1087         '71.0.3578.52',
1088         '72.0.3609.2',
1089         '71.0.3578.51',
1090         '72.0.3608.5',
1091         '72.0.3609.1',
1092         '72.0.3609.0',
1093         '71.0.3578.50',
1094         '70.0.3538.106',
1095         '72.0.3608.4',
1096         '72.0.3608.3',
1097         '72.0.3608.2',
1098         '71.0.3578.49',
1099         '72.0.3608.1',
1100         '72.0.3608.0',
1101         '70.0.3538.105',
1102         '71.0.3578.48',
1103         '72.0.3607.1',
1104         '72.0.3607.0',
1105         '71.0.3578.47',
1106         '70.0.3538.104',
1107         '72.0.3606.2',
1108         '72.0.3606.1',
1109         '72.0.3606.0',
1110         '71.0.3578.46',
1111         '70.0.3538.103',
1112         '70.0.3538.102',
1113         '72.0.3605.3',
1114         '72.0.3605.2',
1115         '72.0.3605.1',
1116         '72.0.3605.0',
1117         '71.0.3578.45',
1118         '70.0.3538.101',
1119         '71.0.3578.44',
1120         '71.0.3578.43',
1121         '70.0.3538.100',
1122         '70.0.3538.99',
1123         '71.0.3578.42',
1124         '72.0.3604.1',
1125         '72.0.3604.0',
1126         '71.0.3578.41',
1127         '70.0.3538.98',
1128         '71.0.3578.40',
1129         '72.0.3603.2',
1130         '72.0.3603.1',
1131         '72.0.3603.0',
1132         '71.0.3578.39',
1133         '70.0.3538.97',
1134         '72.0.3602.2',
1135         '71.0.3578.38',
1136         '71.0.3578.37',
1137         '72.0.3602.1',
1138         '72.0.3602.0',
1139         '71.0.3578.36',
1140         '70.0.3538.96',
1141         '72.0.3601.1',
1142         '72.0.3601.0',
1143         '71.0.3578.35',
1144         '70.0.3538.95',
1145         '72.0.3600.1',
1146         '72.0.3600.0',
1147         '71.0.3578.34',
1148         '70.0.3538.94',
1149         '72.0.3599.3',
1150         '72.0.3599.2',
1151         '72.0.3599.1',
1152         '72.0.3599.0',
1153         '71.0.3578.33',
1154         '70.0.3538.93',
1155         '72.0.3598.1',
1156         '72.0.3598.0',
1157         '71.0.3578.32',
1158         '70.0.3538.87',
1159         '72.0.3597.1',
1160         '72.0.3597.0',
1161         '72.0.3596.2',
1162         '71.0.3578.31',
1163         '70.0.3538.86',
1164         '71.0.3578.30',
1165         '71.0.3578.29',
1166         '72.0.3596.1',
1167         '72.0.3596.0',
1168         '71.0.3578.28',
1169         '70.0.3538.85',
1170         '72.0.3595.2',
1171         '72.0.3591.3',
1172         '72.0.3595.1',
1173         '72.0.3595.0',
1174         '71.0.3578.27',
1175         '70.0.3538.84',
1176         '72.0.3594.1',
1177         '72.0.3594.0',
1178         '71.0.3578.26',
1179         '70.0.3538.83',
1180         '72.0.3593.2',
1181         '72.0.3593.1',
1182         '72.0.3593.0',
1183         '71.0.3578.25',
1184         '70.0.3538.82',
1185         '72.0.3589.3',
1186         '72.0.3592.2',
1187         '72.0.3592.1',
1188         '72.0.3592.0',
1189         '71.0.3578.24',
1190         '72.0.3589.2',
1191         '70.0.3538.81',
1192         '70.0.3538.80',
1193         '72.0.3591.2',
1194         '72.0.3591.1',
1195         '72.0.3591.0',
1196         '71.0.3578.23',
1197         '70.0.3538.79',
1198         '71.0.3578.22',
1199         '72.0.3590.1',
1200         '72.0.3590.0',
1201         '71.0.3578.21',
1202         '70.0.3538.78',
1203         '70.0.3538.77',
1204         '72.0.3589.1',
1205         '72.0.3589.0',
1206         '71.0.3578.20',
1207         '70.0.3538.76',
1208         '71.0.3578.19',
1209         '70.0.3538.75',
1210         '72.0.3588.1',
1211         '72.0.3588.0',
1212         '71.0.3578.18',
1213         '70.0.3538.74',
1214         '72.0.3586.2',
1215         '72.0.3587.0',
1216         '71.0.3578.17',
1217         '70.0.3538.73',
1218         '72.0.3586.1',
1219         '72.0.3586.0',
1220         '71.0.3578.16',
1221         '70.0.3538.72',
1222         '72.0.3585.1',
1223         '72.0.3585.0',
1224         '71.0.3578.15',
1225         '70.0.3538.71',
1226         '71.0.3578.14',
1227         '72.0.3584.1',
1228         '72.0.3584.0',
1229         '71.0.3578.13',
1230         '70.0.3538.70',
1231         '72.0.3583.2',
1232         '71.0.3578.12',
1233         '72.0.3583.1',
1234         '72.0.3583.0',
1235         '71.0.3578.11',
1236         '70.0.3538.69',
1237         '71.0.3578.10',
1238         '72.0.3582.0',
1239         '72.0.3581.4',
1240         '71.0.3578.9',
1241         '70.0.3538.67',
1242         '72.0.3581.3',
1243         '72.0.3581.2',
1244         '72.0.3581.1',
1245         '72.0.3581.0',
1246         '71.0.3578.8',
1247         '70.0.3538.66',
1248         '72.0.3580.1',
1249         '72.0.3580.0',
1250         '71.0.3578.7',
1251         '70.0.3538.65',
1252         '71.0.3578.6',
1253         '72.0.3579.1',
1254         '72.0.3579.0',
1255         '71.0.3578.5',
1256         '70.0.3538.64',
1257         '71.0.3578.4',
1258         '71.0.3578.3',
1259         '71.0.3578.2',
1260         '71.0.3578.1',
1261         '71.0.3578.0',
1262         '70.0.3538.63',
1263         '69.0.3497.128',
1264         '70.0.3538.62',
1265         '70.0.3538.61',
1266         '70.0.3538.60',
1267         '70.0.3538.59',
1268         '71.0.3577.1',
1269         '71.0.3577.0',
1270         '70.0.3538.58',
1271         '69.0.3497.127',
1272         '71.0.3576.2',
1273         '71.0.3576.1',
1274         '71.0.3576.0',
1275         '70.0.3538.57',
1276         '70.0.3538.56',
1277         '71.0.3575.2',
1278         '70.0.3538.55',
1279         '69.0.3497.126',
1280         '70.0.3538.54',
1281         '71.0.3575.1',
1282         '71.0.3575.0',
1283         '71.0.3574.1',
1284         '71.0.3574.0',
1285         '70.0.3538.53',
1286         '69.0.3497.125',
1287         '70.0.3538.52',
1288         '71.0.3573.1',
1289         '71.0.3573.0',
1290         '70.0.3538.51',
1291         '69.0.3497.124',
1292         '71.0.3572.1',
1293         '71.0.3572.0',
1294         '70.0.3538.50',
1295         '69.0.3497.123',
1296         '71.0.3571.2',
1297         '70.0.3538.49',
1298         '69.0.3497.122',
1299         '71.0.3571.1',
1300         '71.0.3571.0',
1301         '70.0.3538.48',
1302         '69.0.3497.121',
1303         '71.0.3570.1',
1304         '71.0.3570.0',
1305         '70.0.3538.47',
1306         '69.0.3497.120',
1307         '71.0.3568.2',
1308         '71.0.3569.1',
1309         '71.0.3569.0',
1310         '70.0.3538.46',
1311         '69.0.3497.119',
1312         '70.0.3538.45',
1313         '71.0.3568.1',
1314         '71.0.3568.0',
1315         '70.0.3538.44',
1316         '69.0.3497.118',
1317         '70.0.3538.43',
1318         '70.0.3538.42',
1319         '71.0.3567.1',
1320         '71.0.3567.0',
1321         '70.0.3538.41',
1322         '69.0.3497.117',
1323         '71.0.3566.1',
1324         '71.0.3566.0',
1325         '70.0.3538.40',
1326         '69.0.3497.116',
1327         '71.0.3565.1',
1328         '71.0.3565.0',
1329         '70.0.3538.39',
1330         '69.0.3497.115',
1331         '71.0.3564.1',
1332         '71.0.3564.0',
1333         '70.0.3538.38',
1334         '69.0.3497.114',
1335         '71.0.3563.0',
1336         '71.0.3562.2',
1337         '70.0.3538.37',
1338         '69.0.3497.113',
1339         '70.0.3538.36',
1340         '70.0.3538.35',
1341         '71.0.3562.1',
1342         '71.0.3562.0',
1343         '70.0.3538.34',
1344         '69.0.3497.112',
1345         '70.0.3538.33',
1346         '71.0.3561.1',
1347         '71.0.3561.0',
1348         '70.0.3538.32',
1349         '69.0.3497.111',
1350         '71.0.3559.6',
1351         '71.0.3560.1',
1352         '71.0.3560.0',
1353         '71.0.3559.5',
1354         '71.0.3559.4',
1355         '70.0.3538.31',
1356         '69.0.3497.110',
1357         '71.0.3559.3',
1358         '70.0.3538.30',
1359         '69.0.3497.109',
1360         '71.0.3559.2',
1361         '71.0.3559.1',
1362         '71.0.3559.0',
1363         '70.0.3538.29',
1364         '69.0.3497.108',
1365         '71.0.3558.2',
1366         '71.0.3558.1',
1367         '71.0.3558.0',
1368         '70.0.3538.28',
1369         '69.0.3497.107',
1370         '71.0.3557.2',
1371         '71.0.3557.1',
1372         '71.0.3557.0',
1373         '70.0.3538.27',
1374         '69.0.3497.106',
1375         '71.0.3554.4',
1376         '70.0.3538.26',
1377         '71.0.3556.1',
1378         '71.0.3556.0',
1379         '70.0.3538.25',
1380         '71.0.3554.3',
1381         '69.0.3497.105',
1382         '71.0.3554.2',
1383         '70.0.3538.24',
1384         '69.0.3497.104',
1385         '71.0.3555.2',
1386         '70.0.3538.23',
1387         '71.0.3555.1',
1388         '71.0.3555.0',
1389         '70.0.3538.22',
1390         '69.0.3497.103',
1391         '71.0.3554.1',
1392         '71.0.3554.0',
1393         '70.0.3538.21',
1394         '69.0.3497.102',
1395         '71.0.3553.3',
1396         '70.0.3538.20',
1397         '69.0.3497.101',
1398         '71.0.3553.2',
1399         '69.0.3497.100',
1400         '71.0.3553.1',
1401         '71.0.3553.0',
1402         '70.0.3538.19',
1403         '69.0.3497.99',
1404         '69.0.3497.98',
1405         '69.0.3497.97',
1406         '71.0.3552.6',
1407         '71.0.3552.5',
1408         '71.0.3552.4',
1409         '71.0.3552.3',
1410         '71.0.3552.2',
1411         '71.0.3552.1',
1412         '71.0.3552.0',
1413         '70.0.3538.18',
1414         '69.0.3497.96',
1415         '71.0.3551.3',
1416         '71.0.3551.2',
1417         '71.0.3551.1',
1418         '71.0.3551.0',
1419         '70.0.3538.17',
1420         '69.0.3497.95',
1421         '71.0.3550.3',
1422         '71.0.3550.2',
1423         '71.0.3550.1',
1424         '71.0.3550.0',
1425         '70.0.3538.16',
1426         '69.0.3497.94',
1427         '71.0.3549.1',
1428         '71.0.3549.0',
1429         '70.0.3538.15',
1430         '69.0.3497.93',
1431         '69.0.3497.92',
1432         '71.0.3548.1',
1433         '71.0.3548.0',
1434         '70.0.3538.14',
1435         '69.0.3497.91',
1436         '71.0.3547.1',
1437         '71.0.3547.0',
1438         '70.0.3538.13',
1439         '69.0.3497.90',
1440         '71.0.3546.2',
1441         '69.0.3497.89',
1442         '71.0.3546.1',
1443         '71.0.3546.0',
1444         '70.0.3538.12',
1445         '69.0.3497.88',
1446         '71.0.3545.4',
1447         '71.0.3545.3',
1448         '71.0.3545.2',
1449         '71.0.3545.1',
1450         '71.0.3545.0',
1451         '70.0.3538.11',
1452         '69.0.3497.87',
1453         '71.0.3544.5',
1454         '71.0.3544.4',
1455         '71.0.3544.3',
1456         '71.0.3544.2',
1457         '71.0.3544.1',
1458         '71.0.3544.0',
1459         '69.0.3497.86',
1460         '70.0.3538.10',
1461         '69.0.3497.85',
1462         '70.0.3538.9',
1463         '69.0.3497.84',
1464         '71.0.3543.4',
1465         '70.0.3538.8',
1466         '71.0.3543.3',
1467         '71.0.3543.2',
1468         '71.0.3543.1',
1469         '71.0.3543.0',
1470         '70.0.3538.7',
1471         '69.0.3497.83',
1472         '71.0.3542.2',
1473         '71.0.3542.1',
1474         '71.0.3542.0',
1475         '70.0.3538.6',
1476         '69.0.3497.82',
1477         '69.0.3497.81',
1478         '71.0.3541.1',
1479         '71.0.3541.0',
1480         '70.0.3538.5',
1481         '69.0.3497.80',
1482         '71.0.3540.1',
1483         '71.0.3540.0',
1484         '70.0.3538.4',
1485         '69.0.3497.79',
1486         '70.0.3538.3',
1487         '71.0.3539.1',
1488         '71.0.3539.0',
1489         '69.0.3497.78',
1490         '68.0.3440.134',
1491         '69.0.3497.77',
1492         '70.0.3538.2',
1493         '70.0.3538.1',
1494         '70.0.3538.0',
1495         '69.0.3497.76',
1496         '68.0.3440.133',
1497         '69.0.3497.75',
1498         '70.0.3537.2',
1499         '70.0.3537.1',
1500         '70.0.3537.0',
1501         '69.0.3497.74',
1502         '68.0.3440.132',
1503         '70.0.3536.0',
1504         '70.0.3535.5',
1505         '70.0.3535.4',
1506         '70.0.3535.3',
1507         '69.0.3497.73',
1508         '68.0.3440.131',
1509         '70.0.3532.8',
1510         '70.0.3532.7',
1511         '69.0.3497.72',
1512         '69.0.3497.71',
1513         '70.0.3535.2',
1514         '70.0.3535.1',
1515         '70.0.3535.0',
1516         '69.0.3497.70',
1517         '68.0.3440.130',
1518         '69.0.3497.69',
1519         '68.0.3440.129',
1520         '70.0.3534.4',
1521         '70.0.3534.3',
1522         '70.0.3534.2',
1523         '70.0.3534.1',
1524         '70.0.3534.0',
1525         '69.0.3497.68',
1526         '68.0.3440.128',
1527         '70.0.3533.2',
1528         '70.0.3533.1',
1529         '70.0.3533.0',
1530         '69.0.3497.67',
1531         '68.0.3440.127',
1532         '70.0.3532.6',
1533         '70.0.3532.5',
1534         '70.0.3532.4',
1535         '69.0.3497.66',
1536         '68.0.3440.126',
1537         '70.0.3532.3',
1538         '70.0.3532.2',
1539         '70.0.3532.1',
1540         '69.0.3497.60',
1541         '69.0.3497.65',
1542         '69.0.3497.64',
1543         '70.0.3532.0',
1544         '70.0.3531.0',
1545         '70.0.3530.4',
1546         '70.0.3530.3',
1547         '70.0.3530.2',
1548         '69.0.3497.58',
1549         '68.0.3440.125',
1550         '69.0.3497.57',
1551         '69.0.3497.56',
1552         '69.0.3497.55',
1553         '69.0.3497.54',
1554         '70.0.3530.1',
1555         '70.0.3530.0',
1556         '69.0.3497.53',
1557         '68.0.3440.124',
1558         '69.0.3497.52',
1559         '70.0.3529.3',
1560         '70.0.3529.2',
1561         '70.0.3529.1',
1562         '70.0.3529.0',
1563         '69.0.3497.51',
1564         '70.0.3528.4',
1565         '68.0.3440.123',
1566         '70.0.3528.3',
1567         '70.0.3528.2',
1568         '70.0.3528.1',
1569         '70.0.3528.0',
1570         '69.0.3497.50',
1571         '68.0.3440.122',
1572         '70.0.3527.1',
1573         '70.0.3527.0',
1574         '69.0.3497.49',
1575         '68.0.3440.121',
1576         '70.0.3526.1',
1577         '70.0.3526.0',
1578         '68.0.3440.120',
1579         '69.0.3497.48',
1580         '69.0.3497.47',
1581         '68.0.3440.119',
1582         '68.0.3440.118',
1583         '70.0.3525.5',
1584         '70.0.3525.4',
1585         '70.0.3525.3',
1586         '68.0.3440.117',
1587         '69.0.3497.46',
1588         '70.0.3525.2',
1589         '70.0.3525.1',
1590         '70.0.3525.0',
1591         '69.0.3497.45',
1592         '68.0.3440.116',
1593         '70.0.3524.4',
1594         '70.0.3524.3',
1595         '69.0.3497.44',
1596         '70.0.3524.2',
1597         '70.0.3524.1',
1598         '70.0.3524.0',
1599         '70.0.3523.2',
1600         '69.0.3497.43',
1601         '68.0.3440.115',
1602         '70.0.3505.9',
1603         '69.0.3497.42',
1604         '70.0.3505.8',
1605         '70.0.3523.1',
1606         '70.0.3523.0',
1607         '69.0.3497.41',
1608         '68.0.3440.114',
1609         '70.0.3505.7',
1610         '69.0.3497.40',
1611         '70.0.3522.1',
1612         '70.0.3522.0',
1613         '70.0.3521.2',
1614         '69.0.3497.39',
1615         '68.0.3440.113',
1616         '70.0.3505.6',
1617         '70.0.3521.1',
1618         '70.0.3521.0',
1619         '69.0.3497.38',
1620         '68.0.3440.112',
1621         '70.0.3520.1',
1622         '70.0.3520.0',
1623         '69.0.3497.37',
1624         '68.0.3440.111',
1625         '70.0.3519.3',
1626         '70.0.3519.2',
1627         '70.0.3519.1',
1628         '70.0.3519.0',
1629         '69.0.3497.36',
1630         '68.0.3440.110',
1631         '70.0.3518.1',
1632         '70.0.3518.0',
1633         '69.0.3497.35',
1634         '69.0.3497.34',
1635         '68.0.3440.109',
1636         '70.0.3517.1',
1637         '70.0.3517.0',
1638         '69.0.3497.33',
1639         '68.0.3440.108',
1640         '69.0.3497.32',
1641         '70.0.3516.3',
1642         '70.0.3516.2',
1643         '70.0.3516.1',
1644         '70.0.3516.0',
1645         '69.0.3497.31',
1646         '68.0.3440.107',
1647         '70.0.3515.4',
1648         '68.0.3440.106',
1649         '70.0.3515.3',
1650         '70.0.3515.2',
1651         '70.0.3515.1',
1652         '70.0.3515.0',
1653         '69.0.3497.30',
1654         '68.0.3440.105',
1655         '68.0.3440.104',
1656         '70.0.3514.2',
1657         '70.0.3514.1',
1658         '70.0.3514.0',
1659         '69.0.3497.29',
1660         '68.0.3440.103',
1661         '70.0.3513.1',
1662         '70.0.3513.0',
1663         '69.0.3497.28',
1664     )
1665     return _USER_AGENT_TPL % random.choice(_CHROME_VERSIONS)
1666
1667
1668 std_headers = {
1669     'User-Agent': random_user_agent(),
1670     'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
1671     'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
1672     'Accept-Encoding': 'gzip, deflate',
1673     'Accept-Language': 'en-us,en;q=0.5',
1674 }
1675
1676
1677 USER_AGENTS = {
1678     'Safari': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27',
1679 }
1680
1681
1682 NO_DEFAULT = object()
1683
1684 ENGLISH_MONTH_NAMES = [
1685     'January', 'February', 'March', 'April', 'May', 'June',
1686     'July', 'August', 'September', 'October', 'November', 'December']
1687
1688 MONTH_NAMES = {
1689     'en': ENGLISH_MONTH_NAMES,
1690     'fr': [
1691         'janvier', 'février', 'mars', 'avril', 'mai', 'juin',
1692         'juillet', 'août', 'septembre', 'octobre', 'novembre', 'décembre'],
1693 }
1694
1695 KNOWN_EXTENSIONS = (
1696     'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'aac',
1697     'flv', 'f4v', 'f4a', 'f4b',
1698     'webm', 'ogg', 'ogv', 'oga', 'ogx', 'spx', 'opus',
1699     'mkv', 'mka', 'mk3d',
1700     'avi', 'divx',
1701     'mov',
1702     'asf', 'wmv', 'wma',
1703     '3gp', '3g2',
1704     'mp3',
1705     'flac',
1706     'ape',
1707     'wav',
1708     'f4f', 'f4m', 'm3u8', 'smil')
1709
1710 # needed for sanitizing filenames in restricted mode
1711 ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ',
1712                         itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUY', ['TH', 'ss'],
1713                                         'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuy', ['th'], 'y')))
1714
1715 DATE_FORMATS = (
1716     '%d %B %Y',
1717     '%d %b %Y',
1718     '%B %d %Y',
1719     '%B %dst %Y',
1720     '%B %dnd %Y',
1721     '%B %drd %Y',
1722     '%B %dth %Y',
1723     '%b %d %Y',
1724     '%b %dst %Y',
1725     '%b %dnd %Y',
1726     '%b %drd %Y',
1727     '%b %dth %Y',
1728     '%b %dst %Y %I:%M',
1729     '%b %dnd %Y %I:%M',
1730     '%b %drd %Y %I:%M',
1731     '%b %dth %Y %I:%M',
1732     '%Y %m %d',
1733     '%Y-%m-%d',
1734     '%Y/%m/%d',
1735     '%Y/%m/%d %H:%M',
1736     '%Y/%m/%d %H:%M:%S',
1737     '%Y-%m-%d %H:%M',
1738     '%Y-%m-%d %H:%M:%S',
1739     '%Y-%m-%d %H:%M:%S.%f',
1740     '%d.%m.%Y %H:%M',
1741     '%d.%m.%Y %H.%M',
1742     '%Y-%m-%dT%H:%M:%SZ',
1743     '%Y-%m-%dT%H:%M:%S.%fZ',
1744     '%Y-%m-%dT%H:%M:%S.%f0Z',
1745     '%Y-%m-%dT%H:%M:%S',
1746     '%Y-%m-%dT%H:%M:%S.%f',
1747     '%Y-%m-%dT%H:%M',
1748     '%b %d %Y at %H:%M',
1749     '%b %d %Y at %H:%M:%S',
1750     '%B %d %Y at %H:%M',
1751     '%B %d %Y at %H:%M:%S',
1752 )
1753
1754 DATE_FORMATS_DAY_FIRST = list(DATE_FORMATS)
1755 DATE_FORMATS_DAY_FIRST.extend([
1756     '%d-%m-%Y',
1757     '%d.%m.%Y',
1758     '%d.%m.%y',
1759     '%d/%m/%Y',
1760     '%d/%m/%y',
1761     '%d/%m/%Y %H:%M:%S',
1762 ])
1763
1764 DATE_FORMATS_MONTH_FIRST = list(DATE_FORMATS)
1765 DATE_FORMATS_MONTH_FIRST.extend([
1766     '%m-%d-%Y',
1767     '%m.%d.%Y',
1768     '%m/%d/%Y',
1769     '%m/%d/%y',
1770     '%m/%d/%Y %H:%M:%S',
1771 ])
1772
1773 PACKED_CODES_RE = r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)"
1774 JSON_LD_RE = r'(?is)<script[^>]+type=(["\']?)application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>'
1775
1776
1777 def preferredencoding():
1778     """Get preferred encoding.
1779
1780     Returns the best encoding scheme for the system, based on
1781     locale.getpreferredencoding() and some further tweaks.
1782     """
1783     try:
1784         pref = locale.getpreferredencoding()
1785         'TEST'.encode(pref)
1786     except Exception:
1787         pref = 'UTF-8'
1788
1789     return pref
1790
1791
1792 def write_json_file(obj, fn):
1793     """ Encode obj as JSON and write it to fn, atomically if possible """
1794
1795     fn = encodeFilename(fn)
1796     if sys.version_info < (3, 0) and sys.platform != 'win32':
1797         encoding = get_filesystem_encoding()
1798         # os.path.basename returns a bytes object, but NamedTemporaryFile
1799         # will fail if the filename contains non ascii characters unless we
1800         # use a unicode object
1801         path_basename = lambda f: os.path.basename(fn).decode(encoding)
1802         # the same for os.path.dirname
1803         path_dirname = lambda f: os.path.dirname(fn).decode(encoding)
1804     else:
1805         path_basename = os.path.basename
1806         path_dirname = os.path.dirname
1807
1808     args = {
1809         'suffix': '.tmp',
1810         'prefix': path_basename(fn) + '.',
1811         'dir': path_dirname(fn),
1812         'delete': False,
1813     }
1814
1815     # In Python 2.x, json.dump expects a bytestream.
1816     # In Python 3.x, it writes to a character stream
1817     if sys.version_info < (3, 0):
1818         args['mode'] = 'wb'
1819     else:
1820         args.update({
1821             'mode': 'w',
1822             'encoding': 'utf-8',
1823         })
1824
1825     tf = tempfile.NamedTemporaryFile(**compat_kwargs(args))
1826
1827     try:
1828         with tf:
1829             json.dump(obj, tf)
1830         if sys.platform == 'win32':
1831             # Need to remove existing file on Windows, else os.rename raises
1832             # WindowsError or FileExistsError.
1833             try:
1834                 os.unlink(fn)
1835             except OSError:
1836                 pass
1837         os.rename(tf.name, fn)
1838     except Exception:
1839         try:
1840             os.remove(tf.name)
1841         except OSError:
1842             pass
1843         raise
1844
1845
1846 if sys.version_info >= (2, 7):
1847     def find_xpath_attr(node, xpath, key, val=None):
1848         """ Find the xpath xpath[@key=val] """
1849         assert re.match(r'^[a-zA-Z_-]+$', key)
1850         expr = xpath + ('[@%s]' % key if val is None else "[@%s='%s']" % (key, val))
1851         return node.find(expr)
1852 else:
1853     def find_xpath_attr(node, xpath, key, val=None):
1854         for f in node.findall(compat_xpath(xpath)):
1855             if key not in f.attrib:
1856                 continue
1857             if val is None or f.attrib.get(key) == val:
1858                 return f
1859         return None
1860
1861 # On python2.6 the xml.etree.ElementTree.Element methods don't support
1862 # the namespace parameter
1863
1864
1865 def xpath_with_ns(path, ns_map):
1866     components = [c.split(':') for c in path.split('/')]
1867     replaced = []
1868     for c in components:
1869         if len(c) == 1:
1870             replaced.append(c[0])
1871         else:
1872             ns, tag = c
1873             replaced.append('{%s}%s' % (ns_map[ns], tag))
1874     return '/'.join(replaced)
1875
1876
1877 def xpath_element(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
1878     def _find_xpath(xpath):
1879         return node.find(compat_xpath(xpath))
1880
1881     if isinstance(xpath, (str, compat_str)):
1882         n = _find_xpath(xpath)
1883     else:
1884         for xp in xpath:
1885             n = _find_xpath(xp)
1886             if n is not None:
1887                 break
1888
1889     if n is None:
1890         if default is not NO_DEFAULT:
1891             return default
1892         elif fatal:
1893             name = xpath if name is None else name
1894             raise ExtractorError('Could not find XML element %s' % name)
1895         else:
1896             return None
1897     return n
1898
1899
1900 def xpath_text(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
1901     n = xpath_element(node, xpath, name, fatal=fatal, default=default)
1902     if n is None or n == default:
1903         return n
1904     if n.text is None:
1905         if default is not NO_DEFAULT:
1906             return default
1907         elif fatal:
1908             name = xpath if name is None else name
1909             raise ExtractorError('Could not find XML element\'s text %s' % name)
1910         else:
1911             return None
1912     return n.text
1913
1914
1915 def xpath_attr(node, xpath, key, name=None, fatal=False, default=NO_DEFAULT):
1916     n = find_xpath_attr(node, xpath, key)
1917     if n is None:
1918         if default is not NO_DEFAULT:
1919             return default
1920         elif fatal:
1921             name = '%s[@%s]' % (xpath, key) if name is None else name
1922             raise ExtractorError('Could not find XML attribute %s' % name)
1923         else:
1924             return None
1925     return n.attrib[key]
1926
1927
1928 def get_element_by_id(id, html):
1929     """Return the content of the tag with the specified ID in the passed HTML document"""
1930     return get_element_by_attribute('id', id, html)
1931
1932
1933 def get_element_by_class(class_name, html):
1934     """Return the content of the first tag with the specified class in the passed HTML document"""
1935     retval = get_elements_by_class(class_name, html)
1936     return retval[0] if retval else None
1937
1938
1939 def get_element_by_attribute(attribute, value, html, escape_value=True):
1940     retval = get_elements_by_attribute(attribute, value, html, escape_value)
1941     return retval[0] if retval else None
1942
1943
1944 def get_elements_by_class(class_name, html):
1945     """Return the content of all tags with the specified class in the passed HTML document as a list"""
1946     return get_elements_by_attribute(
1947         'class', r'[^\'"]*\b%s\b[^\'"]*' % re.escape(class_name),
1948         html, escape_value=False)
1949
1950
1951 def get_elements_by_attribute(attribute, value, html, escape_value=True):
1952     """Return the content of the tag with the specified attribute in the passed HTML document"""
1953
1954     value = re.escape(value) if escape_value else value
1955
1956     retlist = []
1957     for m in re.finditer(r'''(?xs)
1958         <([a-zA-Z0-9:._-]+)
1959          (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
1960          \s+%s=['"]?%s['"]?
1961          (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
1962         \s*>
1963         (?P<content>.*?)
1964         </\1>
1965     ''' % (re.escape(attribute), value), html):
1966         res = m.group('content')
1967
1968         if res.startswith('"') or res.startswith("'"):
1969             res = res[1:-1]
1970
1971         retlist.append(unescapeHTML(res))
1972
1973     return retlist
1974
1975
1976 class HTMLAttributeParser(compat_HTMLParser):
1977     """Trivial HTML parser to gather the attributes for a single element"""
1978     def __init__(self):
1979         self.attrs = {}
1980         compat_HTMLParser.__init__(self)
1981
1982     def handle_starttag(self, tag, attrs):
1983         self.attrs = dict(attrs)
1984
1985
1986 def extract_attributes(html_element):
1987     """Given a string for an HTML element such as
1988     <el
1989          a="foo" B="bar" c="&98;az" d=boz
1990          empty= noval entity="&amp;"
1991          sq='"' dq="'"
1992     >
1993     Decode and return a dictionary of attributes.
1994     {
1995         'a': 'foo', 'b': 'bar', c: 'baz', d: 'boz',
1996         'empty': '', 'noval': None, 'entity': '&',
1997         'sq': '"', 'dq': '\''
1998     }.
1999     NB HTMLParser is stricter in Python 2.6 & 3.2 than in later versions,
2000     but the cases in the unit test will work for all of 2.6, 2.7, 3.2-3.5.
2001     """
2002     parser = HTMLAttributeParser()
2003     try:
2004         parser.feed(html_element)
2005         parser.close()
2006     # Older Python may throw HTMLParseError in case of malformed HTML
2007     except compat_HTMLParseError:
2008         pass
2009     return parser.attrs
2010
2011
2012 def clean_html(html):
2013     """Clean an HTML snippet into a readable string"""
2014
2015     if html is None:  # Convenience for sanitizing descriptions etc.
2016         return html
2017
2018     # Newline vs <br />
2019     html = html.replace('\n', ' ')
2020     html = re.sub(r'(?u)\s*<\s*br\s*/?\s*>\s*', '\n', html)
2021     html = re.sub(r'(?u)<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n', html)
2022     # Strip html tags
2023     html = re.sub('<.*?>', '', html)
2024     # Replace html entities
2025     html = unescapeHTML(html)
2026     return html.strip()
2027
2028
2029 def sanitize_open(filename, open_mode):
2030     """Try to open the given filename, and slightly tweak it if this fails.
2031
2032     Attempts to open the given filename. If this fails, it tries to change
2033     the filename slightly, step by step, until it's either able to open it
2034     or it fails and raises a final exception, like the standard open()
2035     function.
2036
2037     It returns the tuple (stream, definitive_file_name).
2038     """
2039     try:
2040         if filename == '-':
2041             if sys.platform == 'win32':
2042                 import msvcrt
2043                 msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
2044             return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename)
2045         stream = open(encodeFilename(filename), open_mode)
2046         return (stream, filename)
2047     except (IOError, OSError) as err:
2048         if err.errno in (errno.EACCES,):
2049             raise
2050
2051         # In case of error, try to remove win32 forbidden chars
2052         alt_filename = sanitize_path(filename)
2053         if alt_filename == filename:
2054             raise
2055         else:
2056             # An exception here should be caught in the caller
2057             stream = open(encodeFilename(alt_filename), open_mode)
2058             return (stream, alt_filename)
2059
2060
2061 def timeconvert(timestr):
2062     """Convert RFC 2822 defined time string into system timestamp"""
2063     timestamp = None
2064     timetuple = email.utils.parsedate_tz(timestr)
2065     if timetuple is not None:
2066         timestamp = email.utils.mktime_tz(timetuple)
2067     return timestamp
2068
2069
2070 def sanitize_filename(s, restricted=False, is_id=False):
2071     """Sanitizes a string so it could be used as part of a filename.
2072     If restricted is set, use a stricter subset of allowed characters.
2073     Set is_id if this is not an arbitrary string, but an ID that should be kept
2074     if possible.
2075     """
2076     def replace_insane(char):
2077         if restricted and char in ACCENT_CHARS:
2078             return ACCENT_CHARS[char]
2079         if char == '?' or ord(char) < 32 or ord(char) == 127:
2080             return ''
2081         elif char == '"':
2082             return '' if restricted else '\''
2083         elif char == ':':
2084             return '_-' if restricted else ' -'
2085         elif char in '\\/|*<>':
2086             return '_'
2087         if restricted and (char in '!&\'()[]{}$;`^,#' or char.isspace()):
2088             return '_'
2089         if restricted and ord(char) > 127:
2090             return '_'
2091         return char
2092
2093     # Handle timestamps
2094     s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s)
2095     result = ''.join(map(replace_insane, s))
2096     if not is_id:
2097         while '__' in result:
2098             result = result.replace('__', '_')
2099         result = result.strip('_')
2100         # Common case of "Foreign band name - English song title"
2101         if restricted and result.startswith('-_'):
2102             result = result[2:]
2103         if result.startswith('-'):
2104             result = '_' + result[len('-'):]
2105         result = result.lstrip('.')
2106         if not result:
2107             result = '_'
2108     return result
2109
2110
2111 def sanitize_path(s):
2112     """Sanitizes and normalizes path on Windows"""
2113     if sys.platform != 'win32':
2114         return s
2115     drive_or_unc, _ = os.path.splitdrive(s)
2116     if sys.version_info < (2, 7) and not drive_or_unc:
2117         drive_or_unc, _ = os.path.splitunc(s)
2118     norm_path = os.path.normpath(remove_start(s, drive_or_unc)).split(os.path.sep)
2119     if drive_or_unc:
2120         norm_path.pop(0)
2121     sanitized_path = [
2122         path_part if path_part in ['.', '..'] else re.sub(r'(?:[/<>:"\|\\?\*]|[\s.]$)', '#', path_part)
2123         for path_part in norm_path]
2124     if drive_or_unc:
2125         sanitized_path.insert(0, drive_or_unc + os.path.sep)
2126     return os.path.join(*sanitized_path)
2127
2128
2129 def sanitize_url(url):
2130     # Prepend protocol-less URLs with `http:` scheme in order to mitigate
2131     # the number of unwanted failures due to missing protocol
2132     if url.startswith('//'):
2133         return 'http:%s' % url
2134     # Fix some common typos seen so far
2135     COMMON_TYPOS = (
2136         # https://github.com/ytdl-org/youtube-dl/issues/15649
2137         (r'^httpss://', r'https://'),
2138         # https://bx1.be/lives/direct-tv/
2139         (r'^rmtp([es]?)://', r'rtmp\1://'),
2140     )
2141     for mistake, fixup in COMMON_TYPOS:
2142         if re.match(mistake, url):
2143             return re.sub(mistake, fixup, url)
2144     return url
2145
2146
2147 def sanitized_Request(url, *args, **kwargs):
2148     return compat_urllib_request.Request(sanitize_url(url), *args, **kwargs)
2149
2150
2151 def expand_path(s):
2152     """Expand shell variables and ~"""
2153     return os.path.expandvars(compat_expanduser(s))
2154
2155
2156 def orderedSet(iterable):
2157     """ Remove all duplicates from the input iterable """
2158     res = []
2159     for el in iterable:
2160         if el not in res:
2161             res.append(el)
2162     return res
2163
2164
2165 def _htmlentity_transform(entity_with_semicolon):
2166     """Transforms an HTML entity to a character."""
2167     entity = entity_with_semicolon[:-1]
2168
2169     # Known non-numeric HTML entity
2170     if entity in compat_html_entities.name2codepoint:
2171         return compat_chr(compat_html_entities.name2codepoint[entity])
2172
2173     # TODO: HTML5 allows entities without a semicolon. For example,
2174     # '&Eacuteric' should be decoded as 'Éric'.
2175     if entity_with_semicolon in compat_html_entities_html5:
2176         return compat_html_entities_html5[entity_with_semicolon]
2177
2178     mobj = re.match(r'#(x[0-9a-fA-F]+|[0-9]+)', entity)
2179     if mobj is not None:
2180         numstr = mobj.group(1)
2181         if numstr.startswith('x'):
2182             base = 16
2183             numstr = '0%s' % numstr
2184         else:
2185             base = 10
2186         # See https://github.com/ytdl-org/youtube-dl/issues/7518
2187         try:
2188             return compat_chr(int(numstr, base))
2189         except ValueError:
2190             pass
2191
2192     # Unknown entity in name, return its literal representation
2193     return '&%s;' % entity
2194
2195
2196 def unescapeHTML(s):
2197     if s is None:
2198         return None
2199     assert type(s) == compat_str
2200
2201     return re.sub(
2202         r'&([^&;]+;)', lambda m: _htmlentity_transform(m.group(1)), s)
2203
2204
2205 def get_subprocess_encoding():
2206     if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
2207         # For subprocess calls, encode with locale encoding
2208         # Refer to http://stackoverflow.com/a/9951851/35070
2209         encoding = preferredencoding()
2210     else:
2211         encoding = sys.getfilesystemencoding()
2212     if encoding is None:
2213         encoding = 'utf-8'
2214     return encoding
2215
2216
2217 def encodeFilename(s, for_subprocess=False):
2218     """
2219     @param s The name of the file
2220     """
2221
2222     assert type(s) == compat_str
2223
2224     # Python 3 has a Unicode API
2225     if sys.version_info >= (3, 0):
2226         return s
2227
2228     # Pass '' directly to use Unicode APIs on Windows 2000 and up
2229     # (Detecting Windows NT 4 is tricky because 'major >= 4' would
2230     # match Windows 9x series as well. Besides, NT 4 is obsolete.)
2231     if not for_subprocess and sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
2232         return s
2233
2234     # Jython assumes filenames are Unicode strings though reported as Python 2.x compatible
2235     if sys.platform.startswith('java'):
2236         return s
2237
2238     return s.encode(get_subprocess_encoding(), 'ignore')
2239
2240
2241 def decodeFilename(b, for_subprocess=False):
2242
2243     if sys.version_info >= (3, 0):
2244         return b
2245
2246     if not isinstance(b, bytes):
2247         return b
2248
2249     return b.decode(get_subprocess_encoding(), 'ignore')
2250
2251
2252 def encodeArgument(s):
2253     if not isinstance(s, compat_str):
2254         # Legacy code that uses byte strings
2255         # Uncomment the following line after fixing all post processors
2256         # assert False, 'Internal error: %r should be of type %r, is %r' % (s, compat_str, type(s))
2257         s = s.decode('ascii')
2258     return encodeFilename(s, True)
2259
2260
2261 def decodeArgument(b):
2262     return decodeFilename(b, True)
2263
2264
2265 def decodeOption(optval):
2266     if optval is None:
2267         return optval
2268     if isinstance(optval, bytes):
2269         optval = optval.decode(preferredencoding())
2270
2271     assert isinstance(optval, compat_str)
2272     return optval
2273
2274
2275 def formatSeconds(secs):
2276     if secs > 3600:
2277         return '%d:%02d:%02d' % (secs // 3600, (secs % 3600) // 60, secs % 60)
2278     elif secs > 60:
2279         return '%d:%02d' % (secs // 60, secs % 60)
2280     else:
2281         return '%d' % secs
2282
2283
2284 def make_HTTPS_handler(params, **kwargs):
2285     opts_no_check_certificate = params.get('nocheckcertificate', False)
2286     if hasattr(ssl, 'create_default_context'):  # Python >= 3.4 or 2.7.9
2287         context = ssl.create_default_context(ssl.Purpose.SERVER_AUTH)
2288         if opts_no_check_certificate:
2289             context.check_hostname = False
2290             context.verify_mode = ssl.CERT_NONE
2291         try:
2292             return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
2293         except TypeError:
2294             # Python 2.7.8
2295             # (create_default_context present but HTTPSHandler has no context=)
2296             pass
2297
2298     if sys.version_info < (3, 2):
2299         return YoutubeDLHTTPSHandler(params, **kwargs)
2300     else:  # Python < 3.4
2301         context = ssl.SSLContext(ssl.PROTOCOL_TLSv1)
2302         context.verify_mode = (ssl.CERT_NONE
2303                                if opts_no_check_certificate
2304                                else ssl.CERT_REQUIRED)
2305         context.set_default_verify_paths()
2306         return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
2307
2308
2309 def bug_reports_message():
2310     if ytdl_is_updateable():
2311         update_cmd = 'type  youtube-dl -U  to update'
2312     else:
2313         update_cmd = 'see  https://yt-dl.org/update  on how to update'
2314     msg = '; please report this issue on https://yt-dl.org/bug .'
2315     msg += ' Make sure you are using the latest version; %s.' % update_cmd
2316     msg += ' Be sure to call youtube-dl with the --verbose flag and include its complete output.'
2317     return msg
2318
2319
2320 class YoutubeDLError(Exception):
2321     """Base exception for YoutubeDL errors."""
2322     pass
2323
2324
2325 class ExtractorError(YoutubeDLError):
2326     """Error during info extraction."""
2327
2328     def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None):
2329         """ tb, if given, is the original traceback (so that it can be printed out).
2330         If expected is set, this is a normal error message and most likely not a bug in youtube-dl.
2331         """
2332
2333         if sys.exc_info()[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError):
2334             expected = True
2335         if video_id is not None:
2336             msg = video_id + ': ' + msg
2337         if cause:
2338             msg += ' (caused by %r)' % cause
2339         if not expected:
2340             msg += bug_reports_message()
2341         super(ExtractorError, self).__init__(msg)
2342
2343         self.traceback = tb
2344         self.exc_info = sys.exc_info()  # preserve original exception
2345         self.cause = cause
2346         self.video_id = video_id
2347
2348     def format_traceback(self):
2349         if self.traceback is None:
2350             return None
2351         return ''.join(traceback.format_tb(self.traceback))
2352
2353
2354 class UnsupportedError(ExtractorError):
2355     def __init__(self, url):
2356         super(UnsupportedError, self).__init__(
2357             'Unsupported URL: %s' % url, expected=True)
2358         self.url = url
2359
2360
2361 class RegexNotFoundError(ExtractorError):
2362     """Error when a regex didn't match"""
2363     pass
2364
2365
2366 class GeoRestrictedError(ExtractorError):
2367     """Geographic restriction Error exception.
2368
2369     This exception may be thrown when a video is not available from your
2370     geographic location due to geographic restrictions imposed by a website.
2371     """
2372     def __init__(self, msg, countries=None):
2373         super(GeoRestrictedError, self).__init__(msg, expected=True)
2374         self.msg = msg
2375         self.countries = countries
2376
2377
2378 class DownloadError(YoutubeDLError):
2379     """Download Error exception.
2380
2381     This exception may be thrown by FileDownloader objects if they are not
2382     configured to continue on errors. They will contain the appropriate
2383     error message.
2384     """
2385
2386     def __init__(self, msg, exc_info=None):
2387         """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
2388         super(DownloadError, self).__init__(msg)
2389         self.exc_info = exc_info
2390
2391
2392 class SameFileError(YoutubeDLError):
2393     """Same File exception.
2394
2395     This exception will be thrown by FileDownloader objects if they detect
2396     multiple files would have to be downloaded to the same file on disk.
2397     """
2398     pass
2399
2400
2401 class PostProcessingError(YoutubeDLError):
2402     """Post Processing exception.
2403
2404     This exception may be raised by PostProcessor's .run() method to
2405     indicate an error in the postprocessing task.
2406     """
2407
2408     def __init__(self, msg):
2409         super(PostProcessingError, self).__init__(msg)
2410         self.msg = msg
2411
2412
2413 class MaxDownloadsReached(YoutubeDLError):
2414     """ --max-downloads limit has been reached. """
2415     pass
2416
2417
2418 class UnavailableVideoError(YoutubeDLError):
2419     """Unavailable Format exception.
2420
2421     This exception will be thrown when a video is requested
2422     in a format that is not available for that video.
2423     """
2424     pass
2425
2426
2427 class ContentTooShortError(YoutubeDLError):
2428     """Content Too Short exception.
2429
2430     This exception may be raised by FileDownloader objects when a file they
2431     download is too small for what the server announced first, indicating
2432     the connection was probably interrupted.
2433     """
2434
2435     def __init__(self, downloaded, expected):
2436         super(ContentTooShortError, self).__init__(
2437             'Downloaded {0} bytes, expected {1} bytes'.format(downloaded, expected)
2438         )
2439         # Both in bytes
2440         self.downloaded = downloaded
2441         self.expected = expected
2442
2443
2444 class XAttrMetadataError(YoutubeDLError):
2445     def __init__(self, code=None, msg='Unknown error'):
2446         super(XAttrMetadataError, self).__init__(msg)
2447         self.code = code
2448         self.msg = msg
2449
2450         # Parsing code and msg
2451         if (self.code in (errno.ENOSPC, errno.EDQUOT)
2452                 or 'No space left' in self.msg or 'Disk quota excedded' in self.msg):
2453             self.reason = 'NO_SPACE'
2454         elif self.code == errno.E2BIG or 'Argument list too long' in self.msg:
2455             self.reason = 'VALUE_TOO_LONG'
2456         else:
2457             self.reason = 'NOT_SUPPORTED'
2458
2459
2460 class XAttrUnavailableError(YoutubeDLError):
2461     pass
2462
2463
2464 def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs):
2465     # Working around python 2 bug (see http://bugs.python.org/issue17849) by limiting
2466     # expected HTTP responses to meet HTTP/1.0 or later (see also
2467     # https://github.com/ytdl-org/youtube-dl/issues/6727)
2468     if sys.version_info < (3, 0):
2469         kwargs['strict'] = True
2470     hc = http_class(*args, **compat_kwargs(kwargs))
2471     source_address = ydl_handler._params.get('source_address')
2472
2473     if source_address is not None:
2474         # This is to workaround _create_connection() from socket where it will try all
2475         # address data from getaddrinfo() including IPv6. This filters the result from
2476         # getaddrinfo() based on the source_address value.
2477         # This is based on the cpython socket.create_connection() function.
2478         # https://github.com/python/cpython/blob/master/Lib/socket.py#L691
2479         def _create_connection(address, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, source_address=None):
2480             host, port = address
2481             err = None
2482             addrs = socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM)
2483             af = socket.AF_INET if '.' in source_address[0] else socket.AF_INET6
2484             ip_addrs = [addr for addr in addrs if addr[0] == af]
2485             if addrs and not ip_addrs:
2486                 ip_version = 'v4' if af == socket.AF_INET else 'v6'
2487                 raise socket.error(
2488                     "No remote IP%s addresses available for connect, can't use '%s' as source address"
2489                     % (ip_version, source_address[0]))
2490             for res in ip_addrs:
2491                 af, socktype, proto, canonname, sa = res
2492                 sock = None
2493                 try:
2494                     sock = socket.socket(af, socktype, proto)
2495                     if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT:
2496                         sock.settimeout(timeout)
2497                     sock.bind(source_address)
2498                     sock.connect(sa)
2499                     err = None  # Explicitly break reference cycle
2500                     return sock
2501                 except socket.error as _:
2502                     err = _
2503                     if sock is not None:
2504                         sock.close()
2505             if err is not None:
2506                 raise err
2507             else:
2508                 raise socket.error('getaddrinfo returns an empty list')
2509         if hasattr(hc, '_create_connection'):
2510             hc._create_connection = _create_connection
2511         sa = (source_address, 0)
2512         if hasattr(hc, 'source_address'):  # Python 2.7+
2513             hc.source_address = sa
2514         else:  # Python 2.6
2515             def _hc_connect(self, *args, **kwargs):
2516                 sock = _create_connection(
2517                     (self.host, self.port), self.timeout, sa)
2518                 if is_https:
2519                     self.sock = ssl.wrap_socket(
2520                         sock, self.key_file, self.cert_file,
2521                         ssl_version=ssl.PROTOCOL_TLSv1)
2522                 else:
2523                     self.sock = sock
2524             hc.connect = functools.partial(_hc_connect, hc)
2525
2526     return hc
2527
2528
2529 def handle_youtubedl_headers(headers):
2530     filtered_headers = headers
2531
2532     if 'Youtubedl-no-compression' in filtered_headers:
2533         filtered_headers = dict((k, v) for k, v in filtered_headers.items() if k.lower() != 'accept-encoding')
2534         del filtered_headers['Youtubedl-no-compression']
2535
2536     return filtered_headers
2537
2538
2539 class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
2540     """Handler for HTTP requests and responses.
2541
2542     This class, when installed with an OpenerDirector, automatically adds
2543     the standard headers to every HTTP request and handles gzipped and
2544     deflated responses from web servers. If compression is to be avoided in
2545     a particular request, the original request in the program code only has
2546     to include the HTTP header "Youtubedl-no-compression", which will be
2547     removed before making the real request.
2548
2549     Part of this code was copied from:
2550
2551     http://techknack.net/python-urllib2-handlers/
2552
2553     Andrew Rowls, the author of that code, agreed to release it to the
2554     public domain.
2555     """
2556
2557     def __init__(self, params, *args, **kwargs):
2558         compat_urllib_request.HTTPHandler.__init__(self, *args, **kwargs)
2559         self._params = params
2560
2561     def http_open(self, req):
2562         conn_class = compat_http_client.HTTPConnection
2563
2564         socks_proxy = req.headers.get('Ytdl-socks-proxy')
2565         if socks_proxy:
2566             conn_class = make_socks_conn_class(conn_class, socks_proxy)
2567             del req.headers['Ytdl-socks-proxy']
2568
2569         return self.do_open(functools.partial(
2570             _create_http_connection, self, conn_class, False),
2571             req)
2572
2573     @staticmethod
2574     def deflate(data):
2575         try:
2576             return zlib.decompress(data, -zlib.MAX_WBITS)
2577         except zlib.error:
2578             return zlib.decompress(data)
2579
2580     def http_request(self, req):
2581         # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
2582         # always respected by websites, some tend to give out URLs with non percent-encoded
2583         # non-ASCII characters (see telemb.py, ard.py [#3412])
2584         # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
2585         # To work around aforementioned issue we will replace request's original URL with
2586         # percent-encoded one
2587         # Since redirects are also affected (e.g. http://www.southpark.de/alle-episoden/s18e09)
2588         # the code of this workaround has been moved here from YoutubeDL.urlopen()
2589         url = req.get_full_url()
2590         url_escaped = escape_url(url)
2591
2592         # Substitute URL if any change after escaping
2593         if url != url_escaped:
2594             req = update_Request(req, url=url_escaped)
2595
2596         for h, v in std_headers.items():
2597             # Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275
2598             # The dict keys are capitalized because of this bug by urllib
2599             if h.capitalize() not in req.headers:
2600                 req.add_header(h, v)
2601
2602         req.headers = handle_youtubedl_headers(req.headers)
2603
2604         if sys.version_info < (2, 7) and '#' in req.get_full_url():
2605             # Python 2.6 is brain-dead when it comes to fragments
2606             req._Request__original = req._Request__original.partition('#')[0]
2607             req._Request__r_type = req._Request__r_type.partition('#')[0]
2608
2609         return req
2610
2611     def http_response(self, req, resp):
2612         old_resp = resp
2613         # gzip
2614         if resp.headers.get('Content-encoding', '') == 'gzip':
2615             content = resp.read()
2616             gz = gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb')
2617             try:
2618                 uncompressed = io.BytesIO(gz.read())
2619             except IOError as original_ioerror:
2620                 # There may be junk add the end of the file
2621                 # See http://stackoverflow.com/q/4928560/35070 for details
2622                 for i in range(1, 1024):
2623                     try:
2624                         gz = gzip.GzipFile(fileobj=io.BytesIO(content[:-i]), mode='rb')
2625                         uncompressed = io.BytesIO(gz.read())
2626                     except IOError:
2627                         continue
2628                     break
2629                 else:
2630                     raise original_ioerror
2631             resp = compat_urllib_request.addinfourl(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
2632             resp.msg = old_resp.msg
2633             del resp.headers['Content-encoding']
2634         # deflate
2635         if resp.headers.get('Content-encoding', '') == 'deflate':
2636             gz = io.BytesIO(self.deflate(resp.read()))
2637             resp = compat_urllib_request.addinfourl(gz, old_resp.headers, old_resp.url, old_resp.code)
2638             resp.msg = old_resp.msg
2639             del resp.headers['Content-encoding']
2640         # Percent-encode redirect URL of Location HTTP header to satisfy RFC 3986 (see
2641         # https://github.com/ytdl-org/youtube-dl/issues/6457).
2642         if 300 <= resp.code < 400:
2643             location = resp.headers.get('Location')
2644             if location:
2645                 # As of RFC 2616 default charset is iso-8859-1 that is respected by python 3
2646                 if sys.version_info >= (3, 0):
2647                     location = location.encode('iso-8859-1').decode('utf-8')
2648                 else:
2649                     location = location.decode('utf-8')
2650                 location_escaped = escape_url(location)
2651                 if location != location_escaped:
2652                     del resp.headers['Location']
2653                     if sys.version_info < (3, 0):
2654                         location_escaped = location_escaped.encode('utf-8')
2655                     resp.headers['Location'] = location_escaped
2656         return resp
2657
2658     https_request = http_request
2659     https_response = http_response
2660
2661
2662 def make_socks_conn_class(base_class, socks_proxy):
2663     assert issubclass(base_class, (
2664         compat_http_client.HTTPConnection, compat_http_client.HTTPSConnection))
2665
2666     url_components = compat_urlparse.urlparse(socks_proxy)
2667     if url_components.scheme.lower() == 'socks5':
2668         socks_type = ProxyType.SOCKS5
2669     elif url_components.scheme.lower() in ('socks', 'socks4'):
2670         socks_type = ProxyType.SOCKS4
2671     elif url_components.scheme.lower() == 'socks4a':
2672         socks_type = ProxyType.SOCKS4A
2673
2674     def unquote_if_non_empty(s):
2675         if not s:
2676             return s
2677         return compat_urllib_parse_unquote_plus(s)
2678
2679     proxy_args = (
2680         socks_type,
2681         url_components.hostname, url_components.port or 1080,
2682         True,  # Remote DNS
2683         unquote_if_non_empty(url_components.username),
2684         unquote_if_non_empty(url_components.password),
2685     )
2686
2687     class SocksConnection(base_class):
2688         def connect(self):
2689             self.sock = sockssocket()
2690             self.sock.setproxy(*proxy_args)
2691             if type(self.timeout) in (int, float):
2692                 self.sock.settimeout(self.timeout)
2693             self.sock.connect((self.host, self.port))
2694
2695             if isinstance(self, compat_http_client.HTTPSConnection):
2696                 if hasattr(self, '_context'):  # Python > 2.6
2697                     self.sock = self._context.wrap_socket(
2698                         self.sock, server_hostname=self.host)
2699                 else:
2700                     self.sock = ssl.wrap_socket(self.sock)
2701
2702     return SocksConnection
2703
2704
2705 class YoutubeDLHTTPSHandler(compat_urllib_request.HTTPSHandler):
2706     def __init__(self, params, https_conn_class=None, *args, **kwargs):
2707         compat_urllib_request.HTTPSHandler.__init__(self, *args, **kwargs)
2708         self._https_conn_class = https_conn_class or compat_http_client.HTTPSConnection
2709         self._params = params
2710
2711     def https_open(self, req):
2712         kwargs = {}
2713         conn_class = self._https_conn_class
2714
2715         if hasattr(self, '_context'):  # python > 2.6
2716             kwargs['context'] = self._context
2717         if hasattr(self, '_check_hostname'):  # python 3.x
2718             kwargs['check_hostname'] = self._check_hostname
2719
2720         socks_proxy = req.headers.get('Ytdl-socks-proxy')
2721         if socks_proxy:
2722             conn_class = make_socks_conn_class(conn_class, socks_proxy)
2723             del req.headers['Ytdl-socks-proxy']
2724
2725         return self.do_open(functools.partial(
2726             _create_http_connection, self, conn_class, True),
2727             req, **kwargs)
2728
2729
2730 class YoutubeDLCookieJar(compat_cookiejar.MozillaCookieJar):
2731     _HTTPONLY_PREFIX = '#HttpOnly_'
2732
2733     def save(self, filename=None, ignore_discard=False, ignore_expires=False):
2734         # Store session cookies with `expires` set to 0 instead of an empty
2735         # string
2736         for cookie in self:
2737             if cookie.expires is None:
2738                 cookie.expires = 0
2739         compat_cookiejar.MozillaCookieJar.save(self, filename, ignore_discard, ignore_expires)
2740
2741     def load(self, filename=None, ignore_discard=False, ignore_expires=False):
2742         """Load cookies from a file."""
2743         if filename is None:
2744             if self.filename is not None:
2745                 filename = self.filename
2746             else:
2747                 raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)
2748
2749         cf = io.StringIO()
2750         with open(filename) as f:
2751             for line in f:
2752                 if line.startswith(self._HTTPONLY_PREFIX):
2753                     line = line[len(self._HTTPONLY_PREFIX):]
2754                 cf.write(compat_str(line))
2755         cf.seek(0)
2756         self._really_load(cf, filename, ignore_discard, ignore_expires)
2757         # Session cookies are denoted by either `expires` field set to
2758         # an empty string or 0. MozillaCookieJar only recognizes the former
2759         # (see [1]). So we need force the latter to be recognized as session
2760         # cookies on our own.
2761         # Session cookies may be important for cookies-based authentication,
2762         # e.g. usually, when user does not check 'Remember me' check box while
2763         # logging in on a site, some important cookies are stored as session
2764         # cookies so that not recognizing them will result in failed login.
2765         # 1. https://bugs.python.org/issue17164
2766         for cookie in self:
2767             # Treat `expires=0` cookies as session cookies
2768             if cookie.expires == 0:
2769                 cookie.expires = None
2770                 cookie.discard = True
2771
2772
2773 class YoutubeDLCookieProcessor(compat_urllib_request.HTTPCookieProcessor):
2774     def __init__(self, cookiejar=None):
2775         compat_urllib_request.HTTPCookieProcessor.__init__(self, cookiejar)
2776
2777     def http_response(self, request, response):
2778         # Python 2 will choke on next HTTP request in row if there are non-ASCII
2779         # characters in Set-Cookie HTTP header of last response (see
2780         # https://github.com/ytdl-org/youtube-dl/issues/6769).
2781         # In order to at least prevent crashing we will percent encode Set-Cookie
2782         # header before HTTPCookieProcessor starts processing it.
2783         # if sys.version_info < (3, 0) and response.headers:
2784         #     for set_cookie_header in ('Set-Cookie', 'Set-Cookie2'):
2785         #         set_cookie = response.headers.get(set_cookie_header)
2786         #         if set_cookie:
2787         #             set_cookie_escaped = compat_urllib_parse.quote(set_cookie, b"%/;:@&=+$,!~*'()?#[] ")
2788         #             if set_cookie != set_cookie_escaped:
2789         #                 del response.headers[set_cookie_header]
2790         #                 response.headers[set_cookie_header] = set_cookie_escaped
2791         return compat_urllib_request.HTTPCookieProcessor.http_response(self, request, response)
2792
2793     https_request = compat_urllib_request.HTTPCookieProcessor.http_request
2794     https_response = http_response
2795
2796
2797 def extract_timezone(date_str):
2798     m = re.search(
2799         r'^.{8,}?(?P<tz>Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$)',
2800         date_str)
2801     if not m:
2802         timezone = datetime.timedelta()
2803     else:
2804         date_str = date_str[:-len(m.group('tz'))]
2805         if not m.group('sign'):
2806             timezone = datetime.timedelta()
2807         else:
2808             sign = 1 if m.group('sign') == '+' else -1
2809             timezone = datetime.timedelta(
2810                 hours=sign * int(m.group('hours')),
2811                 minutes=sign * int(m.group('minutes')))
2812     return timezone, date_str
2813
2814
2815 def parse_iso8601(date_str, delimiter='T', timezone=None):
2816     """ Return a UNIX timestamp from the given date """
2817
2818     if date_str is None:
2819         return None
2820
2821     date_str = re.sub(r'\.[0-9]+', '', date_str)
2822
2823     if timezone is None:
2824         timezone, date_str = extract_timezone(date_str)
2825
2826     try:
2827         date_format = '%Y-%m-%d{0}%H:%M:%S'.format(delimiter)
2828         dt = datetime.datetime.strptime(date_str, date_format) - timezone
2829         return calendar.timegm(dt.timetuple())
2830     except ValueError:
2831         pass
2832
2833
2834 def date_formats(day_first=True):
2835     return DATE_FORMATS_DAY_FIRST if day_first else DATE_FORMATS_MONTH_FIRST
2836
2837
2838 def unified_strdate(date_str, day_first=True):
2839     """Return a string with the date in the format YYYYMMDD"""
2840
2841     if date_str is None:
2842         return None
2843     upload_date = None
2844     # Replace commas
2845     date_str = date_str.replace(',', ' ')
2846     # Remove AM/PM + timezone
2847     date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)
2848     _, date_str = extract_timezone(date_str)
2849
2850     for expression in date_formats(day_first):
2851         try:
2852             upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
2853         except ValueError:
2854             pass
2855     if upload_date is None:
2856         timetuple = email.utils.parsedate_tz(date_str)
2857         if timetuple:
2858             try:
2859                 upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d')
2860             except ValueError:
2861                 pass
2862     if upload_date is not None:
2863         return compat_str(upload_date)
2864
2865
2866 def unified_timestamp(date_str, day_first=True):
2867     if date_str is None:
2868         return None
2869
2870     date_str = re.sub(r'[,|]', '', date_str)
2871
2872     pm_delta = 12 if re.search(r'(?i)PM', date_str) else 0
2873     timezone, date_str = extract_timezone(date_str)
2874
2875     # Remove AM/PM + timezone
2876     date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)
2877
2878     # Remove unrecognized timezones from ISO 8601 alike timestamps
2879     m = re.search(r'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', date_str)
2880     if m:
2881         date_str = date_str[:-len(m.group('tz'))]
2882
2883     # Python only supports microseconds, so remove nanoseconds
2884     m = re.search(r'^([0-9]{4,}-[0-9]{1,2}-[0-9]{1,2}T[0-9]{1,2}:[0-9]{1,2}:[0-9]{1,2}\.[0-9]{6})[0-9]+$', date_str)
2885     if m:
2886         date_str = m.group(1)
2887
2888     for expression in date_formats(day_first):
2889         try:
2890             dt = datetime.datetime.strptime(date_str, expression) - timezone + datetime.timedelta(hours=pm_delta)
2891             return calendar.timegm(dt.timetuple())
2892         except ValueError:
2893             pass
2894     timetuple = email.utils.parsedate_tz(date_str)
2895     if timetuple:
2896         return calendar.timegm(timetuple) + pm_delta * 3600
2897
2898
2899 def determine_ext(url, default_ext='unknown_video'):
2900     if url is None or '.' not in url:
2901         return default_ext
2902     guess = url.partition('?')[0].rpartition('.')[2]
2903     if re.match(r'^[A-Za-z0-9]+$', guess):
2904         return guess
2905     # Try extract ext from URLs like http://example.com/foo/bar.mp4/?download
2906     elif guess.rstrip('/') in KNOWN_EXTENSIONS:
2907         return guess.rstrip('/')
2908     else:
2909         return default_ext
2910
2911
2912 def subtitles_filename(filename, sub_lang, sub_format, expected_real_ext=None):
2913     return replace_extension(filename, sub_lang + '.' + sub_format, expected_real_ext)
2914
2915
2916 def date_from_str(date_str):
2917     """
2918     Return a datetime object from a string in the format YYYYMMDD or
2919     (now|today)[+-][0-9](day|week|month|year)(s)?"""
2920     today = datetime.date.today()
2921     if date_str in ('now', 'today'):
2922         return today
2923     if date_str == 'yesterday':
2924         return today - datetime.timedelta(days=1)
2925     match = re.match(r'(now|today)(?P<sign>[+-])(?P<time>\d+)(?P<unit>day|week|month|year)(s)?', date_str)
2926     if match is not None:
2927         sign = match.group('sign')
2928         time = int(match.group('time'))
2929         if sign == '-':
2930             time = -time
2931         unit = match.group('unit')
2932         # A bad approximation?
2933         if unit == 'month':
2934             unit = 'day'
2935             time *= 30
2936         elif unit == 'year':
2937             unit = 'day'
2938             time *= 365
2939         unit += 's'
2940         delta = datetime.timedelta(**{unit: time})
2941         return today + delta
2942     return datetime.datetime.strptime(date_str, '%Y%m%d').date()
2943
2944
2945 def hyphenate_date(date_str):
2946     """
2947     Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format"""
2948     match = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str)
2949     if match is not None:
2950         return '-'.join(match.groups())
2951     else:
2952         return date_str
2953
2954
2955 class DateRange(object):
2956     """Represents a time interval between two dates"""
2957
2958     def __init__(self, start=None, end=None):
2959         """start and end must be strings in the format accepted by date"""
2960         if start is not None:
2961             self.start = date_from_str(start)
2962         else:
2963             self.start = datetime.datetime.min.date()
2964         if end is not None:
2965             self.end = date_from_str(end)
2966         else:
2967             self.end = datetime.datetime.max.date()
2968         if self.start > self.end:
2969             raise ValueError('Date range: "%s" , the start date must be before the end date' % self)
2970
2971     @classmethod
2972     def day(cls, day):
2973         """Returns a range that only contains the given day"""
2974         return cls(day, day)
2975
2976     def __contains__(self, date):
2977         """Check if the date is in the range"""
2978         if not isinstance(date, datetime.date):
2979             date = date_from_str(date)
2980         return self.start <= date <= self.end
2981
2982     def __str__(self):
2983         return '%s - %s' % (self.start.isoformat(), self.end.isoformat())
2984
2985
2986 def platform_name():
2987     """ Returns the platform name as a compat_str """
2988     res = platform.platform()
2989     if isinstance(res, bytes):
2990         res = res.decode(preferredencoding())
2991
2992     assert isinstance(res, compat_str)
2993     return res
2994
2995
2996 def _windows_write_string(s, out):
2997     """ Returns True if the string was written using special methods,
2998     False if it has yet to be written out."""
2999     # Adapted from http://stackoverflow.com/a/3259271/35070
3000
3001     import ctypes
3002     import ctypes.wintypes
3003
3004     WIN_OUTPUT_IDS = {
3005         1: -11,
3006         2: -12,
3007     }
3008
3009     try:
3010         fileno = out.fileno()
3011     except AttributeError:
3012         # If the output stream doesn't have a fileno, it's virtual
3013         return False
3014     except io.UnsupportedOperation:
3015         # Some strange Windows pseudo files?
3016         return False
3017     if fileno not in WIN_OUTPUT_IDS:
3018         return False
3019
3020     GetStdHandle = compat_ctypes_WINFUNCTYPE(
3021         ctypes.wintypes.HANDLE, ctypes.wintypes.DWORD)(
3022         ('GetStdHandle', ctypes.windll.kernel32))
3023     h = GetStdHandle(WIN_OUTPUT_IDS[fileno])
3024
3025     WriteConsoleW = compat_ctypes_WINFUNCTYPE(
3026         ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE, ctypes.wintypes.LPWSTR,
3027         ctypes.wintypes.DWORD, ctypes.POINTER(ctypes.wintypes.DWORD),
3028         ctypes.wintypes.LPVOID)(('WriteConsoleW', ctypes.windll.kernel32))
3029     written = ctypes.wintypes.DWORD(0)
3030
3031     GetFileType = compat_ctypes_WINFUNCTYPE(ctypes.wintypes.DWORD, ctypes.wintypes.DWORD)(('GetFileType', ctypes.windll.kernel32))
3032     FILE_TYPE_CHAR = 0x0002
3033     FILE_TYPE_REMOTE = 0x8000
3034     GetConsoleMode = compat_ctypes_WINFUNCTYPE(
3035         ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE,
3036         ctypes.POINTER(ctypes.wintypes.DWORD))(
3037         ('GetConsoleMode', ctypes.windll.kernel32))
3038     INVALID_HANDLE_VALUE = ctypes.wintypes.DWORD(-1).value
3039
3040     def not_a_console(handle):
3041         if handle == INVALID_HANDLE_VALUE or handle is None:
3042             return True
3043         return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR
3044                 or GetConsoleMode(handle, ctypes.byref(ctypes.wintypes.DWORD())) == 0)
3045
3046     if not_a_console(h):
3047         return False
3048
3049     def next_nonbmp_pos(s):
3050         try:
3051             return next(i for i, c in enumerate(s) if ord(c) > 0xffff)
3052         except StopIteration:
3053             return len(s)
3054
3055     while s:
3056         count = min(next_nonbmp_pos(s), 1024)
3057
3058         ret = WriteConsoleW(
3059             h, s, count if count else 2, ctypes.byref(written), None)
3060         if ret == 0:
3061             raise OSError('Failed to write string')
3062         if not count:  # We just wrote a non-BMP character
3063             assert written.value == 2
3064             s = s[1:]
3065         else:
3066             assert written.value > 0
3067             s = s[written.value:]
3068     return True
3069
3070
3071 def write_string(s, out=None, encoding=None):
3072     if out is None:
3073         out = sys.stderr
3074     assert type(s) == compat_str
3075
3076     if sys.platform == 'win32' and encoding is None and hasattr(out, 'fileno'):
3077         if _windows_write_string(s, out):
3078             return
3079
3080     if ('b' in getattr(out, 'mode', '')
3081             or sys.version_info[0] < 3):  # Python 2 lies about mode of sys.stderr
3082         byt = s.encode(encoding or preferredencoding(), 'ignore')
3083         out.write(byt)
3084     elif hasattr(out, 'buffer'):
3085         enc = encoding or getattr(out, 'encoding', None) or preferredencoding()
3086         byt = s.encode(enc, 'ignore')
3087         out.buffer.write(byt)
3088     else:
3089         out.write(s)
3090     out.flush()
3091
3092
3093 def bytes_to_intlist(bs):
3094     if not bs:
3095         return []
3096     if isinstance(bs[0], int):  # Python 3
3097         return list(bs)
3098     else:
3099         return [ord(c) for c in bs]
3100
3101
3102 def intlist_to_bytes(xs):
3103     if not xs:
3104         return b''
3105     return compat_struct_pack('%dB' % len(xs), *xs)
3106
3107
3108 # Cross-platform file locking
3109 if sys.platform == 'win32':
3110     import ctypes.wintypes
3111     import msvcrt
3112
3113     class OVERLAPPED(ctypes.Structure):
3114         _fields_ = [
3115             ('Internal', ctypes.wintypes.LPVOID),
3116             ('InternalHigh', ctypes.wintypes.LPVOID),
3117             ('Offset', ctypes.wintypes.DWORD),
3118             ('OffsetHigh', ctypes.wintypes.DWORD),
3119             ('hEvent', ctypes.wintypes.HANDLE),
3120         ]
3121
3122     kernel32 = ctypes.windll.kernel32
3123     LockFileEx = kernel32.LockFileEx
3124     LockFileEx.argtypes = [
3125         ctypes.wintypes.HANDLE,     # hFile
3126         ctypes.wintypes.DWORD,      # dwFlags
3127         ctypes.wintypes.DWORD,      # dwReserved
3128         ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
3129         ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
3130         ctypes.POINTER(OVERLAPPED)  # Overlapped
3131     ]
3132     LockFileEx.restype = ctypes.wintypes.BOOL
3133     UnlockFileEx = kernel32.UnlockFileEx
3134     UnlockFileEx.argtypes = [
3135         ctypes.wintypes.HANDLE,     # hFile
3136         ctypes.wintypes.DWORD,      # dwReserved
3137         ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
3138         ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
3139         ctypes.POINTER(OVERLAPPED)  # Overlapped
3140     ]
3141     UnlockFileEx.restype = ctypes.wintypes.BOOL
3142     whole_low = 0xffffffff
3143     whole_high = 0x7fffffff
3144
3145     def _lock_file(f, exclusive):
3146         overlapped = OVERLAPPED()
3147         overlapped.Offset = 0
3148         overlapped.OffsetHigh = 0
3149         overlapped.hEvent = 0
3150         f._lock_file_overlapped_p = ctypes.pointer(overlapped)
3151         handle = msvcrt.get_osfhandle(f.fileno())
3152         if not LockFileEx(handle, 0x2 if exclusive else 0x0, 0,
3153                           whole_low, whole_high, f._lock_file_overlapped_p):
3154             raise OSError('Locking file failed: %r' % ctypes.FormatError())
3155
3156     def _unlock_file(f):
3157         assert f._lock_file_overlapped_p
3158         handle = msvcrt.get_osfhandle(f.fileno())
3159         if not UnlockFileEx(handle, 0,
3160                             whole_low, whole_high, f._lock_file_overlapped_p):
3161             raise OSError('Unlocking file failed: %r' % ctypes.FormatError())
3162
3163 else:
3164     # Some platforms, such as Jython, is missing fcntl
3165     try:
3166         import fcntl
3167
3168         def _lock_file(f, exclusive):
3169             fcntl.flock(f, fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH)
3170
3171         def _unlock_file(f):
3172             fcntl.flock(f, fcntl.LOCK_UN)
3173     except ImportError:
3174         UNSUPPORTED_MSG = 'file locking is not supported on this platform'
3175
3176         def _lock_file(f, exclusive):
3177             raise IOError(UNSUPPORTED_MSG)
3178
3179         def _unlock_file(f):
3180             raise IOError(UNSUPPORTED_MSG)
3181
3182
3183 class locked_file(object):
3184     def __init__(self, filename, mode, encoding=None):
3185         assert mode in ['r', 'a', 'w']
3186         self.f = io.open(filename, mode, encoding=encoding)
3187         self.mode = mode
3188
3189     def __enter__(self):
3190         exclusive = self.mode != 'r'
3191         try:
3192             _lock_file(self.f, exclusive)
3193         except IOError:
3194             self.f.close()
3195             raise
3196         return self
3197
3198     def __exit__(self, etype, value, traceback):
3199         try:
3200             _unlock_file(self.f)
3201         finally:
3202             self.f.close()
3203
3204     def __iter__(self):
3205         return iter(self.f)
3206
3207     def write(self, *args):
3208         return self.f.write(*args)
3209
3210     def read(self, *args):
3211         return self.f.read(*args)
3212
3213
3214 def get_filesystem_encoding():
3215     encoding = sys.getfilesystemencoding()
3216     return encoding if encoding is not None else 'utf-8'
3217
3218
3219 def shell_quote(args):
3220     quoted_args = []
3221     encoding = get_filesystem_encoding()
3222     for a in args:
3223         if isinstance(a, bytes):
3224             # We may get a filename encoded with 'encodeFilename'
3225             a = a.decode(encoding)
3226         quoted_args.append(compat_shlex_quote(a))
3227     return ' '.join(quoted_args)
3228
3229
3230 def smuggle_url(url, data):
3231     """ Pass additional data in a URL for internal use. """
3232
3233     url, idata = unsmuggle_url(url, {})
3234     data.update(idata)
3235     sdata = compat_urllib_parse_urlencode(
3236         {'__youtubedl_smuggle': json.dumps(data)})
3237     return url + '#' + sdata
3238
3239
3240 def unsmuggle_url(smug_url, default=None):
3241     if '#__youtubedl_smuggle' not in smug_url:
3242         return smug_url, default
3243     url, _, sdata = smug_url.rpartition('#')
3244     jsond = compat_parse_qs(sdata)['__youtubedl_smuggle'][0]
3245     data = json.loads(jsond)
3246     return url, data
3247
3248
3249 def format_bytes(bytes):
3250     if bytes is None:
3251         return 'N/A'
3252     if type(bytes) is str:
3253         bytes = float(bytes)
3254     if bytes == 0.0:
3255         exponent = 0
3256     else:
3257         exponent = int(math.log(bytes, 1024.0))
3258     suffix = ['B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB'][exponent]
3259     converted = float(bytes) / float(1024 ** exponent)
3260     return '%.2f%s' % (converted, suffix)
3261
3262
3263 def lookup_unit_table(unit_table, s):
3264     units_re = '|'.join(re.escape(u) for u in unit_table)
3265     m = re.match(
3266         r'(?P<num>[0-9]+(?:[,.][0-9]*)?)\s*(?P<unit>%s)\b' % units_re, s)
3267     if not m:
3268         return None
3269     num_str = m.group('num').replace(',', '.')
3270     mult = unit_table[m.group('unit')]
3271     return int(float(num_str) * mult)
3272
3273
3274 def parse_filesize(s):
3275     if s is None:
3276         return None
3277
3278     # The lower-case forms are of course incorrect and unofficial,
3279     # but we support those too
3280     _UNIT_TABLE = {
3281         'B': 1,
3282         'b': 1,
3283         'bytes': 1,
3284         'KiB': 1024,
3285         'KB': 1000,
3286         'kB': 1024,
3287         'Kb': 1000,
3288         'kb': 1000,
3289         'kilobytes': 1000,
3290         'kibibytes': 1024,
3291         'MiB': 1024 ** 2,
3292         'MB': 1000 ** 2,
3293         'mB': 1024 ** 2,
3294         'Mb': 1000 ** 2,
3295         'mb': 1000 ** 2,
3296         'megabytes': 1000 ** 2,
3297         'mebibytes': 1024 ** 2,
3298         'GiB': 1024 ** 3,
3299         'GB': 1000 ** 3,
3300         'gB': 1024 ** 3,
3301         'Gb': 1000 ** 3,
3302         'gb': 1000 ** 3,
3303         'gigabytes': 1000 ** 3,
3304         'gibibytes': 1024 ** 3,
3305         'TiB': 1024 ** 4,
3306         'TB': 1000 ** 4,
3307         'tB': 1024 ** 4,
3308         'Tb': 1000 ** 4,
3309         'tb': 1000 ** 4,
3310         'terabytes': 1000 ** 4,
3311         'tebibytes': 1024 ** 4,
3312         'PiB': 1024 ** 5,
3313         'PB': 1000 ** 5,
3314         'pB': 1024 ** 5,
3315         'Pb': 1000 ** 5,
3316         'pb': 1000 ** 5,
3317         'petabytes': 1000 ** 5,
3318         'pebibytes': 1024 ** 5,
3319         'EiB': 1024 ** 6,
3320         'EB': 1000 ** 6,
3321         'eB': 1024 ** 6,
3322         'Eb': 1000 ** 6,
3323         'eb': 1000 ** 6,
3324         'exabytes': 1000 ** 6,
3325         'exbibytes': 1024 ** 6,
3326         'ZiB': 1024 ** 7,
3327         'ZB': 1000 ** 7,
3328         'zB': 1024 ** 7,
3329         'Zb': 1000 ** 7,
3330         'zb': 1000 ** 7,
3331         'zettabytes': 1000 ** 7,
3332         'zebibytes': 1024 ** 7,
3333         'YiB': 1024 ** 8,
3334         'YB': 1000 ** 8,
3335         'yB': 1024 ** 8,
3336         'Yb': 1000 ** 8,
3337         'yb': 1000 ** 8,
3338         'yottabytes': 1000 ** 8,
3339         'yobibytes': 1024 ** 8,
3340     }
3341
3342     return lookup_unit_table(_UNIT_TABLE, s)
3343
3344
3345 def parse_count(s):
3346     if s is None:
3347         return None
3348
3349     s = s.strip()
3350
3351     if re.match(r'^[\d,.]+$', s):
3352         return str_to_int(s)
3353
3354     _UNIT_TABLE = {
3355         'k': 1000,
3356         'K': 1000,
3357         'm': 1000 ** 2,
3358         'M': 1000 ** 2,
3359         'kk': 1000 ** 2,
3360         'KK': 1000 ** 2,
3361     }
3362
3363     return lookup_unit_table(_UNIT_TABLE, s)
3364
3365
3366 def parse_resolution(s):
3367     if s is None:
3368         return {}
3369
3370     mobj = re.search(r'\b(?P<w>\d+)\s*[xX×]\s*(?P<h>\d+)\b', s)
3371     if mobj:
3372         return {
3373             'width': int(mobj.group('w')),
3374             'height': int(mobj.group('h')),
3375         }
3376
3377     mobj = re.search(r'\b(\d+)[pPiI]\b', s)
3378     if mobj:
3379         return {'height': int(mobj.group(1))}
3380
3381     mobj = re.search(r'\b([48])[kK]\b', s)
3382     if mobj:
3383         return {'height': int(mobj.group(1)) * 540}
3384
3385     return {}
3386
3387
3388 def parse_bitrate(s):
3389     if not isinstance(s, compat_str):
3390         return
3391     mobj = re.search(r'\b(\d+)\s*kbps', s)
3392     if mobj:
3393         return int(mobj.group(1))
3394
3395
3396 def month_by_name(name, lang='en'):
3397     """ Return the number of a month by (locale-independently) English name """
3398
3399     month_names = MONTH_NAMES.get(lang, MONTH_NAMES['en'])
3400
3401     try:
3402         return month_names.index(name) + 1
3403     except ValueError:
3404         return None
3405
3406
3407 def month_by_abbreviation(abbrev):
3408     """ Return the number of a month by (locale-independently) English
3409         abbreviations """
3410
3411     try:
3412         return [s[:3] for s in ENGLISH_MONTH_NAMES].index(abbrev) + 1
3413     except ValueError:
3414         return None
3415
3416
3417 def fix_xml_ampersands(xml_str):
3418     """Replace all the '&' by '&amp;' in XML"""
3419     return re.sub(
3420         r'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,4};|#[0-9]{,4};)',
3421         '&amp;',
3422         xml_str)
3423
3424
3425 def setproctitle(title):
3426     assert isinstance(title, compat_str)
3427
3428     # ctypes in Jython is not complete
3429     # http://bugs.jython.org/issue2148
3430     if sys.platform.startswith('java'):
3431         return
3432
3433     try:
3434         libc = ctypes.cdll.LoadLibrary('libc.so.6')
3435     except OSError:
3436         return
3437     except TypeError:
3438         # LoadLibrary in Windows Python 2.7.13 only expects
3439         # a bytestring, but since unicode_literals turns
3440         # every string into a unicode string, it fails.
3441         return
3442     title_bytes = title.encode('utf-8')
3443     buf = ctypes.create_string_buffer(len(title_bytes))
3444     buf.value = title_bytes
3445     try:
3446         libc.prctl(15, buf, 0, 0, 0)
3447     except AttributeError:
3448         return  # Strange libc, just skip this
3449
3450
3451 def remove_start(s, start):
3452     return s[len(start):] if s is not None and s.startswith(start) else s
3453
3454
3455 def remove_end(s, end):
3456     return s[:-len(end)] if s is not None and s.endswith(end) else s
3457
3458
3459 def remove_quotes(s):
3460     if s is None or len(s) < 2:
3461         return s
3462     for quote in ('"', "'", ):
3463         if s[0] == quote and s[-1] == quote:
3464             return s[1:-1]
3465     return s
3466
3467
3468 def url_basename(url):
3469     path = compat_urlparse.urlparse(url).path
3470     return path.strip('/').split('/')[-1]
3471
3472
3473 def base_url(url):
3474     return re.match(r'https?://[^?#&]+/', url).group()
3475
3476
3477 def urljoin(base, path):
3478     if isinstance(path, bytes):
3479         path = path.decode('utf-8')
3480     if not isinstance(path, compat_str) or not path:
3481         return None
3482     if re.match(r'^(?:[a-zA-Z][a-zA-Z0-9+-.]*:)?//', path):
3483         return path
3484     if isinstance(base, bytes):
3485         base = base.decode('utf-8')
3486     if not isinstance(base, compat_str) or not re.match(
3487             r'^(?:https?:)?//', base):
3488         return None
3489     return compat_urlparse.urljoin(base, path)
3490
3491
3492 class HEADRequest(compat_urllib_request.Request):
3493     def get_method(self):
3494         return 'HEAD'
3495
3496
3497 class PUTRequest(compat_urllib_request.Request):
3498     def get_method(self):
3499         return 'PUT'
3500
3501
3502 def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
3503     if get_attr:
3504         if v is not None:
3505             v = getattr(v, get_attr, None)
3506     if v == '':
3507         v = None
3508     if v is None:
3509         return default
3510     try:
3511         return int(v) * invscale // scale
3512     except (ValueError, TypeError):
3513         return default
3514
3515
3516 def str_or_none(v, default=None):
3517     return default if v is None else compat_str(v)
3518
3519
3520 def str_to_int(int_str):
3521     """ A more relaxed version of int_or_none """
3522     if not isinstance(int_str, compat_str):
3523         return int_str
3524     int_str = re.sub(r'[,\.\+]', '', int_str)
3525     return int(int_str)
3526
3527
3528 def float_or_none(v, scale=1, invscale=1, default=None):
3529     if v is None:
3530         return default
3531     try:
3532         return float(v) * invscale / scale
3533     except (ValueError, TypeError):
3534         return default
3535
3536
3537 def bool_or_none(v, default=None):
3538     return v if isinstance(v, bool) else default
3539
3540
3541 def strip_or_none(v, default=None):
3542     return v.strip() if isinstance(v, compat_str) else default
3543
3544
3545 def url_or_none(url):
3546     if not url or not isinstance(url, compat_str):
3547         return None
3548     url = url.strip()
3549     return url if re.match(r'^(?:[a-zA-Z][\da-zA-Z.+-]*:)?//', url) else None
3550
3551
3552 def parse_duration(s):
3553     if not isinstance(s, compat_basestring):
3554         return None
3555
3556     s = s.strip()
3557
3558     days, hours, mins, secs, ms = [None] * 5
3559     m = re.match(r'(?:(?:(?:(?P<days>[0-9]+):)?(?P<hours>[0-9]+):)?(?P<mins>[0-9]+):)?(?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?Z?$', s)
3560     if m:
3561         days, hours, mins, secs, ms = m.groups()
3562     else:
3563         m = re.match(
3564             r'''(?ix)(?:P?
3565                 (?:
3566                     [0-9]+\s*y(?:ears?)?\s*
3567                 )?
3568                 (?:
3569                     [0-9]+\s*m(?:onths?)?\s*
3570                 )?
3571                 (?:
3572                     [0-9]+\s*w(?:eeks?)?\s*
3573                 )?
3574                 (?:
3575                     (?P<days>[0-9]+)\s*d(?:ays?)?\s*
3576                 )?
3577                 T)?
3578                 (?:
3579                     (?P<hours>[0-9]+)\s*h(?:ours?)?\s*
3580                 )?
3581                 (?:
3582                     (?P<mins>[0-9]+)\s*m(?:in(?:ute)?s?)?\s*
3583                 )?
3584                 (?:
3585                     (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*s(?:ec(?:ond)?s?)?\s*
3586                 )?Z?$''', s)
3587         if m:
3588             days, hours, mins, secs, ms = m.groups()
3589         else:
3590             m = re.match(r'(?i)(?:(?P<hours>[0-9.]+)\s*(?:hours?)|(?P<mins>[0-9.]+)\s*(?:mins?\.?|minutes?)\s*)Z?$', s)
3591             if m:
3592                 hours, mins = m.groups()
3593             else:
3594                 return None
3595
3596     duration = 0
3597     if secs:
3598         duration += float(secs)
3599     if mins:
3600         duration += float(mins) * 60
3601     if hours:
3602         duration += float(hours) * 60 * 60
3603     if days:
3604         duration += float(days) * 24 * 60 * 60
3605     if ms:
3606         duration += float(ms)
3607     return duration
3608
3609
3610 def prepend_extension(filename, ext, expected_real_ext=None):
3611     name, real_ext = os.path.splitext(filename)
3612     return (
3613         '{0}.{1}{2}'.format(name, ext, real_ext)
3614         if not expected_real_ext or real_ext[1:] == expected_real_ext
3615         else '{0}.{1}'.format(filename, ext))
3616
3617
3618 def replace_extension(filename, ext, expected_real_ext=None):
3619     name, real_ext = os.path.splitext(filename)
3620     return '{0}.{1}'.format(
3621         name if not expected_real_ext or real_ext[1:] == expected_real_ext else filename,
3622         ext)
3623
3624
3625 def check_executable(exe, args=[]):
3626     """ Checks if the given binary is installed somewhere in PATH, and returns its name.
3627     args can be a list of arguments for a short output (like -version) """
3628     try:
3629         subprocess.Popen([exe] + args, stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate()
3630     except OSError:
3631         return False
3632     return exe
3633
3634
3635 def get_exe_version(exe, args=['--version'],
3636                     version_re=None, unrecognized='present'):
3637     """ Returns the version of the specified executable,
3638     or False if the executable is not present """
3639     try:
3640         # STDIN should be redirected too. On UNIX-like systems, ffmpeg triggers
3641         # SIGTTOU if youtube-dl is run in the background.
3642         # See https://github.com/ytdl-org/youtube-dl/issues/955#issuecomment-209789656
3643         out, _ = subprocess.Popen(
3644             [encodeArgument(exe)] + args,
3645             stdin=subprocess.PIPE,
3646             stdout=subprocess.PIPE, stderr=subprocess.STDOUT).communicate()
3647     except OSError:
3648         return False
3649     if isinstance(out, bytes):  # Python 2.x
3650         out = out.decode('ascii', 'ignore')
3651     return detect_exe_version(out, version_re, unrecognized)
3652
3653
3654 def detect_exe_version(output, version_re=None, unrecognized='present'):
3655     assert isinstance(output, compat_str)
3656     if version_re is None:
3657         version_re = r'version\s+([-0-9._a-zA-Z]+)'
3658     m = re.search(version_re, output)
3659     if m:
3660         return m.group(1)
3661     else:
3662         return unrecognized
3663
3664
3665 class PagedList(object):
3666     def __len__(self):
3667         # This is only useful for tests
3668         return len(self.getslice())
3669
3670
3671 class OnDemandPagedList(PagedList):
3672     def __init__(self, pagefunc, pagesize, use_cache=True):
3673         self._pagefunc = pagefunc
3674         self._pagesize = pagesize
3675         self._use_cache = use_cache
3676         if use_cache:
3677             self._cache = {}
3678
3679     def getslice(self, start=0, end=None):
3680         res = []
3681         for pagenum in itertools.count(start // self._pagesize):
3682             firstid = pagenum * self._pagesize
3683             nextfirstid = pagenum * self._pagesize + self._pagesize
3684             if start >= nextfirstid:
3685                 continue
3686
3687             page_results = None
3688             if self._use_cache:
3689                 page_results = self._cache.get(pagenum)
3690             if page_results is None:
3691                 page_results = list(self._pagefunc(pagenum))
3692             if self._use_cache:
3693                 self._cache[pagenum] = page_results
3694
3695             startv = (
3696                 start % self._pagesize
3697                 if firstid <= start < nextfirstid
3698                 else 0)
3699
3700             endv = (
3701                 ((end - 1) % self._pagesize) + 1
3702                 if (end is not None and firstid <= end <= nextfirstid)
3703                 else None)
3704
3705             if startv != 0 or endv is not None:
3706                 page_results = page_results[startv:endv]
3707             res.extend(page_results)
3708
3709             # A little optimization - if current page is not "full", ie. does
3710             # not contain page_size videos then we can assume that this page
3711             # is the last one - there are no more ids on further pages -
3712             # i.e. no need to query again.
3713             if len(page_results) + startv < self._pagesize:
3714                 break
3715
3716             # If we got the whole page, but the next page is not interesting,
3717             # break out early as well
3718             if end == nextfirstid:
3719                 break
3720         return res
3721
3722
3723 class InAdvancePagedList(PagedList):
3724     def __init__(self, pagefunc, pagecount, pagesize):
3725         self._pagefunc = pagefunc
3726         self._pagecount = pagecount
3727         self._pagesize = pagesize
3728
3729     def getslice(self, start=0, end=None):
3730         res = []
3731         start_page = start // self._pagesize
3732         end_page = (
3733             self._pagecount if end is None else (end // self._pagesize + 1))
3734         skip_elems = start - start_page * self._pagesize
3735         only_more = None if end is None else end - start
3736         for pagenum in range(start_page, end_page):
3737             page = list(self._pagefunc(pagenum))
3738             if skip_elems:
3739                 page = page[skip_elems:]
3740                 skip_elems = None
3741             if only_more is not None:
3742                 if len(page) < only_more:
3743                     only_more -= len(page)
3744                 else:
3745                     page = page[:only_more]
3746                     res.extend(page)
3747                     break
3748             res.extend(page)
3749         return res
3750
3751
3752 def uppercase_escape(s):
3753     unicode_escape = codecs.getdecoder('unicode_escape')
3754     return re.sub(
3755         r'\\U[0-9a-fA-F]{8}',
3756         lambda m: unicode_escape(m.group(0))[0],
3757         s)
3758
3759
3760 def lowercase_escape(s):
3761     unicode_escape = codecs.getdecoder('unicode_escape')
3762     return re.sub(
3763         r'\\u[0-9a-fA-F]{4}',
3764         lambda m: unicode_escape(m.group(0))[0],
3765         s)
3766
3767
3768 def escape_rfc3986(s):
3769     """Escape non-ASCII characters as suggested by RFC 3986"""
3770     if sys.version_info < (3, 0) and isinstance(s, compat_str):
3771         s = s.encode('utf-8')
3772     return compat_urllib_parse.quote(s, b"%/;:@&=+$,!~*'()?#[]")
3773
3774
3775 def escape_url(url):
3776     """Escape URL as suggested by RFC 3986"""
3777     url_parsed = compat_urllib_parse_urlparse(url)
3778     return url_parsed._replace(
3779         netloc=url_parsed.netloc.encode('idna').decode('ascii'),
3780         path=escape_rfc3986(url_parsed.path),
3781         params=escape_rfc3986(url_parsed.params),
3782         query=escape_rfc3986(url_parsed.query),
3783         fragment=escape_rfc3986(url_parsed.fragment)
3784     ).geturl()
3785
3786
3787 def read_batch_urls(batch_fd):
3788     def fixup(url):
3789         if not isinstance(url, compat_str):
3790             url = url.decode('utf-8', 'replace')
3791         BOM_UTF8 = '\xef\xbb\xbf'
3792         if url.startswith(BOM_UTF8):
3793             url = url[len(BOM_UTF8):]
3794         url = url.strip()
3795         if url.startswith(('#', ';', ']')):
3796             return False
3797         return url
3798
3799     with contextlib.closing(batch_fd) as fd:
3800         return [url for url in map(fixup, fd) if url]
3801
3802
3803 def urlencode_postdata(*args, **kargs):
3804     return compat_urllib_parse_urlencode(*args, **kargs).encode('ascii')
3805
3806
3807 def update_url_query(url, query):
3808     if not query:
3809         return url
3810     parsed_url = compat_urlparse.urlparse(url)
3811     qs = compat_parse_qs(parsed_url.query)
3812     qs.update(query)
3813     return compat_urlparse.urlunparse(parsed_url._replace(
3814         query=compat_urllib_parse_urlencode(qs, True)))
3815
3816
3817 def update_Request(req, url=None, data=None, headers={}, query={}):
3818     req_headers = req.headers.copy()
3819     req_headers.update(headers)
3820     req_data = data or req.data
3821     req_url = update_url_query(url or req.get_full_url(), query)
3822     req_get_method = req.get_method()
3823     if req_get_method == 'HEAD':
3824         req_type = HEADRequest
3825     elif req_get_method == 'PUT':
3826         req_type = PUTRequest
3827     else:
3828         req_type = compat_urllib_request.Request
3829     new_req = req_type(
3830         req_url, data=req_data, headers=req_headers,
3831         origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
3832     if hasattr(req, 'timeout'):
3833         new_req.timeout = req.timeout
3834     return new_req
3835
3836
3837 def _multipart_encode_impl(data, boundary):
3838     content_type = 'multipart/form-data; boundary=%s' % boundary
3839
3840     out = b''
3841     for k, v in data.items():
3842         out += b'--' + boundary.encode('ascii') + b'\r\n'
3843         if isinstance(k, compat_str):
3844             k = k.encode('utf-8')
3845         if isinstance(v, compat_str):
3846             v = v.encode('utf-8')
3847         # RFC 2047 requires non-ASCII field names to be encoded, while RFC 7578
3848         # suggests sending UTF-8 directly. Firefox sends UTF-8, too
3849         content = b'Content-Disposition: form-data; name="' + k + b'"\r\n\r\n' + v + b'\r\n'
3850         if boundary.encode('ascii') in content:
3851             raise ValueError('Boundary overlaps with data')
3852         out += content
3853
3854     out += b'--' + boundary.encode('ascii') + b'--\r\n'
3855
3856     return out, content_type
3857
3858
3859 def multipart_encode(data, boundary=None):
3860     '''
3861     Encode a dict to RFC 7578-compliant form-data
3862
3863     data:
3864         A dict where keys and values can be either Unicode or bytes-like
3865         objects.
3866     boundary:
3867         If specified a Unicode object, it's used as the boundary. Otherwise
3868         a random boundary is generated.
3869
3870     Reference: https://tools.ietf.org/html/rfc7578
3871     '''
3872     has_specified_boundary = boundary is not None
3873
3874     while True:
3875         if boundary is None:
3876             boundary = '---------------' + str(random.randrange(0x0fffffff, 0xffffffff))
3877
3878         try:
3879             out, content_type = _multipart_encode_impl(data, boundary)
3880             break
3881         except ValueError:
3882             if has_specified_boundary:
3883                 raise
3884             boundary = None
3885
3886     return out, content_type
3887
3888
3889 def dict_get(d, key_or_keys, default=None, skip_false_values=True):
3890     if isinstance(key_or_keys, (list, tuple)):
3891         for key in key_or_keys:
3892             if key not in d or d[key] is None or skip_false_values and not d[key]:
3893                 continue
3894             return d[key]
3895         return default
3896     return d.get(key_or_keys, default)
3897
3898
3899 def try_get(src, getter, expected_type=None):
3900     if not isinstance(getter, (list, tuple)):
3901         getter = [getter]
3902     for get in getter:
3903         try:
3904             v = get(src)
3905         except (AttributeError, KeyError, TypeError, IndexError):
3906             pass
3907         else:
3908             if expected_type is None or isinstance(v, expected_type):
3909                 return v
3910
3911
3912 def merge_dicts(*dicts):
3913     merged = {}
3914     for a_dict in dicts:
3915         for k, v in a_dict.items():
3916             if v is None:
3917                 continue
3918             if (k not in merged
3919                     or (isinstance(v, compat_str) and v
3920                         and isinstance(merged[k], compat_str)
3921                         and not merged[k])):
3922                 merged[k] = v
3923     return merged
3924
3925
3926 def encode_compat_str(string, encoding=preferredencoding(), errors='strict'):
3927     return string if isinstance(string, compat_str) else compat_str(string, encoding, errors)
3928
3929
3930 US_RATINGS = {
3931     'G': 0,
3932     'PG': 10,
3933     'PG-13': 13,
3934     'R': 16,
3935     'NC': 18,
3936 }
3937
3938
3939 TV_PARENTAL_GUIDELINES = {
3940     'TV-Y': 0,
3941     'TV-Y7': 7,
3942     'TV-G': 0,
3943     'TV-PG': 0,
3944     'TV-14': 14,
3945     'TV-MA': 17,
3946 }
3947
3948
3949 def parse_age_limit(s):
3950     if type(s) == int:
3951         return s if 0 <= s <= 21 else None
3952     if not isinstance(s, compat_basestring):
3953         return None
3954     m = re.match(r'^(?P<age>\d{1,2})\+?$', s)
3955     if m:
3956         return int(m.group('age'))
3957     if s in US_RATINGS:
3958         return US_RATINGS[s]
3959     m = re.match(r'^TV[_-]?(%s)$' % '|'.join(k[3:] for k in TV_PARENTAL_GUIDELINES), s)
3960     if m:
3961         return TV_PARENTAL_GUIDELINES['TV-' + m.group(1)]
3962     return None
3963
3964
3965 def strip_jsonp(code):
3966     return re.sub(
3967         r'''(?sx)^
3968             (?:window\.)?(?P<func_name>[a-zA-Z0-9_.$]*)
3969             (?:\s*&&\s*(?P=func_name))?
3970             \s*\(\s*(?P<callback_data>.*)\);?
3971             \s*?(?://[^\n]*)*$''',
3972         r'\g<callback_data>', code)
3973
3974
3975 def js_to_json(code):
3976     COMMENT_RE = r'/\*(?:(?!\*/).)*?\*/|//[^\n]*'
3977     SKIP_RE = r'\s*(?:{comment})?\s*'.format(comment=COMMENT_RE)
3978     INTEGER_TABLE = (
3979         (r'(?s)^(0[xX][0-9a-fA-F]+){skip}:?$'.format(skip=SKIP_RE), 16),
3980         (r'(?s)^(0+[0-7]+){skip}:?$'.format(skip=SKIP_RE), 8),
3981     )
3982
3983     def fix_kv(m):
3984         v = m.group(0)
3985         if v in ('true', 'false', 'null'):
3986             return v
3987         elif v.startswith('/*') or v.startswith('//') or v == ',':
3988             return ""
3989
3990         if v[0] in ("'", '"'):
3991             v = re.sub(r'(?s)\\.|"', lambda m: {
3992                 '"': '\\"',
3993                 "\\'": "'",
3994                 '\\\n': '',
3995                 '\\x': '\\u00',
3996             }.get(m.group(0), m.group(0)), v[1:-1])
3997
3998         for regex, base in INTEGER_TABLE:
3999             im = re.match(regex, v)
4000             if im:
4001                 i = int(im.group(1), base)
4002                 return '"%d":' % i if v.endswith(':') else '%d' % i
4003
4004         return '"%s"' % v
4005
4006     return re.sub(r'''(?sx)
4007         "(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"|
4008         '(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'|
4009         {comment}|,(?={skip}[\]}}])|
4010         (?:(?<![0-9])[eE]|[a-df-zA-DF-Z_])[.a-zA-Z_0-9]*|
4011         \b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:{skip}:)?|
4012         [0-9]+(?={skip}:)
4013         '''.format(comment=COMMENT_RE, skip=SKIP_RE), fix_kv, code)
4014
4015
4016 def qualities(quality_ids):
4017     """ Get a numeric quality value out of a list of possible values """
4018     def q(qid):
4019         try:
4020             return quality_ids.index(qid)
4021         except ValueError:
4022             return -1
4023     return q
4024
4025
4026 DEFAULT_OUTTMPL = '%(title)s-%(id)s.%(ext)s'
4027
4028
4029 def limit_length(s, length):
4030     """ Add ellipses to overly long strings """
4031     if s is None:
4032         return None
4033     ELLIPSES = '...'
4034     if len(s) > length:
4035         return s[:length - len(ELLIPSES)] + ELLIPSES
4036     return s
4037
4038
4039 def version_tuple(v):
4040     return tuple(int(e) for e in re.split(r'[-.]', v))
4041
4042
4043 def is_outdated_version(version, limit, assume_new=True):
4044     if not version:
4045         return not assume_new
4046     try:
4047         return version_tuple(version) < version_tuple(limit)
4048     except ValueError:
4049         return not assume_new
4050
4051
4052 def ytdl_is_updateable():
4053     """ Returns if youtube-dl can be updated with -U """
4054     from zipimport import zipimporter
4055
4056     return isinstance(globals().get('__loader__'), zipimporter) or hasattr(sys, 'frozen')
4057
4058
4059 def args_to_str(args):
4060     # Get a short string representation for a subprocess command
4061     return ' '.join(compat_shlex_quote(a) for a in args)
4062
4063
4064 def error_to_compat_str(err):
4065     err_str = str(err)
4066     # On python 2 error byte string must be decoded with proper
4067     # encoding rather than ascii
4068     if sys.version_info[0] < 3:
4069         err_str = err_str.decode(preferredencoding())
4070     return err_str
4071
4072
4073 def mimetype2ext(mt):
4074     if mt is None:
4075         return None
4076
4077     ext = {
4078         'audio/mp4': 'm4a',
4079         # Per RFC 3003, audio/mpeg can be .mp1, .mp2 or .mp3. Here use .mp3 as
4080         # it's the most popular one
4081         'audio/mpeg': 'mp3',
4082     }.get(mt)
4083     if ext is not None:
4084         return ext
4085
4086     _, _, res = mt.rpartition('/')
4087     res = res.split(';')[0].strip().lower()
4088
4089     return {
4090         '3gpp': '3gp',
4091         'smptett+xml': 'tt',
4092         'ttaf+xml': 'dfxp',
4093         'ttml+xml': 'ttml',
4094         'x-flv': 'flv',
4095         'x-mp4-fragmented': 'mp4',
4096         'x-ms-sami': 'sami',
4097         'x-ms-wmv': 'wmv',
4098         'mpegurl': 'm3u8',
4099         'x-mpegurl': 'm3u8',
4100         'vnd.apple.mpegurl': 'm3u8',
4101         'dash+xml': 'mpd',
4102         'f4m+xml': 'f4m',
4103         'hds+xml': 'f4m',
4104         'vnd.ms-sstr+xml': 'ism',
4105         'quicktime': 'mov',
4106         'mp2t': 'ts',
4107     }.get(res, res)
4108
4109
4110 def parse_codecs(codecs_str):
4111     # http://tools.ietf.org/html/rfc6381
4112     if not codecs_str:
4113         return {}
4114     splited_codecs = list(filter(None, map(
4115         lambda str: str.strip(), codecs_str.strip().strip(',').split(','))))
4116     vcodec, acodec = None, None
4117     for full_codec in splited_codecs:
4118         codec = full_codec.split('.')[0]
4119         if codec in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2', 'h263', 'h264', 'mp4v', 'hvc1', 'av01', 'theora'):
4120             if not vcodec:
4121                 vcodec = full_codec
4122         elif codec in ('mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-3', 'ec-3', 'eac3', 'dtsc', 'dtse', 'dtsh', 'dtsl'):
4123             if not acodec:
4124                 acodec = full_codec
4125         else:
4126             write_string('WARNING: Unknown codec %s\n' % full_codec, sys.stderr)
4127     if not vcodec and not acodec:
4128         if len(splited_codecs) == 2:
4129             return {
4130                 'vcodec': splited_codecs[0],
4131                 'acodec': splited_codecs[1],
4132             }
4133     else:
4134         return {
4135             'vcodec': vcodec or 'none',
4136             'acodec': acodec or 'none',
4137         }
4138     return {}
4139
4140
4141 def urlhandle_detect_ext(url_handle):
4142     getheader = url_handle.headers.get
4143
4144     cd = getheader('Content-Disposition')
4145     if cd:
4146         m = re.match(r'attachment;\s*filename="(?P<filename>[^"]+)"', cd)
4147         if m:
4148             e = determine_ext(m.group('filename'), default_ext=None)
4149             if e:
4150                 return e
4151
4152     return mimetype2ext(getheader('Content-Type'))
4153
4154
4155 def encode_data_uri(data, mime_type):
4156     return 'data:%s;base64,%s' % (mime_type, base64.b64encode(data).decode('ascii'))
4157
4158
4159 def age_restricted(content_limit, age_limit):
4160     """ Returns True iff the content should be blocked """
4161
4162     if age_limit is None:  # No limit set
4163         return False
4164     if content_limit is None:
4165         return False  # Content available for everyone
4166     return age_limit < content_limit
4167
4168
4169 def is_html(first_bytes):
4170     """ Detect whether a file contains HTML by examining its first bytes. """
4171
4172     BOMS = [
4173         (b'\xef\xbb\xbf', 'utf-8'),
4174         (b'\x00\x00\xfe\xff', 'utf-32-be'),
4175         (b'\xff\xfe\x00\x00', 'utf-32-le'),
4176         (b'\xff\xfe', 'utf-16-le'),
4177         (b'\xfe\xff', 'utf-16-be'),
4178     ]
4179     for bom, enc in BOMS:
4180         if first_bytes.startswith(bom):
4181             s = first_bytes[len(bom):].decode(enc, 'replace')
4182             break
4183     else:
4184         s = first_bytes.decode('utf-8', 'replace')
4185
4186     return re.match(r'^\s*<', s)
4187
4188
4189 def determine_protocol(info_dict):
4190     protocol = info_dict.get('protocol')
4191     if protocol is not None:
4192         return protocol
4193
4194     url = info_dict['url']
4195     if url.startswith('rtmp'):
4196         return 'rtmp'
4197     elif url.startswith('mms'):
4198         return 'mms'
4199     elif url.startswith('rtsp'):
4200         return 'rtsp'
4201
4202     ext = determine_ext(url)
4203     if ext == 'm3u8':
4204         return 'm3u8'
4205     elif ext == 'f4m':
4206         return 'f4m'
4207
4208     return compat_urllib_parse_urlparse(url).scheme
4209
4210
4211 def render_table(header_row, data):
4212     """ Render a list of rows, each as a list of values """
4213     table = [header_row] + data
4214     max_lens = [max(len(compat_str(v)) for v in col) for col in zip(*table)]
4215     format_str = ' '.join('%-' + compat_str(ml + 1) + 's' for ml in max_lens[:-1]) + '%s'
4216     return '\n'.join(format_str % tuple(row) for row in table)
4217
4218
4219 def _match_one(filter_part, dct):
4220     COMPARISON_OPERATORS = {
4221         '<': operator.lt,
4222         '<=': operator.le,
4223         '>': operator.gt,
4224         '>=': operator.ge,
4225         '=': operator.eq,
4226         '!=': operator.ne,
4227     }
4228     operator_rex = re.compile(r'''(?x)\s*
4229         (?P<key>[a-z_]+)
4230         \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
4231         (?:
4232             (?P<intval>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)|
4233             (?P<quote>["\'])(?P<quotedstrval>(?:\\.|(?!(?P=quote)|\\).)+?)(?P=quote)|
4234             (?P<strval>(?![0-9.])[a-z0-9A-Z]*)
4235         )
4236         \s*$
4237         ''' % '|'.join(map(re.escape, COMPARISON_OPERATORS.keys())))
4238     m = operator_rex.search(filter_part)
4239     if m:
4240         op = COMPARISON_OPERATORS[m.group('op')]
4241         actual_value = dct.get(m.group('key'))
4242         if (m.group('quotedstrval') is not None
4243             or m.group('strval') is not None
4244             # If the original field is a string and matching comparisonvalue is
4245             # a number we should respect the origin of the original field
4246             # and process comparison value as a string (see
4247             # https://github.com/ytdl-org/youtube-dl/issues/11082).
4248             or actual_value is not None and m.group('intval') is not None
4249                 and isinstance(actual_value, compat_str)):
4250             if m.group('op') not in ('=', '!='):
4251                 raise ValueError(
4252                     'Operator %s does not support string values!' % m.group('op'))
4253             comparison_value = m.group('quotedstrval') or m.group('strval') or m.group('intval')
4254             quote = m.group('quote')
4255             if quote is not None:
4256                 comparison_value = comparison_value.replace(r'\%s' % quote, quote)
4257         else:
4258             try:
4259                 comparison_value = int(m.group('intval'))
4260             except ValueError:
4261                 comparison_value = parse_filesize(m.group('intval'))
4262                 if comparison_value is None:
4263                     comparison_value = parse_filesize(m.group('intval') + 'B')
4264                 if comparison_value is None:
4265                     raise ValueError(
4266                         'Invalid integer value %r in filter part %r' % (
4267                             m.group('intval'), filter_part))
4268         if actual_value is None:
4269             return m.group('none_inclusive')
4270         return op(actual_value, comparison_value)
4271
4272     UNARY_OPERATORS = {
4273         '': lambda v: (v is True) if isinstance(v, bool) else (v is not None),
4274         '!': lambda v: (v is False) if isinstance(v, bool) else (v is None),
4275     }
4276     operator_rex = re.compile(r'''(?x)\s*
4277         (?P<op>%s)\s*(?P<key>[a-z_]+)
4278         \s*$
4279         ''' % '|'.join(map(re.escape, UNARY_OPERATORS.keys())))
4280     m = operator_rex.search(filter_part)
4281     if m:
4282         op = UNARY_OPERATORS[m.group('op')]
4283         actual_value = dct.get(m.group('key'))
4284         return op(actual_value)
4285
4286     raise ValueError('Invalid filter part %r' % filter_part)
4287
4288
4289 def match_str(filter_str, dct):
4290     """ Filter a dictionary with a simple string syntax. Returns True (=passes filter) or false """
4291
4292     return all(
4293         _match_one(filter_part, dct) for filter_part in filter_str.split('&'))
4294
4295
4296 def match_filter_func(filter_str):
4297     def _match_func(info_dict):
4298         if match_str(filter_str, info_dict):
4299             return None
4300         else:
4301             video_title = info_dict.get('title', info_dict.get('id', 'video'))
4302             return '%s does not pass filter %s, skipping ..' % (video_title, filter_str)
4303     return _match_func
4304
4305
4306 def parse_dfxp_time_expr(time_expr):
4307     if not time_expr:
4308         return
4309
4310     mobj = re.match(r'^(?P<time_offset>\d+(?:\.\d+)?)s?$', time_expr)
4311     if mobj:
4312         return float(mobj.group('time_offset'))
4313
4314     mobj = re.match(r'^(\d+):(\d\d):(\d\d(?:(?:\.|:)\d+)?)$', time_expr)
4315     if mobj:
4316         return 3600 * int(mobj.group(1)) + 60 * int(mobj.group(2)) + float(mobj.group(3).replace(':', '.'))
4317
4318
4319 def srt_subtitles_timecode(seconds):
4320     return '%02d:%02d:%02d,%03d' % (seconds / 3600, (seconds % 3600) / 60, seconds % 60, (seconds % 1) * 1000)
4321
4322
4323 def dfxp2srt(dfxp_data):
4324     '''
4325     @param dfxp_data A bytes-like object containing DFXP data
4326     @returns A unicode object containing converted SRT data
4327     '''
4328     LEGACY_NAMESPACES = (
4329         (b'http://www.w3.org/ns/ttml', [
4330             b'http://www.w3.org/2004/11/ttaf1',
4331             b'http://www.w3.org/2006/04/ttaf1',
4332             b'http://www.w3.org/2006/10/ttaf1',
4333         ]),
4334         (b'http://www.w3.org/ns/ttml#styling', [
4335             b'http://www.w3.org/ns/ttml#style',
4336         ]),
4337     )
4338
4339     SUPPORTED_STYLING = [
4340         'color',
4341         'fontFamily',
4342         'fontSize',
4343         'fontStyle',
4344         'fontWeight',
4345         'textDecoration'
4346     ]
4347
4348     _x = functools.partial(xpath_with_ns, ns_map={
4349         'xml': 'http://www.w3.org/XML/1998/namespace',
4350         'ttml': 'http://www.w3.org/ns/ttml',
4351         'tts': 'http://www.w3.org/ns/ttml#styling',
4352     })
4353
4354     styles = {}
4355     default_style = {}
4356
4357     class TTMLPElementParser(object):
4358         _out = ''
4359         _unclosed_elements = []
4360         _applied_styles = []
4361
4362         def start(self, tag, attrib):
4363             if tag in (_x('ttml:br'), 'br'):
4364                 self._out += '\n'
4365             else:
4366                 unclosed_elements = []
4367                 style = {}
4368                 element_style_id = attrib.get('style')
4369                 if default_style:
4370                     style.update(default_style)
4371                 if element_style_id:
4372                     style.update(styles.get(element_style_id, {}))
4373                 for prop in SUPPORTED_STYLING:
4374                     prop_val = attrib.get(_x('tts:' + prop))
4375                     if prop_val:
4376                         style[prop] = prop_val
4377                 if style:
4378                     font = ''
4379                     for k, v in sorted(style.items()):
4380                         if self._applied_styles and self._applied_styles[-1].get(k) == v:
4381                             continue
4382                         if k == 'color':
4383                             font += ' color="%s"' % v
4384                         elif k == 'fontSize':
4385                             font += ' size="%s"' % v
4386                         elif k == 'fontFamily':
4387                             font += ' face="%s"' % v
4388                         elif k == 'fontWeight' and v == 'bold':
4389                             self._out += '<b>'
4390                             unclosed_elements.append('b')
4391                         elif k == 'fontStyle' and v == 'italic':
4392                             self._out += '<i>'
4393                             unclosed_elements.append('i')
4394                         elif k == 'textDecoration' and v == 'underline':
4395                             self._out += '<u>'
4396                             unclosed_elements.append('u')
4397                     if font:
4398                         self._out += '<font' + font + '>'
4399                         unclosed_elements.append('font')
4400                     applied_style = {}
4401                     if self._applied_styles:
4402                         applied_style.update(self._applied_styles[-1])
4403                     applied_style.update(style)
4404                     self._applied_styles.append(applied_style)
4405                 self._unclosed_elements.append(unclosed_elements)
4406
4407         def end(self, tag):
4408             if tag not in (_x('ttml:br'), 'br'):
4409                 unclosed_elements = self._unclosed_elements.pop()
4410                 for element in reversed(unclosed_elements):
4411                     self._out += '</%s>' % element
4412                 if unclosed_elements and self._applied_styles:
4413                     self._applied_styles.pop()
4414
4415         def data(self, data):
4416             self._out += data
4417
4418         def close(self):
4419             return self._out.strip()
4420
4421     def parse_node(node):
4422         target = TTMLPElementParser()
4423         parser = xml.etree.ElementTree.XMLParser(target=target)
4424         parser.feed(xml.etree.ElementTree.tostring(node))
4425         return parser.close()
4426
4427     for k, v in LEGACY_NAMESPACES:
4428         for ns in v:
4429             dfxp_data = dfxp_data.replace(ns, k)
4430
4431     dfxp = compat_etree_fromstring(dfxp_data)
4432     out = []
4433     paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall('.//p')
4434
4435     if not paras:
4436         raise ValueError('Invalid dfxp/TTML subtitle')
4437
4438     repeat = False
4439     while True:
4440         for style in dfxp.findall(_x('.//ttml:style')):
4441             style_id = style.get('id') or style.get(_x('xml:id'))
4442             if not style_id:
4443                 continue
4444             parent_style_id = style.get('style')
4445             if parent_style_id:
4446                 if parent_style_id not in styles:
4447                     repeat = True
4448                     continue
4449                 styles[style_id] = styles[parent_style_id].copy()
4450             for prop in SUPPORTED_STYLING:
4451                 prop_val = style.get(_x('tts:' + prop))
4452                 if prop_val:
4453                     styles.setdefault(style_id, {})[prop] = prop_val
4454         if repeat:
4455             repeat = False
4456         else:
4457             break
4458
4459     for p in ('body', 'div'):
4460         ele = xpath_element(dfxp, [_x('.//ttml:' + p), './/' + p])
4461         if ele is None:
4462             continue
4463         style = styles.get(ele.get('style'))
4464         if not style:
4465             continue
4466         default_style.update(style)
4467
4468     for para, index in zip(paras, itertools.count(1)):
4469         begin_time = parse_dfxp_time_expr(para.attrib.get('begin'))
4470         end_time = parse_dfxp_time_expr(para.attrib.get('end'))
4471         dur = parse_dfxp_time_expr(para.attrib.get('dur'))
4472         if begin_time is None:
4473             continue
4474         if not end_time:
4475             if not dur:
4476                 continue
4477             end_time = begin_time + dur
4478         out.append('%d\n%s --> %s\n%s\n\n' % (
4479             index,
4480             srt_subtitles_timecode(begin_time),
4481             srt_subtitles_timecode(end_time),
4482             parse_node(para)))
4483
4484     return ''.join(out)
4485
4486
4487 def cli_option(params, command_option, param):
4488     param = params.get(param)
4489     if param:
4490         param = compat_str(param)
4491     return [command_option, param] if param is not None else []
4492
4493
4494 def cli_bool_option(params, command_option, param, true_value='true', false_value='false', separator=None):
4495     param = params.get(param)
4496     if param is None:
4497         return []
4498     assert isinstance(param, bool)
4499     if separator:
4500         return [command_option + separator + (true_value if param else false_value)]
4501     return [command_option, true_value if param else false_value]
4502
4503
4504 def cli_valueless_option(params, command_option, param, expected_value=True):
4505     param = params.get(param)
4506     return [command_option] if param == expected_value else []
4507
4508
4509 def cli_configuration_args(params, param, default=[]):
4510     ex_args = params.get(param)
4511     if ex_args is None:
4512         return default
4513     assert isinstance(ex_args, list)
4514     return ex_args
4515
4516
4517 class ISO639Utils(object):
4518     # See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
4519     _lang_map = {
4520         'aa': 'aar',
4521         'ab': 'abk',
4522         'ae': 'ave',
4523         'af': 'afr',
4524         'ak': 'aka',
4525         'am': 'amh',
4526         'an': 'arg',
4527         'ar': 'ara',
4528         'as': 'asm',
4529         'av': 'ava',
4530         'ay': 'aym',
4531         'az': 'aze',
4532         'ba': 'bak',
4533         'be': 'bel',
4534         'bg': 'bul',
4535         'bh': 'bih',
4536         'bi': 'bis',
4537         'bm': 'bam',
4538         'bn': 'ben',
4539         'bo': 'bod',
4540         'br': 'bre',
4541         'bs': 'bos',
4542         'ca': 'cat',
4543         'ce': 'che',
4544         'ch': 'cha',
4545         'co': 'cos',
4546         'cr': 'cre',
4547         'cs': 'ces',
4548         'cu': 'chu',
4549         'cv': 'chv',
4550         'cy': 'cym',
4551         'da': 'dan',
4552         'de': 'deu',
4553         'dv': 'div',
4554         'dz': 'dzo',
4555         'ee': 'ewe',
4556         'el': 'ell',
4557         'en': 'eng',
4558         'eo': 'epo',
4559         'es': 'spa',
4560         'et': 'est',
4561         'eu': 'eus',
4562         'fa': 'fas',
4563         'ff': 'ful',
4564         'fi': 'fin',
4565         'fj': 'fij',
4566         'fo': 'fao',
4567         'fr': 'fra',
4568         'fy': 'fry',
4569         'ga': 'gle',
4570         'gd': 'gla',
4571         'gl': 'glg',
4572         'gn': 'grn',
4573         'gu': 'guj',
4574         'gv': 'glv',
4575         'ha': 'hau',
4576         'he': 'heb',
4577         'iw': 'heb',  # Replaced by he in 1989 revision
4578         'hi': 'hin',
4579         'ho': 'hmo',
4580         'hr': 'hrv',
4581         'ht': 'hat',
4582         'hu': 'hun',
4583         'hy': 'hye',
4584         'hz': 'her',
4585         'ia': 'ina',
4586         'id': 'ind',
4587         'in': 'ind',  # Replaced by id in 1989 revision
4588         'ie': 'ile',
4589         'ig': 'ibo',
4590         'ii': 'iii',
4591         'ik': 'ipk',
4592         'io': 'ido',
4593         'is': 'isl',
4594         'it': 'ita',
4595         'iu': 'iku',
4596         'ja': 'jpn',
4597         'jv': 'jav',
4598         'ka': 'kat',
4599         'kg': 'kon',
4600         'ki': 'kik',
4601         'kj': 'kua',
4602         'kk': 'kaz',
4603         'kl': 'kal',
4604         'km': 'khm',
4605         'kn': 'kan',
4606         'ko': 'kor',
4607         'kr': 'kau',
4608         'ks': 'kas',
4609         'ku': 'kur',
4610         'kv': 'kom',
4611         'kw': 'cor',
4612         'ky': 'kir',
4613         'la': 'lat',
4614         'lb': 'ltz',
4615         'lg': 'lug',
4616         'li': 'lim',
4617         'ln': 'lin',
4618         'lo': 'lao',
4619         'lt': 'lit',
4620         'lu': 'lub',
4621         'lv': 'lav',
4622         'mg': 'mlg',
4623         'mh': 'mah',
4624         'mi': 'mri',
4625         'mk': 'mkd',
4626         'ml': 'mal',
4627         'mn': 'mon',
4628         'mr': 'mar',
4629         'ms': 'msa',
4630         'mt': 'mlt',
4631         'my': 'mya',
4632         'na': 'nau',
4633         'nb': 'nob',
4634         'nd': 'nde',
4635         'ne': 'nep',
4636         'ng': 'ndo',
4637         'nl': 'nld',
4638         'nn': 'nno',
4639         'no': 'nor',
4640         'nr': 'nbl',
4641         'nv': 'nav',
4642         'ny': 'nya',
4643         'oc': 'oci',
4644         'oj': 'oji',
4645         'om': 'orm',
4646         'or': 'ori',
4647         'os': 'oss',
4648         'pa': 'pan',
4649         'pi': 'pli',
4650         'pl': 'pol',
4651         'ps': 'pus',
4652         'pt': 'por',
4653         'qu': 'que',
4654         'rm': 'roh',
4655         'rn': 'run',
4656         'ro': 'ron',
4657         'ru': 'rus',
4658         'rw': 'kin',
4659         'sa': 'san',
4660         'sc': 'srd',
4661         'sd': 'snd',
4662         'se': 'sme',
4663         'sg': 'sag',
4664         'si': 'sin',
4665         'sk': 'slk',
4666         'sl': 'slv',
4667         'sm': 'smo',
4668         'sn': 'sna',
4669         'so': 'som',
4670         'sq': 'sqi',
4671         'sr': 'srp',
4672         'ss': 'ssw',
4673         'st': 'sot',
4674         'su': 'sun',
4675         'sv': 'swe',
4676         'sw': 'swa',
4677         'ta': 'tam',
4678         'te': 'tel',
4679         'tg': 'tgk',
4680         'th': 'tha',
4681         'ti': 'tir',
4682         'tk': 'tuk',
4683         'tl': 'tgl',
4684         'tn': 'tsn',
4685         'to': 'ton',
4686         'tr': 'tur',
4687         'ts': 'tso',
4688         'tt': 'tat',
4689         'tw': 'twi',
4690         'ty': 'tah',
4691         'ug': 'uig',
4692         'uk': 'ukr',
4693         'ur': 'urd',
4694         'uz': 'uzb',
4695         've': 'ven',
4696         'vi': 'vie',
4697         'vo': 'vol',
4698         'wa': 'wln',
4699         'wo': 'wol',
4700         'xh': 'xho',
4701         'yi': 'yid',
4702         'ji': 'yid',  # Replaced by yi in 1989 revision
4703         'yo': 'yor',
4704         'za': 'zha',
4705         'zh': 'zho',
4706         'zu': 'zul',
4707     }
4708
4709     @classmethod
4710     def short2long(cls, code):
4711         """Convert language code from ISO 639-1 to ISO 639-2/T"""
4712         return cls._lang_map.get(code[:2])
4713
4714     @classmethod
4715     def long2short(cls, code):
4716         """Convert language code from ISO 639-2/T to ISO 639-1"""
4717         for short_name, long_name in cls._lang_map.items():
4718             if long_name == code:
4719                 return short_name
4720
4721
4722 class ISO3166Utils(object):
4723     # From http://data.okfn.org/data/core/country-list
4724     _country_map = {
4725         'AF': 'Afghanistan',
4726         'AX': 'Åland Islands',
4727         'AL': 'Albania',
4728         'DZ': 'Algeria',
4729         'AS': 'American Samoa',
4730         'AD': 'Andorra',
4731         'AO': 'Angola',
4732         'AI': 'Anguilla',
4733         'AQ': 'Antarctica',
4734         'AG': 'Antigua and Barbuda',
4735         'AR': 'Argentina',
4736         'AM': 'Armenia',
4737         'AW': 'Aruba',
4738         'AU': 'Australia',
4739         'AT': 'Austria',
4740         'AZ': 'Azerbaijan',
4741         'BS': 'Bahamas',
4742         'BH': 'Bahrain',
4743         'BD': 'Bangladesh',
4744         'BB': 'Barbados',
4745         'BY': 'Belarus',
4746         'BE': 'Belgium',
4747         'BZ': 'Belize',
4748         'BJ': 'Benin',
4749         'BM': 'Bermuda',
4750         'BT': 'Bhutan',
4751         'BO': 'Bolivia, Plurinational State of',
4752         'BQ': 'Bonaire, Sint Eustatius and Saba',
4753         'BA': 'Bosnia and Herzegovina',
4754         'BW': 'Botswana',
4755         'BV': 'Bouvet Island',
4756         'BR': 'Brazil',
4757         'IO': 'British Indian Ocean Territory',
4758         'BN': 'Brunei Darussalam',
4759         'BG': 'Bulgaria',
4760         'BF': 'Burkina Faso',
4761         'BI': 'Burundi',
4762         'KH': 'Cambodia',
4763         'CM': 'Cameroon',
4764         'CA': 'Canada',
4765         'CV': 'Cape Verde',
4766         'KY': 'Cayman Islands',
4767         'CF': 'Central African Republic',
4768         'TD': 'Chad',
4769         'CL': 'Chile',
4770         'CN': 'China',
4771         'CX': 'Christmas Island',
4772         'CC': 'Cocos (Keeling) Islands',
4773         'CO': 'Colombia',
4774         'KM': 'Comoros',
4775         'CG': 'Congo',
4776         'CD': 'Congo, the Democratic Republic of the',
4777         'CK': 'Cook Islands',
4778         'CR': 'Costa Rica',
4779         'CI': 'Côte d\'Ivoire',
4780         'HR': 'Croatia',
4781         'CU': 'Cuba',
4782         'CW': 'Curaçao',
4783         'CY': 'Cyprus',
4784         'CZ': 'Czech Republic',
4785         'DK': 'Denmark',
4786         'DJ': 'Djibouti',
4787         'DM': 'Dominica',
4788         'DO': 'Dominican Republic',
4789         'EC': 'Ecuador',
4790         'EG': 'Egypt',
4791         'SV': 'El Salvador',
4792         'GQ': 'Equatorial Guinea',
4793         'ER': 'Eritrea',
4794         'EE': 'Estonia',
4795         'ET': 'Ethiopia',
4796         'FK': 'Falkland Islands (Malvinas)',
4797         'FO': 'Faroe Islands',
4798         'FJ': 'Fiji',
4799         'FI': 'Finland',
4800         'FR': 'France',
4801         'GF': 'French Guiana',
4802         'PF': 'French Polynesia',
4803         'TF': 'French Southern Territories',
4804         'GA': 'Gabon',
4805         'GM': 'Gambia',
4806         'GE': 'Georgia',
4807         'DE': 'Germany',
4808         'GH': 'Ghana',
4809         'GI': 'Gibraltar',
4810         'GR': 'Greece',
4811         'GL': 'Greenland',
4812         'GD': 'Grenada',
4813         'GP': 'Guadeloupe',
4814         'GU': 'Guam',
4815         'GT': 'Guatemala',
4816         'GG': 'Guernsey',
4817         'GN': 'Guinea',
4818         'GW': 'Guinea-Bissau',
4819         'GY': 'Guyana',
4820         'HT': 'Haiti',
4821         'HM': 'Heard Island and McDonald Islands',
4822         'VA': 'Holy See (Vatican City State)',
4823         'HN': 'Honduras',
4824         'HK': 'Hong Kong',
4825         'HU': 'Hungary',
4826         'IS': 'Iceland',
4827         'IN': 'India',
4828         'ID': 'Indonesia',
4829         'IR': 'Iran, Islamic Republic of',
4830         'IQ': 'Iraq',
4831         'IE': 'Ireland',
4832         'IM': 'Isle of Man',
4833         'IL': 'Israel',
4834         'IT': 'Italy',
4835         'JM': 'Jamaica',
4836         'JP': 'Japan',
4837         'JE': 'Jersey',
4838         'JO': 'Jordan',
4839         'KZ': 'Kazakhstan',
4840         'KE': 'Kenya',
4841         'KI': 'Kiribati',
4842         'KP': 'Korea, Democratic People\'s Republic of',
4843         'KR': 'Korea, Republic of',
4844         'KW': 'Kuwait',
4845         'KG': 'Kyrgyzstan',
4846         'LA': 'Lao People\'s Democratic Republic',
4847         'LV': 'Latvia',
4848         'LB': 'Lebanon',
4849         'LS': 'Lesotho',
4850         'LR': 'Liberia',
4851         'LY': 'Libya',
4852         'LI': 'Liechtenstein',
4853         'LT': 'Lithuania',
4854         'LU': 'Luxembourg',
4855         'MO': 'Macao',
4856         'MK': 'Macedonia, the Former Yugoslav Republic of',
4857         'MG': 'Madagascar',
4858         'MW': 'Malawi',
4859         'MY': 'Malaysia',
4860         'MV': 'Maldives',
4861         'ML': 'Mali',
4862         'MT': 'Malta',
4863         'MH': 'Marshall Islands',
4864         'MQ': 'Martinique',
4865         'MR': 'Mauritania',
4866         'MU': 'Mauritius',
4867         'YT': 'Mayotte',
4868         'MX': 'Mexico',
4869         'FM': 'Micronesia, Federated States of',
4870         'MD': 'Moldova, Republic of',
4871         'MC': 'Monaco',
4872         'MN': 'Mongolia',
4873         'ME': 'Montenegro',
4874         'MS': 'Montserrat',
4875         'MA': 'Morocco',
4876         'MZ': 'Mozambique',
4877         'MM': 'Myanmar',
4878         'NA': 'Namibia',
4879         'NR': 'Nauru',
4880         'NP': 'Nepal',
4881         'NL': 'Netherlands',
4882         'NC': 'New Caledonia',
4883         'NZ': 'New Zealand',
4884         'NI': 'Nicaragua',
4885         'NE': 'Niger',
4886         'NG': 'Nigeria',
4887         'NU': 'Niue',
4888         'NF': 'Norfolk Island',
4889         'MP': 'Northern Mariana Islands',
4890         'NO': 'Norway',
4891         'OM': 'Oman',
4892         'PK': 'Pakistan',
4893         'PW': 'Palau',
4894         'PS': 'Palestine, State of',
4895         'PA': 'Panama',
4896         'PG': 'Papua New Guinea',
4897         'PY': 'Paraguay',
4898         'PE': 'Peru',
4899         'PH': 'Philippines',
4900         'PN': 'Pitcairn',
4901         'PL': 'Poland',
4902         'PT': 'Portugal',
4903         'PR': 'Puerto Rico',
4904         'QA': 'Qatar',
4905         'RE': 'Réunion',
4906         'RO': 'Romania',
4907         'RU': 'Russian Federation',
4908         'RW': 'Rwanda',
4909         'BL': 'Saint Barthélemy',
4910         'SH': 'Saint Helena, Ascension and Tristan da Cunha',
4911         'KN': 'Saint Kitts and Nevis',
4912         'LC': 'Saint Lucia',
4913         'MF': 'Saint Martin (French part)',
4914         'PM': 'Saint Pierre and Miquelon',
4915         'VC': 'Saint Vincent and the Grenadines',
4916         'WS': 'Samoa',
4917         'SM': 'San Marino',
4918         'ST': 'Sao Tome and Principe',
4919         'SA': 'Saudi Arabia',
4920         'SN': 'Senegal',
4921         'RS': 'Serbia',
4922         'SC': 'Seychelles',
4923         'SL': 'Sierra Leone',
4924         'SG': 'Singapore',
4925         'SX': 'Sint Maarten (Dutch part)',
4926         'SK': 'Slovakia',
4927         'SI': 'Slovenia',
4928         'SB': 'Solomon Islands',
4929         'SO': 'Somalia',
4930         'ZA': 'South Africa',
4931         'GS': 'South Georgia and the South Sandwich Islands',
4932         'SS': 'South Sudan',
4933         'ES': 'Spain',
4934         'LK': 'Sri Lanka',
4935         'SD': 'Sudan',
4936         'SR': 'Suriname',
4937         'SJ': 'Svalbard and Jan Mayen',
4938         'SZ': 'Swaziland',
4939         'SE': 'Sweden',
4940         'CH': 'Switzerland',
4941         'SY': 'Syrian Arab Republic',
4942         'TW': 'Taiwan, Province of China',
4943         'TJ': 'Tajikistan',
4944         'TZ': 'Tanzania, United Republic of',
4945         'TH': 'Thailand',
4946         'TL': 'Timor-Leste',
4947         'TG': 'Togo',
4948         'TK': 'Tokelau',
4949         'TO': 'Tonga',
4950         'TT': 'Trinidad and Tobago',
4951         'TN': 'Tunisia',
4952         'TR': 'Turkey',
4953         'TM': 'Turkmenistan',
4954         'TC': 'Turks and Caicos Islands',
4955         'TV': 'Tuvalu',
4956         'UG': 'Uganda',
4957         'UA': 'Ukraine',
4958         'AE': 'United Arab Emirates',
4959         'GB': 'United Kingdom',
4960         'US': 'United States',
4961         'UM': 'United States Minor Outlying Islands',
4962         'UY': 'Uruguay',
4963         'UZ': 'Uzbekistan',
4964         'VU': 'Vanuatu',
4965         'VE': 'Venezuela, Bolivarian Republic of',
4966         'VN': 'Viet Nam',
4967         'VG': 'Virgin Islands, British',
4968         'VI': 'Virgin Islands, U.S.',
4969         'WF': 'Wallis and Futuna',
4970         'EH': 'Western Sahara',
4971         'YE': 'Yemen',
4972         'ZM': 'Zambia',
4973         'ZW': 'Zimbabwe',
4974     }
4975
4976     @classmethod
4977     def short2full(cls, code):
4978         """Convert an ISO 3166-2 country code to the corresponding full name"""
4979         return cls._country_map.get(code.upper())
4980
4981
4982 class GeoUtils(object):
4983     # Major IPv4 address blocks per country
4984     _country_ip_map = {
4985         'AD': '46.172.224.0/19',
4986         'AE': '94.200.0.0/13',
4987         'AF': '149.54.0.0/17',
4988         'AG': '209.59.64.0/18',
4989         'AI': '204.14.248.0/21',
4990         'AL': '46.99.0.0/16',
4991         'AM': '46.70.0.0/15',
4992         'AO': '105.168.0.0/13',
4993         'AP': '182.50.184.0/21',
4994         'AQ': '23.154.160.0/24',
4995         'AR': '181.0.0.0/12',
4996         'AS': '202.70.112.0/20',
4997         'AT': '77.116.0.0/14',
4998         'AU': '1.128.0.0/11',
4999         'AW': '181.41.0.0/18',
5000         'AX': '185.217.4.0/22',
5001         'AZ': '5.197.0.0/16',
5002         'BA': '31.176.128.0/17',
5003         'BB': '65.48.128.0/17',
5004         'BD': '114.130.0.0/16',
5005         'BE': '57.0.0.0/8',
5006         'BF': '102.178.0.0/15',
5007         'BG': '95.42.0.0/15',
5008         'BH': '37.131.0.0/17',
5009         'BI': '154.117.192.0/18',
5010         'BJ': '137.255.0.0/16',
5011         'BL': '185.212.72.0/23',
5012         'BM': '196.12.64.0/18',
5013         'BN': '156.31.0.0/16',
5014         'BO': '161.56.0.0/16',
5015         'BQ': '161.0.80.0/20',
5016         'BR': '191.128.0.0/12',
5017         'BS': '24.51.64.0/18',
5018         'BT': '119.2.96.0/19',
5019         'BW': '168.167.0.0/16',
5020         'BY': '178.120.0.0/13',
5021         'BZ': '179.42.192.0/18',
5022         'CA': '99.224.0.0/11',
5023         'CD': '41.243.0.0/16',
5024         'CF': '197.242.176.0/21',
5025         'CG': '160.113.0.0/16',
5026         'CH': '85.0.0.0/13',
5027         'CI': '102.136.0.0/14',
5028         'CK': '202.65.32.0/19',
5029         'CL': '152.172.0.0/14',
5030         'CM': '102.244.0.0/14',
5031         'CN': '36.128.0.0/10',
5032         'CO': '181.240.0.0/12',
5033         'CR': '201.192.0.0/12',
5034         'CU': '152.206.0.0/15',
5035         'CV': '165.90.96.0/19',
5036         'CW': '190.88.128.0/17',
5037         'CY': '31.153.0.0/16',
5038         'CZ': '88.100.0.0/14',
5039         'DE': '53.0.0.0/8',
5040         'DJ': '197.241.0.0/17',
5041         'DK': '87.48.0.0/12',
5042         'DM': '192.243.48.0/20',
5043         'DO': '152.166.0.0/15',
5044         'DZ': '41.96.0.0/12',
5045         'EC': '186.68.0.0/15',
5046         'EE': '90.190.0.0/15',
5047         'EG': '156.160.0.0/11',
5048         'ER': '196.200.96.0/20',
5049         'ES': '88.0.0.0/11',
5050         'ET': '196.188.0.0/14',
5051         'EU': '2.16.0.0/13',
5052         'FI': '91.152.0.0/13',
5053         'FJ': '144.120.0.0/16',
5054         'FK': '80.73.208.0/21',
5055         'FM': '119.252.112.0/20',
5056         'FO': '88.85.32.0/19',
5057         'FR': '90.0.0.0/9',
5058         'GA': '41.158.0.0/15',
5059         'GB': '25.0.0.0/8',
5060         'GD': '74.122.88.0/21',
5061         'GE': '31.146.0.0/16',
5062         'GF': '161.22.64.0/18',
5063         'GG': '62.68.160.0/19',
5064         'GH': '154.160.0.0/12',
5065         'GI': '95.164.0.0/16',
5066         'GL': '88.83.0.0/19',
5067         'GM': '160.182.0.0/15',
5068         'GN': '197.149.192.0/18',
5069         'GP': '104.250.0.0/19',
5070         'GQ': '105.235.224.0/20',
5071         'GR': '94.64.0.0/13',
5072         'GT': '168.234.0.0/16',
5073         'GU': '168.123.0.0/16',
5074         'GW': '197.214.80.0/20',
5075         'GY': '181.41.64.0/18',
5076         'HK': '113.252.0.0/14',
5077         'HN': '181.210.0.0/16',
5078         'HR': '93.136.0.0/13',
5079         'HT': '148.102.128.0/17',
5080         'HU': '84.0.0.0/14',
5081         'ID': '39.192.0.0/10',
5082         'IE': '87.32.0.0/12',
5083         'IL': '79.176.0.0/13',
5084         'IM': '5.62.80.0/20',
5085         'IN': '117.192.0.0/10',
5086         'IO': '203.83.48.0/21',
5087         'IQ': '37.236.0.0/14',
5088         'IR': '2.176.0.0/12',
5089         'IS': '82.221.0.0/16',
5090         'IT': '79.0.0.0/10',
5091         'JE': '87.244.64.0/18',
5092         'JM': '72.27.0.0/17',
5093         'JO': '176.29.0.0/16',
5094         'JP': '133.0.0.0/8',
5095         'KE': '105.48.0.0/12',
5096         'KG': '158.181.128.0/17',
5097         'KH': '36.37.128.0/17',
5098         'KI': '103.25.140.0/22',
5099         'KM': '197.255.224.0/20',
5100         'KN': '198.167.192.0/19',
5101         'KP': '175.45.176.0/22',
5102         'KR': '175.192.0.0/10',
5103         'KW': '37.36.0.0/14',
5104         'KY': '64.96.0.0/15',
5105         'KZ': '2.72.0.0/13',
5106         'LA': '115.84.64.0/18',
5107         'LB': '178.135.0.0/16',
5108         'LC': '24.92.144.0/20',
5109         'LI': '82.117.0.0/19',
5110         'LK': '112.134.0.0/15',
5111         'LR': '102.183.0.0/16',
5112         'LS': '129.232.0.0/17',
5113         'LT': '78.56.0.0/13',
5114         'LU': '188.42.0.0/16',
5115         'LV': '46.109.0.0/16',
5116         'LY': '41.252.0.0/14',
5117         'MA': '105.128.0.0/11',
5118         'MC': '88.209.64.0/18',
5119         'MD': '37.246.0.0/16',
5120         'ME': '178.175.0.0/17',
5121         'MF': '74.112.232.0/21',
5122         'MG': '154.126.0.0/17',
5123         'MH': '117.103.88.0/21',
5124         'MK': '77.28.0.0/15',
5125         'ML': '154.118.128.0/18',
5126         'MM': '37.111.0.0/17',
5127         'MN': '49.0.128.0/17',
5128         'MO': '60.246.0.0/16',
5129         'MP': '202.88.64.0/20',
5130         'MQ': '109.203.224.0/19',
5131         'MR': '41.188.64.0/18',
5132         'MS': '208.90.112.0/22',
5133         'MT': '46.11.0.0/16',
5134         'MU': '105.16.0.0/12',
5135         'MV': '27.114.128.0/18',
5136         'MW': '102.70.0.0/15',
5137         'MX': '187.192.0.0/11',
5138         'MY': '175.136.0.0/13',
5139         'MZ': '197.218.0.0/15',
5140         'NA': '41.182.0.0/16',
5141         'NC': '101.101.0.0/18',
5142         'NE': '197.214.0.0/18',
5143         'NF': '203.17.240.0/22',
5144         'NG': '105.112.0.0/12',
5145         'NI': '186.76.0.0/15',
5146         'NL': '145.96.0.0/11',
5147         'NO': '84.208.0.0/13',
5148         'NP': '36.252.0.0/15',
5149         'NR': '203.98.224.0/19',
5150         'NU': '49.156.48.0/22',
5151         'NZ': '49.224.0.0/14',
5152         'OM': '5.36.0.0/15',
5153         'PA': '186.72.0.0/15',
5154         'PE': '186.160.0.0/14',
5155         'PF': '123.50.64.0/18',
5156         'PG': '124.240.192.0/19',
5157         'PH': '49.144.0.0/13',
5158         'PK': '39.32.0.0/11',
5159         'PL': '83.0.0.0/11',
5160         'PM': '70.36.0.0/20',
5161         'PR': '66.50.0.0/16',
5162         'PS': '188.161.0.0/16',
5163         'PT': '85.240.0.0/13',
5164         'PW': '202.124.224.0/20',
5165         'PY': '181.120.0.0/14',
5166         'QA': '37.210.0.0/15',
5167         'RE': '102.35.0.0/16',
5168         'RO': '79.112.0.0/13',
5169         'RS': '93.86.0.0/15',
5170         'RU': '5.136.0.0/13',
5171         'RW': '41.186.0.0/16',
5172         'SA': '188.48.0.0/13',
5173         'SB': '202.1.160.0/19',
5174         'SC': '154.192.0.0/11',
5175         'SD': '102.120.0.0/13',
5176         'SE': '78.64.0.0/12',
5177         'SG': '8.128.0.0/10',
5178         'SI': '188.196.0.0/14',
5179         'SK': '78.98.0.0/15',
5180         'SL': '102.143.0.0/17',
5181         'SM': '89.186.32.0/19',
5182         'SN': '41.82.0.0/15',
5183         'SO': '154.115.192.0/18',
5184         'SR': '186.179.128.0/17',
5185         'SS': '105.235.208.0/21',
5186         'ST': '197.159.160.0/19',
5187         'SV': '168.243.0.0/16',
5188         'SX': '190.102.0.0/20',
5189         'SY': '5.0.0.0/16',
5190         'SZ': '41.84.224.0/19',
5191         'TC': '65.255.48.0/20',
5192         'TD': '154.68.128.0/19',
5193         'TG': '196.168.0.0/14',
5194         'TH': '171.96.0.0/13',
5195         'TJ': '85.9.128.0/18',
5196         'TK': '27.96.24.0/21',
5197         'TL': '180.189.160.0/20',
5198         'TM': '95.85.96.0/19',
5199         'TN': '197.0.0.0/11',
5200         'TO': '175.176.144.0/21',
5201         'TR': '78.160.0.0/11',
5202         'TT': '186.44.0.0/15',
5203         'TV': '202.2.96.0/19',
5204         'TW': '120.96.0.0/11',
5205         'TZ': '156.156.0.0/14',
5206         'UA': '37.52.0.0/14',
5207         'UG': '102.80.0.0/13',
5208         'US': '6.0.0.0/8',
5209         'UY': '167.56.0.0/13',
5210         'UZ': '84.54.64.0/18',
5211         'VA': '212.77.0.0/19',
5212         'VC': '207.191.240.0/21',
5213         'VE': '186.88.0.0/13',
5214         'VG': '66.81.192.0/20',
5215         'VI': '146.226.0.0/16',
5216         'VN': '14.160.0.0/11',
5217         'VU': '202.80.32.0/20',
5218         'WF': '117.20.32.0/21',
5219         'WS': '202.4.32.0/19',
5220         'YE': '134.35.0.0/16',
5221         'YT': '41.242.116.0/22',
5222         'ZA': '41.0.0.0/11',
5223         'ZM': '102.144.0.0/13',
5224         'ZW': '102.177.192.0/18',
5225     }
5226
5227     @classmethod
5228     def random_ipv4(cls, code_or_block):
5229         if len(code_or_block) == 2:
5230             block = cls._country_ip_map.get(code_or_block.upper())
5231             if not block:
5232                 return None
5233         else:
5234             block = code_or_block
5235         addr, preflen = block.split('/')
5236         addr_min = compat_struct_unpack('!L', socket.inet_aton(addr))[0]
5237         addr_max = addr_min | (0xffffffff >> int(preflen))
5238         return compat_str(socket.inet_ntoa(
5239             compat_struct_pack('!L', random.randint(addr_min, addr_max))))
5240
5241
5242 class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
5243     def __init__(self, proxies=None):
5244         # Set default handlers
5245         for type in ('http', 'https'):
5246             setattr(self, '%s_open' % type,
5247                     lambda r, proxy='__noproxy__', type=type, meth=self.proxy_open:
5248                         meth(r, proxy, type))
5249         compat_urllib_request.ProxyHandler.__init__(self, proxies)
5250
5251     def proxy_open(self, req, proxy, type):
5252         req_proxy = req.headers.get('Ytdl-request-proxy')
5253         if req_proxy is not None:
5254             proxy = req_proxy
5255             del req.headers['Ytdl-request-proxy']
5256
5257         if proxy == '__noproxy__':
5258             return None  # No Proxy
5259         if compat_urlparse.urlparse(proxy).scheme.lower() in ('socks', 'socks4', 'socks4a', 'socks5'):
5260             req.add_header('Ytdl-socks-proxy', proxy)
5261             # youtube-dl's http/https handlers do wrapping the socket with socks
5262             return None
5263         return compat_urllib_request.ProxyHandler.proxy_open(
5264             self, req, proxy, type)
5265
5266
5267 # Both long_to_bytes and bytes_to_long are adapted from PyCrypto, which is
5268 # released into Public Domain
5269 # https://github.com/dlitz/pycrypto/blob/master/lib/Crypto/Util/number.py#L387
5270
5271 def long_to_bytes(n, blocksize=0):
5272     """long_to_bytes(n:long, blocksize:int) : string
5273     Convert a long integer to a byte string.
5274
5275     If optional blocksize is given and greater than zero, pad the front of the
5276     byte string with binary zeros so that the length is a multiple of
5277     blocksize.
5278     """
5279     # after much testing, this algorithm was deemed to be the fastest
5280     s = b''
5281     n = int(n)
5282     while n > 0:
5283         s = compat_struct_pack('>I', n & 0xffffffff) + s
5284         n = n >> 32
5285     # strip off leading zeros
5286     for i in range(len(s)):
5287         if s[i] != b'\000'[0]:
5288             break
5289     else:
5290         # only happens when n == 0
5291         s = b'\000'
5292         i = 0
5293     s = s[i:]
5294     # add back some pad bytes.  this could be done more efficiently w.r.t. the
5295     # de-padding being done above, but sigh...
5296     if blocksize > 0 and len(s) % blocksize:
5297         s = (blocksize - len(s) % blocksize) * b'\000' + s
5298     return s
5299
5300
5301 def bytes_to_long(s):
5302     """bytes_to_long(string) : long
5303     Convert a byte string to a long integer.
5304
5305     This is (essentially) the inverse of long_to_bytes().
5306     """
5307     acc = 0
5308     length = len(s)
5309     if length % 4:
5310         extra = (4 - length % 4)
5311         s = b'\000' * extra + s
5312         length = length + extra
5313     for i in range(0, length, 4):
5314         acc = (acc << 32) + compat_struct_unpack('>I', s[i:i + 4])[0]
5315     return acc
5316
5317
5318 def ohdave_rsa_encrypt(data, exponent, modulus):
5319     '''
5320     Implement OHDave's RSA algorithm. See http://www.ohdave.com/rsa/
5321
5322     Input:
5323         data: data to encrypt, bytes-like object
5324         exponent, modulus: parameter e and N of RSA algorithm, both integer
5325     Output: hex string of encrypted data
5326
5327     Limitation: supports one block encryption only
5328     '''
5329
5330     payload = int(binascii.hexlify(data[::-1]), 16)
5331     encrypted = pow(payload, exponent, modulus)
5332     return '%x' % encrypted
5333
5334
5335 def pkcs1pad(data, length):
5336     """
5337     Padding input data with PKCS#1 scheme
5338
5339     @param {int[]} data        input data
5340     @param {int}   length      target length
5341     @returns {int[]}           padded data
5342     """
5343     if len(data) > length - 11:
5344         raise ValueError('Input data too long for PKCS#1 padding')
5345
5346     pseudo_random = [random.randint(0, 254) for _ in range(length - len(data) - 3)]
5347     return [0, 2] + pseudo_random + [0] + data
5348
5349
5350 def encode_base_n(num, n, table=None):
5351     FULL_TABLE = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
5352     if not table:
5353         table = FULL_TABLE[:n]
5354
5355     if n > len(table):
5356         raise ValueError('base %d exceeds table length %d' % (n, len(table)))
5357
5358     if num == 0:
5359         return table[0]
5360
5361     ret = ''
5362     while num:
5363         ret = table[num % n] + ret
5364         num = num // n
5365     return ret
5366
5367
5368 def decode_packed_codes(code):
5369     mobj = re.search(PACKED_CODES_RE, code)
5370     obfucasted_code, base, count, symbols = mobj.groups()
5371     base = int(base)
5372     count = int(count)
5373     symbols = symbols.split('|')
5374     symbol_table = {}
5375
5376     while count:
5377         count -= 1
5378         base_n_count = encode_base_n(count, base)
5379         symbol_table[base_n_count] = symbols[count] or base_n_count
5380
5381     return re.sub(
5382         r'\b(\w+)\b', lambda mobj: symbol_table[mobj.group(0)],
5383         obfucasted_code)
5384
5385
5386 def caesar(s, alphabet, shift):
5387     if shift == 0:
5388         return s
5389     l = len(alphabet)
5390     return ''.join(
5391         alphabet[(alphabet.index(c) + shift) % l] if c in alphabet else c
5392         for c in s)
5393
5394
5395 def rot47(s):
5396     return caesar(s, r'''!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~''', 47)
5397
5398
5399 def parse_m3u8_attributes(attrib):
5400     info = {}
5401     for (key, val) in re.findall(r'(?P<key>[A-Z0-9-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)', attrib):
5402         if val.startswith('"'):
5403             val = val[1:-1]
5404         info[key] = val
5405     return info
5406
5407
5408 def urshift(val, n):
5409     return val >> n if val >= 0 else (val + 0x100000000) >> n
5410
5411
5412 # Based on png2str() written by @gdkchan and improved by @yokrysty
5413 # Originally posted at https://github.com/ytdl-org/youtube-dl/issues/9706
5414 def decode_png(png_data):
5415     # Reference: https://www.w3.org/TR/PNG/
5416     header = png_data[8:]
5417
5418     if png_data[:8] != b'\x89PNG\x0d\x0a\x1a\x0a' or header[4:8] != b'IHDR':
5419         raise IOError('Not a valid PNG file.')
5420
5421     int_map = {1: '>B', 2: '>H', 4: '>I'}
5422     unpack_integer = lambda x: compat_struct_unpack(int_map[len(x)], x)[0]
5423
5424     chunks = []
5425
5426     while header:
5427         length = unpack_integer(header[:4])
5428         header = header[4:]
5429
5430         chunk_type = header[:4]
5431         header = header[4:]
5432
5433         chunk_data = header[:length]
5434         header = header[length:]
5435
5436         header = header[4:]  # Skip CRC
5437
5438         chunks.append({
5439             'type': chunk_type,
5440             'length': length,
5441             'data': chunk_data
5442         })
5443
5444     ihdr = chunks[0]['data']
5445
5446     width = unpack_integer(ihdr[:4])
5447     height = unpack_integer(ihdr[4:8])
5448
5449     idat = b''
5450
5451     for chunk in chunks:
5452         if chunk['type'] == b'IDAT':
5453             idat += chunk['data']
5454
5455     if not idat:
5456         raise IOError('Unable to read PNG data.')
5457
5458     decompressed_data = bytearray(zlib.decompress(idat))
5459
5460     stride = width * 3
5461     pixels = []
5462
5463     def _get_pixel(idx):
5464         x = idx % stride
5465         y = idx // stride
5466         return pixels[y][x]
5467
5468     for y in range(height):
5469         basePos = y * (1 + stride)
5470         filter_type = decompressed_data[basePos]
5471
5472         current_row = []
5473
5474         pixels.append(current_row)
5475
5476         for x in range(stride):
5477             color = decompressed_data[1 + basePos + x]
5478             basex = y * stride + x
5479             left = 0
5480             up = 0
5481
5482             if x > 2:
5483                 left = _get_pixel(basex - 3)
5484             if y > 0:
5485                 up = _get_pixel(basex - stride)
5486
5487             if filter_type == 1:  # Sub
5488                 color = (color + left) & 0xff
5489             elif filter_type == 2:  # Up
5490                 color = (color + up) & 0xff
5491             elif filter_type == 3:  # Average
5492                 color = (color + ((left + up) >> 1)) & 0xff
5493             elif filter_type == 4:  # Paeth
5494                 a = left
5495                 b = up
5496                 c = 0
5497
5498                 if x > 2 and y > 0:
5499                     c = _get_pixel(basex - stride - 3)
5500
5501                 p = a + b - c
5502
5503                 pa = abs(p - a)
5504                 pb = abs(p - b)
5505                 pc = abs(p - c)
5506
5507                 if pa <= pb and pa <= pc:
5508                     color = (color + a) & 0xff
5509                 elif pb <= pc:
5510                     color = (color + b) & 0xff
5511                 else:
5512                     color = (color + c) & 0xff
5513
5514             current_row.append(color)
5515
5516     return width, height, pixels
5517
5518
5519 def write_xattr(path, key, value):
5520     # This mess below finds the best xattr tool for the job
5521     try:
5522         # try the pyxattr module...
5523         import xattr
5524
5525         if hasattr(xattr, 'set'):  # pyxattr
5526             # Unicode arguments are not supported in python-pyxattr until
5527             # version 0.5.0
5528             # See https://github.com/ytdl-org/youtube-dl/issues/5498
5529             pyxattr_required_version = '0.5.0'
5530             if version_tuple(xattr.__version__) < version_tuple(pyxattr_required_version):
5531                 # TODO: fallback to CLI tools
5532                 raise XAttrUnavailableError(
5533                     'python-pyxattr is detected but is too old. '
5534                     'youtube-dl requires %s or above while your version is %s. '
5535                     'Falling back to other xattr implementations' % (
5536                         pyxattr_required_version, xattr.__version__))
5537
5538             setxattr = xattr.set
5539         else:  # xattr
5540             setxattr = xattr.setxattr
5541
5542         try:
5543             setxattr(path, key, value)
5544         except EnvironmentError as e:
5545             raise XAttrMetadataError(e.errno, e.strerror)
5546
5547     except ImportError:
5548         if compat_os_name == 'nt':
5549             # Write xattrs to NTFS Alternate Data Streams:
5550             # http://en.wikipedia.org/wiki/NTFS#Alternate_data_streams_.28ADS.29
5551             assert ':' not in key
5552             assert os.path.exists(path)
5553
5554             ads_fn = path + ':' + key
5555             try:
5556                 with open(ads_fn, 'wb') as f:
5557                     f.write(value)
5558             except EnvironmentError as e:
5559                 raise XAttrMetadataError(e.errno, e.strerror)
5560         else:
5561             user_has_setfattr = check_executable('setfattr', ['--version'])
5562             user_has_xattr = check_executable('xattr', ['-h'])
5563
5564             if user_has_setfattr or user_has_xattr:
5565
5566                 value = value.decode('utf-8')
5567                 if user_has_setfattr:
5568                     executable = 'setfattr'
5569                     opts = ['-n', key, '-v', value]
5570                 elif user_has_xattr:
5571                     executable = 'xattr'
5572                     opts = ['-w', key, value]
5573
5574                 cmd = ([encodeFilename(executable, True)]
5575                        + [encodeArgument(o) for o in opts]
5576                        + [encodeFilename(path, True)])
5577
5578                 try:
5579                     p = subprocess.Popen(
5580                         cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
5581                 except EnvironmentError as e:
5582                     raise XAttrMetadataError(e.errno, e.strerror)
5583                 stdout, stderr = p.communicate()
5584                 stderr = stderr.decode('utf-8', 'replace')
5585                 if p.returncode != 0:
5586                     raise XAttrMetadataError(p.returncode, stderr)
5587
5588             else:
5589                 # On Unix, and can't find pyxattr, setfattr, or xattr.
5590                 if sys.platform.startswith('linux'):
5591                     raise XAttrUnavailableError(
5592                         "Couldn't find a tool to set the xattrs. "
5593                         "Install either the python 'pyxattr' or 'xattr' "
5594                         "modules, or the GNU 'attr' package "
5595                         "(which contains the 'setfattr' tool).")
5596                 else:
5597                     raise XAttrUnavailableError(
5598                         "Couldn't find a tool to set the xattrs. "
5599                         "Install either the python 'xattr' module, "
5600                         "or the 'xattr' binary.")
5601
5602
5603 def random_birthday(year_field, month_field, day_field):
5604     start_date = datetime.date(1950, 1, 1)
5605     end_date = datetime.date(1995, 12, 31)
5606     offset = random.randint(0, (end_date - start_date).days)
5607     random_date = start_date + datetime.timedelta(offset)
5608     return {
5609         year_field: str(random_date.year),
5610         month_field: str(random_date.month),
5611         day_field: str(random_date.day),
5612     }