2 from __future__ import unicode_literals
21 import xml.etree.ElementTree
25 import urllib.request as compat_urllib_request
26 except ImportError: # Python 2
27 import urllib2 as compat_urllib_request
30 import urllib.error as compat_urllib_error
31 except ImportError: # Python 2
32 import urllib2 as compat_urllib_error
35 import urllib.parse as compat_urllib_parse
36 except ImportError: # Python 2
37 import urllib as compat_urllib_parse
40 from urllib.parse import urlparse as compat_urllib_parse_urlparse
41 except ImportError: # Python 2
42 from urlparse import urlparse as compat_urllib_parse_urlparse
45 import urllib.parse as compat_urlparse
46 except ImportError: # Python 2
47 import urlparse as compat_urlparse
50 import urllib.response as compat_urllib_response
51 except ImportError: # Python 2
52 import urllib as compat_urllib_response
55 import http.cookiejar as compat_cookiejar
56 except ImportError: # Python 2
57 import cookielib as compat_cookiejar
60 import http.cookies as compat_cookies
61 except ImportError: # Python 2
62 import Cookie as compat_cookies
65 import html.entities as compat_html_entities
66 except ImportError: # Python 2
67 import htmlentitydefs as compat_html_entities
70 compat_html_entities_html5 = compat_html_entities.html5
71 except AttributeError:
72 # Copied from CPython 3.5.1 html/entities.py
73 compat_html_entities_html5 = {
82 'acE;': '\u223e\u0333',
102 'alefsym;': '\u2135',
117 'andslope;': '\u2a58',
123 'angmsdaa;': '\u29a8',
124 'angmsdab;': '\u29a9',
125 'angmsdac;': '\u29aa',
126 'angmsdad;': '\u29ab',
127 'angmsdae;': '\u29ac',
128 'angmsdaf;': '\u29ad',
129 'angmsdag;': '\u29ae',
130 'angmsdah;': '\u29af',
132 'angrtvb;': '\u22be',
133 'angrtvbd;': '\u299d',
136 'angzarr;': '\u237c',
139 'Aopf;': '\U0001d538',
140 'aopf;': '\U0001d552',
147 'ApplyFunction;': '\u2061',
149 'approxeq;': '\u224a',
154 'Ascr;': '\U0001d49c',
155 'ascr;': '\U0001d4b6',
159 'asympeq;': '\u224d',
168 'awconint;': '\u2233',
170 'backcong;': '\u224c',
171 'backepsilon;': '\u03f6',
172 'backprime;': '\u2035',
173 'backsim;': '\u223d',
174 'backsimeq;': '\u22cd',
175 'Backslash;': '\u2216',
180 'barwedge;': '\u2305',
182 'bbrktbrk;': '\u23b6',
188 'Because;': '\u2235',
189 'because;': '\u2235',
190 'bemptyv;': '\u29b0',
193 'Bernoullis;': '\u212c',
197 'between;': '\u226c',
198 'Bfr;': '\U0001d505',
199 'bfr;': '\U0001d51f',
201 'bigcirc;': '\u25ef',
203 'bigodot;': '\u2a00',
204 'bigoplus;': '\u2a01',
205 'bigotimes;': '\u2a02',
206 'bigsqcup;': '\u2a06',
207 'bigstar;': '\u2605',
208 'bigtriangledown;': '\u25bd',
209 'bigtriangleup;': '\u25b3',
210 'biguplus;': '\u2a04',
212 'bigwedge;': '\u22c0',
214 'blacklozenge;': '\u29eb',
215 'blacksquare;': '\u25aa',
216 'blacktriangle;': '\u25b4',
217 'blacktriangledown;': '\u25be',
218 'blacktriangleleft;': '\u25c2',
219 'blacktriangleright;': '\u25b8',
226 'bnequiv;': '\u2261\u20e5',
229 'Bopf;': '\U0001d539',
230 'bopf;': '\U0001d553',
253 'boxminus;': '\u229f',
254 'boxplus;': '\u229e',
255 'boxtimes;': '\u22a0',
284 'bscr;': '\U0001d4b7',
290 'bsolhsub;': '\u27c8',
303 'capbrcup;': '\u2a49',
307 'CapitalDifferentialD;': '\u2145',
308 'caps;': '\u2229\ufe00',
311 'Cayleys;': '\u212d',
321 'Cconint;': '\u2230',
323 'ccupssm;': '\u2a50',
329 'cemptyv;': '\u29b2',
332 'CenterDot;': '\xb7',
333 'centerdot;': '\xb7',
335 'cfr;': '\U0001d520',
339 'checkmark;': '\u2713',
345 'circlearrowleft;': '\u21ba',
346 'circlearrowright;': '\u21bb',
347 'circledast;': '\u229b',
348 'circledcirc;': '\u229a',
349 'circleddash;': '\u229d',
350 'CircleDot;': '\u2299',
352 'circledS;': '\u24c8',
353 'CircleMinus;': '\u2296',
354 'CirclePlus;': '\u2295',
355 'CircleTimes;': '\u2297',
358 'cirfnint;': '\u2a10',
360 'cirscir;': '\u29c2',
361 'ClockwiseContourIntegral;': '\u2232',
362 'CloseCurlyDoubleQuote;': '\u201d',
363 'CloseCurlyQuote;': '\u2019',
365 'clubsuit;': '\u2663',
370 'coloneq;': '\u2254',
375 'complement;': '\u2201',
376 'complexes;': '\u2102',
378 'congdot;': '\u2a6d',
379 'Congruent;': '\u2261',
382 'ContourIntegral;': '\u222e',
384 'copf;': '\U0001d554',
386 'Coproduct;': '\u2210',
392 'CounterClockwiseContourIntegral;': '\u2233',
396 'Cscr;': '\U0001d49e',
397 'cscr;': '\U0001d4b8',
403 'cudarrl;': '\u2938',
404 'cudarrr;': '\u2935',
408 'cularrp;': '\u293d',
411 'cupbrcap;': '\u2a48',
417 'cups;': '\u222a\ufe00',
419 'curarrm;': '\u293c',
420 'curlyeqprec;': '\u22de',
421 'curlyeqsucc;': '\u22df',
422 'curlyvee;': '\u22ce',
423 'curlywedge;': '\u22cf',
426 'curvearrowleft;': '\u21b6',
427 'curvearrowright;': '\u21b7',
430 'cwconint;': '\u2232',
442 'dbkarow;': '\u290f',
450 'ddagger;': '\u2021',
452 'DDotrahd;': '\u2911',
453 'ddotseq;': '\u2a77',
459 'demptyv;': '\u29b1',
461 'Dfr;': '\U0001d507',
462 'dfr;': '\U0001d521',
466 'DiacriticalAcute;': '\xb4',
467 'DiacriticalDot;': '\u02d9',
468 'DiacriticalDoubleAcute;': '\u02dd',
469 'DiacriticalGrave;': '`',
470 'DiacriticalTilde;': '\u02dc',
472 'Diamond;': '\u22c4',
473 'diamond;': '\u22c4',
474 'diamondsuit;': '\u2666',
477 'DifferentialD;': '\u2146',
478 'digamma;': '\u03dd',
483 'divideontimes;': '\u22c7',
490 'Dopf;': '\U0001d53b',
491 'dopf;': '\U0001d555',
496 'doteqdot;': '\u2251',
497 'DotEqual;': '\u2250',
498 'dotminus;': '\u2238',
499 'dotplus;': '\u2214',
500 'dotsquare;': '\u22a1',
501 'doublebarwedge;': '\u2306',
502 'DoubleContourIntegral;': '\u222f',
503 'DoubleDot;': '\xa8',
504 'DoubleDownArrow;': '\u21d3',
505 'DoubleLeftArrow;': '\u21d0',
506 'DoubleLeftRightArrow;': '\u21d4',
507 'DoubleLeftTee;': '\u2ae4',
508 'DoubleLongLeftArrow;': '\u27f8',
509 'DoubleLongLeftRightArrow;': '\u27fa',
510 'DoubleLongRightArrow;': '\u27f9',
511 'DoubleRightArrow;': '\u21d2',
512 'DoubleRightTee;': '\u22a8',
513 'DoubleUpArrow;': '\u21d1',
514 'DoubleUpDownArrow;': '\u21d5',
515 'DoubleVerticalBar;': '\u2225',
516 'DownArrow;': '\u2193',
517 'Downarrow;': '\u21d3',
518 'downarrow;': '\u2193',
519 'DownArrowBar;': '\u2913',
520 'DownArrowUpArrow;': '\u21f5',
521 'DownBreve;': '\u0311',
522 'downdownarrows;': '\u21ca',
523 'downharpoonleft;': '\u21c3',
524 'downharpoonright;': '\u21c2',
525 'DownLeftRightVector;': '\u2950',
526 'DownLeftTeeVector;': '\u295e',
527 'DownLeftVector;': '\u21bd',
528 'DownLeftVectorBar;': '\u2956',
529 'DownRightTeeVector;': '\u295f',
530 'DownRightVector;': '\u21c1',
531 'DownRightVectorBar;': '\u2957',
532 'DownTee;': '\u22a4',
533 'DownTeeArrow;': '\u21a7',
534 'drbkarow;': '\u2910',
537 'Dscr;': '\U0001d49f',
538 'dscr;': '\U0001d4b9',
549 'dwangle;': '\u29a6',
552 'dzigrarr;': '\u27ff',
574 'Efr;': '\U0001d508',
575 'efr;': '\U0001d522',
584 'Element;': '\u2208',
585 'elinters;': '\u23e7',
592 'emptyset;': '\u2205',
593 'EmptySmallSquare;': '\u25fb',
595 'EmptyVerySmallSquare;': '\u25ab',
604 'Eopf;': '\U0001d53c',
605 'eopf;': '\U0001d556',
610 'Epsilon;': '\u0395',
611 'epsilon;': '\u03b5',
614 'eqcolon;': '\u2255',
616 'eqslantgtr;': '\u2a96',
617 'eqslantless;': '\u2a95',
620 'EqualTilde;': '\u2242',
622 'Equilibrium;': '\u21cc',
624 'equivDD;': '\u2a78',
625 'eqvparsl;': '\u29e5',
647 'expectation;': '\u2130',
648 'ExponentialE;': '\u2147',
649 'exponentiale;': '\u2147',
650 'fallingdotseq;': '\u2252',
657 'Ffr;': '\U0001d509',
658 'ffr;': '\U0001d523',
660 'FilledSmallSquare;': '\u25fc',
661 'FilledVerySmallSquare;': '\u25aa',
667 'Fopf;': '\U0001d53d',
668 'fopf;': '\U0001d557',
673 'Fouriertrf;': '\u2131',
674 'fpartint;': '\u2a0d',
696 'fscr;': '\U0001d4bb',
718 'geqslant;': '\u2a7e',
722 'gesdoto;': '\u2a82',
723 'gesdotol;': '\u2a84',
724 'gesl;': '\u22db\ufe00',
726 'Gfr;': '\U0001d50a',
727 'gfr;': '\U0001d524',
739 'gnapprox;': '\u2a8a',
745 'Gopf;': '\U0001d53e',
746 'gopf;': '\U0001d558',
748 'GreaterEqual;': '\u2265',
749 'GreaterEqualLess;': '\u22db',
750 'GreaterFullEqual;': '\u2267',
751 'GreaterGreater;': '\u2aa2',
752 'GreaterLess;': '\u2277',
753 'GreaterSlantEqual;': '\u2a7e',
754 'GreaterTilde;': '\u2273',
755 'Gscr;': '\U0001d4a2',
769 'gtquest;': '\u2a7c',
770 'gtrapprox;': '\u2a86',
773 'gtreqless;': '\u22db',
774 'gtreqqless;': '\u2a8c',
775 'gtrless;': '\u2277',
777 'gvertneqq;': '\u2269\ufe00',
778 'gvnE;': '\u2269\ufe00',
787 'harrcir;': '\u2948',
794 'heartsuit;': '\u2665',
798 'hfr;': '\U0001d525',
799 'HilbertSpace;': '\u210b',
800 'hksearow;': '\u2925',
801 'hkswarow;': '\u2926',
804 'hookleftarrow;': '\u21a9',
805 'hookrightarrow;': '\u21aa',
807 'hopf;': '\U0001d559',
809 'HorizontalLine;': '\u2500',
811 'hscr;': '\U0001d4bd',
815 'HumpDownHump;': '\u224e',
816 'HumpEqual;': '\u224f',
837 'ifr;': '\U0001d526',
853 'ImaginaryI;': '\u2148',
854 'imagline;': '\u2110',
855 'imagpart;': '\u2111',
859 'Implies;': '\u21d2',
863 'infintie;': '\u29dd',
868 'integers;': '\u2124',
869 'Integral;': '\u222b',
870 'intercal;': '\u22ba',
871 'Intersection;': '\u22c2',
872 'intlarhk;': '\u2a17',
873 'intprod;': '\u2a3c',
874 'InvisibleComma;': '\u2063',
875 'InvisibleTimes;': '\u2062',
880 'Iopf;': '\U0001d540',
881 'iopf;': '\U0001d55a',
888 'iscr;': '\U0001d4be',
890 'isindot;': '\u22f5',
908 'Jfr;': '\U0001d50d',
909 'jfr;': '\U0001d527',
911 'Jopf;': '\U0001d541',
912 'jopf;': '\U0001d55b',
913 'Jscr;': '\U0001d4a5',
914 'jscr;': '\U0001d4bf',
926 'Kfr;': '\U0001d50e',
927 'kfr;': '\U0001d528',
933 'Kopf;': '\U0001d542',
934 'kopf;': '\U0001d55c',
935 'Kscr;': '\U0001d4a6',
936 'kscr;': '\U0001d4c0',
940 'laemptyv;': '\u29b4',
949 'Laplacetrf;': '\u2112',
956 'larrbfs;': '\u291f',
961 'larrsim;': '\u2973',
967 'lates;': '\u2aad\ufe00',
974 'lbrksld;': '\u298f',
975 'lbrkslu;': '\u298d',
987 'ldrdhar;': '\u2967',
988 'ldrushar;': '\u294b',
992 'LeftAngleBracket;': '\u27e8',
993 'LeftArrow;': '\u2190',
994 'Leftarrow;': '\u21d0',
995 'leftarrow;': '\u2190',
996 'LeftArrowBar;': '\u21e4',
997 'LeftArrowRightArrow;': '\u21c6',
998 'leftarrowtail;': '\u21a2',
999 'LeftCeiling;': '\u2308',
1000 'LeftDoubleBracket;': '\u27e6',
1001 'LeftDownTeeVector;': '\u2961',
1002 'LeftDownVector;': '\u21c3',
1003 'LeftDownVectorBar;': '\u2959',
1004 'LeftFloor;': '\u230a',
1005 'leftharpoondown;': '\u21bd',
1006 'leftharpoonup;': '\u21bc',
1007 'leftleftarrows;': '\u21c7',
1008 'LeftRightArrow;': '\u2194',
1009 'Leftrightarrow;': '\u21d4',
1010 'leftrightarrow;': '\u2194',
1011 'leftrightarrows;': '\u21c6',
1012 'leftrightharpoons;': '\u21cb',
1013 'leftrightsquigarrow;': '\u21ad',
1014 'LeftRightVector;': '\u294e',
1015 'LeftTee;': '\u22a3',
1016 'LeftTeeArrow;': '\u21a4',
1017 'LeftTeeVector;': '\u295a',
1018 'leftthreetimes;': '\u22cb',
1019 'LeftTriangle;': '\u22b2',
1020 'LeftTriangleBar;': '\u29cf',
1021 'LeftTriangleEqual;': '\u22b4',
1022 'LeftUpDownVector;': '\u2951',
1023 'LeftUpTeeVector;': '\u2960',
1024 'LeftUpVector;': '\u21bf',
1025 'LeftUpVectorBar;': '\u2958',
1026 'LeftVector;': '\u21bc',
1027 'LeftVectorBar;': '\u2952',
1032 'leqslant;': '\u2a7d',
1035 'lesdot;': '\u2a7f',
1036 'lesdoto;': '\u2a81',
1037 'lesdotor;': '\u2a83',
1038 'lesg;': '\u22da\ufe00',
1039 'lesges;': '\u2a93',
1040 'lessapprox;': '\u2a85',
1041 'lessdot;': '\u22d6',
1042 'lesseqgtr;': '\u22da',
1043 'lesseqqgtr;': '\u2a8b',
1044 'LessEqualGreater;': '\u22da',
1045 'LessFullEqual;': '\u2266',
1046 'LessGreater;': '\u2276',
1047 'lessgtr;': '\u2276',
1048 'LessLess;': '\u2aa1',
1049 'lesssim;': '\u2272',
1050 'LessSlantEqual;': '\u2a7d',
1051 'LessTilde;': '\u2272',
1052 'lfisht;': '\u297c',
1053 'lfloor;': '\u230a',
1054 'Lfr;': '\U0001d50f',
1055 'lfr;': '\U0001d529',
1061 'lharul;': '\u296a',
1068 'llcorner;': '\u231e',
1069 'Lleftarrow;': '\u21da',
1070 'llhard;': '\u296b',
1072 'Lmidot;': '\u013f',
1073 'lmidot;': '\u0140',
1074 'lmoust;': '\u23b0',
1075 'lmoustache;': '\u23b0',
1077 'lnapprox;': '\u2a89',
1086 'LongLeftArrow;': '\u27f5',
1087 'Longleftarrow;': '\u27f8',
1088 'longleftarrow;': '\u27f5',
1089 'LongLeftRightArrow;': '\u27f7',
1090 'Longleftrightarrow;': '\u27fa',
1091 'longleftrightarrow;': '\u27f7',
1092 'longmapsto;': '\u27fc',
1093 'LongRightArrow;': '\u27f6',
1094 'Longrightarrow;': '\u27f9',
1095 'longrightarrow;': '\u27f6',
1096 'looparrowleft;': '\u21ab',
1097 'looparrowright;': '\u21ac',
1099 'Lopf;': '\U0001d543',
1100 'lopf;': '\U0001d55d',
1101 'loplus;': '\u2a2d',
1102 'lotimes;': '\u2a34',
1103 'lowast;': '\u2217',
1105 'LowerLeftArrow;': '\u2199',
1106 'LowerRightArrow;': '\u2198',
1108 'lozenge;': '\u25ca',
1111 'lparlt;': '\u2993',
1113 'lrcorner;': '\u231f',
1115 'lrhard;': '\u296d',
1118 'lsaquo;': '\u2039',
1120 'lscr;': '\U0001d4c1',
1128 'lsquor;': '\u201a',
1129 'Lstrok;': '\u0141',
1130 'lstrok;': '\u0142',
1139 'lthree;': '\u22cb',
1140 'ltimes;': '\u22c9',
1141 'ltlarr;': '\u2976',
1142 'ltquest;': '\u2a7b',
1146 'ltrPar;': '\u2996',
1147 'lurdshar;': '\u294a',
1148 'luruhar;': '\u2966',
1149 'lvertneqq;': '\u2268\ufe00',
1150 'lvnE;': '\u2268\ufe00',
1155 'maltese;': '\u2720',
1158 'mapsto;': '\u21a6',
1159 'mapstodown;': '\u21a7',
1160 'mapstoleft;': '\u21a4',
1161 'mapstoup;': '\u21a5',
1162 'marker;': '\u25ae',
1163 'mcomma;': '\u2a29',
1168 'measuredangle;': '\u2221',
1169 'MediumSpace;': '\u205f',
1170 'Mellintrf;': '\u2133',
1171 'Mfr;': '\U0001d510',
1172 'mfr;': '\U0001d52a',
1178 'midcir;': '\u2af0',
1182 'minusb;': '\u229f',
1183 'minusd;': '\u2238',
1184 'minusdu;': '\u2a2a',
1185 'MinusPlus;': '\u2213',
1188 'mnplus;': '\u2213',
1189 'models;': '\u22a7',
1190 'Mopf;': '\U0001d544',
1191 'mopf;': '\U0001d55e',
1194 'mscr;': '\U0001d4c2',
1195 'mstpos;': '\u223e',
1198 'multimap;': '\u22b8',
1201 'Nacute;': '\u0143',
1202 'nacute;': '\u0144',
1203 'nang;': '\u2220\u20d2',
1205 'napE;': '\u2a70\u0338',
1206 'napid;': '\u224b\u0338',
1208 'napprox;': '\u2249',
1210 'natural;': '\u266e',
1211 'naturals;': '\u2115',
1214 'nbump;': '\u224e\u0338',
1215 'nbumpe;': '\u224f\u0338',
1217 'Ncaron;': '\u0147',
1218 'ncaron;': '\u0148',
1219 'Ncedil;': '\u0145',
1220 'ncedil;': '\u0146',
1222 'ncongdot;': '\u2a6d\u0338',
1228 'nearhk;': '\u2924',
1231 'nearrow;': '\u2197',
1232 'nedot;': '\u2250\u0338',
1233 'NegativeMediumSpace;': '\u200b',
1234 'NegativeThickSpace;': '\u200b',
1235 'NegativeThinSpace;': '\u200b',
1236 'NegativeVeryThinSpace;': '\u200b',
1237 'nequiv;': '\u2262',
1238 'nesear;': '\u2928',
1239 'nesim;': '\u2242\u0338',
1240 'NestedGreaterGreater;': '\u226b',
1241 'NestedLessLess;': '\u226a',
1243 'nexist;': '\u2204',
1244 'nexists;': '\u2204',
1245 'Nfr;': '\U0001d511',
1246 'nfr;': '\U0001d52b',
1247 'ngE;': '\u2267\u0338',
1250 'ngeqq;': '\u2267\u0338',
1251 'ngeqslant;': '\u2a7e\u0338',
1252 'nges;': '\u2a7e\u0338',
1253 'nGg;': '\u22d9\u0338',
1255 'nGt;': '\u226b\u20d2',
1258 'nGtv;': '\u226b\u0338',
1271 'nlE;': '\u2266\u0338',
1273 'nLeftarrow;': '\u21cd',
1274 'nleftarrow;': '\u219a',
1275 'nLeftrightarrow;': '\u21ce',
1276 'nleftrightarrow;': '\u21ae',
1278 'nleqq;': '\u2266\u0338',
1279 'nleqslant;': '\u2a7d\u0338',
1280 'nles;': '\u2a7d\u0338',
1282 'nLl;': '\u22d8\u0338',
1284 'nLt;': '\u226a\u20d2',
1287 'nltrie;': '\u22ec',
1288 'nLtv;': '\u226a\u0338',
1290 'NoBreak;': '\u2060',
1291 'NonBreakingSpace;': '\xa0',
1293 'nopf;': '\U0001d55f',
1297 'NotCongruent;': '\u2262',
1298 'NotCupCap;': '\u226d',
1299 'NotDoubleVerticalBar;': '\u2226',
1300 'NotElement;': '\u2209',
1301 'NotEqual;': '\u2260',
1302 'NotEqualTilde;': '\u2242\u0338',
1303 'NotExists;': '\u2204',
1304 'NotGreater;': '\u226f',
1305 'NotGreaterEqual;': '\u2271',
1306 'NotGreaterFullEqual;': '\u2267\u0338',
1307 'NotGreaterGreater;': '\u226b\u0338',
1308 'NotGreaterLess;': '\u2279',
1309 'NotGreaterSlantEqual;': '\u2a7e\u0338',
1310 'NotGreaterTilde;': '\u2275',
1311 'NotHumpDownHump;': '\u224e\u0338',
1312 'NotHumpEqual;': '\u224f\u0338',
1314 'notindot;': '\u22f5\u0338',
1315 'notinE;': '\u22f9\u0338',
1316 'notinva;': '\u2209',
1317 'notinvb;': '\u22f7',
1318 'notinvc;': '\u22f6',
1319 'NotLeftTriangle;': '\u22ea',
1320 'NotLeftTriangleBar;': '\u29cf\u0338',
1321 'NotLeftTriangleEqual;': '\u22ec',
1322 'NotLess;': '\u226e',
1323 'NotLessEqual;': '\u2270',
1324 'NotLessGreater;': '\u2278',
1325 'NotLessLess;': '\u226a\u0338',
1326 'NotLessSlantEqual;': '\u2a7d\u0338',
1327 'NotLessTilde;': '\u2274',
1328 'NotNestedGreaterGreater;': '\u2aa2\u0338',
1329 'NotNestedLessLess;': '\u2aa1\u0338',
1331 'notniva;': '\u220c',
1332 'notnivb;': '\u22fe',
1333 'notnivc;': '\u22fd',
1334 'NotPrecedes;': '\u2280',
1335 'NotPrecedesEqual;': '\u2aaf\u0338',
1336 'NotPrecedesSlantEqual;': '\u22e0',
1337 'NotReverseElement;': '\u220c',
1338 'NotRightTriangle;': '\u22eb',
1339 'NotRightTriangleBar;': '\u29d0\u0338',
1340 'NotRightTriangleEqual;': '\u22ed',
1341 'NotSquareSubset;': '\u228f\u0338',
1342 'NotSquareSubsetEqual;': '\u22e2',
1343 'NotSquareSuperset;': '\u2290\u0338',
1344 'NotSquareSupersetEqual;': '\u22e3',
1345 'NotSubset;': '\u2282\u20d2',
1346 'NotSubsetEqual;': '\u2288',
1347 'NotSucceeds;': '\u2281',
1348 'NotSucceedsEqual;': '\u2ab0\u0338',
1349 'NotSucceedsSlantEqual;': '\u22e1',
1350 'NotSucceedsTilde;': '\u227f\u0338',
1351 'NotSuperset;': '\u2283\u20d2',
1352 'NotSupersetEqual;': '\u2289',
1353 'NotTilde;': '\u2241',
1354 'NotTildeEqual;': '\u2244',
1355 'NotTildeFullEqual;': '\u2247',
1356 'NotTildeTilde;': '\u2249',
1357 'NotVerticalBar;': '\u2224',
1359 'nparallel;': '\u2226',
1360 'nparsl;': '\u2afd\u20e5',
1361 'npart;': '\u2202\u0338',
1362 'npolint;': '\u2a14',
1364 'nprcue;': '\u22e0',
1365 'npre;': '\u2aaf\u0338',
1367 'npreceq;': '\u2aaf\u0338',
1370 'nrarrc;': '\u2933\u0338',
1371 'nrarrw;': '\u219d\u0338',
1372 'nRightarrow;': '\u21cf',
1373 'nrightarrow;': '\u219b',
1375 'nrtrie;': '\u22ed',
1377 'nsccue;': '\u22e1',
1378 'nsce;': '\u2ab0\u0338',
1379 'Nscr;': '\U0001d4a9',
1380 'nscr;': '\U0001d4c3',
1381 'nshortmid;': '\u2224',
1382 'nshortparallel;': '\u2226',
1385 'nsimeq;': '\u2244',
1388 'nsqsube;': '\u22e2',
1389 'nsqsupe;': '\u22e3',
1391 'nsubE;': '\u2ac5\u0338',
1393 'nsubset;': '\u2282\u20d2',
1394 'nsubseteq;': '\u2288',
1395 'nsubseteqq;': '\u2ac5\u0338',
1397 'nsucceq;': '\u2ab0\u0338',
1399 'nsupE;': '\u2ac6\u0338',
1401 'nsupset;': '\u2283\u20d2',
1402 'nsupseteq;': '\u2289',
1403 'nsupseteqq;': '\u2ac6\u0338',
1410 'ntriangleleft;': '\u22ea',
1411 'ntrianglelefteq;': '\u22ec',
1412 'ntriangleright;': '\u22eb',
1413 'ntrianglerighteq;': '\u22ed',
1417 'numero;': '\u2116',
1419 'nvap;': '\u224d\u20d2',
1420 'nVDash;': '\u22af',
1421 'nVdash;': '\u22ae',
1422 'nvDash;': '\u22ad',
1423 'nvdash;': '\u22ac',
1424 'nvge;': '\u2265\u20d2',
1426 'nvHarr;': '\u2904',
1427 'nvinfin;': '\u29de',
1428 'nvlArr;': '\u2902',
1429 'nvle;': '\u2264\u20d2',
1431 'nvltrie;': '\u22b4\u20d2',
1432 'nvrArr;': '\u2903',
1433 'nvrtrie;': '\u22b5\u20d2',
1434 'nvsim;': '\u223c\u20d2',
1435 'nwarhk;': '\u2923',
1438 'nwarrow;': '\u2196',
1439 'nwnear;': '\u2927',
1453 'Odblac;': '\u0150',
1454 'odblac;': '\u0151',
1457 'odsold;': '\u29bc',
1461 'Ofr;': '\U0001d512',
1462 'ofr;': '\U0001d52c',
1474 'olcross;': '\u29bb',
1481 'Omicron;': '\u039f',
1482 'omicron;': '\u03bf',
1484 'ominus;': '\u2296',
1485 'Oopf;': '\U0001d546',
1486 'oopf;': '\U0001d560',
1488 'OpenCurlyDoubleQuote;': '\u201c',
1489 'OpenCurlyQuote;': '\u2018',
1497 'orderof;': '\u2134',
1502 'origof;': '\u22b6',
1504 'orslope;': '\u2a57',
1507 'Oscr;': '\U0001d4aa',
1518 'Otimes;': '\u2a37',
1519 'otimes;': '\u2297',
1520 'otimesas;': '\u2a36',
1526 'OverBar;': '\u203e',
1527 'OverBrace;': '\u23de',
1528 'OverBracket;': '\u23b4',
1529 'OverParenthesis;': '\u23dc',
1533 'parallel;': '\u2225',
1534 'parsim;': '\u2af3',
1537 'PartialD;': '\u2202',
1542 'permil;': '\u2030',
1544 'pertenk;': '\u2031',
1545 'Pfr;': '\U0001d513',
1546 'pfr;': '\U0001d52d',
1550 'phmmat;': '\u2133',
1554 'pitchfork;': '\u22d4',
1556 'planck;': '\u210f',
1557 'planckh;': '\u210e',
1558 'plankv;': '\u210f',
1560 'plusacir;': '\u2a23',
1562 'pluscir;': '\u2a22',
1563 'plusdo;': '\u2214',
1564 'plusdu;': '\u2a25',
1566 'PlusMinus;': '\xb1',
1569 'plussim;': '\u2a26',
1570 'plustwo;': '\u2a27',
1572 'Poincareplane;': '\u210c',
1573 'pointint;': '\u2a15',
1575 'popf;': '\U0001d561',
1585 'precapprox;': '\u2ab7',
1586 'preccurlyeq;': '\u227c',
1587 'Precedes;': '\u227a',
1588 'PrecedesEqual;': '\u2aaf',
1589 'PrecedesSlantEqual;': '\u227c',
1590 'PrecedesTilde;': '\u227e',
1591 'preceq;': '\u2aaf',
1592 'precnapprox;': '\u2ab9',
1593 'precneqq;': '\u2ab5',
1594 'precnsim;': '\u22e8',
1595 'precsim;': '\u227e',
1598 'primes;': '\u2119',
1601 'prnsim;': '\u22e8',
1603 'Product;': '\u220f',
1604 'profalar;': '\u232e',
1605 'profline;': '\u2312',
1606 'profsurf;': '\u2313',
1608 'Proportion;': '\u2237',
1609 'Proportional;': '\u221d',
1610 'propto;': '\u221d',
1612 'prurel;': '\u22b0',
1613 'Pscr;': '\U0001d4ab',
1614 'pscr;': '\U0001d4c5',
1617 'puncsp;': '\u2008',
1618 'Qfr;': '\U0001d514',
1619 'qfr;': '\U0001d52e',
1622 'qopf;': '\U0001d562',
1623 'qprime;': '\u2057',
1624 'Qscr;': '\U0001d4ac',
1625 'qscr;': '\U0001d4c6',
1626 'quaternions;': '\u210d',
1627 'quatint;': '\u2a16',
1629 'questeq;': '\u225f',
1635 'race;': '\u223d\u0331',
1636 'Racute;': '\u0154',
1637 'racute;': '\u0155',
1639 'raemptyv;': '\u29b3',
1644 'rangle;': '\u27e9',
1650 'rarrap;': '\u2975',
1652 'rarrbfs;': '\u2920',
1654 'rarrfs;': '\u291e',
1655 'rarrhk;': '\u21aa',
1656 'rarrlp;': '\u21ac',
1657 'rarrpl;': '\u2945',
1658 'rarrsim;': '\u2974',
1659 'Rarrtl;': '\u2916',
1660 'rarrtl;': '\u21a3',
1662 'rAtail;': '\u291c',
1663 'ratail;': '\u291a',
1665 'rationals;': '\u211a',
1673 'rbrksld;': '\u298e',
1674 'rbrkslu;': '\u2990',
1675 'Rcaron;': '\u0158',
1676 'rcaron;': '\u0159',
1677 'Rcedil;': '\u0156',
1678 'rcedil;': '\u0157',
1684 'rdldhar;': '\u2969',
1686 'rdquor;': '\u201d',
1690 'realine;': '\u211b',
1691 'realpart;': '\u211c',
1698 'ReverseElement;': '\u220b',
1699 'ReverseEquilibrium;': '\u21cb',
1700 'ReverseUpEquilibrium;': '\u296f',
1701 'rfisht;': '\u297d',
1702 'rfloor;': '\u230b',
1704 'rfr;': '\U0001d52f',
1708 'rharul;': '\u296c',
1712 'RightAngleBracket;': '\u27e9',
1713 'RightArrow;': '\u2192',
1714 'Rightarrow;': '\u21d2',
1715 'rightarrow;': '\u2192',
1716 'RightArrowBar;': '\u21e5',
1717 'RightArrowLeftArrow;': '\u21c4',
1718 'rightarrowtail;': '\u21a3',
1719 'RightCeiling;': '\u2309',
1720 'RightDoubleBracket;': '\u27e7',
1721 'RightDownTeeVector;': '\u295d',
1722 'RightDownVector;': '\u21c2',
1723 'RightDownVectorBar;': '\u2955',
1724 'RightFloor;': '\u230b',
1725 'rightharpoondown;': '\u21c1',
1726 'rightharpoonup;': '\u21c0',
1727 'rightleftarrows;': '\u21c4',
1728 'rightleftharpoons;': '\u21cc',
1729 'rightrightarrows;': '\u21c9',
1730 'rightsquigarrow;': '\u219d',
1731 'RightTee;': '\u22a2',
1732 'RightTeeArrow;': '\u21a6',
1733 'RightTeeVector;': '\u295b',
1734 'rightthreetimes;': '\u22cc',
1735 'RightTriangle;': '\u22b3',
1736 'RightTriangleBar;': '\u29d0',
1737 'RightTriangleEqual;': '\u22b5',
1738 'RightUpDownVector;': '\u294f',
1739 'RightUpTeeVector;': '\u295c',
1740 'RightUpVector;': '\u21be',
1741 'RightUpVectorBar;': '\u2954',
1742 'RightVector;': '\u21c0',
1743 'RightVectorBar;': '\u2953',
1745 'risingdotseq;': '\u2253',
1749 'rmoust;': '\u23b1',
1750 'rmoustache;': '\u23b1',
1757 'ropf;': '\U0001d563',
1758 'roplus;': '\u2a2e',
1759 'rotimes;': '\u2a35',
1760 'RoundImplies;': '\u2970',
1762 'rpargt;': '\u2994',
1763 'rppolint;': '\u2a12',
1765 'Rrightarrow;': '\u21db',
1766 'rsaquo;': '\u203a',
1768 'rscr;': '\U0001d4c7',
1773 'rsquor;': '\u2019',
1774 'rthree;': '\u22cc',
1775 'rtimes;': '\u22ca',
1779 'rtriltri;': '\u29ce',
1780 'RuleDelayed;': '\u29f4',
1781 'ruluhar;': '\u2968',
1783 'Sacute;': '\u015a',
1784 'sacute;': '\u015b',
1789 'Scaron;': '\u0160',
1790 'scaron;': '\u0161',
1794 'Scedil;': '\u015e',
1795 'scedil;': '\u015f',
1800 'scnsim;': '\u22e9',
1801 'scpolint;': '\u2a13',
1808 'searhk;': '\u2925',
1811 'searrow;': '\u2198',
1815 'seswar;': '\u2929',
1816 'setminus;': '\u2216',
1819 'Sfr;': '\U0001d516',
1820 'sfr;': '\U0001d530',
1821 'sfrown;': '\u2322',
1823 'SHCHcy;': '\u0429',
1824 'shchcy;': '\u0449',
1827 'ShortDownArrow;': '\u2193',
1828 'ShortLeftArrow;': '\u2190',
1829 'shortmid;': '\u2223',
1830 'shortparallel;': '\u2225',
1831 'ShortRightArrow;': '\u2192',
1832 'ShortUpArrow;': '\u2191',
1837 'sigmaf;': '\u03c2',
1838 'sigmav;': '\u03c2',
1840 'simdot;': '\u2a6a',
1848 'simplus;': '\u2a24',
1849 'simrarr;': '\u2972',
1851 'SmallCircle;': '\u2218',
1852 'smallsetminus;': '\u2216',
1853 'smashp;': '\u2a33',
1854 'smeparsl;': '\u29e4',
1859 'smtes;': '\u2aac\ufe00',
1860 'SOFTcy;': '\u042c',
1861 'softcy;': '\u044c',
1864 'solbar;': '\u233f',
1865 'Sopf;': '\U0001d54a',
1866 'sopf;': '\U0001d564',
1867 'spades;': '\u2660',
1868 'spadesuit;': '\u2660',
1871 'sqcaps;': '\u2293\ufe00',
1873 'sqcups;': '\u2294\ufe00',
1876 'sqsube;': '\u2291',
1877 'sqsubset;': '\u228f',
1878 'sqsubseteq;': '\u2291',
1880 'sqsupe;': '\u2292',
1881 'sqsupset;': '\u2290',
1882 'sqsupseteq;': '\u2292',
1884 'Square;': '\u25a1',
1885 'square;': '\u25a1',
1886 'SquareIntersection;': '\u2293',
1887 'SquareSubset;': '\u228f',
1888 'SquareSubsetEqual;': '\u2291',
1889 'SquareSuperset;': '\u2290',
1890 'SquareSupersetEqual;': '\u2292',
1891 'SquareUnion;': '\u2294',
1892 'squarf;': '\u25aa',
1895 'Sscr;': '\U0001d4ae',
1896 'sscr;': '\U0001d4c8',
1897 'ssetmn;': '\u2216',
1898 'ssmile;': '\u2323',
1899 'sstarf;': '\u22c6',
1903 'straightepsilon;': '\u03f5',
1904 'straightphi;': '\u03d5',
1908 'subdot;': '\u2abd',
1911 'subedot;': '\u2ac3',
1912 'submult;': '\u2ac1',
1915 'subplus;': '\u2abf',
1916 'subrarr;': '\u2979',
1917 'Subset;': '\u22d0',
1918 'subset;': '\u2282',
1919 'subseteq;': '\u2286',
1920 'subseteqq;': '\u2ac5',
1921 'SubsetEqual;': '\u2286',
1922 'subsetneq;': '\u228a',
1923 'subsetneqq;': '\u2acb',
1924 'subsim;': '\u2ac7',
1925 'subsub;': '\u2ad5',
1926 'subsup;': '\u2ad3',
1928 'succapprox;': '\u2ab8',
1929 'succcurlyeq;': '\u227d',
1930 'Succeeds;': '\u227b',
1931 'SucceedsEqual;': '\u2ab0',
1932 'SucceedsSlantEqual;': '\u227d',
1933 'SucceedsTilde;': '\u227f',
1934 'succeq;': '\u2ab0',
1935 'succnapprox;': '\u2aba',
1936 'succneqq;': '\u2ab6',
1937 'succnsim;': '\u22e9',
1938 'succsim;': '\u227f',
1939 'SuchThat;': '\u220b',
1951 'supdot;': '\u2abe',
1952 'supdsub;': '\u2ad8',
1955 'supedot;': '\u2ac4',
1956 'Superset;': '\u2283',
1957 'SupersetEqual;': '\u2287',
1958 'suphsol;': '\u27c9',
1959 'suphsub;': '\u2ad7',
1960 'suplarr;': '\u297b',
1961 'supmult;': '\u2ac2',
1964 'supplus;': '\u2ac0',
1965 'Supset;': '\u22d1',
1966 'supset;': '\u2283',
1967 'supseteq;': '\u2287',
1968 'supseteqq;': '\u2ac6',
1969 'supsetneq;': '\u228b',
1970 'supsetneqq;': '\u2acc',
1971 'supsim;': '\u2ac8',
1972 'supsub;': '\u2ad4',
1973 'supsup;': '\u2ad6',
1974 'swarhk;': '\u2926',
1977 'swarrow;': '\u2199',
1978 'swnwar;': '\u292a',
1982 'target;': '\u2316',
1986 'Tcaron;': '\u0164',
1987 'tcaron;': '\u0165',
1988 'Tcedil;': '\u0162',
1989 'tcedil;': '\u0163',
1993 'telrec;': '\u2315',
1994 'Tfr;': '\U0001d517',
1995 'tfr;': '\U0001d531',
1996 'there4;': '\u2234',
1997 'Therefore;': '\u2234',
1998 'therefore;': '\u2234',
2001 'thetasym;': '\u03d1',
2002 'thetav;': '\u03d1',
2003 'thickapprox;': '\u2248',
2004 'thicksim;': '\u223c',
2005 'ThickSpace;': '\u205f\u200a',
2006 'thinsp;': '\u2009',
2007 'ThinSpace;': '\u2009',
2009 'thksim;': '\u223c',
2016 'TildeEqual;': '\u2243',
2017 'TildeFullEqual;': '\u2245',
2018 'TildeTilde;': '\u2248',
2021 'timesb;': '\u22a0',
2022 'timesbar;': '\u2a31',
2023 'timesd;': '\u2a30',
2027 'topbot;': '\u2336',
2028 'topcir;': '\u2af1',
2029 'Topf;': '\U0001d54b',
2030 'topf;': '\U0001d565',
2031 'topfork;': '\u2ada',
2033 'tprime;': '\u2034',
2036 'triangle;': '\u25b5',
2037 'triangledown;': '\u25bf',
2038 'triangleleft;': '\u25c3',
2039 'trianglelefteq;': '\u22b4',
2040 'triangleq;': '\u225c',
2041 'triangleright;': '\u25b9',
2042 'trianglerighteq;': '\u22b5',
2043 'tridot;': '\u25ec',
2045 'triminus;': '\u2a3a',
2046 'TripleDot;': '\u20db',
2047 'triplus;': '\u2a39',
2049 'tritime;': '\u2a3b',
2050 'trpezium;': '\u23e2',
2051 'Tscr;': '\U0001d4af',
2052 'tscr;': '\U0001d4c9',
2057 'Tstrok;': '\u0166',
2058 'tstrok;': '\u0167',
2060 'twoheadleftarrow;': '\u219e',
2061 'twoheadrightarrow;': '\u21a0',
2069 'Uarrocir;': '\u2949',
2072 'Ubreve;': '\u016c',
2073 'ubreve;': '\u016d',
2081 'Udblac;': '\u0170',
2082 'udblac;': '\u0171',
2084 'ufisht;': '\u297e',
2085 'Ufr;': '\U0001d518',
2086 'ufr;': '\U0001d532',
2095 'ulcorn;': '\u231c',
2096 'ulcorner;': '\u231c',
2097 'ulcrop;': '\u230f',
2104 'UnderBrace;': '\u23df',
2105 'UnderBracket;': '\u23b5',
2106 'UnderParenthesis;': '\u23dd',
2108 'UnionPlus;': '\u228e',
2111 'Uopf;': '\U0001d54c',
2112 'uopf;': '\U0001d566',
2113 'UpArrow;': '\u2191',
2114 'Uparrow;': '\u21d1',
2115 'uparrow;': '\u2191',
2116 'UpArrowBar;': '\u2912',
2117 'UpArrowDownArrow;': '\u21c5',
2118 'UpDownArrow;': '\u2195',
2119 'Updownarrow;': '\u21d5',
2120 'updownarrow;': '\u2195',
2121 'UpEquilibrium;': '\u296e',
2122 'upharpoonleft;': '\u21bf',
2123 'upharpoonright;': '\u21be',
2125 'UpperLeftArrow;': '\u2196',
2126 'UpperRightArrow;': '\u2197',
2130 'Upsilon;': '\u03a5',
2131 'upsilon;': '\u03c5',
2133 'UpTeeArrow;': '\u21a5',
2134 'upuparrows;': '\u21c8',
2135 'urcorn;': '\u231d',
2136 'urcorner;': '\u231d',
2137 'urcrop;': '\u230e',
2141 'Uscr;': '\U0001d4b0',
2142 'uscr;': '\U0001d4ca',
2144 'Utilde;': '\u0168',
2145 'utilde;': '\u0169',
2153 'uwangle;': '\u29a7',
2154 'vangrt;': '\u299c',
2155 'varepsilon;': '\u03f5',
2156 'varkappa;': '\u03f0',
2157 'varnothing;': '\u2205',
2158 'varphi;': '\u03d5',
2160 'varpropto;': '\u221d',
2163 'varrho;': '\u03f1',
2164 'varsigma;': '\u03c2',
2165 'varsubsetneq;': '\u228a\ufe00',
2166 'varsubsetneqq;': '\u2acb\ufe00',
2167 'varsupsetneq;': '\u228b\ufe00',
2168 'varsupsetneqq;': '\u2acc\ufe00',
2169 'vartheta;': '\u03d1',
2170 'vartriangleleft;': '\u22b2',
2171 'vartriangleright;': '\u22b3',
2181 'Vdashl;': '\u2ae6',
2184 'veebar;': '\u22bb',
2186 'vellip;': '\u22ee',
2187 'Verbar;': '\u2016',
2191 'VerticalBar;': '\u2223',
2192 'VerticalLine;': '|',
2193 'VerticalSeparator;': '\u2758',
2194 'VerticalTilde;': '\u2240',
2195 'VeryThinSpace;': '\u200a',
2196 'Vfr;': '\U0001d519',
2197 'vfr;': '\U0001d533',
2199 'vnsub;': '\u2282\u20d2',
2200 'vnsup;': '\u2283\u20d2',
2201 'Vopf;': '\U0001d54d',
2202 'vopf;': '\U0001d567',
2205 'Vscr;': '\U0001d4b1',
2206 'vscr;': '\U0001d4cb',
2207 'vsubnE;': '\u2acb\ufe00',
2208 'vsubne;': '\u228a\ufe00',
2209 'vsupnE;': '\u2acc\ufe00',
2210 'vsupne;': '\u228b\ufe00',
2211 'Vvdash;': '\u22aa',
2212 'vzigzag;': '\u299a',
2215 'wedbar;': '\u2a5f',
2218 'wedgeq;': '\u2259',
2219 'weierp;': '\u2118',
2220 'Wfr;': '\U0001d51a',
2221 'wfr;': '\U0001d534',
2222 'Wopf;': '\U0001d54e',
2223 'wopf;': '\U0001d568',
2226 'wreath;': '\u2240',
2227 'Wscr;': '\U0001d4b2',
2228 'wscr;': '\U0001d4cc',
2233 'Xfr;': '\U0001d51b',
2234 'xfr;': '\U0001d535',
2244 'Xopf;': '\U0001d54f',
2245 'xopf;': '\U0001d569',
2246 'xoplus;': '\u2a01',
2247 'xotime;': '\u2a02',
2250 'Xscr;': '\U0001d4b3',
2251 'xscr;': '\U0001d4cd',
2252 'xsqcup;': '\u2a06',
2253 'xuplus;': '\u2a04',
2256 'xwedge;': '\u22c0',
2269 'Yfr;': '\U0001d51c',
2270 'yfr;': '\U0001d536',
2273 'Yopf;': '\U0001d550',
2274 'yopf;': '\U0001d56a',
2275 'Yscr;': '\U0001d4b4',
2276 'yscr;': '\U0001d4ce',
2282 'Zacute;': '\u0179',
2283 'zacute;': '\u017a',
2284 'Zcaron;': '\u017d',
2285 'zcaron;': '\u017e',
2290 'zeetrf;': '\u2128',
2291 'ZeroWidthSpace;': '\u200b',
2295 'zfr;': '\U0001d537',
2298 'zigrarr;': '\u21dd',
2300 'zopf;': '\U0001d56b',
2301 'Zscr;': '\U0001d4b5',
2302 'zscr;': '\U0001d4cf',
2308 import http.client as compat_http_client
2309 except ImportError: # Python 2
2310 import httplib as compat_http_client
2313 from urllib.error import HTTPError as compat_HTTPError
2314 except ImportError: # Python 2
2315 from urllib2 import HTTPError as compat_HTTPError
2318 from urllib.request import urlretrieve as compat_urlretrieve
2319 except ImportError: # Python 2
2320 from urllib import urlretrieve as compat_urlretrieve
2323 from html.parser import HTMLParser as compat_HTMLParser
2324 except ImportError: # Python 2
2325 from HTMLParser import HTMLParser as compat_HTMLParser
2328 from HTMLParser import HTMLParseError as compat_HTMLParseError
2329 except ImportError: # Python <3.4
2331 from html.parser import HTMLParseError as compat_HTMLParseError
2332 except ImportError: # Python >3.4
# HTMLParseError has been deprecated in Python 3.3 and removed in
# Python 3.5. Introduce a dummy exception for Python >= 3.5 so that
# exception handling stays compatible and uniform across versions.
class compat_HTMLParseError(Exception):
    """Stand-in exception used when no ``HTMLParseError`` can be imported.

    It only exists so that ``except compat_HTMLParseError`` clauses stay
    valid; nothing in this module raises it.
    """
2341 from subprocess import DEVNULL
2342 compat_subprocess_get_DEVNULL = lambda: DEVNULL
2344 compat_subprocess_get_DEVNULL = lambda: open(os.path.devnull, 'w')
2347 import http.server as compat_http_server
2349 import BaseHTTPServer as compat_http_server
2352 compat_str = unicode # Python 2
2357 from urllib.parse import unquote_to_bytes as compat_urllib_parse_unquote_to_bytes
2358 from urllib.parse import unquote as compat_urllib_parse_unquote
2359 from urllib.parse import unquote_plus as compat_urllib_parse_unquote_plus
2360 except ImportError: # Python 2
2361 _asciire = (compat_urllib_parse._asciire if hasattr(compat_urllib_parse, '_asciire')
2362 else re.compile(r'([\x00-\x7f]+)'))
2364 # HACK: The following are the correct unquote_to_bytes, unquote and unquote_plus
2365 # implementations from cpython 3.4.3's stdlib. Python 2's version
2366 # is apparently broken (see https://github.com/rg3/youtube-dl/pull/6244)
def compat_urllib_parse_unquote_to_bytes(string):
    """unquote_to_bytes('abc%20def') -> b'abc def'.

    Text input is encoded as UTF-8 first. A ``%`` that is not followed by
    a known two-digit hex code is copied through unchanged.
    """
    # Note: strings are encoded as UTF-8. This is only an issue if it contains
    # unescaped non-ASCII characters, which URIs should not.
    if not string:
        # Is it a string-like object? The bare attribute access raises
        # AttributeError for falsy values that are not strings.
        string.split
        return b''
    if isinstance(string, compat_str):
        string = string.encode('utf-8')
    bits = string.split(b'%')
    if len(bits) == 1:
        # No escapes at all.
        return string
    res = [bits[0]]
    append = res.append
    for item in bits[1:]:
        try:
            append(compat_urllib_parse._hextochr[item[:2]])
            append(item[2:])
        except KeyError:
            # Not a valid escape: keep the percent sign and the text as is.
            append(b'%')
            append(item)
    return b''.join(res)
def compat_urllib_parse_unquote(string, encoding='utf-8', errors='replace'):
    """Replace %xx escapes by their single-character equivalent.

    The optional encoding and errors parameters specify how to decode
    percent-encoded sequences into Unicode characters, as accepted by the
    bytes.decode() method. By default, percent-encoded sequences are
    decoded with UTF-8, and invalid sequences are replaced by a
    placeholder character. Passing None for either parameter selects that
    default.

    unquote('abc%20def') -> 'abc def'.
    """
    if '%' not in string:
        # Bare attribute access: fail early on non-string input.
        string.split
        return string
    if encoding is None:
        encoding = 'utf-8'
    if errors is None:
        errors = 'replace'
    # The pattern has one capture group, so ASCII runs land at odd indices
    # of the split result and everything else at even indices.
    bits = _asciire.split(string)
    res = [bits[0]]
    append = res.append
    for i in range(1, len(bits), 2):
        append(compat_urllib_parse_unquote_to_bytes(bits[i]).decode(encoding, errors))
        append(bits[i + 1])
    return ''.join(res)
def compat_urllib_parse_unquote_plus(string, encoding='utf-8', errors='replace'):
    """Like unquote(), but also replace plus signs by spaces, as required for
    unquoting HTML form values.

    unquote_plus('%7e/abc+def') -> '~/abc def'
    """
    # Plus signs are translated before percent-decoding, so an escaped
    # '%2B' still decodes to a literal '+'.
    string = string.replace('+', ' ')
    return compat_urllib_parse_unquote(string, encoding, errors)
2427 from urllib.parse import urlencode as compat_urllib_parse_urlencode
2428 except ImportError: # Python 2
2429 # Python 2 will choke in urlencode on mixture of byte and unicode strings.
2430 # Possible solutions are to either port it from python 3 with all
2431 # the friends or manually ensure input query contains only byte strings.
2432 # We will stick with latter thus recursively encoding the whole query.
def compat_urllib_parse_urlencode(query, doseq=0, encoding='utf-8'):
    """Encode *query* after converting every text string in it to bytes.

    Dicts, lists and tuples are walked recursively; text strings found in
    them are encoded with *encoding*; any other value is passed through
    untouched. *doseq* is forwarded to the underlying urlencode().
    """
    def encode_elem(e):
        if isinstance(e, dict):
            e = encode_dict(e)
        elif isinstance(e, (list, tuple,)):
            list_e = encode_list(e)
            # Preserve the container type of the input.
            e = tuple(list_e) if isinstance(e, tuple) else list_e
        elif isinstance(e, compat_str):
            e = e.encode(encoding)
        return e

    def encode_dict(d):
        return dict((encode_elem(k), encode_elem(v)) for k, v in d.items())

    def encode_list(l):
        return [encode_elem(e) for e in l]

    return compat_urllib_parse.urlencode(encode_elem(query), doseq=doseq)
2453 from urllib.request import DataHandler as compat_urllib_request_DataHandler
2454 except ImportError: # Python < 3.4
2455 # Ported from CPython 98774:1733b3bd46db, Lib/urllib/request.py
class compat_urllib_request_DataHandler(compat_urllib_request.BaseHandler):
    """URL handler that serves ``data:`` URLs from the URL text itself."""

    def data_open(self, req):
        """Decode the data URL of *req* and return it as a response object.

        The payload is percent-decoded, then base64-decoded when the media
        type ends in ``;base64``. The response carries Content-type and
        Content-length headers built from the decoded payload.
        """
        # data URLs as specified in RFC 2397.
        #
        # ignores POSTed data
        #
        # syntax:
        # dataurl   := "data:" [ mediatype ] [ ";base64" ] "," data
        # mediatype := [ type "/" subtype ] *( ";" parameter )
        # data      := *urlchar
        # parameter := attribute "=" value
        url = req.get_full_url()

        scheme, data = url.split(':', 1)
        mediatype, data = data.split(',', 1)

        # even base64 encoded data URLs might be quoted so unquote in any case:
        data = compat_urllib_parse_unquote_to_bytes(data)
        if mediatype.endswith(';base64'):
            data = binascii.a2b_base64(data)
            mediatype = mediatype[:-7]  # strip the ';base64' suffix

        if not mediatype:
            mediatype = 'text/plain;charset=US-ASCII'

        headers = email.message_from_string(
            'Content-type: %s\nContent-length: %d\n' % (mediatype, len(data)))

        return compat_urllib_response.addinfourl(io.BytesIO(data), headers, url)
2487 compat_basestring = basestring # Python 2
2489 compat_basestring = str
2492 compat_chr = unichr # Python 2
2497 from xml.etree.ElementTree import ParseError as compat_xml_parse_error
2498 except ImportError: # Python 2.6
2499 from xml.parsers.expat import ExpatError as compat_xml_parse_error
2502 etree = xml.etree.ElementTree
class _TreeBuilder(etree.TreeBuilder):
    """TreeBuilder whose doctype() callback does nothing."""

    def doctype(self, name, pubid, system):
        """Accept and discard a doctype declaration."""
        pass
2510 if sys.version_info[0] >= 3:
def compat_etree_fromstring(text):
    """Parse *text* as XML and return the root element.

    Parsing goes through an XMLParser that targets _TreeBuilder.
    """
    return etree.XML(text, parser=etree.XMLParser(target=_TreeBuilder()))
2514 # python 2.x tries to encode unicode strings with ascii (see the
2515 # XMLParser._fixtext method)
2517 _etree_iter = etree.Element.iter
2518 except AttributeError: # Python <=2.6
def _etree_iter(root):
    """Yield every descendant of *root* depth-first, in document order.

    *root* itself is not yielded.
    """
    for el in root.findall('*'):
        yield el
        for sub in _etree_iter(el):
            yield sub
2525 # on 2.6 XML doesn't have a parser argument, function copied from CPython
def _XML(text, parser=None):
    """Parse *text* with *parser* and return what the parser's close() gives.

    When no parser is supplied, an XMLParser targeting _TreeBuilder is
    created.
    """
    if not parser:
        parser = etree.XMLParser(target=_TreeBuilder())
    parser.feed(text)
    return parser.close()
def _element_factory(*args, **kwargs):
    """Build an Element and decode any byte-string attribute values.

    Arguments are forwarded to etree.Element unchanged. Attribute values
    that are bytes are replaced by their UTF-8 decoded text.
    """
    el = etree.Element(*args, **kwargs)
    for k, v in el.items():
        if isinstance(v, bytes):
            el.set(k, v.decode('utf-8'))
    return el
def compat_etree_fromstring(text):
    """Parse *text* as XML and return the root with text decoded from bytes.

    Attribute values are decoded by _element_factory while the tree is
    built. Element text that is still bytes after parsing is decoded as
    UTF-8 for every element that _etree_iter yields.
    """
    doc = _XML(text, parser=etree.XMLParser(target=_TreeBuilder(element_factory=_element_factory)))
    for el in _etree_iter(doc):
        if el.text is not None and isinstance(el.text, bytes):
            el.text = el.text.decode('utf-8')
    return doc
2547 if hasattr(etree, 'register_namespace'):
2548 compat_etree_register_namespace = etree.register_namespace
def compat_etree_register_namespace(prefix, uri):
    """Register a namespace prefix.

    The registry is global, and any existing mapping for either the
    given prefix or the namespace URI will be removed.
    *prefix* is the namespace prefix, *uri* is a namespace uri. Tags and
    attributes in this namespace will be serialized with prefix if possible.
    ValueError is raised if prefix is reserved or is invalid.
    """
    if re.match(r"ns\d+$", prefix):
        raise ValueError("Prefix format reserved for internal use")
    # Iterate over a snapshot because entries are deleted inside the loop.
    for k, v in list(etree._namespace_map.items()):
        if k == uri or v == prefix:
            del etree._namespace_map[k]
    etree._namespace_map[uri] = prefix
2565 if sys.version_info < (2, 7):
2566 # Here comes the crazy part: In 2.6, if the xpath is a unicode,
2567 # .//node does not match if a node is a direct child of . !
def compat_xpath(xpath):
    """Return *xpath* as an ASCII byte string when it is a text string.

    Any other value is returned unchanged.
    """
    if isinstance(xpath, compat_str):
        xpath = xpath.encode('ascii')
    return xpath
2573 compat_xpath = lambda xpath: xpath
2576 from urllib.parse import parse_qs as compat_parse_qs
2577 except ImportError: # Python 2
2578 # HACK: The following is the correct parse_qs implementation from cpython 3's stdlib.
2579 # Python 2's version is apparently totally broken
def _parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
               encoding='utf-8', errors='replace'):
    """Split query string *qs* into a list of (name, value) pairs.

    Fields are separated by ``&`` or ``;``. Names and values have ``+``
    turned into a space and are then percent-decoded with *encoding* and
    *errors*. Fields with an empty value are dropped unless
    *keep_blank_values* is true. A field without ``=`` raises ValueError
    when *strict_parsing* is true; otherwise it is kept with an empty
    value if *keep_blank_values* is true and skipped if not.
    """
    qs, _coerce_result = qs, compat_str
    pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')]
    r = []
    for name_value in pairs:
        if not name_value and not strict_parsing:
            continue
        nv = name_value.split('=', 1)
        if len(nv) != 2:
            if strict_parsing:
                raise ValueError('bad query field: %r' % (name_value,))
            # Handle case of a control-name with no equal sign
            if keep_blank_values:
                nv.append('')
            else:
                continue
        if len(nv[1]) or keep_blank_values:
            name = nv[0].replace('+', ' ')
            name = compat_urllib_parse_unquote(
                name, encoding=encoding, errors=errors)
            name = _coerce_result(name)
            value = nv[1].replace('+', ' ')
            value = compat_urllib_parse_unquote(
                value, encoding=encoding, errors=errors)
            value = _coerce_result(value)
            r.append((name, value))
    return r
def compat_parse_qs(qs, keep_blank_values=False, strict_parsing=False,
                    encoding='utf-8', errors='replace'):
    """Parse query string *qs* into a dict mapping names to lists of values.

    All arguments are handed to _parse_qsl. Values of a repeated name are
    collected in the order in which they appear in *qs*.
    """
    parsed_result = {}
    pairs = _parse_qsl(qs, keep_blank_values, strict_parsing,
                       encoding=encoding, errors=errors)
    for name, value in pairs:
        parsed_result.setdefault(name, []).append(value)
    return parsed_result
2623 compat_os_name = os._name if os.name == 'java' else os.name
2626 if compat_os_name == 'nt':
def compat_shlex_quote(s):
    """Quote *s* with double quotes unless it only has safe characters.

    Strings made solely of word characters, ``-``, ``_``, ``.`` and ``/``
    are returned unchanged. Anything else is wrapped in double quotes
    with embedded double quotes backslash-escaped.
    """
    return s if re.match(r'^[-_\w./]+$', s) else '"%s"' % s.replace('"', '\\"')
2631 from shlex import quote as compat_shlex_quote
2632 except ImportError: # Python < 3.3
def compat_shlex_quote(s):
    """Quote *s* with single quotes unless it only has safe characters.

    Strings made solely of word characters, ``-``, ``_``, ``.`` and ``/``
    are returned unchanged. Anything else is wrapped in single quotes;
    each embedded single quote is written as ``'"'"'``.
    """
    if re.match(r'^[-_\w./]+$', s):
        return s
    else:
        return "'" + s.replace("'", "'\"'\"'") + "'"
try:
    # Probe shlex.split with a non-ASCII text string (U+4E2D U+6587). The
    # characters are written as escapes so the probe does not depend on
    # how this source file is decoded.
    args = shlex.split('\u4e2d\u6587')
    assert (isinstance(args, list) and
            isinstance(args[0], compat_str) and
            args[0] == '\u4e2d\u6587')
    compat_shlex_split = shlex.split
except (AssertionError, UnicodeEncodeError):
    # Working around shlex issue with unicode strings on some python 2
    # versions (see http://bugs.python.org/issue1548891)
    def compat_shlex_split(s, comments=False, posix=True):
        """Split *s* like shlex.split() and return a list of text strings.

        Text input is encoded as UTF-8 before splitting and every
        resulting token is decoded from UTF-8 again.
        """
        if isinstance(s, compat_str):
            s = s.encode('utf-8')
        return [token.decode('utf-8') for token in shlex.split(s, comments, posix)]
2662 if sys.version_info >= (3, 0):
2663 compat_getenv = os.getenv
2664 compat_expanduser = os.path.expanduser
def compat_setenv(key, value, env=os.environ):
    """Store *value* under *key* in *env* (the process environment by default)."""
    env[key] = value
2669 # Environment variables should be decoded with filesystem encoding.
2670 # Otherwise it will fail if any non-ASCII characters present (see #3854 #3217 #2918)
def compat_getenv(key, default=None):
    """Return environment variable *key* decoded with the filesystem encoding.

    Falsy results (an unset variable with a falsy *default*, or an empty
    value) are returned as they are, without decoding.
    """
    # Imported here rather than at module level.
    from .utils import get_filesystem_encoding
    env = os.getenv(key, default)
    if env:
        env = env.decode(get_filesystem_encoding())
    return env
def compat_setenv(key, value, env=os.environ):
    """Store *value* under *key* in *env*, encoding text strings first.

    Text strings are encoded with the filesystem encoding; any other
    value is stored unchanged. *env* defaults to the process environment.
    """
    def encode(v):
        from .utils import get_filesystem_encoding
        return v.encode(get_filesystem_encoding()) if isinstance(v, compat_str) else v
    env[encode(key)] = encode(value)
2685 # HACK: The default implementations of os.path.expanduser from cpython do not decode
2686 # environment variables with filesystem encoding. We will work around this by
2687 # providing adjusted implementations.
2688 # The following are os.path.expanduser implementations from cpython 2.7.8 stdlib
2689 # for different platforms with correct environment variables decoding.
2691 if compat_os_name == 'posix':
def compat_expanduser(path):
    """Expand ~ and ~user constructions. If user or $HOME is unknown,
    do nothing."""
    if not path.startswith('~'):
        return path
    # i marks the end of the '~' or '~user' prefix.
    i = path.find('/', 1)
    if i < 0:
        i = len(path)
    if i == 1:
        # Plain '~': prefer $HOME, fall back to the password database.
        if 'HOME' not in os.environ:
            import pwd
            userhome = pwd.getpwuid(os.getuid()).pw_dir
        else:
            userhome = compat_getenv('HOME')
    else:
        import pwd
        try:
            pwent = pwd.getpwnam(path[1:i])
        except KeyError:
            # Unknown user: leave the path untouched.
            return path
        userhome = pwent.pw_dir
    userhome = userhome.rstrip('/')
    # A home of '/' strips to '' above; an empty result means the root.
    return (userhome + path[i:]) or '/'
2715 elif compat_os_name in ('nt', 'ce'):
def compat_expanduser(path):
    """Expand ~ and ~user constructs.

    If user or $HOME is unknown, do nothing."""
    if path[:1] != '~':
        return path
    # i marks the end of the '~' or '~user' prefix.
    i, n = 1, len(path)
    while i < n and path[i] not in '/\\':
        i = i + 1

    if 'HOME' in os.environ:
        userhome = compat_getenv('HOME')
    elif 'USERPROFILE' in os.environ:
        userhome = compat_getenv('USERPROFILE')
    elif 'HOMEPATH' not in os.environ:
        return path
    else:
        # compat_getenv yields None (it does not raise KeyError) for an
        # unset variable, so substitute an empty drive explicitly.
        drive = compat_getenv('HOMEDRIVE') or ''
        userhome = os.path.join(drive, compat_getenv('HOMEPATH'))

    if i != 1:  # ~user
        # Another user's home is taken to be a sibling of the current one.
        userhome = os.path.join(os.path.dirname(userhome), path[1:i])

    return userhome + path[i:]
2744 compat_expanduser = os.path.expanduser
2747 if sys.version_info < (3, 0):
def compat_print(s):
    """Print *s* encoded with the preferred encoding.

    Characters that cannot be encoded are written as XML character
    references.
    """
    from .utils import preferredencoding
    print(s.encode(preferredencoding(), 'xmlcharrefreplace'))
def compat_print(s):
    """Print the text string *s*; asserts that *s* is a compat_str."""
    assert isinstance(s, compat_str)
    print(s)
2757 if sys.version_info < (3, 0) and sys.platform == 'win32':
def compat_getpass(prompt, *args, **kwargs):
    """Call getpass.getpass() with a text *prompt* encoded to bytes.

    A text prompt is encoded with the preferred encoding; remaining
    arguments are forwarded unchanged.
    """
    if isinstance(prompt, compat_str):
        from .utils import preferredencoding
        prompt = prompt.encode(preferredencoding())
    return getpass.getpass(prompt, *args, **kwargs)
2764 compat_getpass = getpass.getpass
2767 compat_input = raw_input
2768 except NameError: # Python 3
2769 compat_input = input
2771 # Python < 2.6.5 require kwargs to be bytes
2775 _testfunc(**{'x': 0})
def compat_kwargs(kwargs):
    """Return a copy of *kwargs* with every key passed through bytes()."""
    return dict((bytes(k), v) for k, v in kwargs.items())
2780 compat_kwargs = lambda kwargs: kwargs
2784 compat_numeric_types = (int, float, long, complex)
2785 except NameError: # Python 3
2786 compat_numeric_types = (int, float, complex)
2789 if sys.version_info < (2, 7):
def compat_socket_create_connection(address, timeout, source_address=None):
    """Connect to *address* (a ``(host, port)`` pair) and return the socket.

    Every address returned by getaddrinfo() is tried in order. The first
    socket that connects is returned with *timeout* set on it and, when
    *source_address* is given, bound to it beforehand.

    Raises socket.error: the error of the last failed attempt, or a new
    one if getaddrinfo() returned no addresses.
    """
    host, port = address
    err = None
    for res in socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM):
        af, socktype, proto, canonname, sa = res
        sock = None
        try:
            sock = socket.socket(af, socktype, proto)
            sock.settimeout(timeout)
            if source_address:
                sock.bind(source_address)
            sock.connect(sa)
            return sock
        except socket.error as exc:
            # Remember the failure and release the half-open socket before
            # moving on to the next candidate address.
            err = exc
            if sock is not None:
                sock.close()
    if err is not None:
        raise err
    else:
        raise socket.error('getaddrinfo returns an empty list')
2812 compat_socket_create_connection = socket.create_connection
2815 # Fix https://github.com/rg3/youtube-dl/issues/4223
2816 # See http://bugs.python.org/issue9161 for what is broken
def workaround_optparse_bug9161():
    """Patch OptionGroup.add_option if it rejects text-string arguments.

    A throwaway option group is used as a probe. Only if adding an option
    to it raises TypeError is optparse.OptionGroup.add_option replaced by
    a wrapper that ASCII-encodes text-string arguments (unencodable
    characters are replaced) before calling the real method.
    """
    op = optparse.OptionParser()
    og = optparse.OptionGroup(op, 'foo')
    try:
        og.add_option('-t')
    except TypeError:
        real_add_option = optparse.OptionGroup.add_option

        def _compat_add_option(self, *args, **kwargs):
            enc = lambda v: (
                v.encode('ascii', 'replace') if isinstance(v, compat_str)
                else v)
            bargs = [enc(a) for a in args]
            bkwargs = dict(
                (k, enc(v)) for k, v in kwargs.items())
            return real_add_option(self, *bargs, **bkwargs)
        optparse.OptionGroup.add_option = _compat_add_option
2836 if hasattr(shutil, 'get_terminal_size'): # Python >= 3.3
2837 compat_get_terminal_size = shutil.get_terminal_size
2839 _terminal_size = collections.namedtuple('terminal_size', ['columns', 'lines'])
def compat_get_terminal_size(fallback=(80, 24)):
    """Return the terminal size as a ``(columns, lines)`` named tuple.

    The COLUMNS and LINES environment variables are used when they hold
    positive integers. A dimension that is missing, non-numeric or not
    positive is taken from the output of ``stty size``; if that fails,
    it is taken from *fallback*, given as ``(columns, lines)``.
    """
    def _env_int(name):
        # Unset, empty and non-numeric values all count as "unknown"
        # instead of raising ValueError.
        value = compat_getenv(name)
        if not value:
            return None
        try:
            return int(value)
        except ValueError:
            return None

    columns = _env_int('COLUMNS')
    lines = _env_int('LINES')

    if columns is None or lines is None or columns <= 0 or lines <= 0:
        try:
            sp = subprocess.Popen(
                ['stty', 'size'],
                stdout=subprocess.PIPE, stderr=subprocess.PIPE)
            out, err = sp.communicate()
            # stty prints the rows first, then the columns.
            _lines, _columns = map(int, out.split())
        except Exception:
            _columns, _lines = _terminal_size(*fallback)

        if columns is None or columns <= 0:
            columns = _columns
        if lines is None or lines <= 0:
            lines = _lines
    return _terminal_size(columns, lines)
2870 itertools.count(start=0, step=1)
2871 compat_itertools_count = itertools.count
2872 except TypeError: # Python 2.6
def compat_itertools_count(start=0, step=1):
    """Yield *start*, *start* + *step*, *start* + 2 * *step*, ... forever."""
    n = start
    while True:
        yield n
        n += step
2879 if sys.version_info >= (3, 0):
2880 from tokenize import tokenize as compat_tokenize_tokenize
2882 from tokenize import generate_tokens as compat_tokenize_tokenize
2886 struct.pack('!I', 0)
2888 # In Python 2.6 and 2.7.x < 2.7.7, struct requires a bytes argument
2889 # See https://bugs.python.org/issue19099
def compat_struct_pack(spec, *args):
    """Call struct.pack() with a text-string *spec* encoded to ASCII bytes."""
    if isinstance(spec, compat_str):
        spec = spec.encode('ascii')
    return struct.pack(spec, *args)
def compat_struct_unpack(spec, *args):
    """Call struct.unpack() with a text-string *spec* encoded to ASCII bytes."""
    if isinstance(spec, compat_str):
        spec = spec.encode('ascii')
    return struct.unpack(spec, *args)
2900 compat_struct_pack = struct.pack
2901 compat_struct_unpack = struct.unpack
2904 from future_builtins import zip as compat_zip
2905 except ImportError: # not 2.6+ or is 3.x
2907 from itertools import izip as compat_zip # < 2.5 or 3.x
2911 if platform.python_implementation() == 'PyPy' and sys.pypy_version_info < (5, 4, 0):
2912 # PyPy2 prior to version 5.4.0 expects byte strings as Windows function
2913 # names, see the original PyPy issue [1] and the youtube-dl one [2].
2914 # 1. https://bitbucket.org/pypy/pypy/issues/2360/windows-ctypescdll-typeerror-function-name
2915 # 2. https://github.com/rg3/youtube-dl/pull/4392
def compat_ctypes_WINFUNCTYPE(*args, **kwargs):
    """Return a WINFUNCTYPE prototype wrapper that str()-converts names.

    The returned callable takes a ``(funcname, dll)`` tuple followed by
    any further arguments and forwards them to the real prototype with
    the function name passed through str().
    """
    real = ctypes.WINFUNCTYPE(*args, **kwargs)

    def resf(tpl, *args, **kwargs):
        funcname, dll = tpl
        return real((str(funcname), dll), *args, **kwargs)

    return resf
def compat_ctypes_WINFUNCTYPE(*args, **kwargs):
    """Forward all arguments to ctypes.WINFUNCTYPE and return its result."""
    return ctypes.WINFUNCTYPE(*args, **kwargs)
2930 'compat_HTMLParseError',
2931 'compat_HTMLParser',
2933 'compat_basestring',
2937 'compat_ctypes_WINFUNCTYPE',
2938 'compat_etree_fromstring',
2939 'compat_etree_register_namespace',
2940 'compat_expanduser',
2941 'compat_get_terminal_size',
2944 'compat_html_entities',
2945 'compat_html_entities_html5',
2946 'compat_http_client',
2947 'compat_http_server',
2949 'compat_itertools_count',
2951 'compat_numeric_types',
2957 'compat_shlex_quote',
2958 'compat_shlex_split',
2959 'compat_socket_create_connection',
2961 'compat_struct_pack',
2962 'compat_struct_unpack',
2963 'compat_subprocess_get_DEVNULL',
2964 'compat_tokenize_tokenize',
2965 'compat_urllib_error',
2966 'compat_urllib_parse',
2967 'compat_urllib_parse_unquote',
2968 'compat_urllib_parse_unquote_plus',
2969 'compat_urllib_parse_unquote_to_bytes',
2970 'compat_urllib_parse_urlencode',
2971 'compat_urllib_parse_urlparse',
2972 'compat_urllib_request',
2973 'compat_urllib_request_DataHandler',
2974 'compat_urllib_response',
2976 'compat_urlretrieve',
2977 'compat_xml_parse_error',
2980 'workaround_optparse_bug9161',