summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKevin O'Connor <kevin.oconnor7@gmail.com>2020-10-17 13:10:41 -0400
committerGitHub <noreply@github.com>2020-10-18 00:10:41 +0700
commit4eda10499e8db831167062b0e0dbc7d10d34c1f9 (patch)
treec0afe01daf906bd9b7f0568eb9cce49e24f15d45
parent605535776a8d5beba78b4d1b057d5206ddd969eb (diff)
[utils] Don't attempt to coerce JS strings to numbers in js_to_json (#26851)
The current logic in `js_to_json` tries to rewrite octal/hex numbers to decimal. However, by the time that logic runs, the surrounding `"` or `'` have already been trimmed off. This causes values that were originally strings, but happen to look like octal/hex numbers, to get rewritten to decimal and returned as a number rather than a string. In practice something like: ```js { "0x40": "foo", "040": "bar", } ``` would get rewritten as: ```json { 64: "foo", 32: "bar" } ``` This is problematic because the result isn't valid JSON: JSON object keys must be strings.
-rw-r--r--test/test_utils.py6
-rw-r--r--youtube_dl/utils.py12
2 files changed, 12 insertions, 6 deletions
diff --git a/test/test_utils.py b/test/test_utils.py
index 962fd8d75..c2d1e4fb1 100644
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -994,6 +994,12 @@ class TestUtil(unittest.TestCase):
on = js_to_json('{42:4.2e1}')
self.assertEqual(json.loads(on), {'42': 42.0})
+ on = js_to_json('{ "0x40": "0x40" }')
+ self.assertEqual(json.loads(on), {'0x40': '0x40'})
+
+ on = js_to_json('{ "040": "040" }')
+ self.assertEqual(json.loads(on), {'040': '040'})
+
def test_js_to_json_malformed(self):
self.assertEqual(js_to_json('42a1'), '42"a1"')
self.assertEqual(js_to_json('42a-1'), '42"a"-1')
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index 01d9c0362..737e2810e 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -4088,12 +4088,12 @@ def js_to_json(code):
'\\\n': '',
'\\x': '\\u00',
}.get(m.group(0), m.group(0)), v[1:-1])
-
- for regex, base in INTEGER_TABLE:
- im = re.match(regex, v)
- if im:
- i = int(im.group(1), base)
- return '"%d":' % i if v.endswith(':') else '%d' % i
+ else:
+ for regex, base in INTEGER_TABLE:
+ im = re.match(regex, v)
+ if im:
+ i = int(im.group(1), base)
+ return '"%d":' % i if v.endswith(':') else '%d' % i
return '"%s"' % v