Skip to content

Commit 91dbba8

Browse files
committed
Scrub body_data params too (e.g. POSTed JSON)
If we have `$c->req->body_data` - for e.g. the request was a POST with a JSON body which Catalyst has decoded into `$c->req->body_data` - then scrub HTML in there too (but applying the same `ignore_params` checks so that you can exempt certain JSON body params from scrubbing). Also moved the ignore_params tests into t/03_params.t, and added the tests for this new feature there too - don't need so many individual test apps, when most features can be tested with a single test app.
1 parent 6ac9a8e commit 91dbba8

File tree

6 files changed

+149
-119
lines changed

6 files changed

+149
-119
lines changed

lib/Catalyst/Plugin/HTML/Scrubber.pm

Lines changed: 69 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
package Catalyst::Plugin::HTML::Scrubber;
2-
32
use Moose;
43
use namespace::autoclean;
54

@@ -47,33 +46,80 @@ sub prepare_parameters {
4746
sub html_scrub {
4847
my ($c, $conf) = @_;
4948

50-
param:
51-
for my $param (keys %{ $c->request->{parameters} }) {
52-
#while (my ($param, $value) = each %{ $c->request->{parameters} }) {
53-
my $value = \$c->request->{parameters}{$param};
54-
if (ref $$value && ref $$value ne 'ARRAY') {
55-
next param;
56-
}
49+
# If there's body_data - for e.g. a POSTed JSON body that was decoded -
50+
# then we need to walk through it, scrubbing as appropriate
51+
if (my $body_data = $c->request->body_data) {
52+
$c->_scrub_recurse($conf, $c->request->body_data);
53+
}
54+
55+
# Normal query/POST body parameters:
56+
$c->_scrub_recurse($conf, $c->request->parameters);
57+
58+
}
59+
60+
# Recursively scrub param values...
61+
sub _scrub_recurse {
62+
my ($c, $conf, $data) = @_;
63+
64+
# If the thing we've got is a hashref, walk over its keys, checking
65+
# whether we should ignore, otherwise, do the needful
66+
if (ref $data eq 'HASH') {
67+
for my $key (keys %$data) {
68+
if (!$c->_should_scrub_param($conf, $key)) {
69+
next;
70+
}
5771

58-
# If we only want to operate on certain params, do that checking
59-
# now...
60-
if ($conf && $conf->{ignore_params}) {
61-
my $ignore_params = $c->config->{scrubber}{ignore_params};
62-
if (ref $ignore_params ne 'ARRAY') {
63-
$ignore_params = [ $ignore_params ];
72+
# OK, it's fine to fettle with this key - if its value is
73+
# a ref, recurse, otherwise, scrub
74+
if (my $ref = ref $data->{$key}) {
75+
$c->_scrub_recurse($conf, $data->{$key});
76+
} else {
77+
# Alright, non-ref value, so scrub it
78+
# FIXME why did we have to have this ref-ref handling fun?
79+
#$_ = $c->_scrubber->scrub($_) for (ref($$value) ? @{$$value} : $$value);
80+
$data->{$key} = $c->_scrubber->scrub($data->{$key});
6481
}
65-
for my $ignore_param (@$ignore_params) {
66-
if (ref $ignore_param eq 'Regexp') {
67-
next param if $param =~ $ignore_param;
68-
} else {
69-
next param if $param eq $ignore_param;
70-
}
82+
}
83+
} elsif (ref $data eq 'ARRAY') {
84+
# Simple - scrub all the values
85+
$_ = $c->_scrubber->scrub($_) for @$data;
86+
for (@$data) {
87+
if (ref $_) {
88+
$c->_scrub_recurse($conf, $_);
89+
} else {
90+
$_ = $c->_scrubber->scrub($_);
7191
}
72-
}
92+
}
93+
} elsif (ref $data eq 'CODE') {
94+
$c->log->debug("Can't scrub a coderef!");
95+
} else {
96+
# This shouldn't happen, as we should always start with a ref,
97+
# and non-ref hash/array values should have been handled above.
98+
$c->log->debug("Non-ref to scrub - should this happen?");
99+
}
100+
}
73101

74-
# If we're still here, we want to scrub this param's value.
75-
$_ = $c->_scrubber->scrub($_) for (ref($$value) ? @{$$value} : $$value);
102+
sub _should_scrub_param {
103+
my ($c, $conf, $param) = @_;
104+
# If we only want to operate on certain params, do that checking
105+
# now...
106+
if ($conf && $conf->{ignore_params}) {
107+
my $ignore_params = $c->config->{scrubber}{ignore_params};
108+
if (ref $ignore_params ne 'ARRAY') {
109+
$ignore_params = [ $ignore_params ];
110+
}
111+
for my $ignore_param (@$ignore_params) {
112+
if (ref $ignore_param eq 'Regexp') {
113+
return if $param =~ $ignore_param;
114+
} else {
115+
return if $param eq $ignore_param;
116+
}
117+
}
76118
}
119+
120+
# If we've not bailed above, we didn't match any ignore_params
121+
# entries, or didn't have any, so we do want to scrub
122+
return 1;
77123
}
78124

79125
__PACKAGE__->meta->make_immutable;

t/03_params.t

Lines changed: 70 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,19 +19,86 @@ use Test::More;
1919
my $req = POST('/', [foo => 'bar']);
2020
my ($res, $c) = ctx_request($req);
2121
ok($res->code == RC_OK, 'response ok');
22-
is($c->req->param('foo'), 'bar', 'parameter ok');
22+
is($c->req->param('foo'), 'bar', 'Normal POST body param, nothing to strip, left alone');
2323
}
2424
{
2525
my $req = POST('/', [foo => 'bar<script>alert("0");</script>']);
2626
my ($res, $c) = ctx_request($req);
2727
ok($res->code == RC_OK, 'response ok');
28-
is($c->req->param('foo'), 'bar');
28+
is($c->req->param('foo'), 'bar', 'XSS stripped from normal POST body param');
2929
}
3030
{
31+
# we allow <b> in the test app config so this should not be stripped
3132
my $req = POST('/', [foo => '<b>bar</b>']);
3233
my ($res, $c) = ctx_request($req);
3334
ok($res->code == RC_OK, 'response ok');
34-
is($c->req->param('foo'), '<b>bar</b>', 'parameter ok');
35+
is($c->req->param('foo'), '<b>bar</b>', 'Allowed tag not stripped');
36+
}
37+
{
38+
diag "HTML left alone in ignored field - by regex match";
39+
my $value = '<h1>Bar</h1><p>Foo</p>';
40+
my $req = POST('/', [foo_html => $value]);
41+
my ($res, $c) = ctx_request($req);
42+
ok($res->code == RC_OK, 'response ok');
43+
is(
44+
$c->req->param('foo_html'),
45+
$value,
46+
'HTML left alone in ignored (by regex) field',
47+
);
48+
}
49+
{
50+
diag "HTML left alone in ignored field - by name";
51+
my $value = '<h1>Bar</h1><p>Foo</p>';
52+
my $req = POST('/', [ignored_param => $value]);
53+
my ($res, $c) = ctx_request($req);
54+
ok($res->code == RC_OK, 'response ok');
55+
is(
56+
$c->req->param('ignored_param'),
57+
$value,
58+
'HTML left alone in ignored (by name) field',
59+
);
60+
}
61+
62+
{
63+
# Test that data in a JSON body POSTed gets scrubbed too
64+
my $json_body = <<JSON;
65+
{
66+
"foo": "Top-level <img src=foo.jpg title=fun>",
67+
"baz":{
68+
"one":"Second-level <img src=test.jpg>"
69+
},
70+
"arr": [
71+
"one test <img src=arrtest1.jpg>",
72+
"two <script>window.alert('XSS!');</script>"
73+
],
74+
"some_html": "Leave <b>this</b> alone: <img src=allowed.gif>"
75+
}
76+
JSON
77+
my $req = POST('/',
78+
Content_Type => 'application/json', Content => $json_body
79+
);
80+
my ($res, $c) = ctx_request($req);
81+
ok($res->code == RC_OK, 'response ok');
82+
is(
83+
$c->req->body_data->{foo},
84+
'Top-level ', # note trailing space where img was removed
85+
'Top level body param scrubbed',
86+
);
87+
is(
88+
$c->req->body_data->{baz}{one},
89+
'Second-level ',
90+
'Second level body param scrubbed',
91+
);
92+
is(
93+
$c->req->body_data->{arr}[0],
94+
'one test ',
95+
'Second level array contents scrubbbed',
96+
);
97+
is(
98+
$c->req->body_data->{some_html},
99+
'Leave <b>this</b> alone: <img src=allowed.gif>',
100+
'Body data param matching ignore_params left alone',
101+
);
35102
}
36103

37104
done_testing();

t/05_ignore_params.t

Lines changed: 0 additions & 53 deletions
This file was deleted.

t/lib/MyApp03.pm

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,17 @@ extends 'Catalyst';
99

1010
__PACKAGE__->config(
1111
name => 'MyApp03',
12-
scrubber => [allow => [qw/br hr b/],]
12+
scrubber => {
1313

14+
auto => 1,
15+
16+
ignore_params => [ qr/_html$/, 'ignored_param' ],
17+
18+
# params for HTML::Scrubber
19+
params => [
20+
allow => [qw/br hr b/],
21+
],
22+
}
1423
);
1524
__PACKAGE__->setup();
1625

t/lib/MyApp05.pm

Lines changed: 0 additions & 22 deletions
This file was deleted.

t/lib/MyApp05/Controller/Root.pm

Lines changed: 0 additions & 17 deletions
This file was deleted.

0 commit comments

Comments
 (0)