Data Validation

Paul Seamons

Perl Projects

What is taint?

Top reasons to use taint

Top reasons why not to use taint

Removing all excuses (but one)

Q: Why don't you use taint?
A: I don't right now - why start?

Rebuttal: Um - because there is no reason not to.
A: Yes there is ...

Removing all excuses (but one)

Q: Why don't you use taint?
A: I'm already careful with my data

Rebuttal: Um - are you sure you caught every hole?
A: Yes I am!

Rebuttal: Um - Wow.

Removing all excuses (but one)

Q: Why don't you use taint?
A: I don't want to lose PERL5LIB.

Rebuttal: Use Taint::Runtime and you don't have to.
A: Oh

Removing all excuses (but one)

Q: Why don't you use taint?
A: I trust the user.

Rebuttal: Do you trust me?
A: Yes

Rebuttal: I've got some land in Alaska ...

Removing all excuses (but one)

Q: Why don't you use taint?
A: I trust the user's data

Rebuttal: Really?
A: No - just kidding - I don't.

Removing all excuses (but one)

Q: Why don't you use taint?
A: I use one liners

Rebuttal: For production code?
A: Well ...

Taint::Runtime allows you to...

Taint::Runtime does not allow you to...

Taint::Runtime Usage

  ### sample "enable" usage

  #!/usr/bin/perl -w

  use Taint::Runtime qw(enable taint_env);

  # having the keyword enable in the import list starts taint

Taint::Runtime Usage

  ### sample $TAINT usage

  #!/usr/bin/perl -w
  use Taint::Runtime qw($TAINT taint_env);

    $TAINT = 1;

  # taint is now enabled

  if (1) {
    local $TAINT = 0;

    # do something we trust

  # back to an untrustworthy area

Taint::Runtime Usage

  ### sample functional usage

  #!/usr/bin/perl -w
  use strict;
  use Taint::Runtime qw(taint_start is_tainted taint_env
                        taint untaint

  ### other operations here

  taint_start(); # taint should become active
  taint_env(); # %ENV was previously untainted

  print taint_enabled() ? "enabled\n" : "not enabled\n";

  my $var = taint("some string");

  print is_tainted($var) ? "tainted\n" : "not tainted\n";

  $var = untaint($var);
  # OR
  untaint \$var;

  print is_tainted($var) ? "tainted\n" : "not tainted\n";

Validating your data

Common goals of CPAN modules

Data::FormValidator (Setup)

  my $val_hash = {
    required => [qw(username

    dependencies => {
      password => [qw(password2)],

    constraints => {
      email    => qr/^[\w\.\-]+\@[\w\.\-]+$/,
      password => qr/^[ -~]{6,30}$/,
      username => qr/^\w+$/,

    untaint_all_constraints => 1,

    msgs => {
     format => '%s',
     prefix => 'error_',

Data::FormValidator (Results 1)

  use Data::FormValidator;

  my $form = {};
  my $results = Data::FormValidator->check($form, $val_hash);

  if ($results->has_invalid || $results->has_missing) {
    use Data::Dumper qw(Dumper);
    print Dumper $results->msgs;
    # prints
    # $VAR1 = {
    #   'error_password' => 'Missing',
    #   'error_username' => 'Missing',
    #   'error_email'    => 'Missing'
    # };

Data::FormValidator (Results 1.2)

  if ($results->has_invalid || $results->has_missing) {
    use Data::Dumper qw(Dumper);
    print Dumper [$results->invalid];
    print Dumper [$results->missing];
    # prints
    # $VAR1 = [];
    # $VAR1 = [
    #   'email',
    #   'password',
    #   'username'
    # ];

Data::FormValidator (Results 2)

  $form = {
    username  => "++foobar++",
    password  => "123",
    password2 => "1234",
  $results = Data::FormValidator->check($form, $val_hash);

  if ($results->has_invalid || $results->has_missing) {
    use Data::Dumper qw(Dumper);
    print Dumper $results->msgs;
    # prints
    # $VAR1 = {
    #   'error_password' => 'Invalid',
    #   'error_username' => 'Invalid',
    #   'error_email' => 'Missing'
    # };

CGI::Ex::Validate (Setup)

  my $val_hash = {
    username => {
      required => 1,
      match    => 'm/^\w+$/',
      untaint  => 1,
    password => {
      required => 1,
      match    => 'm/^[ -~]{6,30}$/',
      untaint  => 1,
    password2 => {
      validate_if => 'password',
      equals      => 'password',
    email => {
      required => 1,
      match    => 'm/^[\w\.\-]+\@[\w\.\-]+$/',
      untaint  => 1,

CGI::Ex::Validate (Results)

  use CGI::Ex::Validate;

  my $form   = {};

  my $errobj = CGI::Ex::Validate->validate($form, $val_hash);

  if ($errobj) {
    print $errobj->as_string."\n";
    # Prints
    # Please correct the following items:
    #   The field email is required.
    #   The field password is required.
    #   The field username is required.

CGI::Ex::Validate (Results 2)

  $form = {
    username  => "++foobar++",
    password  => "123",
    password2 => "1234",

  $val_hash->{'group order'} = [qw(username password password2 email)];

  $errobj = CGI::Ex::Validate->validate($form, $val_hash);

  if ($errobj) {
    print $errobj->as_string."\n";
    # Prints
    # Please correct the following items:
    #   The field username may only contain letters and numbers
    #   The field password was less than 6 characters.
    #   The field password2 did not equal the field password.
    #   The field email is required.

CGI::Ex::Validate (Results 2.2)

  if ($errobj) {
    use Data::Dumper qw(Dumper);

    print Dumper $errobj->as_array;
    # Prints
    # $VAR1 = [
    #   "Please correct the following items:",
    #   "  The field username may only contain letters and numbers",
    #   "  The field password was less than 6 characters.",
    #   "  The field password2 did not equal the field password.",
    #   "  The field email is required."
    # ];

CGI::Ex::Validate (Results 2.3)

  if ($errobj) {
    use Data::Dumper qw(Dumper);

    print Dumper $errobj->as_hash;
    # Prints
    # $VAR1 = {
    #   email_error     => "The field email is required.",
    #   password2_error =>
    #     "The field password2 did not equal the field password.",
    #   password_error  =>
    #     "The field password was less than 6 characters.",
    #   username_error  =>
    #     "The field username may only contain letters and numbers"
    # };

Homegrown (Setup)

sub check_form {
  my $form = shift;
  my $hash = {};
  if (! exists $form->{'username'}) {
    push @{ $hash->{'username_error'} }, 'Username required';
  } elsif ($form->{'username'} !~ m/^(\w+)$/) {
    push @{ $hash->{'username_error'} }, 'Username may only contain letters and numbers';
  } else {
    $form->{'username'} = $1;

Homegrown (Setup 2)

  if (! exists $form->{'password'}) {
    push @{ $hash->{'password_error'} }, 'Password required';
  } else {
    if ($form->{'password'} !~ m/^([ -~]+)$/) {
      push @{ $hash->{'password_error'} }, 'Password contained bad characters';
    } else {
      $form->{'password'} = $1;
    if (length($form->{'password'}) < 6) {
      push @{ $hash->{'password_error'} }, 'Password must be more than 6 characters';
    } elsif (length($form->{'password'}) > 30) {
      push @{ $hash->{'password_error'} }, 'Password must be less than 30 characters';

    if (! defined($form->{'password2'})
        || $form->{'password2'} ne $form->{'password'}) {
      push @{ $hash->{'password2_error'} }, 'Password2 and password must be the same';

Homegrown (Setup 3)

  if (! exists $form->{'email'}) {
    push @{ $hash->{'email_error'} }, 'Email required';
  } elsif ($form->{'email'} !~ m/^[\w\.\-]+\@[\w\.\-]+$/) {
    push @{ $hash->{'email_error'} }, 'Please type a valid email address';

  return $hash;

Homegrown (Results)

my $form = {};
my $hash = check_form($form);

if (scalar keys %$hash) {
  use Data::Dumper qw(Dumper);
  print Dumper $hash;
  # Prints
  # $VAR1 = {
  #   'email_error' => [
  #                      'Email required'
  #                    ],
  #   'password_error' => [
  #                         'Password required'
  #                       ],
  #   'username_error' => [
  #                         'Username required'
  #                       ]
  # };

Homegrown (Results 2)

if (scalar keys %$hash) {
  use Data::Dumper qw(Dumper);
  print Dumper $hash;
  # Prints
  # $VAR1 = {
  #   'email_error' => [
  #      'Email required'
  #                    ],
  #   'password_error' => [
  #      'Password must be more than 6 characters'
  #                       ],
  #   'password2_error' => [
  #      'Password2 and password must be the same'
  #                        ],
  #   'username_error' => [
  #      'Username may only contain letters and numbers'
  #                       ]
  # };

So which is better?

So which is better? (2)

So which is better? (3)

So which is better? (4)

When is a hash not a hash?

CGI::Ex::Validate's val_hash can be any of

Tying it all back together