Data Validation
Taint
CGI::Ex::Validate

Paul Seamons

Perl Projects

What is taint?

Top Reasons to use taint

Top reasons why not to use taint

Removing all excuses (but one)


Q: Why don't you use taint?
A: I don't right now - why start?

Rebuttal: Um - because there is no reason not to.
A: Yes there is ...

Removing all excuses (but one)


Q: Why don't you use taint?
A: I'm already careful with my data

Rebuttal: Um - are you sure you caught every hole?
A: Yes I am!

Rebuttal: Um - Wow.

Removing all excuses (but one)


Q: Why don't you use taint?
A: I don't want to lose PERL5LIB.

Rebuttal: Use Taint::Runtime and you don't have to?
A: Oh

Removing all excuses (but one)


Q: Why don't you use taint?
A: I trust the user.

Rebuttal: Do you trust me?
A: Yes

Rebuttal: I've got some land in Alaska ...

Removing all excuses (but one)


Q: Why don't you use taint?
A: I trust the user's data

Rebuttal: Really?
A: No - just kidding - I don't.

Removing all excuses (but one)


Q: Why don't you use taint?
A: I use one liners

Rebuttal: For production code?
A: well ...

Taint::Runtime allows you to...

Taint::Runtime does not allow you to...

Taint::Runtime Usage


  ### sample "enable" usage

  #!/usr/bin/perl -w

  # "enable" is a keyword rather than a function to import: listing it
  # is what switches taint on.  taint_env is imported as a normal function.
  use Taint::Runtime qw(enable taint_env);
  # %ENV was filled in before taint was switched on, so taint it by hand.
  taint_env();

  # having the keyword enable in the import list starts taint

Taint::Runtime Usage


  ### sample $TAINT usage

  #!/usr/bin/perl -w
  use Taint::Runtime qw($TAINT taint_env);

  # Assigning to the imported $TAINT switches taint on (1) or off (0).
  # Doing it inside BEGIN makes it happen while the file is still being
  # compiled, i.e. before any of the code below runs.
  BEGIN {
    $TAINT = 1;
    taint_env();
  }

  # taint is now enabled

  if (1) {
    # "local" puts the previous value of $TAINT back when this block
    # exits, so taint is only off between these braces.
    local $TAINT = 0;

    # do something we trust
  }

  # back to an untrusted area

Taint::Runtime Usage


  ### sample functional usage

  #!/usr/bin/perl -w
  use strict;
  use Taint::Runtime qw(
    taint_start taint_env taint_enabled
    taint untaint is_tainted
  );

  ### other operations here

  # Switch taint on part-way through the program ...
  taint_start();
  # ... and taint %ENV by hand, since it was untainted until now.
  taint_env();

  if (taint_enabled()) {
    print "enabled\n";
  } else {
    print "not enabled\n";
  }

  # Mark a value as tainted and confirm that it is.
  my $var = taint("some string");
  if (is_tainted($var)) {
    print "tainted\n";
  } else {
    print "not tainted\n";
  }

  # Two ways to untaint: take the returned copy ...
  $var = untaint($var);
  # ... OR pass a reference and have the variable untainted in place.
  untaint \$var;

  if (is_tainted($var)) {
    print "tainted\n";
  } else {
    print "not tainted\n";
  }

Validating your data

Common goals of CPAN modules

Data::FormValidator (Setup)



  # Data::FormValidator profile for the sign-up form.
  my $val_hash = {
    # Fields that must always be submitted.
    required => ['username', 'password', 'email'],

    # When password is filled in, password2 has to be filled in as well.
    dependencies => { password => ['password2'] },

    # A field is only valid if it matches its pattern.
    constraints => {
      username => qr/^\w+$/,
      password => qr/^[ -~]{6,30}$/,
      email    => qr/^[\w\.\-]+\@[\w\.\-]+$/,
    },

    # Values that pass their constraint are handed back untainted.
    untaint_all_constraints => 1,

    # Messages are keyed "error_<field>" and contain just the bare text.
    msgs => { prefix => 'error_', format => '%s' },
  };

Data::FormValidator (Results 1)


  use Data::FormValidator;
  use Data::Dumper qw(Dumper);

  # An empty submission: every required field is missing.
  my $form    = {};
  my $results = Data::FormValidator->check($form, $val_hash);

  my $failed = $results->has_invalid || $results->has_missing;
  if ($failed) {
    # msgs returns one entry per failing field, keyed with the
    # "error_" prefix configured in the profile.
    print Dumper($results->msgs);
    # prints
    # $VAR1 = {
    #   'error_password' => 'Missing',
    #   'error_username' => 'Missing',
    #   'error_email'    => 'Missing'
    # };
  }

Data::FormValidator (Results 1.2)


  my $failed = $results->has_invalid || $results->has_missing;
  if ($failed) {
    use Data::Dumper qw(Dumper);

    # Collect the raw field names instead of the formatted messages.
    my @invalid = $results->invalid;
    my @missing = $results->missing;

    print Dumper(\@invalid);
    print Dumper(\@missing);
    # prints
    # $VAR1 = [];
    # $VAR1 = [
    #   'email',
    #   'password',
    #   'username'
    # ];
  }

Data::FormValidator (Results 2)


  use Data::Dumper qw(Dumper);

  # A submission with a bad username, a too-short password and no email.
  $form = {
    username  => "++foobar++",
    password  => "123",
    password2 => "1234",
  };
  $results = Data::FormValidator->check($form, $val_hash);

  my $failed = $results->has_invalid || $results->has_missing;
  if ($failed) {
    print Dumper($results->msgs);
    # prints
    # $VAR1 = {
    #   'error_password' => 'Invalid',
    #   'error_username' => 'Invalid',
    #   'error_email' => 'Missing'
    # };
  }

CGI::Ex::Validate (Setup)


  # CGI::Ex::Validate profile: one entry per field, each listing the
  # checks that apply to that field.
  my $val_hash = {
    username => { required => 1, untaint => 1, match => 'm/^\w+$/' },
    password => { required => 1, untaint => 1, match => 'm/^[ -~]{6,30}$/' },

    # Only checked when password was supplied; must repeat it exactly.
    password2 => { validate_if => 'password', equals => 'password' },

    email => {
      required => 1,
      untaint  => 1,
      match    => 'm/^[\w\.\-]+\@[\w\.\-]+$/',
    },
  };

CGI::Ex::Validate (Results)


  use CGI::Ex::Validate;

  # Validate an empty submission.
  my $form   = {};
  my $errobj = CGI::Ex::Validate->validate($form, $val_hash);

  # A true return value is an error object describing what failed.
  if ($errobj) {
    my $report = $errobj->as_string;
    print "$report\n";
    # Prints
    # Please correct the following items:
    #   The field email is required.
    #   The field password is required.
    #   The field username is required.
  }

CGI::Ex::Validate (Results 2)


  # A submission with a junk username, a short password, a mismatched
  # password2 and no email.
  $form = {
    username  => "++foobar++",
    password  => "123",
    password2 => "1234",
  };

  # "group order" sets the order in which the fields are reported.
  $val_hash->{'group order'} = ['username', 'password', 'password2', 'email'];

  $errobj = CGI::Ex::Validate->validate($form, $val_hash);

  if ($errobj) {
    my $report = $errobj->as_string;
    print "$report\n";
    # Prints
    # Please correct the following items:
    #   The field username may only contain letters and numbers
    #   The field password was less than 6 characters.
    #   The field password2 did not equal the field password.
    #   The field email is required.
  }

CGI::Ex::Validate (Results 2.2)


  if ($errobj) {
    use Data::Dumper qw(Dumper);

    # as_array gives the same report as as_string, but as one element
    # per line with the heading first.
    print Dumper $errobj->as_array;
    # Prints
    # $VAR1 = [
    #   "Please correct the following items:",
    #   "  The field username may only contain letters and numbers",
    #   "  The field password was less than 6 characters.",
    #   "  The field password2 did not equal the field password.",
    #   "  The field email is required."
    # ];
  }

CGI::Ex::Validate (Results 2.3)


  if ($errobj) {
    use Data::Dumper qw(Dumper);

    # as_hash keys each message by "<field>_error", so an error can be
    # looked up by the field that caused it.
    print Dumper $errobj->as_hash;
    # Prints
    # $VAR1 = {
    #   email_error     => "The field email is required.",
    #   password2_error =>
    #     "The field password2 did not equal the field password.",
    #   password_error  =>
    #     "The field password was less than 6 characters.",
    #   username_error  =>
    #     "The field username may only contain letters and numbers"
    # };
  }

Homegrown (Setup)


# Hand-rolled validation of the username, password/password2 and email
# entries of the form hashref passed as the only argument.
# Returns a hashref mapping "<field>_error" to an arrayref of messages;
# the hashref is empty when nothing failed.
sub check_form {
  my ($form) = @_;
  my $hash   = {};

  # Username: required, word characters only.  A successful match
  # replaces the value with the capture, which untaints it.
  if (exists $form->{'username'}) {
    if ($form->{'username'} =~ m/^(\w+)$/) {
      $form->{'username'} = $1;
    } else {
      push @{ $hash->{'username_error'} }, 'Username may only contain letters and numbers';
    }
  } else {
    push @{ $hash->{'username_error'} }, 'Username required';
  }

Homegrown (Setup 2)


  # Password: required, printable ASCII only, 6 to 30 characters long,
  # and repeated exactly in password2.
  if (exists $form->{'password'}) {
    # A successful match replaces the value with the capture, which
    # untaints it.
    if ($form->{'password'} =~ m/^([ -~]+)$/) {
      $form->{'password'} = $1;
    } else {
      push @{ $hash->{'password_error'} }, 'Password contained bad characters';
    }

    my $length = length($form->{'password'});
    if ($length < 6) {
      push @{ $hash->{'password_error'} }, 'Password must be more than 6 characters';
    } elsif ($length > 30) {
      push @{ $hash->{'password_error'} }, 'Password must be less than 30 characters';
    }

    # The confirmation is only compared when a password was submitted.
    my $confirm = $form->{'password2'};
    unless (defined($confirm) && $confirm eq $form->{'password'}) {
      push @{ $hash->{'password2_error'} }, 'Password2 and password must be the same';
    }
  } else {
    push @{ $hash->{'password_error'} }, 'Password required';
  }

Homegrown (Setup 3)


  # Email: required and must look like an address.  On success the value
  # is replaced with the regex capture so that it is untainted, the same
  # way username and password are handled.
  if (! exists $form->{'email'}) {
    push @{ $hash->{'email_error'} }, 'Email required';
  } elsif ($form->{'email'} !~ m/^([\w\.\-]+\@[\w\.\-]+)$/) {
    push @{ $hash->{'email_error'} }, 'Please type a valid email address';
  } else {
    $form->{'email'} = $1;
  }

  # An empty hashref means the form passed every check.
  return $hash;
}

Homegrown (Results)


use Data::Dumper qw(Dumper);

# Run the homegrown checker against an empty submission.
my $form = {};
my $hash = check_form($form);

# Any key in the returned hash means at least one field failed.
if (keys %$hash) {
  print Dumper($hash);
  # Prints
  # $VAR1 = {
  #   'email_error' => [
  #                      'Email required'
  #                    ],
  #   'password_error' => [
  #                         'Password required'
  #                       ],
  #   'username_error' => [
  #                         'Username required'
  #                       ]
  # };
}

Homegrown (Results 2)


# Re-run the homegrown checker against a bad submission: junk username,
# short password, mismatched password2 and no email.
$form = {
  username  => "++foobar++",
  password  => "123",
  password2 => "1234",
};
$hash = check_form($form);

# Any key in the returned hash means at least one field failed.
if (scalar keys %$hash) {
  use Data::Dumper qw(Dumper);
  print Dumper $hash;
  # Prints
  # $VAR1 = {
  #   'email_error' => [
  #      'Email required'
  #                    ],
  #   'password_error' => [
  #      'Password must be more than 6 characters'
  #                       ],
  #   'password2_error' => [
  #      'Password2 and password must be the same'
  #                        ],
  #   'username_error' => [
  #      'Username may only contain letters and numbers'
  #                       ]
  # };
}

So which is better?

So which is better? (2)

So which is better? (3)

So which is better? (4)

When is a hash not a hash?


CGI::Ex::Validate's val_hash can be any of

Tying it all back together