NAME

  UTF8::R2 - makes UTF-8 scripting easy for enterprise use or LTS

SYNOPSIS

  use UTF8::R2;
  use UTF8::R2 ver.sion;            # match or die
  use UTF8::R2 qw( RFC3629 );       # m/./ matches RFC3629 codepoint (default)
  use UTF8::R2 qw( RFC2279 );       # m/./ matches RFC2279 codepoint
  use UTF8::R2 qw( WTF8 );          # m/./ matches WTF-8 codepoint
  use UTF8::R2 qw( RFC3629.ja_JP ); # optimized RFC3629 for ja_JP
  use UTF8::R2 qw( WTF8.ja_JP );    # optimized WTF-8 for ja_JP
  use UTF8::R2 qw( %mb );           # multibyte regex by %mb

DESCRIPTION

  UTF8::R2 module provides minimal UTF-8 subroutines for stable scripting
  environment, using no utf8 pragma, no UTF-8 flag.

    # on use UTF8::R2 qw( RFC2279 );
    # m/./ means
    # beautiful concept in young days
    # https://www.ietf.org/rfc/rfc2279.txt
    'RFC2279' => qr{(?>@{[join('', qw(
        [\x00-\x7F\x80-\xBF\xC0-\xC1\xF5-\xFF]       |
        [\xC2-\xDF][\x80-\xBF]                       |
        [\xE0-\xEF][\x80-\xBF][\x80-\xBF]            |
        [\xF0-\xF4][\x80-\xBF][\x80-\xBF][\x80-\xBF] |
        [\x00-\xFF]
    ))]})}x,

    # on use UTF8::R2;
    # or use UTF8::R2 qw( RFC3629 );
    # m/./ means
    # https://tools.ietf.org/rfc/rfc3629.txt
    'RFC3629' => qr{(?>@{[join('', qw(
        [\x00-\x7F\x80-\xBF\xC0-\xC1\xF5-\xFF]       |
        [\xC2-\xDF][\x80-\xBF]                       |
        [\xE0-\xE0][\xA0-\xBF][\x80-\xBF]            |
        [\xE1-\xEC][\x80-\xBF][\x80-\xBF]            |
        [\xED-\xED][\x80-\x9F][\x80-\xBF]            |
        [\xEE-\xEF][\x80-\xBF][\x80-\xBF]            |
        [\xF0-\xF0][\x90-\xBF][\x80-\xBF][\x80-\xBF] |
        [\xF1-\xF3][\x80-\xBF][\x80-\xBF][\x80-\xBF] |
        [\xF4-\xF4][\x80-\x8F][\x80-\xBF][\x80-\xBF] |
        [\x00-\xFF]
    ))]})}x,

    # or use UTF8::R2 qw( WTF8 );
    # m/./ means
    # http://simonsapin.github.io/wtf-8/
    'WTF8' => qr{(?>@{[join('', qw(
        [\x00-\x7F\x80-\xBF\xC0-\xC1\xF5-\xFF]       |
        [\xC2-\xDF][\x80-\xBF]                       |
        [\xE0-\xE0][\xA0-\xBF][\x80-\xBF]            |
        [\xE1-\xEF][\x80-\xBF][\x80-\xBF]            |
        [\xF0-\xF0][\x90-\xBF][\x80-\xBF][\x80-\xBF] |
        [\xF1-\xF3][\x80-\xBF][\x80-\xBF][\x80-\xBF] |
        [\xF4-\xF4][\x80-\x8F][\x80-\xBF][\x80-\xBF] |
        [\x00-\xFF]
    ))]})}x,

    # or use UTF8::R2 qw( RFC3629.ja_JP );
    # m/./ means
    # optimized RFC3629 for ja_JP
    'RFC3629.ja_JP' => qr{(?>@{[join('', qw(
        [\x00-\x7F\x80-\xBF\xC0-\xC1\xF5-\xFF]       |
        [\xE1-\xEC][\x80-\xBF][\x80-\xBF]            |
        [\xC2-\xDF][\x80-\xBF]                       |
        [\xEE-\xEF][\x80-\xBF][\x80-\xBF]            |
        [\xF0-\xF0][\x90-\xBF][\x80-\xBF][\x80-\xBF] |
        [\xE0-\xE0][\xA0-\xBF][\x80-\xBF]            |
        [\xED-\xED][\x80-\x9F][\x80-\xBF]            |
        [\xF1-\xF3][\x80-\xBF][\x80-\xBF][\x80-\xBF] |
        [\xF4-\xF4][\x80-\x8F][\x80-\xBF][\x80-\xBF] |
        [\x00-\xFF]
    ))]})}x,

    # or use UTF8::R2 qw( WTF8.ja_JP );
    # m/./ means
    # optimized WTF-8 for ja_JP
    'WTF8.ja_JP' => qr{(?>@{[join('', qw(
        [\x00-\x7F\x80-\xBF\xC0-\xC1\xF5-\xFF]       |
        [\xE1-\xEF][\x80-\xBF][\x80-\xBF]            |
        [\xC2-\xDF][\x80-\xBF]                       |
        [\xE0-\xE0][\xA0-\xBF][\x80-\xBF]            |
        [\xF0-\xF0][\x90-\xBF][\x80-\xBF][\x80-\xBF] |
        [\xF1-\xF3][\x80-\xBF][\x80-\xBF][\x80-\xBF] |
        [\xF4-\xF4][\x80-\x8F][\x80-\xBF][\x80-\xBF] |
        [\x00-\xFF]
    ))]})}x,

SUBROUTINES

  VERY USEFUL UTF-8 CODEPOINT FEATURE
    UTF8::R2::qr(qr/ utf8_regex_here . \D \H \N \R \S \V \W \b \d \h \s \v \w \x{Unicode} [ \D \H \S \V \W \b \d \h \s \v \w \x{Unicode} [:POSIX:] [:^POSIX:] ] ? + * {n} {n,} {n,m} /imsxogc)
    UTF8::R2::split(qr/$utf8regex/imsxo, $_, 3)
    UTF8::R2::tr($_, 'ABC', 'XYZ', 'cdsr')
    use UTF8::R2 qw(%mb);
      $_ =~ $mb{qr/$utf8regex/imsxo}
      $_ =~ m<\G$mb{qr/$utf8regex/imsxo}>gc
      $_ =~ s<$mb{qr/before/imsxo}><after>egr

  OTHER UTF-8 CODEPOINT FEATURE
    UTF8::R2::chop(@_)
    UTF8::R2::chr($_)
    UTF8::R2::getc(FILEHANDLE)
    UTF8::R2::index($_, 'ABC', 5)
    UTF8::R2::lc($_)
    UTF8::R2::lcfirst($_)
    UTF8::R2::length($_)
    UTF8::R2::ord($_)
    UTF8::R2::reverse(@_)
    UTF8::R2::rindex($_, 'ABC', 5)
    UTF8::R2::substr($_, 0, 5)
    UTF8::R2::uc($_)
    UTF8::R2::ucfirst($_)

SUPPORTED PERL VERSIONS

  perl version 5.005_03 to newest perl

SEE ALSO

  http://search.cpan.org/~ina/
  http://backpan.perl.org/authors/id/I/IN/INA/