User:Xybot/FixRussianAddress

From OpenStreetMap Wiki
Jump to navigation Jump to search

This is the Ruleset FixRussianAddress of the Xybot script

# Ruleset start hook.
#
# Hands two fixed strings to Data::Primitive: the creator name of this
# ruleset and the comment describing its edits.
#
# Parameters:
#   $me - the ruleset instance; accepted but not used here.
#
# Returns whatever Data::Primitive->setComment returns (last statement).
# NOTE(review): presumably called once by the Xybot driver before any
# object is handed to process() - confirm against the driver.
sub begin {
  my ($me) = @_;

  Data::Primitive->setCreator("FixRussianAddress");
  Data::Primitive->setComment(
    'Correction of russian addresses according to official adressing scheme'
  );
}

# Normalises the Russian house-number tags of a single object.
#
# For each of the tags addr:housenumber, addr:housenumber1 and
# addr:housenumber2 that is defined, the value is rewritten by
# _normalize_russian_housenumber(). When the rewritten value differs from
# the stored one it is checked against the canonical pattern
#
#   <number>[/<number>][UPPER]{0,2}[ (к|с|в)<number>]{1,2}[lower]{0,2}
#
# If it matches, the object is cloned (once), the tag is replaced in the
# clone and addr:country is set to "RU" unless the original already has it.
# If it does not match, nothing is changed and the rejected value is only
# reported in the log text.
#
# Parameters:
#   $me  - the ruleset instance; only passed through to $obj->update().
#   $obj - the object to inspect. The code reads $obj->{tags} and
#          $obj->{id} and calls $obj->clone(), $obj->update($clone, $me)
#          and $obj->log(...).
#
# Side effects: at most one $obj->log() call per object; $obj->update() is
# called only when at least one tag was rewritten.
sub process {
  my ($me, $obj) = @_;

  my $resultstr = "";
  my $work = $obj;
  my $clone = undef;

  # "my" keeps the loop variable lexical instead of clobbering a package global.
  foreach my $k ("addr:housenumber", "addr:housenumber1", "addr:housenumber2") {
    my $old = $work->{"tags"}->{$k};
    next unless defined($old);

    my $v = _normalize_russian_housenumber($old);
    next if $v eq $old;

    # Only values that ended up in the canonical form are written back.
    if ($v =~ m{^\d+(?:/\d+)?(?:А|Б|В|Г|Д|Е|Ж|Н|Р|С){0,2}(?:(?: (?:к|с|в)\d+){1,2}(?:а|б|в|г|д|е|ж){0,2})?$}) {
      $resultstr .= sprintf " - modifying '$k' from '%s' to '%s'", $old, $v;
      $clone = $work->clone() unless defined($clone);
      $clone->{"tags"}->{$k} = $v;
      $clone->{"tags"}->{"addr:country"} = "RU" if (!defined($work->{"tags"}->{"addr:country"}));
    } else {
      $resultstr .= sprintf " - key '$k' has illegal value '%s' NOT changing to '%s'", $old, $v;
    }
  }

  # Rejected values are logged without an update; accepted ones are logged
  # together with whatever $obj->update() returns.
  $obj->log(ref($obj), $work->{"id"}, $resultstr, "") if (!defined($clone) && $resultstr ne "");
  $obj->log(ref($obj), $work->{"id"}, $resultstr, $obj->update($clone, $me)) if (defined($clone));
}

# Rewrites one house-number string towards the canonical form checked by
# process() and returns the result; the argument itself is not modified.
#
# Cyrillic letters are handled through alternations and hash lookups (not
# tr/// or bracketed character classes) so that the code behaves the same
# on byte strings and on character strings.
sub _normalize_russian_housenumber {
  my ($v) = @_;

  # Latin letters and the Cyrillic letters they are replaced with.
  my %cyrillic_for = (
    'A' => 'А', 'B' => 'В', 'E' => 'Е', 'K' => 'К', 'M' => 'М', 'H' => 'Н',
    'O' => 'О', 'P' => 'Р', 'C' => 'С', 'T' => 'Т', 'X' => 'Х',
    'a' => 'а', 'e' => 'е', 'o' => 'о', 'p' => 'р', 'c' => 'с', 'x' => 'х',
  );
  # Letters allowed directly after the first number, lower -> upper case.
  my %upper_for = (
    'а' => 'А', 'б' => 'Б', 'г' => 'Г', 'д' => 'Д', 'е' => 'Е',
    'ж' => 'Ж', 'н' => 'Н', 'р' => 'Р', 'с' => 'С',
  );
  # Letters allowed after the last building number, upper -> lower case.
  my %lower_for = (
    'А' => 'а', 'Б' => 'б', 'В' => 'в', 'Г' => 'г',
    'Д' => 'д', 'Е' => 'е', 'Ж' => 'ж',
  );
  my $lower_alt = join '|', sort keys %upper_for;
  my $upper_alt = join '|', sort keys %lower_for;

  $v =~ s/\s+//g;                                # remove all whitespace
  $v =~ s/,?стр\.?/с/;                           # replace стр with с
  $v =~ s/,?(?:корпус|корп\.?|к\.?)/к/;          # replace корпус/корп/к. with к
  $v =~ s/вл\.?/в/;                              # replace вл with в

  # Replace Latin letters with their Cyrillic counterparts; unmapped
  # Latin letters are left as they are.
  $v =~ s/([A-Za-z])/exists $cyrillic_for{$1} ? $cyrillic_for{$1} : $1/ge;

  # Upper-case the one or two letters that directly follow the first number.
  if ($v =~ /^(\d+)((?:$lower_alt){1,2})(.*)$/) {
    my ($number, $suffix, $rest) = ($1, $2, $3);
    $suffix =~ s/($lower_alt)/$upper_for{$1}/g;
    $v = $number.$suffix.$rest;
  }

  # Put a single space and the lower-case marker in front of every
  # "marker + number" part. A с that was upper-cased just above is caught
  # here again because both cases are accepted.
  $v =~ s/(?:с|С)(\d+\w{0,2})/ с$1/;
  $v =~ s/(?:к|К)(\d+\w{0,2})/ к$1/;
  $v =~ s/(?:в|В)(\d+\w{0,2})/ в$1/;

  # Lower-case the one or two letters that follow the last building number.
  # This has to run after the markers were spaced out, because the leading
  # " <marker><number>" is what identifies the trailing suffix.
  if ($v =~ /^(.* (?:к|с|в)\d+)((?:$upper_alt){1,2})$/) {
    my ($head, $suffix) = ($1, $2);
    $suffix =~ s/($upper_alt)/$lower_for{$1}/g;
    $v = $head.$suffix;
  }

  $v =~ s/^\s*(.+?)\s*$/$1/;                     # strip whitespace from both ends
  $v =~ s/\s+/ /g;                               # collapse whitespace runs to ' '

  return $v;
}