X-Git-Url: http://git.shiar.nl/sheet.git/blobdiff_plain/3e2e19818a6102eff505984ca2430ae3ac901d08..0c9d5cd332b3a9124f75926de5094c90b353e238:/charset.plp

diff --git a/charset.plp b/charset.plp
index 4f60765..7f19bc1 100644
--- a/charset.plp
+++ b/charset.plp
@@ -143,50 +143,12 @@ sub tabinput {
 		}
 	}
 
-	if ($input =~ m{ \A (?:wing|web)dings \d* \z }ix) {
-		eval "require Encode::\u$input";
+	if ($charset->{setup}) {
+		eval { $charset->{setup}->(\%row) }
+			or Alert("Incomplete setup of $input", $@);
 	}
 
-	if ($input eq '') {
-		$row{offset} = delete $row{startpoint};
-		$row{set} = 'Unicode characters';
-		my $block = $row{offset} >> 8;
-		$row{endpoint} ||= ($block + 1 << 8) - 1;
-		$block == ($row{endpoint} >> 8) or undef $block;
-
-		$row{table} = join '', map { chr } $row{offset} .. $row{endpoint};
-		utf8::upgrade($row{table});  # prevent latin1 output
-
-		$row{endpoint} -= $row{offset};
-
-		if (defined $block) {
-			$row{set} = sprintf 'Unicode block U+%02Xxx', $block;
-			$row{offset} %= 0x100;
-		}
-	}
-	elsif (lc $input eq 'uu') {
-		$row{cell} = do 'charset-ucplanes.inc.pl'
-			or Alert('Table data could not be read', $@ || $!);
-		$row{endpoint} ||= 0x3FF;
-		$row{set} = 'Unicode planes';
-	}
-	elsif (lc $input eq 'u') {
-		$row{cell} = do 'charset-unicode.inc.pl'
-			or Alert('Table data could not be read', $@ || $!);
-
-		$row{endpoint} ||= 0x1FFF;
-		$row{set} = 'Unicode ' . (
-			$row{startpoint} <  0x1000 && $row{endpoint} < 0x1000 ? 'BMP' :
-			$row{startpoint} >= 0x1000 && $row{endpoint} < 0x2000 ? 'SMP' :
-			'allocations'
-		);
-	}
-	elsif ($input =~ m/^utf-*8$/i) {
-		$row{set} = 'UTF-8';
-		$row{cell} = do 'charset-utf8.inc.pl'
-			or Alert('Table data could not be read', $@ || $!);
-		$row{endpoint} = 0xFF;
-	}
+	if ($row{set}) {}
 	elsif ($row{set} = Encode::resolve_alias($input)) {
 		$row{offset} = delete $row{startpoint};
 		$row{endpoint} ||= 0xFF;
@@ -202,33 +164,6 @@ sub tabinput {
 			$row{table} = Encode::decode($row{set}, pack 'C*', $row{offset} .. $row{endpoint});
 		}
 
-		if ($row{set} eq 'cp437') {
-			if ($row{offset} <= 0xED and $row{endpoint} >= 0xED) {
-				# replace phi glyph
-				substr($row{table}, 0xED - $row{offset}, 1) = 'Ï';
-			}
-			if ($row{offset} < 0x20) {
-				# replace control characters by visible variants
-				my $sub = substr 'ââºâ»â¥â¦â£â â¢ââââââªâ«â¼âºâââ¼Â¶Â§â¬â¨âââââââ²â¼', $row{offset};
-				substr($row{table}, 0, length $sub) = $sub;
-			}
-		}
-		elsif ($row{set} eq 'symbol') {
-			if ($row{offset} <= 0x60 and $row{endpoint} >= 0x60) {
-				# replace radical extender by closest unicode equivalent
-				substr($row{table}, 0x60 - $row{offset},  1) = 'â';
-			}
-			if ($row{offset} <= 0xBD and $row{endpoint} >= 0xFF) {
-				substr($row{table}, 0xBD - $row{offset},  2) = 'ââ¯'; # arrow extenders
-				substr($row{table}, 0xD2 - $row{offset},  3) = 'Â®Â©â¢'; # serif variants
-				substr($row{table}, 0xE0 - $row{offset},  1) = 'â'; # replace lookalike, should match AdobeSymbol
-				substr($row{table}, 0xE2 - $row{offset},  3) = 'Â®Â©â¢'; # sans-serif variants
-				substr($row{table}, 0xE6 - $row{offset}, 10) = 'ââââ¡â¢â£â§â¨â©âª';
-				substr($row{table}, 0xF0 - $row{offset},  1) = 'â¬';
-				substr($row{table}, 0xF4 - $row{offset}, 11) = 'â®â¡âââ â¤â¥â¦â«â¬â­';
-			}
-		}
-
 		$row{endpoint} -= $row{offset};
 
 		$visible->{ascii} =  # assume common base
@@ -238,6 +173,32 @@ sub tabinput {
 		Alert("Encoding <q>$input</q> unknown");
 		return;
 	}
+
+	if (my $replace = $charset->{replace}) {
+		while (my ($offset, $sub) = each %{$replace}) {
+			$offset -= $row{offset};
+
+			my $length = length $sub;
+
+			if ($offset < 0) {
+				$offset > -$length or next; # at least one character after start
+				# trim leftmost part to start at offset
+				substr($sub, 0, -$offset) = '';
+				$length += $offset;
+				$offset = 0;
+			}
+
+			if ((my $excess = $row{endpoint} - $offset - $length + 1) < 0) {
+				$excess > -$length or next;
+				# trim rightmost part to prevent overflow
+				substr($sub, $excess) = '';
+				$length += $excess;
+			}
+
+			substr($row{table}, $offset, $length) = $sub;
+		}
+	}
+
 	push @request, \%row;
 }
 tabinput($_) for @tablist;