summaryrefslogtreecommitdiff
path: root/beagle/debian-rfs/usr/share/perl5/Text/WrapI18N.pm
diff options
context:
space:
mode:
Diffstat (limited to 'beagle/debian-rfs/usr/share/perl5/Text/WrapI18N.pm')
-rw-r--r--beagle/debian-rfs/usr/share/perl5/Text/WrapI18N.pm239
1 files changed, 0 insertions, 239 deletions
diff --git a/beagle/debian-rfs/usr/share/perl5/Text/WrapI18N.pm b/beagle/debian-rfs/usr/share/perl5/Text/WrapI18N.pm
deleted file mode 100644
index 4d9d73c..0000000
--- a/beagle/debian-rfs/usr/share/perl5/Text/WrapI18N.pm
+++ /dev/null
@@ -1,239 +0,0 @@
-package Text::WrapI18N;
-
-require Exporter;
-use strict;
-use warnings;
-
-our @ISA = qw(Exporter);
-our @EXPORT = qw(wrap);
-our @EXPORT_OK = qw($columns $separator);
-our %EXPORT_TAGS = ('all' => [ @EXPORT, @EXPORT_OK ]);
-
-our $VERSION = '0.06';
-
-use vars qw($columns $break $tabstop $separator $huge $unexpand $charmap);
-use Text::CharWidth qw(mbswidth mblen);
-
-BEGIN {
- $columns = 76;
- # $break, $separator, $huge, and $unexpand are not supported yet.
- $break = '\s';
- $tabstop = 8;
- $separator = "\n";
- $huge = 'wrap';
- $unexpand = 1;
- undef $charmap;
-}
-
-sub wrap {
- my $top1=shift;
- my $top2=shift;
- my $text=shift;
-
- $text = $top1 . $text;
-
- # $out already-formatted text for output including current line
- # $len visible width of the current line without the current word
- # $word the current word which might be sent to the next line
- # $wlen visible width of the current word
- # $c the current character
- # $b whether to allow line-breaking after the current character
- # $cont_lf true when LF (line feed) characters appear continuously
- # $w visible width of the current character
-
- my $out = '';
- my $len = 0;
- my $word = '';
- my $wlen = 0;
- my $cont_lf = 0;
- my ($c, $w, $b);
- $text =~ s/\n+$/\n/;
- while(1) {
- if (length($text) == 0) {
- return $out . $word;
- }
- ($c, $text, $w, $b) = _extract($text);
- if ($c eq "\n") {
- $out .= $word . $separator;
- if (length($text) == 0) {return $out;}
- $len = 0;
- $text = $top2 . $text;
- $word = '' ; $wlen = 0;
- next;
- } elsif ($w == -1) {
- # all control characters other than LF are ignored
- next;
- }
-
- # when the current line have enough room
- # for the curren character
-
- if ($len + $wlen + $w <= $columns) {
- if ($c eq ' ' || $b) {
- $out .= $word . $c;
- $len += $wlen + $w;
- $word = ''; $wlen = 0;
- } else {
- $word .= $c; $wlen += $w;
- }
- next;
- }
-
- # when the current line overflows with the
- # current character
-
- if ($c eq ' ') {
- # the line ends by space
- $out .= $word . $separator;
- $len = 0;
- $text = $top2 . $text;
- $word = ''; $wlen = 0;
- } elsif ($wlen + $w <= $columns - length ($top2)) {
- # the current word is sent to next line
- $out .= $separator;
- $len = 0;
- $text = $top2 . $word . $c . $text;
- $word = ''; $wlen = 0;
- } else {
- # the current word is too long to fit a line
- $out .= $word . $separator;
- $len = 0;
- $text = $top2 . $c . $text;
- $word = ''; $wlen = 0;
- }
- }
-}
-
-
-# Extract one character from the beginning from the given string.
-# Supports multibyte encodings such as UTF-8, EUC-JP, EUC-KR,
-# GB2312, and Big5.
-#
-# return value: (character, rest string, width, line breakable)
-# character: a character. This may consist from multiple bytes.
-# rest string: given string without the extracted character.
-# width: number of columns which the character occupies on screen.
-# line breakable: true if the character allows line break after it.
-
-sub _extract {
- my $string=shift;
- my ($l, $c, $r, $w, $b, $u);
-
- if (length($string) == 0) {
- return ('', '', 0, 0);
- }
- $l = mblen($string);
- if ($l == 0 || $l == -1) {
- return ('?', substr($string,1), 1, 0);
- }
- $c = substr($string, 0, $l);
- $r = substr($string, $l);
- $w = mbswidth($c);
-
- if (!defined($charmap)) {
- $charmap = `/usr/bin/locale charmap`;
- }
-
- if ($charmap =~ /UTF.8/i) {
- # UTF-8
- if ($l == 3) {
- # U+0800 - U+FFFF
- $u = (ord(substr($c,0,1))&0x0f) * 0x1000
- + (ord(substr($c,1,1))&0x3f) * 0x40
- + (ord(substr($c,2,1))&0x3f);
- $b = _isCJ($u);
- } elsif ($l == 4) {
- # U+10000 - U+10FFFF
- $u = (ord(substr($c,0,1))&7) * 0x40000
- + (ord(substr($c,1,1))&0x3f) * 0x1000
- + (ord(substr($c,2,1))&0x3f) * 0x40
- + (ord(substr($c,3,1))&0x3f);
- $b = _isCJ($u);
- } else {
- $b = 0;
- }
- } elsif ($charmap =~ /(^EUC)|(^GB)|(^BIG)/i) {
- # East Asian legacy encodings
- # (EUC-JP, EUC-KR, GB2312, Big5, Big5HKSCS, and so on)
-
- if (ord(substr($c,0,1)) >= 0x80) {$b = 1;} else {$b = 0;}
- } else {
- $b = 0;
- }
- return ($c, $r, $w, $b);
-}
-
-# Returns 1 for Chinese and Japanese characters. This means that
-# these characters allow line wrapping after this character even
-# without whitespaces because these languages don't use whitespaces
-# between words.
-#
-# Character must be given in UCS-4 codepoint value.
-
-sub _isCJ {
- my $u=shift;
-
- if ($u >= 0x3000 && $u <= 0x312f) {
- if ($u == 0x300a || $u == 0x300c || $u == 0x300e ||
- $u == 0x3010 || $u == 0x3014 || $u == 0x3016 ||
- $u == 0x3018 || $u == 0x301a) {return 0;}
- return 1;
- } # CJK punctuations, Hiragana, Katakana, Bopomofo
- if ($u >= 0x31a0 && $u <= 0x31bf) {return 1;} # Bopomofo
- if ($u >= 0x31f0 && $u <= 0x31ff) {return 1;} # Katakana extension
- if ($u >= 0x3400 && $u <= 0x9fff) {return 1;} # Han Ideogram
- if ($u >= 0xf900 && $u <= 0xfaff) {return 1;} # Han Ideogram
- if ($u >= 0x20000 && $u <= 0x2ffff) {return 1;} # Han Ideogram
-
- return 0;
-}
-
-1;
-__END__
-
-=head1 NAME
-
-Text::WrapI18N - Line wrapping module with support for multibyte, fullwidth,
-and combining characters and languages without whitespaces between words
-
-=head1 SYNOPSIS
-
- use Text::WrapI18N qw(wrap $columns);
- wrap(firstheader, nextheader, texts);
-
-=head1 DESCRIPTION
-
-This module intends to be a better Text::Wrap module.
-This module is needed to support multibyte character encodings such
-as UTF-8, EUC-JP, EUC-KR, GB2312, and Big5. This module also supports
-characters with irregular widths, such as combining characters (which
-occupy zero columns on terminal, like diacritical marks in UTF-8) and
-fullwidth characters (which occupy two columns on terminal, like most
-of east Asian characters). Also, minimal handling of languages which
-doesn't use whitespaces between words (like Chinese and Japanese) is
-supported.
-
-Like Text::Wrap, hyphenation and "kinsoku" processing are not supported,
-to keep simplicity.
-
-I<wrap(firstheader, nextheader, texts)> is the main subroutine of
-Text::WrapI18N module to execute the line wrapping. Input parameters
-and output data emulate Text::Wrap. The texts have to be written in
-locale encoding.
-
-=head1 SEE ALSO
-
-locale(5), utf-8(7), charsets(7)
-
-=head1 AUTHOR
-
-Tomohiro KUBOTA, E<lt>kubota@debian.orgE<gt>
-
-=head1 COPYRIGHT AND LICENSE
-
-Copyright 2003 by Tomohiro KUBOTA
-
-This library is free software; you can redistribute it and/or modify
-it under the same terms as Perl itself.
-
-=cut