Source for java.lang.Character

   1: /* java.lang.Character -- Wrapper class for char, and Unicode subsets
   2:    Copyright (C) 1998, 1999, 2001, 2002, 2005, 2006 Free Software Foundation, Inc.
   3: 
   4: This file is part of GNU Classpath.
   5: 
   6: GNU Classpath is free software; you can redistribute it and/or modify
   7: it under the terms of the GNU General Public License as published by
   8: the Free Software Foundation; either version 2, or (at your option)
   9: any later version.
  10: 
  11: GNU Classpath is distributed in the hope that it will be useful, but
  12: WITHOUT ANY WARRANTY; without even the implied warranty of
  13: MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14: General Public License for more details.
  15: 
  16: You should have received a copy of the GNU General Public License
  17: along with GNU Classpath; see the file COPYING.  If not, write to the
  18: Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  19: 02110-1301 USA.
  20: 
  21: Linking this library statically or dynamically with other modules is
  22: making a combined work based on this library.  Thus, the terms and
  23: conditions of the GNU General Public License cover the whole
  24: combination.
  25: 
  26: As a special exception, the copyright holders of this library give you
  27: permission to link this library with independent modules to produce an
  28: executable, regardless of the license terms of these independent
  29: modules, and to copy and distribute the resulting executable under
  30: terms of your choice, provided that you also meet, for each linked
  31: independent module, the terms and conditions of the license of that
  32: module.  An independent module is a module which is not derived from
  33: or based on this library.  If you modify this library, you may extend
  34: this exception to your version of the library, but you are not
  35: obligated to do so.  If you do not wish to do so, delete this
  36: exception statement from your version. */
  37: 
  38: /*
  39:  * Note: This class must not be merged with Classpath.  Gcj uses C-style
  40:  * arrays (see include/java-chartables.h) to store the Unicode character
  41:  * database, whereas Classpath uses Java objects (char[] extracted from
  42:  * String constants) in gnu.java.lang.CharData.  Gcj's approach is more
  43:  * efficient, because there is no vtable or data relocation to worry about.
  44:  * However, despite the difference in the database interface, the two
  45:  * versions share identical algorithms.
  46:  */
  47: 
  48: package java.lang;
  49: 
  50: import java.io.Serializable;
  51: import java.text.Collator;
  52: import java.util.Locale;
  53: 
  54: /**
  55:  * Wrapper class for the primitive char data type.  In addition, this class
  56:  * allows one to retrieve property information and perform transformations
  57:  * on the 57,707 defined characters in the Unicode Standard, Version 3.0.0.
  58:  * java.lang.Character is designed to be very dynamic, and as such, it
  59:  * retrieves information on the Unicode character set from a separate
  60:  * database, gnu.java.lang.CharData, which can be easily upgraded.
  61:  *
  62:  * <p>For predicates, boundaries are used to describe
  63:  * the set of characters for which the method will return true.
  64:  * This syntax uses fairly normal regular expression notation.
  65:  * See 5.13 of the Unicode Standard, Version 3.0, for the
  66:  * boundary specification.
  67:  *
  68:  * <p>See <a href="http://www.unicode.org">http://www.unicode.org</a>
  69:  * for more information on the Unicode Standard.
  70:  *
  71:  * @author Tom Tromey (tromey@cygnus.com)
  72:  * @author Paul N. Fisher
  73:  * @author Jochen Hoenicke
  74:  * @author Eric Blake (ebb9@email.byu.edu)
  75:  * @since 1.0
  76:  * @status updated to 1.4
  77:  */
  78: public final class Character implements Serializable, Comparable
  79: {
  80:   /**
  81:    * A subset of Unicode blocks.
  82:    *
  83:    * @author Paul N. Fisher
  84:    * @author Eric Blake (ebb9@email.byu.edu)
  85:    * @since 1.2
  86:    */
  87:   public static class Subset
  88:   {
  89:     /** The name of the subset. */
  90:     private final String name;
  91: 
  92:     /**
  93:      * Construct a new subset of characters.
  94:      *
  95:      * @param name the name of the subset
  96:      * @throws NullPointerException if name is null
  97:      */
  98:     protected Subset(String name)
  99:     {
 100:       // Note that name.toString() is name, unless name was null.
 101:       this.name = name.toString();
 102:     }
 103: 
 104:     /**
 105:      * Compares two Subsets for equality. This is <code>final</code>, and
 106:      * restricts the comparison on the <code>==</code> operator, so it returns
 107:      * true only for the same object.
 108:      *
 109:      * @param o the object to compare
 110:      * @return true if o is this
 111:      */
 112:     public final boolean equals(Object o)
 113:     {
 114:       return o == this;
 115:     }
 116: 
 117:     /**
 118:      * Makes the original hashCode of Object final, to be consistent with
 119:      * equals.
 120:      *
 121:      * @return the hash code for this object
 122:      */
 123:     public final int hashCode()
 124:     {
 125:       return super.hashCode();
 126:     }
 127: 
 128:     /**
 129:      * Returns the name of the subset.
 130:      *
 131:      * @return the name
 132:      */
 133:     public final String toString()
 134:     {
 135:       return name;
 136:     }
 137:   } // class Subset
 138: 
 139:   /**
 140:    * A family of character subsets in the Unicode specification. A character
 141:    * is in at most one of these blocks.
 142:    *
 143:    * This inner class was generated automatically from
 144:    * <code>libjava/gnu/gcj/convert/Blocks-3.txt</code>, by some perl scripts.
 145:    * This Unicode definition file can be found on the
 146:    * <a href="http://www.unicode.org">http://www.unicode.org</a> website.
 147:    * JDK 1.4 uses Unicode version 3.0.0.
 148:    *
 149:    * @author scripts/unicode-blocks.pl (written by Eric Blake)
 150:    * @since 1.2
 151:    */
 152:   public static final class UnicodeBlock extends Subset
 153:   {
 154:     /** The start of the subset. */
 155:     private final int start;
 156: 
 157:     /** The end of the subset. */
 158:     private final int end;
 159: 
 160:     /** The canonical name of the block according to the Unicode standard. */
 161:     private final String canonicalName;
 162: 
 163:     /** Constants for the <code>forName()</code> method */
 164:     private static final int CANONICAL_NAME = 0;
 165:     private static final int NO_SPACES_NAME = 1;
 166:     private static final int CONSTANT_NAME = 2;
 167: 
 168:     /**
 169:      * Constructor for strictly defined blocks.
 170:      *
 171:      * @param start the start character of the range
 172:      * @param end the end character of the range
 173:      * @param name the block name
 174:      */
 175:     private UnicodeBlock(int start, int end, String name,
 176:              String canonicalName)
 177:     {
 178:       super(name);
 179:       this.start = start;
 180:       this.end = end;
 181:       this.canonicalName = canonicalName;
 182:     }
 183: 
 184:     /**
 185:      * Returns the Unicode character block which a character belongs to.
 186:      * <strong>Note</strong>: This method does not support the use of
 187:      * supplementary characters.  For such support, <code>of(int)</code>
 188:      * should be used instead.
 189:      *
 190:      * @param ch the character to look up
 191:      * @return the set it belongs to, or null if it is not in one
 192:      */
 193:     public static UnicodeBlock of(char ch)
 194:     {
 195:       return of((int) ch);
 196:     }
 197: 
 198:     /**
 199:      * Returns the Unicode character block which a code point belongs to.
 200:      *
 201:      * @param codePoint the character to look up
 202:      * @return the set it belongs to, or null if it is not in one.
 203:      * @throws IllegalArgumentException if the specified code point is
 204:      *         invalid.
 205:      * @since 1.5
 206:      */
 207:     public static UnicodeBlock of(int codePoint)
 208:     {
 209:       if (codePoint > MAX_CODE_POINT)
 210:     throw new IllegalArgumentException("The supplied integer value is " +
 211:                        "too large to be a codepoint.");
 212:       // Simple binary search for the correct block.
 213:       int low = 0;
 214:       int hi = sets.length - 1;
 215:       while (low <= hi)
 216:         {
 217:           int mid = (low + hi) >> 1;
 218:           UnicodeBlock b = sets[mid];
 219:           if (codePoint < b.start)
 220:             hi = mid - 1;
 221:           else if (codePoint > b.end)
 222:             low = mid + 1;
 223:           else
 224:             return b;
 225:         }
 226:       return null;
 227:     }
 228: 
 229:     /**
 230:      * <p>
 231:      * Returns the <code>UnicodeBlock</code> with the given name, as defined
 232:      * by the Unicode standard.  The version of Unicode in use is defined by
 233:      * the <code>Character</code> class, and the names are given in the
 234:      * <code>Blocks-<version>.txt</code> file corresponding to that version.
 235:      * The name may be specified in one of three ways:
 236:      * </p>
 237:      * <ol>
 238:      * <li>The canonical, human-readable name used by the Unicode standard.
 239:      * This is the name with all spaces and hyphens retained.  For example,
 240:      * `Basic Latin' retrieves the block, UnicodeBlock.BASIC_LATIN.</li>
 241:      * <li>The canonical name with all spaces removed e.g. `BasicLatin'.</li>
 242:      * <li>The name used for the constants specified by this class, which
 243:      * is the canonical name with all spaces and hyphens replaced with
 244:      * underscores e.g. `BASIC_LATIN'</li>
 245:      * </ol>
 246:      * <p>
 247:      * The names are compared case-insensitively using the case comparison
 248:      * associated with the U.S. English locale.  The method recognises the
 249:      * previous names used for blocks as well as the current ones.  At
 250:      * present, this simply means that the deprecated `SURROGATES_AREA'
 251:      * will be recognised by this method (the <code>of()</code> methods
 252:      * only return one of the three new surrogate blocks).
 253:      * </p>
 254:      *
 255:      * @param blockName the name of the block to look up.
 256:      * @return the specified block.
 257:      * @throws NullPointerException if the <code>blockName</code> is
 258:      *         <code>null</code>.
 259:      * @throws IllegalArgumentException if the name does not match any Unicode
 260:      *         block.
 261:      * @since 1.5
 262:      */
 263:     public static final UnicodeBlock forName(String blockName)
 264:     {
 265:       int type;
 266:       if (blockName.indexOf(' ') != -1)
 267:         type = CANONICAL_NAME;
 268:       else if (blockName.indexOf('_') != -1)
 269:         type = CONSTANT_NAME;
 270:       else
 271:         type = NO_SPACES_NAME;
 272:       Collator usCollator = Collator.getInstance(Locale.US);
 273:       usCollator.setStrength(Collator.PRIMARY);
 274:       /* Special case for deprecated blocks not in sets */
 275:       switch (type)
 276:       {
 277:         case CANONICAL_NAME:
 278:           if (usCollator.compare(blockName, "Surrogates Area") == 0)
 279:             return SURROGATES_AREA;
 280:           break;
 281:         case NO_SPACES_NAME:
 282:           if (usCollator.compare(blockName, "SurrogatesArea") == 0)
 283:             return SURROGATES_AREA;
 284:           break;
 285:         case CONSTANT_NAME:
 286:           if (usCollator.compare(blockName, "SURROGATES_AREA") == 0) 
 287:             return SURROGATES_AREA;
 288:           break;
 289:       }
 290:       /* Other cases */
 291:       int setLength = sets.length;
 292:       switch (type)
 293:       {
 294:         case CANONICAL_NAME:
 295:           for (int i = 0; i < setLength; i++)
 296:             {
 297:               UnicodeBlock block = sets[i];
 298:               if (usCollator.compare(blockName, block.canonicalName) == 0)
 299:                 return block;
 300:             }
 301:           break;
 302:         case NO_SPACES_NAME:
 303:           for (int i = 0; i < setLength; i++)
 304:             {
 305:               UnicodeBlock block = sets[i];
 306:               String nsName = block.canonicalName.replaceAll(" ","");
 307:               if (usCollator.compare(blockName, nsName) == 0)
 308:                 return block;
 309:             }        
 310:           break;
 311:         case CONSTANT_NAME:
 312:           for (int i = 0; i < setLength; i++)
 313:             {
 314:               UnicodeBlock block = sets[i];
 315:               if (usCollator.compare(blockName, block.toString()) == 0)
 316:                 return block;
 317:             }
 318:           break;
 319:       }
 320:       throw new IllegalArgumentException("No Unicode block found for " +
 321:                                          blockName + ".");
 322:     }
 323: 
 324:     /**
 325:      * Basic Latin.
 326:      * 0x0000 - 0x007F.
 327:      */
 328:     public static final UnicodeBlock BASIC_LATIN
 329:       = new UnicodeBlock(0x0000, 0x007F,
 330:                          "BASIC_LATIN", 
 331:                          "Basic Latin");
 332: 
 333:     /**
 334:      * Latin-1 Supplement.
 335:      * 0x0080 - 0x00FF.
 336:      */
 337:     public static final UnicodeBlock LATIN_1_SUPPLEMENT
 338:       = new UnicodeBlock(0x0080, 0x00FF,
 339:                          "LATIN_1_SUPPLEMENT", 
 340:                          "Latin-1 Supplement");
 341: 
 342:     /**
 343:      * Latin Extended-A.
 344:      * 0x0100 - 0x017F.
 345:      */
 346:     public static final UnicodeBlock LATIN_EXTENDED_A
 347:       = new UnicodeBlock(0x0100, 0x017F,
 348:                          "LATIN_EXTENDED_A", 
 349:                          "Latin Extended-A");
 350: 
 351:     /**
 352:      * Latin Extended-B.
 353:      * 0x0180 - 0x024F.
 354:      */
 355:     public static final UnicodeBlock LATIN_EXTENDED_B
 356:       = new UnicodeBlock(0x0180, 0x024F,
 357:                          "LATIN_EXTENDED_B", 
 358:                          "Latin Extended-B");
 359: 
 360:     /**
 361:      * IPA Extensions.
 362:      * 0x0250 - 0x02AF.
 363:      */
 364:     public static final UnicodeBlock IPA_EXTENSIONS
 365:       = new UnicodeBlock(0x0250, 0x02AF,
 366:                          "IPA_EXTENSIONS", 
 367:                          "IPA Extensions");
 368: 
 369:     /**
 370:      * Spacing Modifier Letters.
 371:      * 0x02B0 - 0x02FF.
 372:      */
 373:     public static final UnicodeBlock SPACING_MODIFIER_LETTERS
 374:       = new UnicodeBlock(0x02B0, 0x02FF,
 375:                          "SPACING_MODIFIER_LETTERS", 
 376:                          "Spacing Modifier Letters");
 377: 
 378:     /**
 379:      * Combining Diacritical Marks.
 380:      * 0x0300 - 0x036F.
 381:      */
 382:     public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS
 383:       = new UnicodeBlock(0x0300, 0x036F,
 384:                          "COMBINING_DIACRITICAL_MARKS", 
 385:                          "Combining Diacritical Marks");
 386: 
 387:     /**
 388:      * Greek.
 389:      * 0x0370 - 0x03FF.
 390:      */
 391:     public static final UnicodeBlock GREEK
 392:       = new UnicodeBlock(0x0370, 0x03FF,
 393:                          "GREEK", 
 394:                          "Greek");
 395: 
 396:     /**
 397:      * Cyrillic.
 398:      * 0x0400 - 0x04FF.
 399:      */
 400:     public static final UnicodeBlock CYRILLIC
 401:       = new UnicodeBlock(0x0400, 0x04FF,
 402:                          "CYRILLIC", 
 403:                          "Cyrillic");
 404: 
 405:     /**
 406:      * Cyrillic Supplementary.
 407:      * 0x0500 - 0x052F.
 408:      * @since 1.5
 409:      */
 410:     public static final UnicodeBlock CYRILLIC_SUPPLEMENTARY
 411:       = new UnicodeBlock(0x0500, 0x052F,
 412:                          "CYRILLIC_SUPPLEMENTARY", 
 413:                          "Cyrillic Supplementary");
 414: 
 415:     /**
 416:      * Armenian.
 417:      * 0x0530 - 0x058F.
 418:      */
 419:     public static final UnicodeBlock ARMENIAN
 420:       = new UnicodeBlock(0x0530, 0x058F,
 421:                          "ARMENIAN", 
 422:                          "Armenian");
 423: 
 424:     /**
 425:      * Hebrew.
 426:      * 0x0590 - 0x05FF.
 427:      */
 428:     public static final UnicodeBlock HEBREW
 429:       = new UnicodeBlock(0x0590, 0x05FF,
 430:                          "HEBREW", 
 431:                          "Hebrew");
 432: 
 433:     /**
 434:      * Arabic.
 435:      * 0x0600 - 0x06FF.
 436:      */
 437:     public static final UnicodeBlock ARABIC
 438:       = new UnicodeBlock(0x0600, 0x06FF,
 439:                          "ARABIC", 
 440:                          "Arabic");
 441: 
 442:     /**
 443:      * Syriac.
 444:      * 0x0700 - 0x074F.
 445:      * @since 1.4
 446:      */
 447:     public static final UnicodeBlock SYRIAC
 448:       = new UnicodeBlock(0x0700, 0x074F,
 449:                          "SYRIAC", 
 450:                          "Syriac");
 451: 
 452:     /**
 453:      * Thaana.
 454:      * 0x0780 - 0x07BF.
 455:      * @since 1.4
 456:      */
 457:     public static final UnicodeBlock THAANA
 458:       = new UnicodeBlock(0x0780, 0x07BF,
 459:                          "THAANA", 
 460:                          "Thaana");
 461: 
 462:     /**
 463:      * Devanagari.
 464:      * 0x0900 - 0x097F.
 465:      */
 466:     public static final UnicodeBlock DEVANAGARI
 467:       = new UnicodeBlock(0x0900, 0x097F,
 468:                          "DEVANAGARI", 
 469:                          "Devanagari");
 470: 
 471:     /**
 472:      * Bengali.
 473:      * 0x0980 - 0x09FF.
 474:      */
 475:     public static final UnicodeBlock BENGALI
 476:       = new UnicodeBlock(0x0980, 0x09FF,
 477:                          "BENGALI", 
 478:                          "Bengali");
 479: 
 480:     /**
 481:      * Gurmukhi.
 482:      * 0x0A00 - 0x0A7F.
 483:      */
 484:     public static final UnicodeBlock GURMUKHI
 485:       = new UnicodeBlock(0x0A00, 0x0A7F,
 486:                          "GURMUKHI", 
 487:                          "Gurmukhi");
 488: 
 489:     /**
 490:      * Gujarati.
 491:      * 0x0A80 - 0x0AFF.
 492:      */
 493:     public static final UnicodeBlock GUJARATI
 494:       = new UnicodeBlock(0x0A80, 0x0AFF,
 495:                          "GUJARATI", 
 496:                          "Gujarati");
 497: 
 498:     /**
 499:      * Oriya.
 500:      * 0x0B00 - 0x0B7F.
 501:      */
 502:     public static final UnicodeBlock ORIYA
 503:       = new UnicodeBlock(0x0B00, 0x0B7F,
 504:                          "ORIYA", 
 505:                          "Oriya");
 506: 
 507:     /**
 508:      * Tamil.
 509:      * 0x0B80 - 0x0BFF.
 510:      */
 511:     public static final UnicodeBlock TAMIL
 512:       = new UnicodeBlock(0x0B80, 0x0BFF,
 513:                          "TAMIL", 
 514:                          "Tamil");
 515: 
 516:     /**
 517:      * Telugu.
 518:      * 0x0C00 - 0x0C7F.
 519:      */
 520:     public static final UnicodeBlock TELUGU
 521:       = new UnicodeBlock(0x0C00, 0x0C7F,
 522:                          "TELUGU", 
 523:                          "Telugu");
 524: 
 525:     /**
 526:      * Kannada.
 527:      * 0x0C80 - 0x0CFF.
 528:      */
 529:     public static final UnicodeBlock KANNADA
 530:       = new UnicodeBlock(0x0C80, 0x0CFF,
 531:                          "KANNADA", 
 532:                          "Kannada");
 533: 
 534:     /**
 535:      * Malayalam.
 536:      * 0x0D00 - 0x0D7F.
 537:      */
 538:     public static final UnicodeBlock MALAYALAM
 539:       = new UnicodeBlock(0x0D00, 0x0D7F,
 540:                          "MALAYALAM", 
 541:                          "Malayalam");
 542: 
 543:     /**
 544:      * Sinhala.
 545:      * 0x0D80 - 0x0DFF.
 546:      * @since 1.4
 547:      */
 548:     public static final UnicodeBlock SINHALA
 549:       = new UnicodeBlock(0x0D80, 0x0DFF,
 550:                          "SINHALA", 
 551:                          "Sinhala");
 552: 
 553:     /**
 554:      * Thai.
 555:      * 0x0E00 - 0x0E7F.
 556:      */
 557:     public static final UnicodeBlock THAI
 558:       = new UnicodeBlock(0x0E00, 0x0E7F,
 559:                          "THAI", 
 560:                          "Thai");
 561: 
 562:     /**
 563:      * Lao.
 564:      * 0x0E80 - 0x0EFF.
 565:      */
 566:     public static final UnicodeBlock LAO
 567:       = new UnicodeBlock(0x0E80, 0x0EFF,
 568:                          "LAO", 
 569:                          "Lao");
 570: 
 571:     /**
 572:      * Tibetan.
 573:      * 0x0F00 - 0x0FFF.
 574:      */
 575:     public static final UnicodeBlock TIBETAN
 576:       = new UnicodeBlock(0x0F00, 0x0FFF,
 577:                          "TIBETAN", 
 578:                          "Tibetan");
 579: 
 580:     /**
 581:      * Myanmar.
 582:      * 0x1000 - 0x109F.
 583:      * @since 1.4
 584:      */
 585:     public static final UnicodeBlock MYANMAR
 586:       = new UnicodeBlock(0x1000, 0x109F,
 587:                          "MYANMAR", 
 588:                          "Myanmar");
 589: 
 590:     /**
 591:      * Georgian.
 592:      * 0x10A0 - 0x10FF.
 593:      */
 594:     public static final UnicodeBlock GEORGIAN
 595:       = new UnicodeBlock(0x10A0, 0x10FF,
 596:                          "GEORGIAN", 
 597:                          "Georgian");
 598: 
 599:     /**
 600:      * Hangul Jamo.
 601:      * 0x1100 - 0x11FF.
 602:      */
 603:     public static final UnicodeBlock HANGUL_JAMO
 604:       = new UnicodeBlock(0x1100, 0x11FF,
 605:                          "HANGUL_JAMO", 
 606:                          "Hangul Jamo");
 607: 
 608:     /**
 609:      * Ethiopic.
 610:      * 0x1200 - 0x137F.
 611:      * @since 1.4
 612:      */
 613:     public static final UnicodeBlock ETHIOPIC
 614:       = new UnicodeBlock(0x1200, 0x137F,
 615:                          "ETHIOPIC", 
 616:                          "Ethiopic");
 617: 
 618:     /**
 619:      * Cherokee.
 620:      * 0x13A0 - 0x13FF.
 621:      * @since 1.4
 622:      */
 623:     public static final UnicodeBlock CHEROKEE
 624:       = new UnicodeBlock(0x13A0, 0x13FF,
 625:                          "CHEROKEE", 
 626:                          "Cherokee");
 627: 
 628:     /**
 629:      * Unified Canadian Aboriginal Syllabics.
 630:      * 0x1400 - 0x167F.
 631:      * @since 1.4
 632:      */
 633:     public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS
 634:       = new UnicodeBlock(0x1400, 0x167F,
 635:                          "UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS", 
 636:                          "Unified Canadian Aboriginal Syllabics");
 637: 
 638:     /**
 639:      * Ogham.
 640:      * 0x1680 - 0x169F.
 641:      * @since 1.4
 642:      */
 643:     public static final UnicodeBlock OGHAM
 644:       = new UnicodeBlock(0x1680, 0x169F,
 645:                          "OGHAM", 
 646:                          "Ogham");
 647: 
 648:     /**
 649:      * Runic.
 650:      * 0x16A0 - 0x16FF.
 651:      * @since 1.4
 652:      */
 653:     public static final UnicodeBlock RUNIC
 654:       = new UnicodeBlock(0x16A0, 0x16FF,
 655:                          "RUNIC", 
 656:                          "Runic");
 657: 
 658:     /**
 659:      * Tagalog.
 660:      * 0x1700 - 0x171F.
 661:      * @since 1.5
 662:      */
 663:     public static final UnicodeBlock TAGALOG
 664:       = new UnicodeBlock(0x1700, 0x171F,
 665:                          "TAGALOG", 
 666:                          "Tagalog");
 667: 
 668:     /**
 669:      * Hanunoo.
 670:      * 0x1720 - 0x173F.
 671:      * @since 1.5
 672:      */
 673:     public static final UnicodeBlock HANUNOO
 674:       = new UnicodeBlock(0x1720, 0x173F,
 675:                          "HANUNOO", 
 676:                          "Hanunoo");
 677: 
 678:     /**
 679:      * Buhid.
 680:      * 0x1740 - 0x175F.
 681:      * @since 1.5
 682:      */
 683:     public static final UnicodeBlock BUHID
 684:       = new UnicodeBlock(0x1740, 0x175F,
 685:                          "BUHID", 
 686:                          "Buhid");
 687: 
 688:     /**
 689:      * Tagbanwa.
 690:      * 0x1760 - 0x177F.
 691:      * @since 1.5
 692:      */
 693:     public static final UnicodeBlock TAGBANWA
 694:       = new UnicodeBlock(0x1760, 0x177F,
 695:                          "TAGBANWA", 
 696:                          "Tagbanwa");
 697: 
 698:     /**
 699:      * Khmer.
 700:      * 0x1780 - 0x17FF.
 701:      * @since 1.4
 702:      */
 703:     public static final UnicodeBlock KHMER
 704:       = new UnicodeBlock(0x1780, 0x17FF,
 705:                          "KHMER", 
 706:                          "Khmer");
 707: 
 708:     /**
 709:      * Mongolian.
 710:      * 0x1800 - 0x18AF.
 711:      * @since 1.4
 712:      */
 713:     public static final UnicodeBlock MONGOLIAN
 714:       = new UnicodeBlock(0x1800, 0x18AF,
 715:                          "MONGOLIAN", 
 716:                          "Mongolian");
 717: 
 718:     /**
 719:      * Limbu.
 720:      * 0x1900 - 0x194F.
 721:      * @since 1.5
 722:      */
 723:     public static final UnicodeBlock LIMBU
 724:       = new UnicodeBlock(0x1900, 0x194F,
 725:                          "LIMBU", 
 726:                          "Limbu");
 727: 
 728:     /**
 729:      * Tai Le.
 730:      * 0x1950 - 0x197F.
 731:      * @since 1.5
 732:      */
 733:     public static final UnicodeBlock TAI_LE
 734:       = new UnicodeBlock(0x1950, 0x197F,
 735:                          "TAI_LE", 
 736:                          "Tai Le");
 737: 
 738:     /**
 739:      * Khmer Symbols.
 740:      * 0x19E0 - 0x19FF.
 741:      * @since 1.5
 742:      */
 743:     public static final UnicodeBlock KHMER_SYMBOLS
 744:       = new UnicodeBlock(0x19E0, 0x19FF,
 745:                          "KHMER_SYMBOLS", 
 746:                          "Khmer Symbols");
 747: 
 748:     /**
 749:      * Phonetic Extensions.
 750:      * 0x1D00 - 0x1D7F.
 751:      * @since 1.5
 752:      */
 753:     public static final UnicodeBlock PHONETIC_EXTENSIONS
 754:       = new UnicodeBlock(0x1D00, 0x1D7F,
 755:                          "PHONETIC_EXTENSIONS", 
 756:                          "Phonetic Extensions");
 757: 
 758:     /**
 759:      * Latin Extended Additional.
 760:      * 0x1E00 - 0x1EFF.
 761:      */
 762:     public static final UnicodeBlock LATIN_EXTENDED_ADDITIONAL
 763:       = new UnicodeBlock(0x1E00, 0x1EFF,
 764:                          "LATIN_EXTENDED_ADDITIONAL", 
 765:                          "Latin Extended Additional");
 766: 
 767:     /**
 768:      * Greek Extended.
 769:      * 0x1F00 - 0x1FFF.
 770:      */
 771:     public static final UnicodeBlock GREEK_EXTENDED
 772:       = new UnicodeBlock(0x1F00, 0x1FFF,
 773:                          "GREEK_EXTENDED", 
 774:                          "Greek Extended");
 775: 
 776:     /**
 777:      * General Punctuation.
 778:      * 0x2000 - 0x206F.
 779:      */
 780:     public static final UnicodeBlock GENERAL_PUNCTUATION
 781:       = new UnicodeBlock(0x2000, 0x206F,
 782:                          "GENERAL_PUNCTUATION", 
 783:                          "General Punctuation");
 784: 
 785:     /**
 786:      * Superscripts and Subscripts.
 787:      * 0x2070 - 0x209F.
 788:      */
 789:     public static final UnicodeBlock SUPERSCRIPTS_AND_SUBSCRIPTS
 790:       = new UnicodeBlock(0x2070, 0x209F,
 791:                          "SUPERSCRIPTS_AND_SUBSCRIPTS", 
 792:                          "Superscripts and Subscripts");
 793: 
 794:     /**
 795:      * Currency Symbols.
 796:      * 0x20A0 - 0x20CF.
 797:      */
 798:     public static final UnicodeBlock CURRENCY_SYMBOLS
 799:       = new UnicodeBlock(0x20A0, 0x20CF,
 800:                          "CURRENCY_SYMBOLS", 
 801:                          "Currency Symbols");
 802: 
 803:     /**
 804:      * Combining Marks for Symbols.
 805:      * 0x20D0 - 0x20FF.
 806:      */
 807:     public static final UnicodeBlock COMBINING_MARKS_FOR_SYMBOLS
 808:       = new UnicodeBlock(0x20D0, 0x20FF,
 809:                          "COMBINING_MARKS_FOR_SYMBOLS", 
 810:                          "Combining Marks for Symbols");
 811: 
 812:     /**
 813:      * Letterlike Symbols.
 814:      * 0x2100 - 0x214F.
 815:      */
 816:     public static final UnicodeBlock LETTERLIKE_SYMBOLS
 817:       = new UnicodeBlock(0x2100, 0x214F,
 818:                          "LETTERLIKE_SYMBOLS", 
 819:                          "Letterlike Symbols");
 820: 
 821:     /**
 822:      * Number Forms.
 823:      * 0x2150 - 0x218F.
 824:      */
 825:     public static final UnicodeBlock NUMBER_FORMS
 826:       = new UnicodeBlock(0x2150, 0x218F,
 827:                          "NUMBER_FORMS", 
 828:                          "Number Forms");
 829: 
 830:     /**
 831:      * Arrows.
 832:      * 0x2190 - 0x21FF.
 833:      */
 834:     public static final UnicodeBlock ARROWS
 835:       = new UnicodeBlock(0x2190, 0x21FF,
 836:                          "ARROWS", 
 837:                          "Arrows");
 838: 
 839:     /**
 840:      * Mathematical Operators.
 841:      * 0x2200 - 0x22FF.
 842:      */
 843:     public static final UnicodeBlock MATHEMATICAL_OPERATORS
 844:       = new UnicodeBlock(0x2200, 0x22FF,
 845:                          "MATHEMATICAL_OPERATORS", 
 846:                          "Mathematical Operators");
 847: 
 848:     /**
 849:      * Miscellaneous Technical.
 850:      * 0x2300 - 0x23FF.
 851:      */
 852:     public static final UnicodeBlock MISCELLANEOUS_TECHNICAL
 853:       = new UnicodeBlock(0x2300, 0x23FF,
 854:                          "MISCELLANEOUS_TECHNICAL", 
 855:                          "Miscellaneous Technical");
 856: 
 857:     /**
 858:      * Control Pictures.
 859:      * 0x2400 - 0x243F.
 860:      */
 861:     public static final UnicodeBlock CONTROL_PICTURES
 862:       = new UnicodeBlock(0x2400, 0x243F,
 863:                          "CONTROL_PICTURES", 
 864:                          "Control Pictures");
 865: 
 866:     /**
 867:      * Optical Character Recognition.
 868:      * 0x2440 - 0x245F.
 869:      */
 870:     public static final UnicodeBlock OPTICAL_CHARACTER_RECOGNITION
 871:       = new UnicodeBlock(0x2440, 0x245F,
 872:                          "OPTICAL_CHARACTER_RECOGNITION", 
 873:                          "Optical Character Recognition");
 874: 
 875:     /**
 876:      * Enclosed Alphanumerics.
 877:      * 0x2460 - 0x24FF.
 878:      */
 879:     public static final UnicodeBlock ENCLOSED_ALPHANUMERICS
 880:       = new UnicodeBlock(0x2460, 0x24FF,
 881:                          "ENCLOSED_ALPHANUMERICS", 
 882:                          "Enclosed Alphanumerics");
 883: 
 884:     /**
 885:      * Box Drawing.
 886:      * 0x2500 - 0x257F.
 887:      */
 888:     public static final UnicodeBlock BOX_DRAWING
 889:       = new UnicodeBlock(0x2500, 0x257F,
 890:                          "BOX_DRAWING", 
 891:                          "Box Drawing");
 892: 
 893:     /**
 894:      * Block Elements.
 895:      * 0x2580 - 0x259F.
 896:      */
 897:     public static final UnicodeBlock BLOCK_ELEMENTS
 898:       = new UnicodeBlock(0x2580, 0x259F,
 899:                          "BLOCK_ELEMENTS", 
 900:                          "Block Elements");
 901: 
 902:     /**
 903:      * Geometric Shapes.
 904:      * 0x25A0 - 0x25FF.
 905:      */
 906:     public static final UnicodeBlock GEOMETRIC_SHAPES
 907:       = new UnicodeBlock(0x25A0, 0x25FF,
 908:                          "GEOMETRIC_SHAPES", 
 909:                          "Geometric Shapes");
 910: 
 911:     /**
 912:      * Miscellaneous Symbols.
 913:      * 0x2600 - 0x26FF.
 914:      */
 915:     public static final UnicodeBlock MISCELLANEOUS_SYMBOLS
 916:       = new UnicodeBlock(0x2600, 0x26FF,
 917:                          "MISCELLANEOUS_SYMBOLS", 
 918:                          "Miscellaneous Symbols");
 919: 
 920:     /**
 921:      * Dingbats.
 922:      * 0x2700 - 0x27BF.
 923:      */
 924:     public static final UnicodeBlock DINGBATS
 925:       = new UnicodeBlock(0x2700, 0x27BF,
 926:                          "DINGBATS", 
 927:                          "Dingbats");
 928: 
 929:     /**
 930:      * Miscellaneous Mathematical Symbols-A.
 931:      * 0x27C0 - 0x27EF.
 932:      * @since 1.5
 933:      */
 934:     public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A
 935:       = new UnicodeBlock(0x27C0, 0x27EF,
 936:                          "MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A", 
 937:                          "Miscellaneous Mathematical Symbols-A");
 938: 
 939:     /**
 940:      * Supplemental Arrows-A.
 941:      * 0x27F0 - 0x27FF.
 942:      * @since 1.5
 943:      */
 944:     public static final UnicodeBlock SUPPLEMENTAL_ARROWS_A
 945:       = new UnicodeBlock(0x27F0, 0x27FF,
 946:                          "SUPPLEMENTAL_ARROWS_A", 
 947:                          "Supplemental Arrows-A");
 948: 
 949:     /**
 950:      * Braille Patterns.
 951:      * 0x2800 - 0x28FF.
 952:      * @since 1.4
 953:      */
 954:     public static final UnicodeBlock BRAILLE_PATTERNS
 955:       = new UnicodeBlock(0x2800, 0x28FF,
 956:                          "BRAILLE_PATTERNS", 
 957:                          "Braille Patterns");
 958: 
 959:     /**
 960:      * Supplemental Arrows-B.
 961:      * 0x2900 - 0x297F.
 962:      * @since 1.5
 963:      */
 964:     public static final UnicodeBlock SUPPLEMENTAL_ARROWS_B
 965:       = new UnicodeBlock(0x2900, 0x297F,
 966:                          "SUPPLEMENTAL_ARROWS_B", 
 967:                          "Supplemental Arrows-B");
 968: 
 969:     /**
 970:      * Miscellaneous Mathematical Symbols-B.
 971:      * 0x2980 - 0x29FF.
 972:      * @since 1.5
 973:      */
 974:     public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B
 975:       = new UnicodeBlock(0x2980, 0x29FF,
 976:                          "MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B", 
 977:                          "Miscellaneous Mathematical Symbols-B");
 978: 
 979:     /**
 980:      * Supplemental Mathematical Operators.
 981:      * 0x2A00 - 0x2AFF.
 982:      * @since 1.5
 983:      */
 984:     public static final UnicodeBlock SUPPLEMENTAL_MATHEMATICAL_OPERATORS
 985:       = new UnicodeBlock(0x2A00, 0x2AFF,
 986:                          "SUPPLEMENTAL_MATHEMATICAL_OPERATORS", 
 987:                          "Supplemental Mathematical Operators");
 988: 
 989:     /**
 990:      * Miscellaneous Symbols and Arrows.
 991:      * 0x2B00 - 0x2BFF.
 992:      * @since 1.5
 993:      */
 994:     public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_ARROWS
 995:       = new UnicodeBlock(0x2B00, 0x2BFF,
 996:                          "MISCELLANEOUS_SYMBOLS_AND_ARROWS", 
 997:                          "Miscellaneous Symbols and Arrows");
 998: 
 999:     /**
1000:      * CJK Radicals Supplement.
1001:      * 0x2E80 - 0x2EFF.
1002:      * @since 1.4
1003:      */
1004:     public static final UnicodeBlock CJK_RADICALS_SUPPLEMENT
1005:       = new UnicodeBlock(0x2E80, 0x2EFF,
1006:                          "CJK_RADICALS_SUPPLEMENT", 
1007:                          "CJK Radicals Supplement");
1008: 
1009:     /**
1010:      * Kangxi Radicals.
1011:      * 0x2F00 - 0x2FDF.
1012:      * @since 1.4
1013:      */
1014:     public static final UnicodeBlock KANGXI_RADICALS
1015:       = new UnicodeBlock(0x2F00, 0x2FDF,
1016:                          "KANGXI_RADICALS", 
1017:                          "Kangxi Radicals");
1018: 
1019:     /**
1020:      * Ideographic Description Characters.
1021:      * 0x2FF0 - 0x2FFF.
1022:      * @since 1.4
1023:      */
1024:     public static final UnicodeBlock IDEOGRAPHIC_DESCRIPTION_CHARACTERS
1025:       = new UnicodeBlock(0x2FF0, 0x2FFF,
1026:                          "IDEOGRAPHIC_DESCRIPTION_CHARACTERS", 
1027:                          "Ideographic Description Characters");
1028: 
1029:     /**
1030:      * CJK Symbols and Punctuation.
1031:      * 0x3000 - 0x303F.
1032:      */
1033:     public static final UnicodeBlock CJK_SYMBOLS_AND_PUNCTUATION
1034:       = new UnicodeBlock(0x3000, 0x303F,
1035:                          "CJK_SYMBOLS_AND_PUNCTUATION", 
1036:                          "CJK Symbols and Punctuation");
1037: 
1038:     /**
1039:      * Hiragana.
1040:      * 0x3040 - 0x309F.
1041:      */
1042:     public static final UnicodeBlock HIRAGANA
1043:       = new UnicodeBlock(0x3040, 0x309F,
1044:                          "HIRAGANA", 
1045:                          "Hiragana");
1046: 
1047:     /**
1048:      * Katakana.
1049:      * 0x30A0 - 0x30FF.
1050:      */
1051:     public static final UnicodeBlock KATAKANA
1052:       = new UnicodeBlock(0x30A0, 0x30FF,
1053:                          "KATAKANA", 
1054:                          "Katakana");
1055: 
1056:     /**
1057:      * Bopomofo.
1058:      * 0x3100 - 0x312F.
1059:      */
1060:     public static final UnicodeBlock BOPOMOFO
1061:       = new UnicodeBlock(0x3100, 0x312F,
1062:                          "BOPOMOFO", 
1063:                          "Bopomofo");
1064: 
1065:     /**
1066:      * Hangul Compatibility Jamo.
1067:      * 0x3130 - 0x318F.
1068:      */
1069:     public static final UnicodeBlock HANGUL_COMPATIBILITY_JAMO
1070:       = new UnicodeBlock(0x3130, 0x318F,
1071:                          "HANGUL_COMPATIBILITY_JAMO", 
1072:                          "Hangul Compatibility Jamo");
1073: 
1074:     /**
1075:      * Kanbun.
1076:      * 0x3190 - 0x319F.
1077:      */
1078:     public static final UnicodeBlock KANBUN
1079:       = new UnicodeBlock(0x3190, 0x319F,
1080:                          "KANBUN", 
1081:                          "Kanbun");
1082: 
1083:     /**
1084:      * Bopomofo Extended.
1085:      * 0x31A0 - 0x31BF.
1086:      * @since 1.4
1087:      */
1088:     public static final UnicodeBlock BOPOMOFO_EXTENDED
1089:       = new UnicodeBlock(0x31A0, 0x31BF,
1090:                          "BOPOMOFO_EXTENDED", 
1091:                          "Bopomofo Extended");
1092: 
1093:     /**
1094:      * Katakana Phonetic Extensions.
1095:      * 0x31F0 - 0x31FF.
1096:      * @since 1.5
1097:      */
1098:     public static final UnicodeBlock KATAKANA_PHONETIC_EXTENSIONS
1099:       = new UnicodeBlock(0x31F0, 0x31FF,
1100:                          "KATAKANA_PHONETIC_EXTENSIONS", 
1101:                          "Katakana Phonetic Extensions");
1102: 
1103:     /**
1104:      * Enclosed CJK Letters and Months.
1105:      * 0x3200 - 0x32FF.
1106:      */
1107:     public static final UnicodeBlock ENCLOSED_CJK_LETTERS_AND_MONTHS
1108:       = new UnicodeBlock(0x3200, 0x32FF,
1109:                          "ENCLOSED_CJK_LETTERS_AND_MONTHS", 
1110:                          "Enclosed CJK Letters and Months");
1111: 
1112:     /**
1113:      * CJK Compatibility.
1114:      * 0x3300 - 0x33FF.
1115:      */
1116:     public static final UnicodeBlock CJK_COMPATIBILITY
1117:       = new UnicodeBlock(0x3300, 0x33FF,
1118:                          "CJK_COMPATIBILITY", 
1119:                          "CJK Compatibility");
1120: 
1121:     /**
1122:      * CJK Unified Ideographs Extension A.
1123:      * 0x3400 - 0x4DBF.
1124:      * @since 1.4
1125:      */
1126:     public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A
1127:       = new UnicodeBlock(0x3400, 0x4DBF,
1128:                          "CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A", 
1129:                          "CJK Unified Ideographs Extension A");
1130: 
1131:     /**
1132:      * Yijing Hexagram Symbols.
1133:      * 0x4DC0 - 0x4DFF.
1134:      * @since 1.5
1135:      */
1136:     public static final UnicodeBlock YIJING_HEXAGRAM_SYMBOLS
1137:       = new UnicodeBlock(0x4DC0, 0x4DFF,
1138:                          "YIJING_HEXAGRAM_SYMBOLS", 
1139:                          "Yijing Hexagram Symbols");
1140: 
1141:     /**
1142:      * CJK Unified Ideographs.
1143:      * 0x4E00 - 0x9FFF.
1144:      */
1145:     public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS
1146:       = new UnicodeBlock(0x4E00, 0x9FFF,
1147:                          "CJK_UNIFIED_IDEOGRAPHS", 
1148:                          "CJK Unified Ideographs");
1149: 
1150:     /**
1151:      * Yi Syllables.
1152:      * 0xA000 - 0xA48F.
1153:      * @since 1.4
1154:      */
1155:     public static final UnicodeBlock YI_SYLLABLES
1156:       = new UnicodeBlock(0xA000, 0xA48F,
1157:                          "YI_SYLLABLES", 
1158:                          "Yi Syllables");
1159: 
1160:     /**
1161:      * Yi Radicals.
1162:      * 0xA490 - 0xA4CF.
1163:      * @since 1.4
1164:      */
1165:     public static final UnicodeBlock YI_RADICALS
1166:       = new UnicodeBlock(0xA490, 0xA4CF,
1167:                          "YI_RADICALS", 
1168:                          "Yi Radicals");
1169: 
1170:     /**
1171:      * Hangul Syllables.
1172:      * 0xAC00 - 0xD7AF.
1173:      */
1174:     public static final UnicodeBlock HANGUL_SYLLABLES
1175:       = new UnicodeBlock(0xAC00, 0xD7AF,
1176:                          "HANGUL_SYLLABLES", 
1177:                          "Hangul Syllables");
1178: 
1179:     /**
1180:      * High Surrogates.
1181:      * 0xD800 - 0xDB7F.
1182:      * @since 1.5
1183:      */
1184:     public static final UnicodeBlock HIGH_SURROGATES
1185:       = new UnicodeBlock(0xD800, 0xDB7F,
1186:                          "HIGH_SURROGATES", 
1187:                          "High Surrogates");
1188: 
1189:     /**
1190:      * High Private Use Surrogates.
1191:      * 0xDB80 - 0xDBFF.
1192:      * @since 1.5
1193:      */
1194:     public static final UnicodeBlock HIGH_PRIVATE_USE_SURROGATES
1195:       = new UnicodeBlock(0xDB80, 0xDBFF,
1196:                          "HIGH_PRIVATE_USE_SURROGATES", 
1197:                          "High Private Use Surrogates");
1198: 
1199:     /**
1200:      * Low Surrogates.
1201:      * 0xDC00 - 0xDFFF.
1202:      * @since 1.5
1203:      */
1204:     public static final UnicodeBlock LOW_SURROGATES
1205:       = new UnicodeBlock(0xDC00, 0xDFFF,
1206:                          "LOW_SURROGATES", 
1207:                          "Low Surrogates");
1208: 
1209:     /**
1210:      * Private Use Area.
1211:      * 0xE000 - 0xF8FF.
1212:      */
1213:     public static final UnicodeBlock PRIVATE_USE_AREA
1214:       = new UnicodeBlock(0xE000, 0xF8FF,
1215:                          "PRIVATE_USE_AREA", 
1216:                          "Private Use Area");
1217: 
1218:     /**
1219:      * CJK Compatibility Ideographs.
1220:      * 0xF900 - 0xFAFF.
1221:      */
1222:     public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS
1223:       = new UnicodeBlock(0xF900, 0xFAFF,
1224:                          "CJK_COMPATIBILITY_IDEOGRAPHS", 
1225:                          "CJK Compatibility Ideographs");
1226: 
1227:     /**
1228:      * Alphabetic Presentation Forms.
1229:      * 0xFB00 - 0xFB4F.
1230:      */
1231:     public static final UnicodeBlock ALPHABETIC_PRESENTATION_FORMS
1232:       = new UnicodeBlock(0xFB00, 0xFB4F,
1233:                          "ALPHABETIC_PRESENTATION_FORMS", 
1234:                          "Alphabetic Presentation Forms");
1235: 
1236:     /**
1237:      * Arabic Presentation Forms-A.
1238:      * 0xFB50 - 0xFDFF.
1239:      */
1240:     public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_A
1241:       = new UnicodeBlock(0xFB50, 0xFDFF,
1242:                          "ARABIC_PRESENTATION_FORMS_A", 
1243:                          "Arabic Presentation Forms-A");
1244: 
1245:     /**
1246:      * Variation Selectors.
1247:      * 0xFE00 - 0xFE0F.
1248:      * @since 1.5
1249:      */
1250:     public static final UnicodeBlock VARIATION_SELECTORS
1251:       = new UnicodeBlock(0xFE00, 0xFE0F,
1252:                          "VARIATION_SELECTORS", 
1253:                          "Variation Selectors");
1254: 
1255:     /**
1256:      * Combining Half Marks.
1257:      * 0xFE20 - 0xFE2F.
1258:      */
1259:     public static final UnicodeBlock COMBINING_HALF_MARKS
1260:       = new UnicodeBlock(0xFE20, 0xFE2F,
1261:                          "COMBINING_HALF_MARKS", 
1262:                          "Combining Half Marks");
1263: 
1264:     /**
1265:      * CJK Compatibility Forms.
1266:      * 0xFE30 - 0xFE4F.
1267:      */
1268:     public static final UnicodeBlock CJK_COMPATIBILITY_FORMS
1269:       = new UnicodeBlock(0xFE30, 0xFE4F,
1270:                          "CJK_COMPATIBILITY_FORMS", 
1271:                          "CJK Compatibility Forms");
1272: 
1273:     /**
1274:      * Small Form Variants.
1275:      * 0xFE50 - 0xFE6F.
1276:      */
1277:     public static final UnicodeBlock SMALL_FORM_VARIANTS
1278:       = new UnicodeBlock(0xFE50, 0xFE6F,
1279:                          "SMALL_FORM_VARIANTS", 
1280:                          "Small Form Variants");
1281: 
1282:     /**
1283:      * Arabic Presentation Forms-B.
1284:      * 0xFE70 - 0xFEFF.
1285:      */
1286:     public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_B
1287:       = new UnicodeBlock(0xFE70, 0xFEFF,
1288:                          "ARABIC_PRESENTATION_FORMS_B", 
1289:                          "Arabic Presentation Forms-B");
1290: 
1291:     /**
1292:      * Halfwidth and Fullwidth Forms.
1293:      * 0xFF00 - 0xFFEF.
1294:      */
1295:     public static final UnicodeBlock HALFWIDTH_AND_FULLWIDTH_FORMS
1296:       = new UnicodeBlock(0xFF00, 0xFFEF,
1297:                          "HALFWIDTH_AND_FULLWIDTH_FORMS", 
1298:                          "Halfwidth and Fullwidth Forms");
1299: 
1300:     /**
1301:      * Specials.
1302:      * 0xFFF0 - 0xFFFF.
1303:      */
1304:     public static final UnicodeBlock SPECIALS
1305:       = new UnicodeBlock(0xFFF0, 0xFFFF,
1306:                          "SPECIALS", 
1307:                          "Specials");
1308: 
1309:     /**
1310:      * Linear B Syllabary.
1311:      * 0x10000 - 0x1007F.
1312:      * @since 1.5
1313:      */
1314:     public static final UnicodeBlock LINEAR_B_SYLLABARY
1315:       = new UnicodeBlock(0x10000, 0x1007F,
1316:                          "LINEAR_B_SYLLABARY", 
1317:                          "Linear B Syllabary");
1318: 
1319:     /**
1320:      * Linear B Ideograms.
1321:      * 0x10080 - 0x100FF.
1322:      * @since 1.5
1323:      */
1324:     public static final UnicodeBlock LINEAR_B_IDEOGRAMS
1325:       = new UnicodeBlock(0x10080, 0x100FF,
1326:                          "LINEAR_B_IDEOGRAMS", 
1327:                          "Linear B Ideograms");
1328: 
1329:     /**
1330:      * Aegean Numbers.
1331:      * 0x10100 - 0x1013F.
1332:      * @since 1.5
1333:      */
1334:     public static final UnicodeBlock AEGEAN_NUMBERS
1335:       = new UnicodeBlock(0x10100, 0x1013F,
1336:                          "AEGEAN_NUMBERS", 
1337:                          "Aegean Numbers");
1338: 
1339:     /**
1340:      * Old Italic.
1341:      * 0x10300 - 0x1032F.
1342:      * @since 1.5
1343:      */
1344:     public static final UnicodeBlock OLD_ITALIC
1345:       = new UnicodeBlock(0x10300, 0x1032F,
1346:                          "OLD_ITALIC", 
1347:                          "Old Italic");
1348: 
1349:     /**
1350:      * Gothic.
1351:      * 0x10330 - 0x1034F.
1352:      * @since 1.5
1353:      */
1354:     public static final UnicodeBlock GOTHIC
1355:       = new UnicodeBlock(0x10330, 0x1034F,
1356:                          "GOTHIC", 
1357:                          "Gothic");
1358: 
1359:     /**
1360:      * Ugaritic.
1361:      * 0x10380 - 0x1039F.
1362:      * @since 1.5
1363:      */
1364:     public static final UnicodeBlock UGARITIC
1365:       = new UnicodeBlock(0x10380, 0x1039F,
1366:                          "UGARITIC", 
1367:                          "Ugaritic");
1368: 
1369:     /**
1370:      * Deseret.
1371:      * 0x10400 - 0x1044F.
1372:      * @since 1.5
1373:      */
1374:     public static final UnicodeBlock DESERET
1375:       = new UnicodeBlock(0x10400, 0x1044F,
1376:                          "DESERET", 
1377:                          "Deseret");
1378: 
1379:     /**
1380:      * Shavian.
1381:      * 0x10450 - 0x1047F.
1382:      * @since 1.5
1383:      */
1384:     public static final UnicodeBlock SHAVIAN
1385:       = new UnicodeBlock(0x10450, 0x1047F,
1386:                          "SHAVIAN", 
1387:                          "Shavian");
1388: 
1389:     /**
1390:      * Osmanya.
1391:      * 0x10480 - 0x104AF.
1392:      * @since 1.5
1393:      */
1394:     public static final UnicodeBlock OSMANYA
1395:       = new UnicodeBlock(0x10480, 0x104AF,
1396:                          "OSMANYA", 
1397:                          "Osmanya");
1398: 
1399:     /**
1400:      * Cypriot Syllabary.
1401:      * 0x10800 - 0x1083F.
1402:      * @since 1.5
1403:      */
1404:     public static final UnicodeBlock CYPRIOT_SYLLABARY
1405:       = new UnicodeBlock(0x10800, 0x1083F,
1406:                          "CYPRIOT_SYLLABARY", 
1407:                          "Cypriot Syllabary");
1408: 
1409:     /**
1410:      * Byzantine Musical Symbols.
1411:      * 0x1D000 - 0x1D0FF.
1412:      * @since 1.5
1413:      */
1414:     public static final UnicodeBlock BYZANTINE_MUSICAL_SYMBOLS
1415:       = new UnicodeBlock(0x1D000, 0x1D0FF,
1416:                          "BYZANTINE_MUSICAL_SYMBOLS", 
1417:                          "Byzantine Musical Symbols");
1418: 
1419:     /**
1420:      * Musical Symbols.
1421:      * 0x1D100 - 0x1D1FF.
1422:      * @since 1.5
1423:      */
1424:     public static final UnicodeBlock MUSICAL_SYMBOLS
1425:       = new UnicodeBlock(0x1D100, 0x1D1FF,
1426:                          "MUSICAL_SYMBOLS", 
1427:                          "Musical Symbols");
1428: 
1429:     /**
1430:      * Tai Xuan Jing Symbols.
1431:      * 0x1D300 - 0x1D35F.
1432:      * @since 1.5
1433:      */
1434:     public static final UnicodeBlock TAI_XUAN_JING_SYMBOLS
1435:       = new UnicodeBlock(0x1D300, 0x1D35F,
1436:                          "TAI_XUAN_JING_SYMBOLS", 
1437:                          "Tai Xuan Jing Symbols");
1438: 
1439:     /**
1440:      * Mathematical Alphanumeric Symbols.
1441:      * 0x1D400 - 0x1D7FF.
1442:      * @since 1.5
1443:      */
1444:     public static final UnicodeBlock MATHEMATICAL_ALPHANUMERIC_SYMBOLS
1445:       = new UnicodeBlock(0x1D400, 0x1D7FF,
1446:                          "MATHEMATICAL_ALPHANUMERIC_SYMBOLS", 
1447:                          "Mathematical Alphanumeric Symbols");
1448: 
1449:     /**
1450:      * CJK Unified Ideographs Extension B.
1451:      * 0x20000 - 0x2A6DF.
1452:      * @since 1.5
1453:      */
1454:     public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B
1455:       = new UnicodeBlock(0x20000, 0x2A6DF,
1456:                          "CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B", 
1457:                          "CJK Unified Ideographs Extension B");
1458: 
1459:     /**
1460:      * CJK Compatibility Ideographs Supplement.
1461:      * 0x2F800 - 0x2FA1F.
1462:      * @since 1.5
1463:      */
1464:     public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT
1465:       = new UnicodeBlock(0x2F800, 0x2FA1F,
1466:                          "CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT", 
1467:                          "CJK Compatibility Ideographs Supplement");
1468: 
1469:     /**
1470:      * Tags.
1471:      * 0xE0000 - 0xE007F.
1472:      * @since 1.5
1473:      */
1474:     public static final UnicodeBlock TAGS
1475:       = new UnicodeBlock(0xE0000, 0xE007F,
1476:                          "TAGS", 
1477:                          "Tags");
1478: 
1479:     /**
1480:      * Variation Selectors Supplement.
1481:      * 0xE0100 - 0xE01EF.
1482:      * @since 1.5
1483:      */
1484:     public static final UnicodeBlock VARIATION_SELECTORS_SUPPLEMENT
1485:       = new UnicodeBlock(0xE0100, 0xE01EF,
1486:                          "VARIATION_SELECTORS_SUPPLEMENT", 
1487:                          "Variation Selectors Supplement");
1488: 
1489:     /**
1490:      * Supplementary Private Use Area-A.
1491:      * 0xF0000 - 0xFFFFF.
1492:      * @since 1.5
1493:      */
1494:     public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_A
1495:       = new UnicodeBlock(0xF0000, 0xFFFFF,
1496:                          "SUPPLEMENTARY_PRIVATE_USE_AREA_A", 
1497:                          "Supplementary Private Use Area-A");
1498: 
1499:     /**
1500:      * Supplementary Private Use Area-B.
1501:      * 0x100000 - 0x10FFFF.
1502:      * @since 1.5
1503:      */
1504:     public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_B
1505:       = new UnicodeBlock(0x100000, 0x10FFFF,
1506:                          "SUPPLEMENTARY_PRIVATE_USE_AREA_B", 
1507:                          "Supplementary Private Use Area-B");
1508: 
1509:     /**
1510:      * Surrogates Area, the union of the three surrogate blocks.
1511:      * 0xD800 - 0xDFFF.
1512:      * @deprecated As of 1.5, the three areas,
1513:      * {@link #HIGH_SURROGATES},
1514:      * {@link #HIGH_PRIVATE_USE_SURROGATES}
1515:      * and {@link #LOW_SURROGATES}, as defined
1516:      * by the Unicode standard, should be used in preference to
1517:      * this.  These are also returned from calls to <code>of(int)</code>
1518:      * and <code>of(char)</code>.
1519:      */
1520:     public static final UnicodeBlock SURROGATES_AREA
1521:       = new UnicodeBlock(0xD800, 0xDFFF,
1522:                          "SURROGATES_AREA",
1523:                          "Surrogates Area");
1524: 
1525:     /**
1526:      * The defined subsets.
1527:      */
1528:     private static final UnicodeBlock sets[] = {
1529:       BASIC_LATIN,
1530:       LATIN_1_SUPPLEMENT,
1531:       LATIN_EXTENDED_A,
1532:       LATIN_EXTENDED_B,
1533:       IPA_EXTENSIONS,
1534:       SPACING_MODIFIER_LETTERS,
1535:       COMBINING_DIACRITICAL_MARKS,
1536:       GREEK,
1537:       CYRILLIC,
1538:       CYRILLIC_SUPPLEMENTARY,
1539:       ARMENIAN,
1540:       HEBREW,
1541:       ARABIC,
1542:       SYRIAC,
1543:       THAANA,
1544:       DEVANAGARI,
1545:       BENGALI,
1546:       GURMUKHI,
1547:       GUJARATI,
1548:       ORIYA,
1549:       TAMIL,
1550:       TELUGU,
1551:       KANNADA,
1552:       MALAYALAM,
1553:       SINHALA,
1554:       THAI,
1555:       LAO,
1556:       TIBETAN,
1557:       MYANMAR,
1558:       GEORGIAN,
1559:       HANGUL_JAMO,
1560:       ETHIOPIC,
1561:       CHEROKEE,
1562:       UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS,
1563:       OGHAM,
1564:       RUNIC,
1565:       TAGALOG,
1566:       HANUNOO,
1567:       BUHID,
1568:       TAGBANWA,
1569:       KHMER,
1570:       MONGOLIAN,
1571:       LIMBU,
1572:       TAI_LE,
1573:       KHMER_SYMBOLS,
1574:       PHONETIC_EXTENSIONS,
1575:       LATIN_EXTENDED_ADDITIONAL,
1576:       GREEK_EXTENDED,
1577:       GENERAL_PUNCTUATION,
1578:       SUPERSCRIPTS_AND_SUBSCRIPTS,
1579:       CURRENCY_SYMBOLS,
1580:       COMBINING_MARKS_FOR_SYMBOLS,
1581:       LETTERLIKE_SYMBOLS,
1582:       NUMBER_FORMS,
1583:       ARROWS,
1584:       MATHEMATICAL_OPERATORS,
1585:       MISCELLANEOUS_TECHNICAL,
1586:       CONTROL_PICTURES,
1587:       OPTICAL_CHARACTER_RECOGNITION,
1588:       ENCLOSED_ALPHANUMERICS,
1589:       BOX_DRAWING,
1590:       BLOCK_ELEMENTS,
1591:       GEOMETRIC_SHAPES,
1592:       MISCELLANEOUS_SYMBOLS,
1593:       DINGBATS,
1594:       MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A,
1595:       SUPPLEMENTAL_ARROWS_A,
1596:       BRAILLE_PATTERNS,
1597:       SUPPLEMENTAL_ARROWS_B,
1598:       MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B,
1599:       SUPPLEMENTAL_MATHEMATICAL_OPERATORS,
1600:       MISCELLANEOUS_SYMBOLS_AND_ARROWS,
1601:       CJK_RADICALS_SUPPLEMENT,
1602:       KANGXI_RADICALS,
1603:       IDEOGRAPHIC_DESCRIPTION_CHARACTERS,
1604:       CJK_SYMBOLS_AND_PUNCTUATION,
1605:       HIRAGANA,
1606:       KATAKANA,
1607:       BOPOMOFO,
1608:       HANGUL_COMPATIBILITY_JAMO,
1609:       KANBUN,
1610:       BOPOMOFO_EXTENDED,
1611:       KATAKANA_PHONETIC_EXTENSIONS,
1612:       ENCLOSED_CJK_LETTERS_AND_MONTHS,
1613:       CJK_COMPATIBILITY,
1614:       CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A,
1615:       YIJING_HEXAGRAM_SYMBOLS,
1616:       CJK_UNIFIED_IDEOGRAPHS,
1617:       YI_SYLLABLES,
1618:       YI_RADICALS,
1619:       HANGUL_SYLLABLES,
1620:       HIGH_SURROGATES,
1621:       HIGH_PRIVATE_USE_SURROGATES,
1622:       LOW_SURROGATES,
1623:       PRIVATE_USE_AREA,
1624:       CJK_COMPATIBILITY_IDEOGRAPHS,
1625:       ALPHABETIC_PRESENTATION_FORMS,
1626:       ARABIC_PRESENTATION_FORMS_A,
1627:       VARIATION_SELECTORS,
1628:       COMBINING_HALF_MARKS,
1629:       CJK_COMPATIBILITY_FORMS,
1630:       SMALL_FORM_VARIANTS,
1631:       ARABIC_PRESENTATION_FORMS_B,
1632:       HALFWIDTH_AND_FULLWIDTH_FORMS,
1633:       SPECIALS,
1634:       LINEAR_B_SYLLABARY,
1635:       LINEAR_B_IDEOGRAMS,
1636:       AEGEAN_NUMBERS,
1637:       OLD_ITALIC,
1638:       GOTHIC,
1639:       UGARITIC,
1640:       DESERET,
1641:       SHAVIAN,
1642:       OSMANYA,
1643:       CYPRIOT_SYLLABARY,
1644:       BYZANTINE_MUSICAL_SYMBOLS,
1645:       MUSICAL_SYMBOLS,
1646:       TAI_XUAN_JING_SYMBOLS,
1647:       MATHEMATICAL_ALPHANUMERIC_SYMBOLS,
1648:       CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B,
1649:       CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT,
1650:       TAGS,
1651:       VARIATION_SELECTORS_SUPPLEMENT,
1652:       SUPPLEMENTARY_PRIVATE_USE_AREA_A,
1653:       SUPPLEMENTARY_PRIVATE_USE_AREA_B,
1654:     };
1655:   } // class UnicodeBlock
1656: 
1657:   /**
1658:    * The immutable value of this Character.
1659:    *
1660:    * @serial the value of this Character
1661:    */
1662:   private final char value;
1663: 
1664:   /**
1665:    * Compatible with JDK 1.0+.
1666:    */
1667:   private static final long serialVersionUID = 3786198910865385080L;
1668: 
1669:   /**
1670:    * Smallest value allowed for radix arguments in Java. This value is 2.
1671:    *
1672:    * @see #digit(char, int)
1673:    * @see #forDigit(int, int)
1674:    * @see Integer#toString(int, int)
1675:    * @see Integer#valueOf(String)
1676:    */
1677:   public static final int MIN_RADIX = 2;
1678: 
1679:   /**
1680:    * Largest value allowed for radix arguments in Java. This value is 36.
1681:    *
1682:    * @see #digit(char, int)
1683:    * @see #forDigit(int, int)
1684:    * @see Integer#toString(int, int)
1685:    * @see Integer#valueOf(String)
1686:    */
1687:   public static final int MAX_RADIX = 36;
1688: 
1689:   /**
1690:    * The minimum value the char data type can hold.
1691:    * This value is <code>'&#92;u0000'</code>.
1692:    */
1693:   public static final char MIN_VALUE = '\u0000';
1694: 
1695:   /**
1696:    * The maximum value the char data type can hold.
1697:    * This value is <code>'&#92;uFFFF'</code>.
1698:    */
1699:   public static final char MAX_VALUE = '\uFFFF';
1700: 
1701:   /**
1702:    * Class object representing the primitive char data type.
1703:    *
1704:    * @since 1.1
1705:    */
1706:   public static final Class TYPE = VMClassLoader.getPrimitiveClass('C');
1707: 
1708:   /**
1709:    * The number of bits needed to represent a <code>char</code>.
1710:    * @since 1.5
1711:    */
1712:   public static final int SIZE = 16;
1713: 
1714:   // This caches some Character values, and is used by boxing
1715:   // conversions via valueOf().  We must cache at least 0..127;
1716:   // this constant controls how much we actually cache.
1717:   private static final int MAX_CACHE = 127;
1718:   private static Character[] charCache = new Character[MAX_CACHE + 1];
1719: 
1720:   /**
1721:    * Lu = Letter, Uppercase (Informative).
1722:    *
1723:    * @since 1.1
1724:    */
1725:   public static final byte UPPERCASE_LETTER = 1;
1726: 
1727:   /**
1728:    * Ll = Letter, Lowercase (Informative).
1729:    *
1730:    * @since 1.1
1731:    */
1732:   public static final byte LOWERCASE_LETTER = 2;
1733: 
1734:   /**
1735:    * Lt = Letter, Titlecase (Informative).
1736:    *
1737:    * @since 1.1
1738:    */
1739:   public static final byte TITLECASE_LETTER = 3;
1740: 
1741:   /**
1742:    * Mn = Mark, Non-Spacing (Normative).
1743:    *
1744:    * @since 1.1
1745:    */
1746:   public static final byte NON_SPACING_MARK = 6;
1747: 
1748:   /**
1749:    * Mc = Mark, Spacing Combining (Normative).
1750:    *
1751:    * @since 1.1
1752:    */
1753:   public static final byte COMBINING_SPACING_MARK = 8;
1754: 
1755:   /**
1756:    * Me = Mark, Enclosing (Normative).
1757:    *
1758:    * @since 1.1
1759:    */
1760:   public static final byte ENCLOSING_MARK = 7;
1761: 
1762:   /**
1763:    * Nd = Number, Decimal Digit (Normative).
1764:    *
1765:    * @since 1.1
1766:    */
1767:   public static final byte DECIMAL_DIGIT_NUMBER = 9;
1768: 
1769:   /**
1770:    * Nl = Number, Letter (Normative).
1771:    *
1772:    * @since 1.1
1773:    */
1774:   public static final byte LETTER_NUMBER = 10;
1775: 
1776:   /**
1777:    * No = Number, Other (Normative).
1778:    *
1779:    * @since 1.1
1780:    */
1781:   public static final byte OTHER_NUMBER = 11;
1782: 
1783:   /**
1784:    * Zs = Separator, Space (Normative).
1785:    *
1786:    * @since 1.1
1787:    */
1788:   public static final byte SPACE_SEPARATOR = 12;
1789: 
1790:   /**
1791:    * Zl = Separator, Line (Normative).
1792:    *
1793:    * @since 1.1
1794:    */
1795:   public static final byte LINE_SEPARATOR = 13;
1796: 
1797:   /**
1798:    * Zp = Separator, Paragraph (Normative).
1799:    *
1800:    * @since 1.1
1801:    */
1802:   public static final byte PARAGRAPH_SEPARATOR = 14;
1803: 
1804:   /**
1805:    * Unicode general category Cc = Other, Control (Normative).
1806:    *
1807:    * @since 1.1
1808:    */
1809:   public static final byte CONTROL = 15;
1810: 
1811:   /**
1812:    * Unicode general category Cf = Other, Format (Normative).
1813:    *
1814:    * @since 1.1
1815:    */
1816:   public static final byte FORMAT = 16;
1817: 
1818:   /**
1819:    * Unicode general category Cs = Other, Surrogate (Normative).
1820:    *
1821:    * @since 1.1
1822:    */
1823:   public static final byte SURROGATE = 19;
1824: 
1825:   /**
1826:    * Unicode general category Co = Other, Private Use (Normative).
1827:    *
1828:    * @since 1.1
1829:    */
1830:   public static final byte PRIVATE_USE = 18;
1831: 
1832:   /**
1833:    * Unicode general category Cn = Other, Not Assigned (Normative).
1834:    *
1835:    * @since 1.1
1836:    */
1837:   public static final byte UNASSIGNED = 0;
1838: 
1839:   /**
1840:    * Unicode general category Lm = Letter, Modifier (Informative).
1841:    *
1842:    * @since 1.1
1843:    */
1844:   public static final byte MODIFIER_LETTER = 4;
1845: 
1846:   /**
1847:    * Unicode general category Lo = Letter, Other (Informative).
1848:    *
1849:    * @since 1.1
1850:    */
1851:   public static final byte OTHER_LETTER = 5;
1852: 
1853:   /**
1854:    * Unicode general category Pc = Punctuation, Connector (Informative).
1855:    *
1856:    * @since 1.1
1857:    */
1858:   public static final byte CONNECTOR_PUNCTUATION = 23;
1859: 
1860:   /**
1861:    * Unicode general category Pd = Punctuation, Dash (Informative).
1862:    *
1863:    * @since 1.1
1864:    */
1865:   public static final byte DASH_PUNCTUATION = 20;
1866: 
1867:   /**
1868:    * Unicode general category Ps = Punctuation, Open (Informative).
1869:    *
1870:    * @since 1.1
1871:    */
1872:   public static final byte START_PUNCTUATION = 21;
1873: 
1874:   /**
1875:    * Unicode general category Pe = Punctuation, Close (Informative).
1876:    *
1877:    * @since 1.1
1878:    */
1879:   public static final byte END_PUNCTUATION = 22;
1880: 
1881:   /**
1882:    * Unicode general category Pi = Punctuation, Initial Quote (Informative).
1883:    *
1884:    * @since 1.4
1885:    */
1886:   public static final byte INITIAL_QUOTE_PUNCTUATION = 29;
1887: 
1888:   /**
1889:    * Unicode general category Pf = Punctuation, Final Quote (Informative).
1890:    *
1891:    * @since 1.4
1892:    */
1893:   public static final byte FINAL_QUOTE_PUNCTUATION = 30;
1894: 
1895:   /**
1896:    * Unicode general category Po = Punctuation, Other (Informative).
1897:    *
1898:    * @since 1.1
1899:    */
1900:   public static final byte OTHER_PUNCTUATION = 24;
1901: 
1902:   /**
1903:    * Unicode general category Sm = Symbol, Math (Informative).
1904:    *
1905:    * @since 1.1
1906:    */
1907:   public static final byte MATH_SYMBOL = 25;
1908: 
1909:   /**
1910:    * Unicode general category Sc = Symbol, Currency (Informative).
1911:    *
1912:    * @since 1.1
1913:    */
1914:   public static final byte CURRENCY_SYMBOL = 26;
1915: 
1916:   /**
1917:    * Unicode general category Sk = Symbol, Modifier (Informative).
1918:    *
1919:    * @since 1.1
1920:    */
1921:   public static final byte MODIFIER_SYMBOL = 27;
1922: 
1923:   /**
1924:    * Unicode general category So = Symbol, Other (Informative).
1925:    *
1926:    * @since 1.1
1927:    */
1928:   public static final byte OTHER_SYMBOL = 28;
1929: 
1930:   /**
1931:    * Undefined bidirectional character type. Char values that are not
1932:    * defined have undefined directionality in the Unicode specification.
1933:    *
1934:    * @since 1.4
1935:    */
1936:   public static final byte DIRECTIONALITY_UNDEFINED = -1;
1937: 
1938:   /**
1939:    * Strong bidirectional character type "L" (left-to-right).
1940:    *
1941:    * @since 1.4
1942:    */
1943:   public static final byte DIRECTIONALITY_LEFT_TO_RIGHT = 0;
1944: 
1945:   /**
1946:    * Strong bidirectional character type "R" (right-to-left).
1947:    *
1948:    * @since 1.4
1949:    */
1950:   public static final byte DIRECTIONALITY_RIGHT_TO_LEFT = 1;
1951: 
1952:   /**
1953:    * Strong bidirectional character type "AL" (right-to-left Arabic).
1954:    *
1955:    * @since 1.4
1956:    */
1957:   public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC = 2;
1958: 
1959:   /**
1960:    * Weak bidirectional character type "EN" (European number).
1961:    *
1962:    * @since 1.4
1963:    */
1964:   public static final byte DIRECTIONALITY_EUROPEAN_NUMBER = 3;
1965: 
1966:   /**
1967:    * Weak bidirectional character type "ES" (European number separator).
1968:    *
1969:    * @since 1.4
1970:    */
1971:   public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR = 4;
1972: 
1973:   /**
1974:    * Weak bidirectional character type "ET" (European number terminator).
1975:    *
1976:    * @since 1.4
1977:    */
1978:   public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR = 5;
1979: 
1980:   /**
1981:    * Weak bidirectional character type "AN" (Arabic number).
1982:    *
1983:    * @since 1.4
1984:    */
1985:   public static final byte DIRECTIONALITY_ARABIC_NUMBER = 6;
1986: 
1987:   /**
1988:    * Weak bidirectional character type "CS" (common number separator).
1989:    *
1990:    * @since 1.4
1991:    */
1992:   public static final byte DIRECTIONALITY_COMMON_NUMBER_SEPARATOR = 7;
1993: 
1994:   /**
1995:    * Weak bidirectional character type "NSM" (nonspacing mark).
1996:    *
1997:    * @since 1.4
1998:    */
1999:   public static final byte DIRECTIONALITY_NONSPACING_MARK = 8;
2000: 
2001:   /**
2002:    * Weak bidirectional character type "BN" (boundary neutral).
2003:    *
2004:    * @since 1.4
2005:    */
2006:   public static final byte DIRECTIONALITY_BOUNDARY_NEUTRAL = 9;
2007: 
2008:   /**
2009:    * Neutral bidirectional character type "B" (paragraph separator).
2010:    *
2011:    * @since 1.4
2012:    */
2013:   public static final byte DIRECTIONALITY_PARAGRAPH_SEPARATOR = 10;
2014: 
2015:   /**
2016:    * Neutral bidirectional character type "S" (segment separator).
2017:    *
2018:    * @since 1.4
2019:    */
2020:   public static final byte DIRECTIONALITY_SEGMENT_SEPARATOR = 11;
2021: 
2022:   /**
2023:    * Neutral bidirectional character type "WS" (whitespace).
2024:    *
2025:    * @since 1.4
2026:    */
2027:   public static final byte DIRECTIONALITY_WHITESPACE = 12;
2028: 
2029:   /**
2030:    * Neutral bidirectional character type "ON" (other neutrals).
2031:    *
2032:    * @since 1.4
2033:    */
2034:   public static final byte DIRECTIONALITY_OTHER_NEUTRALS = 13;
2035: 
2036:   /**
2037:    * Strong bidirectional character type "LRE" (left-to-right embedding).
2038:    *
2039:    * @since 1.4
2040:    */
2041:   public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING = 14;
2042: 
2043:   /**
2044:    * Strong bidirectional character type "LRO" (left-to-right override).
2045:    *
2046:    * @since 1.4
2047:    */
2048:   public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE = 15;
2049: 
2050:   /**
2051:    * Strong bidirectional character type "RLE" (right-to-left embedding).
2052:    *
2053:    * @since 1.4
2054:    */
2055:   public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING = 16;
2056: 
2057:   /**
2058:    * Strong bidirectional character type "RLO" (right-to-left override).
2059:    *
2060:    * @since 1.4
2061:    */
2062:   public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE = 17;
2063: 
2064:   /**
2065:    * Weak bidirectional character type "PDF" (pop directional format).
2066:    *
2067:    * @since 1.4
2068:    */
2069:   public static final byte DIRECTIONALITY_POP_DIRECTIONAL_FORMAT = 18;
2070: 
2071:   /**
2072:    * Mask selecting the low five bits (the type) of the result of readChar.
2073:    * @see #readChar(char)
2074:    */
2075:   private static final int TYPE_MASK = 0x1F;
2076: 
2077:   /**
2078:    * Mask selecting bit 5 of the result of readChar, which holds the
2079:    * non-breaking space flag.
2080:    * @see #readChar(char)
2081:    */
2082:   private static final int NO_BREAK_MASK = 0x20;
2083: 
2084:   /**
2085:    * Mask selecting bit 6 of the result of readChar, which holds the
2086:    * mirrored directionality flag.
2087:    * @see #readChar(char)
2088:    */
2089:   private static final int MIRROR_MASK = 0x40;
2090: 
2091:   /**
2092:    * Smallest supplementary code point, i.e. the first value above 0xFFFF.
2093:    *
2094:    * @since 1.5
2095:    */
2096:   public static final int MIN_SUPPLEMENTARY_CODE_POINT = 0x10000;
2097: 
2098:   /**
2099:    * Smallest code point value.
2100:    *
2101:    * @since 1.5
2102:    */
2103:   public static final int MIN_CODE_POINT = 0; 
2104:  
2105:  
2106:   /**
2107:    * Largest code point value.
2108:    *
2109:    * @since 1.5
2110:    */
2111:   public static final int MAX_CODE_POINT = 0x010ffff;
2112: 
2113: 
2114:   /**
2115:    * Minimum high surrogate code in UTF-16 encoding.
2116:    *
2117:    * @since 1.5
2118:    */
2119:   public static final char MIN_HIGH_SURROGATE = '\ud800';
2120: 
2121:   /**
2122:    * Maximum high surrogate code in UTF-16 encoding.
2123:    *
2124:    * @since 1.5
2125:    */
2126:   public static final char MAX_HIGH_SURROGATE = '\udbff';
2127:  
2128:   /**
2129:    * Minimum low surrogate code in UTF-16 encoding.
2130:    *
2131:    * @since 1.5
2132:    */
2133:   public static final char MIN_LOW_SURROGATE = '\udc00';
2134: 
2135:   /**
2136:    * Maximum low surrogate code in UTF-16 encoding.
2137:    *
2138:    * @since 1.5
2139:    */
2140:   public static final char MAX_LOW_SURROGATE = '\udfff';
2141: 
2142:   /**
2143:    * Minimum surrogate code in UTF-16 encoding (the minimum high surrogate).
2144:    *
2145:    * @since 1.5
2146:    */
2147:   public static final char MIN_SURROGATE = MIN_HIGH_SURROGATE;
2148: 
2149:   /**
2150:    * Maximum surrogate code in UTF-16 encoding (the maximum low surrogate).
2151:    *
2152:    * @since 1.5
2153:    */
2154:   public static final char MAX_SURROGATE = MAX_LOW_SURROGATE;
2155: 
2156:   /**
2157:    * Native lookup of a character's packed 16-bit attribute word in the
2158:    * Unicode attribute database. The lower 5 bits are the character type,
2159:    * the next 2 bits are flags, and the top 9 bits are the offset into the
2160:    * attribute tables. The top 9 bits are meaningless to Java callers; they
2161:    * are useful only in the native code.
2162:    *
2163:    * @param ch the character to look up
2164:    * @return the character's attribute offset, flags and type
2165:    * @see #TYPE_MASK
2166:    * @see #NO_BREAK_MASK
2167:    * @see #MIRROR_MASK
2168:    */
2169:   private static native char readChar(char ch);
2170: 
2171:   /**
2172:    * Native lookup of a code point's packed 16-bit attribute word in the
2173:    * Unicode attribute database. The lower 5 bits are the character type,
2174:    * the next 2 bits are flags, and the top 9 bits are the offset into the
2175:    * attribute tables. The top 9 bits are meaningless to Java callers; they
2176:    * are useful only in the native code.
2177:    *
2178:    * @param codePoint the code point to look up
2179:    * @return the code point's attribute offset, flags and type
2180:    * @see #TYPE_MASK
2181:    * @see #NO_BREAK_MASK
2182:    * @see #MIRROR_MASK
2183:    */
2184:   private static native char readCodePoint(int codePoint);
2185: 
2186:   /**
2187:    * Creates a wrapper object that holds the given character.
2188:    *
2189:    * @param value the character to wrap
2190:    */
2191:   public Character(char value)
2192:   {
2193:     this.value = value;
2194:   }
2195: 
2196:   /**
2197:    * Unwraps this object, yielding the primitive character it holds.
2198:    *
2199:    * @return the <code>char</code> stored in this instance
2200:    */
2201:   public char charValue()
2202:   {
2203:     return this.value;
2204:   }
2205: 
2206:   /**
2207:    * Computes the hash code, which is simply the wrapped character widened
2208:    * to an int. Results therefore lie in the range 0x0000-0xFFFF.
2209:    *
2210:    * @return the unsigned numerical value of the wrapped character
2211:    */
2212:   public int hashCode()
2213:   {
2214:     return (int) this.value;
2215:   }
2216: 
2217:   /**
2218:    * Tests for equality; only a Character wrapping the same char is equal.
2219:    * @param o object to compare
2220:    * @return true if o is a Character with the same value
2221:    */
2222:   public boolean equals(Object o)
2223:   {
2224:     if (! (o instanceof Character))
2225:       return false;
2226:     return ((Character) o).value == this.value;
2227:   }
2228: 
2229:   /**
2230:    * Produces the textual form of this object: a String of length one.
2231:    *
2232:    * @return a String containing one character -- the wrapped character
2233:    *         of this instance
2234:    */
2235:   public String toString()
2236:   {
2237:     // String.valueOf(char) is assumed to build a one-character String
2238:     // more cheaply than anything available through the public API.
2239:     return String.valueOf(this.value);
2240:   }
2241: 
2242:   /**
2243:    * Builds a String of length 1 that consists of the given character.
2244:    *
2245:    * @param ch the character to convert
2246:    * @return a String containing the character
2247:    * @since 1.4
2248:    */
2249:   public static String toString(char ch)
2250:   {
2251:     // String.valueOf(char) is assumed to be the cheapest way to do this.
2252:     final String text = String.valueOf(ch);
2253:     return text;
2254:   }
2255: 
2256:   /**
2257:    * Tests whether the type of a character is Ll, i.e. whether it is a
2258:    * Unicode lowercase letter such as <code>'a'</code>.
2259:    * <br>lowercase = [Ll]
2260:    *
2261:    * @param ch character to test
2262:    * @return true if ch is a Unicode lowercase letter, else false
2263:    * @see #isUpperCase(char)
2264:    * @see #isTitleCase(char)
2265:    * @see #toLowerCase(char)
2266:    * @see #getType(char)
2267:    */
2268:   public static boolean isLowerCase(char ch)
2269:   {
2270:     final int category = getType(ch);
2271:     return category == LOWERCASE_LETTER;
2272:   }
2273: 
2274:   /**
2275:    * Code point variant of isLowerCase(char): tests whether a code point,
2276:    * possibly a supplementary one, is a Unicode lowercase letter such as
2277:    * <code>'a'</code>.
2278:    * <br>lowercase = [Ll]
2279:    *
2280:    * @param codePoint character to test
2281:    * @return true if codePoint is a Unicode lowercase letter, else false
2282:    * @see #isUpperCase(int)
2283:    * @see #isTitleCase(int)
2284:    * @see #toLowerCase(int)
2285:    * @see #getType(int)
2286:    * @since 1.5
2287:    */
2288:   public static boolean isLowerCase(int codePoint)
2289:   {
2290:     final int category = getType(codePoint);
2291:     return category == LOWERCASE_LETTER;
2292:   }
2293: 
2294:   /**
2295:    * Tests whether the type of a character is Lu, i.e. whether it is a
2296:    * Unicode uppercase letter such as <code>'A'</code>.
2297:    * <br>uppercase = [Lu]
2298:    *
2299:    * @param ch character to test
2300:    * @return true if ch is a Unicode uppercase letter, else false
2301:    * @see #isLowerCase(char)
2302:    * @see #isTitleCase(char)
2303:    * @see #toUpperCase(char)
2304:    * @see #getType(char)
2305:    */
2306:   public static boolean isUpperCase(char ch)
2307:   {
2308:     final int category = getType(ch);
2309:     return category == UPPERCASE_LETTER;
2310:   }
2311: 
2312:   /**
2313:    * Code point variant of isUpperCase(char): tests whether a code point,
2314:    * possibly a supplementary one, is a Unicode uppercase letter such as
2315:    * <code>'A'</code>.
2316:    * <br>uppercase = [Lu]
2317:    *
2318:    * @param codePoint character to test
2319:    * @return true if codePoint is a Unicode uppercase letter, else false
2320:    * @see #isLowerCase(int)
2321:    * @see #isTitleCase(int)
2322:    * @see #toUpperCase(int)
2323:    * @see #getType(int)
2324:    * @since 1.5
2325:    */
2326:   public static boolean isUpperCase(int codePoint)
2327:   {
2328:     final int category = getType(codePoint);
2329:     return category == UPPERCASE_LETTER;
2330:   }
2331: 
2332:   /**
2333:    * Tests whether a character is a Unicode titlecase letter, for example
2334:    * the character "Lj" (Latin capital L with small letter j).
2335:    * <br>titlecase = [Lt]
2336:    *
2337:    * @param ch character to test
2338:    * @return true if ch is a Unicode titlecase letter, else false
2339:    * @see #isLowerCase(char)
2340:    * @see #isUpperCase(char)
2341:    * @see #toTitleCase(char)
2342:    * @see #getType(char)
2343:    */
2344:   public static boolean isTitleCase(char ch)
2345:   {
2346:     final int category = getType(ch);
2347:     return category == TITLECASE_LETTER;
2348:   }
2349: 
2350:   /**
2351:    * Code point variant of isTitleCase(char): tests whether a code point,
2352:    * possibly a supplementary one, is a Unicode titlecase letter. For
2353:    * example, the character "Lj" (Latin capital L with small letter j) is
2354:    * titlecase.
2355:    * <br>titlecase = [Lt]
2356:    *
2357:    * @param codePoint character to test
2358:    * @return true if codePoint is a Unicode titlecase letter, else false
2359:    * @see #isLowerCase(int)
2360:    * @see #isUpperCase(int)
2361:    * @see #toTitleCase(int)
2362:    * @see #getType(int)
2363:    * @since 1.5
2364:    */
2365:   public static boolean isTitleCase(int codePoint)
2366:   {
2367:     final int category = getType(codePoint);
2368:     return category == TITLECASE_LETTER;
2369:   }
2370: 
2371:   /**
2372:    * Tests whether a character is a Unicode decimal digit such as
2373:    * <code>'0'</code>.
2374:    * <br>Unicode decimal digit = [Nd]
2375:    *
2376:    * @param ch character to test
2377:    * @return true if ch is a Unicode decimal digit, else false
2378:    * @see #digit(char, int)
2379:    * @see #forDigit(int, int)
2380:    * @see #getType(char)
2381:    */
2382:   public static boolean isDigit(char ch)
2383:   {
2384:     final int category = getType(ch);
2385:     return category == DECIMAL_DIGIT_NUMBER;
2386:   }
2387: 
2388:   /**
2389:    * Code point variant of isDigit(char): tests whether a code point,
2390:    * possibly a supplementary one, is a Unicode decimal digit such as
2391:    * <code>'0'</code>.
2392:    * <br>Unicode decimal digit = [Nd]
2393:    *
2394:    * @param codePoint character to test
2395:    * @return true if codePoint is a Unicode decimal digit, else false
2396:    * @see #digit(int, int)
2397:    * @see #forDigit(int, int)
2398:    * @see #getType(int)
2399:    * @since 1.5
2400:    */
2401:   public static boolean isDigit(int codePoint)
2402:   {
2403:     final int category = getType(codePoint);
2404:     return category == DECIMAL_DIGIT_NUMBER;
2405:   }
2406: 
2407:   /**
2408:    * Tests whether a character is part of the Unicode Standard. The standard
2409:    * keeps evolving, but every character in the data file counts as defined.
2410:    * <br>defined = not [Cn]
2411:    *
2412:    * @param ch character to test
2413:    * @return true if ch is a Unicode character, else false
2414:    * @see #isDigit(char)
2415:    * @see #isLetter(char)
2416:    * @see #isLetterOrDigit(char)
2417:    * @see #isLowerCase(char)
2418:    * @see #isTitleCase(char)
2419:    * @see #isUpperCase(char)
2420:    */
2421:   public static boolean isDefined(char ch)
2422:   {
2423:     final int category = getType(ch);
2424:     return category != UNASSIGNED;
2425:   }
2426: 
2427:   /**
2428:    * Code point variant of isDefined(char): tests whether a code point,
2429:    * possibly a supplementary one, is part of the Unicode Standard. The
2430:    * standard keeps evolving, but covers every character in the data file.
2431:    * <br>defined = not [Cn]
2432:    *
2433:    * @param codePoint character to test
2434:    * @return true if codePoint is a Unicode character, else false
2435:    * @see #isDigit(int)
2436:    * @see #isLetter(int)
2437:    * @see #isLetterOrDigit(int)
2438:    * @see #isLowerCase(int)
2439:    * @see #isTitleCase(int)
2440:    * @see #isUpperCase(int)
2441:    * @since 1.5
2442:    */
2443:   public static boolean isDefined(int codePoint)
2444:   {
2445:     final int category = getType(codePoint);
2446:     return category != UNASSIGNED;
2447:   }
2448: 
2449:   /**
2450:    * Tests whether a character is a Unicode letter. Letters need not have a
2451:    * case, so isLowerCase and isUpperCase may both be false for a letter.
2452:    * <br>
2453:    * letter = [Lu]|[Ll]|[Lt]|[Lm]|[Lo]
2454:    *
2455:    * @param ch character to test
2456:    * @return true if ch is a Unicode letter, else false
2457:    * @see #isDigit(char)
2458:    * @see #isJavaIdentifierStart(char)
2459:    * @see #isJavaLetter(char)
2460:    * @see #isJavaLetterOrDigit(char)
2461:    * @see #isLetterOrDigit(char)
2462:    * @see #isLowerCase(char)
2463:    * @see #isTitleCase(char)
2464:    * @see #isUnicodeIdentifierStart(char)
2465:    * @see #isUpperCase(char)
2466:    */
2467:   public static boolean isLetter(char ch)
2468:   {
2469:     final int letterMask = (1 << UPPERCASE_LETTER)
2470:                            | (1 << LOWERCASE_LETTER)
2471:                            | (1 << TITLECASE_LETTER)
2472:                            | (1 << MODIFIER_LETTER)
2473:                            | (1 << OTHER_LETTER);
2474:     return (letterMask & (1 << getType(ch))) != 0;
2475:   }
2476: 
2477:   /**
2478:    * Code point variant of isLetter(char): tests whether a code point,
2479:    * possibly a supplementary one, is a Unicode letter. Letters need not
2480:    * have a case, so isLowerCase and isUpperCase may both be false for a
2481:    * letter.
2482:    * <br>
2483:    * letter = [Lu]|[Ll]|[Lt]|[Lm]|[Lo]
2484:    *
2485:    * @param codePoint character to test
2486:    * @return true if codePoint is a Unicode letter, else false
2487:    * @see #isDigit(int)
2488:    * @see #isJavaIdentifierStart(int)
2489:    * @see #isJavaLetter(char)
2490:    * @see #isJavaLetterOrDigit(char)
2491:    * @see #isLetterOrDigit(int)
2492:    * @see #isLowerCase(int)
2493:    * @see #isTitleCase(int)
2494:    * @see #isUnicodeIdentifierStart(int)
2495:    * @see #isUpperCase(int)
2496:    * @since 1.5
2497:    */
2498:   public static boolean isLetter(int codePoint)
2499:   {
2500:     final int letterMask = (1 << UPPERCASE_LETTER)
2501:                            | (1 << LOWERCASE_LETTER)
2502:                            | (1 << TITLECASE_LETTER)
2503:                            | (1 << MODIFIER_LETTER)
2504:                            | (1 << OTHER_LETTER);
2505:     return (letterMask & (1 << getType(codePoint))) != 0;
2506:   }
2507: 
2508:   /**
2509:    * Tests whether a character is a Unicode letter or a Unicode digit, that
2510:    * is, whether isLetter or isDigit would accept it.
2511:    * <br>
2512:    * letter or digit = [Lu]|[Ll]|[Lt]|[Lm]|[Lo]|[Nd]
2513:    *
2514:    * @param ch character to test
2515:    * @return true if ch is a Unicode letter or a Unicode digit, else false
2516:    * @see #isDigit(char)
2517:    * @see #isJavaIdentifierPart(char)
2518:    * @see #isJavaLetter(char)
2519:    * @see #isJavaLetterOrDigit(char)
2520:    * @see #isLetter(char)
2521:    * @see #isUnicodeIdentifierPart(char)
2522:    */
2523:   public static boolean isLetterOrDigit(char ch)
2524:   {
2525:     final int acceptedMask = (1 << UPPERCASE_LETTER)
2526:                              | (1 << LOWERCASE_LETTER)
2527:                              | (1 << TITLECASE_LETTER)
2528:                              | (1 << MODIFIER_LETTER)
2529:                              | (1 << OTHER_LETTER)
2530:                              | (1 << DECIMAL_DIGIT_NUMBER);
2531:     return (acceptedMask & (1 << getType(ch))) != 0;
2532:   }
2533: 
2534:   /**
2535:    * Code point variant of isLetterOrDigit(char): tests whether a code
2536:    * point, possibly a supplementary one, is a Unicode letter or a Unicode
2537:    * digit, that is, whether isLetter or isDigit would accept it.
2538:    * <br>
2539:    * letter or digit = [Lu]|[Ll]|[Lt]|[Lm]|[Lo]|[Nd]
2540:    *
2541:    * @param codePoint character to test
2542:    * @return true if codePoint is a Unicode letter or a Unicode digit, else false
2543:    * @see #isDigit(int)
2544:    * @see #isJavaIdentifierPart(int)
2545:    * @see #isJavaLetter(char)
2546:    * @see #isJavaLetterOrDigit(char)
2547:    * @see #isLetter(int)
2548:    * @see #isUnicodeIdentifierPart(int)
2549:    * @since 1.5
2550:    */
2551:   public static boolean isLetterOrDigit(int codePoint)
2552:   {
2553:     final int acceptedMask = (1 << UPPERCASE_LETTER)
2554:                              | (1 << LOWERCASE_LETTER)
2555:                              | (1 << TITLECASE_LETTER)
2556:                              | (1 << MODIFIER_LETTER)
2557:                              | (1 << OTHER_LETTER)
2558:                              | (1 << DECIMAL_DIGIT_NUMBER);
2559:     return (acceptedMask & (1 << getType(codePoint))) != 0;
2560:   }
2561: 
2562:   /**
2563:    * Tests whether a character can start a Java identifier: any letter, any
2564:    * character whose getType is LETTER_NUMBER, a currency symbol (like '$')
2565:    * or connecting punctuation (like '_').
2566:    *
2567:    * @param ch character to test
2568:    * @return true if ch can start a Java identifier, else false
2569:    * @deprecated Replaced by {@link #isJavaIdentifierStart(char)}
2570:    * @see #isJavaLetterOrDigit(char)
2571:    * @see #isJavaIdentifierStart(char)
2572:    * @see #isJavaIdentifierPart(char)
2573:    * @see #isLetter(char)
2574:    * @see #isLetterOrDigit(char)
2575:    * @see #isUnicodeIdentifierStart(char)
2576:    */
2577:   public static boolean isJavaLetter(char ch)
2578:   {
2579:     final boolean canStart = isJavaIdentifierStart(ch);
2580:     return canStart;
2581:   }
2582: 
2583:   /**
2584:    * Code point variant of isJavaIdentifierStart(char): tests whether a
2585:    * code point, possibly a supplementary one, can start a Java identifier.
2586:    * Accepted are all letters, any character whose getType is
2587:    * LETTER_NUMBER, currency symbols (like '$'), and connecting punctuation
2588:    * (like '_').
2589:    * <br>
2590:    * Java identifier start = [Lu]|[Ll]|[Lt]|[Lm]|[Lo]|[Nl]|[Sc]|[Pc]
2591:    *
2592:    * @param codePoint character to test
2593:    * @return true if codePoint can start a Java identifier, else false
2594:    * @see #isJavaIdentifierPart(int)
2595:    * @see #isLetter(int)
2596:    * @see #isUnicodeIdentifierStart(int)
2597:    * @since 1.5
2598:    */
2599:   public static boolean isJavaIdentifierStart(int codePoint)
2600:   {
2601:     final int startMask = (1 << UPPERCASE_LETTER)
2602:                           | (1 << LOWERCASE_LETTER)
2603:                           | (1 << TITLECASE_LETTER)
2604:                           | (1 << MODIFIER_LETTER)
2605:                           | (1 << OTHER_LETTER)
2606:                           | (1 << LETTER_NUMBER)
2607:                           | (1 << CURRENCY_SYMBOL)
2608:                           | (1 << CONNECTOR_PUNCTUATION);
2609:     return (startMask & (1 << getType(codePoint))) != 0;
2610:   }
2611: 
2612:   /**
2613:    * Tests whether a character can follow the first letter in a Java
2614:    * identifier: anything isJavaLetter accepts, plus digits, numeric
2615:    * letters (like Roman numerals), combining marks, non-spacing marks,
2616:    * and characters for which isIdentifierIgnorable holds.
2617:    *
2618:    * @param ch character to test
2619:    * @return true if ch can follow the first letter in a Java identifier
2620:    * @deprecated Replaced by {@link #isJavaIdentifierPart(char)}
2621:    * @see #isJavaLetter(char)
2622:    * @see #isJavaIdentifierStart(char)
2623:    * @see #isJavaIdentifierPart(char)
2624:    * @see #isLetter(char)
2625:    * @see #isLetterOrDigit(char)
2626:    * @see #isUnicodeIdentifierPart(char)
2627:    * @see #isIdentifierIgnorable(char)
2628:    */
2629:   public static boolean isJavaLetterOrDigit(char ch)
2630:   {
2631:     final boolean canFollow = isJavaIdentifierPart(ch);
2632:     return canFollow;
2633:   }
2634: 
2635:   /**
2636:    * Tests whether a character can start a Java identifier. Accepted are
2637:    * all letters, any character whose getType is LETTER_NUMBER, currency
2638:    * symbols (like '$'), and connecting punctuation
2639:    * (like '_').
2640:    * <br>
2641:    * Java identifier start = [Lu]|[Ll]|[Lt]|[Lm]|[Lo]|[Nl]|[Sc]|[Pc]
2642:    *
2643:    * @param ch character to test
2644:    * @return true if ch can start a Java identifier, else false
2645:    * @see #isJavaIdentifierPart(char)
2646:    * @see #isLetter(char)
2647:    * @see #isUnicodeIdentifierStart(char)
2648:    * @since 1.1
2649:    */
2650:   public static boolean isJavaIdentifierStart(char ch)
2651:   {
2652:     final int startMask = (1 << UPPERCASE_LETTER)
2653:                           | (1 << LOWERCASE_LETTER)
2654:                           | (1 << TITLECASE_LETTER)
2655:                           | (1 << MODIFIER_LETTER)
2656:                           | (1 << OTHER_LETTER)
2657:                           | (1 << LETTER_NUMBER)
2658:                           | (1 << CURRENCY_SYMBOL)
2659:                           | (1 << CONNECTOR_PUNCTUATION);
2660:     return (startMask & (1 << getType(ch))) != 0;
2661:   }
2662: 
2663:   /**
2664:    * Tests whether a character may appear after the first character of a
2665:    * Java identifier. That covers everything isJavaIdentifierStart accepts
2666:    * plus digits, combining and non-spacing marks, format characters, and
2667:    * control characters for which isIdentifierIgnorable holds.
2668:    * <br>
2669:    * Java identifier extender =
2670:    *   [Lu]|[Ll]|[Lt]|[Lm]|[Lo]|[Nl]|[Sc]|[Pc]|[Mn]|[Mc]|[Nd]|[Cf]
2671:    *   |U+0000-U+0008|U+000E-U+001B|U+007F-U+009F
2672:    *
2673:    * @param ch character to test
2674:    * @return true if ch can follow the first letter in a Java identifier
2675:    * @see #isIdentifierIgnorable(char)
2676:    * @see #isJavaIdentifierStart(char)
2677:    * @see #isLetterOrDigit(char)
2678:    * @see #isUnicodeIdentifierPart(char)
2679:    * @since 1.1
2680:    */
2681:   public static boolean isJavaIdentifierPart(char ch)
2682:   {
2683:     final int type = getType(ch);
2684:     final int partMask = (1 << UPPERCASE_LETTER)
2685:                          | (1 << LOWERCASE_LETTER)
2686:                          | (1 << TITLECASE_LETTER)
2687:                          | (1 << MODIFIER_LETTER)
2688:                          | (1 << OTHER_LETTER)
2689:                          | (1 << NON_SPACING_MARK)
2690:                          | (1 << COMBINING_SPACING_MARK)
2691:                          | (1 << DECIMAL_DIGIT_NUMBER)
2692:                          | (1 << LETTER_NUMBER)
2693:                          | (1 << CURRENCY_SYMBOL)
2694:                          | (1 << CONNECTOR_PUNCTUATION)
2695:                          | (1 << FORMAT);
2696:     if ((partMask & (1 << type)) != 0)
2697:       return true;
2698:     return type == CONTROL && isIdentifierIgnorable(ch);
2699:   }
2700: 
2701:   /**
2702:    * Code point variant of isJavaIdentifierPart(char): tests whether a code
2703:    * point, possibly a supplementary one, may appear after the first
2704:    * character of a Java identifier. That covers everything
2705:    * isJavaIdentifierStart accepts plus digits, combining and non-spacing
2706:    * marks, format characters, and ignorable control characters.
2707:    * <br>
2708:    * Java identifier extender =
2709:    *   [Lu]|[Ll]|[Lt]|[Lm]|[Lo]|[Nl]|[Sc]|[Pc]|[Mn]|[Mc]|[Nd]|[Cf]
2710:    *   |U+0000-U+0008|U+000E-U+001B|U+007F-U+009F
2711:    *
2712:    * @param codePoint character to test
2713:    * @return true if codePoint can follow the first letter in a Java identifier
2714:    * @see #isIdentifierIgnorable(int)
2715:    * @see #isJavaIdentifierStart(int)
2716:    * @see #isLetterOrDigit(int)
2717:    * @see #isUnicodeIdentifierPart(int)
2718:    * @since 1.5
2719:    */
2720:   public static boolean isJavaIdentifierPart(int codePoint)
2721:   {
2722:     final int type = getType(codePoint);
2723:     final int partMask = (1 << UPPERCASE_LETTER)
2724:                          | (1 << LOWERCASE_LETTER)
2725:                          | (1 << TITLECASE_LETTER)
2726:                          | (1 << MODIFIER_LETTER)
2727:                          | (1 << OTHER_LETTER)
2728:                          | (1 << NON_SPACING_MARK)
2729:                          | (1 << COMBINING_SPACING_MARK)
2730:                          | (1 << DECIMAL_DIGIT_NUMBER)
2731:                          | (1 << LETTER_NUMBER)
2732:                          | (1 << CURRENCY_SYMBOL)
2733:                          | (1 << CONNECTOR_PUNCTUATION)
2734:                          | (1 << FORMAT);
2735:     if ((partMask & (1 << type)) != 0)
2736:       return true;
2737:     return type == CONTROL && isIdentifierIgnorable(codePoint);
2738:   }
2739: 
2740:   /**
2741:    * Tests whether a character can start a Unicode identifier. Only
2742:    * letters qualify, where "letter" here also includes the characters
2743:    * whose type is LETTER_NUMBER.
2744:    * <br>
2745:    * Unicode identifier start = [Lu]|[Ll]|[Lt]|[Lm]|[Lo]|[Nl]
2746:    *
2747:    * @param ch character to test
2748:    * @return true if ch can start a Unicode identifier, else false
2749:    * @see #isJavaIdentifierStart(char)
2750:    * @see #isLetter(char)
2751:    * @see #isUnicodeIdentifierPart(char)
2752:    * @since 1.1
2753:    */
2754:   public static boolean isUnicodeIdentifierStart(char ch)
2755:   {
2756:     final int startMask = (1 << UPPERCASE_LETTER)
2757:                           | (1 << LOWERCASE_LETTER)
2758:                           | (1 << TITLECASE_LETTER)
2759:                           | (1 << MODIFIER_LETTER)
2760:                           | (1 << OTHER_LETTER)
2761:                           | (1 << LETTER_NUMBER);
2762:     return (startMask & (1 << getType(ch))) != 0;
2763:   }
2764: 
2765:   /**
2766:    * Code point variant of isUnicodeIdentifierStart(char): tests whether a
2767:    * code point, possibly a supplementary one, can start a Unicode
2768:    * identifier. Only letters qualify, where "letter" here also includes
2769:    * the characters whose type is LETTER_NUMBER.
2770:    * <br>
2771:    * Unicode identifier start = [Lu]|[Ll]|[Lt]|[Lm]|[Lo]|[Nl]
2772:    *
2773:    * @param codePoint character to test
2774:    * @return true if codePoint can start a Unicode identifier, else false
2775:    * @see #isJavaIdentifierStart(int)
2776:    * @see #isLetter(int)
2777:    * @see #isUnicodeIdentifierPart(int)
2778:    * @since 1.5
2779:    */
2780:   public static boolean isUnicodeIdentifierStart(int codePoint)
2781:   {
2782:     final int startMask = (1 << UPPERCASE_LETTER)
2783:                           | (1 << LOWERCASE_LETTER)
2784:                           | (1 << TITLECASE_LETTER)
2785:                           | (1 << MODIFIER_LETTER)
2786:                           | (1 << OTHER_LETTER)
2787:                           | (1 << LETTER_NUMBER);
2788:     return (startMask & (1 << getType(codePoint))) != 0;
2789:   }
2790: 
2791:   /**
2792:    * Tests whether a character can follow the first letter in a Unicode
2793:    * identifier: letters, connecting punctuation, digits, numeric letters,
2794:    * combining and non-spacing marks, and isIdentifierIgnorable characters.
2795:    * <br>
2796:    * Unicode identifier extender =
2797:    *   [Lu]|[Ll]|[Lt]|[Lm]|[Lo]|[Nl]|[Mn]|[Mc]|[Nd]|[Pc]|[Cf]
2798:    *   |U+0000-U+0008|U+000E-U+001B|U+007F-U+009F
2799:    *
2800:    * @param ch character to test
2801:    * @return true if ch can follow the first letter in a Unicode identifier
2802:    * @see #isIdentifierIgnorable(char)
2803:    * @see #isJavaIdentifierPart(char)
2804:    * @see #isLetterOrDigit(char)
2805:    * @see #isUnicodeIdentifierStart(char)
2806:    * @since 1.1
2807:    */
2808:   public static boolean isUnicodeIdentifierPart(char ch)
2809:   {
2810:     final int type = getType(ch);
2811:     final int partMask = (1 << UPPERCASE_LETTER)
2812:                          | (1 << LOWERCASE_LETTER)
2813:                          | (1 << TITLECASE_LETTER)
2814:                          | (1 << MODIFIER_LETTER)
2815:                          | (1 << OTHER_LETTER)
2816:                          | (1 << NON_SPACING_MARK)
2817:                          | (1 << COMBINING_SPACING_MARK)
2818:                          | (1 << DECIMAL_DIGIT_NUMBER)
2819:                          | (1 << LETTER_NUMBER)
2820:                          | (1 << CONNECTOR_PUNCTUATION)
2821:                          | (1 << FORMAT);
2822:     if ((partMask & (1 << type)) != 0)
2823:       return true;
2824:     return type == CONTROL && isIdentifierIgnorable(ch);
2825:   }
2826: 
2827:   /**
2828:    * Determines if a character can follow the first letter in
2829:    * a Unicode identifier. This includes letters, connecting punctuation,
2830:    * digits, numeric letters, combining marks, non-spacing marks, and
2831:    * isIdentifierIgnorable.  Unlike isUnicodeIdentifierPart(char), this method
2832:    * supports supplementary Unicode code points.
2833:    * <br>
2834:    * Unicode identifier extender =
2835:    *   [Lu]|[Ll]|[Lt]|[Lm]|[Lo]|[Nl]|[Mn]|[Mc]|[Nd]|[Pc]|[Cf]|
2836:    *   |U+0000-U+0008|U+000E-U+001B|U+007F-U+009F
2837:    *
2838:    * @param codePoint character to test
2839:    * @return true if codePoint can follow the first letter in a Unicode 
2840:    *         identifier
2841:    * @see #isIdentifierIgnorable(int)
2842:    * @see #isJavaIdentifierPart(int)
2843:    * @see #isLetterOrDigit(int)
2844:    * @see #isUnicodeIdentifierStart(int)
2845:    * @since 1.5
2846:    */
2847:   public static boolean isUnicodeIdentifierPart(int codePoint)
2848:   {
2849:     int category = getType(codePoint);
2850:     return ((1 << category)
2851:             & ((1 << UPPERCASE_LETTER)
2852:                | (1 << LOWERCASE_LETTER)
2853:                | (1 << TITLECASE_LETTER)
2854:                | (1 << MODIFIER_LETTER)
2855:                | (1 << OTHER_LETTER)
2856:                | (1 << NON_SPACING_MARK)
2857:                | (1 << COMBINING_SPACING_MARK)
2858:                | (1 << DECIMAL_DIGIT_NUMBER)
2859:                | (1 << LETTER_NUMBER)
2860:                | (1 << CONNECTOR_PUNCTUATION)
2861:                | (1 << FORMAT))) != 0
2862:       || (category == CONTROL && isIdentifierIgnorable(codePoint));
2863:   }
2864: 
2865:   /**
2866:    * Determines if a character is ignorable in a Unicode identifier. This
2867:    * includes the non-whitespace ISO control characters (<code>'\u0000'</code>
2868:    * through <code>'\u0008'</code>, <code>'\u000E'</code> through
2869:    * <code>'\u001B'</code>, and <code>'\u007F'</code> through
2870:    * <code>'\u009F'</code>), and FORMAT characters.
2871:    * <br>
2872:    * Unicode identifier ignorable = [Cf]|U+0000-U+0008|U+000E-U+001B
2873:    *    |U+007F-U+009F
2874:    *
2875:    * @param ch character to test
2876:    * @return true if ch is ignorable in a Unicode or Java identifier
2877:    * @see #isJavaIdentifierPart(char)
2878:    * @see #isUnicodeIdentifierPart(char)
2879:    * @since 1.1
2880:    */
2881:   public static boolean isIdentifierIgnorable(char ch)
2882:   {
2883:     return (ch <= '\u009F' && (ch < '\t' || ch >= '\u007F'
2884:                                || (ch <= '\u001B' && ch >= '\u000E')))
2885:       || getType(ch) == FORMAT;
2886:   }
2887: 
2888:   /**
2889:    * Determines if a character is ignorable in a Unicode identifier. This
2890:    * includes the non-whitespace ISO control characters (<code>'\u0000'</code>
2891:    * through <code>'\u0008'</code>, <code>'\u000E'</code> through
2892:    * <code>'\u001B'</code>, and <code>'\u007F'</code> through
2893:    * <code>'\u009F'</code>), and FORMAT characters.  Unlike 
2894:    * isIdentifierIgnorable(char), this method supports supplementary Unicode
2895:    * code points.
2896:    * <br>
2897:    * Unicode identifier ignorable = [Cf]|U+0000-U+0008|U+000E-U+001B
2898:    *    |U+007F-U+009F
2899:    *
2900:    * @param codePoint character to test
2901:    * @return true if codePoint is ignorable in a Unicode or Java identifier
2902:    * @see #isJavaIdentifierPart(int)
2903:    * @see #isUnicodeIdentifierPart(int)
2904:    * @since 1.5
2905:    */
2906:   public static boolean isIdentifierIgnorable(int codePoint)
2907:   {
2908:     return ((codePoint >= 0 && codePoint <= 0x0008)
2909:         || (codePoint >= 0x000E && codePoint <= 0x001B)
2910:         || (codePoint >= 0x007F && codePoint <= 0x009F)
2911:             || getType(codePoint) == FORMAT);
2912:   }
2913: 
2914:   /**
2915:    * Converts a Unicode character into its lowercase equivalent mapping.
2916:    * If a mapping does not exist, then the character passed is returned.
2917:    * Note that isLowerCase(toLowerCase(ch)) does not always return true.
2918:    *
2919:    * @param ch character to convert to lowercase
2920:    * @return lowercase mapping of ch, or ch if lowercase mapping does
2921:    *         not exist
2922:    * @see #isLowerCase(char)
2923:    * @see #isUpperCase(char)
2924:    * @see #toTitleCase(char)
2925:    * @see #toUpperCase(char)
2926:    */
  // Declared native: this source has no Java body for the method.
  public static native char toLowerCase(char ch);
2928: 
2929:   /**
2930:    * Converts a Unicode character into its lowercase equivalent mapping.
2931:    * If a mapping does not exist, then the character passed is returned.
2932:    * Note that isLowerCase(toLowerCase(codePoint)) does not always return true.
2933:    * Unlike toLowerCase(char), this method supports supplementary Unicode
2934:    * code points.
2935:    *
2936:    * @param codePoint character to convert to lowercase
2937:    * @return lowercase mapping of codePoint, or codePoint if lowercase 
2938:    *         mapping does not exist
2939:    * @see #isLowerCase(int)
2940:    * @see #isUpperCase(int)
2941:    * @see #toTitleCase(int)
2942:    * @see #toUpperCase(int)
2943:    * @since 1.5
2944:    */
  // Declared native: this source has no Java body for the method.
  public static native int toLowerCase(int codePoint);
2946: 
2947:   /**
2948:    * Converts a Unicode character into its uppercase equivalent mapping.
2949:    * If a mapping does not exist, then the character passed is returned.
2950:    * Note that isUpperCase(toUpperCase(ch)) does not always return true.
2951:    *
2952:    * @param ch character to convert to uppercase
2953:    * @return uppercase mapping of ch, or ch if uppercase mapping does
2954:    *         not exist
2955:    * @see #isLowerCase(char)
2956:    * @see #isUpperCase(char)
2957:    * @see #toLowerCase(char)
2958:    * @see #toTitleCase(char)
2959:    */
  // Declared native: this source has no Java body for the method.
  public static native char toUpperCase(char ch);
2961: 
2962:   /**
2963:    * Converts a Unicode character into its uppercase equivalent mapping.
2964:    * If a mapping does not exist, then the character passed is returned.
2965:    * Note that isUpperCase(toUpperCase(codePoint)) does not always return true.
2966:    * Unlike toUpperCase(char), this method supports supplementary 
2967:    * Unicode code points.
2968:    *
2969:    * @param codePoint character to convert to uppercase
2970:    * @return uppercase mapping of codePoint, or codePoint if uppercase 
2971:    *         mapping does not exist
2972:    * @see #isLowerCase(int)
2973:    * @see #isUpperCase(int)
2974:    * @see #toLowerCase(int)
2975:    * @see #toTitleCase(int)
2976:    * @since 1.5
2977:    */
  // Declared native: this source has no Java body for the method.
  public static native int toUpperCase(int codePoint);
2979: 
2980:   /**
2981:    * Converts a Unicode character into its titlecase equivalent mapping.
2982:    * If a mapping does not exist, then the character passed is returned.
2983:    * Note that isTitleCase(toTitleCase(ch)) does not always return true.
2984:    *
2985:    * @param ch character to convert to titlecase
2986:    * @return titlecase mapping of ch, or ch if titlecase mapping does
2987:    *         not exist
2988:    * @see #isTitleCase(char)
2989:    * @see #toLowerCase(char)
2990:    * @see #toUpperCase(char)
2991:    */
  // Declared native: this source has no Java body for the method.
  public static native char toTitleCase(char ch);
2993: 
2994:   /**
2995:    * Converts a Unicode character into its titlecase equivalent mapping.
2996:    * If a mapping does not exist, then the character passed is returned.
2997:    * Note that isTitleCase(toTitleCase(codePoint)) does not always return true.
2998:    * Unlike toTitleCase(char), this method supports supplementary 
2999:    * Unicode code points.
3000:    * 
3001:    * @param codePoint character to convert to titlecase
3002:    * @return titlecase mapping of codePoint, or codePoint if titlecase 
3003:    *         mapping does not exist
3004:    * @see #isTitleCase(int)
3005:    * @see #toLowerCase(int)
3006:    * @see #toUpperCase(int)
3007:    * @since 1.5
3008:    */
  // Declared native: this source has no Java body for the method.
  public static native int toTitleCase(int codePoint);
3010: 
3011:   /**
   * Converts a character into a digit of the specified radix. If the radix
   * is less than MIN_RADIX or greater than MAX_RADIX, or if the result of
   * getNumericValue(ch) exceeds the radix, or if ch is neither a decimal
   * digit nor in the case insensitive set of 'a'-'z', the result is -1.
3016:    * <br>
3017:    * character argument boundary = [Nd]|U+0041-U+005A|U+0061-U+007A
3018:    *    |U+FF21-U+FF3A|U+FF41-U+FF5A
3019:    *
3020:    * @param ch character to convert into a digit
3021:    * @param radix radix in which ch is a digit
3022:    * @return digit which ch represents in radix, or -1 not a valid digit
3023:    * @see #MIN_RADIX
3024:    * @see #MAX_RADIX
3025:    * @see #forDigit(int, int)
3026:    * @see #isDigit(char)
3027:    * @see #getNumericValue(char)
3028:    */
  // Declared native: this source has no Java body for the method.
  public static native int digit(char ch, int radix);
3030: 
3031:   /**
   * Converts a character into a digit of the specified radix. If the radix
   * is less than MIN_RADIX or greater than MAX_RADIX, or if the result of
   * getNumericValue(int) exceeds the radix, or if codePoint is neither a
   * decimal digit nor in the case insensitive set of 'a'-'z', the result
   * is -1.  Unlike digit(char, int), this method supports supplementary
   * Unicode code points.
3037:    * <br>
3038:    * character argument boundary = [Nd]|U+0041-U+005A|U+0061-U+007A
3039:    *    |U+FF21-U+FF3A|U+FF41-U+FF5A
3040:    *
3041:    * @param codePoint character to convert into a digit
3042:    * @param radix radix in which codePoint is a digit
3043:    * @return digit which codePoint represents in radix, or -1 not a valid digit
3044:    * @see #MIN_RADIX
3045:    * @see #MAX_RADIX
3046:    * @see #forDigit(int, int)
3047:    * @see #isDigit(int)
3048:    * @see #getNumericValue(int)
3049:    * @since 1.5
3050:    */
  // Declared native: this source has no Java body for the method.
  public static native int digit(int codePoint, int radix);
3052: 
3053:   /**
3054:    * Returns the Unicode numeric value property of a character. For example,
3055:    * <code>'\\u216C'</code> (the Roman numeral fifty) returns 50.
3056:    *
3057:    * <p>This method also returns values for the letters A through Z, (not
3058:    * specified by Unicode), in these ranges: <code>'\u0041'</code>
3059:    * through <code>'\u005A'</code> (uppercase); <code>'\u0061'</code>
3060:    * through <code>'\u007A'</code> (lowercase); and <code>'\uFF21'</code>
3061:    * through <code>'\uFF3A'</code>, <code>'\uFF41'</code> through
3062:    * <code>'\uFF5A'</code> (full width variants).
3063:    *
3064:    * <p>If the character lacks a numeric value property, -1 is returned.
3065:    * If the character has a numeric value property which is not representable
3066:    * as a nonnegative integer, such as a fraction, -2 is returned.
3067:    *
3068:    * character argument boundary = [Nd]|[Nl]|[No]|U+0041-U+005A|U+0061-U+007A
3069:    *    |U+FF21-U+FF3A|U+FF41-U+FF5A
3070:    *
3071:    * @param ch character from which the numeric value property will
3072:    *        be retrieved
3073:    * @return the numeric value property of ch, or -1 if it does not exist, or
3074:    *         -2 if it is not representable as a nonnegative integer
3075:    * @see #forDigit(int, int)
3076:    * @see #digit(char, int)
3077:    * @see #isDigit(char)
3078:    * @since 1.1
3079:    */
  // Declared native: this source has no Java body for the method.
  public static native int getNumericValue(char ch);
3081: 
3082:   /**
3083:    * Returns the Unicode numeric value property of a character. For example,
3084:    * <code>'\\u216C'</code> (the Roman numeral fifty) returns 50.
3085:    *
3086:    * <p>This method also returns values for the letters A through Z, (not
3087:    * specified by Unicode), in these ranges: <code>'\u0041'</code>
3088:    * through <code>'\u005A'</code> (uppercase); <code>'\u0061'</code>
3089:    * through <code>'\u007A'</code> (lowercase); and <code>'\uFF21'</code>
3090:    * through <code>'\uFF3A'</code>, <code>'\uFF41'</code> through
3091:    * <code>'\uFF5A'</code> (full width variants).
3092:    *
3093:    * <p>If the character lacks a numeric value property, -1 is returned.
3094:    * If the character has a numeric value property which is not representable
3095:    * as a nonnegative integer, such as a fraction, -2 is returned.
3096:    *
3097:    * Unlike getNumericValue(char), this method supports supplementary Unicode
3098:    * code points.
3099:    *
3100:    * character argument boundary = [Nd]|[Nl]|[No]|U+0041-U+005A|U+0061-U+007A
3101:    *    |U+FF21-U+FF3A|U+FF41-U+FF5A
3102:    *
3103:    * @param codePoint character from which the numeric value property will
3104:    *        be retrieved
3105:    * @return the numeric value property of codePoint, or -1 if it does not 
3106:    *         exist, or -2 if it is not representable as a nonnegative integer
3107:    * @see #forDigit(int, int)
3108:    * @see #digit(int, int)
3109:    * @see #isDigit(int)
3110:    * @since 1.5
3111:    */
  // Declared native: this source has no Java body for the method.
  public static native int getNumericValue(int codePoint);
3113: 
3114:   /**
   * Determines if a character is an ISO-LATIN-1 space. This is only the five
3116:    * characters <code>'\t'</code>, <code>'\n'</code>, <code>'\f'</code>,
3117:    * <code>'\r'</code>, and <code>' '</code>.
3118:    * <br>
3119:    * Java space = U+0020|U+0009|U+000A|U+000C|U+000D
3120:    *
3121:    * @param ch character to test
3122:    * @return true if ch is a space, else false
3123:    * @deprecated Replaced by {@link #isWhitespace(char)}
3124:    * @see #isSpaceChar(char)
3125:    * @see #isWhitespace(char)
3126:    */
3127:   public static boolean isSpace(char ch)
3128:   {
3129:     // Performing the subtraction up front alleviates need to compare longs.
3130:     return ch-- <= ' ' && ((1 << ch)
3131:                            & ((1 << (' ' - 1))
3132:                               | (1 << ('\t' - 1))
3133:                               | (1 << ('\n' - 1))
3134:                               | (1 << ('\r' - 1))
3135:                               | (1 << ('\f' - 1)))) != 0;
3136:   }
3137: 
3138:   /**
3139:    * Determines if a character is a Unicode space character. This includes
3140:    * SPACE_SEPARATOR, LINE_SEPARATOR, and PARAGRAPH_SEPARATOR.
3141:    * <br>
3142:    * Unicode space = [Zs]|[Zp]|[Zl]
3143:    *
3144:    * @param ch character to test
3145:    * @return true if ch is a Unicode space, else false
3146:    * @see #isWhitespace(char)
3147:    * @since 1.1
3148:    */
3149:   public static boolean isSpaceChar(char ch)
3150:   {
3151:     return ((1 << getType(ch))
3152:             & ((1 << SPACE_SEPARATOR)
3153:                | (1 << LINE_SEPARATOR)
3154:                | (1 << PARAGRAPH_SEPARATOR))) != 0;
3155:   }
3156: 
3157:   /**
3158:    * Determines if a character is a Unicode space character. This includes
3159:    * SPACE_SEPARATOR, LINE_SEPARATOR, and PARAGRAPH_SEPARATOR.  Unlike
3160:    * isSpaceChar(char), this method supports supplementary Unicode code points.
3161:    * <br>
3162:    * Unicode space = [Zs]|[Zp]|[Zl]
3163:    *
3164:    * @param codePoint character to test
3165:    * @return true if codePoint is a Unicode space, else false
3166:    * @see #isWhitespace(int)
3167:    * @since 1.5
3168:    */
3169:   public static boolean isSpaceChar(int codePoint)
3170:   {
3171:     return ((1 << getType(codePoint))
3172:             & ((1 << SPACE_SEPARATOR)
3173:                | (1 << LINE_SEPARATOR)
3174:                | (1 << PARAGRAPH_SEPARATOR))) != 0;
3175:   }
3176: 
3177:   /**
3178:    * Determines if a character is Java whitespace. This includes Unicode
3179:    * space characters (SPACE_SEPARATOR, LINE_SEPARATOR, and
3180:    * PARAGRAPH_SEPARATOR) except the non-breaking spaces
3181:    * (<code>'\u00A0'</code>, <code>'\u2007'</code>, and <code>'\u202F'</code>);
3182:    * and these characters: <code>'\u0009'</code>, <code>'\u000A'</code>,
3183:    * <code>'\u000B'</code>, <code>'\u000C'</code>, <code>'\u000D'</code>,
3184:    * <code>'\u001C'</code>, <code>'\u001D'</code>, <code>'\u001E'</code>,
3185:    * and <code>'\u001F'</code>.
3186:    * <br>
3187:    * Java whitespace = ([Zs] not Nb)|[Zl]|[Zp]|U+0009-U+000D|U+001C-U+001F
3188:    *
3189:    * @param ch character to test
3190:    * @return true if ch is Java whitespace, else false
3191:    * @see #isSpaceChar(char)
3192:    * @since 1.1
3193:    */
3194:   public static boolean isWhitespace(char ch)
3195:   {
3196:     int attr = readChar(ch);
3197:     return ((((1 << (attr & TYPE_MASK))
3198:               & ((1 << SPACE_SEPARATOR)
3199:                  | (1 << LINE_SEPARATOR)
3200:                  | (1 << PARAGRAPH_SEPARATOR))) != 0)
3201:             && (attr & NO_BREAK_MASK) == 0)
3202:       || (ch <= '\u001F' && ((1 << ch)
3203:                              & ((1 << '\t')
3204:                                 | (1 << '\n')
3205:                                 | (1 << '\u000B')
3206:                                 | (1 << '\u000C')
3207:                                 | (1 << '\r')
3208:                                 | (1 << '\u001C')
3209:                                 | (1 << '\u001D')
3210:                                 | (1 << '\u001E')
3211:                                 | (1 << '\u001F'))) != 0);
3212:   }
3213: 
3214:   /**
3215:    * Determines if a character is Java whitespace. This includes Unicode
3216:    * space characters (SPACE_SEPARATOR, LINE_SEPARATOR, and
3217:    * PARAGRAPH_SEPARATOR) except the non-breaking spaces
3218:    * (<code>'\u00A0'</code>, <code>'\u2007'</code>, and <code>'\u202F'</code>);
3219:    * and these characters: <code>'\u0009'</code>, <code>'\u000A'</code>,
3220:    * <code>'\u000B'</code>, <code>'\u000C'</code>, <code>'\u000D'</code>,
3221:    * <code>'\u001C'</code>, <code>'\u001D'</code>, <code>'\u001E'</code>,
3222:    * and <code>'\u001F'</code>.  Unlike isWhitespace(char), this method
3223:    * supports supplementary Unicode code points.
3224:    * <br>
3225:    * Java whitespace = ([Zs] not Nb)|[Zl]|[Zp]|U+0009-U+000D|U+001C-U+001F
3226:    *
3227:    * @param codePoint character to test
3228:    * @return true if codePoint is Java whitespace, else false
3229:    * @see #isSpaceChar(int)
3230:    * @since 1.5
3231:    */
3232:   public static boolean isWhitespace(int codePoint)
3233:   {
3234:     int plane = codePoint >>> 16;
3235:     if (plane > 2 && plane != 14)
3236:       return false;
3237:     int attr = readCodePoint(codePoint);
3238:     return ((((1 << (attr & TYPE_MASK))
3239:               & ((1 << SPACE_SEPARATOR)
3240:                  | (1 << LINE_SEPARATOR)
3241:                  | (1 << PARAGRAPH_SEPARATOR))) != 0)
3242:             && (attr & NO_BREAK_MASK) == 0)
3243:       || (codePoint <= '\u001F' && ((1 << codePoint)
3244:                              & ((1 << '\t')
3245:                                 | (1 << '\n')
3246:                                 | (1 << '\u000B')
3247:                                 | (1 << '\u000C')
3248:                                 | (1 << '\r')
3249:                                 | (1 << '\u001C')
3250:                                 | (1 << '\u001D')
3251:                                 | (1 << '\u001E')
3252:                                 | (1 << '\u001F'))) != 0);
3253:   }
3254: 
3255:   /**
3256:    * Determines if a character has the ISO Control property.
3257:    * <br>
3258:    * ISO Control = [Cc]
3259:    *
3260:    * @param ch character to test
3261:    * @return true if ch is an ISO Control character, else false
3262:    * @see #isSpaceChar(char)
3263:    * @see #isWhitespace(char)
3264:    * @since 1.1
3265:    */
3266:   public static boolean isISOControl(char ch)
3267:   {
3268:     return getType(ch) == CONTROL;
3269:   }
3270: 
3271:   /**
3272:    * Determines if a character has the ISO Control property.  Unlike
3273:    * isISOControl(char), this method supports supplementary unicode
3274:    * code points.
3275:    * <br>
3276:    * ISO Control = [Cc]
3277:    *
3278:    * @param codePoint character to test
3279:    * @return true if codePoint is an ISO Control character, else false
3280:    * @see #isSpaceChar(int)
3281:    * @see #isWhitespace(int)
3282:    * @since 1.5
3283:    */
3284:   public static boolean isISOControl(int codePoint)
3285:   {
3286:     return getType(codePoint) == CONTROL;
3287:   }
3288: 
3289:   /**
3290:    * Returns the Unicode general category property of a character.
3291:    *
3292:    * @param ch character from which the general category property will
3293:    *        be retrieved
3294:    * @return the character category property of ch as an integer
3295:    * @see #UNASSIGNED
3296:    * @see #UPPERCASE_LETTER
3297:    * @see #LOWERCASE_LETTER
3298:    * @see #TITLECASE_LETTER
3299:    * @see #MODIFIER_LETTER
3300:    * @see #OTHER_LETTER
3301:    * @see #NON_SPACING_MARK
3302:    * @see #ENCLOSING_MARK
3303:    * @see #COMBINING_SPACING_MARK
3304:    * @see #DECIMAL_DIGIT_NUMBER
3305:    * @see #LETTER_NUMBER
3306:    * @see #OTHER_NUMBER
3307:    * @see #SPACE_SEPARATOR
3308:    * @see #LINE_SEPARATOR
3309:    * @see #PARAGRAPH_SEPARATOR
3310:    * @see #CONTROL
3311:    * @see #FORMAT
3312:    * @see #PRIVATE_USE
3313:    * @see #SURROGATE
3314:    * @see #DASH_PUNCTUATION
3315:    * @see #START_PUNCTUATION
3316:    * @see #END_PUNCTUATION
3317:    * @see #CONNECTOR_PUNCTUATION
3318:    * @see #OTHER_PUNCTUATION
3319:    * @see #MATH_SYMBOL
3320:    * @see #CURRENCY_SYMBOL
3321:    * @see #MODIFIER_SYMBOL
3322:    * @see #INITIAL_QUOTE_PUNCTUATION
3323:    * @see #FINAL_QUOTE_PUNCTUATION
3324:    * @since 1.1
3325:    */
  // Declared native: this source has no Java body for the method.
  public static native int getType(char ch);
3327: 
3328:   /**
3329:    * Returns the Unicode general category property of a character.  Supports
3330:    * supplementary Unicode code points.
3331:    *
3332:    * @param codePoint character from which the general category property will
3333:    *        be retrieved
3334:    * @return the character category property of codePoint as an integer
3335:    * @see #UNASSIGNED
3336:    * @see #UPPERCASE_LETTER
3337:    * @see #LOWERCASE_LETTER
3338:    * @see #TITLECASE_LETTER
3339:    * @see #MODIFIER_LETTER
3340:    * @see #OTHER_LETTER
3341:    * @see #NON_SPACING_MARK
3342:    * @see #ENCLOSING_MARK
3343:    * @see #COMBINING_SPACING_MARK
3344:    * @see #DECIMAL_DIGIT_NUMBER
3345:    * @see #LETTER_NUMBER
3346:    * @see #OTHER_NUMBER
3347:    * @see #SPACE_SEPARATOR
3348:    * @see #LINE_SEPARATOR
3349:    * @see #PARAGRAPH_SEPARATOR
3350:    * @see #CONTROL
3351:    * @see #FORMAT
3352:    * @see #PRIVATE_USE
3353:    * @see #SURROGATE
3354:    * @see #DASH_PUNCTUATION
3355:    * @see #START_PUNCTUATION
3356:    * @see #END_PUNCTUATION
3357:    * @see #CONNECTOR_PUNCTUATION
3358:    * @see #OTHER_PUNCTUATION
3359:    * @see #MATH_SYMBOL
3360:    * @see #CURRENCY_SYMBOL
3361:    * @see #MODIFIER_SYMBOL
3362:    * @see #INITIAL_QUOTE_PUNCTUATION
3363:    * @see #FINAL_QUOTE_PUNCTUATION
3364:    * @since 1.5
3365:    */
  // Declared native: this source has no Java body for the method.
  public static native int getType(int codePoint);
3367: 
3368:   /**
   * Converts a digit into a character which represents that digit
   * in a specified radix. If the radix is less than MIN_RADIX or greater
   * than MAX_RADIX, or the digit is negative or not less than the radix,
   * then the null character <code>'\0'</code> is returned.  Otherwise the
   * return value is in '0'-'9' and 'a'-'z'.
3373:    * <br>
3374:    * return value boundary = U+0030-U+0039|U+0061-U+007A
3375:    *
3376:    * @param digit digit to be converted into a character
3377:    * @param radix radix of digit
3378:    * @return character representing digit in radix, or '\0'
3379:    * @see #MIN_RADIX
3380:    * @see #MAX_RADIX
3381:    * @see #digit(char, int)
3382:    */
3383:   public static char forDigit(int digit, int radix)
3384:   {
3385:     if (radix < MIN_RADIX || radix > MAX_RADIX
3386:         || digit < 0 || digit >= radix)
3387:       return '\0';
3388:     return (char) (digit < 10 ? ('0' + digit) : ('a' - 10 + digit));
3389:   }
3390: 
3391:   /**
3392:    * Returns the Unicode directionality property of the character. This
3393:    * is used in the visual ordering of text.
3394:    *
3395:    * @param ch the character to look up
3396:    * @return the directionality constant, or DIRECTIONALITY_UNDEFINED
3397:    * @see #DIRECTIONALITY_UNDEFINED
3398:    * @see #DIRECTIONALITY_LEFT_TO_RIGHT
3399:    * @see #DIRECTIONALITY_RIGHT_TO_LEFT
3400:    * @see #DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC
3401:    * @see #DIRECTIONALITY_EUROPEAN_NUMBER
3402:    * @see #DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR
3403:    * @see #DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR
3404:    * @see #DIRECTIONALITY_ARABIC_NUMBER
3405:    * @see #DIRECTIONALITY_COMMON_NUMBER_SEPARATOR
3406:    * @see #DIRECTIONALITY_NONSPACING_MARK
3407:    * @see #DIRECTIONALITY_BOUNDARY_NEUTRAL
3408:    * @see #DIRECTIONALITY_PARAGRAPH_SEPARATOR
3409:    * @see #DIRECTIONALITY_SEGMENT_SEPARATOR
3410:    * @see #DIRECTIONALITY_WHITESPACE
3411:    * @see #DIRECTIONALITY_OTHER_NEUTRALS
3412:    * @see #DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING
3413:    * @see #DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE
3414:    * @see #DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING
3415:    * @see #DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE
3416:    * @see #DIRECTIONALITY_POP_DIRECTIONAL_FORMAT
3417:    * @since 1.4
3418:    */
  // Declared native: this source has no Java body for the method.
  public static native byte getDirectionality(char ch);
3420: 
3421:   /**
3422:    * Returns the Unicode directionality property of the character. This
3423:    * is used in the visual ordering of text.  Unlike getDirectionality(char),
3424:    * this method supports supplementary Unicode code points.
3425:    *
3426:    * @param codePoint the character to look up
3427:    * @return the directionality constant, or DIRECTIONALITY_UNDEFINED
3428:    * @see #DIRECTIONALITY_UNDEFINED
3429:    * @see #DIRECTIONALITY_LEFT_TO_RIGHT
3430:    * @see #DIRECTIONALITY_RIGHT_TO_LEFT
3431:    * @see #DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC
3432:    * @see #DIRECTIONALITY_EUROPEAN_NUMBER
3433:    * @see #DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR
3434:    * @see #DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR
3435:    * @see #DIRECTIONALITY_ARABIC_NUMBER
3436:    * @see #DIRECTIONALITY_COMMON_NUMBER_SEPARATOR
3437:    * @see #DIRECTIONALITY_NONSPACING_MARK
3438:    * @see #DIRECTIONALITY_BOUNDARY_NEUTRAL
3439:    * @see #DIRECTIONALITY_PARAGRAPH_SEPARATOR
3440:    * @see #DIRECTIONALITY_SEGMENT_SEPARATOR
3441:    * @see #DIRECTIONALITY_WHITESPACE
3442:    * @see #DIRECTIONALITY_OTHER_NEUTRALS
3443:    * @see #DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING
3444:    * @see #DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE
3445:    * @see #DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING
3446:    * @see #DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE
3447:    * @see #DIRECTIONALITY_POP_DIRECTIONAL_FORMAT
3448:    * @since 1.5
3449:    */
  // Declared native: this source has no Java body for the method.
  public static native byte getDirectionality(int codePoint);
3451: 
3452:   /**
3453:    * Determines whether the character is mirrored according to Unicode. For
3454:    * example, <code>\u0028</code> (LEFT PARENTHESIS) appears as '(' in
3455:    * left-to-right text, but ')' in right-to-left text.
3456:    *
3457:    * @param ch the character to look up
3458:    * @return true if the character is mirrored
3459:    * @since 1.4
3460:    */
3461:   public static boolean isMirrored(char ch)
3462:   {
3463:     return (readChar(ch) & MIRROR_MASK) != 0;
3464:   }
3465: 
3466:   /**
3467:    * Determines whether the character is mirrored according to Unicode. For
3468:    * example, <code>\u0028</code> (LEFT PARENTHESIS) appears as '(' in
3469:    * left-to-right text, but ')' in right-to-left text.  Unlike 
3470:    * isMirrored(char), this method supports supplementary Unicode code points.
3471:    *
3472:    * @param codePoint the character to look up
3473:    * @return true if the character is mirrored
3474:    * @since 1.5
3475:    */
3476:   public static boolean isMirrored(int codePoint)
3477:   {
3478:     int plane = codePoint >>> 16;
3479:     if (plane > 2 && plane != 14)
3480:       return false;
3481:     return (readCodePoint(codePoint) & MIRROR_MASK) != 0;
3482:   }
3483: 
3484:   /**
3485:    * Compares another Character to this Character, numerically.
3486:    *
3487:    * @param anotherCharacter Character to compare with this Character
3488:    * @return a negative integer if this Character is less than
3489:    *         anotherCharacter, zero if this Character is equal, and
3490:    *         a positive integer if this Character is greater
3491:    * @throws NullPointerException if anotherCharacter is null
3492:    * @since 1.2
3493:    */
3494:   public int compareTo(Character anotherCharacter)
3495:   {
3496:     return value - anotherCharacter.value;
3497:   }
3498: 
3499:   /**
3500:    * Compares an object to this Character.  Assuming the object is a
3501:    * Character object, this method performs the same comparison as
3502:    * compareTo(Character).
3503:    *
3504:    * @param o object to compare
3505:    * @return the comparison value
3506:    * @throws ClassCastException if o is not a Character object
3507:    * @throws NullPointerException if o is null
3508:    * @see #compareTo(Character)
3509:    * @since 1.2
3510:    */
3511:   public int compareTo(Object o)
3512:   {
3513:     return compareTo((Character) o);
3514:   }
3515: 
3516:   /**
   * Returns a <code>Character</code> object wrapping the value.
3518:    * In contrast to the <code>Character</code> constructor, this method
3519:    * will cache some values.  It is used by boxing conversion.
3520:    *
3521:    * @param val the value to wrap
3522:    * @return the <code>Character</code>
3523:    * 
3524:    * @since 1.5
3525:    */
3526:   public static Character valueOf(char val)
3527:   {
3528:     if (val > MAX_CACHE)
3529:       return new Character(val);
3530:     synchronized (charCache)
3531:       {
3532:     if (charCache[val - MIN_VALUE] == null)
3533:       charCache[val - MIN_VALUE] = new Character(val);
3534:     return charCache[val - MIN_VALUE];
3535:       }
3536:   }
3537: 
3538:   /**
3539:    * Reverse the bytes in val.
3540:    * @since 1.5
3541:    */
3542:   public static char reverseBytes(char val)
3543:   {
3544:     return (char) (((val >> 8) & 0xff) | ((val << 8) & 0xff00));
3545:   }
3546: 
3547:   /**
3548:    * Converts a unicode code point to a UTF-16 representation of that
3549:    * code point.
3550:    * 
3551:    * @param codePoint the unicode code point
3552:    *
3553:    * @return the UTF-16 representation of that code point
3554:    *
3555:    * @throws IllegalArgumentException if the code point is not a valid
3556:    *         unicode code point
3557:    *
3558:    * @since 1.5
3559:    */
3560:   public static char[] toChars(int codePoint)
3561:   {
3562:     char[] result = new char[charCount(codePoint)];
3563:     int ignore = toChars(codePoint, result, 0);
3564:     return result;
3565:   }
3566: 
3567:   /**
3568:    * Converts a unicode code point to its UTF-16 representation.
3569:    *
3570:    * @param codePoint the unicode code point
3571:    * @param dst the target char array
3572:    * @param dstIndex the start index for the target
3573:    *
3574:    * @return number of characters written to <code>dst</code>
3575:    *
3576:    * @throws IllegalArgumentException if <code>codePoint</code> is not a
3577:    *         valid unicode code point
3578:    * @throws NullPointerException if <code>dst</code> is <code>null</code>
3579:    * @throws IndexOutOfBoundsException if <code>dstIndex</code> is not valid
3580:    *         in <code>dst</code> or if the UTF-16 representation does not
3581:    *         fit into <code>dst</code>
3582:    *
3583:    * @since 1.5
3584:    */
3585:   public static int toChars(int codePoint, char[] dst, int dstIndex)
3586:   {
3587:     if (!isValidCodePoint(codePoint))
3588:       {
3589:         throw new IllegalArgumentException("not a valid code point: "
3590:                                            + codePoint);
3591:       }
3592: 
3593:     int result;
3594:     if (isSupplementaryCodePoint(codePoint))
3595:       {
3596:         // Write second char first to cause IndexOutOfBoundsException
3597:         // immediately.
3598:         final int cp2 = codePoint - 0x10000;
3599:         dst[dstIndex + 1] = (char) ((cp2 % 0x400) + (int) MIN_LOW_SURROGATE);
3600:         dst[dstIndex] = (char) ((cp2 / 0x400) + (int) MIN_HIGH_SURROGATE);
3601:         result = 2;
3602:       }
3603:     else
3604:       {
3605:         dst[dstIndex] = (char) codePoint;
3606:         result = 1; 
3607:       }
3608:     return result;
3609:   }
3610: 
3611:   /**
3612:    * Return number of 16-bit characters required to represent the given
3613:    * code point.
3614:    *
3615:    * @param codePoint a unicode code point
3616:    *
3617:    * @return 2 if codePoint >= 0x10000, 1 otherwise.
3618:    *
3619:    * @since 1.5
3620:    */
3621:   public static int charCount(int codePoint)
3622:   {
3623:     return 
3624:       (codePoint >= MIN_SUPPLEMENTARY_CODE_POINT) 
3625:       ? 2 
3626:       : 1;
3627:   }
3628: 
3629:   /**
3630:    * Determines whether the specified code point is
3631:    * in the range 0x10000 .. 0x10FFFF, i.e. the character is within the Unicode
3632:    * supplementary character range.
3633:    *
3634:    * @param codePoint a Unicode code point
3635:    *
3636:    * @return <code>true</code> if code point is in supplementary range
3637:    *
3638:    * @since 1.5
3639:    */
3640:   public static boolean isSupplementaryCodePoint(int codePoint)
3641:   {
3642:     return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT
3643:       && codePoint <= MAX_CODE_POINT;
3644:   }
3645: 
3646:   /**
3647:    * Determines whether the specified code point is
3648:    * in the range 0x0000 .. 0x10FFFF, i.e. it is a valid Unicode code point.
3649:    *
3650:    * @param codePoint a Unicode code point
3651:    *
3652:    * @return <code>true</code> if code point is valid
3653:    *
3654:    * @since 1.5
3655:    */
3656:   public static boolean isValidCodePoint(int codePoint)
3657:   {
3658:     return codePoint >= MIN_CODE_POINT && codePoint <= MAX_CODE_POINT;
3659:   }
3660: 
3661:   /**
3662:    * Return true if the given character is a high surrogate.
3663:    * @param ch the character
3664:    * @return true if the character is a high surrogate character
3665:    *
3666:    * @since 1.5
3667:    */
3668:   public static boolean isHighSurrogate(char ch)
3669:   {
3670:     return ch >= MIN_HIGH_SURROGATE && ch <= MAX_HIGH_SURROGATE;
3671:   }
3672: 
3673:   /**
3674:    * Return true if the given character is a low surrogate.
3675:    * @param ch the character
3676:    * @return true if the character is a low surrogate character
3677:    *
3678:    * @since 1.5
3679:    */
3680:   public static boolean isLowSurrogate(char ch)
3681:   {
3682:     return ch >= MIN_LOW_SURROGATE && ch <= MAX_LOW_SURROGATE;
3683:   }
3684: 
3685:   /**
3686:    * Return true if the given characters compose a surrogate pair.
3687:    * This is true if the first character is a high surrogate and the
3688:    * second character is a low surrogate.
3689:    * @param ch1 the first character
3690:    * @param ch2 the first character
3691:    * @return true if the characters compose a surrogate pair
3692:    *
3693:    * @since 1.5
3694:    */
3695:   public static boolean isSurrogatePair(char ch1, char ch2)
3696:   {
3697:     return isHighSurrogate(ch1) && isLowSurrogate(ch2);
3698:   }
3699: 
3700:   /**
3701:    * Given a valid surrogate pair, this returns the corresponding
3702:    * code point.
3703:    * @param high the high character of the pair
3704:    * @param low the low character of the pair
3705:    * @return the corresponding code point
3706:    *
3707:    * @since 1.5
3708:    */
3709:   public static int toCodePoint(char high, char low)
3710:   {
3711:     return ((high - MIN_HIGH_SURROGATE) * 0x400) +
3712:       (low - MIN_LOW_SURROGATE) + 0x10000;
3713:   }
3714: 
3715:   /**
3716:    * Get the code point at the specified index in the CharSequence.
3717:    * This is like CharSequence#charAt(int), but if the character is
3718:    * the start of a surrogate pair, and there is a following
3719:    * character, and this character completes the pair, then the
3720:    * corresponding supplementary code point is returned.  Otherwise,
3721:    * the character at the index is returned.
3722:    *
3723:    * @param sequence the CharSequence
3724:    * @param index the index of the codepoint to get, starting at 0
3725:    * @return the codepoint at the specified index
3726:    * @throws IndexOutOfBoundsException if index is negative or &gt;= length()
3727:    * @since 1.5
3728:    */
3729:   public static int codePointAt(CharSequence sequence, int index)
3730:   {
3731:     int len = sequence.length();
3732:     if (index < 0 || index >= len)
3733:       throw new IndexOutOfBoundsException();
3734:     char high = sequence.charAt(index);
3735:     if (! isHighSurrogate(high) || ++index >= len)
3736:       return high;
3737:     char low = sequence.charAt(index);
3738:     if (! isLowSurrogate(low))
3739:       return high;
3740:     return toCodePoint(high, low);
3741:   }
3742: 
3743:   /**
3744:    * Get the code point at the specified index in the CharSequence.
3745:    * If the character is the start of a surrogate pair, and there is a
3746:    * following character, and this character completes the pair, then
3747:    * the corresponding supplementary code point is returned.
3748:    * Otherwise, the character at the index is returned.
3749:    *
3750:    * @param chars the character array in which to look
3751:    * @param index the index of the codepoint to get, starting at 0
3752:    * @return the codepoint at the specified index
3753:    * @throws IndexOutOfBoundsException if index is negative or &gt;= length()
3754:    * @since 1.5
3755:    */
3756:   public static int codePointAt(char[] chars, int index)
3757:   {
3758:     return codePointAt(chars, index, chars.length);
3759:   }
3760: 
3761:   /**
3762:    * Get the code point at the specified index in the CharSequence.
3763:    * If the character is the start of a surrogate pair, and there is a
3764:    * following character within the specified range, and this
3765:    * character completes the pair, then the corresponding
3766:    * supplementary code point is returned.  Otherwise, the character
3767:    * at the index is returned.
3768:    *
3769:    * @param chars the character array in which to look
3770:    * @param index the index of the codepoint to get, starting at 0
3771:    * @param limit the limit past which characters should not be examined
3772:    * @return the codepoint at the specified index
3773:    * @throws IndexOutOfBoundsException if index is negative or &gt;=
3774:    * limit, or if limit is negative or &gt;= the length of the array
3775:    * @since 1.5
3776:    */
3777:   public static int codePointAt(char[] chars, int index, int limit)
3778:   {
3779:     if (index < 0 || index >= limit || limit < 0 || limit >= chars.length)
3780:       throw new IndexOutOfBoundsException();
3781:     char high = chars[index];
3782:     if (! isHighSurrogate(high) || ++index >= limit)
3783:       return high;
3784:     char low = chars[index];
3785:     if (! isLowSurrogate(low))
3786:       return high;
3787:     return toCodePoint(high, low);
3788:   }
3789: 
3790:   /**
3791:    * Get the code point before the specified index.  This is like
3792:    * #codePointAt(char[], int), but checks the characters at
3793:    * <code>index-1</code> and <code>index-2</code> to see if they form
3794:    * a supplementary code point.  If they do not, the character at
3795:    * <code>index-1</code> is returned.
3796:    *
3797:    * @param chars the character array
3798:    * @param index the index just past the codepoint to get, starting at 0
3799:    * @return the codepoint at the specified index
3800:    * @throws IndexOutOfBoundsException if index is negative or &gt;= length()
3801:    * @since 1.5
3802:    */
3803:   public static int codePointBefore(char[] chars, int index)
3804:   {
3805:     return codePointBefore(chars, index, 1);
3806:   }
3807: 
3808:   /**
3809:    * Get the code point before the specified index.  This is like
3810:    * #codePointAt(char[], int), but checks the characters at
3811:    * <code>index-1</code> and <code>index-2</code> to see if they form
3812:    * a supplementary code point.  If they do not, the character at
3813:    * <code>index-1</code> is returned.  The start parameter is used to
3814:    * limit the range of the array which may be examined.
3815:    *
3816:    * @param chars the character array
3817:    * @param index the index just past the codepoint to get, starting at 0
3818:    * @param start the index before which characters should not be examined
3819:    * @return the codepoint at the specified index
3820:    * @throws IndexOutOfBoundsException if index is &gt; start or &gt;
3821:    * the length of the array, or if limit is negative or &gt;= the
3822:    * length of the array
3823:    * @since 1.5
3824:    */
3825:   public static int codePointBefore(char[] chars, int index, int start)
3826:   {
3827:     if (index < start || index > chars.length
3828:     || start < 0 || start >= chars.length)
3829:       throw new IndexOutOfBoundsException();
3830:     --index;
3831:     char low = chars[index];
3832:     if (! isLowSurrogate(low) || --index < start)
3833:       return low;
3834:     char high = chars[index];
3835:     if (! isHighSurrogate(high))
3836:       return low;
3837:     return toCodePoint(high, low);
3838:   }
3839: 
3840:   /**
3841:    * Get the code point before the specified index.  This is like
3842:    * #codePointAt(CharSequence, int), but checks the characters at
3843:    * <code>index-1</code> and <code>index-2</code> to see if they form
3844:    * a supplementary code point.  If they do not, the character at
3845:    * <code>index-1</code> is returned.
3846:    *
3847:    * @param sequence the CharSequence
3848:    * @param index the index just past the codepoint to get, starting at 0
3849:    * @return the codepoint at the specified index
3850:    * @throws IndexOutOfBoundsException if index is negative or &gt;= length()
3851:    * @since 1.5
3852:    */
3853:   public static int codePointBefore(CharSequence sequence, int index)
3854:   {
3855:     int len = sequence.length();
3856:     if (index < 1 || index > len)
3857:       throw new IndexOutOfBoundsException();
3858:     --index;
3859:     char low = sequence.charAt(index);
3860:     if (! isLowSurrogate(low) || --index < 0)
3861:       return low;
3862:     char high = sequence.charAt(index);
3863:     if (! isHighSurrogate(high))
3864:       return low;
3865:     return toCodePoint(high, low);
3866:   }
3867: } // class Character