Source for java.text.CollationElementIterator

   1: /* CollationElementIterator.java -- Walks through collation elements
   2:    Copyright (C) 1998, 1999, 2001, 2002, 2003, 2004  Free Software Foundation
   3: 
   4: This file is part of GNU Classpath.
   5: 
   6: GNU Classpath is free software; you can redistribute it and/or modify
   7: it under the terms of the GNU General Public License as published by
   8: the Free Software Foundation; either version 2, or (at your option)
   9: any later version.
  10:  
  11: GNU Classpath is distributed in the hope that it will be useful, but
  12: WITHOUT ANY WARRANTY; without even the implied warranty of
  13: MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14: General Public License for more details.
  15: 
  16: You should have received a copy of the GNU General Public License
  17: along with GNU Classpath; see the file COPYING.  If not, write to the
  18: Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  19: 02110-1301 USA.
  20: 
  21: Linking this library statically or dynamically with other modules is
  22: making a combined work based on this library.  Thus, the terms and
  23: conditions of the GNU General Public License cover the whole
  24: combination.
  25: 
  26: As a special exception, the copyright holders of this library give you
  27: permission to link this library with independent modules to produce an
  28: executable, regardless of the license terms of these independent
  29: modules, and to copy and distribute the resulting executable under
  30: terms of your choice, provided that you also meet, for each linked
  31: independent module, the terms and conditions of the license of that
  32: module.  An independent module is a module which is not derived from
  33: or based on this library.  If you modify this library, you may extend
  34: this exception to your version of the library, but you are not
  35: obligated to do so.  If you do not wish to do so, delete this
  36: exception statement from your version. */
  37: 
  38: 
  39: package java.text;
  40: 
  41: import java.util.ArrayList;
  42: 
  43: /* Written using "Java Class Libraries", 2nd edition, plus online
  44:  * API docs for JDK 1.2 from http://www.javasoft.com.
  45:  * Status: Believed complete and correct to JDK 1.1.
  46:  */
  47: 
  48: /**
  49:  * This class walks through the character collation elements of a 
  50:  * <code>String</code> as defined by the collation rules in an instance of 
  51:  * <code>RuleBasedCollator</code>.  There is no public constructor for
  52:  * this class.  An instance is created by calling the
  53:  * <code>getCollationElementIterator</code> method on 
  54:  * <code>RuleBasedCollator</code>.
  55:  *
  56:  * @author Aaron M. Renn (arenn@urbanophile.com)
  57:  * @author Tom Tromey (tromey@cygnus.com)
  58:  * @author Guilhem Lavaux (guilhem.lavaux@free.fr)
  59:  */
  60: public final class CollationElementIterator
  61: {
  62:   /**
  63:    * This is a constant value that is returned to indicate that the end of 
  64:    * the string was encountered.
  65:    */
  66:   public static final int NULLORDER = -1;
  67: 
  68:   /**
  69:    * This is the RuleBasedCollator this object was created from.
  70:    */
  71:   RuleBasedCollator collator;
  72: 
  73:   /**
  74:    * This is the String that is being iterated over.
  75:    */
  76:   String text;
  77: 
  78:   /**
  79:    * This is the index into the collation decomposition where we are currently scanning.
  80:    */
  81:   int index;
  82: 
  83:   /**
  84:    * This is the index into the String where we are currently scanning.
  85:    */
  86:   int textIndex;
  87: 
  88:   /**
  89:    * Array containing the collation decomposition of the
  90:    * text given to the constructor.
  91:    */
  92:   private RuleBasedCollator.CollationElement[] text_decomposition;
  93: 
  94:   /**
  95:    * Array containing the index of the specified block.
  96:    */
  97:   private int[] text_indexes;
  98: 
  99:   /**
 100:    * This method initializes a new instance of <code>CollationElementIterator</code>
 101:    * to iterate over the specified <code>String</code> using the rules in the
 102:    * specified <code>RuleBasedCollator</code>.
 103:    *
 104:    * @param collator The <code>RuleBasedCollation</code> used for calculating collation values
 105:    * @param text The <code>String</code> to iterate over.
 106:    */
 107:   CollationElementIterator(RuleBasedCollator collator, String text)
 108:   {
 109:     this.collator = collator;
 110:     
 111:     setText (text);    
 112:   }
 113: 
 114:   RuleBasedCollator.CollationElement nextBlock()
 115:   {
 116:     if (index >= text_decomposition.length)
 117:       return null;
 118:     
 119:     RuleBasedCollator.CollationElement e = text_decomposition[index];
 120:     
 121:     textIndex = text_indexes[index+1];
 122: 
 123:     index++;
 124: 
 125:     return e;
 126:   }
 127: 
 128:   RuleBasedCollator.CollationElement previousBlock()
 129:   {
 130:     if (index == 0)
 131:       return null;
 132:     
 133:     index--;
 134:     RuleBasedCollator.CollationElement e = text_decomposition[index];
 135: 
 136:     textIndex = text_indexes[index+1];
 137:     
 138:     return e;
 139:   }
 140: 
 141:   /**
 142:    * This method returns the collation ordering value of the next character sequence
 143:    * in the string (it may be an extended character following collation rules).
 144:    * This method will return <code>NULLORDER</code> if the
 145:    * end of the string was reached.
 146:    *
 147:    * @return The collation ordering value.
 148:    */
 149:   public int next()
 150:   {
 151:     RuleBasedCollator.CollationElement e = nextBlock();
 152: 
 153:     if (e == null)
 154:       return NULLORDER;
 155:     
 156:     return e.getValue();
 157:   }
 158: 
 159:   /**
 160:    * This method returns the collation ordering value of the previous character
 161:    * in the string.  This method will return <code>NULLORDER</code> if the
 162:    * beginning of the string was reached.
 163:    *
 164:    * @return The collation ordering value.
 165:    */
 166:   public int previous()
 167:   {
 168:     RuleBasedCollator.CollationElement e = previousBlock();
 169: 
 170:     if (e == null)
 171:       return NULLORDER;
 172:     
 173:     return e.getValue();
 174:   }
 175: 
 176:   /**
 177:    * This method returns the primary order value for the given collation
 178:    * value.
 179:    *
 180:    * @param value The collation value returned from <code>next()</code> or <code>previous()</code>.
 181:    *
 182:    * @return The primary order value of the specified collation value.  This is the high 16 bits.
 183:    */
 184:   public static int primaryOrder(int order)
 185:   {
 186:     // From the JDK 1.2 spec.
 187:     return order >>> 16;
 188:   }
 189: 
 190:   /**
 191:    * This method resets the internal position pointer to read from the
 192:    * beginning of the <code>String</code> again.
 193:    */
 194:   public void reset()
 195:   {
 196:     index = 0;
 197:     textIndex = 0;
 198:   }
 199: 
 200:   /**
 201:    * This method returns the secondary order value for the given collation
 202:    * value.
 203:    *
 204:    * @param value The collation value returned from <code>next()</code> or <code>previous()</code>.
 205:    *
 206:    * @return The secondary order value of the specified collation value.  This is the bits 8-15.
 207:    */
 208:   public static short secondaryOrder(int order)
 209:   {
 210:     // From the JDK 1.2 spec.
 211:     return (short) ((order >>> 8) & 255);
 212:   }
 213: 
 214:   /**
 215:    * This method returns the tertiary order value for the given collation
 216:    * value.
 217:    *
 218:    * @param value The collation value returned from <code>next()</code> or <code>previous()</code>.
 219:    *
 220:    * @return The tertiary order value of the specified collation value.  This is the low eight bits.
 221:    */
 222:   public static short tertiaryOrder(int order)
 223:   {
 224:     // From the JDK 1.2 spec.
 225:     return (short) (order & 255);
 226:   }
 227: 
 228:   /**
 229:    * This method sets the <code>String</code> that it is iterating over
 230:    * to the specified <code>String</code>.
 231:    *
 232:    * @param text The new <code>String</code> to iterate over.
 233:    *
 234:    * @since 1.2
 235:    */
 236:   public void setText(String text)
 237:   {
 238:     int idx = 0;
 239:     int idx_idx = 0;
 240:     int alreadyExpanded = 0;
 241:     int idxToMove = 0;
 242: 
 243:     this.text = text;
 244:     this.index = 0;
 245: 
 246:     String work_text = text.intern();
 247: 
 248:     ArrayList a_element = new ArrayList();
 249:     ArrayList a_idx = new ArrayList();
 250: 
 251:     // Build element collection ordered as they come in "text".
 252:     while (idx < work_text.length())
 253:       {
 254:     String key, key_old;
 255: 
 256:     Object object = null;
 257:     int p = 1;
 258:     
 259:     // IMPROVE: use a TreeMap with a prefix-ordering rule.
 260:     key_old = key = null;
 261:     do
 262:       {
 263:         if (object != null)
 264:           key_old = key;
 265:         key = work_text.substring (idx, idx+p);
 266:         object = collator.prefix_tree.get (key);
 267:         if (object != null && idx < alreadyExpanded)
 268:           {
 269:         RuleBasedCollator.CollationElement prefix = (RuleBasedCollator.CollationElement)object;
 270:         if (prefix.expansion != null && 
 271:             prefix.expansion.startsWith(work_text.substring(0, idx)))
 272:         {
 273:           object = null;
 274:           key = key_old;
 275:         }
 276:           }
 277:         p++;
 278:       }
 279:     while (idx+p <= work_text.length());
 280:     
 281:     if (object == null)
 282:       key = key_old;
 283:     
 284:     RuleBasedCollator.CollationElement prefix =
 285:       (RuleBasedCollator.CollationElement) collator.prefix_tree.get (key);
 286: 
 287:     /*
 288:      * First case: There is no such sequence in the database.
 289:      * We will have to build one from the context.
 290:      */
 291:     if (prefix == null)
 292:       {
 293:         /*
 294:          * We are dealing with sequences in an expansion. They
 295:          * are treated as accented characters (tertiary order).
 296:          */
 297:         if (alreadyExpanded > 0)
 298:           {
 299:         RuleBasedCollator.CollationElement e =
 300:           collator.getDefaultAccentedElement (work_text.charAt (idx));
 301:         
 302:         a_element.add (e);
 303:         a_idx.add (new Integer(idx_idx));
 304:         idx++;
 305:         alreadyExpanded--;
 306:         if (alreadyExpanded == 0)
 307:           {
 308:             /* There is not any characters left in the expansion set.
 309:              * We can increase the pointer in the source string.
 310:              */
 311:             idx_idx += idxToMove;
 312:             idxToMove = 0; 
 313:           }
 314:         else
 315:           idx_idx++;
 316:           }
 317:         else
 318:           {
 319:         /* This is a normal character. */
 320:         RuleBasedCollator.CollationElement e =
 321:           collator.getDefaultElement (work_text.charAt (idx));
 322:         Integer i_ref = new Integer(idx_idx);
 323: 
 324:         /* Don't forget to mark it as a special sequence so the
 325:          * string can be ordered.
 326:          */
 327:         a_element.add (RuleBasedCollator.SPECIAL_UNKNOWN_SEQ);
 328:         a_idx.add (i_ref);
 329:         a_element.add (e);
 330:         a_idx.add (i_ref);
 331:         idx_idx++;
 332:         idx++;
 333:           }
 334:         continue;
 335:       }
 336:  
 337:     /*
 338:      * Second case: Here we have found a matching sequence.
 339:      * Here we have an expansion string prepend it to the "work text" and
 340:      * add the corresponding sorting element. We must also mark 
 341:      */
 342:     if (prefix.expansion != null)
 343:       {
 344:         work_text = prefix.expansion
 345:           + work_text.substring (idx+prefix.key.length());
 346:         idx = 0;
 347:         a_element.add (prefix);
 348:         a_idx.add (new Integer(idx_idx));
 349:         if (alreadyExpanded == 0)
 350:           idxToMove = prefix.key.length();
 351:         alreadyExpanded += prefix.expansion.length()-prefix.key.length();
 352:       }
 353:     else
 354:       {
 355:         /* Third case: the simplest. We have got the prefix and it
 356:          * has not to be expanded.
 357:          */
 358:         a_element.add (prefix);
 359:         a_idx.add (new Integer(idx_idx));
 360:         idx += prefix.key.length();
 361:         /* If the sequence is in an expansion, we must decrease the
 362:          * counter.
 363:          */
 364:         if (alreadyExpanded > 0)
 365:           {
 366:         alreadyExpanded -= prefix.key.length();
 367:         if (alreadyExpanded == 0)
 368:           {
 369:             idx_idx += idxToMove;
 370:             idxToMove = 0;
 371:           }
 372:           }
 373:         else
 374:           idx_idx += prefix.key.length();
 375:       }
 376:       }
 377:     
 378:     text_decomposition = (RuleBasedCollator.CollationElement[])
 379:        a_element.toArray(new RuleBasedCollator.CollationElement[a_element.size()]);
 380:     text_indexes = new int[a_idx.size()+1];
 381:     for (int i = 0; i < a_idx.size(); i++) 
 382:       {
 383:     text_indexes[i] = ((Integer)a_idx.get(i)).intValue();
 384:       }
 385:     text_indexes[a_idx.size()] = text.length();
 386:   }
 387: 
 388:   /**
 389:    * This method sets the <code>String</code> that it is iterating over
 390:    * to the <code>String</code> represented by the specified
 391:    * <code>CharacterIterator</code>.
 392:    *
 393:    * @param source The <code>CharacterIterator</code> containing the new
 394:    * <code>String</code> to iterate over.
 395:    */
 396:   public void setText(CharacterIterator source)
 397:   {
 398:     StringBuffer expand = new StringBuffer();
 399: 
 400:     // For now assume we read from the beginning of the string.
 401:     for (char c = source.first();
 402:      c != CharacterIterator.DONE;
 403:      c = source.next())
 404:       expand.append(c);
 405: 
 406:     setText(expand.toString());
 407:   }
 408: 
 409:   /**
 410:    * This method returns the current offset into the <code>String</code>
 411:    * that is being iterated over.
 412:    *
 413:    * @return The iteration index position.
 414:    *
 415:    * @since 1.2
 416:    */
 417:   public int getOffset()
 418:   {
 419:     return textIndex;
 420:   }
 421: 
 422:   /**
 423:    * This method sets the iteration index position into the current
 424:    * <code>String</code> to the specified value.  This value must not
 425:    * be negative and must not be greater than the last index position
 426:    * in the <code>String</code>.
 427:    *
 428:    * @param offset The new iteration index position.
 429:    *
 430:    * @exception IllegalArgumentException If the new offset is not valid.
 431:    */
 432:   public void setOffset(int offset)
 433:   {
 434:     if (offset < 0)
 435:       throw new IllegalArgumentException("Negative offset: " + offset);
 436: 
 437:     if (offset > (text.length() - 1))
 438:       throw new IllegalArgumentException("Offset too large: " + offset);
 439:     
 440:     for (index = 0; index < text_decomposition.length; index++)
 441:       {    
 442:     if (offset <= text_indexes[index])
 443:       break;
 444:       }
 445:     /*
 446:      * As text_indexes[0] == 0, we should not have to take care whether index is
 447:      * greater than 0. It is always.
 448:      */
 449:     if (text_indexes[index] == offset)
 450:       textIndex = offset;
 451:     else
 452:       textIndex = text_indexes[index-1];
 453:   }
 454: 
 455:   /**
 456:    * This method returns the maximum length of any expansion sequence that
 457:    * ends with the specified collation order value.  (Whatever that means).
 458:    *
 459:    * @param value The collation order value
 460:    *
 461:    * @param The maximum length of an expansion sequence.
 462:    */
 463:   public int getMaxExpansion(int value)
 464:   {
 465:     return 1;
 466:   }
 467: }