Source for javax.swing.text.html.parser.DTD

   1: /* DTD.java --
   2:    Copyright (C) 2005 Free Software Foundation, Inc.
   3: 
   4: This file is part of GNU Classpath.
   5: 
   6: GNU Classpath is free software; you can redistribute it and/or modify
   7: it under the terms of the GNU General Public License as published by
   8: the Free Software Foundation; either version 2, or (at your option)
   9: any later version.
  10: 
  11: GNU Classpath is distributed in the hope that it will be useful, but
  12: WITHOUT ANY WARRANTY; without even the implied warranty of
  13: MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14: General Public License for more details.
  15: 
  16: You should have received a copy of the GNU General Public License
  17: along with GNU Classpath; see the file COPYING.  If not, write to the
  18: Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  19: 02110-1301 USA.
  20: 
  21: Linking this library statically or dynamically with other modules is
  22: making a combined work based on this library.  Thus, the terms and
  23: conditions of the GNU General Public License cover the whole
  24: combination.
  25: 
  26: As a special exception, the copyright holders of this library give you
  27: permission to link this library with independent modules to produce an
  28: executable, regardless of the license terms of these independent
  29: modules, and to copy and distribute the resulting executable under
  30: terms of your choice, provided that you also meet, for each linked
  31: independent module, the terms and conditions of the license of that
  32: module.  An independent module is a module which is not derived from
  33: or based on this library.  If you modify this library, you may extend
  34: this exception to your version of the library, but you are not
  35: obligated to do so.  If you do not wish to do so, delete this
  36: exception statement from your version. */
  37: 
  38: 
  39: package javax.swing.text.html.parser;
  40: 
  41: import java.io.DataInputStream;
  42: import java.io.EOFException;
  43: import java.io.IOException;
  44: import java.io.ObjectInputStream;
  45: import java.lang.reflect.Field;
  46: import java.lang.reflect.Modifier;
  47: import java.util.BitSet;
  48: import java.util.Hashtable;
  49: import java.util.StringTokenizer;
  50: import java.util.Vector;
  51: 
  52: /**
  53:  * <p>Representation or the SGML DTD document.
  54:  * Provides basis for describing a syntax of the
  55:  * HTML documents. The fields of this class are NOT initialized in
  56:  * constructor. You need to do this separately before passing this data
  57:  * structure to the HTML parser. The subclasses with the fields, pre-
  58:  * initialized, for example, for HTML 4.01, can be available only between
  59:  * the implementation specific classes
  60:  * ( for example, {@link gnu.javax.swing.text.html.parser.HTML_401F }
  61:  * in this implementation).</p>
  62:  * <p>
  63:  * If you need more information about SGML DTD documents,
  64:  * the author suggests to read SGML tutorial on
  65:  * {@link http://www.w3.org/TR/WD-html40-970708/intro/sgmltut.html}.
  66:  * We also recommend Goldfarb C.F (1991) <i>The SGML Handbook</i>,
  67:  * Oxford University Press, 688 p, ISBN: 0198537379.
  68:  * </p>
  69:  * <p>
  70:  * Warning: the html, head and other tag fields will only be automatically
  71:  * assigned if the VM has the correctly implemented reflection mechanism.
  72:  * As these fields are not used anywhere in the implementation, not
  73:  * exception will be thrown in the opposite case.
  74:  * </p>
  75:  *
  76:  * @author Audrius Meskauskas, Lithuania (AudriusA@Bioinformatics.org)
  77:  */
  78: public class DTD
  79:   implements DTDConstants
  80: {
  81:   /**
  82:    * The version of the persistent data format.
  83:    */
  84:   public static int FILE_VERSION = 1;
  85: 
  86:   /**
  87:    * The table of existing available DTDs.
  88:    */
  89:   static Hashtable dtdHash = new Hashtable();
  90: 
  91:   /**
  92:    * The applet element for this DTD.
  93:    */
  94:   public Element applet;
  95: 
  96:   /**
  97:    * The base element for this DTD.
  98:    */
  99:   public Element base;
 100: 
 101:   /**
 102:    * The body element for this DTD.
 103:    */
 104:   public Element body;
 105: 
 106:   /**
 107:    * The head element for this DTD.
 108:    */
 109:   public Element head;
 110: 
 111:   /**
 112:    * The html element for this DTD.
 113:    */
 114:   public Element html;
 115: 
 116:   /**
 117:    * The isindex element of for this DTD.
 118:    */
 119:   public Element isindex;
 120: 
 121:   /**
 122:    * The meta element for this DTD.
 123:    */
 124:   public Element meta;
 125: 
 126:   /**
 127:    * The p element for this DTD.
 128:    */
 129:   public Element p;
 130: 
 131:   /**
 132:    * The param element for this DTD.
 133:    */
 134:   public Element param;
 135: 
 136:   /**
 137:    * The pcdata for this DTD.
 138:    */
 139:   public Element pcdata;
 140: 
 141:   /**
 142:    * The title element for this DTD.
 143:    */
 144:   public Element title;
 145: 
 146:   /**
 147:    * The element for accessing all DTD elements by name.
 148:    */
 149:   public Hashtable elementHash = new Hashtable();
 150: 
 151:   /**
 152:    * The entity table for accessing all DTD entities by name.
 153:    */
 154:   public Hashtable entityHash = new Hashtable();
 155: 
 156:   /**
 157:    *  The name of this DTD.
 158:    */
 159:   public String name;
 160: 
 161:   /**
 162:    * Contains all elements in this DTD. The
 163:    * javax.swing.text.html.parser.Element#index field of all elements
 164:    * in this vector is set to the element position in this vector.
 165:    */
 166:   public Vector elements = new Vector();
 167: 
 168:   /** Create a new DTD with the specified name. */
 169:   protected DTD(String a_name)
 170:   {
 171:     name = a_name;
 172:   }
 173: 
 174:   /** Get this DTD by name. The current implementation
 175:    * only looks in the internal table of DTD documents. If no corresponding
 176:    * entry is found, the new entry is created, placed into
 177:    * the table and returned. */
 178:   public static DTD getDTD(String name)
 179:                     throws IOException
 180:   {
 181:     DTD d = (DTD) dtdHash.get(name);
 182: 
 183:     if (d == null)
 184:       {
 185:         d = new DTD(name);
 186:         dtdHash.put(d.name, d);
 187:       }
 188: 
 189:     return d;
 190:   }
 191: 
 192:   /**
 193:    * Get the element by the element name. If the element is not yet
 194:    * defined, it is newly created and placed into the element table.
 195:    * If the element name matches (ingoring case) a public non static
 196:    * element field in this class, this field is assigned to the value
 197:    * of the newly created element.
 198:    */
 199:   public Element getElement(String element_name)
 200:   {
 201:     return newElement(element_name);
 202:   }
 203: 
 204:   /**
 205:    * Get the element by the value of its
 206:    * {@link javax.swing.text.html.parser.Element#index} field.
 207:    */
 208:   public Element getElement(int index)
 209:   {
 210:     return (Element) elements.get(index);
 211:   }
 212: 
 213:   /**
 214:    * Get the entity with the given identifier.
 215:    * @param id that can be returned by
 216:    * {@link javax.swing.text.html.parser.Entity#name2type(String an_entity)}
 217:    * @return The entity from this DTD or null if there is no entity with
 218:    * such id or such entity is not present in the table of this instance.
 219:    */
 220:   public Entity getEntity(int id)
 221:   {
 222:     String name = Entity.mapper.get(id);
 223: 
 224:     if (name != null)
 225:       return (Entity) entityHash.get(name);
 226:     else
 227:       return null;
 228:   }
 229: 
 230:   /**
 231:    * Get the named entity by its name.
 232:    */
 233:   public Entity getEntity(String entity_name)
 234:   {
 235:     return (Entity) entityHash.get(entity_name);
 236:   }
 237: 
 238:   /**
 239:    * Get the name of this instance of DTD
 240:    */
 241:   public String getName()
 242:   {
 243:     return name;
 244:   }
 245: 
 246:   /**
 247:    * Creates, adds into the entity table and returns the
 248:    * character entity like <code>&amp;lt;</code>
 249:    *  (means '<code>&lt;</code>' );
 250:    * @param name The entity name (without heading &amp; and closing ;)
 251:    * @param type The entity type
 252:    * @param character The entity value (single character)
 253:    * @return The created entity
 254:    */
 255:   public Entity defEntity(String name, int type, int character)
 256:   {
 257:     Entity e = newEntity(name, type);
 258:     e.data = new char[] { (char) character };
 259:     return e;
 260:   }
 261: 
 262:   /**
 263:    * Define the attributes for the element with the given name.
 264:    * If the element is not exist, it is created.
 265:    * @param forElement
 266:    * @param attributes
 267:    */
 268:   public void defineAttributes(String forElement, AttributeList attributes)
 269:   {
 270:     Element e = (Element) elementHash.get(forElement.toLowerCase());
 271: 
 272:     if (e == null)
 273:       e = newElement(forElement);
 274: 
 275:     e.atts = attributes;
 276:   }
 277: 
 278:   /**
 279:    * Defines the element and adds it to the element table. Sets the
 280:    * <code>Element.index</code> field to the value, unique for this
 281:    * instance of DTD. If the element with the given name already exists,
 282:    * replaces all other its settings by the method argument values.
 283:    * @param name the name of the element
 284:    * @param type the type of the element
 285:    * @param headless true if the element needs no starting tag
 286:    * (should not occur in HTML).
 287:    * @param tailless true if the element needs no ending tag (like
 288:    * <code>&lt;hr&gt;</code>
 289:    * @param content the element content
 290:    * @param exclusions the set of elements that must not occur inside
 291:    * this element. The <code>Element.index</code> value defines which
 292:    * bit in this bitset corresponds to that element.
 293:    * @param inclusions the set of elements that can occur inside this
 294:    * element. the <code>Element.index</code> value defines which
 295:    * bit in this bitset corresponds to that element.
 296:    * @param attributes the element attributes.
 297:    * @return the newly defined element.
 298:    */
 299:   public Element defineElement(String name, int type, boolean headless,
 300:                                boolean tailless, ContentModel content,
 301:                                BitSet exclusions, BitSet inclusions,
 302:                                AttributeList attributes
 303:                               )
 304:   {
 305:     Element e = newElement(name);
 306:     e.type = type;
 307:     e.oStart = headless;
 308:     e.oEnd = tailless;
 309:     e.content = content;
 310:     e.exclusions = exclusions;
 311:     e.inclusions = inclusions;
 312:     e.atts = attributes;
 313: 
 314:     return e;
 315:   }
 316: 
 317:   /**
 318:    * Creates, intializes and adds to the entity table the new
 319:    * entity.
 320:    * @param name the name of the entity
 321:    * @param type the type of the entity
 322:    * @param data the data section of the entity
 323:    * @return the created entity
 324:    */
 325:   public Entity defineEntity(String name, int type, char[] data)
 326:   {
 327:     Entity e = newEntity(name, type);
 328:     e.data = data;
 329: 
 330:     return e;
 331:   }
 332: 
 333:   /** Place this DTD into the DTD table. */
 334:   public static void putDTDHash(String name, DTD dtd)
 335:   {
 336:     dtdHash.put(name, dtd);
 337:   }
 338: 
 339:   /**
 340:    * <p>Reads DTD from an archived format. This format is not standardized
 341:    * and differs between implementations.</p><p> This implementation
 342:    * reads and defines all entities and elements using
 343:    * ObjectInputStream. The elements and entities can be written into the
 344:    * stream in any order. The objects other than elements and entities
 345:    * are ignored.</p>
 346:    * @param stream A data stream to read from.
 347:    * @throws java.io.IOException If one is thrown by the input stream
 348:    */
 349:   public void read(DataInputStream stream)
 350:             throws java.io.IOException
 351:   {
 352:     ObjectInputStream oi = new ObjectInputStream(stream);
 353:     Object def;
 354:     try
 355:       {
 356:         while (true)
 357:           {
 358:             def = oi.readObject();
 359:             if (def instanceof Element)
 360:               {
 361:                 Element e = (Element) def;
 362:                 elementHash.put(e.name.toLowerCase(), e);
 363:                 assignField(e);
 364:               }
 365:             else if (def instanceof Entity)
 366:               {
 367:                 Entity e = (Entity) def;
 368:                 entityHash.put(e.name, e);
 369:               }
 370:           }
 371:       }
 372:     catch (ClassNotFoundException ex)
 373:       {
 374:         throw new IOException(ex.getMessage());
 375:       }
 376:     catch (EOFException ex)
 377:       {
 378:         // ok EOF
 379:       }
 380:   }
 381: 
 382:   /**
 383:    * Returns the name of this instance of DTD.
 384:    */
 385:   public String toString()
 386:   {
 387:     return name;
 388:   }
 389: 
 390:   /**
 391:    * Creates and returns new attribute (not an attribute list).
 392:    * @param name the name of this attribute
 393:    * @param type the type of this attribute (FIXED, IMPLIED or
 394:    * REQUIRED from <code>DTDConstants</code>).
 395:    * @param modifier the modifier of this attribute
 396:    * @param default_value the default value of this attribute
 397:    * @param allowed_values the allowed values of this attribute. The multiple
 398:    * possible values in this parameter are supposed to be separated by
 399:    * '|', same as in SGML DTD <code>&lt;!ATTLIST </code>tag. This parameter
 400:    * can be null if no list of allowed values is specified.
 401:    * @param atts the previous attribute of this element. This is
 402:    * placed to the field
 403:    * {@link javax.swing.text.html.parser.AttributeList#next },
 404:    * creating a linked list.
 405:    * @return
 406:    */
 407:   protected AttributeList defAttributeList(String name, int type, int modifier,
 408:                                            String default_value,
 409:                                            String allowed_values,
 410:                                            AttributeList atts
 411:                                           )
 412:   {
 413:     AttributeList al = new AttributeList(name);
 414:     al.modifier = modifier;
 415:     al.value = default_value;
 416:     al.next = atts;
 417: 
 418:     if (allowed_values != null)
 419:       {
 420:         StringTokenizer st = new StringTokenizer(allowed_values, " \t|");
 421:         Vector v = new Vector(st.countTokens());
 422: 
 423:         while (st.hasMoreTokens())
 424:           v.add(st.nextToken());
 425: 
 426:         al.values = v;
 427:       }
 428: 
 429:     return al;
 430:   }
 431: 
 432:   /**
 433:    * Creates a new content model.
 434:    * @param type specifies the BNF operation for this content model.
 435:    * The valid operations are documented in the
 436:    * {@link javax.swing.text.html.parser.ContentModel#type }.
 437:    * @param content the content of this content model
 438:    * @param next if the content model is specified by BNF-like
 439:    * expression, contains the rest of this expression.
 440:    * @return The newly created content model.
 441:    */
 442:   protected ContentModel defContentModel(int type, Object content,
 443:                                          ContentModel next
 444:                                         )
 445:   {
 446:     ContentModel model = new ContentModel();
 447:     model.type = type;
 448:     model.next = next;
 449:     model.content = content;
 450: 
 451:     return model;
 452:   }
 453: 
 454:   /**
 455:    * Defines a new element and adds it to the element table.
 456:    * If the element alredy exists,
 457:    * overrides it settings with the specified values.
 458:    * @param name the name of the new element
 459:    * @param type the type of the element
 460:    * @param headless true if the element needs no starting tag
 461:    * @param tailless true if the element needs no closing tag
 462:    * @param content the element content.
 463:    * @param exclusions the elements that must be excluded from the
 464:    * content of this element, in all levels of the hierarchy.
 465:    * @param inclusions the elements that can be included as the
 466:    * content of this element.
 467:    * @param attributes the element attributes.
 468:    * @return the created or updated element.
 469:    */
 470:   protected Element defElement(String name, int type, boolean headless,
 471:                                boolean tailless, ContentModel content,
 472:                                String[] exclusions, String[] inclusions,
 473:                                AttributeList attributes
 474:                               )
 475:   {
 476:     // compute the bit sets
 477:     BitSet exclude = bitSet(exclusions);
 478:     BitSet include = bitSet(inclusions);
 479: 
 480:     Element e =
 481:       defineElement(name, type, headless, tailless, content, exclude, include,
 482:                     attributes
 483:                    );
 484: 
 485:     return e;
 486:   }
 487: 
 488:   /**
 489:    * Creates, intializes and adds to the entity table the new
 490:    * entity.
 491:    * @param name the name of the entity
 492:    * @param type the type of the entity
 493:    * @param data the data section of the entity
 494:    * @return the created entity
 495:    */
 496:   protected Entity defEntity(String name, int type, String data)
 497:   {
 498:     Entity e = newEntity(name, type);
 499:     e.data = data.toCharArray();
 500: 
 501:     return e;
 502:   }
 503: 
 504:   private void assignField(Element e)
 505:   {
 506:     String element_name = e.name;
 507:     try
 508:       {
 509:         // Assign the field via reflection.
 510:         Field f = getClass().getField(element_name.toLowerCase());
 511:         if ((f.getModifiers() & Modifier.PUBLIC) != 0)
 512:           if ((f.getModifiers() & Modifier.STATIC) == 0)
 513:             if (f.getType().isAssignableFrom(e.getClass()))
 514:               f.set(this, e);
 515:       }
 516:     catch (IllegalAccessException ex)
 517:       {
 518:         unexpected(ex);
 519:       }
 520:     catch (NoSuchFieldException ex)
 521:       {
 522:         // This is ok.
 523:       }
 524: 
 525:     // Some virtual machines may still lack the proper
 526:     // implementation of reflection. As the tag fields
 527:     // are not used anywhere in this implementation,
 528:     // (and this class is also rarely used by the end user),
 529:     // it may be better not to crash everything by throwing an error
 530:     // for each case when the HTML parsing is required.
 531:     catch (Throwable t)
 532:       {
 533:         // This VM has no reflection mechanism implemented!
 534:         if (t instanceof OutOfMemoryError)
 535:           throw (Error) t;
 536:       }
 537:   }
 538: 
 539:   /**
 540:    * Create the bit set for this array of elements.
 541:    * The unknown elements are automatically defined and added
 542:    * to the element table.
 543:    * @param elements
 544:    * @return
 545:    */
 546:   private BitSet bitSet(String[] elements)
 547:   {
 548:     BitSet b = new BitSet();
 549: 
 550:     for (int i = 0; i < elements.length; i++)
 551:       {
 552:         Element e = getElement(elements [ i ]);
 553: 
 554:         if (e == null)
 555:           e = newElement(elements [ i ]);
 556: 
 557:         b.set(e.index);
 558:       }
 559: 
 560:     return b;
 561:   }
 562: 
 563:   /**
 564:    * Find the element with the given name in the element table.
 565:    * If not find, create a new element with this name and add to the
 566:    * table.
 567:    * @param name the name of the element
 568:    * @return the found or created element.
 569:    */
 570:   private Element newElement(String name)
 571:   {
 572:     Element e = (Element) elementHash.get(name.toLowerCase());
 573: 
 574:     if (e == null)
 575:       {
 576:         e = new Element();
 577:         e.name = name;
 578:         e.index = elements.size();
 579:         elements.add(e);
 580:         elementHash.put(e.name.toLowerCase(), e);
 581:         assignField(e);
 582:       }
 583:     return e;
 584:   }
 585: 
 586:   /**
 587:    * Creates and adds to the element table the entity with an
 588:    * unitialized data section. Used internally.
 589:    * @param name the name of the entity
 590:    * @param type the type of the entity, a bitwise combination
 591:    * of GENERAL, PARAMETER, SYSTEM and PUBLIC.
 592:    * @throws an error if the parameter is both GENERAL and PARAMETER
 593:    * of both PUBLIC and SYSTEM.
 594:    * @return the created entity
 595:    */
 596:   private Entity newEntity(String name, int type)
 597:   {
 598:     Entity e = new Entity(name, type, null);
 599:     entityHash.put(e.name, e);
 600:     return e;
 601:   }
 602: 
 603:   private void unexpected(Exception ex)
 604:   {
 605:     throw new Error("This should never happen, report a bug", ex);
 606:   }
 607: }