Introduction to syntactic annotation


Acronyms

Initial letters in acronyms, nicknames and related expressions are grouped together as much as possible.
( (NP (NPR FDR)))

( (NP (N-COMP (NPR JC) (NPR Hall))))

( (NP (N-COMP (NPR US) (NPR Steel))))

( (NP (D the)
      (NPR UMWA)))

( (NP (D the)
      (N-COMP (NPR UMW)
	      (PP (P of)
		  (NP (NPR A))))))

( (NP (D the)
      (NPR USA)))

( (NP (D the)
      (N-COMP (NPR US)
	      (PP (P of)
		  (NP (NPR A))))))

( (NP (D the)
      (N-COMP (NPR C) (NPR and) (NPR O) (N canal))))	← AND prevents grouping

Dates

Calendar dates are defined as expressions containing reference to a month together with reference to a day of the month or a year (or both). They are indicated by DATE suffixed to the subcategory of NP that is appropriate in context.
( (IP-MAT (NP-SBJ-DATE ...)
          (VP (BED is)
	      (NP-PRD (D a)
	      	      (N holiday)))
	  (PUNC .)))

( (IP-MAT (NP-SBJ (NP-POS (N$ Today's))
                  (N date))
          (VP (BED is)
	      (NP-PRD-DATE ...))
	  (PUNC .)))

( (IP-MAT (NP-SBJ (PRO He))
          (VP (BED was)
	      (VP (VAN born)
                  (NP-TMP-DATE ...)))
	  (PUNC .)))

( (IP-MAT (NP-SBJ (PRO He))
          (VP (BED was)
	      (VP (VAN born)
                  (PP (P on)
		      (NP-DATE ...))))
	  (PUNC .)))

The internal structure of dates is generally treated as a list of two or three components (the month, and the day or year or both). The internal structure of the list components follows general principles. In the simplest case, the list consists of three sister NPs dominated by NP-DATE.

( (NP-DATE (NP (NPR January))
	   (NP (D the)
	       (ADJP (ADJ second)))
	   (NP (NUMP (NUM-COMP (NUM nineteen) (NUM fourteen))))))

( (NP-DATE (NP (NPR January))
	   (NP (ADJP (ADJ second)))))

( (NP-DATE (NP (NUMP (NUM-COMP (NUM nineteen) (NUM fourteen))))
	   (NP (NPR January))
	   (NP (ADJP (ADJ second)))))

( (NP-DATE (NP (D the)
               (ADJP (ADJ first))
	       (N month))
	   (NP (D the)
	       (ADJP (ADJ last))					← LAST rather than ordinary ordinal
	       (N day))
	   (NP (NUMP (NUM fourteen)))))
	   
( (NP-DATE (NP (NPR January))
	   (NP (NUMP (NUM two)))))					← cardinal (NUM) rather than ordinal (ADJ)

( (NP-DATE (NP (NUMP (NUM One)))
           (NP (NUMP (NUM two)))
	   (NP (NUMP (NUM-COMP (NUM nineteen) (NUM fourteen))))))

Dates are not always expressed as canonical lists, in which case they are annotated according to general principles as far as possible. Note the different attachment levels of the PPs in the following examples.

( (NP-DATE (NP (D the)
	       (ADJP (ADJ second))
	       (N day)
	       (PP (P of)
		   (NP (NPR January))))
	   (PP (P in)							← IN phrase attaches high always
	       (NP (NUMP (NUM fourteen))))))

( (NP-DATE (NP (NPR January))
	   (NP (D the)
	       (ADJP (ADJ second)))
	   (PP (P of)							← OF phrase attaches high
	       (NP (NUMP (NUM fourteen))))))


( (NP-DATE (D the)							← no unary-branching NP under NP-DATE
	   (ADJP (ADJ second))
	   (N day)
	   (PP (P of)
	       (NP (NPR January)
		   (PP (P of)						← OF phrase attaches low
		       (NP (NUMP (NUM fourteen))))))))

Exocentric constructions

Numbers

Cardinal numbers are tagged as NUM. Ordinal numbers are tagged as ADJ.

Numbers below 100 are treated as single orthographic words, even when hyphenated, and are tagged as simple numerals (NUM) rather than as compound numerals (NUM-COMP).

( (NUMP (NUM seventy-nine)))

Otherwise, multi-word linguistic expressions that refer to the natural or real numbers and that could be replaced by the numbers themselves in context are treated as compound numerals and enclosed by NUM-COMP.

( (NUMP (NUM-COMP (NUM seven)				cf. "700"
		  (NUM hundred))))	

( (NUMP (NUM-COMP (NUM nine)
		  (NUM hundred)
		  (NUM eighty-one))))			cf. "981"

For simplicity, all daughters of NUM-COMP are tagged NUM, regardless of whether they are ordinarily so tagged.

( (NP (NUMP (NUM-COMP (NUM a)				cf. "100"
		      (NUM hundred)))
      (NS dollars)))

( (NP (NUMP (NUM-COMP (NUM nineteen)			cf. "1906"
		      (NUM O)
		      (NUM six)))))

( (NP (NUMP (NUM-COMP (NUM point)			cf. "0.38"
		      (NUM three)
		      (NUM eight)))))

NUM (and NUM-COMP) is used for ordinary cardinal numbers as well as for other uses of numbers, such as naming years. As usual, a NUMP cannot function as a complement on its own; it must be dominated by a higher NP.

( (PP (P in)
      (NP (NUMP (NUM-COMP (NUM eighteen)		cf. "1864"
			  (NUM sixty-four))))))

Fractions bearing nominal inflectional morphology are treated as nouns. HALF is treated separately.

( (NP (NS Three-eighths)				despite ".375"
      (PP (P of)
	  (NP (D the) (N total)))))

Expressions like COUPLE, DOZEN, SCORE and so forth are treated as NUM or N depending on whether they bear ordinary nominal morphology and whether it is natural to replace them with a natural number. In unclear cases, the default for these words is N.

( (NP (NUMP (NUM-COMP (NUM a) (NUM dozen)))		cf. "12"
      (NS eggs)))

( (NP (D a)
      (N-COMP (N$ baker's) (N dozen))))

( (NP (NP-MSR (D a)
	      (ADJP (ADJ full))
	      (N dozen))
      (NS eggs)))

( (ADVP-TMP (NP-MSR (NUMP (NUM-COMP (NUM four)		cf. "87"
				    (NUM score)
				    (NUM and)
				    (NUM seven)))
		    (NS years))
	    (ADV ago)))

( (NP (NS scores)
      (PP (P of)
	  (NP (NS cases)))))

Complex expressions that function as NUMP are treated as exocentric constructions. In other words, the expression is annotated with the structure it would ordinarily have and then simply enclosed in NUMP brackets.

( (NP (NUMP (P over) (D a))			← minimum of structure for difficult case
      (N year)))

( (NP (NUMP (PP (P over)
		(NP (NUMP (NUM five)))))
      (NS widgets)))

( (NP (NUMP (PP (RP up)
      	    	(P to)
		(NP (NUMP (NUM five)))))
      (NS widgets)))

( (NP (NUMP (PP (P between)
      	    	(NP (NUMP (NUMP (NUM two))
			  (CONJP (CONJ and)
				 (NUMP (NUM five)))))))
      (NS widgets)))

( (NP (NUMP (NP-MSR (NP (NUMP (NUM two)))
		    (PP (P to)
			(NP (NUMP (NUM five))))))
      (NS widgets)))

( (NP (NUMP (NP-MSR (PP (P from)
			(NP (NUMP (NUM two))))
		    (PP (P to)
			(NP (NUMP (NUM five))))))
      (NS widgets)))

( (NP (NUMP (ADVP (ADV anywhere))
	    (NP-MSR (PP (P from)
			(NP (NUMP (NUM two))))
		    (PP (P to)
			(NP (NUMP (NUM five))))))
      (NS widgets)))

Scalar constructions

Expressions containing a measure phrase that is understood as modifying a dimension (= expressing a point on a scale) are ordinarily headed by an adjective or adverb expressing the dimension. The following is an exhaustive list of the scalar heads in question (their comparative and superlative forms belong as well).
Spatial: DEEP, DISTANT, FAR, HIGH, LONG, TALL, WIDE
Temporal: LATE, OLD
Other: WORTH
( (ADVP (NP-MSR (D a) (N bit))
        (ADVR later)))				← head of scalar construction = ADVR

( (ADJP (NP-MSR (NUMP (NUM thirty-six))
                (NS inches))
        (ADJ long)))				← head of scalar construction = ADJ

( (ADJP (NP-MSR (NUMP (NUM twelve))
                (NS years))
        (ADJ old)))				← head of scalar construction = ADJ

( (ADJP (NP-MSR (D a) (N foot))
        (ADJR wider)))				← head of scalar construction = ADJR

The adjectival or adverbial head of dimension constructions is not always overtly expressed, and the same goes for the nominal head of the measure phrase. In such cases, empty heads are added as necessary in order to assimilate these structures to their fully spelled-out counterparts.

( (IP-MAT (NP-SBJ (PRO She))
          (VP (BEP is)
	      (ADJP-PRD (NP-MSR (NUMP (NUM twelve))
				(NS 0))
			(ADJ 0)))
	  (PUNC .)))

In addition to the measure phrase, the (possibly silent) scalar head may be further modified.

( (ADJP (NP-MSR (NUMP (NUM two))
		(NS miles))
	(ADJ 0)						0 = FAR / DISTANT
	(PP (P from)
	    (NP (N home)))))

( (ADJP (NP-MSR (NUMP (NUM thirty-six))
		(NS inches))
	(ADJ 0)						0 = LONG
	(ADVP (ADV around))				← AROUND is not a scalar term; hence, not the head
	(PP (P in)
	    (NP (N circumference)))))

When the dimension is expressed by a scalar noun, as in the following cases, the entire scalar construction is annotated as a noun phrase, with a dash tag appropriate to the syntactic context.

( (NP (NUMP (NUM thirty-six))
      (NS years)					← head of dimension construction = NS
      (PP (P of)
	  (NP (N age)))))				← dimension expressed, but not as ADJ

( (PP (P from)
      (NP (NUMP (NUM thirty-six))
	  (NS years)
	  (PP (P of)
	      (NP (N age))))))

( (IP-MAT (NP-SBJ (PRO It))
	  (VP (BEP is)
	      (NP-PRD (NUMP (NUM thirty-six))
		      (NS inches)			← head of dimension construction = NS
		      (PP (P in)
			  (NP (N length)))))		← dimension expressed, but not as ADJ
	  (PUNC .)))

The integration of scalar constructions into larger constituents follows general principles.

( (NP (D a)
      (N child)
      (ADJP (NP-MSR (NUMP (NUM eight))			← postnominal ADJP
	                (NS years))
                (ADJ old))))				← head of dimension construction = ADJ

( (NP (D a)
      (N child)
      (PP (P of)					← postnominal PP
	  (ADJP (NP-MSR (NUMP (NUM eight))
	                (NS years))
                (ADJ old)))))				← head of dimension construction = ADJ

( (NP (D a)
      (N child)
      (PP (P of)
          (ADJP (NP-MSR (NUMP (NUM eight))
			(NS 0))
		(ADJ 0)))))

( (NP (D a)
      (N child)
      (IP-RRC (NP-PRD (NUMP (NUM eight))		← postnominal NP integrated via reduced relative clause
      	      	      (NS years) 			← head of dimension construction = NS
		      (PP (P of)
		      	  (NP (N age)))))))

In scalar constructions, WORTH is always tagged as ADJ and then treated like other scalar heads. As is more generally the case, the measure phrase can be expressed by a morphological possessive.

( (ADJP (ADJ worth)
        (NP-MSR (PRO it))))

( (ADJP (ADJ worth)
        (NP-MSR (NUMP (NUM twenty))
		(NS dollars))))

( (ADJP (ADJ worth)
        (NP-MSR (D a)
		(N dollar))))

( (ADJP (NP-MSR (D a)
		(N$ dollar's))
	(ADJ worth)))

When WORTH is modified by clauses functioning as measure expressions, the NP-MSR around the clause is omitted for simplicity.

( (ADJP (ADJ worth)
        (IP-PPL (VP (DAG doing)
                    (ADVP (ADV well))))))

( (ADJP (ADJ worth)
        (CP-EOP (WNP-1 0)
                (IP-PPL (VP (VAG waiting)
                            (PP (P for)
                                (NP *T*-1)))))))

The following examples illustrate the integration of adjective phrases headed by WORTH into larger structures. Where necessary, the ADJP is treated as modifying a silent nominal head.

( (NP (N merchandise)
      (ADJP (ADJ worth)				← postnominal ADJP
            (NP-MSR (NUMP (NUM twenty))
		    (NS dollars)))))

( (NP (ADJP (NP-MSR (D a)			← NP headed by silent head
		    (N$ dollar's))
	    (ADJ worth))			
      (PP (P of)
          (NP (N merchandise)))))

( (VP (VB buy)
      (NP-OB1 (ADJP (NP-MSR (D a)		← NP headed by silent head
			    (N$ dollar's))
		    (ADJ worth)))))