Syntactic topics


Amalgams

We treat amalgams as exocentric constructions. Three common types discussed in the literature are listed and illustrated below, but amalgams occur beyond these types.

Like free relatives, amalgams are dominated by a phrasal category that indicates their function in the larger structure. They differ from free relatives in that the phrasal category immediately dominates IP-MAT rather than CP-FRL.

( (IP-MAT (NP-SBJ (PRO We))
	  (VP (VBD found)
	      (NP-OB1 (IP-MAT (NP-SBJ (PRO I))
			      (VP (VBP forget)
				  (CP-QUE-SUB (WNP-1 (WPRO what))
					      (IP-SUB (NP-SBJ-2 (PRO it@))
						      (VP (BEP @'s)
							  (VP (VAN called)
							      (IP-ECM (NP-SBJ *-2)
								      (NP-PRD *T*-1))))))))))
	  (PUNC .)))

( (IP-MAT (NP-SBJ (PRO We))
	  (VP (VBD found)
	      (NP-OB1 (D a)
		      (N (IP-MAT (NP-SBJ (PRO I))
				 (VP (VBP forget)
				     (CP-QUE-SUB (WNP-1 (WPRO what))
						 (IP-SUB (NP-SBJ-2 (PRO it@))
							 (VP (BEP @'s)
							     (VP (VAN called)
								 (IP-ECM (NP-SBJ *-2)
									 (NP-PRD *T*-1)))))))))))
	  (PUNC .)))

( (IP-MAT (NP-SBJ (PRO We))
	  (VP (VBD found)
	      (NP-OB1 (IP-MAT (NP-SBJ (PRO I))
			      (VP (VBP forget)
				  (CP-QUE-SUB (WNP-1 (WPRO what))
					      (IP-SUB (NP-SBJ-2 (PRO we))
						      (VP (VBD found)
							  (NP-OB1 *T*-1))))))))
	  (PUNC .)))

( (IP-MAT (NP-SBJ (PRO We))				← Andrews amalgam (with sluicing)
	  (VP (VBD found)
	      (NP-OB1 (IP-MAT (NP-SBJ (PRO I))
			      (VP (VBP forget)
				  (CP-QUE-SUB (WNP-x (WPRO what)))))))
	  (PUNC .)))

( (IP-MAT (NP-SBJ (PRO We))				← Andrews amalgam (with sluicing)
	  (VP (VBD stayed)
	      (NP-OB1 (IP-MAT (NP-SBJ (PRO I))
			      (VP (MD ca@)
				  (NEG @n't)
				  (VP (VB remember)
				      (CP-QUE-SUB (WADVP-x (WADVP (WADV how))
							   (ADV long))))))))
	  (PUNC .)))

( (IP-MAT (NP-SBJ (PRO They))
	  (VP (VBD worshipped)
	      (NP-OB1 (IP-MAT (NP-SBJ (PRO I))
			      (VP (VBP think)
				  (CP-THT (C that / 0)
					  (IP-SUB (NP-SBJ (PRO it))
						  (VP (BED was)
						      (NP-PRD (D a)
							      (ADJP (ADJ golden))
							      (N calf))
						      (CP-CLF (WNP-1 (WPRO 0))
							      (C that / 0)
							      (IP-SUB (NP-SBJ (PRO they))
								      (VP (VBD worshipped)
									  (NP-OB1 *T*-1)))))))))))
	  (PUNC .)))

( (IP-MAT (NP-SBJ (PRO They))				← Horn amalgam (with complete elision of CP-CLF)
	  (VP (VBD worshipped)
	      (NP-OB1 (IP-MAT (NP-SBJ (PRO I))
			      (VP (VBP think)
				  (CP-THT (C that / 0)
					  (IP-SUB (NP-SBJ (PRO it))
						  (VP (BED was)
						      (NP-PRD (D a)
							      (ADJP (ADJ golden))
							      (N calf)))))))))
	  (PUNC .)))

( (IP-MAT (NP-SBJ (PRO They))				← Horn amalgam (with elision of CP-CLF)
	  (VP (VBD persisted)
	      (PP (P in)
		  (NP (D this)
		      (N behavior)))
	      (NP-MSR (IP-MAT (NP-SBJ (PRO it))
			      (VP (MD must)
				  (VP (HV have)
				      (VP (BEN been)
					  (PP (P for)
					      (NP (NS years)))))))))
	  (PUNC .)))

( (IP-MAT (NP-SBJ (D That@))		← copular amalgam (second part of amalgam consists of CP-FRL subject and simple predicate)
	  (VP (BEP @'s)
	      (NP-PRD (IP-MAT (NP-SBJ (CP-FRL (WNP-1 (WPRO what))
					      (IP-SUB (NP-SBJ-2 (PRO it@))
						      (VP (BEP @'s)
							  (VP (VAN called)
							      (IP-ECM (NP-SBJ *-2)
								      (NP-PRD *T*-1)))))))
			      (VP (BEP is)
				  (NP-PRD (N horticulture))))))))

( (IP-MAT (NP-SBJ (D It@))		← copular amalgam (second part of amalgam consists of simple subject and CP-FRL predicate)
	  (VP (BEP @'s)
	      (NP-PRD (IP-MAT (NP-SBJ (D a)
				      (N problem))
			      (VP (BEP is)
				  (NP-PRD (CP-FRL (WNP-1 (WPRO what))
						  (IP-SUB (NP-SBJ (PRO it))
							  (VP (BEP is)
							      (NP-PRD *T*-1)))))))))))

Calendar dates

Calendar dates are defined (for purposes of annotation) as expressions containing reference to a month together with reference to a day of the month or a year (or both). They are indicated by DATE suffixed to the subcategory of NP that is appropriate in context.
( (IP-MAT (NP-SBJ-DATE ...)
          (VP (BEP is)
	      (NP-PRD (D a)
	      	      (N holiday)))
	  (PUNC .)))

( (IP-MAT (NP-SBJ (NP-POS (N$ Today's))
                  (N date))
          (VP (BEP is)
	      (NP-PRD-DATE ...))
	  (PUNC .)))

( (IP-MAT (NP-SBJ (PRO He))
          (VP (BED was)
	      (VP (VAN born)
                  (NP-TMP-DATE ...)))
	  (PUNC .)))

( (IP-MAT (NP-SBJ (PRO He))
          (VP (BED was)
	      (VP (VAN born)
                  (PP (P on)
		      (NP-DATE ...))))
	  (PUNC .)))

The internal structure of dates is generally treated as a list of two or three components (the month, and the day or year or both). The internal structure of the list components follows general principles. In the simplest case, the list consists of three sister NPs dominated by NP-DATE.

( (NP-DATE (NP (NPR January))
	   (NP (D the)
	       (ADJP (ADJ second)))
	   (NP (NUMP (NUM-COMP (NUM nineteen) (NUM fourteen))))))

( (NP-DATE (NP (NPR January))
	   (NP (ADJP (ADJ second)))))

( (NP-DATE (NP (NUMP (NUM-COMP (NUM nineteen) (NUM fourteen))))
	   (NP (NPR January))
	   (NP (ADJP (ADJ second)))))

( (NP-DATE (NP (D the)
               (ADJP (ADJ first))
	       (N month))
	   (NP (D the)
	       (ADJP (ADJ last))					← LAST rather than ordinary ordinal
	       (N day))
	   (NP (NUMP (NUM fourteen)))))
	   
( (NP-DATE (NP (NPR January))
	   (NP (NUMP (NUM two)))))					← cardinal (NUM) rather than ordinal (ADJ)

( (NP-DATE (NP (NUMP (NUM One)))
           (NP (NUMP (NUM two)))
	   (NP (NUMP (NUM-COMP (NUM nineteen) (NUM fourteen))))))

Dates are not always expressed as canonical lists, in which case they are annotated according to general principles as far as possible. Note the different attachment levels of the PPs in the following examples.

( (NP-DATE (NP (D the)
	       (ADJP (ADJ second))
	       (N day)
	       (PP (P of)
		   (NP (NPR January))))
	   (PP (P in)							← IN phrase attaches high always
	       (NP (NUMP (NUM fourteen))))))

( (NP-DATE (NP (NPR January))
	   (NP (D the)
	       (ADJP (ADJ second)))
	   (PP (P of)							← OF phrase attaches high
	       (NP (NUMP (NUM fourteen))))))


( (NP-DATE (D the)							← no unary-branching NP under NP-DATE
	   (ADJP (ADJ second))
	   (N day)
	   (PP (P of)
	       (NP (NPR January)
		   (PP (P of)						← OF phrase attaches low
		       (NP (NUMP (NUM fourteen))))))))

Conjunction and gapping

Degree and comparative constructions

Empty categories

Exocentric structures

Exocentric structures include, among others: amalgams, free relatives, and complex expressions that function as NUMP.

Exocentric structures have ordinary internal structure, but are (as far as we can tell) integrated into larger structures by brute force. For instance, free relative clauses have the internal structure of an ordinary wh- movement construction, but appear in contexts not ordinarily available to wh- clauses. We annotate the internal structure of exocentric structures according to general relevant principles and then simply add an appropriate label indicating the function that the exocentric structure serves in the larger structure.

( (IP-MAT (NP-SBJ (CP-FRL (WNP-1 (WD whatever) (N track))
			  (IP-SUB (NP-SBJ (D the) (N train))
				  (VP (VBP runs)
				      (PP (P on)
					  (NP *T*-1))))))
	  (VP (VBP needs)
	      (IP-INF (TO to)
		      (VP (BE be)
			  (VP (VAN repaired)))))
	  (PUNC .)))

( (VP (VB walk)
      (PP (P along)
	  (NP (CP-FRL (WNP-1 (WD whatever) (N track))
		      (IP-SUB (NP-SBJ (D the) (N train))
			      (VP (VBP runs)
				  (PP (P on)
				      (NP *T*-1)))))))))

Numbers

Numbers are transcribed as text, rather than with Arabic numerals. When zero is pronounced like the interjection "oh", the transcription can have either "oh" or capital "O", but not the digit "0" (which is a reserved character for empty categories). The choice of "oh" vs. "O" should be consistent within a corpus.

Ordinal numbers are tagged as ADJ. Cardinal numbers that do not serve a counting function (more precisely, that do not count the head that the number is construed with) are also tagged as ADJ. For instance, when a cardinal number refers to the model year of a car, it is the year that is being counted (in a loose sense), but in any event, certainly not the car. Analogously for numbers referring to the gauge of firearms, the year a law was passed, and so on. Otherwise, cardinal numbers are tagged as NUM.

Multi-word expressions (including ones under 100) that refer to the natural or real numbers and that could be replaced in context by the numbers themselves are treated as compound numerals and enclosed by NUM-COMP.

( (NUMP (NUM-COMP (NUM seven)				cf. "700"
		  (NUM hundred))))	

( (NUMP (NUM-COMP (NUM nine)
		  (NUM hundred)
		  (NUM eighty-one))))			cf. "981"

For simplicity, all daughters of NUM-COMP are tagged NUM, regardless of whether they are ordinarily so tagged.

( (NP (NUMP (NUM-COMP (NUM a)				cf. "100"
		      (NUM hundred)))
      (NS dollars)))

( (NP (NUMP (NUM-COMP (NUM nineteen)			cf. "1906"
		      (NUM O)				← letter "O"
		      (NUM six)))))

( (NP (NUMP (NUM-COMP (NUM point)			cf. "0.38"
		      (NUM three)
		      (NUM eight)))))

NUM (and NUM-COMP) is used for ordinary cardinal numbers as well as for other uses of numbers, such as naming years. The usual convention applies that NUMP cannot function as a complement unless it is dominated by a higher NP.

( (PP (P in)
      (NP (NUMP (NUM-COMP (NUM eighteen)		cf. "1864"
			  (NUM sixty-four))))))

But when numbers are used as names of concrete entities, such as highways, mines, and so on, they are treated as proper nouns.

( (PP (P on)
      (NP (N-COMP (NPR US) (NPR Six) (NPR Seventy-six)))))

( (PP (P at)
      (NP (N-COMP (NPR Number) (NPR Two) (NPR Mines)))))

Fractions bearing nominal inflectional morphology are treated as nouns. HALF raises special issues that are discussed separately.

( (NP (NS Three-eighths)				despite ".375"
      (PP (P of)
	  (NP (D the) (N total)))))

Expressions like COUPLE, DOZEN, SCORE, and so forth are treated as NUM or N depending on whether they bear ordinary nominal morphology and whether it is natural to replace them with a natural number. The default for unclear cases is N.

( (NP (NUMP (NUM-COMP (NUM a) (NUM dozen)))		cf. "12"
      (NS eggs)))

( (NP (D a)
      (N-COMP (N$ baker's) (N dozen))))

( (NP (NP-MSR (D a)
	      (ADJP (ADJ full))
	      (N dozen))
      (NS eggs)))

( (ADVP-TMP (NP-MSR (NUMP (NUM-COMP (NUM four)		cf. "87"
				    (NUM score)
				    (NUM and)
				    (NUM seven)))
		    (NS years))
	    (ADV ago)))

( (NP (NS scores)
      (PP (P of)
	  (NP (NS cases)))))

Complex expressions that function as NUMP are treated as exocentric constructions. In other words, the expression is annotated with the structure it would ordinarily have and then simply enclosed in NUMP brackets.

( (NP (NUMP (P over) (D a))			← minimum of structure for difficult case
      (N year)))

( (NP (NUMP (PP (P over)
		(NP (NUMP (NUM five)))))
      (NS widgets)))

( (NP (NUMP (PP (RP up)
     	    	(P to)
		(NP (NUMP (NUM five)))))
      (NS widgets)))

( (NP (NUMP (PP (P between)
      	    	(NP (NUMP (NUMP (NUM two))
			  (CONJP (CONJ and)
				 (NUMP (NUM five)))))))
      (NS widgets)))

( (NP (NUMP (NP-MSR (NP (NUMP (NUM two)))
		    (PP (P to)
			(NP (NUMP (NUM five))))))
      (NS widgets)))

( (NP (NUMP (NP-MSR (PP (P from)
			(NP (NUMP (NUM two))))
		    (PP (P to)
			(NP (NUMP (NUM five))))))
      (NS widgets)))

( (NP (NUMP (ADVP (ADV anywhere))
	    (NP-MSR (PP (P from)
			(NP (NUMP (NUM two))))
		    (PP (P to)
			(NP (NUMP (NUM five))))))
      (NS widgets)))

Profanity

For specific lexical items, see Individual words and phrases.

When required by the linguistic context (for instance, transitivity), profane expressions are annotated as fulfilling ordinary argument roles in a sentence.

( (VP (VB scare)
      (NP-OB1 (D the)
	      (N fuck / hell / shit))
      (PP (RP out)
	  (PP (P of)
	      (NP (Q+N somebody))))))

In other contexts, profane expressions are enclosed in INTJP brackets and attached as high as possible in the structure.

( (CP-QUE-MAT (WNP-1 (WPRO what))
	      (INTJP (NP (D the)
			 (N fuck / hell)))
	      (IP-SUB (NP-SBJ *T*-1)
		      (VP (ADVP-TMP (ADV just))
			  (VBD happened)))
	      (PUNC ?)))

( (IP-IMP (VP (VBI Let@)
		      (IP-ECM (NP-SBJ (PRO @'s))
		      (VP (GT get)
			  (INTJP (NP (D the)
				     (N fuck / hell)))
			  (PP (RP out)
			      (PP (P of)
				  (ADVP (ADV here)))))))
	  (PUNC !)))

( (IP-MAT (INTJP (NP (D the)
		     (N fuck / hell)))
	  (NP-SBJ (PRO it))
	  (VP (BEP is))
	  (PUNC !)))

Titles (of books, movies, and so on)

Titles of books, movies, and so on are indicated with the dash tag -TTL. Very rarely, the syntactic context requires the title to be enclosed as a stand-alone TTL constituent.
( (IP-MAT (NP-SBJ (PRO We))
          (VP (VBD read)
	      (PUNC ")
              (NP-OB1-TTL (TO To) (VB Kill) (D a) (N Mockingbird))
	      (PUNC "))
	  (PUNC .)))

( (IP-MAT (NP-SBJ (PRO We))
          (VP (VBD read)
              (NP-OB1 (NP-POS (N-COMP (NPR Harper) (NPR$ Lee's)))
		      (TTL (TO To) (VB Kill) (D a) (N Mockingbird))))
	  (PUNC .)))