Automatically assigned DDC number: 00635
Manually assigned DDC number: 00635
Number of references: 8
Title: Unsupervised Learning of Subcategorisation Information and Its Application in a Parsing Subtask
Author: Sabine Buchholz
Subject: Sabine Buchholz Unsupervised Learning of Subcategorisation Information and Its Application in a Parsing Subtask
Description: This paper is about two aspects of subcategorisation in NLP. First, it is about the automatic extraction of subcategorisation information from corpora. More specifically, we are concerned with unsupervised learning of subcategorisation information from tagged text by means of hierarchical clustering. The second aspect of the paper is the usage of this subcategorisation information for parsing, especially for the distinction between complements and adjuncts. We show that the information learned by unsupervised clustering can be exploited by a memory-based learner, to improve upon the complement-adjunct distinction. We compare the improvement gained by the use of this unsupervised information (1%) to that of different representations of subcategorisation information extracted from the treebank annotation (maximum 1.5%). The unsupervised information thus achieves two thirds of the improvement that can be obtained from the hand-crafted treebank information.
Contributor: The Pennsylvania State University CiteSeer Archives
Publisher: unknown
Date: 1998-11-30
Format: ps
Identifier: http://citeseer.ist.psu.edu/168744.html
Source: ftp://ilk.kub.nl/pub/papers/ilk.9811.ps.gz
Language: en
Relation:
Relation:
Relation:
Relation:
Relation:
Relation:
Relation:
Relation:
Rights: unrestricted
<?xml version="1.0" encoding="UTF-8"?>
<references_metadata>
<rec ID="/569344.html" Type="inproceedings" CiteSeer_Book="ACL Proceedings 25th Annual Meeting" CiteSeer_Volume="" Title="The derivation of grammatically indexed lexicon from the Longman Dictionary of Contemporary English," />
<rec ID="/552546.html" Type="incollection" CiteSeer_Book="Proceedings of the Second Conference on Empirical Methods in Natural Language Processing" CiteSeer_Volume="" Title="Tagging Grammatical Functions,">
<identifier Org="ISBN:1402013345" Paper_ID="/552546.html" Extracted="1402013345" DDC="415" Normalized_DDC="415" Normalized_Weight="0.16666666666666666" />
<identifier Org="ISBN:140202293X" Paper_ID="/552546.html" Extracted="140202293X" DDC="006.3/5" Normalized_DDC="00635" Normalized_Weight="0.16666666666666666" />
<identifier Org="ISBN:3110176157" Paper_ID="/552546.html" Extracted="3110176157" DDC="410" Normalized_DDC="41" Normalized_Weight="0.16666666666666666" />
<identifier Org="ISBN:3540230491" Paper_ID="/552546.html" Extracted="3540230491" DDC="006.35" Normalized_DDC="00635" Normalized_Weight="0.16666666666666666" />
<identifier Org="ISBN:3540245235" Paper_ID="/552546.html" Extracted="3540245235" DDC="025.04" Normalized_DDC="02504" Normalized_Weight="0.16666666666666666" />
<identifier Org="ISBN:8483382822" Paper_ID="/552546.html" Extracted="8483382822" />
<identifier Org="ISBN:9027249911" Paper_ID="/552546.html" Extracted="9027249911" DDC="410/.285" Normalized_DDC="410285" Normalized_Weight="0.16666666666666666" />
</rec>
<rec ID="/582398.html" Type="misc" CiteSeer_Book="" CiteSeer_Volume="" Title="Automatic Extraction of Subcategorization from Corpora,">
<identifier Org="ISBN:0130950696" Paper_ID="/582398.html" Extracted="0130950696" DDC="410/.285" Normalized_DDC="410285" Normalized_Weight="0.06666666666666667" />
<identifier Org="ISBN:0199292345" Paper_ID="/582398.html" Extracted="0199292345" DDC="413.028" Normalized_DDC="413028" Normalized_Weight="0.06666666666666667" />
<identifier Org="ISBN:0824790006" Paper_ID="/582398.html" Extracted="0824790006" DDC="006.3/5" Normalized_DDC="00635" Normalized_Weight="0.06666666666666667" />
<identifier Org="ISBN:140202293X" Paper_ID="/582398.html" Extracted="140202293X" DDC="006.3/5" Normalized_DDC="00635" Normalized_Weight="0.06666666666666667" />
<identifier Org="ISBN:1558607862" Paper_ID="/582398.html" Extracted="1558607862" />
<identifier Org="ISBN:1588111563" Paper_ID="/582398.html" Extracted="1588111563" DDC="415" Normalized_DDC="415" Normalized_Weight="0.06666666666666667" />
<identifier Org="ISBN:1860672302" Paper_ID="/582398.html" Extracted="1860672302" />
<identifier Org="ISBN:354000680X" Paper_ID="/582398.html" Extracted="354000680X" DDC="006.3/3" Normalized_DDC="00633" Normalized_Weight="0.06666666666666667" />
<identifier Org="ISBN:3540214593" Paper_ID="/582398.html" Extracted="3540214593" DDC="006.3" Normalized_DDC="0063" Normalized_Weight="0.06666666666666667" />
<identifier Org="ISBN:3540240179" Paper_ID="/582398.html" Extracted="3540240179" DDC="025.04" Normalized_DDC="02504" Normalized_Weight="0.06666666666666667" />
<identifier Org="ISBN:3540244751" Paper_ID="/582398.html" Extracted="3540244751" DDC="006.3/5" Normalized_DDC="00635" Normalized_Weight="0.06666666666666667" />
<identifier Org="ISBN:3540287892" Paper_ID="/582398.html" Extracted="3540287892" DDC="006.3/5" Normalized_DDC="00635" Normalized_Weight="0.06666666666666667" />
<identifier Org="ISBN:3540335870" Paper_ID="/582398.html" Extracted="3540335870" DDC="005.1" Normalized_DDC="0051" Normalized_Weight="0.06666666666666667" />
<identifier Org="ISBN:3540425578" Paper_ID="/582398.html" Extracted="3540425578" DDC="006.3/5" Normalized_DDC="00635" Normalized_Weight="0.06666666666666667" />
<identifier Org="ISBN:3540650687" Paper_ID="/582398.html" Extracted="3540650687" DDC="006.3" Normalized_DDC="0063" Normalized_Weight="0.06666666666666667" />
<identifier Org="ISBN:354078134X" Paper_ID="/582398.html" Extracted="354078134X" DDC="005.52" Normalized_DDC="00552" Normalized_Weight="0.06666666666666667" />
<identifier Org="ISBN:382336099X" Paper_ID="/582398.html" Extracted="382336099X" DDC="401/.43" Normalized_DDC="40143" Normalized_Weight="0.06666666666666667" />
<identifier Org="ISBN:8483382822" Paper_ID="/582398.html" Extracted="8483382822" />
</rec>
<rec ID="/46270.html" Type="misc" CiteSeer_Book="" CiteSeer_Volume="" Title="Distinguishing complements from adjuncts using memory-based learning,">
<identifier Org="ISBN:0521808901" Paper_ID="/46270.html" Extracted="0521808901" DDC="006.35" Normalized_DDC="00635" Normalized_Weight="0.3333333333333333" />
<identifier Org="ISBN:3540660445" Paper_ID="/46270.html" Extracted="3540660445" DDC="006.3/31" Normalized_DDC="006331" Normalized_Weight="0.3333333333333333" />
<identifier Org="ISBN:9027249911" Paper_ID="/46270.html" Extracted="9027249911" DDC="410/.285" Normalized_DDC="410285" Normalized_Weight="0.3333333333333333" />
<identifier Org="ISBN:9042005998" Paper_ID="/46270.html" Extracted="9042005998" />
<identifier Org="ISBN:9042006099" Paper_ID="/46270.html" Extracted="9042006099" />
</rec>
<rec ID="/585525.html" Type="misc" CiteSeer_Book="" CiteSeer_Volume="" Title="Can subcategorisation probabilities help a statistical parser,">
<identifier Org="ISBN:0792366166" Paper_ID="/585525.html" Extracted="0792366166" DDC="006.3/5" Normalized_DDC="00635" Normalized_Weight="0.3333333333333333" />
<identifier Org="ISBN:1402013345" Paper_ID="/585525.html" Extracted="1402013345" DDC="415" Normalized_DDC="415" Normalized_Weight="0.3333333333333333" />
<identifier Org="ISBN:140202293X" Paper_ID="/585525.html" Extracted="140202293X" DDC="006.3/5" Normalized_DDC="00635" Normalized_Weight="0.3333333333333333" />
</rec>
<rec ID="/565817.html" Type="inproceedings" CiteSeer_Book="Proc ARPA Human Language Technology Workshop 93" CiteSeer_Volume="" Title="The Comlex Syntax Project,">
<identifier Org="ISBN:026206197X" Paper_ID="/565817.html" Extracted="026206197X" DDC="423/.1" Normalized_DDC="4231" Normalized_Weight="0.3333333333333333" />
<identifier Org="ISBN:3540662235" Paper_ID="/565817.html" Extracted="3540662235" DDC="006.3/3" Normalized_DDC="00633" Normalized_Weight="0.3333333333333333" />
<identifier Org="ISBN:9051993161" Paper_ID="/565817.html" Extracted="9051993161" DDC="005.1" Normalized_DDC="0051" Normalized_Weight="0.3333333333333333" />
</rec>
<rec ID="/580256.html" Type="inproceedings" CiteSeer_Book="Meeting of the Association for Computational Linguistics" CiteSeer_Volume="" Title="Automatic Acquisition of a Large Subcategorization Dictionary from Corpora,">
<identifier Org="ISBN:0130950696" Paper_ID="/580256.html" Extracted="0130950696" DDC="410/.285" Normalized_DDC="410285" Normalized_Weight="0.06666666666666667" />
<identifier Org="ISBN:019927634X" Paper_ID="/580256.html" Extracted="019927634X" DDC="410.285" Normalized_DDC="410285" Normalized_Weight="0.06666666666666667" />
<identifier Org="ISBN:026206197X" Paper_ID="/580256.html" Extracted="026206197X" DDC="423/.1" Normalized_DDC="4231" Normalized_Weight="0.06666666666666667" />
<identifier Org="ISBN:0262523388" Paper_ID="/580256.html" Extracted="0262523388" DDC="410/.1/5192" Normalized_DDC="41015192" Normalized_Weight="0.06666666666666667" />
<identifier Org="ISBN:0262611228" Paper_ID="/580256.html" Extracted="0262611228" DDC="410.72" Normalized_DDC="41072" Normalized_Weight="0.06666666666666667" />
<identifier Org="ISBN:0521592771" Paper_ID="/580256.html" Extracted="0521592771" DDC="410/.285" Normalized_DDC="410285" Normalized_Weight="0.06666666666666667" />
<identifier Org="ISBN:0792344634" Paper_ID="/580256.html" Extracted="0792344634" DDC="410/.285" Normalized_DDC="410285" Normalized_Weight="0.06666666666666667" />
<identifier Org="ISBN:0792354990" Paper_ID="/580256.html" Extracted="0792354990" DDC="415" Normalized_DDC="415" Normalized_Weight="0.06666666666666667" />
<identifier Org="ISBN:079236368X" Paper_ID="/580256.html" Extracted="079236368X" DDC="413/.028" Normalized_DDC="413028" Normalized_Weight="0.06666666666666667" />
<identifier Org="ISBN:0824790006" Paper_ID="/580256.html" Extracted="0824790006" DDC="006.3/5" Normalized_DDC="00635" Normalized_Weight="0.06666666666666667" />
<identifier Org="ISBN:1558607862" Paper_ID="/580256.html" Extracted="1558607862" />
<identifier Org="ISBN:3540250565" Paper_ID="/580256.html" Extracted="3540250565" DDC="006.3" Normalized_DDC="0063" Normalized_Weight="0.06666666666666667" />
<identifier Org="ISBN:3540425578" Paper_ID="/580256.html" Extracted="3540425578" DDC="006.3/5" Normalized_DDC="00635" Normalized_Weight="0.06666666666666667" />
<identifier Org="ISBN:3540441484" Paper_ID="/580256.html" Extracted="3540441484" DDC="006.4" Normalized_DDC="0064" Normalized_Weight="0.06666666666666667" />
<identifier Org="ISBN:3540590404" Paper_ID="/580256.html" Extracted="3540590404" DDC="418/.02/0285" Normalized_DDC="418020285" Normalized_Weight="0.06666666666666667" />
<identifier Org="ISBN:3540609253" Paper_ID="/580256.html" Extracted="3540609253" DDC="006.3/5" Normalized_DDC="00635" Normalized_Weight="0.06666666666666667" />
<identifier Org="ISBN:3642008305" Paper_ID="/580256.html" Extracted="3642008305" />
</rec>
<rec ID="/190241.html" Type="misc" CiteSeer_Book="" CiteSeer_Volume="" Title="The Language Environment and Syntactic Word-Class Acquisition,">
<identifier Org="ISBN:0546649289" Paper_ID="/190241.html" Extracted="0546649289" />
<identifier Org="ISBN:3110113082" Paper_ID="/190241.html" Extracted="3110113082" DDC="401.4" Normalized_DDC="4014" Normalized_Weight="0.5" />
<identifier Org="ISBN:3540762639" Paper_ID="/190241.html" Extracted="3540762639" DDC="006.3/2" Normalized_DDC="00632" Normalized_Weight="0.5" />
</rec>
<rec ID="SELF" Type="SELF" CiteSeer_Book="SELF" CiteSeer_Volume="SELF" Title="Unsupervised Learning of Subcategorisation Information and Its Application in a Parsing Subtask" />
</references_metadata>