Automatically assigned DDC number: 006454
Manually assigned DDC number: 00635
Title: Statistical Identification of Language
Author:
Subject: Ted Dunning Statistical Identification of Language
Description: A statistically based program has been written which learns to distinguish between languages. The amount of training text that such a program needs is surprisingly small, and the amount of text needed to make an identification is also quite small. The program incorporates no linguistic presuppositions other than the assumption that text can be encoded as a string of bytes. Such a program can be used to determine which language small bits of text are in. It also shows a potential for what might be called `statistical philology' in that it may be applied directly to phonetic transcriptions to help elucidate family trees among language dialects. A variant of this program has been shown to be useful as a quality control in biochemistry. In this application, genetic sequences are assumed to be expressions in a language peculiar to the organism from which the sequence is taken. Thus language identification becomes species identification. Introduction Given the following 20 character strin...
Contributor: The Pennsylvania State University CiteSeer Archives
Publisher: unknown
Date: 1995-12-22
Pubyear: 1994
Format: ps
Identifier: http://citeseer.ist.psu.edu/140384.html
Source: http://www.comp.lancs.ac.uk/computing/users/paul/ucrel/papers/lingdet.ps
Language: en
Rights: unrestricted
<?xml version="1.0" encoding="UTF-8"?>
<references_metadata>
<!--
  ISBN identifiers attached to the record below, with Dewey Decimal
  Classification (DDC) data where it is available.

  Per <identifier>:
    Org / Extracted   the ISBN, with and without the "ISBN:" prefix.
    DDC               raw classification string as extracted (may contain
                      segmentation marks such as "/", "." and "'").
    Normalized_DDC    digits of the DDC class number only.
    Normalized_Weight 0.0909... = 1/11; 11 of the 17 identifiers here carry
                      a DDC. Presumably the weight is 1 / (number of
                      identifiers with a DDC); confirm with the generator.
  Identifiers without a DDC attribute have no Normalized_* attributes.
  NOTE(review): the meaning of the "SELF" marker values is not visible in
  this file; verify against the producing tool.
-->
<rec ID="SELF" Type="SELF" CiteSeer_Book="SELF" CiteSeer_Volume="SELF" Title="Statistical Identification of Language">
<identifier Org="ISBN:0262133601" Paper_ID="SELF" Extracted="0262133601" DDC="410/.285" Normalized_DDC="410285" Normalized_Weight="0.09090909090909091" />
<identifier Org="ISBN:038725482X" Paper_ID="SELF" Extracted="038725482X" DDC="006.454" Normalized_DDC="006454" Normalized_Weight="0.09090909090909091" />
<identifier Org="ISBN:0818678984" Paper_ID="SELF" Extracted="0818678984" DDC="006.4/2" Normalized_DDC="00642" Normalized_Weight="0.09090909090909091" />
<identifier Org="ISBN:1402012160" Paper_ID="SELF" Extracted="1402012160" DDC="025.04" Normalized_DDC="02504" Normalized_Weight="0.09090909090909091" />
<identifier Org="ISBN:1581133200" Paper_ID="SELF" Extracted="1581133200" />
<identifier Org="ISBN:3486581724" Paper_ID="SELF" Extracted="3486581724" DDC="025.04" Normalized_DDC="02504" Normalized_Weight="0.09090909090909091" />
<identifier Org="ISBN:3540234985" Paper_ID="SELF" Extracted="3540234985" DDC="006.35" Normalized_DDC="00635" Normalized_Weight="0.09090909090909091" />
<identifier Org="ISBN:3540321403" Paper_ID="SELF" Extracted="3540321403" />
<identifier Org="ISBN:3540419330" Paper_ID="SELF" Extracted="3540419330" DDC="005.74" Normalized_DDC="00574" Normalized_Weight="0.09090909090909091" />
<!--
  The raw DDC below ends in " 22 22", which looks like a repeated DDC edition
  designator rather than part of the class number. Normalized_DDC therefore
  keeps only the class-number digits (6584038011) instead of the previous
  65840380112222. The raw DDC attribute is left as extracted.
  NOTE(review): confirm the upstream normalizer drops edition suffixes.
-->
<identifier Org="ISBN:3540487123" Paper_ID="SELF" Extracted="3540487123" DDC="658.4'038011 22 22" Normalized_DDC="6584038011" Normalized_Weight="0.09090909090909091" />
<identifier Org="ISBN:3540652590" Paper_ID="SELF" Extracted="3540652590" DDC="418/.02/0285" Normalized_DDC="418020285" Normalized_Weight="0.09090909090909091" />
<identifier Org="ISBN:3540675892" Paper_ID="SELF" Extracted="3540675892" DDC="001/.01/2" Normalized_DDC="001012" Normalized_Weight="0.09090909090909091" />
<identifier Org="ISBN:3540688218" Paper_ID="SELF" Extracted="3540688218" />
<identifier Org="ISBN:3540747818" Paper_ID="SELF" Extracted="3540747818" />
<identifier Org="ISBN:3540851097" Paper_ID="SELF" Extracted="3540851097" />
<identifier Org="ISBN:3642003818" Paper_ID="SELF" Extracted="3642003818" />
<identifier Org="ISBN:9042009438" Paper_ID="SELF" Extracted="9042009438" DDC="410.285" Normalized_DDC="410285" Normalized_Weight="0.09090909090909091" />
</rec>
</references_metadata>