From tk at edam.speech.cs.cmu.edu Sat Jun 9 23:22:17 2007 From: tk at edam.speech.cs.cmu.edu (tk@edam.speech.cs.cmu.edu) Date: Sat, 9 Jun 2007 23:22:17 -0400 Subject: [RavenclawDev 278] [921] Tools/MakeLM/makelm.pl: Added facility for makelm.pl to incorporate a hand dictionary. Message-ID: <200706100322.l5A3MHRF029408@edam.speech.cs.cmu.edu> An HTML attachment was scrubbed... URL: http://mailman.srv.cs.cmu.edu/pipermail/ravenclaw-developers/attachments/20070609/99728a70/attachment.html -------------- next part -------------- Modified: Tools/MakeLM/makelm.pl =================================================================== --- Tools/MakeLM/makelm.pl 2007-06-08 19:04:20 UTC (rev 920) +++ Tools/MakeLM/makelm.pl 2007-06-10 03:22:17 UTC (rev 921) @@ -52,7 +52,9 @@ $IDNGRAM = $PROJECT.'.idngram'; $DECODERCONFIGDIR = File::Spec->catdir($RESOURCESDIR, 'DecoderConfig'); $LM = File::Spec->catfile($DECODERCONFIGDIR, 'LanguageModel', $PROJECT.'LM.arpa'); -$DICT = File::Spec->catfile($DECODERCONFIGDIR, 'Dictionary', $PROJECT.'.dict'); +$DICTDIR = File::Spec->catdir($DECODERCONFIGDIR, 'Dictionary'); +$HAND_DICT = File::Spec->catfile($DICTDIR, 'hand.dict'); +$DICT = File::Spec->catfile($DICTDIR, $PROJECT.'.dict'); $REDUCED_DICT = $DICT.'.reduced_phoneset'; &say('compile', 'compiling grammar...'); @@ -245,7 +247,11 @@ sub getdicLocal { my ($pin, $pout) = (IO::Handle->new, IO::Handle->new); - my $ppid = open2($pout, $pin, $PRONOUNCE, '-d', $LEXDATA, '-D', 'cmudict.0.6d_SPHINX_NEW', '-P') + my @pronounce_args = ('-P', + '-d', $LEXDATA, + '-D', 'cmudict.0.6d_SPHINX_NEW'); + push (@pronounce_args, '-H', $HAND_DICT) if -e $HAND_DICT; + my $ppid = open2($pout, $pin, $PRONOUNCE, @pronounce_args) || die "Couldn't run pronouniation server: '$PRONOUNCE'"; open(GRAMSTREAM, $_[0]) || die "Can't open gramstream: '$_[0]'"; From tk at edam.speech.cs.cmu.edu Mon Jun 11 12:40:16 2007 From: tk at edam.speech.cs.cmu.edu (tk@edam.speech.cs.cmu.edu) Date: Mon, 11 Jun 2007 12:40:16 -0400 Subject: 
[RavenclawDev 279] [922] Tools/MakeLM/makelm.pl: 1) better options handling with getopt Message-ID: <200706111640.l5BGeGbG000966@edam.speech.cs.cmu.edu> An HTML attachment was scrubbed... URL: http://mailman.srv.cs.cmu.edu/pipermail/ravenclaw-developers/attachments/20070611/3dbeec83/attachment.html -------------- next part -------------- Modified: Tools/MakeLM/makelm.pl =================================================================== --- Tools/MakeLM/makelm.pl 2007-06-10 03:22:17 UTC (rev 921) +++ Tools/MakeLM/makelm.pl 2007-06-11 16:40:15 UTC (rev 922) @@ -7,65 +7,70 @@ use File::stat; use IO::Handle; use IPC::Open2; +use Getopt::Long; $ENV{'LC_COLLATE'} = 'C'; $ENV{'LC_ALL'} = 'C'; use locale; +use strict; +sub usage { + return "usage: $0 [--resourcesdir {resources directory}] [--samplesize {sample size}] [--source {source}] [--projectname {project name}]$/"; +} + +#open log file my $LOGFILE = 'log.txt'; open(LOG, ">$LOGFILE") if $LOGFILE; +#need access to cygwin dir so that cygwin1.dll can be found $ENV{'Path'} .= ($ENV{'CYGWIN_DIR'} || 'C:\cygwin').'\bin;'; +#setup default vaiables +#needs to run from makelm directory +my $MAKELMDIR = File::Spec->rel2abs(File::Spec->curdir); my $RESOURCESDIR = File::Spec->catdir(File::Spec->updir, File::Spec->updir, 'Resources'); #my $SOURCE = 'fife'; my $SOURCE = 'lexdata'; my $SAMPSIZE = 30000; -while($ARGV[0] =~ /^-/) { - $_ = shift; - if($_ eq "-resourcesdir") { - $RESOURCESDIR = shift; - } elsif($_ eq "-samplesize") { - $SAMPSIZE = shift; - } elsif($_ eq "-source") { - $SOURCE = shift; - } +my $PROJECT; +{ + #guess the project name from the file name of the project directory + my @dir = File::Spec->splitdir((File::Spec->splitpath($MAKELMDIR))[1]); + $PROJECT = $dir[$#dir-2]; } -fail(&usage) if scalar(@ARGV) < 1; -my $PROJECT = shift; -&say('system', "project => $PROJECT, resource dir => $RESOURCEDIR, sample size => $SAMPSIZE"); +#process command line +GetOptions("resourcesdir=s", \$RESOURCESDIR, + "samplesize=s", 
\$SAMPSIZE, + "source=s", \$SOURCE, + "projectname=s", \$PROJECT); +fail(&usage) if @ARGV; + +&say('system', "project => $PROJECT$/resource dir => $RESOURCESDIR$/sample size => $SAMPSIZE$/source => $SOURCE$/"); + my $PRONOUNCE = 'pronounce'; my $LEXDATA = 'lexdata'; -$MAKELMDIR = File::Spec->rel2abs(File::Spec->curdir); -$GRAMMARDIR = File::Spec->catdir($RESOURCESDIR, 'Grammar'); -$GRAMMARFILE = File::Spec->catfile($GRAMMARDIR, $PROJECT.'.gra'); -$FLATGRAMMARFILE = File::Spec->catfile($GRAMMARDIR, $PROJECT.'flat.gra'); -$CORPUS = 'tempfile'; -$BASEDIC = File::Spec->catfile($GRAMMARDIR, 'base.dic'); -$VOCAB = 'vocab'; -$CCS = 'temp.ccs'; -$PHOENIX = File::Spec->catfile($GRAMMARDIR, 'compile.exe'); -$TEXT2IDNGRAM = File::Spec->catfile('CMU-Cam_Toolkit_v2', 'bin', 'text2idngram'); -$IDNGRAM2LM = File::Spec->catfile('CMU-Cam_Toolkit_v2', 'bin', 'idngram2lm'); -$RANDOMSAMPS = 'generate_random_samples.pl'; -$IDNGRAM = $PROJECT.'.idngram'; -$DECODERCONFIGDIR = File::Spec->catdir($RESOURCESDIR, 'DecoderConfig'); -$LM = File::Spec->catfile($DECODERCONFIGDIR, 'LanguageModel', $PROJECT.'LM.arpa'); -$DICTDIR = File::Spec->catdir($DECODERCONFIGDIR, 'Dictionary'); -$HAND_DICT = File::Spec->catfile($DICTDIR, 'hand.dict'); -$DICT = File::Spec->catfile($DICTDIR, $PROJECT.'.dict'); -$REDUCED_DICT = $DICT.'.reduced_phoneset'; +my $GRAMMARDIR = File::Spec->catdir($RESOURCESDIR, 'Grammar'); +my $GRAMMARFILE = File::Spec->catfile($GRAMMARDIR, $PROJECT.'.gra');my $FLATGRAMMARFILE = File::Spec->catfile($GRAMMARDIR, $PROJECT.'flat.gra'); +my $CORPUS = 'tempfile'; +my $BASEDIC = File::Spec->catfile($GRAMMARDIR, 'base.dic'); +my $VOCAB = 'vocab'; +my $CCS = 'temp.ccs'; +my $PHOENIX = File::Spec->catfile($GRAMMARDIR, 'compile.exe'); +my $TEXT2IDNGRAM = File::Spec->catfile('CMU-Cam_Toolkit_v2', 'bin', 'text2idngram'); +my $IDNGRAM2LM = File::Spec->catfile('CMU-Cam_Toolkit_v2', 'bin', 'idngram2lm'); +my $RANDOMSAMPS = 'generate_random_samples.pl'; +my $IDNGRAM = $PROJECT.'.idngram'; +my 
$DECODERCONFIGDIR = File::Spec->catdir($RESOURCESDIR, 'DecoderConfig'); +my $LM = File::Spec->catfile($DECODERCONFIGDIR, 'LanguageModel', $PROJECT.'LM.arpa'); +my $DICTDIR = File::Spec->catdir($DECODERCONFIGDIR, 'Dictionary'); +my $HAND_DICT = File::Spec->catfile($DICTDIR, 'hand.dict'); +my $DICT = File::Spec->catfile($DICTDIR, $PROJECT.'.dict'); +my $REDUCED_DICT = $DICT.'.reduced_phoneset'; &say('compile', 'compiling grammar...'); chdir($GRAMMARDIR); -if (!-e "${PROJECT}Task.gra") { - open(TEMPLATE, "${PROJECT}Task-template.gra") || die "Can't open template"; - open(TASK, ">${PROJECT}Task.gra") || die "Can't open task"; - print TASK grep(!/%%/,