[RavenclawDev 279] [922] Tools/MakeLM/makelm.pl: 1) better options handling with getopt
tk@edam.speech.cs.cmu.edu
tk at edam.speech.cs.cmu.edu
Mon Jun 11 12:40:16 EDT 2007
An HTML attachment was scrubbed...
URL: http://mailman.srv.cs.cmu.edu/pipermail/ravenclaw-developers/attachments/20070611/3dbeec83/attachment.html
-------------- next part --------------
Modified: Tools/MakeLM/makelm.pl
===================================================================
--- Tools/MakeLM/makelm.pl 2007-06-10 03:22:17 UTC (rev 921)
+++ Tools/MakeLM/makelm.pl 2007-06-11 16:40:15 UTC (rev 922)
@@ -7,65 +7,70 @@
use File::stat;
use IO::Handle;
use IPC::Open2;
+use Getopt::Long;
$ENV{'LC_COLLATE'} = 'C';
$ENV{'LC_ALL'} = 'C';
use locale;
+use strict;
+sub usage {
+ return "usage: $0 [--resourcesdir {resources directory}] [--samplesize {sample size}] [--source {source}] [--projectname {project name}]$/";
+}
+
+#open log file
my $LOGFILE = 'log.txt';
open(LOG, ">$LOGFILE") if $LOGFILE;
+#need access to cygwin dir so that cygwin1.dll can be found
$ENV{'Path'} .= ($ENV{'CYGWIN_DIR'} || 'C:\cygwin').'\bin;';
+#setup default variables
+#needs to run from makelm directory
+my $MAKELMDIR = File::Spec->rel2abs(File::Spec->curdir);
my $RESOURCESDIR = File::Spec->catdir(File::Spec->updir, File::Spec->updir, 'Resources');
#my $SOURCE = 'fife';
my $SOURCE = 'lexdata';
my $SAMPSIZE = 30000;
-while($ARGV[0] =~ /^-/) {
- $_ = shift;
- if($_ eq "-resourcesdir") {
- $RESOURCESDIR = shift;
- } elsif($_ eq "-samplesize") {
- $SAMPSIZE = shift;
- } elsif($_ eq "-source") {
- $SOURCE = shift;
- }
+my $PROJECT;
+{
+ #guess the project name from the file name of the project directory
+ my @dir = File::Spec->splitdir((File::Spec->splitpath($MAKELMDIR))[1]);
+ $PROJECT = $dir[$#dir-2];
}
-fail(&usage) if scalar(@ARGV) < 1;
-my $PROJECT = shift;
-&say('system', "project => $PROJECT, resource dir => $RESOURCEDIR, sample size => $SAMPSIZE");
+#process command line
+GetOptions("resourcesdir=s", \$RESOURCESDIR,
+ "samplesize=s", \$SAMPSIZE,
+ "source=s", \$SOURCE,
+ "projectname=s", \$PROJECT);
+fail(&usage) if @ARGV;
+
+&say('system', "project => $PROJECT$/resource dir => $RESOURCESDIR$/sample size => $SAMPSIZE$/source => $SOURCE$/");
+
my $PRONOUNCE = 'pronounce';
my $LEXDATA = 'lexdata';
-$MAKELMDIR = File::Spec->rel2abs(File::Spec->curdir);
-$GRAMMARDIR = File::Spec->catdir($RESOURCESDIR, 'Grammar');
-$GRAMMARFILE = File::Spec->catfile($GRAMMARDIR, $PROJECT.'.gra');
-$FLATGRAMMARFILE = File::Spec->catfile($GRAMMARDIR, $PROJECT.'flat.gra');
-$CORPUS = 'tempfile';
-$BASEDIC = File::Spec->catfile($GRAMMARDIR, 'base.dic');
-$VOCAB = 'vocab';
-$CCS = 'temp.ccs';
-$PHOENIX = File::Spec->catfile($GRAMMARDIR, 'compile.exe');
-$TEXT2IDNGRAM = File::Spec->catfile('CMU-Cam_Toolkit_v2', 'bin', 'text2idngram');
-$IDNGRAM2LM = File::Spec->catfile('CMU-Cam_Toolkit_v2', 'bin', 'idngram2lm');
-$RANDOMSAMPS = 'generate_random_samples.pl';
-$IDNGRAM = $PROJECT.'.idngram';
-$DECODERCONFIGDIR = File::Spec->catdir($RESOURCESDIR, 'DecoderConfig');
-$LM = File::Spec->catfile($DECODERCONFIGDIR, 'LanguageModel', $PROJECT.'LM.arpa');
-$DICTDIR = File::Spec->catdir($DECODERCONFIGDIR, 'Dictionary');
-$HAND_DICT = File::Spec->catfile($DICTDIR, 'hand.dict');
-$DICT = File::Spec->catfile($DICTDIR, $PROJECT.'.dict');
-$REDUCED_DICT = $DICT.'.reduced_phoneset';
+my $GRAMMARDIR = File::Spec->catdir($RESOURCESDIR, 'Grammar');
+my $GRAMMARFILE = File::Spec->catfile($GRAMMARDIR, $PROJECT.'.gra');my $FLATGRAMMARFILE = File::Spec->catfile($GRAMMARDIR, $PROJECT.'flat.gra');
+my $CORPUS = 'tempfile';
+my $BASEDIC = File::Spec->catfile($GRAMMARDIR, 'base.dic');
+my $VOCAB = 'vocab';
+my $CCS = 'temp.ccs';
+my $PHOENIX = File::Spec->catfile($GRAMMARDIR, 'compile.exe');
+my $TEXT2IDNGRAM = File::Spec->catfile('CMU-Cam_Toolkit_v2', 'bin', 'text2idngram');
+my $IDNGRAM2LM = File::Spec->catfile('CMU-Cam_Toolkit_v2', 'bin', 'idngram2lm');
+my $RANDOMSAMPS = 'generate_random_samples.pl';
+my $IDNGRAM = $PROJECT.'.idngram';
+my $DECODERCONFIGDIR = File::Spec->catdir($RESOURCESDIR, 'DecoderConfig');
+my $LM = File::Spec->catfile($DECODERCONFIGDIR, 'LanguageModel', $PROJECT.'LM.arpa');
+my $DICTDIR = File::Spec->catdir($DECODERCONFIGDIR, 'Dictionary');
+my $HAND_DICT = File::Spec->catfile($DICTDIR, 'hand.dict');
+my $DICT = File::Spec->catfile($DICTDIR, $PROJECT.'.dict');
+my $REDUCED_DICT = $DICT.'.reduced_phoneset';
&say('compile', 'compiling grammar...');
chdir($GRAMMARDIR);
-if (!-e "${PROJECT}Task.gra") {
- open(TEMPLATE, "${PROJECT}Task-template.gra") || die "Can't open template";
- open(TASK, ">${PROJECT}Task.gra") || die "Can't open task";
- print TASK grep(!/%%/, <TEMPLATE>);
- close TASK; close TEMPLATE;
-}
-system('cmp.bat');
+do 'cmp.pl' || system 'cmp.bat';
&say('compile', 'compiling language model...');
chdir($MAKELMDIR);
&say('compile', 'generating corpus...');
@@ -88,10 +93,6 @@
close(LOG) if $LOGFILE;
exit;
-sub usage {
- return "usage: $0 [-resourcesdir {resources directory}] [-samplesize {sample size}] [-source {source}] {project name}$/";
-}
-
sub getcorpus {
my $corpusfile = shift;
#flatten grammar
@@ -238,7 +239,7 @@
&fail("Need the corpus location as an argument") if !$corpus;
&fail("Need the source") if !$SOURCE;
- if($source eq 'fife') { #source is web form
+ if($SOURCE eq 'fife') { #source is web form
return getdicFife($corpus);
} else {
return getdicLocal($corpus);
More information about the Ravenclaw-developers
mailing list