[RavenclawDev 279] [922] Tools/MakeLM/makelm.pl: 1) better options handling with getopt

tk@edam.speech.cs.cmu.edu tk at edam.speech.cs.cmu.edu
Mon Jun 11 12:40:16 EDT 2007


An HTML attachment was scrubbed...
URL: http://mailman.srv.cs.cmu.edu/pipermail/ravenclaw-developers/attachments/20070611/3dbeec83/attachment.html
-------------- next part --------------
Modified: Tools/MakeLM/makelm.pl
===================================================================
--- Tools/MakeLM/makelm.pl	2007-06-10 03:22:17 UTC (rev 921)
+++ Tools/MakeLM/makelm.pl	2007-06-11 16:40:15 UTC (rev 922)
@@ -7,65 +7,70 @@
 use File::stat;
 use IO::Handle;
 use IPC::Open2;
+use Getopt::Long;
 $ENV{'LC_COLLATE'} = 'C';
 $ENV{'LC_ALL'} = 'C';
 use locale;
+use strict;
 
+sub usage {
+    return "usage: $0 [--resourcesdir {resources directory}] [--samplesize {sample size}] [--source {source}] [--projectname {project name}]$/";
+}
+
+#open log file
 my $LOGFILE = 'log.txt';
 open(LOG, ">$LOGFILE") if $LOGFILE;
 
+#need access to cygwin dir so that cygwin1.dll can be found
 $ENV{'Path'} .= ($ENV{'CYGWIN_DIR'} || 'C:\cygwin').'\bin;';
 
+#setup default variables
+#needs to run from makelm directory
+my $MAKELMDIR = File::Spec->rel2abs(File::Spec->curdir);
 my $RESOURCESDIR = File::Spec->catdir(File::Spec->updir, File::Spec->updir, 'Resources');
 #my $SOURCE = 'fife';
 my $SOURCE = 'lexdata';
 my $SAMPSIZE = 30000;
-while($ARGV[0] =~ /^-/) {
-    $_ = shift;
-    if($_ eq "-resourcesdir") {
-	$RESOURCESDIR = shift;
-    } elsif($_ eq "-samplesize") {
-	$SAMPSIZE = shift;
-    } elsif($_ eq "-source") {
-	$SOURCE = shift;
-    }
+my $PROJECT;
+{
+    #guess the project name from the file name of the project directory
+    my @dir = File::Spec->splitdir((File::Spec->splitpath($MAKELMDIR))[1]);
+    $PROJECT = $dir[$#dir-2];
 }
-fail(&usage) if scalar(@ARGV) < 1; 
-my $PROJECT = shift;
 
-&say('system', "project => $PROJECT, resource dir => $RESOURCEDIR, sample size => $SAMPSIZE");
+#process command line
+GetOptions("resourcesdir=s", \$RESOURCESDIR,
+	   "samplesize=s", \$SAMPSIZE,
+	   "source=s", \$SOURCE,
+	   "projectname=s", \$PROJECT);
 
+fail(&usage) if @ARGV; 
+
+&say('system', "project => $PROJECT$/resource dir => $RESOURCESDIR$/sample size => $SAMPSIZE$/source => $SOURCE$/");
+
 my $PRONOUNCE = 'pronounce';
 my $LEXDATA = 'lexdata';
-$MAKELMDIR = File::Spec->rel2abs(File::Spec->curdir);
-$GRAMMARDIR = File::Spec->catdir($RESOURCESDIR, 'Grammar');
-$GRAMMARFILE = File::Spec->catfile($GRAMMARDIR, $PROJECT.'.gra');
-$FLATGRAMMARFILE = File::Spec->catfile($GRAMMARDIR, $PROJECT.'flat.gra');
-$CORPUS = 'tempfile';
-$BASEDIC = File::Spec->catfile($GRAMMARDIR, 'base.dic');
-$VOCAB = 'vocab';
-$CCS = 'temp.ccs';
-$PHOENIX = File::Spec->catfile($GRAMMARDIR, 'compile.exe');
-$TEXT2IDNGRAM = File::Spec->catfile('CMU-Cam_Toolkit_v2', 'bin', 'text2idngram');
-$IDNGRAM2LM = File::Spec->catfile('CMU-Cam_Toolkit_v2', 'bin', 'idngram2lm');
-$RANDOMSAMPS = 'generate_random_samples.pl';
-$IDNGRAM = $PROJECT.'.idngram';
-$DECODERCONFIGDIR = File::Spec->catdir($RESOURCESDIR, 'DecoderConfig');
-$LM = File::Spec->catfile($DECODERCONFIGDIR, 'LanguageModel', $PROJECT.'LM.arpa');
-$DICTDIR = File::Spec->catdir($DECODERCONFIGDIR, 'Dictionary');
-$HAND_DICT = File::Spec->catfile($DICTDIR, 'hand.dict');
-$DICT = File::Spec->catfile($DICTDIR, $PROJECT.'.dict');
-$REDUCED_DICT = $DICT.'.reduced_phoneset';
+my $GRAMMARDIR = File::Spec->catdir($RESOURCESDIR, 'Grammar');
+my $GRAMMARFILE = File::Spec->catfile($GRAMMARDIR, $PROJECT.'.gra');my $FLATGRAMMARFILE = File::Spec->catfile($GRAMMARDIR, $PROJECT.'flat.gra');
+my $CORPUS = 'tempfile';
+my $BASEDIC = File::Spec->catfile($GRAMMARDIR, 'base.dic');
+my $VOCAB = 'vocab';
+my $CCS = 'temp.ccs';
+my $PHOENIX = File::Spec->catfile($GRAMMARDIR, 'compile.exe');
+my $TEXT2IDNGRAM = File::Spec->catfile('CMU-Cam_Toolkit_v2', 'bin', 'text2idngram');
+my $IDNGRAM2LM = File::Spec->catfile('CMU-Cam_Toolkit_v2', 'bin', 'idngram2lm');
+my $RANDOMSAMPS = 'generate_random_samples.pl';
+my $IDNGRAM = $PROJECT.'.idngram';
+my $DECODERCONFIGDIR = File::Spec->catdir($RESOURCESDIR, 'DecoderConfig');
+my $LM = File::Spec->catfile($DECODERCONFIGDIR, 'LanguageModel', $PROJECT.'LM.arpa');
+my $DICTDIR = File::Spec->catdir($DECODERCONFIGDIR, 'Dictionary');
+my $HAND_DICT = File::Spec->catfile($DICTDIR, 'hand.dict');
+my $DICT = File::Spec->catfile($DICTDIR, $PROJECT.'.dict');
+my $REDUCED_DICT = $DICT.'.reduced_phoneset';
 
 &say('compile', 'compiling grammar...');
 chdir($GRAMMARDIR);
-if (!-e "${PROJECT}Task.gra") {
-  open(TEMPLATE, "${PROJECT}Task-template.gra") || die "Can't open template";
-  open(TASK, ">${PROJECT}Task.gra") || die "Can't open task";
-  print TASK grep(!/%%/, <TEMPLATE>);
-  close TASK; close TEMPLATE;
-}
-system('cmp.bat');
+do 'cmp.pl' || system 'cmp.bat';
 &say('compile', 'compiling language model...');
 chdir($MAKELMDIR);
 &say('compile', 'generating corpus...');
@@ -88,10 +93,6 @@
 close(LOG) if $LOGFILE;
 exit;
 
-sub usage {
-    return "usage: $0 [-resourcesdir {resources directory}] [-samplesize {sample size}] [-source {source}] {project name}$/";
-}
-
 sub getcorpus {
     my $corpusfile = shift;
     #flatten grammar
@@ -238,7 +239,7 @@
     &fail("Need the corpus location as an argument") if !$corpus;
     &fail("Need the source") if !$SOURCE;
 
-    if($source eq 'fife') { #source is web form
+    if($SOURCE eq 'fife') { #source is web form
 	return getdicFife($corpus);
     } else {
 	return getdicLocal($corpus);


More information about the Ravenclaw-developers mailing list