forked from wltrimbl/FGS
-
Notifications
You must be signed in to change notification settings - Fork 0
/
run_FragGeneScan.pl
executable file
·93 lines (79 loc) · 3.47 KB
/
run_FragGeneScan.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
#!/usr/bin/perl -w
use strict;
use Getopt::Long;
# ---------------------------------------------------------------------------
# Driver: parse the command line, validate it, run the FragGeneScan binary
# and then either post-process the raw output (complete genomes) or rename
# it to "<out>.out" (short reads).
#
# Exit status: 0 on success, 1 on any usage error or failed external command.
# ---------------------------------------------------------------------------
my $genome_file    = "";
my $FGS_result     = "";
my $FGS_whole      = -1;    # sentinel: forces the user to pass -complete
my $FGS_train_file = "";
my $debug          = 1;     # when true, echo commands and keep the raw output

# Directory prefix of this script (with trailing "/", or "" when the script
# was invoked without a path).  Derived by stripping the last path component
# so it stays correct even if the script file is renamed.
(my $dir = $0) =~ s{[^/]*\z}{};

# Print an error line followed by the usage text, then stop with a failing
# exit status so that calling scripts can detect the problem.
sub _usage_error {
    my ($message) = @_;
    print "ERROR: $message\n";
    print_usage();
    exit 1;
}

# Run an external command in list form (no shell involved, so file names
# containing spaces or shell metacharacters are passed through verbatim).
# Stops the script with exit status 1 if the command could not be started
# or finished with a non-zero wait status.
sub _run_checked {
    my @cmd   = @_;
    my $shown = join(" ", @cmd);
    system(@cmd);
    return if $? == 0;
    if ($? == -1) {
        print "ERROR: '$shown' could not be executed: $!\n";
    }
    else {
        print "ERROR: '$shown' return value $?\n";
    }
    exit 1;
}

# GetOptions itself reports the offending option on STDERR; we add the usage.
GetOptions(
    'genome=s'   => \$genome_file,
    'out=s'      => \$FGS_result,
    'complete=s' => \$FGS_whole,
    'train=s'    => \$FGS_train_file,
) or _usage_error("Invalid command-line options.");

# ---- validation ------------------------------------------------------------
_usage_error("An input genome file was not specified.")
    if length($genome_file) == 0;
_usage_error("The input genome file [$genome_file] does not exist.")
    unless -e $genome_file;

_usage_error("An output file name was not specified.")
    if length($FGS_result) == 0;

_usage_error("An incorrect value for the option -complete was entered.")
    unless $FGS_whole eq "1" || $FGS_whole eq "0";

_usage_error("A file for model parameters was not specified.")
    if length($FGS_train_file) == 0;

# The existence check uses the path under this script's "train" directory.
my $train_file = $dir . "train/" . $FGS_train_file;
_usage_error("The file for model parameter [$train_file] does not exist.")
    unless -e $train_file;

# ---- gene prediction ---------------------------------------------------------
# NOTE(review): the binary is handed the bare model name, not $train_file;
# presumably it resolves the name against its own train directory -- confirm.
my @fgs_cmd = (
    $dir . "FragGeneScan",
    "-s", $genome_file,
    "-o", $FGS_result,
    "-w", $FGS_whole,
    "-t", $FGS_train_file,
);
my $command = join(" ", @fgs_cmd);
print "$command\n" if $debug;
_run_checked(@fgs_cmd);

# ---- post-processing ---------------------------------------------------------
my $final_result = $FGS_result . ".out";
if ($FGS_whole eq "1") {
    my @post_cmd = (
        $dir . "post_process.pl",
        "-genome=" . $genome_file,
        "-pre=" . $FGS_result,
        "-post=" . $final_result,
    );
    print join(" ", @post_cmd), "\n" if $debug;

    # Only discard the raw output once post-processing is known to have
    # succeeded; _run_checked exits on failure.
    _run_checked(@post_cmd);
    if (!$debug) {
        unlink($FGS_result)
            or warn "WARNING: could not remove [$FGS_result]: $!\n";
    }
}
else {
    print "mv " . $FGS_result . " " . $final_result . "\n" if $debug;
    # Source and target live in the same directory, so rename() suffices.
    unless (rename($FGS_result, $final_result)) {
        print "ERROR: could not rename [$FGS_result] to [$final_result]: $!\n";
        exit 1;
    }
}
# Print the command-line usage summary to STDOUT.
#
# Takes no arguments.  The text describes the four options accepted by this
# script and lists the model-parameter files named in the "train" directory.
# The summary is followed by one blank line.  The value of the final print
# is returned, as before; callers in this file ignore it.
sub print_usage {
    # Single-quoted heredoc: nothing is interpolated, the text is emitted
    # exactly as written (each detail line keeps its leading space).
    print <<'END_USAGE';
USAGE: ./run_FragGeneScan.pl -genome=[seq_file_name] -out=[output_file_name] -complete=[1 or 0] -train=[train_file_name]
 [seq_file_name]: sequence file name including the full path
 [output_file_name]: output file name including the full path
 [1 or 0]: 1 if the sequence file has complete genomic sequences
 0 if the sequence file has short sequence reads
 [train_file_name]: file name that contains model parameters; this file should be in the "train" directory
 Note that the following files containing model parameters already exist in the "train" directory
 [complete] for complete genomic sequences or short sequence reads without sequencing error
 [sanger_5] for Sanger sequencing reads with about 0.5% error rate
 [sanger_10] for Sanger sequencing reads with about 1% error rate
 [454_10] for 454 pyrosequencing reads with about 1% error rate
 [454_30] for 454 pyrosequencing reads with about 3% error rate
 [illumina_5] for Illumina sequencing reads with about 0.5% error rate
 [illumina_10] for Illumina sequencing reads with about 1% error rate

END_USAGE
}