Perl script help

Hewlett-packard / Dl380
March 15, 2009 at 12:49:17
Specs: RedHat Advanced Server, Octo 2.8Ghz Xeon
I'm not incredibly familiar with Perl, though I've been
asked to write a script that will parse a huge 700MB
file into 24 smaller symbol files. This file contains
nearly 1.8 million lines of text - one line for every
stock option on all U.S. markets.

I've broken it down into 24 files, based on symbol
range; as you'll see below.

As I'm not very familiar with the interworkings of an OS, I'm worried about how all of this file
reading/opening/closing will affect the RedHat AS
Linux server it runs on (I/O and file descriptor usage?)
and if there is a more efficient way of doing it. I've run
it at home on my Mac and everything seemed ok (low
CPU/RAM usage), though this is different than running
it on a production server during pre-Market open
hours. Any recommendations would be appreciated.
Thanks in advance.

#!/usr/bin/perl

##chdir 
"/xcb/wombat/config/IB2_CHI/bin/opra_2.16.45q/bin";
$occ_full = 
"/Users/jb60606/Documents/OCCProject/occ.sym";

open (OCC_FULL, $occ_full) || die "Error  opening the 
raw OCC file: $!\n";
	while ($line=<OCC_FULL>){
		@line = split (/['.',' ',^_]/, $line);

			if ($line[0] =~ /^A$/ or $line[0] =~ 
/^A[A-O][A-Z]*/) #A - AOZ
				{open (OPRA_1, ">> 
/Users/jb60606/Documents/OCCProject/occ_opra1.sy
m") || die "Error: $!\n";
					print OPRA_1 "$line";	
				 close OPRA_1;
					##print "$line[0] - to 
OPRA_1\n";	
				}
                        elsif ($line[0] =~ /^A[P-Z]/ or 
$line[0]=~ /^B$/ or $line[0]=~ /^B[A-M][A-Z]*/) #AP - 
B - BMZ
                                {open (OPRA_2, ">> 
/Users/jb60606/Documents/OCCProject/occ_opra2.sy
m") || die "Error: $!\n";
                                        print OPRA_2 "$line";
                                 close OPRA_2;
					##print "$line[0] - to 
OPRA_2\n";	
                                }
                        elsif ($line[0] =~ /^B[N-Z]/ or 
$line[0]=~ /^C$/ or $line[0]=~ /^C[A-L][A-Z]*/) #BN - C 
- CLZ
                                {open (OPRA_3, ">> 
/Users/jb60606/Documents/OCCProject/occ_opra3.sy
m") || die "Error: $!\n";
                                        print OPRA_3 "$line";
                                 close OPRA_3;
					##print "$line[0] - to 
OPRA_3\n";	
                                }
                        elsif ($line[0] =~ /^C[M-Z]/ or 
$line[0]=~ /^D$/ or $line[0]=~ /^D[A-H][A-Z]*/) #CM - 
D - DHZ
                                {open (OPRA_4, ">> 
/Users/jb60606/Documents/OCCProject/occ_opra4.sy
m") || die "Error: $!\n";
                                        print OPRA_4 "$line";
                                 close OPRA_4;
					##print "$line[0] - to 
OPRA_4\n";	
                                }
                        elsif ($line[0] =~ /^D[I-Z]/ or 
$line[0]=~ /^E$/ or $line[0]=~ /^E[A-O][A-Z]*/)# DI - E 
- EOZ
                                {open (OPRA_5, ">> 
/Users/jb60606/Documents/OCCProject/occ_opra5.sy
m") || die "Error: $!\n";
                                        print OPRA_5 "$line";
                                 close OPRA_5;
					##print "$line[0] - to 
OPRA_5\n";	
                                }
                        elsif ($line[0] =~ /^E[P-Z]/ or 
$line[0]=~ /^F/)# EP-Z - F
                                {open (OPRA_6, ">> 
/Users/jb60606/Documents/OCCProject/occ_opra6.sy
m") || die "Error: $!\n";
                                        print OPRA_6 "$line";
                                 close OPRA_6;
					##print "$line[0] - to 
OPRA_6\n";	
                                }
                        elsif ($line[0] =~ /^G/ or $line[0]=~ 
/^H$/ or $line[0]=~ /^H[A-F][A-Z]*/) #G - H - HFZ
                                {open (OPRA_7, ">> 
/Users/jb60606/Documents/OCCProject/occ_opra7.sy
m") || die "Error: $!\n";
                                        print OPRA_7 "$line";
                                 close OPRA_7;
					##print "$line[0] - to 
OPRA_7\n";	
                                }
                        elsif ($line[0] =~ /^H[G-Z]/ or 
$line[0]=~ /^I$/ or $line[0]=~ /^I[A-Q][A-Z]*/)
                                {open (OPRA_8, ">> 
/Users/jb60606/Documents/OCCProject/occ_opra8.sy
m") || die "Error: $!\n";
                                        print OPRA_8 "$line";
                                 close OPRA_8;
					##print "$line[0] - to 
OPRA_8\n";	
                                }
                        elsif ($line[0] =~ /^I[R-Z]/ or 
$line[0]=~ /^J/ or $line[0]=~ /^K$/ or $line[0]=~ /^K[A-
M][A-Z]*/)
                                {open (OPRA_9, ">> 
/Users/jb60606/Documents/OCCProject/occ_opra9.sy
m") || die "Error: $!\n";
                                        print OPRA_9 "$line";
                                 close OPRA_9;
					##print "$line[0] - to 
OPRA_9\n";	
                                }
                        elsif ($line[0] =~ /^K[N-Z]/ or 
$line[0]=~ /^L$/ or $line[0]=~ /^L[A-V][A-Z]*/)
                                {open (OPRA_10, ">> 
/Users/jb60606/Documents/OCCProject/occ_opra10.s
ym") || die "Error: $!\n";
                                        print OPRA_10 "$line";
                                 close OPRA_10;
					##print "$line[0] - to 
OPRA_10\n";	
                                }
                        elsif ($line[0] =~ /^L[W-Z]/ or 
$line[0]=~ /^M$/ or $line[0]=~ /^M[A-Q][A-Z]*/)
                                {open (OPRA_11, ">> 
/Users/jb60606/Documents/OCCProject/occ_opra11.s
ym") || die "Error: $!\n";
                                        print OPRA_11 "$line";
                                 close OPRA_11;
					##print "$line[0] - to 
OPRA_11\n";	
                                }
                        elsif ($line[0] =~ /^M[R-Z]/ or 
$line[0]=~ /^N$/ or $line[0]=~ /^N[A-Q][A-Z]*/)
                                {open (OPRA_12, ">> 
/Users/jb60606/Documents/OCCProject/occ_opra12.s
ym") || die "Error: $!\n";
                                        print OPRA_12 "$line";
                                 close OPRA_12;
					##print "$line[0] - to 
OPRA_12\n";	
                                }
                        elsif ($line[0] =~ /^N[R-Z]/ or 
$line[0]=~ /^O/ or $line[0] =~ /^P$/ or $line[0]=~ 
/^P[A-C][A-Z]*/)
                                {open (OPRA_13, ">> 
/Users/jb60606/Documents/OCCProject/occ_opra13.s
ym") || die "Error: $!\n";
                                        print OPRA_13 "$line";
                                 close OPRA_13;
					##print "$line[0] - to 
OPRA_13\n";	
                                }
                        elsif ($line[0] =~ /^P[D-Z]/ or 
$line[0]=~ /^Q$/ or $line[0]=~ /^Q[A-F][A-Z]*/)
                                {open (OPRA_14, ">> 
/Users/jb60606/Documents/OCCProject/occ_opra14.s
ym") || die "Error: $!\n";
                                        print OPRA_14 "$line";
                                 close OPRA_14;
					##print "$line[0] - to 
OPRA_14\n";	
                                }
                        elsif ($line[0] =~ /^Q[G-Q]/ or 
$line[0]=~ /^Q[R][A-Z]*/)
                                {open (OPRA_15, ">> 
/Users/jb60606/Documents/OCCProject/occ_opra15.s
ym") || die "Error: $!\n";
                                        print OPRA_15 "$line";
                                 close OPRA_15;
					##print "$line[0] - to 
OPRA_15\n";	
                                }
                        elsif ($line[0] =~ /^Q[S-Z]/ or 
$line[0]=~ /^R[A-T][A-Z]*/)
                                {open (OPRA_16, ">> 
/Users/jb60606/Documents/OCCProject/occ_opra16.s
ym") || die "Error: $!\n";
                                        print OPRA_16 "$line";
                                 close OPRA_16;
					##print "$line[0] - to 
OPRA_16\n";	
                                }
                        elsif ($line[0] =~ /^R[U-Z]/ or 
$line[0]=~ /^S$/ or $line[0]=~ /^S[A-K][A-Z]*/)
                                {open (OPRA_17, ">> 
/Users/jb60606/Documents/OCCProject/occ_opra17.s
ym") || die "Error: $!\n";
                                        print OPRA_17 "$line";
                                 close OPRA_17;
					##print "$line[0] - to 
OPRA_17\n";	
                                }
                        elsif ($line[0] =~ /^S[L-X]/ or 
$line[0]=~ /^S[Y][A-Z]*/)
                                {open (OPRA_18, ">> 
/Users/jb60606/Documents/OCCProject/occ_opra18.s
ym") || die "Error: $!\n";
                                        print OPRA_18 "$line";
                                 close OPRA_18;
					##print "$line[0] - to 
OPRA_18\n";	
                                }
                        elsif ($line[0] =~ /^SZ/ or $line[0]=~ 
/^T/ or $line[0]=~ /^U$/ or $line[0]=~ /^U[A-O][A-Z]*/)
                                {open (OPRA_19, ">> 
/Users/jb60606/Documents/OCCProject/occ_opra19.s
ym") || die "Error: $!\n";
                                        print OPRA_19 "$line";
                                 close OPRA_19;
					##print "$line[0] - to 
OPRA_19\n";	
                                }
                        elsif ($line[0] =~ /^U[P-Z]/ or 
$line[0]=~ /^V$/ or $line[0]=~ /^V[A-R][A-Z]*/)
                                {open (OPRA_20, ">> 
/Users/jb60606/Documents/OCCProject/occ_opra20.s
ym") || die "Error: $!\n";
                                        print OPRA_20 "$line";
                                 close OPRA_20;
					##print "$line[0] - to 
OPRA_20\n";	
                                }
                        elsif ($line[0] =~ /^V[S-Z]/ or 
$line[0]=~ /^W$/ or $line[0]=~ /^W[A-P][A-Z]*/)
                                {open (OPRA_21, ">> 
/Users/jb60606/Documents/OCCProject/occ_opra21.s
ym") || die "Error: $!\n";
                                        print OPRA_21 "$line";
                                 close OPRA_21;
					##print "$line[0] - to 
OPRA_21\n";	
                                }
                        elsif ($line[0] =~ /^W[Q-Z]/ or 
$line[0]=~ /^X$/ or $line[0]=~ /^X[A-O][A-Z]*/)
                                {open (OPRA_22, ">> 
/Users/jb60606/Documents/OCCProject/occ_opra22.s
ym") || die "Error: $!\n";
                                        print OPRA_22 "$line";
                                 close OPRA_22;
					##print "$line[0] - to 
OPRA_22\n";	
                                }
                        elsif ($line[0] =~ /^X[P-Z]/ or 
$line[0]=~ /^Y$/ or $line[0]=~ /^Y[A-M][A-Z]*/)
                                {open (OPRA_23, ">> 
/Users/jb60606/Documents/OCCProject/occ_opra23.s
ym") || die "Error: $!\n";
                                        print OPRA_23 "$line";
                                 close OPRA_23;
					##print "$line[0] - to 
OPRA_23\n";	
                                }
                        elsif ($line[0] =~ /^Y[N-Z]/ or 
$line[0]=~ /^Z$/ or $line[0]=~ /^Z[A-Z][A-Z]*/)
                                {open (OPRA_24, ">> 
/Users/jb60606/Documents/OCCProject/occ_opra24.s
ym") || die "Error: $!\n";
                                        print OPRA_24 "$line";
                                 close OPRA_24;
					##print "$line[0] - to 
OPRA_24\n";	
                                }
			elsif ($line[0] =~ /^[0-9]/)
				{
					next;
				}
			else
				{ open (NOT_FOUND, ">> 
/Users/jb60606/Documents/OCCProject/not_found.txt"
) || die "Error: $!\n";
					print NOT_FOUND 
"$line[0]\n";
				close NOT_FOUND;
					#print "$line[0]  doesn't 
appear to match any regex.\n";
				}
	}
close OCC_FULL;
exit 0;


See More: Perl script help

Report •


#1
March 15, 2009 at 12:50:35
sorry - I thought using the 'pre' tags would format the code so
that it's legible.

Does anyone know how to widen this thing out?


Report •
Related Solutions


Ask Question