Attachment 'sa-stats.pl'
Download 1 #!/usr/bin/perl
2
3 # -------------------------------------------------------------
4 # file: sa-stats.pl (SARE release)
5 # created: 2005-01-31
6 # updated: 2007-01-30
7 # version: 0.93
8 # author: Dallas Engelken <dallase@uribl.com>
9 # desc: Generates Top Spam/Ham Rules fired for SA 3.x installations.
10 #
11 # IMPORTANT NOTES
12 #
13 # See http://www.rulesemporium.com/programs/sa-stats-1.0.txt for
14 # a SA 3.1.x version that supports per-domain and per-user.
15 #
16 # If your top 5 does not contain URIBL_BLACK, see
17 # http://www.uribl.com/usage.shtml
18 # -------------------------------------------------------------
19
20 use Getopt::Long;
21 use Pod::Usage;
22
# Command-line options (all optional; defaults are applied below).
my ($LOG_DIR,$FILE,$TOPRULES,$PRINT_TO_WEB,$HELP);

GetOptions (
    'logdir|l=s'   => \$LOG_DIR,
    'filename|f=s' => \$FILE,
    'num|n=i'      => \$TOPRULES,
    'web|w'        => \$PRINT_TO_WEB,
    'help|h'       => \$HELP
);

if ($HELP) {
    print "usage: $0 [-l <dir>] [-f <file>] [-n <num>] [-w]\n";
    print "\t--logdir|-l <dir>\tDirectory containing spamd logs\n";
    print "\t--filename|-f <file>\tFile names or regex to look for in the logdir\n";
    print "\t--num|-n <num>\tNumber of top rules to display\n";
    print "\t--web|-w\tMake it web friendly output\n";
    print "\t--help|-h\tPrints this help\n";
    exit;
}

if (!defined $TOPRULES) { $TOPRULES=20 }
if (!defined $LOG_DIR)  { $LOG_DIR="/var/log" }
if (!defined $FILE)     { $FILE='^maillog$' }   # regex

# LEAVE THE REST ALONE UNLESS YOU KNOW WHAT YOU ARE DOING...
################################################################

# Running totals shared by calcstats() (which fills them) and
# summarize() (which reports them).
my $NUM_EMAIL=0;        my $NUM_SPAM=0;        my $NUM_HAM=0;
my $EMAIL_HITS=0;       my $SPAM_HITS=0;       my $HAM_HITS=0;
my %SPAM_RULES=();      my %HAM_RULES=();
my %SPAM_SCORES=();     my %HAM_SCORES=();
my $TOTAL_SPAM_RULES=0; my $TOTAL_HAM_RULES=0;
my $ALSPAM=0;           my $ALHAM=0;           my $ALNO=0;
my $HAM_SEC=0;          my $SPAM_SEC=0;        my $EMAIL_SEC=0;

my $footer = '</div><div id="footer"><p>CGI by <a href="mailto:dallase@nmgi.com">Dallas Engelken</a></p></div>';

# Fail loudly if the log directory cannot be read; otherwise the report
# would silently show zero mail, which looks like a valid result.
opendir(my $dh, $LOG_DIR)
    or die "Cannot open log directory $LOG_DIR: $!\n";
# $FILE is deliberately used as a case-insensitive regex (see --filename).
my @logs = grep { /$FILE/i } readdir($dh);
closedir($dh);

foreach my $log (@logs) {
    calcstats($LOG_DIR."/".$log);
}

summarize();
exit;
70
71 #############################
72
# calcstats($log)
#
# Reads one log file and adds every "result:" line found in it to the
# file-level counters (message counts, score totals, scan times,
# autolearn counts and per-rule hit/score tables).
#
#   $log - path of the log file to read.
#
# Lines that do not match the result pattern are skipped.  A missing
# path or a directory prints "<path> not found.." and returns; a file
# that exists but cannot be opened is reported on STDERR and skipped.
sub calcstats {

    my $log = shift;

    if (!-e $log || -d $log) {
        print "$log not found..\n";
        return;
    }

    # Three-argument open with a lexical handle: the file name can never
    # be interpreted as an open mode, and failures are no longer silent.
    # The message goes to STDERR so it cannot end up in front of the
    # Content-type header that summarize() prints in web mode.
    my $fh;
    if (!open($fh, '<', $log)) {
        warn "$log could not be opened: $!\n";
        return;
    }

    while (my $line = <$fh>) {

        # Captures: result flag, score, comma separated rule list,
        # scan time and autolearn state.  The score may be an integer
        # or a decimal number.
        next unless $line =~ /.*result:\s+([\w.])\s+(-?\d+(?:\.\d+)?)\s+-\s+(.*)\s+scantime=([\d.]+),size=\d+.*autolearn=(\w+)/;
        my ($result, $score, $rules, $time, $learn) = ($1, $2, $3, $4, $5);

        # A result flag of "Y" marks the message as spam; anything else
        # is counted as ham.
        my $spam = ($result eq "Y") ? 1 : 0;

        # "ham" is tested first, then "spam"; every other autolearn
        # value is counted as not learned.
        if ($learn =~ /ham/) {
            $ALHAM++;
        }
        elsif ($learn =~ /spam/) {
            $ALSPAM++;
        }
        else {
            $ALNO++;
        }

        # Per-rule tables: hit count, and the sum of the scores of the
        # messages the rule fired on (summarize() turns it into an average).
        foreach my $r (split(/,/, $rules)) {
            if ($spam) {
                $TOTAL_SPAM_RULES++;
                $SPAM_RULES{$r}++;
                $SPAM_SCORES{$r} += $score;
            }
            else {
                $TOTAL_HAM_RULES++;
                $HAM_RULES{$r}++;
                $HAM_SCORES{$r} += $score;
            }
        }

        if ($spam) {
            $NUM_SPAM++;
            $SPAM_HITS += $score;
            $SPAM_SEC  += $time;
        }
        else {
            $NUM_HAM++;
            $HAM_HITS += $score;
            $HAM_SEC  += $time;
        }
        $NUM_EMAIL++;
        $EMAIL_HITS += $score;
        $EMAIL_SEC  += $time;
    }
    close($fh);

}
158
159
# summarize()
#
# Prints the report built from the file-level counters: message totals
# with autolearn counts and averages, time spent scanning, and the
# "top spam rules" / "top ham rules" tables.  With $PRINT_TO_WEB set the
# report is wrapped in a Content-type header, <pre>...</pre> and $footer.
#
# Takes no arguments; the return value is not meaningful.
sub summarize {

    print "Content-type: text/html\n\n" if ($PRINT_TO_WEB);
    print "<pre>" if ($PRINT_TO_WEB);

    # Averages fall back to 0 when a class has no messages.
    my $avgspamhits  = _sa_ratio($SPAM_HITS,  $NUM_SPAM);
    my $avgspamtime  = _sa_ratio($SPAM_SEC,   $NUM_SPAM);
    my $avghamhits   = _sa_ratio($HAM_HITS,   $NUM_HAM);
    my $avghamtime   = _sa_ratio($HAM_SEC,    $NUM_HAM);
    my $avgemailhits = _sa_ratio($EMAIL_HITS, $NUM_EMAIL);
    my $avgemailtime = _sa_ratio($EMAIL_SEC,  $NUM_EMAIL);

    my $ALTOT=$ALSPAM+$ALHAM;
    printf("Email: %8s Autolearn: %5s AvgScore: %6.2f AvgScanTime: %5.2f sec\n",$NUM_EMAIL,$ALTOT,$avgemailhits,$avgemailtime);
    printf("Spam: %8s Autolearn: %5s AvgScore: %6.2f AvgScanTime: %5.2f sec\n",$NUM_SPAM,$ALSPAM,$avgspamhits,$avgspamtime);
    printf("Ham: %8s Autolearn: %5s AvgScore: %6.2f AvgScanTime: %5.2f sec\n",$NUM_HAM,$ALHAM,$avghamhits,$avghamtime);

    br();
    printf "Time Spent Running SA: %7.2f hours\n",$EMAIL_SEC/60/60;
    printf "Time Spent Processing Spam: %7.2f hours\n",$SPAM_SEC/60/60;
    printf "Time Spent Processing Ham: %7.2f hours\n",$HAM_SEC/60/60;

    br();

    _sa_print_top_rules("TOP SPAM RULES FIRED", \%SPAM_RULES, \%SPAM_SCORES);
    _sa_print_top_rules("TOP HAM RULES FIRED",  \%HAM_RULES,  \%HAM_SCORES);

    print "</pre>\n" if ($PRINT_TO_WEB);
    print $footer if ($PRINT_TO_WEB && $footer ne "");
    print "\n";
}

# _sa_ratio($numerator, $denominator)
#
# Returns $numerator / $denominator, or 0 when the denominator is zero
# or undefined, so that a log containing only spam (or only ham) cannot
# abort the report with "Illegal division by zero".
sub _sa_ratio {
    my ($numerator, $denominator) = @_;
    return 0 unless $denominator;
    return ($numerator || 0) / $denominator;
}

# _sa_print_top_rules($title, \%counts, \%scores)
#
# Prints one ranked rule table.  %counts maps rule name to hit count and
# decides the ranking; %scores maps rule name to the summed message
# scores used for the AVGSCO column.  At most $TOPRULES rows are printed
# (all rows when $TOPRULES is 0 or negative).
sub _sa_print_top_rules {
    my ($title, $counts, $scores) = @_;

    print "$title\n";
    hr();
    # Seven columns, seven labels (one conversion per argument).
    printf("%4s\t%-24s\t%5s %8s %7s %7s %7s\n","RANK","RULE NAME","COUNT","\%OFMAIL","\%OFSPAM","\%OFHAM","AVGSCO");
    hr();

    # Highest count first; ties are broken by rule name so that the
    # output is the same on every run.
    my @ranked = sort { $counts->{$b} <=> $counts->{$a} || $a cmp $b } keys %$counts;

    my $rank = 0;
    foreach my $rule (@ranked) {
        # A rule may have fired on only one class of mail.
        my $spam_count = $SPAM_RULES{$rule} || 0;
        my $ham_count  = $HAM_RULES{$rule}  || 0;

        my $pct_mail  = _sa_ratio($spam_count + $ham_count, $NUM_EMAIL) * 100;
        my $pct_spam  = _sa_ratio($spam_count, $NUM_SPAM) * 100;
        my $pct_ham   = _sa_ratio($ham_count,  $NUM_HAM)  * 100;
        my $avg_score = _sa_ratio($scores->{$rule}, $counts->{$rule});

        $rank++;
        printf("%4d\t%-24s\t%5s\t%6.2f\t%6.2f\t%6.2f\t%6.2f\n",$rank,$rule,$counts->{$rule},$pct_mail,$pct_spam,$pct_ham,$avg_score);

        last if ($TOPRULES > 0 && $rank >= $TOPRULES);
    }
    hr();
    br();
}
250
251 #######################
# Prints a horizontal separator: an HTML <hr> tag in web mode,
# otherwise a line of 78 dashes followed by a newline.
sub hr {
    my $separator = "-" x 78 ."\n";
    $separator = "<hr size=1 width=50% align=left>" if ($PRINT_TO_WEB);
    print $separator;
}
260 #######################
# Prints a line break: "<br>" in web mode, a plain newline otherwise.
sub br {
    my $break = "\n";
    $break = "<br>" if ($PRINT_TO_WEB);
    print $break;
}
Attached Files
To refer to attachments on a page, use attachment:filename, as shown below in the list of files. Do NOT use the URL of the [get] link, since this is subject to change and can break easily. You are not allowed to attach a file to this page.