#! /usr/bin/perl -w

# stat-comments.pl by Teodor Zlatanov, tzz@iglou.com
# March 26, 2000

# A script to evaluate the readability of comments
# embedded in C++.  Utilizes code from demo-decomment.pl,
# which is included with the Parse::RecDescent module.
# Uses the Lingua::EN::Fathom module to evaluate text
# readability.

# ORIGINAL BY Helmut Jarausch
# EXTENDED BY Damian Conway AND Helmut Jarausch
# POLISHED BY Teodor Zlatanov

# Usage:
#   stat-comments.pl [file]
# With no argument, the sample program stored after __DATA__ is analysed.

use strict;
use warnings;
use Parse::RecDescent;
use Lingua::EN::Fathom;

# Package variable holding the grammar text.  It is assigned at compile
# time by the BEGIN block further down, so it is already populated when
# the parser is constructed here at run time.
our $Grammar;

my $parser = Parse::RecDescent->new($Grammar) or die "invalid grammar";

# Slurp the whole input in one read.  $/ is localized so that the change
# to the input record separator does not leak into the rest of the program.
my $text = do {
    local $/;
    @ARGV ? <> : <DATA>;
};

# The 'program' rule returns a hash reference with the keys
# code, comments and strings (see the grammar below).
my $parts = $parser->program($text) or die "malformed C program";

# only work with comments of length > 0
die "No comments found in input" unless length $parts->{comments};

# convert every comment mark to a period, so separate comments are
# separate sentences, if well-formed.  Lingua::EN::Fathom is quite
# good at figuring out what sentences are valid, so an extra period
# in the text won't affect the overall counts.
# The three substitutions are deliberately applied one after another;
# folding them into a single alternation would change the result for
# overlapping marks such as "*//".
$parts->{comments} =~ s#//#. #g;
$parts->{comments} =~ s#/\*#. #g;
$parts->{comments} =~ s#\*/#. #g;

# we can now evaluate the comments (stored in $parts->{comments})
my $fathom = Lingua::EN::Fathom->new;
$fathom->analyse_block($parts->{comments});

# voila, the readability report!
print $fathom->report;

# The grammar is built in a BEGIN block so that it can live at the bottom
# of the file and still be defined before the parser is created above.
BEGIN
{ $Grammar=<<'EOF';

# Accumulators shared by the actions below.  They are declared as rule
# variables of 'program' so that every parse starts with fresh values.
program : <rulevar: local $Comments = "">
program : <rulevar: local $Code = "">
program : <rulevar: local @Strings>
program : part(s)
        { { code=>$Code, comments=>$Comments, strings=>[@Strings]} }

part    : comment
        | C_code
        | string

C_code  : m{(
              [^"/]+            # one or more non-delimiters
              (                 # then (optionally)...
               /                # a potential comment delimiter
               [^*/]            # which is not an actual delimiter
              )?                #
            )+                  # all repeated once or more
           }x
                { $Code .= $item[1] }

string  : m{"                   # a leading delimiter
            ((                  # zero or more...
              \\.               # escaped anything
              |                 # or
              [^"]              # anything but a delimiter
             )*
            )
            "}x
                { $Code .= $item[1]; push @Strings, $1 }

comment : m{\s*                 # optional whitespace
            //                  # comment delimiter
            [^\n]*              # anything except a newline
            \n                  # then a newline
           }x
                { $Code .= "\n"; $Comments .= $item[1] }

        | m{\s*                 # optional whitespace
            /\*                 # comment opener
            (?:[^*]+|\*(?!/))*  # anything except */
            \*/                 # comment closer
            ([ \t]*)?           # trailing blanks or tabs
           }x
                { $Code .= " "; $Comments .= $item[1] }

EOF
}
__DATA__
program test; // for decomment

// using Parse::RecDescent

/* We should raise the indices quite a bit with this text section,
   because it will actually include sentences and structure.  See, the
   problem with most C/C++ programs is that they use comments that are
   very short and convey little information. */

int main()
{
/* this should
   be removed
*/
  char *cp1 = "";
  char *cp2 = "cp2";
  int i;  // a counter
          // remove this line altogehter
  int k;
      int more_indented;  // keep indentation
      int l;  /* a loop
             variable */
      // should be completely removed

  char *str = "/* ceci n'est pas un commentaire */";
  return 0;
}