ccanlint: add simple check for comment referring to LICENSE file.
author Rusty Russell <rusty@rustcorp.com.au>
Thu, 21 Jul 2011 03:32:27 +0000 (13:02 +0930)
committer Rusty Russell <rusty@rustcorp.com.au>
Thu, 21 Jul 2011 04:53:01 +0000 (14:23 +0930)
After discussion with various developers (particularly the Samba
team), there's a consensus that a reference to the license in each
source file is useful.  Since CCAN modules are designed to be cut and
pasted, this helps avoid any confusion should the LICENSE file go
missing.

We also detect standard boilerplates, in which case a one-line summary
isn't necessary.

tools/ccanlint/Makefile
tools/ccanlint/ccanlint.h
tools/ccanlint/file_analysis.c
tools/ccanlint/licenses.c [new file with mode: 0644]
tools/ccanlint/licenses.h [new file with mode: 0644]
tools/ccanlint/tests/license_comment.c [new file with mode: 0644]

index 3772267ed418b4d845fc10d777a4c64c41792899..6bec8c4a90dbb84c5ea68965a2eb1990cb113118 100644 (file)
@@ -4,6 +4,7 @@ TEST_OBJS := $(NORMAL_TEST_CFILES:.c=.o) $(COMPULSORY_TEST_CFILES:.c=.o)
 
 CORE_OBJS := tools/ccanlint/ccanlint.o \
        tools/ccanlint/file_analysis.o \
+       tools/ccanlint/licenses.o \
        tools/doc_extract-core.o \
        tools/depends.o \
        tools/tools.o \
index b9965f75a5852364ef923ee98ca3b2e792745ceb..aec75ad97e524a3b42c19ac55244908903a2181f 100644 (file)
@@ -4,6 +4,7 @@
 #include <ccan/list/list.h>
 #include <stdbool.h>
 #include "../doc_extract.h"
+#include "licenses.h"
 
 #define REGISTER_TEST(name, ...) extern struct ccanlint name
 
    4 == Describe every action. */
 extern int verbose;
 
-enum license {
-       LICENSE_LGPLv2_PLUS,
-       LICENSE_LGPLv2,
-       LICENSE_LGPLv3,
-       LICENSE_LGPL,
-       LICENSE_GPLv2_PLUS,
-       LICENSE_GPLv2,
-       LICENSE_GPLv3,
-       LICENSE_GPL,
-       LICENSE_BSD,
-       LICENSE_MIT,
-       LICENSE_PUBLIC_DOMAIN,
-       LICENSE_UNKNOWN
-};
-
 struct manifest {
        char *dir;
        /* The module name, ie. final element of dir name */
@@ -195,6 +181,9 @@ struct ccan_file {
 
        /* Leak output from valgrind. */
        char *leak_info;
+
+       /* Simplified stream (lowercase letters, digits and single spaces) */
+       char *simplified;
 };
 
 /* A new ccan_file, with the given name (talloc_steal onto returned value). */
@@ -209,6 +198,9 @@ char **get_ccan_file_lines(struct ccan_file *f);
 /* Use this rather than accessing f->lines directly: loads on demand. */
 struct line_info *get_ccan_line_info(struct ccan_file *f);
 
+/* Use this rather than accessing f->simplified directly: loads on demand. */
+const char *get_ccan_simplified(struct ccan_file *f);
+
 enum line_compiled {
        NOT_COMPILED,
        COMPILED,
index 7ce65479bde929cb3e036ff73514522a9f4cddad..c20211677d6b3db3063ca0947778232d1ca18718 100644 (file)
@@ -86,6 +86,7 @@ struct ccan_file *new_ccan_file(const void *ctx, const char *dir, char *name)
        f->fullname = talloc_asprintf(f, "%s/%s", dir, f->name);
        f->contents = NULL;
        f->cov_compiled = NULL;
+       f->simplified = NULL;
        return f;
 }
 
diff --git a/tools/ccanlint/licenses.c b/tools/ccanlint/licenses.c
new file mode 100644 (file)
index 0000000..9bfa1d2
--- /dev/null
@@ -0,0 +1,113 @@
+#include "licenses.h"
+#include "ccanlint.h"
+#include <ccan/talloc/talloc.h>
+#include <ccan/str/str.h>
+
+const struct license_info licenses[] = { /*
+        * Indexed by enum license (lookups are written licenses[license]),
+        * so the entries below follow the order of the enumerators.
+        * Each entry is { name, shortname, { clause, ... } }.  The clauses
+        * are searched for with strstr() in the simplified file text, so
+        * they are written in lowercase with single spaces and no
+        * punctuation.  Unused clause slots are NULL.
+        */
+       { "LGPLv2+", "LGPL", /* LICENSE_LGPLv2_PLUS */
+         { "gnu lesser general public license",
+           "version 2",
+           "or at your option any later version"
+         }
+       },
+       { "LGPLv2", "LGPL", /* LICENSE_LGPLv2 */
+         { "gnu lesser general public license",
+           "version 2",
+           NULL
+         }
+       },
+       { "LGPLv3", "LGPL", /* LICENSE_LGPLv3 */
+         { "gnu lesser general public license",
+           "version 3",
+           NULL
+         }
+       },
+       { "LGPL", "LGPL", /* LICENSE_LGPL */
+         { "gnu lesser general public license",
+           NULL,
+           NULL
+         }
+       },
+       { "GPLv2+", "GPL", /* LICENSE_GPLv2_PLUS */
+         { "gnu general public license",
+           "version 2",
+           "or at your option any later version"
+         }
+       },
+       { "GPLv2", "GPL", /* LICENSE_GPLv2 */
+         { "gnu general public license",
+           "version 2",
+           NULL
+         }
+       },
+       { "GPLv3", "GPL", /* LICENSE_GPLv3 */
+         { "gnu general public license",
+           "version 3",
+           NULL
+         }
+       },
+       { "GPL", "GPL", /* LICENSE_GPL */
+         { "gnu general public license",
+           NULL,
+           NULL
+         }
+       },
+       { "BSD-3CLAUSE", "BSD", /* LICENSE_BSD */
+         { "redistributions of source code must retain",
+           "redistributions in binary form must reproduce",
+           "endorse or promote"
+         }
+       },
+       { "BSD-MIT", "MIT", /* LICENSE_MIT */
+         { "without restriction",
+           "above copyright notice",
+           "without warranty"
+         }
+       },
+       { "Public domain", "Public domain", /* LICENSE_PUBLIC_DOMAIN: no clauses */
+         { NULL, NULL, NULL  }
+       },
+       { "Unknown license", "Unknown license", /* LICENSE_UNKNOWN: no clauses */
+         { NULL, NULL, NULL  }
+       },
+};
+
+const char *get_ccan_simplified(struct ccan_file *f)
+{ /*
+        * Return f's contents reduced to lowercase letters, digits and
+        * single spaces.  The text is built on the first call, stored in
+        * f->simplified (a talloc_strdup(f, ...) copy of the contents), and
+        * returned unchanged on later calls.
+        */
+       if (!f->simplified) {
+               unsigned int i, j;
+
+               /* Simplify for easy matching: only alnum and single spaces.
+                * The copy is compacted in place: i is the read index, j the
+                * write index, and j never gets ahead of i. */
+               f->simplified = talloc_strdup(f, get_ccan_file_contents(f));
+               for (i = 0, j = 0; f->simplified[i]; i++) {
+                       if (cisupper(f->simplified[i])) /* fold to lowercase */
+                               f->simplified[j++] = tolower(f->simplified[i]);
+                       else if (cislower(f->simplified[i])) /* keep as-is */
+                               f->simplified[j++] = f->simplified[i];
+                       else if (cisdigit(f->simplified[i])) /* keep as-is */
+                               f->simplified[j++] = f->simplified[i];
+                       else if (cisspace(f->simplified[i])) { /* collapse runs */
+                               if (j != 0 && f->simplified[j-1] != ' ') /* none at start, never two in a row */
+                                       f->simplified[j++] = ' ';
+                       } /* any other character (punctuation etc.) is dropped */
+               }
+               f->simplified[j] = '\0'; /* may leave one trailing space */
+       }
+       return f->simplified;
+}
+
+bool find_boilerplate(struct ccan_file *f, enum license license)
+{ /*
+        * Report whether f contains the boilerplate text for `license`.
+        * Every non-NULL clause of licenses[license] must occur as a
+        * substring of the simplified file text (get_ccan_simplified());
+        * the first missing clause makes the result false.  Checking stops
+        * at the first NULL clause, so a license whose first clause is NULL
+        * has nothing to check and the result is true.
+        */
+       unsigned int i;
+
+       for (i = 0; i < NUM_CLAUSES; i++) {
+               if (!licenses[license].clause[i]) /* no more clauses for this license */
+                       break;
+
+               if (!strstr(get_ccan_simplified(f),
+                           licenses[license].clause[i])) {
+                       return false;
+               }
+       }
+       return true;
+}
diff --git a/tools/ccanlint/licenses.h b/tools/ccanlint/licenses.h
new file mode 100644 (file)
index 0000000..7b70bfa
--- /dev/null
@@ -0,0 +1,34 @@
+#ifndef CCANLINT_LICENSES_H
+#define CCANLINT_LICENSES_H
+#include <stdbool.h>
+
+enum license { /* Values are used as indices into licenses[] (licenses[license]),
+        * so the enumerator order and the table's entry order must agree. */
+       LICENSE_LGPLv2_PLUS,
+       LICENSE_LGPLv2,
+       LICENSE_LGPLv3,
+       LICENSE_LGPL,
+       LICENSE_GPLv2_PLUS,
+       LICENSE_GPLv2,
+       LICENSE_GPLv3,
+       LICENSE_GPL,
+       LICENSE_BSD,
+       LICENSE_MIT,
+       LICENSE_PUBLIC_DOMAIN,
+       LICENSE_UNKNOWN
+};
+
+#define NUM_CLAUSES 3
+
+struct license_info { /* One entry of the licenses[] table. */
+       const char *name; /* specific license name, e.g. "LGPLv2+" */
+       const char *shortname; /* family name, e.g. "LGPL"; looked for in a file's leading comment lines */
+       /* Edit distance is expensive, and this works quite well.
+        * Phrases that find_boilerplate() searches for in the simplified
+        * file text; unused slots are NULL. */
+       const char *clause[NUM_CLAUSES];
+};
+
+extern const struct license_info licenses[];
+
+struct ccan_file;
+bool find_boilerplate(struct ccan_file *f, enum license license);
+
+#endif /* CCANLINT_LICENSES_H */
diff --git a/tools/ccanlint/tests/license_comment.c b/tools/ccanlint/tests/license_comment.c
new file mode 100644 (file)
index 0000000..f190212
--- /dev/null
@@ -0,0 +1,69 @@
+#include <tools/ccanlint/ccanlint.h>
+#include <ccan/foreach/foreach.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <limits.h>
+#include <errno.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <err.h>
+#include <ccan/talloc/talloc.h>
+#include <ccan/str/str.h>
+#include <ccan/str_talloc/str_talloc.h>
+
+static void check_license_comment(struct manifest *m,
+                                 bool keep,
+                                 unsigned int *timeleft, struct score *score)
+{ /*
+        * Check that every file on m->c_files and m->h_files refers to the
+        * module's license.  A file is accepted if its lines before the
+        * first CODE_LINE mention both "LICENSE" and the license's
+        * shortname, or, failing that, if find_boilerplate() matches.
+        * Each offending file gets a score_file_error(); the check passes
+        * with full score only when no per-file error was recorded.
+        * keep and timeleft are not used here.
+        */
+       struct list_head *list;
+
+       /* No requirements on public domain (or on an unknown license):
+        * pass immediately with full score. */
+       if (m->license == LICENSE_PUBLIC_DOMAIN
+           || m->license == LICENSE_UNKNOWN) {
+               score->pass = true;
+               score->score = score->total;
+               return;
+       }
+
+       foreach_ptr(list, &m->c_files, &m->h_files) { /* visit both file lists */
+               struct ccan_file *f;
+
+               list_for_each(list, f, list) {
+                       unsigned int i;
+                       char **lines = get_ccan_file_lines(f);
+                       struct line_info *info = get_ccan_line_info(f);
+                       bool found_license = false, found_flavor = false;
+
+                       for (i = 0; lines[i]; i++) {
+                               if (info[i].type == CODE_LINE)
+                                       break; /* only lines before the first code line are scanned */
+                               if (strstr(lines[i], "LICENSE")) /* case-sensitive literal match */
+                                       found_license = true;
+                               if (strstr(lines[i],
+                                          licenses[m->license].shortname)) /* plain substring: "GPL" also matches inside "LGPL" */
+                                       found_flavor = true;
+                       }
+                       if ((!found_license || !found_flavor)
+                           && !find_boilerplate(f, m->license)) { /* neither a one-line reference nor boilerplate */
+                               score_file_error(score, f, lines[i] ? i : 0, /* i if the scan stopped at a code line, else 0 */
+                                                "No reference to license"
+                                                " found");
+                       }
+               }
+       }
+
+       if (list_empty(&score->per_file_errors)) { /* no file was flagged */
+               score->pass = true;
+               score->score = score->total;
+       }
+}
+
+struct ccanlint license_comment = { /* Descriptor for the check implemented by check_license_comment(). */
+       .key = "license_comment",
+       .name = "Source and header files refer to LICENSE",
+       .check = check_license_comment,
+       .needs = "license_exists" /* NOTE(review): presumably the key of a check that must run first; confirm against the ccanlint core */
+};
+REGISTER_TEST(license_comment);