More junkcode!
authorRusty Russell <rusty@rustcorp.com.au>
Thu, 23 Apr 2009 06:18:49 +0000 (15:48 +0930)
committerRusty Russell <rusty@rustcorp.com.au>
Thu, 23 Apr 2009 06:18:49 +0000 (15:48 +0930)
junkcode/tinkertim@gmail.com-grawk/grawk.c [new file with mode: 0644]

diff --git a/junkcode/tinkertim@gmail.com-grawk/grawk.c b/junkcode/tinkertim@gmail.com-grawk/grawk.c
new file mode 100644 (file)
index 0000000..14e5cc4
--- /dev/null
@@ -0,0 +1,600 @@
+/* Copyright (c) 2008, Tim Post <tinkertim@gmail.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * Neither the name of the original program's authors nor the names of its
+ * contributors may be used to endorse or promote products derived from this
+ * software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+/* Some example usages:
+ * grawk shutdown '$5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15' messages
+ * grawk shutdown '$5, $6, $7, $8, $9, $10, " -- " $1, $2, $3' messages
+ * grawk dhclient '$1, $2 " \"$$\"-- " $3' syslog
+ * cat syslog | grawk dhclient '$0'
+ * cat myservice.log | grawk -F , error '$3'
+ *
+ * Contributors:
+ * Tim Post, Nicholas Clements, Alex Karlov
+ * We hope that you find this useful! */
+
+/* FIXME:
+ * readline() should probably be renamed
+ */
+
+/* TODO:
+ * Add a tail -f like behavior that applies expressions and fields
+ * Recursive (like grep -r) or at least honor symlinks ? */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <getopt.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <regex.h>
+
+#define VERSION    "1.0.7"
+#define MAINTAINER "Tim Post <echo@echoreply.us>"
+
+/* Storage structure to hold awk-style pattern */
+struct awk_pattern
+{
+       int maxfield;   /* Maximum field number for $# fields */
+       int numfields;  /* Number of awk pattern fields */
+       char **fields;  /* The awk pattern fields */
+};
+
+typedef struct awk_pattern awk_pat_t;
+
+/* Option arguments */
+static struct option const long_options[] = {
+       { "ignore-case",     no_argument,       0, 'i' },
+       { "with-filename",   no_argument,       0, 'W' },
+       { "no-filename",     no_argument,       0, 'w' },
+       { "line-number",     no_argument,       0, 'n' },
+       { "field-separator", required_argument, 0, 'F' },
+       { "help",            no_argument,       0, 'h' },
+       { "version",         no_argument,       0, 'v' },
+       { 0, 0, 0, 0}
+};
+
+/* The official name of the program */
+const char *progname = "grawk";
+
+/* Global for delimiters used in tokenizing strings */
+char *tokdelim = NULL;
+
+/* Prototypes */
+static void usage(void);
+static int process(FILE *, regex_t, awk_pat_t, char *, int);
+static int process_line(char *, awk_pat_t, char *, char *);
+static int process_files(int, char **, regex_t, awk_pat_t, int, int);
+static int process_pipe(regex_t, awk_pat_t, int);
+static int awkcomp(awk_pat_t *, char *);
+static void awkfree(awk_pat_t *);
+static char *readline(FILE *);
+
+static void usage(void)
+{
+       printf("%s %s\n", progname, VERSION);
+       printf("Usage: %s [OPTION] PATTERN OUTPUT_PATTERN file1 [file2]...\n",
+                       progname);
+       printf("Options:\n");
+       printf("  --help                       "
+               "show help and examples\n");
+       printf("  -i, --ignore-case            "
+               "ignore case distinctions\n");
+       printf("  -W, --with-filename          "
+               "Print filename for each match\n");
+       printf("  -w, --no-filename            "
+               "Never print filename for each match\n");
+       printf("  -n, --line-number            "
+               "Prefix each line of output with line number.\n");
+       printf("  -F fs, --field-separator=fs  "
+               "Use fs as the field separator\n");
+       printf("  -h, --help                   "
+               "Print a brief help summary\n");
+       printf("  -v, --version                "
+               "Print version information and exit normally\n");
+       printf("  PATTERN                      "
+               "a basic regular expression\n");
+       printf("  OUTPUT_PATTERN               "
+               "awk-style print statement; defines "
+               "output fields\n");
+       printf("\nExamples:\n");
+       printf("  Retreive joe123's home directory from /etc/passwd:\n");
+       printf("\t%s -F : \"joe123\" '$6' /etc/passwd\n", progname);
+       printf("\n  Find fields 2 3 and 4 on lines that begin with @ from stdin:\n");
+       printf("\tcat file.txt | %s \"^@\" '$2,$3,$4'\n", progname);
+       printf("\n  Use as a simple grep:\n");
+       printf("\t%s \"string to find\" '$0' /file.txt\n", progname);
+       printf("\nReport bugs to %s\n", MAINTAINER);
+}
+
+/* readline() - read a line from the file handle.
+ * Return an allocated string */
+static char *readline(FILE *fp)
+{
+       char *str = (char *)NULL;
+       int ch = 0, len = 256, step = 256, i = 0;
+
+       str = (char *)malloc(len);
+       if (str == NULL)
+               return str;
+
+       while (1) {
+               ch = fgetc(fp);
+               if (feof(fp))
+                       break;
+               if (ch == '\n' || ch == '\r') {
+                       str[i++] = 0;
+                       break;
+               }
+               str[i++] = ch;
+               if (i == len - 2) {
+                       len += step;
+                       str = (char *)realloc(str, len);
+                       if (str == NULL) {
+                               fclose(fp);
+                               return str;
+                       }
+               }
+       }
+       return str;
+}
+
+/* process() - this is the actual processing where we compare against a
+ * previously compiled grep pattern and output based on the awk pattern.
+ * The file is opened by the calling function. We pass in an empty string
+ * if we don't want to show the filename. If we want to show the line number,
+ * the value of show_lineno is 1. If we find a line, return 1. If no line is
+ * found, return 0. If an error occurs, return -1. */
+static int process(FILE *fp, regex_t re, awk_pat_t awk,
+       char *filename, int show_lineno)
+{
+       char *inbuf = NULL;
+       char slineno[32];
+       memset(slineno, 0, sizeof(slineno));
+       long lineno = 0;
+       int found = 0;
+
+       while (1) {
+               inbuf = readline(fp);
+               if (!inbuf)
+                       break;
+               if (feof(fp))
+                       break;
+               lineno++;
+               if (regexec(&re, inbuf, (size_t)0, NULL, 0) == 0) {
+                       found = 1;  // Found a line.
+                       if (show_lineno)
+                               sprintf(slineno, "%ld:", lineno);
+                       if (process_line(inbuf, awk, filename, slineno)) {
+                               fprintf (stderr, "Error processing line [%s]\n", inbuf);
+                               free (inbuf);
+                               return -1;
+                       }
+               }
+               free (inbuf);
+       }
+
+       if (inbuf)
+               free(inbuf);
+
+       return found;
+}
+
+/* process_files() - process one or more files from the command-line.
+ * If at least one line is found, return 1, else return 0 if no lines
+ * were found or an error occurs. */
+static int process_files(int numfiles, char **files, regex_t re, awk_pat_t awk,
+               int show_filename, int show_lineno)
+{
+       int i, found = 0;
+       FILE *fp = NULL;
+       struct stat fstat;
+       char filename[1024];
+       memset(filename, 0, sizeof(filename));
+
+       for(i = 0; i < numfiles; i++) {
+               if (stat(files[i], &fstat) == -1) {
+                       /* Did a file get deleted from the time we started running? */
+                       fprintf (stderr,
+                               "Error accessing file %s. No such file\n", files[i]);
+                       continue;
+               }
+               if (show_filename)
+                       sprintf( filename, "%s:", files[i] );
+               /* For now, we aren't recursive. Perhaps allow symlinks? */
+               if ((fstat.st_mode & S_IFMT) != S_IFREG)
+                       continue;
+               if (NULL == (fp = fopen(files[i], "r"))) {
+                       fprintf(stderr,
+                               "Error opening file %s. Permission denied\n", files[i]);
+                       continue;
+               }
+               if (process(fp, re, awk, filename, show_lineno) == 1)
+                       found = 1;
+               fclose(fp);
+       }
+
+       return found;
+}
+
+/* process_pipe() - process input from stdin */
+static int process_pipe(regex_t re, awk_pat_t awk, int show_lineno)
+{
+       if (process(stdin, re, awk, "", show_lineno) == 1)
+               return 1;
+
+       return 0;
+}
+
+/* process_line() - process the line based on the awk-style pattern and output
+ * the results. */
+static int process_line(char *inbuf, awk_pat_t awk, char *filename, char *lineno)
+{
+       char full_line[3] = { '\1', '0', '\0' };
+
+       if (awk.numfields == 1 && strcmp(awk.fields[0], full_line) == 0) {
+               /* If the caller only wants the whole string, oblige, quickly. */
+               fprintf (stdout, "%s%s%s\n", filename, lineno, inbuf);
+               return 0;
+       }
+
+       /* Build an array of fields from the line using strtok()
+        * TODO: make this re-entrant so that grawk can be spawned as a thread */
+       char **linefields = (char **)malloc((awk.maxfield + 1) * sizeof(char *));
+       char *wrkbuf = strdup(inbuf), *tbuf;
+
+       int count = 0, n = 1, i;
+       for (i = 0; i < (awk.maxfield + 1); i++) {
+               linefields[i] = NULL;
+       }
+
+       tbuf = strtok(wrkbuf, tokdelim);
+       if(tbuf)
+               linefields[0] = strdup(tbuf);
+
+       while (tbuf != NULL) {
+               tbuf = strtok(NULL, tokdelim);
+               if (!tbuf)
+                       break;
+               count++;
+               if (count > awk.maxfield)
+                       break;
+               linefields[count] = strdup(tbuf);
+               if (!linefields[count]) {
+                       fprintf(stderr, "Could not allocate memory to process file %s\n",
+                               filename);
+                       return -1;
+               }
+       }
+       /* For each field in the awk structure,
+        * find the field and print it to stdout.*/
+       fprintf(stdout, "%s%s", filename, lineno);      /* if needed */
+       for (i = 0; i < awk.numfields; i++) {
+               if (awk.fields[i][0] == '\1') {
+                       n = atoi(&awk.fields[i][1]);
+                       if (n == 0) {
+                               fprintf(stdout, "%s", inbuf);
+                               continue;
+                       }
+                       if (linefields[n-1])
+                               fprintf(stdout, "%s", linefields[n-1]);
+                       continue;
+               } else
+                       fprintf(stdout, "%s", awk.fields[i]);
+       }
+       fprintf(stdout, "\n");
+       /* Cleanup */
+       if (wrkbuf)
+               free(wrkbuf);
+
+       for (i = 0; i < count; i++) {
+               free(linefields[i]);
+               linefields[i] = (char *) NULL;
+       }
+
+       free(linefields);
+       linefields = (char **)NULL;
+
+       return 0;
+}
+
+/* awkcomp() - little awk-style print format compilation routine.
+ * Returns structure with the apattern broken down into an array for easier
+ * comparison and printing. Handles string literals as well as fields and
+ * delimiters. Example: $1,$2 " \$ and \"blah\" " $4
+ * Returns -1 on error, else 0. */
+static int awkcomp(awk_pat_t *awk, char *apattern)
+{
+       awk->maxfield = 0;
+       awk->numfields = 0;
+       awk->fields = NULL;
+       awk->fields = (char **)malloc(sizeof(char *));
+
+       int i, num = 0;
+       char *wrkbuf;
+
+       wrkbuf = (char *)malloc(strlen(apattern) + 1);
+       if (wrkbuf == NULL) {
+               free(awk);
+               fprintf(stderr, "Memory allocation error (wrkbuf) in awkcomp()\n");
+               return -1;
+       }
+
+       int inString = 0, offs = 0;
+       char ch;
+       for (i = 0; i < strlen( apattern ); i++) {
+               ch = apattern[i];
+               if (inString && ch != '"' && ch != '\\') {
+                       wrkbuf[offs++] = ch;
+                       continue;
+               }
+               if (ch == ' ')
+                       continue;
+               switch (ch) {
+               /* Handle delimited strings inside of literal strings */
+               case '\\':
+                       if (inString) {
+                               wrkbuf[offs++] = apattern[++i];
+                               continue;
+                       } else {
+                               /* Unexpected and unconventional escape (can get these
+                                * from improper invocations of sed in a pipe with grawk),
+                                * if sed is used to build the field delimiters */
+                               fprintf(stderr,
+                                       "Unexpected character \'\\\' in output format\n");
+                               return -1;
+                       }
+                       break;
+               /* Beginning or ending of a literal string */
+               case '"':
+                       inString = !inString;
+                       if (inString)
+                               continue;
+                       break;
+               /* Handle the awk-like $# field variables */
+               case '$':
+                       /* We use a non-printable ASCII character to
+                        * delimit the string field values.*/
+                       wrkbuf[offs++] = '\1';
+                       /* We also need the max. field number */
+                       num = 0;
+                       while (1) {
+                               ch = apattern[++i];
+                               /* Not a number, exit this loop */
+                               if (ch < 48 || ch > 57) {
+                                       i--;
+                                       break;
+                               }
+                               num = (num * 10) + (ch - 48);
+                               wrkbuf[offs++] = ch;
+                       }
+                       if (num > awk->maxfield)
+                               awk->maxfield = num;
+                       /* Incomplete expression, a $ not followed by a number */
+                       if (wrkbuf[1] == 0) {
+                               fprintf(stderr, "Incomplete field descriptor at "
+                                               "or near character %d in awk pattern\n", i+1);
+                               return -1;
+                       }
+                       break;
+               /* Field separator */
+               case ',':
+                       wrkbuf[offs++] = ' ';
+                       break;
+               }
+               /* if wrkbuf has nothing, we've got rubbish. Continue in the hopes
+                * that something else makes sense. */
+               if (offs == 0)
+                       continue;
+               /* End of a field reached, put it into awk->fields */
+               wrkbuf[offs] = '\0';
+               awk->fields =
+                       (char **)realloc(awk->fields, (awk->numfields + 1)
+                               * sizeof(char *));
+               if (!awk->fields ) {
+                       fprintf(stderr,
+                               "Memory allocation error (awk->fields) in awkcomp()\n");
+                       return -1;
+               }
+               awk->fields[awk->numfields] = strdup(wrkbuf);
+               if (!awk->fields[awk->numfields]) {
+                       fprintf(stderr,
+                               "Memory allocation error (awk->fields[%d]) in awkcomp()\n",
+                                       awk->numfields);
+                       return -1;
+               }
+               memset(wrkbuf, 0, strlen(apattern) + 1);
+               awk->numfields++;
+               offs = 0;
+       }
+
+       free(wrkbuf);
+
+       if (awk->numfields == 0) {
+               fprintf(stderr,
+                       "Unable to parse and compile the pattern; no fields found\n");
+               return -1;
+       }
+
+       return 0;
+}
+
+/* awkfree() - free a previously allocated awk_pat structure */
+static void awkfree(awk_pat_t *awk )
+{
+       int i;
+       for (i = 0; i < awk->numfields; i++)
+               free(awk->fields[i]);
+
+       free(awk->fields);
+}
+
+int main(int argc, char **argv)
+{
+       char *apattern = NULL, *gpattern = NULL;
+       char **files = NULL;
+       int numfiles = 0, i = 0, c = 0;
+       int ignore_case = 0, no_filename = 0, with_filename = 0, line_number = 0;
+
+       if (argc < 3) {
+               usage();
+               return EXIT_FAILURE;
+       }
+
+       tokdelim = strdup("\t\r\n ");
+       while (1) {
+               int opt_ind = 0;
+               while (c != -1) {
+                       c = getopt_long(argc, argv, "wWhinF:", long_options, &opt_ind);
+                       switch (c) {
+                       case 'w':
+                               with_filename = 0;
+                               no_filename = 1;
+                               break;
+                       case 'i':
+                               ignore_case = 1;
+                               break;
+                       case 'W':
+                               with_filename = 1;
+                               no_filename = 0;
+                               break;
+                       case 'n':
+                               line_number = 1;
+                               break;
+                       case 'F':
+                               tokdelim = realloc(tokdelim, 3 + strlen(optarg) + 1);
+                               memset(tokdelim, 0, 3 + strlen( optarg ) + 1);
+                               sprintf(tokdelim, "\t\r\n%s", optarg);
+                               break;
+                       case 'h':
+                               usage();
+                               free(tokdelim);
+                               return EXIT_SUCCESS;
+                               break;
+                       case 'v':
+                               printf("%s\n", VERSION);
+                               free(tokdelim);
+                               return EXIT_SUCCESS;
+                               break;
+                       }
+               }
+
+               /* Now we'll grab our patterns and files. */
+               if ((argc - optind) < 2) {
+                       usage();
+                       free(tokdelim);
+                       return EXIT_FAILURE;
+               }
+
+               /* pattern one will be our "grep" pattern */
+               gpattern = strdup(argv[optind]);
+               if (gpattern == NULL) {
+                       fprintf(stderr, "Memory allocation error");
+                       exit(EXIT_FAILURE);
+               }
+               optind++;
+
+               /* pattern two is our "awk" pattern */
+               apattern = strdup(argv[optind]);
+               if(apattern == NULL) {
+                       fprintf(stderr, "Memory allocation error");
+                       exit(EXIT_FAILURE);
+               }
+               optind++;
+
+               /* Anything that remains is a file or wildcard which should be
+                * expanded by the calling shell. */
+               if (optind < argc) {
+                       numfiles = argc - optind;
+                       files = (char **)malloc(sizeof(char *) * (numfiles + 1));
+                       for (i = 0; i < numfiles; i++) {
+                               files[i] = strdup(argv[optind + i]);
+                       }
+               }
+               /* If the number of files is greater than 1 then we default to
+                * showing the filename unless specifically directed against it.*/
+               if (numfiles > 1 && no_filename == 0)
+                       with_filename = 1;
+               break;
+       }
+
+       /* Process everything */
+       regex_t re;
+       int cflags = 0, rc = 0;
+
+       if (ignore_case)
+               cflags = REG_ICASE;
+       /* compile the regular expression parser */
+       if (regcomp(&re, gpattern, cflags)) {
+               fprintf(stderr,
+                       "Error compiling grep-style pattern [%s]\n", gpattern);
+               return EXIT_FAILURE;
+       }
+
+       awk_pat_t awk;
+       if (awkcomp(&awk, apattern))
+       {
+               fprintf(stderr,
+                       "Error compiling awk-style pattern [%s]\n", apattern);
+               return EXIT_FAILURE;
+       }
+
+       if (numfiles > 0) {
+               if(process_files(
+                       numfiles, files, re, awk, with_filename, line_number) == 0)
+                       rc = 255;   // We'll return 255 if no lines were found.
+       } else {
+               if(process_pipe(re, awk, line_number) == 0)
+                       rc = 255;
+       }
+
+       /* Destructor */
+       for (i = 0; i < numfiles; i++) {
+               if (files[i])
+                       free(files[i]);
+       }
+       free(files);
+
+       /* Awk pattern */
+       free(apattern);
+
+       /* Grep pattern */
+       free(gpattern);
+
+       /* Grep regex */
+       regfree(&re);
+
+       /* Awk pattern structure */
+       awkfree(&awk);
+
+       /* Token delimiter (might have been freed elsewhere) */
+       if (tokdelim)
+               free(tokdelim);
+       return rc;
+}