1 /* Copyright (c) 2008, Tim Post <tinkertim@gmail.com>
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are met:
7 * Redistributions of source code must retain the above copyright notice, this
8 * list of conditions and the following disclaimer.
10 * Redistributions in binary form must reproduce the above copyright notice,
11 * this list of conditions and the following disclaimer in the documentation
12 * and/or other materials provided with the distribution.
14 * Neither the name of the original program's authors nor the names of its
15 * contributors may be used to endorse or promote products derived from this
16 * software without specific prior written permission.
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
22 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
26 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
27 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
28 * POSSIBILITY OF SUCH DAMAGE. */
31 /* Some example usages:
32 * grawk shutdown '$5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15' messages
33 * grawk shutdown '$5, $6, $7, $8, $9, $10, " -- " $1, $2, $3' messages
34 * grawk dhclient '$1, $2 " \"$$\"-- " $3' syslog
35 * cat syslog | grawk dhclient '$0'
36 * cat myservice.log | grawk -F , error '$3'
39 * Tim Post, Nicholas Clements, Alex Karlov
40 * We hope that you find this useful! */
/* TODO:
43 * readline() should probably be renamed
47 * Add a tail -f like behavior that applies expressions and fields
48 * Recursive (like grep -r) or at least honor symlinks ? */
54 #include <sys/types.h>
58 #define VERSION "1.0.7"
59 #define MAINTAINER "Tim Post <echo@echoreply.us>"
61 /* Storage structure to hold awk-style pattern.
 * awkcomp() fills it in and awkfree() releases the strings it holds.
 * Each entry of `fields` is a strdup()'d string. An entry whose first byte
 * is the marker '\1' is a $N field reference (the digits follow the
 * marker); process_line() prints the matching input field for it. Other
 * entries appear to be literal text that is printed unchanged. */
64 int maxfield; /* Maximum field number for $# fields */
65 int numfields; /* Number of awk pattern fields */
66 char **fields; /* The awk pattern fields */
/* Short alias used by the compile and processing helpers. */
69 typedef struct awk_pattern awk_pat_t;
71 /* Option arguments.
 * Table handed to getopt_long() in main(). Every entry has a null flag
 * pointer (the 0 in the third column), so getopt_long() returns the
 * character in the fourth column for that option. Only --field-separator
 * takes an argument. */
72 static struct option const long_options[] = {
73 { "ignore-case", no_argument, 0, 'i' },
74 { "with-filename", no_argument, 0, 'W' },
75 { "no-filename", no_argument, 0, 'w' },
76 { "line-number", no_argument, 0, 'n' },
77 { "field-separator", required_argument, 0, 'F' },
78 { "help", no_argument, 0, 'h' },
79 { "version", no_argument, 0, 'v' },
83 /* The official name of the program; usage() prints it in the banner and
 * in the example command lines. */
84 const char *progname = "grawk";
86 /* Global for delimiters used in tokenizing strings.
 * main() sets this to tab, CR, LF and space by default. When an option
 * argument is supplied it is rebuilt as tab, CR, LF followed by that
 * argument, so the rebuilt set no longer contains the space character.
 * process_line() passes it to strtok(). */
87 char *tokdelim = NULL;
/* Forward declarations for the file-local (static) helpers defined below.
 * regex_t and awk_pat_t are passed by value to the process*() functions,
 * while awkcomp() and awkfree() take a pointer to the pattern structure. */
90 static void usage(void);
91 static int process(FILE *, regex_t, awk_pat_t, char *, int);
92 static int process_line(char *, awk_pat_t, char *, char *);
93 static int process_files(int, char **, regex_t, awk_pat_t, int, int);
94 static int process_pipe(regex_t, awk_pat_t, int);
95 static int awkcomp(awk_pat_t *, char *);
96 static void awkfree(awk_pat_t *);
97 static char *readline(FILE *);
/* usage() - print the help text to stdout with printf(): the program name
 * and VERSION, a usage synopsis, the option summary, a description of the
 * two pattern arguments, worked examples, and the MAINTAINER address for
 * bug reports. Takes no arguments and returns nothing. */
99 static void usage(void)
101 printf("%s %s\n", progname, VERSION);
102 printf("Usage: %s [OPTION] PATTERN OUTPUT_PATTERN file1 [file2]...\n",
104 printf("Options:\n");
106 "show help and examples\n");
107 printf(" -i, --ignore-case "
108 "ignore case distinctions\n");
109 printf(" -W, --with-filename "
110 "Print filename for each match\n");
111 printf(" -w, --no-filename "
112 "Never print filename for each match\n");
113 printf(" -n, --line-number "
114 "Prefix each line of output with line number.\n");
115 printf(" -F fs, --field-separator=fs "
116 "Use fs as the field separator\n");
117 printf(" -h, --help "
118 "Print a brief help summary\n");
119 printf(" -v, --version "
120 "Print version information and exit normally\n");
122 "a basic regular expression\n");
123 printf(" OUTPUT_PATTERN "
124 "awk-style print statement; defines "
/* The examples substitute progname so the text follows the program name. */
126 printf("\nExamples:\n");
127 printf(" Retrieve joe123's home directory from /etc/passwd:\n");
128 printf("\t%s -F : \"joe123\" '$6' /etc/passwd\n", progname);
129 printf("\n Find fields 2 3 and 4 on lines that begin with @ from stdin:\n");
130 printf("\tcat file.txt | %s \"^@\" '$2,$3,$4'\n", progname);
131 printf("\n Use as a simple grep:\n");
132 printf("\t%s \"string to find\" '$0' /file.txt\n", progname);
133 printf("\nReport bugs to %s\n", MAINTAINER);
136 /* readline() - read a line from the file handle.
137 * Return an allocated string.
 * The buffer is obtained with malloc() at an initial size of 256 bytes
 * (`len`) and is resized with realloc(); `step` (256) is presumably the
 * growth increment. The caller presumably owns the returned buffer and
 * must free() it -- confirm against the callers. */
138 static char *readline(FILE *fp)
140 char *str = (char *)NULL;
141 int ch = 0, len = 256, step = 256, i = 0;
143 str = (char *)malloc(len);
/* Both LF and CR are tested as end-of-line characters.
 * NOTE(review): if each of them ends the line on its own, CRLF input
 * would produce an extra empty line -- confirm. */
151 if (ch == '\n' || ch == '\r') {
/* NOTE(review): the realloc() result is assigned straight back to `str`;
 * on failure the original buffer would be leaked. Confirm that a NULL
 * check follows. */
158 str = (char *)realloc(str, len);
168 /* process() - this is the actual processing where we compare against a
169 * previously compiled grep pattern and output based on the awk pattern.
170 * The file is opened by the calling function. We pass in an empty string
171 * if we don't want to show the filename. If we want to show the line number,
172 * the value of show_lineno is 1. If we find a line, return 1. If no line is
173 * found, return 0. If an error occurs, return -1. */
174 static int process(FILE *fp, regex_t re, awk_pat_t awk,
175 char *filename, int show_lineno)
/* slineno holds the optional line-number prefix; it starts out empty. */
179 memset(slineno, 0, sizeof(slineno));
/* readline() hands back an allocated buffer for the next input line. */
184 inbuf = readline(fp);
/* nmatch is 0 and pmatch is NULL: only match / no-match is needed, not
 * the position of the match. */
190 if (regexec(&re, inbuf, (size_t)0, NULL, 0) == 0) {
191 found = 1; // Found a line.
/* Line-number prefix in the form "N:".
 * NOTE(review): unbounded sprintf(); the declaration of slineno is not
 * visible here -- confirm it can hold the longest "%ld:" output, or use
 * snprintf(slineno, sizeof(slineno), ...). */
193 sprintf(slineno, "%ld:", lineno);
/* A non-zero result from process_line() is reported as an error. */
194 if (process_line(inbuf, awk, filename, slineno)) {
195 fprintf (stderr, "Error processing line [%s]\n", inbuf);
209 /* process_files() - process one or more files from the command-line.
210 * If at least one line is found, return 1, else return 0 if no lines
211 * were found or an error occurs. */
212 static int process_files(int numfiles, char **files, regex_t re, awk_pat_t awk,
213 int show_filename, int show_lineno)
219 memset(filename, 0, sizeof(filename));
221 for(i = 0; i < numfiles; i++) {
/* NOTE(review): the local stat buffer is named `fstat`, which hides the
 * POSIX fstat() function inside this scope.
 * NOTE(review): the message below always says "No such file", although
 * stat() can fail for other reasons; errno is not used in the visible
 * lines. */
222 if (stat(files[i], &fstat) == -1) {
223 /* Did a file get deleted from the time we started running? */
225 "Error accessing file %s. No such file\n", files[i]);
/* Filename prefix in the form "name:".
 * NOTE(review): unbounded sprintf(); the declaration of `filename` is not
 * visible here -- confirm it is large enough for any path plus ':' or
 * use snprintf(). */
229 sprintf( filename, "%s:", files[i] );
230 /* For now, we aren't recursive. Perhaps allow symlinks? */
/* Tests for anything that is not a regular file. stat(), unlike lstat(),
 * follows symbolic links, so st_mode here describes the link target. */
231 if ((fstat.st_mode & S_IFMT) != S_IFREG)
/* NOTE(review): the message below always says "Permission denied",
 * whatever the actual reason fopen() failed. */
233 if (NULL == (fp = fopen(files[i], "r"))) {
235 "Error opening file %s. Permission denied\n", files[i]);
/* process() returns 1 when at least one line matched in this file. */
238 if (process(fp, re, awk, filename, show_lineno) == 1)
246 /* process_pipe() - process input from stdin.
 * Runs process() on stdin with an empty filename string, so no filename
 * prefix is printed. The visible test checks whether process() returned 1,
 * i.e. at least one matching line was found. */
247 static int process_pipe(regex_t re, awk_pat_t awk, int show_lineno)
249 if (process(stdin, re, awk, "", show_lineno) == 1)
255 /* process_line() - process the line based on the awk-style pattern and output
 * the selected fields to stdout. */
/* Parameters:
 *   inbuf    - the matched input line; it is tokenized through a strdup()
 *              copy, so the caller's buffer is not modified by strtok()
 *   awk      - compiled output pattern produced by awkcomp()
 *   filename - text printed first on each output line (may be empty)
 *   lineno   - text printed after filename (may be empty)
 * The caller, process(), treats a non-zero return value as an error. */
257 static int process_line(char *inbuf, awk_pat_t awk, char *filename, char *lineno)
/* Compiled form of "$0": the field marker byte followed by the digit 0. */
259 char full_line[3] = { '\1', '0', '\0' };
261 if (awk.numfields == 1 && strcmp(awk.fields[0], full_line) == 0) {
262 /* If the caller only wants the whole string, oblige, quickly. */
263 fprintf (stdout, "%s%s%s\n", filename, lineno, inbuf);
267 /* Build an array of fields from the line using strtok()
268 * TODO: make this re-entrant so that grawk can be spawned as a thread */
/* (maxfield + 1) pointers, all set to NULL before tokenizing.
 * NOTE(review): no NULL check of the malloc() or strdup() result is
 * visible before they are used -- confirm. */
269 char **linefields = (char **)malloc((awk.maxfield + 1) * sizeof(char *));
270 char *wrkbuf = strdup(inbuf), *tbuf;
272 int count = 0, n = 1, i;
273 for (i = 0; i < (awk.maxfield + 1); i++) {
274 linefields[i] = NULL;
/* strtok() treats a run of adjacent delimiters as one separator, so empty
 * fields are skipped rather than preserved. */
277 tbuf = strtok(wrkbuf, tokdelim);
279 linefields[0] = strdup(tbuf);
281 while (tbuf != NULL) {
282 tbuf = strtok(NULL, tokdelim);
/* Tokens beyond the highest referenced field are not needed. */
286 if (count > awk.maxfield)
288 linefields[count] = strdup(tbuf);
289 if (!linefields[count]) {
290 fprintf(stderr, "Could not allocate memory to process file %s\n",
295 /* For each field in the awk structure,
296 * find the field and print it to stdout.*/
297 fprintf(stdout, "%s%s", filename, lineno); /* if needed */
298 for (i = 0; i < awk.numfields; i++) {
/* A leading marker byte means a $N reference; N is parsed from the digits
 * that follow it. */
299 if (awk.fields[i][0] == '\1') {
300 n = atoi(&awk.fields[i][1]);
302 fprintf(stdout, "%s", inbuf);
/* Field numbers are 1-based, the array is 0-based. */
306 fprintf(stdout, "%s", linefields[n-1]);
309 fprintf(stdout, "%s", awk.fields[i]);
311 fprintf(stdout, "\n");
/* Release the per-line field copies and the array itself. */
316 for (i = 0; i < count; i++) {
318 linefields[i] = (char *) NULL;
322 linefields = (char **)NULL;
327 /* awkcomp() - little awk-style print format compilation routine.
328 * Returns structure with the apattern broken down into an array for easier
329 * comparison and printing. Handles string literals as well as fields and
330 * delimiters. Example: $1,$2 " \$ and \"blah\" " $4
331 * Returns -1 on error, else 0.
 *
 * Notes on the implementation:
 *  - wrkbuf is a scratch buffer of strlen(apattern) + 1 bytes in which one
 *    output field at a time is assembled; it is cleared with memset()
 *    after each field has been strdup()'d into awk->fields.
 *  - A $N reference is stored as the byte 1 followed by its digits;
 *    process_line() looks for that marker byte.
 *  - 48 and 57 are the ASCII codes of the digits 0 and 9, so `ch - 48`
 *    converts a digit character to its numeric value.
 *  - Each parsed field number is compared against awk->maxfield,
 *    presumably to record the largest one.
 *  - Inside a literal string a backslash causes the following character to
 *    be copied as-is (apattern[++i]).
 *  - The field separator is stored as a single space.
 *  - NOTE(review): strlen(apattern) is re-evaluated in the loop condition
 *    on every iteration; it could be computed once before the loop.
 *  - NOTE(review): the incomplete-field test reads wrkbuf[1], which is
 *    only the byte after the marker if the marker was written at offset
 *    0 -- confirm offs is always 0 at that point. */
332 static int awkcomp(awk_pat_t *awk, char *apattern)
337 awk->fields = (char **)malloc(sizeof(char *));
342 wrkbuf = (char *)malloc(strlen(apattern) + 1);
343 if (wrkbuf == NULL) {
345 fprintf(stderr, "Memory allocation error (wrkbuf) in awkcomp()\n");
349 int inString = 0, offs = 0;
351 for (i = 0; i < strlen( apattern ); i++) {
353 if (inString && ch != '"' && ch != '\\') {
360 /* Handle delimited strings inside of literal strings */
363 wrkbuf[offs++] = apattern[++i];
366 /* Unexpected and unconventional escape (can get these
367 * from improper invocations of sed in a pipe with grawk),
368 * if sed is used to build the field delimiters */
370 "Unexpected character \'\\\' in output format\n");
374 /* Beginning or ending of a literal string */
376 inString = !inString;
380 /* Handle the awk-like $# field variables */
382 /* We use a non-printable ASCII character to
383 * delimit the string field values.*/
384 wrkbuf[offs++] = '\1';
385 /* We also need the max. field number */
389 /* Not a number, exit this loop */
390 if (ch < 48 || ch > 57) {
394 num = (num * 10) + (ch - 48);
397 if (num > awk->maxfield)
399 /* Incomplete expression, a $ not followed by a number */
400 if (wrkbuf[1] == 0) {
401 fprintf(stderr, "Incomplete field descriptor at "
402 "or near character %d in awk pattern\n", i+1);
406 /* Field separator */
408 wrkbuf[offs++] = ' ';
411 /* if wrkbuf has nothing, we've got rubbish. Continue in the hopes
412 * that something else makes sense. */
415 /* End of a field reached, put it into awk->fields */
418 (char **)realloc(awk->fields, (awk->numfields + 1)
422 "Memory allocation error (awk->fields) in awkcomp()\n");
425 awk->fields[awk->numfields] = strdup(wrkbuf);
426 if (!awk->fields[awk->numfields]) {
428 "Memory allocation error (awk->fields[%d]) in awkcomp()\n",
432 memset(wrkbuf, 0, strlen(apattern) + 1);
439 if (awk->numfields == 0) {
441 "Unable to parse and compile the pattern; no fields found\n");
448 /* awkfree() - free a previously allocated awk_pat structure.
 * Walks the first awk->numfields entries of awk->fields and free()s each
 * string (these are the strdup() copies made by awkcomp()). */
449 static void awkfree(awk_pat_t *awk )
452 for (i = 0; i < awk->numfields; i++)
453 free(awk->fields[i]);
/* main() - parse the options, take the grep-style and awk-style patterns
 * and the optional file list from the remaining arguments, compile both
 * patterns, then run them over the files or over stdin. rc is set to 255
 * when no matching line was found. */
458 int main(int argc, char **argv)
460 char *apattern = NULL, *gpattern = NULL;
462 int numfiles = 0, i = 0, c = 0;
463 int ignore_case = 0, no_filename = 0, with_filename = 0, line_number = 0;
/* Default field delimiters: tab, CR, LF and space. */
470 tokdelim = strdup("\t\r\n ");
/* NOTE(review): the short-option string contains no 'v', although
 * long_options maps --version to 'v' and usage() advertises -v. As
 * written getopt_long() would reject a bare -v -- confirm and add 'v'
 * to the string if that is unintended. */
474 c = getopt_long(argc, argv, "wWhinF:", long_options, &opt_ind);
/* Rebuild the delimiter set: 3 bytes for tab, CR, LF, then the option
 * argument, then the terminator. The space of the default set is not
 * carried over.
 * NOTE(review): the realloc() result is used by memset() on the very next
 * line without a NULL check, and it overwrites the only copy of the old
 * pointer. */
491 tokdelim = realloc(tokdelim, 3 + strlen(optarg) + 1);
492 memset(tokdelim, 0, 3 + strlen( optarg ) + 1);
493 sprintf(tokdelim, "\t\r\n%s", optarg);
501 printf("%s\n", VERSION);
508 /* Now we'll grab our patterns and files. */
/* At least the two patterns must remain after the options. */
509 if ((argc - optind) < 2) {
515 /* pattern one will be our "grep" pattern */
516 gpattern = strdup(argv[optind]);
517 if (gpattern == NULL) {
/* NOTE(review): this message has no trailing newline. */
518 fprintf(stderr, "Memory allocation error");
523 /* pattern two is our "awk" pattern */
/* Same index expression as for gpattern above; optind is presumably
 * advanced on a line not visible here -- confirm. */
524 apattern = strdup(argv[optind]);
525 if(apattern == NULL) {
/* NOTE(review): this message has no trailing newline. */
526 fprintf(stderr, "Memory allocation error");
531 /* Anything that remains is a file or wildcard which should be
532 * expanded by the calling shell. */
534 numfiles = argc - optind;
/* One spare slot is allocated beyond numfiles; each name is copied with
 * strdup().
 * NOTE(review): no NULL check of the malloc() or strdup() results is
 * visible -- confirm. */
535 files = (char **)malloc(sizeof(char *) * (numfiles + 1));
536 for (i = 0; i < numfiles; i++) {
537 files[i] = strdup(argv[optind + i]);
540 /* If the number of files is greater than 1 then we default to
541 * showing the filename unless specifically directed against it.*/
542 if (numfiles > 1 && no_filename == 0)
547 /* Process everything */
549 int cflags = 0, rc = 0;
553 /* compile the regular expression parser */
/* regcomp() returns non-zero on failure. */
554 if (regcomp(&re, gpattern, cflags)) {
556 "Error compiling grep-style pattern [%s]\n", gpattern);
/* awkcomp() returns -1 on error, else 0. */
561 if (awkcomp(&awk, apattern))
564 "Error compiling awk-style pattern [%s]\n", apattern);
570 numfiles, files, re, awk, with_filename, line_number) == 0)
571 rc = 255; // We'll return 255 if no lines were found.
573 if(process_pipe(re, awk, line_number) == 0)
/* Clean-up of the copied file names starts here. */
578 for (i = 0; i < numfiles; i++) {
593 /* Awk pattern structure */
596 /* Token delimiter (might have been freed elsewhere) */