]> git.kernelconcepts.de Git - karo-tx-uboot.git/blobdiff - lib/slre.c
Update from 2013.01 to 2013.07
[karo-tx-uboot.git] / lib / slre.c
diff --git a/lib/slre.c b/lib/slre.c
new file mode 100644 (file)
index 0000000..8cdd192
--- /dev/null
@@ -0,0 +1,724 @@
+/*
+ * Copyright (c) 2004-2005 Sergey Lyubka <valenok@gmail.com>
+ * All rights reserved
+ *
+ * "THE BEER-WARE LICENSE" (Revision 42):
+ * Sergey Lyubka wrote this file.  As long as you retain this notice you
+ * can do whatever you want with this stuff. If we meet some day, and you think
+ * this stuff is worth it, you can buy me a beer in return.
+ */
+
+/*
+ * Downloaded Sat Nov  5 17:43:06 CET 2011 at
+ * http://slre.sourceforge.net/1.0/slre.c
+ */
+
+#ifdef SLRE_TEST
+#include <stdio.h>
+#include <assert.h>
+#include <ctype.h>
+#include <stdlib.h>
+#include <string.h>
+#else
+#include <common.h>
+#include <linux/ctype.h>
+#endif /* SLRE_TEST */
+
+#include <errno.h>
+
+#include <slre.h>
+
+enum {END, BRANCH, ANY, EXACT, ANYOF, ANYBUT, OPEN, CLOSE, BOL, EOL,
+       STAR, PLUS, STARQ, PLUSQ, QUEST, SPACE, NONSPACE, DIGIT};
+
+#ifdef SLRE_TEST
+static struct {
+       const char      *name;
+       int             narg;
+       const char      *flags;
+} opcodes[] = {
+       {"END",         0, ""},         /* End of code block or program */
+       {"BRANCH",      2, "oo"},       /* Alternative operator, "|"    */
+       {"ANY",         0, ""},         /* Match any character, "."     */
+       {"EXACT",       2, "d"},        /* Match exact string           */
+       {"ANYOF",       2, "D"},        /* Match any from set, "[]"     */
+       {"ANYBUT",      2, "D"},        /* Match any but from set, "[^]"*/
+       {"OPEN ",       1, "i"},        /* Capture start, "("           */
+       {"CLOSE",       1, "i"},        /* Capture end, ")"             */
+       {"BOL",         0, ""},         /* Beginning of string, "^"     */
+       {"EOL",         0, ""},         /* End of string, "$"           */
+       {"STAR",        1, "o"},        /* Match zero or more times "*" */
+       {"PLUS",        1, "o"},        /* Match one or more times, "+" */
+       {"STARQ",       1, "o"},        /* Non-greedy STAR,  "*?"       */
+       {"PLUSQ",       1, "o"},        /* Non-greedy PLUS, "+?"        */
+       {"QUEST",       1, "o"},        /* Match zero or one time, "?"  */
+       {"SPACE",       0, ""},         /* Match whitespace, "\s"       */
+       {"NONSPACE",    0, ""},         /* Match non-space, "\S"        */
+       {"DIGIT",       0, ""}          /* Match digit, "\d"            */
+};
+#endif /* SLRE_TEST */
+
+/*
+ * Commands and operands are all unsigned char (1 byte long). All code offsets
+ * are relative to current address, and positive (always point forward). Data
+ * offsets are absolute. Commands with operands:
+ *
+ * BRANCH offset1 offset2
+ *     Try to match the code block that follows the BRANCH instruction
+ *     (code block ends with END). If no match, try to match code block that
+ *     starts at offset1. If either of these match, jump to offset2.
+ *
+ * EXACT data_offset data_length
+ *     Try to match exact string. String is recorded in data section from
+ *     data_offset, and has length data_length.
+ *
+ * OPEN capture_number
+ * CLOSE capture_number
+ *     If the user have passed 'struct cap' array for captures, OPEN
+ *     records the beginning of the matched substring (cap->ptr), CLOSE
+ *     sets the length (cap->len) for respective capture_number.
+ *
+ * STAR code_offset
+ * PLUS code_offset
+ * QUEST code_offset
+ *     *, +, ?, respectively. Try to gobble as much as possible from the
+ *     matched buffer, until code block that follows these instructions
+ *     matches. When the longest possible string is matched,
+ *     jump to code_offset
+ *
+ * STARQ, PLUSQ are non-greedy versions of STAR and PLUS.
+ */
+
+static const char *meta_chars = "|.^$*+?()[\\";
+
+#ifdef SLRE_TEST
+
+static void
+print_character_set(FILE *fp, const unsigned char *p, int len)
+{
+       int     i;
+
+       for (i = 0; i < len; i++) {
+               if (i > 0)
+                       (void) fputc(',', fp);
+               if (p[i] == 0) {
+                       i++;
+                       if (p[i] == 0)
+                               (void) fprintf(fp, "\\x%02x", p[i]);
+                       else
+                               (void) fprintf(fp, "%s", opcodes[p[i]].name);
+               } else if (isprint(p[i])) {
+                       (void) fputc(p[i], fp);
+               } else {
+                       (void) fprintf(fp, "\\x%02x", p[i]);
+               }
+       }
+}
+
+void
+slre_dump(const struct slre *r, FILE *fp)
+{
+       int     i, j, ch, op, pc;
+
+       for (pc = 0; pc < r->code_size; pc++) {
+
+               op = r->code[pc];
+               (void) fprintf(fp, "%3d %s ", pc, opcodes[op].name);
+
+               for (i = 0; opcodes[op].flags[i] != '\0'; i++)
+                       switch (opcodes[op].flags[i]) {
+                       case 'i':
+                               (void) fprintf(fp, "%d ", r->code[pc + 1]);
+                               pc++;
+                               break;
+                       case 'o':
+                               (void) fprintf(fp, "%d ",
+                                   pc + r->code[pc + 1] - i);
+                               pc++;
+                               break;
+                       case 'D':
+                               print_character_set(fp, r->data +
+                                   r->code[pc + 1], r->code[pc + 2]);
+                               pc += 2;
+                               break;
+                       case 'd':
+                               (void) fputc('"', fp);
+                               for (j = 0; j < r->code[pc + 2]; j++) {
+                                       ch = r->data[r->code[pc + 1] + j];
+                                       if (isprint(ch)) {
+                                               (void) fputc(ch, fp);
+                                       } else {
+                                               (void) fprintf(fp,
+                                                       "\\x%02x", ch);
+                                       }
+                               }
+                               (void) fputc('"', fp);
+                               pc += 2;
+                               break;
+                       }
+
+               (void) fputc('\n', fp);
+       }
+}
+#endif /* SLRE_TEST */
+
+static void
+set_jump_offset(struct slre *r, int pc, int offset)
+{
+       assert(offset < r->code_size);
+
+       if (r->code_size - offset > 0xff)
+               r->err_str = "Jump offset is too big";
+       else
+               r->code[pc] = (unsigned char) (r->code_size - offset);
+}
+
+static void
+emit(struct slre *r, int code)
+{
+       if (r->code_size >= (int) (sizeof(r->code) / sizeof(r->code[0])))
+               r->err_str = "RE is too long (code overflow)";
+       else
+               r->code[r->code_size++] = (unsigned char) code;
+}
+
+static void
+store_char_in_data(struct slre *r, int ch)
+{
+       if (r->data_size >= (int) sizeof(r->data))
+               r->err_str = "RE is too long (data overflow)";
+       else
+               r->data[r->data_size++] = ch;
+}
+
+static void
+exact(struct slre *r, const char **re)
+{
+       int     old_data_size = r->data_size;
+
+       while (**re != '\0' && (strchr(meta_chars, **re)) == NULL)
+               store_char_in_data(r, *(*re)++);
+
+       emit(r, EXACT);
+       emit(r, old_data_size);
+       emit(r, r->data_size - old_data_size);
+}
+
+static int
+get_escape_char(const char **re)
+{
+       int     res;
+
+       switch (*(*re)++) {
+       case 'n':
+               res = '\n';
+               break;
+       case 'r':
+               res = '\r';
+               break;
+       case 't':
+               res = '\t';
+               break;
+       case '0':
+               res = 0;
+               break;
+       case 'S':
+               res = NONSPACE << 8;
+               break;
+       case 's':
+               res = SPACE << 8;
+               break;
+       case 'd':
+               res = DIGIT << 8;
+               break;
+       default:
+               res = (*re)[-1];
+               break;
+       }
+
+       return res;
+}
+
+static void
+anyof(struct slre *r, const char **re)
+{
+       int     esc, old_data_size = r->data_size, op = ANYOF;
+
+       if (**re == '^') {
+               op = ANYBUT;
+               (*re)++;
+       }
+
+       while (**re != '\0')
+
+               switch (*(*re)++) {
+               case ']':
+                       emit(r, op);
+                       emit(r, old_data_size);
+                       emit(r, r->data_size - old_data_size);
+                       return;
+                       /* NOTREACHED */
+                       break;
+               case '\\':
+                       esc = get_escape_char(re);
+                       if ((esc & 0xff) == 0) {
+                               store_char_in_data(r, 0);
+                               store_char_in_data(r, esc >> 8);
+                       } else {
+                               store_char_in_data(r, esc);
+                       }
+                       break;
+               default:
+                       store_char_in_data(r, (*re)[-1]);
+                       break;
+               }
+
+       r->err_str = "No closing ']' bracket";
+}
+
+static void
+relocate(struct slre *r, int begin, int shift)
+{
+       emit(r, END);
+       memmove(r->code + begin + shift, r->code + begin, r->code_size - begin);
+       r->code_size += shift;
+}
+
+static void
+quantifier(struct slre *r, int prev, int op)
+{
+       if (r->code[prev] == EXACT && r->code[prev + 2] > 1) {
+               r->code[prev + 2]--;
+               emit(r, EXACT);
+               emit(r, r->code[prev + 1] + r->code[prev + 2]);
+               emit(r, 1);
+               prev = r->code_size - 3;
+       }
+       relocate(r, prev, 2);
+       r->code[prev] = op;
+       set_jump_offset(r, prev + 1, prev);
+}
+
+static void
+exact_one_char(struct slre *r, int ch)
+{
+       emit(r, EXACT);
+       emit(r, r->data_size);
+       emit(r, 1);
+       store_char_in_data(r, ch);
+}
+
+static void
+fixup_branch(struct slre *r, int fixup)
+{
+       if (fixup > 0) {
+               emit(r, END);
+               set_jump_offset(r, fixup, fixup - 2);
+       }
+}
+
+static void
+compile(struct slre *r, const char **re)
+{
+       int     op, esc, branch_start, last_op, fixup, cap_no, level;
+
+       fixup = 0;
+       level = r->num_caps;
+       branch_start = last_op = r->code_size;
+
+       for (;;)
+               switch (*(*re)++) {
+               case '\0':
+                       (*re)--;
+                       return;
+                       /* NOTREACHED */
+                       break;
+               case '^':
+                       emit(r, BOL);
+                       break;
+               case '$':
+                       emit(r, EOL);
+                       break;
+               case '.':
+                       last_op = r->code_size;
+                       emit(r, ANY);
+                       break;
+               case '[':
+                       last_op = r->code_size;
+                       anyof(r, re);
+                       break;
+               case '\\':
+                       last_op = r->code_size;
+                       esc = get_escape_char(re);
+                       if (esc & 0xff00)
+                               emit(r, esc >> 8);
+                       else
+                               exact_one_char(r, esc);
+                       break;
+               case '(':
+                       last_op = r->code_size;
+                       cap_no = ++r->num_caps;
+                       emit(r, OPEN);
+                       emit(r, cap_no);
+
+                       compile(r, re);
+                       if (*(*re)++ != ')') {
+                               r->err_str = "No closing bracket";
+                               return;
+                       }
+
+                       emit(r, CLOSE);
+                       emit(r, cap_no);
+                       break;
+               case ')':
+                       (*re)--;
+                       fixup_branch(r, fixup);
+                       if (level == 0) {
+                               r->err_str = "Unbalanced brackets";
+                               return;
+                       }
+                       return;
+                       /* NOTREACHED */
+                       break;
+               case '+':
+               case '*':
+                       op = (*re)[-1] == '*' ? STAR : PLUS;
+                       if (**re == '?') {
+                               (*re)++;
+                               op = op == STAR ? STARQ : PLUSQ;
+                       }
+                       quantifier(r, last_op, op);
+                       break;
+               case '?':
+                       quantifier(r, last_op, QUEST);
+                       break;
+               case '|':
+                       fixup_branch(r, fixup);
+                       relocate(r, branch_start, 3);
+                       r->code[branch_start] = BRANCH;
+                       set_jump_offset(r, branch_start + 1, branch_start);
+                       fixup = branch_start + 2;
+                       r->code[fixup] = 0xff;
+                       break;
+               default:
+                       (*re)--;
+                       last_op = r->code_size;
+                       exact(r, re);
+                       break;
+               }
+}
+
+int
+slre_compile(struct slre *r, const char *re)
+{
+       r->err_str = NULL;
+       r->code_size = r->data_size = r->num_caps = r->anchored = 0;
+
+       if (*re == '^')
+               r->anchored++;
+
+       emit(r, OPEN);  /* This will capture what matches full RE */
+       emit(r, 0);
+
+       while (*re != '\0')
+               compile(r, &re);
+
+       if (r->code[2] == BRANCH)
+               fixup_branch(r, 4);
+
+       emit(r, CLOSE);
+       emit(r, 0);
+       emit(r, END);
+
+       return (r->err_str == NULL ? 1 : 0);
+}
+
+static int match(const struct slre *, int,
+               const char *, int, int *, struct cap *);
+
+static void
+loop_greedy(const struct slre *r, int pc, const char *s, int len, int *ofs)
+{
+       int     saved_offset, matched_offset;
+
+       saved_offset = matched_offset = *ofs;
+
+       while (match(r, pc + 2, s, len, ofs, NULL)) {
+               saved_offset = *ofs;
+               if (match(r, pc + r->code[pc + 1], s, len, ofs, NULL))
+                       matched_offset = saved_offset;
+               *ofs = saved_offset;
+       }
+
+       *ofs = matched_offset;
+}
+
+static void
+loop_non_greedy(const struct slre *r, int pc, const char *s, int len, int *ofs)
+{
+       int     saved_offset = *ofs;
+
+       while (match(r, pc + 2, s, len, ofs, NULL)) {
+               saved_offset = *ofs;
+               if (match(r, pc + r->code[pc + 1], s, len, ofs, NULL))
+                       break;
+       }
+
+       *ofs = saved_offset;
+}
+
+static int
+is_any_of(const unsigned char *p, int len, const char *s, int *ofs)
+{
+       int     i, ch;
+
+       ch = s[*ofs];
+
+       for (i = 0; i < len; i++)
+               if (p[i] == ch) {
+                       (*ofs)++;
+                       return 1;
+               }
+
+       return 0;
+}
+
+static int
+is_any_but(const unsigned char *p, int len, const char *s, int *ofs)
+{
+       int     i, ch;
+
+       ch = s[*ofs];
+
+       for (i = 0; i < len; i++) {
+               if (p[i] == ch)
+                       return 0;
+       }
+
+       (*ofs)++;
+       return 1;
+}
+
+static int
+match(const struct slre *r, int pc, const char *s, int len,
+               int *ofs, struct cap *caps)
+{
+       int     n, saved_offset, res = 1;
+
+       while (res && r->code[pc] != END) {
+
+               assert(pc < r->code_size);
+               assert(pc < (int) (sizeof(r->code) / sizeof(r->code[0])));
+
+               switch (r->code[pc]) {
+               case BRANCH:
+                       saved_offset = *ofs;
+                       res = match(r, pc + 3, s, len, ofs, caps);
+                       if (res == 0) {
+                               *ofs = saved_offset;
+                               res = match(r, pc + r->code[pc + 1],
+                                   s, len, ofs, caps);
+                       }
+                       pc += r->code[pc + 2];
+                       break;
+               case EXACT:
+                       res = 0;
+                       n = r->code[pc + 2];    /* String length */
+                       if (n <= len - *ofs && !memcmp(s + *ofs, r->data +
+                           r->code[pc + 1], n)) {
+                               (*ofs) += n;
+                               res = 1;
+                       }
+                       pc += 3;
+                       break;
+               case QUEST:
+                       res = 1;
+                       saved_offset = *ofs;
+                       if (!match(r, pc + 2, s, len, ofs, caps))
+                               *ofs = saved_offset;
+                       pc += r->code[pc + 1];
+                       break;
+               case STAR:
+                       res = 1;
+                       loop_greedy(r, pc, s, len, ofs);
+                       pc += r->code[pc + 1];
+                       break;
+               case STARQ:
+                       res = 1;
+                       loop_non_greedy(r, pc, s, len, ofs);
+                       pc += r->code[pc + 1];
+                       break;
+               case PLUS:
+                       res = match(r, pc + 2, s, len, ofs, caps);
+                       if (res == 0)
+                               break;
+
+                       loop_greedy(r, pc, s, len, ofs);
+                       pc += r->code[pc + 1];
+                       break;
+               case PLUSQ:
+                       res = match(r, pc + 2, s, len, ofs, caps);
+                       if (res == 0)
+                               break;
+
+                       loop_non_greedy(r, pc, s, len, ofs);
+                       pc += r->code[pc + 1];
+                       break;
+               case SPACE:
+                       res = 0;
+                       if (*ofs < len && isspace(((unsigned char *)s)[*ofs])) {
+                               (*ofs)++;
+                               res = 1;
+                       }
+                       pc++;
+                       break;
+               case NONSPACE:
+                       res = 0;
+                       if (*ofs < len &&
+                                       !isspace(((unsigned char *)s)[*ofs])) {
+                               (*ofs)++;
+                               res = 1;
+                       }
+                       pc++;
+                       break;
+               case DIGIT:
+                       res = 0;
+                       if (*ofs < len && isdigit(((unsigned char *)s)[*ofs])) {
+                               (*ofs)++;
+                               res = 1;
+                       }
+                       pc++;
+                       break;
+               case ANY:
+                       res = 0;
+                       if (*ofs < len) {
+                               (*ofs)++;
+                               res = 1;
+                       }
+                       pc++;
+                       break;
+               case ANYOF:
+                       res = 0;
+                       if (*ofs < len)
+                               res = is_any_of(r->data + r->code[pc + 1],
+                                       r->code[pc + 2], s, ofs);
+                       pc += 3;
+                       break;
+               case ANYBUT:
+                       res = 0;
+                       if (*ofs < len)
+                               res = is_any_but(r->data + r->code[pc + 1],
+                                       r->code[pc + 2], s, ofs);
+                       pc += 3;
+                       break;
+               case BOL:
+                       res = *ofs == 0 ? 1 : 0;
+                       pc++;
+                       break;
+               case EOL:
+                       res = *ofs == len ? 1 : 0;
+                       pc++;
+                       break;
+               case OPEN:
+                       if (caps != NULL)
+                               caps[r->code[pc + 1]].ptr = s + *ofs;
+                       pc += 2;
+                       break;
+               case CLOSE:
+                       if (caps != NULL)
+                               caps[r->code[pc + 1]].len = (s + *ofs) -
+                                   caps[r->code[pc + 1]].ptr;
+                       pc += 2;
+                       break;
+               case END:
+                       pc++;
+                       break;
+               default:
+                       printf("unknown cmd (%d) at %d\n", r->code[pc], pc);
+                       assert(0);
+                       break;
+               }
+       }
+
+       return res;
+}
+
+int
+slre_match(const struct slre *r, const char *buf, int len,
+               struct cap *caps)
+{
+       int     i, ofs = 0, res = 0;
+
+       if (r->anchored) {
+               res = match(r, 0, buf, len, &ofs, caps);
+       } else {
+               for (i = 0; i < len && res == 0; i++) {
+                       ofs = i;
+                       res = match(r, 0, buf, len, &ofs, caps);
+               }
+       }
+
+       return res;
+}
+
+#ifdef SLRE_TEST
+#define N_CAPS 5
+
+int main(int argc, char *argv[])
+{
+       struct slre     slre;
+       struct cap      caps[N_CAPS];
+       unsigned char   data[1 * 1024 * 1024];
+       FILE            *fp;
+       int             i, res, len;
+
+       if (argc < 2) {
+               fprintf(stderr, "Usage: %s 'slre' <file>\n", argv[0]);
+               return 1;
+       }
+
+       fp = fopen(argv[2], "rb");
+       if (fp == NULL) {
+               fprintf(stderr, "Error: cannot open %s:%s\n",
+                       argv[2], strerror(errno));
+               return 1;
+       }
+
+       if (!slre_compile(&slre, argv[1])) {
+               fprintf(stderr, "Error compiling slre: %s\n", slre.err_str);
+               return 1;
+       }
+       
+       slre_dump(&slre, stderr);
+
+       while (fgets(data, sizeof(data), fp) != NULL) {
+               len = strlen(data);
+
+               if ((len > 0) && (data[len-1] == '\n')) {
+                       data[len-1] = '\0';
+                       --len;
+               }
+
+               printf("Data = \"%s\"\n", data);
+
+               (void) memset(caps, 0, sizeof(caps));
+
+               res = 0;
+
+               res = slre_match(&slre, data, len, caps);
+               printf("Result [%d]: %d\n", i, res);
+
+               for (i = 0; i < N_CAPS; i++) {
+                       if (caps[i].len > 0) {
+                               printf("Substring %d: len=%d  [%.*s]\n", i,
+                                       caps[i].len,
+                                       caps[i].len, caps[i].ptr);
+                       }
+               }
+               printf("----------------------------------------------------\n");
+       }
+       (void) fclose(fp);
+
+       return 0;
+}
+#endif /* SLRE_TEST */