Line data Source code
1 : /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2 :
3 : /***
4 : This file is part of systemd.
5 :
6 : Copyright 2012 Kay Sievers <kay@vrfy.org>
7 :
8 : systemd is free software; you can redistribute it and/or modify it
9 : under the terms of the GNU Lesser General Public License as published by
10 : the Free Software Foundation; either version 2.1 of the License, or
11 : (at your option) any later version.
12 :
13 : systemd is distributed in the hope that it will be useful, but
14 : WITHOUT ANY WARRANTY; without even the implied warranty of
15 : MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 : Lesser General Public License for more details.
17 :
18 : You should have received a copy of the GNU Lesser General Public License
19 : along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 : ***/
21 :
22 : #include <stdlib.h>
23 : #include <string.h>
24 :
25 : #include "util.h"
26 : #include "strbuf.h"
27 :
28 : /*
29 : * Strbuf stores given strings in a single continuous allocated memory
30 : * area. Identical strings are de-duplicated and return the same offset
31 : * as the first string stored. If the tail of a string already exists
32 : * in the buffer, the tail is returned.
33 : *
34 : * A trie (http://en.wikipedia.org/wiki/Trie) is used to maintain the
35 : * information about the stored strings.
36 : *
37 : * Example of udev rules:
38 : * $ ./udevadm test .
39 : * ...
40 : * read rules file: /usr/lib/udev/rules.d/99-systemd.rules
41 : * rules contain 196608 bytes tokens (16384 * 12 bytes), 39742 bytes strings
42 : * 23939 strings (207859 bytes), 20404 de-duplicated (171653 bytes), 3536 trie nodes used
43 : * ...
44 : */
45 :
46 5 : struct strbuf *strbuf_new(void) {
47 : struct strbuf *str;
48 :
49 5 : str = new0(struct strbuf, 1);
50 5 : if (!str)
51 0 : return NULL;
52 :
53 5 : str->buf = new0(char, 1);
54 5 : if (!str->buf)
55 0 : goto err;
56 5 : str->len = 1;
57 :
58 5 : str->root = new0(struct strbuf_node, 1);
59 5 : if (!str->root)
60 0 : goto err;
61 5 : str->nodes_count = 1;
62 5 : return str;
63 : err:
64 0 : free(str->buf);
65 0 : free(str->root);
66 0 : free(str);
67 0 : return NULL;
68 : }
69 :
70 255 : static void strbuf_node_cleanup(struct strbuf_node *node) {
71 : size_t i;
72 :
73 505 : for (i = 0; i < node->children_count; i++)
74 250 : strbuf_node_cleanup(node->children[i].child);
75 255 : free(node->children);
76 255 : free(node);
77 255 : }
78 :
79 : /* clean up trie data, leave only the string buffer */
80 2 : void strbuf_complete(struct strbuf *str) {
81 2 : if (!str)
82 0 : return;
83 2 : if (str->root)
84 2 : strbuf_node_cleanup(str->root);
85 2 : str->root = NULL;
86 : }
87 :
88 : /* clean up everything */
89 5 : void strbuf_cleanup(struct strbuf *str) {
90 5 : if (!str)
91 0 : return;
92 5 : if (str->root)
93 3 : strbuf_node_cleanup(str->root);
94 5 : free(str->buf);
95 5 : free(str);
96 : }
97 :
98 2535 : static int strbuf_children_cmp(const struct strbuf_child_entry *n1,
99 : const struct strbuf_child_entry *n2) {
100 2535 : return n1->c - n2->c;
101 : }
102 :
103 250 : static void bubbleinsert(struct strbuf_node *node,
104 : uint8_t c,
105 : struct strbuf_node *node_child) {
106 :
107 250 : struct strbuf_child_entry new = {
108 : .c = c,
109 : .child = node_child,
110 : };
111 250 : int left = 0, right = node->children_count;
112 :
113 771 : while (right > left) {
114 271 : int middle = (right + left) / 2 ;
115 271 : if (strbuf_children_cmp(&node->children[middle], &new) <= 0)
116 138 : left = middle + 1;
117 : else
118 133 : right = middle;
119 : }
120 :
121 250 : memmove(node->children + left + 1, node->children + left,
122 250 : sizeof(struct strbuf_child_entry) * (node->children_count - left));
123 250 : node->children[left] = new;
124 :
125 250 : node->children_count ++;
126 250 : }
127 :
128 : /* add string, return the index/offset into the buffer */
129 253 : ssize_t strbuf_add_string(struct strbuf *str, const char *s, size_t len) {
130 : uint8_t c;
131 : struct strbuf_node *node;
132 : size_t depth;
133 : char *buf_new;
134 : struct strbuf_child_entry *child;
135 : struct strbuf_node *node_child;
136 : ssize_t off;
137 :
138 253 : if (!str->root)
139 0 : return -EINVAL;
140 :
141 : /* search string; start from last character to find possibly matching tails */
142 253 : if (len == 0)
143 0 : return 0;
144 253 : str->in_count++;
145 253 : str->in_len += len;
146 :
147 253 : node = str->root;
148 253 : c = s[len-1];
149 1403 : for (depth = 0; depth <= len; depth++) {
150 : struct strbuf_child_entry search;
151 :
152 : /* match against current node */
153 1403 : off = node->value_off + node->value_len - len;
154 1403 : if (depth == len || (node->value_len >= len && memcmp(str->buf + off, s, len) == 0)) {
155 3 : str->dedup_len += len;
156 3 : str->dedup_count++;
157 3 : return off;
158 : }
159 :
160 : /* lookup child node */
161 1400 : c = s[len - 1 - depth];
162 1400 : search.c = c;
163 1400 : child = bsearch(&search, node->children, node->children_count,
164 : sizeof(struct strbuf_child_entry),
165 : (__compar_fn_t) strbuf_children_cmp);
166 1400 : if (!child)
167 250 : break;
168 1150 : node = child->child;
169 : }
170 :
171 : /* add new string */
172 250 : buf_new = realloc(str->buf, str->len + len+1);
173 250 : if (!buf_new)
174 0 : return -ENOMEM;
175 250 : str->buf = buf_new;
176 250 : off = str->len;
177 250 : memcpy(str->buf + off, s, len);
178 250 : str->len += len;
179 250 : str->buf[str->len++] = '\0';
180 :
181 : /* new node */
182 250 : node_child = new0(struct strbuf_node, 1);
183 250 : if (!node_child)
184 0 : return -ENOMEM;
185 250 : node_child->value_off = off;
186 250 : node_child->value_len = len;
187 :
188 : /* extend array, add new entry, sort for bisection */
189 250 : child = realloc(node->children, (node->children_count + 1) * sizeof(struct strbuf_child_entry));
190 250 : if (!child) {
191 0 : free(node_child);
192 0 : return -ENOMEM;
193 : }
194 :
195 250 : str->nodes_count++;
196 :
197 250 : node->children = child;
198 250 : bubbleinsert(node, c, node_child);
199 :
200 250 : return off;
201 : }
|