LCOV - code coverage report
Current view: top level - journal - journal-file.c (source / functions) Hit Total Coverage
Test: systemd test coverage Lines: 1098 1551 70.8 %
Date: 2015-07-29 18:47:03 Functions: 63 71 88.7 %

          Line data    Source code
       1             : /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
       2             : 
       3             : /***
       4             :   This file is part of systemd.
       5             : 
       6             :   Copyright 2011 Lennart Poettering
       7             : 
       8             :   systemd is free software; you can redistribute it and/or modify it
       9             :   under the terms of the GNU Lesser General Public License as published by
      10             :   the Free Software Foundation; either version 2.1 of the License, or
      11             :   (at your option) any later version.
      12             : 
      13             :   systemd is distributed in the hope that it will be useful, but
      14             :   WITHOUT ANY WARRANTY; without even the implied warranty of
      15             :   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
      16             :   Lesser General Public License for more details.
      17             : 
      18             :   You should have received a copy of the GNU Lesser General Public License
      19             :   along with systemd; If not, see <http://www.gnu.org/licenses/>.
      20             : ***/
      21             : 
      22             : #include <sys/mman.h>
      23             : #include <errno.h>
      24             : #include <sys/uio.h>
      25             : #include <unistd.h>
      26             : #include <sys/statvfs.h>
      27             : #include <fcntl.h>
      28             : #include <stddef.h>
      29             : #include <linux/fs.h>
      30             : 
      31             : #include "btrfs-util.h"
      32             : #include "journal-def.h"
      33             : #include "journal-file.h"
      34             : #include "journal-authenticate.h"
      35             : #include "lookup3.h"
      36             : #include "compress.h"
      37             : #include "random-util.h"
      38             : 
      39             : #define DEFAULT_DATA_HASH_TABLE_SIZE (2047ULL*sizeof(HashItem))
      40             : #define DEFAULT_FIELD_HASH_TABLE_SIZE (333ULL*sizeof(HashItem))
      41             : 
      42             : #define COMPRESSION_SIZE_THRESHOLD (512ULL)
      43             : 
      44             : /* This is the minimum journal file size */
      45             : #define JOURNAL_FILE_SIZE_MIN (4ULL*1024ULL*1024ULL)           /* 4 MiB */
      46             : 
      47             : /* These are the lower and upper bounds if we deduce the max_use value
      48             :  * from the file system size */
      49             : #define DEFAULT_MAX_USE_LOWER (1ULL*1024ULL*1024ULL)           /* 1 MiB */
      50             : #define DEFAULT_MAX_USE_UPPER (4ULL*1024ULL*1024ULL*1024ULL)   /* 4 GiB */
      51             : 
      52             : /* This is the upper bound if we deduce max_size from max_use */
      53             : #define DEFAULT_MAX_SIZE_UPPER (128ULL*1024ULL*1024ULL)        /* 128 MiB */
      54             : 
      55             : /* This is the upper bound if we deduce the keep_free value from the
      56             :  * file system size */
      57             : #define DEFAULT_KEEP_FREE_UPPER (4ULL*1024ULL*1024ULL*1024ULL) /* 4 GiB */
      58             : 
      59             : /* This is the keep_free value when we can't determine the system
      60             :  * size */
      61             : #define DEFAULT_KEEP_FREE (1024ULL*1024ULL)                    /* 1 MB */
      62             : 
      63             : /* n_data was the first entry we added after the initial file format design */
      64             : #define HEADER_SIZE_MIN ALIGN64(offsetof(Header, n_data))
      65             : 
      66             : /* How many entries to keep in the entry array chain cache at max */
      67             : #define CHAIN_CACHE_MAX 20
      68             : 
      69             : /* How much to increase the journal file size at once each time we allocate something new. */
      70             : #define FILE_SIZE_INCREASE (8ULL*1024ULL*1024ULL)              /* 8MB */
      71             : 
      72             : /* Reread fstat() of the file for detecting deletions at least this often */
      73             : #define LAST_STAT_REFRESH_USEC (5*USEC_PER_SEC)
      74             : 
      75             : /* The mmap context to use for the header: we pick one above the last defined object type */
      76             : #define CONTEXT_HEADER _OBJECT_TYPE_MAX
      77             : 
      78       32666 : static int journal_file_set_online(JournalFile *f) {
      79       32666 :         assert(f);
      80             : 
      81       32666 :         if (!f->writable)
      82           0 :                 return -EPERM;
      83             : 
      84       32666 :         if (!(f->fd >= 0 && f->header))
      85           0 :                 return -EINVAL;
      86             : 
      87       32666 :         if (mmap_cache_got_sigbus(f->mmap, f->fd))
      88           0 :                 return -EIO;
      89             : 
      90       32666 :         switch(f->header->state) {
      91             :                 case STATE_ONLINE:
      92       32647 :                         return 0;
      93             : 
      94             :                 case STATE_OFFLINE:
      95          19 :                         f->header->state = STATE_ONLINE;
      96          19 :                         fsync(f->fd);
      97          19 :                         return 0;
      98             : 
      99             :                 default:
     100           0 :                         return -EINVAL;
     101             :         }
     102             : }
     103             : 
     104        1365 : int journal_file_set_offline(JournalFile *f) {
     105        1365 :         assert(f);
     106             : 
     107        1365 :         if (!f->writable)
     108        1346 :                 return -EPERM;
     109             : 
     110          19 :         if (!(f->fd >= 0 && f->header))
     111           0 :                 return -EINVAL;
     112             : 
     113          19 :         if (f->header->state != STATE_ONLINE)
     114           2 :                 return 0;
     115             : 
     116          17 :         fsync(f->fd);
     117             : 
     118          17 :         if (mmap_cache_got_sigbus(f->mmap, f->fd))
     119           0 :                 return -EIO;
     120             : 
     121          17 :         f->header->state = STATE_OFFLINE;
     122             : 
     123          17 :         if (mmap_cache_got_sigbus(f->mmap, f->fd))
     124           0 :                 return -EIO;
     125             : 
     126          17 :         fsync(f->fd);
     127             : 
     128          17 :         return 0;
     129             : }
     130             : 
     131        1365 : void journal_file_close(JournalFile *f) {
     132        1365 :         assert(f);
     133             : 
     134             : #ifdef HAVE_GCRYPT
     135             :         /* Write the final tag */
     136        1365 :         if (f->seal && f->writable)
     137           0 :                 journal_file_append_tag(f);
     138             : #endif
     139             : 
     140        1365 :         journal_file_set_offline(f);
     141             : 
     142        1365 :         if (f->mmap && f->fd >= 0)
     143        1365 :                 mmap_cache_close_fd(f->mmap, f->fd);
     144             : 
     145        1365 :         if (f->fd >= 0 && f->defrag_on_close) {
     146             : 
     147             :                 /* Be friendly to btrfs: turn COW back on again now,
     148             :                  * and defragment the file. We won't write to the file
     149             :                  * ever again, hence remove all fragmentation, and
     150             :                  * reenable all the good bits COW usually provides
     151             :                  * (such as data checksumming). */
     152             : 
     153           2 :                 (void) chattr_fd(f->fd, 0, FS_NOCOW_FL);
     154           2 :                 (void) btrfs_defrag_fd(f->fd);
     155             :         }
     156             : 
     157        1365 :         safe_close(f->fd);
     158        1365 :         free(f->path);
     159             : 
     160        1365 :         if (f->mmap)
     161        1365 :                 mmap_cache_unref(f->mmap);
     162             : 
     163        1365 :         ordered_hashmap_free_free(f->chain_cache);
     164             : 
     165             : #if defined(HAVE_XZ) || defined(HAVE_LZ4)
     166        1365 :         free(f->compress_buffer);
     167             : #endif
     168             : 
     169             : #ifdef HAVE_GCRYPT
     170        1365 :         if (f->fss_file)
     171           0 :                 munmap(f->fss_file, PAGE_ALIGN(f->fss_file_size));
     172        1365 :         else if (f->fsprg_state)
     173           0 :                 free(f->fsprg_state);
     174             : 
     175        1365 :         free(f->fsprg_seed);
     176             : 
     177        1365 :         if (f->hmac)
     178           0 :                 gcry_md_close(f->hmac);
     179             : #endif
     180             : 
     181        1365 :         free(f);
     182        1365 : }
     183             : 
     184          18 : static int journal_file_init_header(JournalFile *f, JournalFile *template) {
     185          18 :         Header h = {};
     186             :         ssize_t k;
     187             :         int r;
     188             : 
     189          18 :         assert(f);
     190             : 
     191          18 :         memcpy(h.signature, HEADER_SIGNATURE, 8);
     192          18 :         h.header_size = htole64(ALIGN64(sizeof(h)));
     193             : 
     194          18 :         h.incompatible_flags |= htole32(
     195          36 :                 f->compress_xz * HEADER_INCOMPATIBLE_COMPRESSED_XZ |
     196          18 :                 f->compress_lz4 * HEADER_INCOMPATIBLE_COMPRESSED_LZ4);
     197             : 
     198          18 :         h.compatible_flags = htole32(
     199          18 :                 f->seal * HEADER_COMPATIBLE_SEALED);
     200             : 
     201          18 :         r = sd_id128_randomize(&h.file_id);
     202          18 :         if (r < 0)
     203           0 :                 return r;
     204             : 
     205          18 :         if (template) {
     206           3 :                 h.seqnum_id = template->header->seqnum_id;
     207           3 :                 h.tail_entry_seqnum = template->header->tail_entry_seqnum;
     208             :         } else
     209          15 :                 h.seqnum_id = h.file_id;
     210             : 
     211          18 :         k = pwrite(f->fd, &h, sizeof(h), 0);
     212          18 :         if (k < 0)
     213           0 :                 return -errno;
     214             : 
     215          18 :         if (k != sizeof(h))
     216           0 :                 return -EIO;
     217             : 
     218          18 :         return 0;
     219             : }
     220             : 
     221          19 : static int journal_file_refresh_header(JournalFile *f) {
     222             :         sd_id128_t boot_id;
     223             :         int r;
     224             : 
     225          19 :         assert(f);
     226             : 
     227          19 :         r = sd_id128_get_machine(&f->header->machine_id);
     228          19 :         if (r < 0)
     229           0 :                 return r;
     230             : 
     231          19 :         r = sd_id128_get_boot(&boot_id);
     232          19 :         if (r < 0)
     233           0 :                 return r;
     234             : 
     235          19 :         if (sd_id128_equal(boot_id, f->header->boot_id))
     236           1 :                 f->tail_entry_monotonic_valid = true;
     237             : 
     238          19 :         f->header->boot_id = boot_id;
     239             : 
     240          19 :         r = journal_file_set_online(f);
     241             : 
     242             :         /* Sync the online state to disk */
     243          19 :         fsync(f->fd);
     244             : 
     245          19 :         return r;
     246             : }
     247             : 
     248        1347 : static int journal_file_verify_header(JournalFile *f) {
     249             :         uint32_t flags;
     250             : 
     251        1347 :         assert(f);
     252             : 
     253        1347 :         if (memcmp(f->header->signature, HEADER_SIGNATURE, 8))
     254           0 :                 return -EBADMSG;
     255             : 
     256             :         /* In both read and write mode we refuse to open files with
     257             :          * incompatible flags we don't know */
     258        1347 :         flags = le32toh(f->header->incompatible_flags);
     259        1347 :         if (flags & ~HEADER_INCOMPATIBLE_SUPPORTED) {
     260         816 :                 if (flags & ~HEADER_INCOMPATIBLE_ANY)
     261           0 :                         log_debug("Journal file %s has unknown incompatible flags %"PRIx32,
     262             :                                   f->path, flags & ~HEADER_INCOMPATIBLE_ANY);
     263         816 :                 flags = (flags & HEADER_INCOMPATIBLE_ANY) & ~HEADER_INCOMPATIBLE_SUPPORTED;
     264         816 :                 if (flags)
     265         816 :                         log_debug("Journal file %s uses incompatible flags %"PRIx32
     266             :                                   " disabled at compilation time.", f->path, flags);
     267         816 :                 return -EPROTONOSUPPORT;
     268             :         }
     269             : 
     270             :         /* When open for writing we refuse to open files with
     271             :          * compatible flags, too */
     272         531 :         flags = le32toh(f->header->compatible_flags);
     273         531 :         if (f->writable && (flags & ~HEADER_COMPATIBLE_SUPPORTED)) {
     274           0 :                 if (flags & ~HEADER_COMPATIBLE_ANY)
     275           0 :                         log_debug("Journal file %s has unknown compatible flags %"PRIx32,
     276             :                                   f->path, flags & ~HEADER_COMPATIBLE_ANY);
     277           0 :                 flags = (flags & HEADER_COMPATIBLE_ANY) & ~HEADER_COMPATIBLE_SUPPORTED;
     278           0 :                 if (flags)
     279           0 :                         log_debug("Journal file %s uses compatible flags %"PRIx32
     280             :                                   " disabled at compilation time.", f->path, flags);
     281           0 :                 return -EPROTONOSUPPORT;
     282             :         }
     283             : 
     284         531 :         if (f->header->state >= _STATE_MAX)
     285           0 :                 return -EBADMSG;
     286             : 
     287             :         /* The first addition was n_data, so check that we are at least this large */
     288         531 :         if (le64toh(f->header->header_size) < HEADER_SIZE_MIN)
     289           0 :                 return -EBADMSG;
     290             : 
     291         531 :         if (JOURNAL_HEADER_SEALED(f->header) && !JOURNAL_HEADER_CONTAINS(f->header, n_entry_arrays))
     292           0 :                 return -EBADMSG;
     293             : 
     294         531 :         if ((le64toh(f->header->header_size) + le64toh(f->header->arena_size)) > (uint64_t) f->last_stat.st_size)
     295           0 :                 return -ENODATA;
     296             : 
     297         531 :         if (le64toh(f->header->tail_object_offset) > (le64toh(f->header->header_size) + le64toh(f->header->arena_size)))
     298           0 :                 return -ENODATA;
     299             : 
     300        1062 :         if (!VALID64(le64toh(f->header->data_hash_table_offset)) ||
     301        1062 :             !VALID64(le64toh(f->header->field_hash_table_offset)) ||
     302        1062 :             !VALID64(le64toh(f->header->tail_object_offset)) ||
     303         531 :             !VALID64(le64toh(f->header->entry_array_offset)))
     304           0 :                 return -ENODATA;
     305             : 
     306         531 :         if (f->writable) {
     307             :                 uint8_t state;
     308             :                 sd_id128_t machine_id;
     309             :                 int r;
     310             : 
     311           1 :                 r = sd_id128_get_machine(&machine_id);
     312           1 :                 if (r < 0)
     313           0 :                         return r;
     314             : 
     315           1 :                 if (!sd_id128_equal(machine_id, f->header->machine_id))
     316           0 :                         return -EHOSTDOWN;
     317             : 
     318           1 :                 state = f->header->state;
     319             : 
     320           1 :                 if (state == STATE_ONLINE) {
     321           0 :                         log_debug("Journal file %s is already online. Assuming unclean closing.", f->path);
     322           0 :                         return -EBUSY;
     323           1 :                 } else if (state == STATE_ARCHIVED)
     324           0 :                         return -ESHUTDOWN;
     325           1 :                 else if (state != STATE_OFFLINE) {
     326           0 :                         log_debug("Journal file %s has unknown state %i.", f->path, state);
     327           0 :                         return -EBUSY;
     328             :                 }
     329             :         }
     330             : 
     331         531 :         f->compress_xz = JOURNAL_HEADER_COMPRESSED_XZ(f->header);
     332         531 :         f->compress_lz4 = JOURNAL_HEADER_COMPRESSED_LZ4(f->header);
     333             : 
     334         531 :         f->seal = JOURNAL_HEADER_SEALED(f->header);
     335             : 
     336         531 :         return 0;
     337             : }
     338             : 
     339        1402 : static int journal_file_fstat(JournalFile *f) {
     340        1402 :         assert(f);
     341        1402 :         assert(f->fd >= 0);
     342             : 
     343        1402 :         if (fstat(f->fd, &f->last_stat) < 0)
     344           0 :                 return -errno;
     345             : 
     346        1402 :         f->last_stat_usec = now(CLOCK_MONOTONIC);
     347             : 
     348             :         /* Refuse appending to files that are already deleted */
     349        1402 :         if (f->last_stat.st_nlink <= 0)
     350           0 :                 return -EIDRM;
     351             : 
     352        1402 :         return 0;
     353             : }
     354             : 
     355       32647 : static int journal_file_allocate(JournalFile *f, uint64_t offset, uint64_t size) {
     356             :         uint64_t old_size, new_size;
     357             :         int r;
     358             : 
     359       32647 :         assert(f);
     360             : 
     361             :         /* We assume that this file is not sparse, and we know that
     362             :          * for sure, since we always call posix_fallocate()
     363             :          * ourselves */
     364             : 
     365       32647 :         if (mmap_cache_got_sigbus(f->mmap, f->fd))
     366           0 :                 return -EIO;
     367             : 
     368       32647 :         old_size =
     369       32647 :                 le64toh(f->header->header_size) +
     370       32647 :                 le64toh(f->header->arena_size);
     371             : 
     372       32647 :         new_size = PAGE_ALIGN(offset + size);
     373       32647 :         if (new_size < le64toh(f->header->header_size))
     374           0 :                 new_size = le64toh(f->header->header_size);
     375             : 
     376       32647 :         if (new_size <= old_size) {
     377             : 
     378             :                 /* We already pre-allocated enough space, but before
     379             :                  * we write to it, let's check with fstat() if the
     380             :                  * file got deleted, in order make sure we don't throw
     381             :                  * away the data immediately. Don't check fstat() for
     382             :                  * all writes though, but only once ever 10s. */
     383             : 
     384       32628 :                 if (f->last_stat_usec + LAST_STAT_REFRESH_USEC > now(CLOCK_MONOTONIC))
     385       32628 :                         return 0;
     386             : 
     387           0 :                 return journal_file_fstat(f);
     388             :         }
     389             : 
     390             :         /* Allocate more space. */
     391             : 
     392          19 :         if (f->metrics.max_size > 0 && new_size > f->metrics.max_size)
     393           0 :                 return -E2BIG;
     394             : 
     395          19 :         if (new_size > f->metrics.min_size && f->metrics.keep_free > 0) {
     396             :                 struct statvfs svfs;
     397             : 
     398           0 :                 if (fstatvfs(f->fd, &svfs) >= 0) {
     399             :                         uint64_t available;
     400             : 
     401           0 :                         available = svfs.f_bfree * svfs.f_bsize;
     402             : 
     403           0 :                         if (available >= f->metrics.keep_free)
     404           0 :                                 available -= f->metrics.keep_free;
     405             :                         else
     406           0 :                                 available = 0;
     407             : 
     408           0 :                         if (new_size - old_size > available)
     409           0 :                                 return -E2BIG;
     410             :                 }
     411             :         }
     412             : 
     413             :         /* Increase by larger blocks at once */
     414          19 :         new_size = ((new_size+FILE_SIZE_INCREASE-1) / FILE_SIZE_INCREASE) * FILE_SIZE_INCREASE;
     415          19 :         if (f->metrics.max_size > 0 && new_size > f->metrics.max_size)
     416           0 :                 new_size = f->metrics.max_size;
     417             : 
     418             :         /* Note that the glibc fallocate() fallback is very
     419             :            inefficient, hence we try to minimize the allocation area
     420             :            as we can. */
     421          19 :         r = posix_fallocate(f->fd, old_size, new_size - old_size);
     422          19 :         if (r != 0)
     423           0 :                 return -r;
     424             : 
     425          19 :         f->header->arena_size = htole64(new_size - le64toh(f->header->header_size));
     426             : 
     427          19 :         return journal_file_fstat(f);
     428             : }
     429             : 
     430     3886752 : static unsigned type_to_context(ObjectType type) {
     431             :         /* One context for each type, plus one catch-all for the rest */
     432             :         assert_cc(_OBJECT_TYPE_MAX <= MMAP_CACHE_MAX_CONTEXTS);
     433             :         assert_cc(CONTEXT_HEADER < MMAP_CACHE_MAX_CONTEXTS);
     434     3886752 :         return type > OBJECT_UNUSED && type < _OBJECT_TYPE_MAX ? type : 0;
     435             : }
     436             : 
     437     3886752 : static int journal_file_move_to(JournalFile *f, ObjectType type, bool keep_always, uint64_t offset, uint64_t size, void **ret) {
     438             :         int r;
     439             : 
     440     3886752 :         assert(f);
     441     3886752 :         assert(ret);
     442             : 
     443     3886752 :         if (size <= 0)
     444           0 :                 return -EINVAL;
     445             : 
     446             :         /* Avoid SIGBUS on invalid accesses */
     447     3886752 :         if (offset + size > (uint64_t) f->last_stat.st_size) {
     448             :                 /* Hmm, out of range? Let's refresh the fstat() data
     449             :                  * first, before we trust that check. */
     450             : 
     451           0 :                 r = journal_file_fstat(f);
     452           0 :                 if (r < 0)
     453           0 :                         return r;
     454             : 
     455           0 :                 if (offset + size > (uint64_t) f->last_stat.st_size)
     456           0 :                         return -EADDRNOTAVAIL;
     457             :         }
     458             : 
     459     3886752 :         return mmap_cache_get(f->mmap, f->fd, f->prot, type_to_context(type), keep_always, offset, size, &f->last_stat, ret);
     460             : }
     461             : 
     462     1927034 : static uint64_t minimum_header_size(Object *o) {
     463             : 
     464             :         static const uint64_t table[] = {
     465             :                 [OBJECT_DATA] = sizeof(DataObject),
     466             :                 [OBJECT_FIELD] = sizeof(FieldObject),
     467             :                 [OBJECT_ENTRY] = sizeof(EntryObject),
     468             :                 [OBJECT_DATA_HASH_TABLE] = sizeof(HashTableObject),
     469             :                 [OBJECT_FIELD_HASH_TABLE] = sizeof(HashTableObject),
     470             :                 [OBJECT_ENTRY_ARRAY] = sizeof(EntryArrayObject),
     471             :                 [OBJECT_TAG] = sizeof(TagObject),
     472             :         };
     473             : 
     474     1927034 :         if (o->object.type >= ELEMENTSOF(table) || table[o->object.type] <= 0)
     475           0 :                 return sizeof(ObjectHeader);
     476             : 
     477     1927034 :         return table[o->object.type];
     478             : }
     479             : 
     480     1927034 : int journal_file_move_to_object(JournalFile *f, ObjectType type, uint64_t offset, Object **ret) {
     481             :         int r;
     482             :         void *t;
     483             :         Object *o;
     484             :         uint64_t s;
     485             : 
     486     1927034 :         assert(f);
     487     1927034 :         assert(ret);
     488             : 
     489             :         /* Objects may only be located at multiple of 64 bit */
     490     1927034 :         if (!VALID64(offset))
     491           0 :                 return -EFAULT;
     492             : 
     493     1927034 :         r = journal_file_move_to(f, type, false, offset, sizeof(ObjectHeader), &t);
     494     1927034 :         if (r < 0)
     495           0 :                 return r;
     496             : 
     497     1927034 :         o = (Object*) t;
     498     1927034 :         s = le64toh(o->object.size);
     499             : 
     500     1927034 :         if (s < sizeof(ObjectHeader))
     501           0 :                 return -EBADMSG;
     502             : 
     503     1927034 :         if (o->object.type <= OBJECT_UNUSED)
     504           0 :                 return -EBADMSG;
     505             : 
     506     1927034 :         if (s < minimum_header_size(o))
     507           0 :                 return -EBADMSG;
     508             : 
     509     1927034 :         if (type > OBJECT_UNUSED && o->object.type != type)
     510           0 :                 return -EBADMSG;
     511             : 
     512     1927034 :         if (s > sizeof(ObjectHeader)) {
     513     1927034 :                 r = journal_file_move_to(f, type, false, offset, s, &t);
     514     1927034 :                 if (r < 0)
     515           0 :                         return r;
     516             : 
     517     1927034 :                 o = (Object*) t;
     518             :         }
     519             : 
     520     1927034 :         *ret = o;
     521     1927034 :         return 0;
     522             : }
     523             : 
     524       16279 : static uint64_t journal_file_entry_seqnum(JournalFile *f, uint64_t *seqnum) {
     525             :         uint64_t r;
     526             : 
     527       16279 :         assert(f);
     528             : 
     529       16279 :         r = le64toh(f->header->tail_entry_seqnum) + 1;
     530             : 
     531       16279 :         if (seqnum) {
     532             :                 /* If an external seqnum counter was passed, we update
     533             :                  * both the local and the external one, and set it to
     534             :                  * the maximum of both */
     535             : 
     536           7 :                 if (*seqnum + 1 > r)
     537           1 :                         r = *seqnum + 1;
     538             : 
     539           7 :                 *seqnum = r;
     540             :         }
     541             : 
     542       16279 :         f->header->tail_entry_seqnum = htole64(r);
     543             : 
     544       16279 :         if (f->header->head_entry_seqnum == 0)
     545          12 :                 f->header->head_entry_seqnum = htole64(r);
     546             : 
     547       16279 :         return r;
     548             : }
     549             : 
     550       32647 : int journal_file_append_object(JournalFile *f, ObjectType type, uint64_t size, Object **ret, uint64_t *offset) {
     551             :         int r;
     552             :         uint64_t p;
     553             :         Object *tail, *o;
     554             :         void *t;
     555             : 
     556       32647 :         assert(f);
     557       32647 :         assert(type > OBJECT_UNUSED && type < _OBJECT_TYPE_MAX);
     558       32647 :         assert(size >= sizeof(ObjectHeader));
     559       32647 :         assert(offset);
     560       32647 :         assert(ret);
     561             : 
     562       32647 :         r = journal_file_set_online(f);
     563       32647 :         if (r < 0)
     564           0 :                 return r;
     565             : 
     566       32647 :         p = le64toh(f->header->tail_object_offset);
     567       32647 :         if (p == 0)
     568          18 :                 p = le64toh(f->header->header_size);
     569             :         else {
     570       32629 :                 r = journal_file_move_to_object(f, OBJECT_UNUSED, p, &tail);
     571       32629 :                 if (r < 0)
     572           0 :                         return r;
     573             : 
     574       32629 :                 p += ALIGN64(le64toh(tail->object.size));
     575             :         }
     576             : 
     577       32647 :         r = journal_file_allocate(f, p, size);
     578       32647 :         if (r < 0)
     579           0 :                 return r;
     580             : 
     581       32647 :         r = journal_file_move_to(f, type, false, p, size, &t);
     582       32647 :         if (r < 0)
     583           0 :                 return r;
     584             : 
     585       32647 :         o = (Object*) t;
     586             : 
     587       32647 :         zero(o->object);
     588       32647 :         o->object.type = type;
     589       32647 :         o->object.size = htole64(size);
     590             : 
     591       32647 :         f->header->tail_object_offset = htole64(p);
     592       32647 :         f->header->n_objects = htole64(le64toh(f->header->n_objects) + 1);
     593             : 
     594       32647 :         *ret = o;
     595       32647 :         *offset = p;
     596             : 
     597       32647 :         return 0;
     598             : }
     599             : 
     600          18 : static int journal_file_setup_data_hash_table(JournalFile *f) {
     601             :         uint64_t s, p;
     602             :         Object *o;
     603             :         int r;
     604             : 
     605          18 :         assert(f);
     606             : 
     607             :         /* We estimate that we need 1 hash table entry per 768 of
     608             :            journal file and we want to make sure we never get beyond
     609             :            75% fill level. Calculate the hash table size for the
     610             :            maximum file size based on these metrics. */
     611             : 
     612          18 :         s = (f->metrics.max_size * 4 / 768 / 3) * sizeof(HashItem);
     613          18 :         if (s < DEFAULT_DATA_HASH_TABLE_SIZE)
     614          18 :                 s = DEFAULT_DATA_HASH_TABLE_SIZE;
     615             : 
     616          18 :         log_debug("Reserving %"PRIu64" entries in hash table.", s / sizeof(HashItem));
     617             : 
     618          18 :         r = journal_file_append_object(f,
     619             :                                        OBJECT_DATA_HASH_TABLE,
     620             :                                        offsetof(Object, hash_table.items) + s,
     621             :                                        &o, &p);
     622          18 :         if (r < 0)
     623           0 :                 return r;
     624             : 
     625          18 :         memzero(o->hash_table.items, s);
     626             : 
     627          18 :         f->header->data_hash_table_offset = htole64(p + offsetof(Object, hash_table.items));
     628          18 :         f->header->data_hash_table_size = htole64(s);
     629             : 
     630          18 :         return 0;
     631             : }
     632             : 
     633          18 : static int journal_file_setup_field_hash_table(JournalFile *f) {
     634             :         uint64_t s, p;
     635             :         Object *o;
     636             :         int r;
     637             : 
     638          18 :         assert(f);
     639             : 
     640             :         /* We use a fixed size hash table for the fields as this
     641             :          * number should grow very slowly only */
     642             : 
     643          18 :         s = DEFAULT_FIELD_HASH_TABLE_SIZE;
     644          18 :         r = journal_file_append_object(f,
     645             :                                        OBJECT_FIELD_HASH_TABLE,
     646             :                                        offsetof(Object, hash_table.items) + s,
     647             :                                        &o, &p);
     648          18 :         if (r < 0)
     649           0 :                 return r;
     650             : 
     651          18 :         memzero(o->hash_table.items, s);
     652             : 
     653          18 :         f->header->field_hash_table_offset = htole64(p + offsetof(Object, hash_table.items));
     654          18 :         f->header->field_hash_table_size = htole64(s);
     655             : 
     656          18 :         return 0;
     657             : }
     658             : 
     659      165946 : int journal_file_map_data_hash_table(JournalFile *f) {
     660             :         uint64_t s, p;
     661             :         void *t;
     662             :         int r;
     663             : 
     664      165946 :         assert(f);
     665             : 
     666      165946 :         if (f->data_hash_table)
     667      165925 :                 return 0;
     668             : 
     669          21 :         p = le64toh(f->header->data_hash_table_offset);
     670          21 :         s = le64toh(f->header->data_hash_table_size);
     671             : 
     672          21 :         r = journal_file_move_to(f,
     673             :                                  OBJECT_DATA_HASH_TABLE,
     674             :                                  true,
     675             :                                  p, s,
     676             :                                  &t);
     677          21 :         if (r < 0)
     678           0 :                 return r;
     679             : 
     680          21 :         f->data_hash_table = t;
     681          21 :         return 0;
     682             : }
     683             : 
     684       11306 : int journal_file_map_field_hash_table(JournalFile *f) {
     685             :         uint64_t s, p;
     686             :         void *t;
     687             :         int r;
     688             : 
     689       11306 :         assert(f);
     690             : 
     691       11306 :         if (f->field_hash_table)
     692       11290 :                 return 0;
     693             : 
     694          16 :         p = le64toh(f->header->field_hash_table_offset);
     695          16 :         s = le64toh(f->header->field_hash_table_size);
     696             : 
     697          16 :         r = journal_file_move_to(f,
     698             :                                  OBJECT_FIELD_HASH_TABLE,
     699             :                                  true,
     700             :                                  p, s,
     701             :                                  &t);
     702          16 :         if (r < 0)
     703           0 :                 return r;
     704             : 
     705          16 :         f->field_hash_table = t;
     706          16 :         return 0;
     707             : }
     708             : 
     709          71 : static int journal_file_link_field(
     710             :                 JournalFile *f,
     711             :                 Object *o,
     712             :                 uint64_t offset,
     713             :                 uint64_t hash) {
     714             : 
     715             :         uint64_t p, h, m;
     716             :         int r;
     717             : 
     718          71 :         assert(f);
     719          71 :         assert(o);
     720          71 :         assert(offset > 0);
     721             : 
     722          71 :         if (o->object.type != OBJECT_FIELD)
     723           0 :                 return -EINVAL;
     724             : 
     725          71 :         m = le64toh(f->header->field_hash_table_size) / sizeof(HashItem);
     726          71 :         if (m <= 0)
     727           0 :                 return -EBADMSG;
     728             : 
     729             :         /* This might alter the window we are looking at */
     730          71 :         o->field.next_hash_offset = o->field.head_data_offset = 0;
     731             : 
     732          71 :         h = hash % m;
     733          71 :         p = le64toh(f->field_hash_table[h].tail_hash_offset);
     734          71 :         if (p == 0)
     735          65 :                 f->field_hash_table[h].head_hash_offset = htole64(offset);
     736             :         else {
     737           6 :                 r = journal_file_move_to_object(f, OBJECT_FIELD, p, &o);
     738           6 :                 if (r < 0)
     739           0 :                         return r;
     740             : 
     741           6 :                 o->field.next_hash_offset = htole64(offset);
     742             :         }
     743             : 
     744          71 :         f->field_hash_table[h].tail_hash_offset = htole64(offset);
     745             : 
     746          71 :         if (JOURNAL_HEADER_CONTAINS(f->header, n_fields))
     747          71 :                 f->header->n_fields = htole64(le64toh(f->header->n_fields) + 1);
     748             : 
     749          71 :         return 0;
     750             : }
     751             : 
     752       11303 : static int journal_file_link_data(
     753             :                 JournalFile *f,
     754             :                 Object *o,
     755             :                 uint64_t offset,
     756             :                 uint64_t hash) {
     757             : 
     758             :         uint64_t p, h, m;
     759             :         int r;
     760             : 
     761       11303 :         assert(f);
     762       11303 :         assert(o);
     763       11303 :         assert(offset > 0);
     764             : 
     765       11303 :         if (o->object.type != OBJECT_DATA)
     766           0 :                 return -EINVAL;
     767             : 
     768       11303 :         m = le64toh(f->header->data_hash_table_size) / sizeof(HashItem);
     769       11303 :         if (m <= 0)
     770           0 :                 return -EBADMSG;
     771             : 
     772             :         /* This might alter the window we are looking at */
     773       11303 :         o->data.next_hash_offset = o->data.next_field_offset = 0;
     774       11303 :         o->data.entry_offset = o->data.entry_array_offset = 0;
     775       11303 :         o->data.n_entries = 0;
     776             : 
     777       11303 :         h = hash % m;
     778       11303 :         p = le64toh(f->data_hash_table[h].tail_hash_offset);
     779       11303 :         if (p == 0)
     780             :                 /* Only entry in the hash table is easy */
     781        2378 :                 f->data_hash_table[h].head_hash_offset = htole64(offset);
     782             :         else {
     783             :                 /* Move back to the previous data object, to patch in
     784             :                  * pointer */
     785             : 
     786        8925 :                 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
     787        8925 :                 if (r < 0)
     788           0 :                         return r;
     789             : 
     790        8925 :                 o->data.next_hash_offset = htole64(offset);
     791             :         }
     792             : 
     793       11303 :         f->data_hash_table[h].tail_hash_offset = htole64(offset);
     794             : 
     795       11303 :         if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
     796       11303 :                 f->header->n_data = htole64(le64toh(f->header->n_data) + 1);
     797             : 
     798       11303 :         return 0;
     799             : }
     800             : 
     801       11306 : int journal_file_find_field_object_with_hash(
     802             :                 JournalFile *f,
     803             :                 const void *field, uint64_t size, uint64_t hash,
     804             :                 Object **ret, uint64_t *offset) {
     805             : 
     806             :         uint64_t p, osize, h, m;
     807             :         int r;
     808             : 
     809       11306 :         assert(f);
     810       11306 :         assert(field && size > 0);
     811             : 
     812             :         /* If the field hash table is empty, we can't find anything */
     813       11306 :         if (le64toh(f->header->field_hash_table_size) <= 0)
     814           0 :                 return 0;
     815             : 
     816             :         /* Map the field hash table, if it isn't mapped yet. */
     817       11306 :         r = journal_file_map_field_hash_table(f);
     818       11306 :         if (r < 0)
     819           0 :                 return r;
     820             : 
     821       11306 :         osize = offsetof(Object, field.payload) + size;
     822             : 
     823       11306 :         m = le64toh(f->header->field_hash_table_size) / sizeof(HashItem);
     824       11306 :         if (m <= 0)
     825           0 :                 return -EBADMSG;
     826             : 
     827       11306 :         h = hash % m;
     828       11306 :         p = le64toh(f->field_hash_table[h].head_hash_offset);
     829             : 
     830       11306 :         while (p > 0) {
     831             :                 Object *o;
     832             : 
     833       11248 :                 r = journal_file_move_to_object(f, OBJECT_FIELD, p, &o);
     834       11248 :                 if (r < 0)
     835       11235 :                         return r;
     836             : 
     837       22483 :                 if (le64toh(o->field.hash) == hash &&
     838       22470 :                     le64toh(o->object.size) == osize &&
     839       11235 :                     memcmp(o->field.payload, field, size) == 0) {
     840             : 
     841       11235 :                         if (ret)
     842       11235 :                                 *ret = o;
     843       11235 :                         if (offset)
     844       11232 :                                 *offset = p;
     845             : 
     846       11235 :                         return 1;
     847             :                 }
     848             : 
     849          13 :                 p = le64toh(o->field.next_hash_offset);
     850             :         }
     851             : 
     852          71 :         return 0;
     853             : }
     854             : 
     855           3 : int journal_file_find_field_object(
     856             :                 JournalFile *f,
     857             :                 const void *field, uint64_t size,
     858             :                 Object **ret, uint64_t *offset) {
     859             : 
     860             :         uint64_t hash;
     861             : 
     862           3 :         assert(f);
     863           3 :         assert(field && size > 0);
     864             : 
     865           3 :         hash = hash64(field, size);
     866             : 
     867           3 :         return journal_file_find_field_object_with_hash(f,
     868             :                                                         field, size, hash,
     869             :                                                         ret, offset);
     870             : }
     871             : 
     872      159945 : int journal_file_find_data_object_with_hash(
     873             :                 JournalFile *f,
     874             :                 const void *data, uint64_t size, uint64_t hash,
     875             :                 Object **ret, uint64_t *offset) {
     876             : 
     877             :         uint64_t p, osize, h, m;
     878             :         int r;
     879             : 
     880      159945 :         assert(f);
     881      159945 :         assert(data || size == 0);
     882             : 
     883             :         /* If there's no data hash table, then there's no entry. */
     884      159945 :         if (le64toh(f->header->data_hash_table_size) <= 0)
     885           0 :                 return 0;
     886             : 
     887             :         /* Map the data hash table, if it isn't mapped yet. */
     888      159945 :         r = journal_file_map_data_hash_table(f);
     889      159945 :         if (r < 0)
     890           0 :                 return r;
     891             : 
     892      159945 :         osize = offsetof(Object, data.payload) + size;
     893             : 
     894      159945 :         m = le64toh(f->header->data_hash_table_size) / sizeof(HashItem);
     895      159945 :         if (m <= 0)
     896           0 :                 return -EBADMSG;
     897             : 
     898      159945 :         h = hash % m;
     899      159945 :         p = le64toh(f->data_hash_table[h].head_hash_offset);
     900             : 
     901      159945 :         while (p > 0) {
     902             :                 Object *o;
     903             : 
     904      282621 :                 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
     905      282621 :                 if (r < 0)
     906      148261 :                         return r;
     907             : 
     908      282621 :                 if (le64toh(o->data.hash) != hash)
     909      134360 :                         goto next;
     910             : 
     911      148261 :                 if (o->object.flags & OBJECT_COMPRESSION_MASK) {
     912             : #if defined(HAVE_XZ) || defined(HAVE_LZ4)
     913             :                         uint64_t l;
     914           0 :                         size_t rsize = 0;
     915             : 
     916           0 :                         l = le64toh(o->object.size);
     917           0 :                         if (l <= offsetof(Object, data.payload))
     918           0 :                                 return -EBADMSG;
     919             : 
     920           0 :                         l -= offsetof(Object, data.payload);
     921             : 
     922           0 :                         r = decompress_blob(o->object.flags & OBJECT_COMPRESSION_MASK,
     923           0 :                                             o->data.payload, l, &f->compress_buffer, &f->compress_buffer_size, &rsize, 0);
     924           0 :                         if (r < 0)
     925           0 :                                 return r;
     926             : 
     927           0 :                         if (rsize == size &&
     928           0 :                             memcmp(f->compress_buffer, data, size) == 0) {
     929             : 
     930           0 :                                 if (ret)
     931           0 :                                         *ret = o;
     932             : 
     933           0 :                                 if (offset)
     934           0 :                                         *offset = p;
     935             : 
     936           0 :                                 return 1;
     937             :                         }
     938             : #else
     939             :                         return -EPROTONOSUPPORT;
     940             : #endif
     941      296522 :                 } else if (le64toh(o->object.size) == osize &&
     942      148261 :                            memcmp(o->data.payload, data, size) == 0) {
     943             : 
     944      148261 :                         if (ret)
     945      148069 :                                 *ret = o;
     946             : 
     947      148261 :                         if (offset)
     948      148261 :                                 *offset = p;
     949             : 
     950      148261 :                         return 1;
     951             :                 }
     952             : 
     953             :         next:
     954      134360 :                 p = le64toh(o->data.next_hash_offset);
     955             :         }
     956             : 
     957       11684 :         return 0;
     958             : }
     959             : 
     960           7 : int journal_file_find_data_object(
     961             :                 JournalFile *f,
     962             :                 const void *data, uint64_t size,
     963             :                 Object **ret, uint64_t *offset) {
     964             : 
     965             :         uint64_t hash;
     966             : 
     967           7 :         assert(f);
     968           7 :         assert(data || size == 0);
     969             : 
     970           7 :         hash = hash64(data, size);
     971             : 
     972           7 :         return journal_file_find_data_object_with_hash(f,
     973             :                                                        data, size, hash,
     974             :                                                        ret, offset);
     975             : }
     976             : 
     977       11303 : static int journal_file_append_field(
     978             :                 JournalFile *f,
     979             :                 const void *field, uint64_t size,
     980             :                 Object **ret, uint64_t *offset) {
     981             : 
     982             :         uint64_t hash, p;
     983             :         uint64_t osize;
     984             :         Object *o;
     985             :         int r;
     986             : 
     987       11303 :         assert(f);
     988       11303 :         assert(field && size > 0);
     989             : 
     990       11303 :         hash = hash64(field, size);
     991             : 
     992       11303 :         r = journal_file_find_field_object_with_hash(f, field, size, hash, &o, &p);
     993       11303 :         if (r < 0)
     994           0 :                 return r;
     995       11303 :         else if (r > 0) {
     996             : 
     997       11232 :                 if (ret)
     998       11232 :                         *ret = o;
     999             : 
    1000       11232 :                 if (offset)
    1001       11232 :                         *offset = p;
    1002             : 
    1003       11232 :                 return 0;
    1004             :         }
    1005             : 
    1006          71 :         osize = offsetof(Object, field.payload) + size;
    1007          71 :         r = journal_file_append_object(f, OBJECT_FIELD, osize, &o, &p);
    1008          71 :         if (r < 0)
    1009           0 :                 return r;
    1010             : 
    1011          71 :         o->field.hash = htole64(hash);
    1012          71 :         memcpy(o->field.payload, field, size);
    1013             : 
    1014          71 :         r = journal_file_link_field(f, o, p, hash);
    1015          71 :         if (r < 0)
    1016           0 :                 return r;
    1017             : 
    1018             :         /* The linking might have altered the window, so let's
    1019             :          * refresh our pointer */
    1020          71 :         r = journal_file_move_to_object(f, OBJECT_FIELD, p, &o);
    1021          71 :         if (r < 0)
    1022           0 :                 return r;
    1023             : 
    1024             : #ifdef HAVE_GCRYPT
    1025          71 :         r = journal_file_hmac_put_object(f, OBJECT_FIELD, o, p);
    1026          71 :         if (r < 0)
    1027           0 :                 return r;
    1028             : #endif
    1029             : 
    1030          71 :         if (ret)
    1031          71 :                 *ret = o;
    1032             : 
    1033          71 :         if (offset)
    1034          71 :                 *offset = p;
    1035             : 
    1036          71 :         return 0;
    1037             : }
    1038             : 
    1039      159312 : static int journal_file_append_data(
    1040             :                 JournalFile *f,
    1041             :                 const void *data, uint64_t size,
    1042             :                 Object **ret, uint64_t *offset) {
    1043             : 
    1044             :         uint64_t hash, p;
    1045             :         uint64_t osize;
    1046             :         Object *o;
    1047      159312 :         int r, compression = 0;
    1048             :         const void *eq;
    1049             : 
    1050      159312 :         assert(f);
    1051      159312 :         assert(data || size == 0);
    1052             : 
    1053      159312 :         hash = hash64(data, size);
    1054             : 
    1055      159312 :         r = journal_file_find_data_object_with_hash(f, data, size, hash, &o, &p);
    1056      159312 :         if (r < 0)
    1057           0 :                 return r;
    1058      159312 :         else if (r > 0) {
    1059             : 
    1060      148009 :                 if (ret)
    1061      148009 :                         *ret = o;
    1062             : 
    1063      148009 :                 if (offset)
    1064      148009 :                         *offset = p;
    1065             : 
    1066      148009 :                 return 0;
    1067             :         }
    1068             : 
    1069       11303 :         osize = offsetof(Object, data.payload) + size;
    1070       11303 :         r = journal_file_append_object(f, OBJECT_DATA, osize, &o, &p);
    1071       11303 :         if (r < 0)
    1072           0 :                 return r;
    1073             : 
    1074       11303 :         o->data.hash = htole64(hash);
    1075             : 
    1076             : #if defined(HAVE_XZ) || defined(HAVE_LZ4)
    1077       11303 :         if (f->compress_xz &&
    1078             :             size >= COMPRESSION_SIZE_THRESHOLD) {
    1079           0 :                 size_t rsize = 0;
    1080             : 
    1081           0 :                 compression = compress_blob(data, size, o->data.payload, &rsize);
    1082             : 
    1083           0 :                 if (compression) {
    1084           0 :                         o->object.size = htole64(offsetof(Object, data.payload) + rsize);
    1085           0 :                         o->object.flags |= compression;
    1086             : 
    1087           0 :                         log_debug("Compressed data object %"PRIu64" -> %zu using %s",
    1088             :                                   size, rsize, object_compressed_to_string(compression));
    1089             :                 }
    1090             :         }
    1091             : #endif
    1092             : 
    1093       11303 :         if (!compression && size > 0)
    1094       11303 :                 memcpy(o->data.payload, data, size);
    1095             : 
    1096       11303 :         r = journal_file_link_data(f, o, p, hash);
    1097       11303 :         if (r < 0)
    1098           0 :                 return r;
    1099             : 
    1100             :         /* The linking might have altered the window, so let's
    1101             :          * refresh our pointer */
    1102       11303 :         r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
    1103       11303 :         if (r < 0)
    1104           0 :                 return r;
    1105             : 
    1106       11303 :         if (!data)
    1107           0 :                 eq = NULL;
    1108             :         else
    1109       11303 :                 eq = memchr(data, '=', size);
    1110       11303 :         if (eq && eq > data) {
    1111       11303 :                 Object *fo = NULL;
    1112             :                 uint64_t fp;
    1113             : 
    1114             :                 /* Create field object ... */
    1115       11303 :                 r = journal_file_append_field(f, data, (uint8_t*) eq - (uint8_t*) data, &fo, &fp);
    1116       11303 :                 if (r < 0)
    1117           0 :                         return r;
    1118             : 
    1119             :                 /* ... and link it in. */
    1120       11303 :                 o->data.next_field_offset = fo->field.head_data_offset;
    1121       11303 :                 fo->field.head_data_offset = le64toh(p);
    1122             :         }
    1123             : 
    1124             : #ifdef HAVE_GCRYPT
    1125       11303 :         r = journal_file_hmac_put_object(f, OBJECT_DATA, o, p);
    1126       11303 :         if (r < 0)
    1127           0 :                 return r;
    1128             : #endif
    1129             : 
    1130       11303 :         if (ret)
    1131       11303 :                 *ret = o;
    1132             : 
    1133       11303 :         if (offset)
    1134       11303 :                 *offset = p;
    1135             : 
    1136       11303 :         return 0;
    1137             : }
    1138             : 
    1139       50876 : uint64_t journal_file_entry_n_items(Object *o) {
    1140       50876 :         assert(o);
    1141             : 
    1142       50876 :         if (o->object.type != OBJECT_ENTRY)
    1143           0 :                 return 0;
    1144             : 
    1145       50876 :         return (le64toh(o->object.size) - offsetof(Object, entry.items)) / sizeof(EntryItem);
    1146             : }
    1147             : 
    1148     1043534 : uint64_t journal_file_entry_array_n_items(Object *o) {
    1149     1043534 :         assert(o);
    1150             : 
    1151     1043534 :         if (o->object.type != OBJECT_ENTRY_ARRAY)
    1152           0 :                 return 0;
    1153             : 
    1154     1043534 :         return (le64toh(o->object.size) - offsetof(Object, entry_array.items)) / sizeof(uint64_t);
    1155             : }
    1156             : 
    1157        2382 : uint64_t journal_file_hash_table_n_items(Object *o) {
    1158        2382 :         assert(o);
    1159             : 
    1160        2716 :         if (o->object.type != OBJECT_DATA_HASH_TABLE &&
    1161         334 :             o->object.type != OBJECT_FIELD_HASH_TABLE)
    1162           0 :                 return 0;
    1163             : 
    1164        2382 :         return (le64toh(o->object.size) - offsetof(Object, hash_table.items)) / sizeof(HashItem);
    1165             : }
    1166             : 
    1167      164288 : static int link_entry_into_array(JournalFile *f,
    1168             :                                  le64_t *first,
    1169             :                                  le64_t *idx,
    1170             :                                  uint64_t p) {
                               /* Stores the offset p at logical position *idx of the chain
                                * of entry array objects that starts at offset *first, then
                                * increments *idx by one.
                                *
                                * *first and *idx are little-endian values that are updated in
                                * place. If the position lies beyond all existing arrays of the
                                * chain a new OBJECT_ENTRY_ARRAY object is appended to the file
                                * and hooked up, either via *first (empty chain) or via the
                                * next_entry_array_offset field of the last array visited.
                                *
                                * Returns 0 on success, or the negative error returned by
                                * journal_file_move_to_object(), journal_file_append_object()
                                * or journal_file_hmac_put_object(). */
    1171             :         int r;
    1172      164288 :         uint64_t n = 0, ap = 0, q, i, a, hidx;
    1173             :         Object *o;
    1174             : 
    1175      164288 :         assert(f);
    1176      164288 :         assert(first);
    1177      164288 :         assert(idx);
    1178      164288 :         assert(p > 0);
    1179             : 
    1180      164288 :         a = le64toh(*first);
    1181      164288 :         i = hidx = le64toh(*idx);
                               /* Walk the chain: i is the index relative to the array at
                                * offset a, hidx keeps the original chain-wide index, ap
                                * remembers the previously visited array. */
    1182     1129581 :         while (a > 0) {
    1183             : 
    1184      960335 :                 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &o);
    1185      960335 :                 if (r < 0)
    1186           0 :                         return r;
    1187             : 
    1188      960335 :                 n = journal_file_entry_array_n_items(o);
    1189      960335 :                 if (i < n) {
                                       /* The slot exists in this array already */
    1190      159330 :                         o->entry_array.items[i] = htole64(p);
    1191      159330 :                         *idx = htole64(hidx + 1);
    1192      159330 :                         return 0;
    1193             :                 }
    1194             : 
    1195      801005 :                 i -= n;
    1196      801005 :                 ap = a;
    1197      801005 :                 a = le64toh(o->entry_array.next_entry_array_offset);
    1198             :         }
    1199             : 
                               /* No array holds the slot. Pick the item count of the new
                                * array: twice (hidx+1) if hidx exceeds the size of the last
                                * array visited, otherwise twice that size, but at least 4. */
    1200        4958 :         if (hidx > n)
    1201         907 :                 n = (hidx+1) * 2;
    1202             :         else
    1203        4051 :                 n = n * 2;
    1204             : 
    1205        4958 :         if (n < 4)
    1206        2619 :                 n = 4;
    1207             : 
    1208        4958 :         r = journal_file_append_object(f, OBJECT_ENTRY_ARRAY,
    1209             :                                        offsetof(Object, entry_array.items) + n * sizeof(uint64_t),
    1210             :                                        &o, &q);
    1211        4958 :         if (r < 0)
    1212           0 :                 return r;
    1213             : 
    1214             : #ifdef HAVE_GCRYPT
    1215        4958 :         r = journal_file_hmac_put_object(f, OBJECT_ENTRY_ARRAY, o, q);
    1216        4958 :         if (r < 0)
    1217           0 :                 return r;
    1218             : #endif
    1219             : 
                               /* i is the remaining index after subtracting the sizes of all
                                * existing arrays, i.e. the slot within the new array */
    1220        4958 :         o->entry_array.items[i] = htole64(p);
    1221             : 
                               /* ap == 0 means the loop above never advanced past an array,
                                * i.e. the chain was empty: the new array becomes its head.
                                * Otherwise link it behind the last array of the chain. */
    1222        4958 :         if (ap == 0)
    1223        2619 :                 *first = htole64(q);
    1224             :         else {
    1225        2339 :                 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, ap, &o);
    1226        2339 :                 if (r < 0)
    1227           0 :                         return r;
    1228             : 
    1229        2339 :                 o->entry_array.next_entry_array_offset = htole64(q);
    1230             :         }
    1231             : 
                               /* Only bump the counter if the header has this field */
    1232        4958 :         if (JOURNAL_HEADER_CONTAINS(f->header, n_entry_arrays))
    1233        4958 :                 f->header->n_entry_arrays = htole64(le64toh(f->header->n_entry_arrays) + 1);
    1234             : 
    1235        4958 :         *idx = htole64(hidx + 1);
    1236             : 
    1237        4958 :         return 0;
    1238             : }
    1239             : 
    1240      159312 : static int link_entry_into_array_plus_one(JournalFile *f,
    1241             :                                           le64_t *extra,
    1242             :                                           le64_t *first,
    1243             :                                           le64_t *idx,
    1244             :                                           uint64_t p) {
    1245             : 
                               /* Variant of link_entry_into_array() for lists whose very
                                * first item is stored directly in *extra instead of in the
                                * array chain: item 0 goes to *extra, item k > 0 goes to
                                * position k-1 of the chain starting at *first. *idx is the
                                * little-endian item count and is incremented on success.
                                *
                                * Returns 0 on success, or the negative error returned by
                                * link_entry_into_array(). */
    1246             :         int r;
    1247             : 
    1248      159312 :         assert(f);
    1249      159312 :         assert(extra);
    1250      159312 :         assert(first);
    1251      159312 :         assert(idx);
    1252      159312 :         assert(p > 0);
    1253             : 
    1254      159312 :         if (*idx == 0)
    1255       11303 :                 *extra = htole64(p);
    1256             :         else {
    1257             :                 le64_t i;
    1258             : 
                                       /* Work on a temporary copy of the index shifted down
                                        * by one; the increment link_entry_into_array()
                                        * applies to it is discarded, *idx itself is updated
                                        * below. */
    1259      148009 :                 i = htole64(le64toh(*idx) - 1);
    1260      148009 :                 r = link_entry_into_array(f, first, &i, p);
    1261      148009 :                 if (r < 0)
    1262           0 :                         return r;
    1263             :         }
    1264             : 
    1265      159312 :         *idx = htole64(le64toh(*idx) + 1);
    1266      159312 :         return 0;
    1267             : }
    1268             : 
    1269      159312 : static int journal_file_link_entry_item(JournalFile *f, Object *o, uint64_t offset, uint64_t i) {
                               /* Links the entry located at "offset" into the entry list of
                                * the data object referenced by item i of the entry object o.
                                *
                                * Returns -EINVAL if the item's object offset is zero, a
                                * negative error from journal_file_move_to_object(), or the
                                * result of link_entry_into_array_plus_one(). */
    1270             :         uint64_t p;
    1271             :         int r;
    1272      159312 :         assert(f);
    1273      159312 :         assert(o);
    1274      159312 :         assert(offset > 0);
    1275             : 
    1276      159312 :         p = le64toh(o->entry.items[i].object_offset);
    1277      159312 :         if (p == 0)
    1278           0 :                 return -EINVAL;
    1279             : 
                               /* From here on the local o refers to the data object, no
                                * longer to the entry object passed in */
    1280      159312 :         r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
    1281      159312 :         if (r < 0)
    1282           0 :                 return r;
    1283             : 
    1284      477936 :         return link_entry_into_array_plus_one(f,
    1285      159312 :                                               &o->data.entry_offset,
    1286      159312 :                                               &o->data.entry_array_offset,
    1287      159312 :                                               &o->data.n_entries,
    1288             :                                               offset);
    1289             : }
    1290             : 
    1291       16279 : static int journal_file_link_entry(JournalFile *f, Object *o, uint64_t offset) {
                               /* Hooks the entry object o, located at "offset", into the
                                * file: first into the entry array chain referenced by the
                                * header, then into the entry list of every data object the
                                * entry's items point to. Also updates the head/tail timestamp
                                * fields of the header.
                                *
                                * Returns 0 on success, -EINVAL if o is not an OBJECT_ENTRY,
                                * or the negative error of the linking helpers. */
    1292             :         uint64_t n, i;
    1293             :         int r;
    1294             : 
    1295       16279 :         assert(f);
    1296       16279 :         assert(o);
    1297       16279 :         assert(offset > 0);
    1298             : 
    1299       16279 :         if (o->object.type != OBJECT_ENTRY)
    1300           0 :                 return -EINVAL;
    1301             : 
                               /* Full memory barrier before the entry is linked in.
                                * NOTE(review): presumably so that the entry contents are
                                * visible before the entry becomes reachable for concurrent
                                * readers of the mapping - confirm. */
    1302       16279 :         __sync_synchronize();
    1303             : 
    1304             :         /* Link up the entry itself */
    1305       32558 :         r = link_entry_into_array(f,
    1306       16279 :                                   &f->header->entry_array_offset,
    1307       16279 :                                   &f->header->n_entries,
    1308             :                                   offset);
    1309       16279 :         if (r < 0)
    1310           0 :                 return r;
    1311             : 
    1312             :         /* log_debug("=> %s seqnr=%"PRIu64" n_entries=%"PRIu64, f->path, o->entry.seqnum, f->header->n_entries); */
    1313             : 
                               /* The head timestamp is only set while it is still zero; the
                                * tail timestamps are overwritten on every call. The values
                                * are copied as stored in the entry, without conversion. */
    1314       16279 :         if (f->header->head_entry_realtime == 0)
    1315          12 :                 f->header->head_entry_realtime = o->entry.realtime;
    1316             : 
    1317       16279 :         f->header->tail_entry_realtime = o->entry.realtime;
    1318       16279 :         f->header->tail_entry_monotonic = o->entry.monotonic;
    1319             : 
    1320       16279 :         f->tail_entry_monotonic_valid = true;
    1321             : 
    1322             :         /* Link up the items */
    1323       16279 :         n = journal_file_entry_n_items(o);
    1324      175591 :         for (i = 0; i < n; i++) {
    1325      159312 :                 r = journal_file_link_entry_item(f, o, offset, i);
    1326      159312 :                 if (r < 0)
    1327           0 :                         return r;
    1328             :         }
    1329             : 
    1330       16279 :         return 0;
    1331             : }
    1332             : 
    1333       16279 : static int journal_file_append_entry_internal(
    1334             :                 JournalFile *f,
    1335             :                 const dual_timestamp *ts,
    1336             :                 uint64_t xor_hash,
    1337             :                 const EntryItem items[], unsigned n_items,
    1338             :                 uint64_t *seqnum,
    1339             :                 Object **ret, uint64_t *offset) {
                               /* Appends a new OBJECT_ENTRY object carrying the n_items
                                * given items to the file, fills in its sequence number,
                                * timestamps, xor_hash and the boot ID taken from the file
                                * header, and links it in via journal_file_link_entry().
                                *
                                * On success returns 0 and, if the pointers are non-NULL,
                                * stores the new object in *ret and its offset in *offset.
                                * On failure returns the negative error of the failing
                                * helper. */
    1340             :         uint64_t np;
    1341             :         uint64_t osize;
    1342             :         Object *o;
    1343             :         int r;
    1344             : 
    1345       16279 :         assert(f);
    1346       16279 :         assert(items || n_items == 0);
    1347       16279 :         assert(ts);
    1348             : 
                               /* Object size: the fixed part up to the items array plus one
                                * EntryItem per item */
    1349       16279 :         osize = offsetof(Object, entry.items) + (n_items * sizeof(EntryItem));
    1350             : 
    1351       16279 :         r = journal_file_append_object(f, OBJECT_ENTRY, osize, &o, &np);
    1352       16279 :         if (r < 0)
    1353           0 :                 return r;
    1354             : 
    1355       16279 :         o->entry.seqnum = htole64(journal_file_entry_seqnum(f, seqnum));
                               /* NOTE(review): the assert above permits items == NULL when
                                * n_items == 0, in which case a NULL source is handed to
                                * memcpy() with size 0 - confirm this is acceptable. */
    1356       16279 :         memcpy(o->entry.items, items, n_items * sizeof(EntryItem));
    1357       16279 :         o->entry.realtime = htole64(ts->realtime);
    1358       16279 :         o->entry.monotonic = htole64(ts->monotonic);
    1359       16279 :         o->entry.xor_hash = htole64(xor_hash);
    1360       16279 :         o->entry.boot_id = f->header->boot_id;
    1361             : 
    1362             : #ifdef HAVE_GCRYPT
    1363       16279 :         r = journal_file_hmac_put_object(f, OBJECT_ENTRY, o, np);
    1364       16279 :         if (r < 0)
    1365           0 :                 return r;
    1366             : #endif
    1367             : 
    1368       16279 :         r = journal_file_link_entry(f, o, np);
    1369       16279 :         if (r < 0)
    1370           0 :                 return r;
    1371             : 
    1372       16279 :         if (ret)
    1373           0 :                 *ret = o;
    1374             : 
    1375       16279 :         if (offset)
    1376           0 :                 *offset = np;
    1377             : 
    1378       16279 :         return 0;
    1379             : }
    1380             : 
    1381        6278 : void journal_file_post_change(JournalFile *f) {
    1382        6278 :         assert(f);
    1383             : 
    1384             :         /* inotify() does not receive IN_MODIFY events from file
    1385             :          * accesses done via mmap(). After each access we hence
    1386             :          * trigger IN_MODIFY explicitly by truncating the journal
    1387             :          * file to its current size. */
    1388             : 
    1389        6278 :         __sync_synchronize();
    1390             : 
                               /* The size used is the one recorded in f->last_stat. A
                                * failure is only logged; nothing is reported to the caller. */
    1391        6278 :         if (ftruncate(f->fd, f->last_stat.st_size) < 0)
    1392           0 :                 log_error_errno(errno, "Failed to truncate file to its own size: %m");
    1393        6278 : }
    1394             : 
    1395         260 : static int entry_item_cmp(const void *_a, const void *_b) {
                               /* Comparison callback ordering two EntryItem structures by
                                * ascending object_offset (compared in host byte order).
                                * Returns -1, 1 or 0. */
    1396         260 :         const EntryItem *a = _a, *b = _b;
    1397             : 
    1398         260 :         if (le64toh(a->object_offset) < le64toh(b->object_offset))
    1399           5 :                 return -1;
    1400         255 :         if (le64toh(a->object_offset) > le64toh(b->object_offset))
    1401         255 :                 return 1;
    1402           0 :         return 0;
    1403             : }
    1404             : 
    1405        6278 : int journal_file_append_entry(JournalFile *f, const dual_timestamp *ts, const struct iovec iovec[], unsigned n_iovec, uint64_t *seqnum, Object **ret, uint64_t *offset) {
                               /* Appends one entry made up of the n_iovec fields in iovec[]
                                * to the journal file: each field is stored through
                                * journal_file_append_data(), the resulting items are sorted
                                * by file offset, and the entry object is then appended and
                                * linked.
                                *
                                * If ts is NULL the current time is used. seqnum, ret and
                                * offset are passed through to
                                * journal_file_append_entry_internal().
                                *
                                * Returns the result of journal_file_append_entry_internal(),
                                * -EINVAL if ts->monotonic is older than the recorded tail
                                * entry, -EIO if a SIGBUS was recorded for the file's mapping,
                                * or the negative error of an earlier failing helper. */
    1406             :         unsigned i;
    1407             :         EntryItem *items;
    1408             :         int r;
    1409        6278 :         uint64_t xor_hash = 0;
    1410             :         struct dual_timestamp _ts;
    1411             : 
    1412        6278 :         assert(f);
    1413        6278 :         assert(iovec || n_iovec == 0);
    1414             : 
    1415        6278 :         if (!ts) {
    1416           0 :                 dual_timestamp_get(&_ts);
    1417           0 :                 ts = &_ts;
    1418             :         }
    1419             : 
                               /* Refuse entries whose monotonic timestamp lies before the
                                * one of the current tail entry */
    1420       12545 :         if (f->tail_entry_monotonic_valid &&
    1421        6267 :             ts->monotonic < le64toh(f->header->tail_entry_monotonic))
    1422           0 :                 return -EINVAL;
    1423             : 
    1424             : #ifdef HAVE_GCRYPT
    1425        6278 :         r = journal_file_maybe_append_tag(f, ts->realtime);
    1426        6278 :         if (r < 0)
    1427           0 :                 return r;
    1428             : #endif
    1429             : 
    1430             :         /* alloca() can't take 0, hence let's allocate at least one */
                               /* NOTE(review): the stack allocation grows with n_iovec and
                                * no upper bound is checked in this function - confirm that
                                * callers bound it. */
    1431        6278 :         items = alloca(sizeof(EntryItem) * MAX(1u, n_iovec));
    1432             : 
    1433       12816 :         for (i = 0; i < n_iovec; i++) {
    1434             :                 uint64_t p;
    1435             :                 Object *o;
    1436             : 
    1437        6538 :                 r = journal_file_append_data(f, iovec[i].iov_base, iovec[i].iov_len, &o, &p);
    1438        6538 :                 if (r < 0)
    1439           0 :                         return r;
    1440             : 
                                       /* xor_hash accumulates the XOR of all data hashes in
                                        * host byte order; the item keeps the hash as stored
                                        * in the data object */
    1441        6538 :                 xor_hash ^= le64toh(o->data.hash);
    1442        6538 :                 items[i].object_offset = htole64(p);
    1443        6538 :                 items[i].hash = o->data.hash;
    1444             :         }
    1445             : 
    1446             :         /* Order by the position on disk, in order to improve seek
    1447             :          * times for rotating media. */
    1448        6278 :         qsort_safe(items, n_iovec, sizeof(EntryItem), entry_item_cmp);
    1449             : 
    1450        6278 :         r = journal_file_append_entry_internal(f, ts, xor_hash, items, n_iovec, seqnum, ret, offset);
    1451             : 
    1452             :         /* If the memory mapping triggered a SIGBUS then we return an
    1453             :          * IO error and ignore the error code passed down to us, since
    1454             :          * it is very likely just an effect of a nullified replacement
    1455             :          * mapping page */
    1456             : 
    1457        6278 :         if (mmap_cache_got_sigbus(f->mmap, f->fd))
    1458           0 :                 r = -EIO;
    1459             : 
                               /* Reached whether or not the append succeeded; the early
                                * returns above skip it */
    1460        6278 :         journal_file_post_change(f);
    1461             : 
    1462        6278 :         return r;
    1463             : }
    1464             : 
    1465             : typedef struct ChainCacheItem {
                               /* Per-chain cache record remembering where the last lookup
                                * in an entry array chain ended, so that a later lookup can
                                * skip ahead instead of walking the chain from its start.
                                * Records are stored in a hashmap keyed by "first". */
    1466             :         uint64_t first; /* the array at the beginning of the chain */
    1467             :         uint64_t array; /* the cached array */
    1468             :         uint64_t begin; /* the first item in the cached array */
    1469             :         uint64_t total; /* the total number of items in all arrays before this one in the chain */
    1470             :         uint64_t last_index; /* the last index we looked at, to optimize locality when bisecting */
    1471             : } ChainCacheItem;
    1472             : 
    1473       20771 : static void chain_cache_put(
    1474             :                 OrderedHashmap *h,
    1475             :                 ChainCacheItem *ci,
    1476             :                 uint64_t first,
    1477             :                 uint64_t array,
    1478             :                 uint64_t begin,
    1479             :                 uint64_t total,
    1480             :                 uint64_t last_index) {
    1481             : 
                               /* Records (array, begin, total, last_index) as the cached
                                * position for the chain starting at "first". If ci is
                                * non-NULL that existing record is updated, otherwise a record
                                * is created in h. All failures are silent: the position is
                                * simply not cached. */
    1482       20771 :         if (!ci) {
    1483             :                 /* If the chain item to cache for this chain is the
    1484             :                  * first one it's not worth caching anything */
    1485         157 :                 if (array == first)
    1486         148 :                         return;
    1487             : 
                                       /* Once the map holds CHAIN_CACHE_MAX records, recycle
                                        * the first record of the map instead of allocating a
                                        * new one (presumably the oldest one - confirm
                                        * against the OrderedHashmap semantics) */
    1488           9 :                 if (ordered_hashmap_size(h) >= CHAIN_CACHE_MAX) {
    1489           0 :                         ci = ordered_hashmap_steal_first(h);
    1490           0 :                         assert(ci);
    1491             :                 } else {
    1492           9 :                         ci = new(ChainCacheItem, 1);
    1493           9 :                         if (!ci)
    1494           0 :                                 return;
    1495             :                 }
    1496             : 
    1497           9 :                 ci->first = first;
    1498             : 
                                       /* The key points into the record itself */
    1499           9 :                 if (ordered_hashmap_put(h, &ci->first, ci) < 0) {
    1500           0 :                         free(ci);
    1501           0 :                         return;
    1502             :                 }
    1503             :         } else
    1504       20614 :                 assert(ci->first == first);
    1505             : 
    1506       20623 :         ci->array = array;
    1507       20623 :         ci->begin = begin;
    1508       20623 :         ci->total = total;
    1509       20623 :         ci->last_index = last_index;
    1510             : }
    1511             : 
    1512       10313 : static int generic_array_get(
    1513             :                 JournalFile *f,
    1514             :                 uint64_t first,
    1515             :                 uint64_t i,
    1516             :                 Object **ret, uint64_t *offset) {
    1517             : 
                               /* Looks up item i of the entry array chain starting at offset
                                * "first" and moves to the entry object it refers to.
                                *
                                * Returns 1 if the item was found (storing the entry object in
                                * *ret and its offset in *offset where those are non-NULL),
                                * 0 if the chain ends before index i, or a negative error from
                                * journal_file_move_to_object(). */
    1518             :         Object *o;
    1519       10313 :         uint64_t p = 0, a, t = 0;
    1520             :         int r;
    1521             :         ChainCacheItem *ci;
    1522             : 
    1523       10313 :         assert(f);
    1524             : 
    1525       10313 :         a = first;
    1526             : 
    1527             :         /* Try the chain cache first */
    1528       10313 :         ci = ordered_hashmap_get(f->chain_cache, &first);
    1529       10313 :         if (ci && i > ci->total) {
                                       /* The wanted item lies past the start of the cached
                                        * array: begin the walk there and make i relative to
                                        * it. t counts the items skipped so far. */
    1530       10237 :                 a = ci->array;
    1531       10237 :                 i -= ci->total;
    1532       10237 :                 t = ci->total;
    1533             :         }
    1534             : 
    1535       20652 :         while (a > 0) {
    1536             :                 uint64_t k;
    1537             : 
    1538       10339 :                 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &o);
    1539       10339 :                 if (r < 0)
    1540           0 :                         return r;
    1541             : 
    1542       10339 :                 k = journal_file_entry_array_n_items(o);
    1543       10339 :                 if (i < k) {
    1544       10313 :                         p = le64toh(o->entry_array.items[i]);
    1545       10313 :                         goto found;
    1546             :                 }
    1547             : 
    1548          26 :                 i -= k;
    1549          26 :                 t += k;
    1550          26 :                 a = le64toh(o->entry_array.next_entry_array_offset);
    1551             :         }
    1552             : 
                               /* Ran off the end of the chain: no such item */
    1553           0 :         return 0;
    1554             : 
    1555             : found:
    1556             :         /* Let's cache this item for the next invocation */
    1557       10313 :         chain_cache_put(f->chain_cache, ci, first, a, le64toh(o->entry_array.items[0]), t, i);
    1558             : 
    1559       10313 :         r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
    1560       10313 :         if (r < 0)
    1561           0 :                 return r;
    1562             : 
    1563       10313 :         if (ret)
    1564       10310 :                 *ret = o;
    1565             : 
    1566       10313 :         if (offset)
    1567       10312 :                 *offset = p;
    1568             : 
    1569       10313 :         return 1;
    1570             : }
    1571             : 
    1572          19 : static int generic_array_get_plus_one(
    1573             :                 JournalFile *f,
    1574             :                 uint64_t extra,
    1575             :                 uint64_t first,
    1576             :                 uint64_t i,
    1577             :                 Object **ret, uint64_t *offset) {
    1578             : 
                               /* Like generic_array_get(), but for lists whose item 0 is the
                                * entry at offset "extra" and whose item k > 0 is item k-1 of
                                * the chain starting at "first".
                                *
                                * Returns 1 if found, a negative error from
                                * journal_file_move_to_object(), or otherwise whatever
                                * generic_array_get() returns. */
    1579             :         Object *o;
    1580             : 
    1581          19 :         assert(f);
    1582             : 
    1583          19 :         if (i == 0) {
    1584             :                 int r;
    1585             : 
    1586          15 :                 r = journal_file_move_to_object(f, OBJECT_ENTRY, extra, &o);
    1587          15 :                 if (r < 0)
    1588           0 :                         return r;
    1589             : 
    1590          15 :                 if (ret)
    1591           3 :                         *ret = o;
    1592             : 
    1593          15 :                 if (offset)
    1594          12 :                         *offset = extra;
    1595             : 
    1596          15 :                 return 1;
    1597             :         }
    1598             : 
    1599           4 :         return generic_array_get(f, first, i-1, ret, offset);
    1600             : }
    1601             : 
    1602             : enum {
                               /* Results of the test_object() callbacks used when bisecting
                                * entry arrays: TEST_FOUND means the tested object matches the
                                * needle; TEST_LEFT means the tested object lies left of the
                                * needle, so the search continues to its right; TEST_RIGHT
                                * means the tested object lies right of the needle, so the
                                * search continues to its left. */
    1603             :         TEST_FOUND,
    1604             :         TEST_LEFT,
    1605             :         TEST_RIGHT
    1606             : };
    1607             : 
    1608       10472 : static int generic_array_bisect(
    1609             :                 JournalFile *f,
    1610             :                 uint64_t first,
    1611             :                 uint64_t n,
    1612             :                 uint64_t needle,
    1613             :                 int (*test_object)(JournalFile *f, uint64_t p, uint64_t needle),
    1614             :                 direction_t direction,
    1615             :                 Object **ret,
    1616             :                 uint64_t *offset,
    1617             :                 uint64_t *idx) {
    1618             : 
                               /* Bisects the first n items of the entry array chain starting
                                * at offset "first" for "needle". test_object() is called with
                                * the offset stored in an item and reports TEST_FOUND,
                                * TEST_LEFT or TEST_RIGHT (or a negative error). A TEST_FOUND
                                * result is treated as TEST_RIGHT for DIRECTION_DOWN and as
                                * TEST_LEFT otherwise.
                                *
                                * For DIRECTION_UP the item preceding the bisection point is
                                * reported (see subtract_one), otherwise the item at the
                                * bisection point itself.
                                *
                                * Returns 1 if an item was located, storing the entry object
                                * in *ret, its offset in *offset and its chain-wide index in
                                * *idx where those pointers are non-NULL. Returns 0 if nothing
                                * suitable exists, -EBADMSG if an examined array item holds a
                                * zero offset, or the negative error returned by
                                * test_object() or journal_file_move_to_object(). */
    1619       10472 :         uint64_t a, p, t = 0, i = 0, last_p = 0, last_index = (uint64_t) -1;
    1620       10472 :         bool subtract_one = false;
    1621       10472 :         Object *o, *array = NULL;
    1622             :         int r;
    1623             :         ChainCacheItem *ci;
    1624             : 
    1625       10472 :         assert(f);
    1626       10472 :         assert(test_object);
    1627             : 
    1628             :         /* Start with the first array in the chain */
    1629       10472 :         a = first;
    1630             : 
    1631       10472 :         ci = ordered_hashmap_get(f->chain_cache, &first);
    1632       10472 :         if (ci && n > ci->total) {
    1633             :                 /* Ah, we have iterated this bisection array chain
    1634             :                  * previously! Let's see if we can skip ahead in the
    1635             :                  * chain, as far as the last time. But we can't jump
    1636             :                  * backwards in the chain, so let's check that
    1637             :                  * first. */
    1638             : 
    1639       10386 :                 r = test_object(f, ci->begin, needle);
    1640       10386 :                 if (r < 0)
    1641           0 :                         return r;
    1642             : 
    1643       10386 :                 if (r == TEST_LEFT) {
    1644             :                         /* OK, what we are looking for is right of the
    1645             :                          * begin of this EntryArray, so let's jump
    1646             :                          * straight to previously cached array in the
    1647             :                          * chain */
    1648             : 
    1649       10346 :                         a = ci->array;
    1650       10346 :                         n -= ci->total;
    1651       10346 :                         t = ci->total;
    1652       10346 :                         last_index = ci->last_index;
    1653             :                 }
    1654             :         }
    1655             : 
                               /* Invariants of the walk: a is the current array, n the
                                * number of items still to consider from a onwards, t the
                                * number of items in the arrays already skipped. */
    1656       21026 :         while (a > 0) {
    1657             :                 uint64_t left, right, k, lp;
    1658             : 
    1659       10550 :                 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &array);
    1660       10550 :                 if (r < 0)
    1661           0 :                         return r;
    1662             : 
    1663       10550 :                 k = journal_file_entry_array_n_items(array);
    1664       10550 :                 right = MIN(k, n);
    1665       10550 :                 if (right <= 0)
    1666           0 :                         return 0;
    1667             : 
                                       /* Test the last relevant item of this array first, to
                                        * decide whether to bisect inside this array or to
                                        * move on to the next one. lp remembers its offset. */
    1668       10550 :                 i = right - 1;
    1669       10550 :                 lp = p = le64toh(array->entry_array.items[i]);
    1670       10550 :                 if (p <= 0)
    1671           0 :                         return -EBADMSG;
    1672             : 
    1673       10550 :                 r = test_object(f, p, needle);
    1674       10550 :                 if (r < 0)
    1675           0 :                         return r;
    1676             : 
    1677       10550 :                 if (r == TEST_FOUND)
    1678          56 :                         r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
    1679             : 
    1680       10550 :                 if (r == TEST_RIGHT) {
                                               /* The bisection point lies within this array,
                                                * in the range [left, right] */
    1681       10451 :                         left = 0;
    1682       10451 :                         right -= 1;
    1683             : 
    1684       10451 :                         if (last_index != (uint64_t) -1) {
    1685       10324 :                                 assert(last_index <= right);
    1686             : 
    1687             :                                 /* If we cached the last index we
    1688             :                                  * looked at, let's try not to jump
    1689             :                                  * too wildly around and see if we can
    1690             :                                  * limit the range to look at early to
    1691             :                                  * the immediate neighbors of the last
    1692             :                                  * index we looked at. */
    1693             : 
    1694       10324 :                                 if (last_index > 0) {
    1695       10308 :                                         uint64_t x = last_index - 1;
    1696             : 
    1697       10308 :                                         p = le64toh(array->entry_array.items[x]);
    1698       10308 :                                         if (p <= 0)
    1699           0 :                                                 return -EBADMSG;
    1700             : 
    1701       10308 :                                         r = test_object(f, p, needle);
    1702       10308 :                                         if (r < 0)
    1703           0 :                                                 return r;
    1704             : 
    1705       10308 :                                         if (r == TEST_FOUND)
    1706           0 :                                                 r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
    1707             : 
    1708       10308 :                                         if (r == TEST_RIGHT)
    1709           0 :                                                 right = x;
    1710             :                                         else
    1711       10308 :                                                 left = x + 1;
    1712             :                                 }
    1713             : 
    1714       10324 :                                 if (last_index < right) {
    1715       10302 :                                         uint64_t y = last_index + 1;
    1716             : 
    1717       10302 :                                         p = le64toh(array->entry_array.items[y]);
    1718       10302 :                                         if (p <= 0)
    1719           0 :                                                 return -EBADMSG;
    1720             : 
    1721       10302 :                                         r = test_object(f, p, needle);
    1722       10302 :                                         if (r < 0)
    1723           0 :                                                 return r;
    1724             : 
    1725       10302 :                                         if (r == TEST_FOUND)
    1726           1 :                                                 r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
    1727             : 
    1728       10302 :                                         if (r == TEST_RIGHT)
    1729       10302 :                                                 right = y;
    1730             :                                         else
    1731           0 :                                                 left = y + 1;
    1732             :                                 }
    1733             :                         }
    1734             : 
                                               /* Plain bisection of [left, right]; the loop
                                                * is only left via "goto found" or an error
                                                * return */
    1735             :                         for (;;) {
    1736       21058 :                                 if (left == right) {
    1737       10451 :                                         if (direction == DIRECTION_UP)
    1738          44 :                                                 subtract_one = true;
    1739             : 
    1740       10451 :                                         i = left;
    1741       10451 :                                         goto found;
    1742             :                                 }
    1743             : 
    1744       10607 :                                 assert(left < right);
    1745       10607 :                                 i = (left + right) / 2;
    1746             : 
    1747       10607 :                                 p = le64toh(array->entry_array.items[i]);
    1748       10607 :                                 if (p <= 0)
    1749           0 :                                         return -EBADMSG;
    1750             : 
    1751       10607 :                                 r = test_object(f, p, needle);
    1752       10607 :                                 if (r < 0)
    1753           0 :                                         return r;
    1754             : 
    1755       10607 :                                 if (r == TEST_FOUND)
    1756       10268 :                                         r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
    1757             : 
    1758       10607 :                                 if (r == TEST_RIGHT)
    1759       10472 :                                         right = i;
    1760             :                                 else
    1761         135 :                                         left = i + 1;
    1762       10607 :                         }
    1763             :                 }
    1764             : 
                                       /* The last relevant item of this array is still left
                                        * of the needle. If this array covers all remaining
                                        * items there is nothing further right: for
                                        * DIRECTION_UP report that last item (i = n combined
                                        * with subtract_one), otherwise nothing matches. */
    1765          99 :                 if (k >= n) {
    1766          17 :                         if (direction == DIRECTION_UP) {
    1767          10 :                                 i = n;
    1768          10 :                                 subtract_one = true;
    1769          10 :                                 goto found;
    1770             :                         }
    1771             : 
    1772           7 :                         return 0;
    1773             :                 }
    1774             : 
                                       /* Remember the last item of this array; it is the
                                        * predecessor of item 0 of the next array */
    1775          82 :                 last_p = lp;
    1776             : 
    1777          82 :                 n -= k;
    1778          82 :                 t += k;
    1779          82 :                 last_index = (uint64_t) -1;
    1780          82 :                 a = le64toh(array->entry_array.next_entry_array_offset);
    1781             :         }
    1782             : 
    1783           4 :         return 0;
    1784             : 
    1785             : found:
                               /* Asked for the predecessor of the very first item of the
                                * chain: there is none */
    1786       10461 :         if (subtract_one && t == 0 && i == 0)
    1787           3 :                 return 0;
    1788             : 
    1789             :         /* Let's cache this item for the next invocation */
    1790       10458 :         chain_cache_put(f->chain_cache, ci, first, a, le64toh(array->entry_array.items[0]), t, subtract_one ? (i > 0 ? i-1 : (uint64_t) -1) : i);
    1791             : 
                               /* With subtract_one and i == 0 the wanted item is the last
                                * one of the previous array, whose offset was saved in
                                * last_p */
    1792       10458 :         if (subtract_one && i == 0)
    1793           5 :                 p = last_p;
    1794       10453 :         else if (subtract_one)
    1795          46 :                 p = le64toh(array->entry_array.items[i-1]);
    1796             :         else
    1797       10407 :                 p = le64toh(array->entry_array.items[i]);
    1798             : 
    1799       10458 :         r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
    1800       10458 :         if (r < 0)
    1801           0 :                 return r;
    1802             : 
    1803       10458 :         if (ret)
    1804          11 :                 *ret = o;
    1805             : 
    1806       10458 :         if (offset)
    1807         148 :                 *offset = p;
    1808             : 
                               /* Chain-wide index: items in skipped arrays plus the index
                                * within the current array, minus one for subtract_one */
    1809       10458 :         if (idx)
    1810       10307 :                 *idx = t + i + (subtract_one ? -1 : 0);
    1811             : 
    1812       10458 :         return 1;
    1813             : }
    1814             : /* Like generic_array_bisect(), but first tests the lone entry at offset 'extra', which counts as index 0 ahead of the array in object 'first'. Returns 1 if an entry was found, 0 if not, <0 on error. */
    1815         175 : static int generic_array_bisect_plus_one(
    1816             :                 JournalFile *f,
    1817             :                 uint64_t extra,
    1818             :                 uint64_t first,
    1819             :                 uint64_t n,
    1820             :                 uint64_t needle,
    1821             :                 int (*test_object)(JournalFile *f, uint64_t p, uint64_t needle),
    1822             :                 direction_t direction,
    1823             :                 Object **ret,
    1824             :                 uint64_t *offset,
    1825             :                 uint64_t *idx) {
    1826             : 
    1827             :         int r;
    1828         175 :         bool step_back = false;
    1829             :         Object *o;
    1830             : 
    1831         175 :         assert(f);
    1832         175 :         assert(test_object);
    1833             : 
    1834         175 :         if (n <= 0)
    1835           0 :                 return 0;
    1836             : 
    1837             :         /* This bisects the array in object 'first', but first checks
    1838             :          * an extra entry, the one at offset 'extra' */
    1839         175 :         r = test_object(f, extra, needle);
    1840         175 :         if (r < 0)
    1841           0 :                 return r;
    1842             :         /* An exact hit is treated as TEST_RIGHT when searching downwards and as TEST_LEFT otherwise */
    1843         175 :         if (r == TEST_FOUND)
    1844          16 :                 r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
    1845             : 
    1846             :         /* if we are looking with DIRECTION_UP then we need to first
    1847             :            see if in the actual array there is a matching entry, and
    1848             :            return the last one of that. But if there isn't any we need
    1849             :            to return this one. Hence remember this, and return it
    1850             :            below. */
    1851         175 :         if (r == TEST_LEFT)
    1852         153 :                 step_back = direction == DIRECTION_UP;
    1853             : 
    1854         175 :         if (r == TEST_RIGHT) {
    1855          22 :                 if (direction == DIRECTION_DOWN)
    1856          19 :                         goto found;
    1857             :                 else
    1858           3 :                         return 0;
    1859             :         }
    1860             :         /* Search the array itself with n-1 entries; presumably n counts the 'extra' entry as well (TODO confirm) */
    1861         153 :         r = generic_array_bisect(f, first, n-1, needle, test_object, direction, ret, offset, idx);
    1862             :         /* Nothing suitable in the array: fall back to 'extra' if that was remembered above */
    1863         153 :         if (r == 0 && step_back)
    1864           3 :                 goto found;
    1865             :         /* Shift the array-relative index by one, since index 0 is reserved for 'extra' */
    1866         150 :         if (r > 0 && idx)
    1867           0 :                 (*idx) ++;
    1868             : 
    1869         150 :         return r;
    1870             :         /* Deliver the entry at offset 'extra' itself as the result */
    1871             : found:
    1872          22 :         r = journal_file_move_to_object(f, OBJECT_ENTRY, extra, &o);
    1873          22 :         if (r < 0)
    1874           0 :                 return r;
    1875             : 
    1876          22 :         if (ret)
    1877           0 :                 *ret = o;
    1878             : 
    1879          22 :         if (offset)
    1880          22 :                 *offset = extra;
    1881             : 
    1882          22 :         if (idx)
    1883           0 :                 *idx = 0;
    1884             : 
    1885          22 :         return 1;
    1886             : }
    1887             : /* Bisection callback: classifies offset p against needle as TEST_FOUND (equal), TEST_LEFT (p < needle) or TEST_RIGHT (p > needle). */
    1888       52307 : _pure_ static int test_object_offset(JournalFile *f, uint64_t p, uint64_t needle) {
    1889       52307 :         assert(f);
    1890       52307 :         assert(p > 0);
    1891             : 
    1892       52307 :         if (p == needle)
    1893       10355 :                 return TEST_FOUND;
    1894       41952 :         else if (p < needle)
    1895       21028 :                 return TEST_LEFT;
    1896             :         else
    1897       20924 :                 return TEST_RIGHT;
    1898             : }
    1899             : /* Bisection callback: loads the entry object at offset p and compares its seqnum with needle. Returns TEST_FOUND/TEST_LEFT/TEST_RIGHT, or the negative error from journal_file_move_to_object(). */
    1900          15 : static int test_object_seqnum(JournalFile *f, uint64_t p, uint64_t needle) {
    1901             :         Object *o;
    1902             :         int r;
    1903             : 
    1904          15 :         assert(f);
    1905          15 :         assert(p > 0);
    1906             : 
    1907          15 :         r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
    1908          15 :         if (r < 0)
    1909           0 :                 return r;
    1910             :         /* TEST_LEFT means the entry's seqnum is smaller than needle, TEST_RIGHT that it is larger */
    1911          15 :         if (le64toh(o->entry.seqnum) == needle)
    1912           7 :                 return TEST_FOUND;
    1913           8 :         else if (le64toh(o->entry.seqnum) < needle)
    1914           3 :                 return TEST_LEFT;
    1915             :         else
    1916           5 :                 return TEST_RIGHT;
    1917             : }
    1918             : /* Bisects the file-wide entry array (header->entry_array_offset, header->n_entries) for 'seqnum' in the given direction; the result of generic_array_bisect() is returned unchanged. */
    1919           8 : int journal_file_move_to_entry_by_seqnum(
    1920             :                 JournalFile *f,
    1921             :                 uint64_t seqnum,
    1922             :                 direction_t direction,
    1923             :                 Object **ret,
    1924             :                 uint64_t *offset) {
    1925             : 
    1926          16 :         return generic_array_bisect(f,
    1927           8 :                                     le64toh(f->header->entry_array_offset),
    1928           8 :                                     le64toh(f->header->n_entries),
    1929             :                                     seqnum,
    1930             :                                     test_object_seqnum,
    1931             :                                     direction,
    1932             :                                     ret, offset, NULL);
    1933             : }
    1934             : /* Bisection callback: loads the entry object at offset p and compares its realtime timestamp with needle. Returns TEST_FOUND/TEST_LEFT/TEST_RIGHT, or the negative error from journal_file_move_to_object(). */
    1935           6 : static int test_object_realtime(JournalFile *f, uint64_t p, uint64_t needle) {
    1936             :         Object *o;
    1937             :         int r;
    1938             : 
    1939           6 :         assert(f);
    1940           6 :         assert(p > 0);
    1941             : 
    1942           6 :         r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
    1943           6 :         if (r < 0)
    1944           0 :                 return r;
    1945             :         /* TEST_LEFT means the entry's realtime is smaller than needle, TEST_RIGHT that it is larger */
    1946           6 :         if (le64toh(o->entry.realtime) == needle)
    1947           0 :                 return TEST_FOUND;
    1948           6 :         else if (le64toh(o->entry.realtime) < needle)
    1949           2 :                 return TEST_LEFT;
    1950             :         else
    1951           4 :                 return TEST_RIGHT;
    1952             : }
    1953             : /* Bisects the file-wide entry array (header->entry_array_offset, header->n_entries) for 'realtime' in the given direction; the result of generic_array_bisect() is returned unchanged. */
    1954           4 : int journal_file_move_to_entry_by_realtime(
    1955             :                 JournalFile *f,
    1956             :                 uint64_t realtime,
    1957             :                 direction_t direction,
    1958             :                 Object **ret,
    1959             :                 uint64_t *offset) {
    1960             : 
    1961           8 :         return generic_array_bisect(f,
    1962           4 :                                     le64toh(f->header->entry_array_offset),
    1963           4 :                                     le64toh(f->header->n_entries),
    1964             :                                     realtime,
    1965             :                                     test_object_realtime,
    1966             :                                     direction,
    1967             :                                     ret, offset, NULL);
    1968             : }
    1969             : /* Bisection callback: loads the entry object at offset p and compares its monotonic timestamp with needle. Returns TEST_FOUND/TEST_LEFT/TEST_RIGHT, or the negative error from journal_file_move_to_object(). */
    1970           0 : static int test_object_monotonic(JournalFile *f, uint64_t p, uint64_t needle) {
    1971             :         Object *o;
    1972             :         int r;
    1973             : 
    1974           0 :         assert(f);
    1975           0 :         assert(p > 0);
    1976             : 
    1977           0 :         r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
    1978           0 :         if (r < 0)
    1979           0 :                 return r;
    1980             :         /* TEST_LEFT means the entry's monotonic time is smaller than needle, TEST_RIGHT that it is larger */
    1981           0 :         if (le64toh(o->entry.monotonic) == needle)
    1982           0 :                 return TEST_FOUND;
    1983           0 :         else if (le64toh(o->entry.monotonic) < needle)
    1984           0 :                 return TEST_LEFT;
    1985             :         else
    1986           0 :                 return TEST_RIGHT;
    1987             : }
    1988             : /* Builds the field string "_BOOT_ID=" followed by the formatted boot_id and looks it up with journal_file_find_data_object(); o and b are handed through to that call and its result is returned. */
    1989           4 : static int find_data_object_by_boot_id(
    1990             :                 JournalFile *f,
    1991             :                 sd_id128_t boot_id,
    1992             :                 Object **o,
    1993             :                 uint64_t *b) {
    1994             :         /* Buffer: the 9-character prefix, 32 more characters for the id and one extra byte (presumably the trailing NUL) */
    1995           4 :         char t[sizeof("_BOOT_ID=")-1 + 32 + 1] = "_BOOT_ID=";
    1996             :         /* The id is written right behind the prefix; the lookup length excludes the last byte of t */
    1997           4 :         sd_id128_to_string(boot_id, t + 9);
    1998           4 :         return journal_file_find_data_object(f, t, sizeof(t) - 1, o, b);
    1999             : }
    2000             : /* Seeks by monotonic timestamp within boot 'boot_id': looks up the "_BOOT_ID=" data object and bisects its entries. Returns -ENOENT if no such data object exists, otherwise the lookup error or the bisection result. */
    2001           4 : int journal_file_move_to_entry_by_monotonic(
    2002             :                 JournalFile *f,
    2003             :                 sd_id128_t boot_id,
    2004             :                 uint64_t monotonic,
    2005             :                 direction_t direction,
    2006             :                 Object **ret,
    2007             :                 uint64_t *offset) {
    2008             : 
    2009             :         Object *o;
    2010             :         int r;
    2011             : 
    2012           4 :         assert(f);
    2013             : 
    2014           4 :         r = find_data_object_by_boot_id(f, boot_id, &o, NULL);
    2015           4 :         if (r < 0)
    2016           0 :                 return r;
    2017           4 :         if (r == 0)
    2018           4 :                 return -ENOENT;
    2019             :         /* Bisect the entries referenced by the boot id data object: data.entry_offset first, then the array at data.entry_array_offset */
    2020           0 :         return generic_array_bisect_plus_one(f,
    2021           0 :                                              le64toh(o->data.entry_offset),
    2022           0 :                                              le64toh(o->data.entry_array_offset),
    2023           0 :                                              le64toh(o->data.n_entries),
    2024             :                                              monotonic,
    2025             :                                              test_object_monotonic,
    2026             :                                              direction,
    2027             :                                              ret, offset, NULL);
    2028             : }
    2029             : /* Resets the saved location of f: location_type becomes LOCATION_HEAD and all current_* fields are cleared to zero. */
    2030         664 : void journal_file_reset_location(JournalFile *f) {
    2031         664 :         f->location_type = LOCATION_HEAD;
    2032         664 :         f->current_offset = 0;
    2033         664 :         f->current_seqnum = 0;
    2034         664 :         f->current_realtime = 0;
    2035         664 :         f->current_monotonic = 0;
    2036         664 :         zero(f->current_boot_id);
    2037         664 :         f->current_xor_hash = 0;
    2038         664 : }
    2039             : /* Records entry object o, located at 'offset', as the saved location of f: sets LOCATION_SEEK and copies the entry's seqnum, realtime, monotonic and xor_hash (via le64toh()) plus its boot_id into the current_* fields. */
    2040       10457 : void journal_file_save_location(JournalFile *f, Object *o, uint64_t offset) {
    2041       10457 :         f->location_type = LOCATION_SEEK;
    2042       10457 :         f->current_offset = offset;
    2043       10457 :         f->current_seqnum = le64toh(o->entry.seqnum);
    2044       10457 :         f->current_realtime = le64toh(o->entry.realtime);
    2045       10457 :         f->current_monotonic = le64toh(o->entry.monotonic);
    2046       10457 :         f->current_boot_id = o->entry.boot_id;
    2047       10457 :         f->current_xor_hash = le64toh(o->entry.xor_hash);
    2048       10457 : }
    2049             : /* Orders the saved locations of af and bf (both must be of type LOCATION_SEEK). Returns -1 if af sorts first, 1 if bf sorts first, 0 if they are considered equal. */
    2050       40656 : int journal_file_compare_locations(JournalFile *af, JournalFile *bf) {
    2051       40656 :         assert(af);
    2052       40656 :         assert(bf);
    2053       40656 :         assert(af->location_type == LOCATION_SEEK);
    2054       40656 :         assert(bf->location_type == LOCATION_SEEK);
    2055             : 
    2056             :         /* If contents and timestamps match, these entries are
    2057             :          * identical, even if the seqnum does not match */
    2058       50635 :         if (sd_id128_equal(af->current_boot_id, bf->current_boot_id) &&
    2059       10061 :             af->current_monotonic == bf->current_monotonic &&
    2060         164 :             af->current_realtime == bf->current_realtime &&
    2061          82 :             af->current_xor_hash == bf->current_xor_hash)
    2062          82 :                 return 0;
    2063             : 
    2064       40574 :         if (sd_id128_equal(af->header->seqnum_id, bf->header->seqnum_id)) {
    2065             : 
    2066             :                 /* If this is from the same seqnum source, compare
    2067             :                  * seqnums */
    2068           0 :                 if (af->current_seqnum < bf->current_seqnum)
    2069           0 :                         return -1;
    2070           0 :                 if (af->current_seqnum > bf->current_seqnum)
    2071           0 :                         return 1;
    2072             : 
    2073             :                 /* Wow! This is weird, different data but the same
    2074             :                  * seqnums? Something is borked, but let's make the
    2075             :                  * best of it and compare by time. */
    2076             :         }
    2077             : 
    2078       40574 :         if (sd_id128_equal(af->current_boot_id, bf->current_boot_id)) {
    2079             : 
    2080             :                 /* If the boot id matches, compare monotonic time */
    2081        9897 :                 if (af->current_monotonic < bf->current_monotonic)
    2082        1336 :                         return -1;
    2083        8561 :                 if (af->current_monotonic > bf->current_monotonic)
    2084        8561 :                         return 1;
    2085             :         }
    2086             : 
    2087             :         /* Otherwise, compare UTC time */
    2088       30677 :         if (af->current_realtime < bf->current_realtime)
    2089           0 :                 return -1;
    2090       30677 :         if (af->current_realtime > bf->current_realtime)
    2091       30677 :                 return 1;
    2092             : 
    2093             :         /* Finally, compare by contents */
    2094           0 :         if (af->current_xor_hash < bf->current_xor_hash)
    2095           0 :                 return -1;
    2096           0 :         if (af->current_xor_hash > bf->current_xor_hash)
    2097           0 :                 return 1;
    2098             : 
    2099           0 :         return 0;
    2100             : }
    2101             : /* Steps from the entry at offset p to its neighbour in the file-wide entry array: the next one for DIRECTION_DOWN, the previous one otherwise. With p == 0 it picks index 0 (DIRECTION_DOWN) or n-1. Returns 1 on success, 0 if there is no such entry, <0 on error. */
    2102       10333 : int journal_file_next_entry(
    2103             :                 JournalFile *f,
    2104             :                 uint64_t p,
    2105             :                 direction_t direction,
    2106             :                 Object **ret, uint64_t *offset) {
    2107             : 
    2108             :         uint64_t i, n, ofs;
    2109             :         int r;
    2110             : 
    2111       10333 :         assert(f);
    2112             : 
    2113       10333 :         n = le64toh(f->header->n_entries);
    2114       10333 :         if (n <= 0)
    2115           0 :                 return 0;
    2116             : 
    2117       10333 :         if (p == 0)
    2118          26 :                 i = direction == DIRECTION_DOWN ? 0 : n - 1;
    2119             :         else {
    2120       20614 :                 r = generic_array_bisect(f,
    2121       10307 :                                          le64toh(f->header->entry_array_offset),
    2122       10307 :                                          le64toh(f->header->n_entries),
    2123             :                                          p,
    2124             :                                          test_object_offset,
    2125             :                                          DIRECTION_DOWN,
    2126             :                                          NULL, NULL,
    2127             :                                          &i);
    2128       10307 :                 if (r <= 0)
    2129           0 :                         return r;
    2130             :                 /* i is now the index found for p; step one slot in the requested direction unless already at the boundary */
    2131       10307 :                 if (direction == DIRECTION_DOWN) {
    2132       10285 :                         if (i >= n - 1)
    2133          14 :                                 return 0;
    2134             : 
    2135       10271 :                         i++;
    2136             :                 } else {
    2137          22 :                         if (i <= 0)
    2138          10 :                                 return 0;
    2139             : 
    2140          12 :                         i--;
    2141             :                 }
    2142             :         }
    2143             : 
    2144             :         /* And jump to it */
    2145       20618 :         r = generic_array_get(f,
    2146       10309 :                               le64toh(f->header->entry_array_offset),
    2147             :                               i,
    2148             :                               ret, &ofs);
    2149       10309 :         if (r <= 0)
    2150           0 :                 return r;
    2151             :         /* The neighbour's offset must lie strictly beyond p in the direction of travel; otherwise the array is reported as corrupted */
    2152       10309 :         if (p > 0 &&
    2153             :             (direction == DIRECTION_DOWN ? ofs <= p : ofs >= p)) {
    2154           0 :                 log_debug("%s: entry array corrupted at entry %"PRIu64,
    2155             :                           f->path, i);
    2156           0 :                 return -EBADMSG;
    2157             :         }
    2158             : 
    2159       10309 :         if (offset)
    2160       10309 :                 *offset = ofs;
    2161             : 
    2162       10309 :         return 1;
    2163             : }
    2164             : /* Steps to the neighbour of entry o (at offset p) within the entries of the data object at data_offset; with o == NULL it picks the first (DIRECTION_DOWN) or last entry. Returns -EINVAL if o is not an entry object, 0 if there is no such entry. */
    2165          19 : int journal_file_next_entry_for_data(
    2166             :                 JournalFile *f,
    2167             :                 Object *o, uint64_t p,
    2168             :                 uint64_t data_offset,
    2169             :                 direction_t direction,
    2170             :                 Object **ret, uint64_t *offset) {
    2171             : 
    2172             :         uint64_t n, i;
    2173             :         int r;
    2174             :         Object *d;
    2175             : 
    2176          19 :         assert(f);
    2177          19 :         assert(p > 0 || !o);
    2178             : 
    2179          19 :         r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
    2180          19 :         if (r < 0)
    2181           0 :                 return r;
    2182             : 
    2183          19 :         n = le64toh(d->data.n_entries);
    2184          19 :         if (n <= 0)
    2185           0 :                 return n;
    2186             :         /* (n is unsigned, so the early return above always yields 0) */
    2187          19 :         if (!o)
    2188          19 :                 i = direction == DIRECTION_DOWN ? 0 : n - 1;
    2189             :         else {
    2190           0 :                 if (o->object.type != OBJECT_ENTRY)
    2191           0 :                         return -EINVAL;
    2192             :                 /* Find the index of offset p among this data object's entries */
    2193           0 :                 r = generic_array_bisect_plus_one(f,
    2194           0 :                                                   le64toh(d->data.entry_offset),
    2195           0 :                                                   le64toh(d->data.entry_array_offset),
    2196           0 :                                                   le64toh(d->data.n_entries),
    2197             :                                                   p,
    2198             :                                                   test_object_offset,
    2199             :                                                   DIRECTION_DOWN,
    2200             :                                                   NULL, NULL,
    2201             :                                                   &i);
    2202             : 
    2203           0 :                 if (r <= 0)
    2204           0 :                         return r;
    2205             :                 /* Step one slot in the requested direction unless already at the boundary */
    2206           0 :                 if (direction == DIRECTION_DOWN) {
    2207           0 :                         if (i >= n - 1)
    2208           0 :                                 return 0;
    2209             : 
    2210           0 :                         i++;
    2211             :                 } else {
    2212           0 :                         if (i <= 0)
    2213           0 :                                 return 0;
    2214             : 
    2215           0 :                         i--;
    2216             :                 }
    2217             : 
    2218             :         }
    2219             :         /* Fetch the entry at index i; ret and offset are filled in by generic_array_get_plus_one() */
    2220          57 :         return generic_array_get_plus_one(f,
    2221          19 :                                           le64toh(d->data.entry_offset),
    2222          19 :                                           le64toh(d->data.entry_array_offset),
    2223             :                                           i,
    2224             :                                           ret, offset);
    2225             : }
    2226             : /* Bisects the entries of the data object at data_offset for entry offset p in the given direction. Returns the error from loading the data object, otherwise the result of generic_array_bisect_plus_one(). */
    2227         175 : int journal_file_move_to_entry_by_offset_for_data(
    2228             :                 JournalFile *f,
    2229             :                 uint64_t data_offset,
    2230             :                 uint64_t p,
    2231             :                 direction_t direction,
    2232             :                 Object **ret, uint64_t *offset) {
    2233             : 
    2234             :         int r;
    2235             :         Object *d;
    2236             : 
    2237         175 :         assert(f);
    2238             : 
    2239         175 :         r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
    2240         175 :         if (r < 0)
    2241           0 :                 return r;
    2242             : 
    2243         525 :         return generic_array_bisect_plus_one(f,
    2244         175 :                                              le64toh(d->data.entry_offset),
    2245         175 :                                              le64toh(d->data.entry_array_offset),
    2246         175 :                                              le64toh(d->data.n_entries),
    2247             :                                              p,
    2248             :                                              test_object_offset,
    2249             :                                              direction,
    2250             :                                              ret, offset, NULL);
    2251             : }
    2252             : /* Seeks to an entry that is referenced both by the data object at data_offset and by the "_BOOT_ID=" data object of boot_id, starting from 'monotonic'. Returns 1 when found, 0 if none, -ENOENT if the boot id is unknown, <0 on error. */
    2253           0 : int journal_file_move_to_entry_by_monotonic_for_data(
    2254             :                 JournalFile *f,
    2255             :                 uint64_t data_offset,
    2256             :                 sd_id128_t boot_id,
    2257             :                 uint64_t monotonic,
    2258             :                 direction_t direction,
    2259             :                 Object **ret, uint64_t *offset) {
    2260             : 
    2261             :         Object *o, *d;
    2262             :         int r;
    2263             :         uint64_t b, z;
    2264             : 
    2265           0 :         assert(f);
    2266             : 
    2267             :         /* First, seek by time */
    2268           0 :         r = find_data_object_by_boot_id(f, boot_id, &o, &b);
    2269           0 :         if (r < 0)
    2270           0 :                 return r;
    2271           0 :         if (r == 0)
    2272           0 :                 return -ENOENT;
    2273             :         /* z receives the offset of the entry found by timestamp in the boot id object's entries */
    2274           0 :         r = generic_array_bisect_plus_one(f,
    2275           0 :                                           le64toh(o->data.entry_offset),
    2276           0 :                                           le64toh(o->data.entry_array_offset),
    2277           0 :                                           le64toh(o->data.n_entries),
    2278             :                                           monotonic,
    2279             :                                           test_object_monotonic,
    2280             :                                           direction,
    2281             :                                           NULL, &z, NULL);
    2282           0 :         if (r <= 0)
    2283           0 :                 return r;
    2284             : 
    2285             :         /* And now, continue seeking until we find an entry that
    2286             :          * exists in both bisection arrays */
    2287             : 
    2288             :         for (;;) {
    2289             :                 Object *qo;
    2290             :                 uint64_t p, q;
    2291             : 
    2292           0 :                 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
    2293           0 :                 if (r < 0)
    2294           0 :                         return r;
    2295             :                 /* p: offset found for z among the entries of the requested data object */
    2296           0 :                 r = generic_array_bisect_plus_one(f,
    2297           0 :                                                   le64toh(d->data.entry_offset),
    2298           0 :                                                   le64toh(d->data.entry_array_offset),
    2299           0 :                                                   le64toh(d->data.n_entries),
    2300             :                                                   z,
    2301             :                                                   test_object_offset,
    2302             :                                                   direction,
    2303             :                                                   NULL, &p, NULL);
    2304           0 :                 if (r <= 0)
    2305           0 :                         return r;
    2306             :                 /* Reload the boot id data object from its offset b; presumably the pointer o may not stay valid across the calls above (TODO confirm) */
    2307           0 :                 r = journal_file_move_to_object(f, OBJECT_DATA, b, &o);
    2308           0 :                 if (r < 0)
    2309           0 :                         return r;
    2310             :                 /* q: offset found for p among the entries of the boot id data object */
    2311           0 :                 r = generic_array_bisect_plus_one(f,
    2312           0 :                                                   le64toh(o->data.entry_offset),
    2313           0 :                                                   le64toh(o->data.entry_array_offset),
    2314           0 :                                                   le64toh(o->data.n_entries),
    2315             :                                                   p,
    2316             :                                                   test_object_offset,
    2317             :                                                   direction,
    2318             :                                                   &qo, &q, NULL);
    2319             : 
    2320           0 :                 if (r <= 0)
    2321           0 :                         return r;
    2322             :                 /* Both lookups landed on the same entry offset: this entry is the result */
    2323           0 :                 if (p == q) {
    2324           0 :                         if (ret)
    2325           0 :                                 *ret = qo;
    2326           0 :                         if (offset)
    2327           0 :                                 *offset = q;
    2328             : 
    2329           0 :                         return 1;
    2330             :                 }
    2331             :                 /* Otherwise go around again, continuing from the offset found in the boot id object's entries */
    2332           0 :                 z = q;
    2333           0 :         }
    2334             : }
    2335             : /* Bisects the entries of the data object at data_offset for 'seqnum' in the given direction. Returns the error from loading the data object, otherwise the result of generic_array_bisect_plus_one(). */
    2336           0 : int journal_file_move_to_entry_by_seqnum_for_data(
    2337             :                 JournalFile *f,
    2338             :                 uint64_t data_offset,
    2339             :                 uint64_t seqnum,
    2340             :                 direction_t direction,
    2341             :                 Object **ret, uint64_t *offset) {
    2342             : 
    2343             :         Object *d;
    2344             :         int r;
    2345             : 
    2346           0 :         assert(f);
    2347             : 
    2348           0 :         r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
    2349           0 :         if (r < 0)
    2350           0 :                 return r;
    2351             : 
    2352           0 :         return generic_array_bisect_plus_one(f,
    2353           0 :                                              le64toh(d->data.entry_offset),
    2354           0 :                                              le64toh(d->data.entry_array_offset),
    2355           0 :                                              le64toh(d->data.n_entries),
    2356             :                                              seqnum,
    2357             :                                              test_object_seqnum,
    2358             :                                              direction,
    2359             :                                              ret, offset, NULL);
    2360             : }
    2361             : /* Bisects the entries of the data object at data_offset for 'realtime' in the given direction. Returns the error from loading the data object, otherwise the result of generic_array_bisect_plus_one(). */
    2362           0 : int journal_file_move_to_entry_by_realtime_for_data(
    2363             :                 JournalFile *f,
    2364             :                 uint64_t data_offset,
    2365             :                 uint64_t realtime,
    2366             :                 direction_t direction,
    2367             :                 Object **ret, uint64_t *offset) {
    2368             : 
    2369             :         Object *d;
    2370             :         int r;
    2371             : 
    2372           0 :         assert(f);
    2373             : 
    2374           0 :         r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
    2375           0 :         if (r < 0)
    2376           0 :                 return r;
    2377             : 
    2378           0 :         return generic_array_bisect_plus_one(f,
    2379           0 :                                              le64toh(d->data.entry_offset),
    2380           0 :                                              le64toh(d->data.entry_array_offset),
    2381           0 :                                              le64toh(d->data.n_entries),
    2382             :                                              realtime,
    2383             :                                              test_object_realtime,
    2384             :                                              direction,
    2385             :                                              ret, offset, NULL);
    2386             : }
    2387             : 
    2388           2 : void journal_file_dump(JournalFile *f) {
    2389             :         Object *o;
    2390             :         int r;
    2391             :         uint64_t p;
    2392             : 
    2393           2 :         assert(f);
    2394             : 
    2395           2 :         journal_file_print_header(f);
    2396             : 
    2397           2 :         p = le64toh(f->header->header_size);
    2398        6411 :         while (p != 0) {
    2399        6407 :                 r = journal_file_move_to_object(f, OBJECT_UNUSED, p, &o);
    2400        6407 :                 if (r < 0)
    2401           0 :                         goto fail;
    2402             : 
    2403        6407 :                 switch (o->object.type) {
    2404             : 
    2405             :                 case OBJECT_UNUSED:
    2406           0 :                         printf("Type: OBJECT_UNUSED\n");
    2407           0 :                         break;
    2408             : 
    2409             :                 case OBJECT_DATA:
    2410          79 :                         printf("Type: OBJECT_DATA\n");
    2411          79 :                         break;
    2412             : 
    2413             :                 case OBJECT_FIELD:
    2414           3 :                         printf("Type: OBJECT_FIELD\n");
    2415           3 :                         break;
    2416             : 
    2417             :                 case OBJECT_ENTRY:
    2418       18009 :                         printf("Type: OBJECT_ENTRY seqnum=%"PRIu64" monotonic=%"PRIu64" realtime=%"PRIu64"\n",
    2419        6003 :                                le64toh(o->entry.seqnum),
    2420        6003 :                                le64toh(o->entry.monotonic),
    2421        6003 :                                le64toh(o->entry.realtime));
    2422        6003 :                         break;
    2423             : 
    2424             :                 case OBJECT_FIELD_HASH_TABLE:
    2425           2 :                         printf("Type: OBJECT_FIELD_HASH_TABLE\n");
    2426           2 :                         break;
    2427             : 
    2428             :                 case OBJECT_DATA_HASH_TABLE:
    2429           2 :                         printf("Type: OBJECT_DATA_HASH_TABLE\n");
    2430           2 :                         break;
    2431             : 
    2432             :                 case OBJECT_ENTRY_ARRAY:
    2433         318 :                         printf("Type: OBJECT_ENTRY_ARRAY\n");
    2434         318 :                         break;
    2435             : 
    2436             :                 case OBJECT_TAG:
    2437           0 :                         printf("Type: OBJECT_TAG seqnum=%"PRIu64" epoch=%"PRIu64"\n",
    2438           0 :                                le64toh(o->tag.seqnum),
    2439           0 :                                le64toh(o->tag.epoch));
    2440           0 :                         break;
    2441             : 
    2442             :                 default:
    2443           0 :                         printf("Type: unknown (%i)\n", o->object.type);
    2444           0 :                         break;
    2445             :                 }
    2446             : 
    2447        6407 :                 if (o->object.flags & OBJECT_COMPRESSION_MASK)
    2448           0 :                         printf("Flags: %s\n",
    2449           0 :                                object_compressed_to_string(o->object.flags & OBJECT_COMPRESSION_MASK));
    2450             : 
    2451        6407 :                 if (p == le64toh(f->header->tail_object_offset))
    2452           2 :                         p = 0;
    2453             :                 else
    2454        6405 :                         p = p + ALIGN64(le64toh(o->object.size));
    2455             :         }
    2456             : 
    2457           2 :         return;
    2458             : fail:
    2459           0 :         log_error("File corrupt");
    2460             : }
    2461             : 
    2462          12 : static const char* format_timestamp_safe(char *buf, size_t l, usec_t t) {
    2463             :         const char *x;
    2464             : 
    2465          12 :         x = format_timestamp(buf, l, t);
    2466          12 :         if (x)
    2467           4 :                 return x;
    2468           8 :         return " --- ";
    2469             : }
    2470             : 
    2471           6 : void journal_file_print_header(JournalFile *f) {
    2472             :         char a[33], b[33], c[33], d[33];
    2473             :         char x[FORMAT_TIMESTAMP_MAX], y[FORMAT_TIMESTAMP_MAX], z[FORMAT_TIMESTAMP_MAX];
    2474             :         struct stat st;
    2475             :         char bytes[FORMAT_BYTES_MAX];
    2476             : 
    2477           6 :         assert(f);
    2478             : 
    2479         137 :         printf("File Path: %s\n"
    2480             :                "File ID: %s\n"
    2481             :                "Machine ID: %s\n"
    2482             :                "Boot ID: %s\n"
    2483             :                "Sequential Number ID: %s\n"
    2484             :                "State: %s\n"
    2485             :                "Compatible Flags:%s%s\n"
    2486             :                "Incompatible Flags:%s%s%s\n"
    2487             :                "Header size: %"PRIu64"\n"
    2488             :                "Arena size: %"PRIu64"\n"
    2489             :                "Data Hash Table Size: %"PRIu64"\n"
    2490             :                "Field Hash Table Size: %"PRIu64"\n"
    2491             :                "Rotate Suggested: %s\n"
    2492             :                "Head Sequential Number: %"PRIu64"\n"
    2493             :                "Tail Sequential Number: %"PRIu64"\n"
    2494             :                "Head Realtime Timestamp: %s\n"
    2495             :                "Tail Realtime Timestamp: %s\n"
    2496             :                "Tail Monotonic Timestamp: %s\n"
    2497             :                "Objects: %"PRIu64"\n"
    2498             :                "Entry Objects: %"PRIu64"\n",
    2499             :                f->path,
    2500           6 :                sd_id128_to_string(f->header->file_id, a),
    2501           6 :                sd_id128_to_string(f->header->machine_id, b),
    2502           6 :                sd_id128_to_string(f->header->boot_id, c),
    2503           6 :                sd_id128_to_string(f->header->seqnum_id, d),
    2504           6 :                f->header->state == STATE_OFFLINE ? "OFFLINE" :
    2505           5 :                f->header->state == STATE_ONLINE ? "ONLINE" :
    2506           0 :                f->header->state == STATE_ARCHIVED ? "ARCHIVED" : "UNKNOWN",
    2507           6 :                JOURNAL_HEADER_SEALED(f->header) ? " SEALED" : "",
    2508           6 :                (le32toh(f->header->compatible_flags) & ~HEADER_COMPATIBLE_ANY) ? " ???" : "",
    2509           6 :                JOURNAL_HEADER_COMPRESSED_XZ(f->header) ? " COMPRESSED-XZ" : "",
    2510           6 :                JOURNAL_HEADER_COMPRESSED_LZ4(f->header) ? " COMPRESSED-LZ4" : "",
    2511           6 :                (le32toh(f->header->incompatible_flags) & ~HEADER_INCOMPATIBLE_ANY) ? " ???" : "",
    2512           6 :                le64toh(f->header->header_size),
    2513           6 :                le64toh(f->header->arena_size),
    2514           6 :                le64toh(f->header->data_hash_table_size) / sizeof(HashItem),
    2515           6 :                le64toh(f->header->field_hash_table_size) / sizeof(HashItem),
    2516           6 :                yes_no(journal_file_rotate_suggested(f, 0)),
    2517           6 :                le64toh(f->header->head_entry_seqnum),
    2518           6 :                le64toh(f->header->tail_entry_seqnum),
    2519           6 :                format_timestamp_safe(x, sizeof(x), le64toh(f->header->head_entry_realtime)),
    2520           6 :                format_timestamp_safe(y, sizeof(y), le64toh(f->header->tail_entry_realtime)),
    2521           6 :                format_timespan(z, sizeof(z), le64toh(f->header->tail_entry_monotonic), USEC_PER_MSEC),
    2522           6 :                le64toh(f->header->n_objects),
    2523           6 :                le64toh(f->header->n_entries));
    2524             : 
    2525           6 :         if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
    2526          12 :                 printf("Data Objects: %"PRIu64"\n"
    2527             :                        "Data Hash Table Fill: %.1f%%\n",
    2528           6 :                        le64toh(f->header->n_data),
    2529           6 :                        100.0 * (double) le64toh(f->header->n_data) / ((double) (le64toh(f->header->data_hash_table_size) / sizeof(HashItem))));
    2530             : 
    2531           6 :         if (JOURNAL_HEADER_CONTAINS(f->header, n_fields))
    2532          12 :                 printf("Field Objects: %"PRIu64"\n"
    2533             :                        "Field Hash Table Fill: %.1f%%\n",
    2534           6 :                        le64toh(f->header->n_fields),
    2535           6 :                        100.0 * (double) le64toh(f->header->n_fields) / ((double) (le64toh(f->header->field_hash_table_size) / sizeof(HashItem))));
    2536             : 
    2537           6 :         if (JOURNAL_HEADER_CONTAINS(f->header, n_tags))
    2538           6 :                 printf("Tag Objects: %"PRIu64"\n",
    2539           6 :                        le64toh(f->header->n_tags));
    2540           6 :         if (JOURNAL_HEADER_CONTAINS(f->header, n_entry_arrays))
    2541           6 :                 printf("Entry Array Objects: %"PRIu64"\n",
    2542           6 :                        le64toh(f->header->n_entry_arrays));
    2543             : 
    2544           6 :         if (fstat(f->fd, &st) >= 0)
    2545           6 :                 printf("Disk usage: %s\n", format_bytes(bytes, sizeof(bytes), (off_t) st.st_blocks * 512ULL));
    2546           6 : }
    2547             : 
    2548          18 : static int journal_file_warn_btrfs(JournalFile *f) {
    2549             :         unsigned attrs;
    2550             :         int r;
    2551             : 
    2552          18 :         assert(f);
    2553             : 
    2554             :         /* Before we write anything, check if the COW logic is turned
    2555             :          * off on btrfs. Given our write pattern that is quite
    2556             :          * unfriendly to COW file systems this should greatly improve
    2557             :          * performance on COW file systems, such as btrfs, at the
    2558             :          * expense of data integrity features (which shouldn't be too
    2559             :          * bad, given that we do our own checksumming). */
    2560             : 
    2561          18 :         r = btrfs_is_filesystem(f->fd);
    2562          18 :         if (r < 0)
    2563           0 :                 return log_warning_errno(r, "Failed to determine if journal is on btrfs: %m");
    2564          18 :         if (!r)
    2565          17 :                 return 0;
    2566             : 
    2567           1 :         r = read_attr_fd(f->fd, &attrs);
    2568           1 :         if (r < 0)
    2569           0 :                 return log_warning_errno(r, "Failed to read file attributes: %m");
    2570             : 
    2571           1 :         if (attrs & FS_NOCOW_FL) {
    2572           0 :                 log_debug("Detected btrfs file system with copy-on-write disabled, all is good.");
    2573           0 :                 return 0;
    2574             :         }
    2575             : 
    2576           1 :         log_notice("Creating journal file %s on a btrfs file system, and copy-on-write is enabled. "
    2577             :                    "This is likely to slow down journal access substantially, please consider turning "
    2578             :                    "off the copy-on-write file attribute on the journal directory, using chattr +C.", f->path);
    2579             : 
    2580           1 :         return 1;
    2581             : }
    2582             : 
    2583        1365 : int journal_file_open(
    2584             :                 const char *fname,
    2585             :                 int flags,
    2586             :                 mode_t mode,
    2587             :                 bool compress,
    2588             :                 bool seal,
    2589             :                 JournalMetrics *metrics,
    2590             :                 MMapCache *mmap_cache,
    2591             :                 JournalFile *template,
    2592             :                 JournalFile **ret) {
    2593             : 
    2594        1365 :         bool newly_created = false;
    2595             :         JournalFile *f;
    2596             :         void *h;
    2597             :         int r;
    2598             : 
    2599        1365 :         assert(fname);
    2600        1365 :         assert(ret);
    2601             : 
    2602        1384 :         if ((flags & O_ACCMODE) != O_RDONLY &&
    2603          19 :             (flags & O_ACCMODE) != O_RDWR)
    2604           0 :                 return -EINVAL;
    2605             : 
    2606        1977 :         if (!endswith(fname, ".journal") &&
    2607         612 :             !endswith(fname, ".journal~"))
    2608           0 :                 return -EINVAL;
    2609             : 
    2610        1365 :         f = new0(JournalFile, 1);
    2611        1365 :         if (!f)
    2612           0 :                 return -ENOMEM;
    2613             : 
    2614        1365 :         f->fd = -1;
    2615        1365 :         f->mode = mode;
    2616             : 
    2617        1365 :         f->flags = flags;
    2618        1365 :         f->prot = prot_from_flags(flags);
    2619        1365 :         f->writable = (flags & O_ACCMODE) != O_RDONLY;
    2620             : #if defined(HAVE_LZ4)
    2621             :         f->compress_lz4 = compress;
    2622             : #elif defined(HAVE_XZ)
    2623        1365 :         f->compress_xz = compress;
    2624             : #endif
    2625             : #ifdef HAVE_GCRYPT
    2626        1365 :         f->seal = seal;
    2627             : #endif
    2628             : 
    2629        1365 :         if (mmap_cache)
    2630        1347 :                 f->mmap = mmap_cache_ref(mmap_cache);
    2631             :         else {
    2632          18 :                 f->mmap = mmap_cache_new();
    2633          18 :                 if (!f->mmap) {
    2634           0 :                         r = -ENOMEM;
    2635           0 :                         goto fail;
    2636             :                 }
    2637             :         }
    2638             : 
    2639        1365 :         f->path = strdup(fname);
    2640        1365 :         if (!f->path) {
    2641           0 :                 r = -ENOMEM;
    2642           0 :                 goto fail;
    2643             :         }
    2644             : 
    2645        1365 :         f->chain_cache = ordered_hashmap_new(&uint64_hash_ops);
    2646        1365 :         if (!f->chain_cache) {
    2647           0 :                 r = -ENOMEM;
    2648           0 :                 goto fail;
    2649             :         }
    2650             : 
    2651        1365 :         f->fd = open(f->path, f->flags|O_CLOEXEC, f->mode);
    2652        1365 :         if (f->fd < 0) {
    2653           0 :                 r = -errno;
    2654           0 :                 goto fail;
    2655             :         }
    2656             : 
    2657        1365 :         r = journal_file_fstat(f);
    2658        1365 :         if (r < 0)
    2659           0 :                 goto fail;
    2660             : 
    2661        1365 :         if (f->last_stat.st_size == 0 && f->writable) {
    2662             : 
    2663          18 :                 (void) journal_file_warn_btrfs(f);
    2664             : 
    2665             :                 /* Let's attach the creation time to the journal file,
    2666             :                  * so that the vacuuming code knows the age of this
    2667             :                  * file even if the file might end up corrupted one
    2668             :                  * day... Ideally we'd just use the creation time many
    2669             :                  * file systems maintain for each file, but there is
    2670             :                  * currently no usable API to query this, hence let's
    2671             :                  * emulate this via extended attributes. If extended
    2672             :                  * attributes are not supported we'll just skip this,
    2673             :                  * and rely solely on mtime/atime/ctime of the file. */
    2674             : 
    2675          18 :                 fd_setcrtime(f->fd, 0);
    2676             : 
    2677             : #ifdef HAVE_GCRYPT
    2678             :                 /* Try to load the FSPRG state, and if we can't, then
    2679             :                  * just don't do sealing */
    2680          18 :                 if (f->seal) {
    2681           5 :                         r = journal_file_fss_load(f);
    2682           5 :                         if (r < 0)
    2683           5 :                                 f->seal = false;
    2684             :                 }
    2685             : #endif
    2686             : 
    2687          18 :                 r = journal_file_init_header(f, template);
    2688          18 :                 if (r < 0)
    2689           0 :                         goto fail;
    2690             : 
    2691          18 :                 r = journal_file_fstat(f);
    2692          18 :                 if (r < 0)
    2693           0 :                         goto fail;
    2694             : 
    2695          18 :                 newly_created = true;
    2696             :         }
    2697             : 
    2698        1365 :         if (f->last_stat.st_size < (off_t) HEADER_SIZE_MIN) {
    2699           0 :                 r = -EIO;
    2700           0 :                 goto fail;
    2701             :         }
    2702             : 
    2703        1365 :         r = mmap_cache_get(f->mmap, f->fd, f->prot, CONTEXT_HEADER, true, 0, PAGE_ALIGN(sizeof(Header)), &f->last_stat, &h);
    2704        1365 :         if (r < 0)
    2705           0 :                 goto fail;
    2706             : 
    2707        1365 :         f->header = h;
    2708             : 
    2709        1365 :         if (!newly_created) {
    2710        1347 :                 r = journal_file_verify_header(f);
    2711        1347 :                 if (r < 0)
    2712         816 :                         goto fail;
    2713             :         }
    2714             : 
    2715             : #ifdef HAVE_GCRYPT
    2716         549 :         if (!newly_created && f->writable) {
    2717           1 :                 r = journal_file_fss_load(f);
    2718           1 :                 if (r < 0)
    2719           0 :                         goto fail;
    2720             :         }
    2721             : #endif
    2722             : 
    2723         549 :         if (f->writable) {
    2724          19 :                 if (metrics) {
    2725           0 :                         journal_default_metrics(metrics, f->fd);
    2726           0 :                         f->metrics = *metrics;
    2727          19 :                 } else if (template)
    2728           3 :                         f->metrics = template->metrics;
    2729             : 
    2730          19 :                 r = journal_file_refresh_header(f);
    2731          19 :                 if (r < 0)
    2732           0 :                         goto fail;
    2733             :         }
    2734             : 
    2735             : #ifdef HAVE_GCRYPT
    2736         549 :         r = journal_file_hmac_setup(f);
    2737         549 :         if (r < 0)
    2738           0 :                 goto fail;
    2739             : #endif
    2740             : 
    2741         549 :         if (newly_created) {
    2742          18 :                 r = journal_file_setup_field_hash_table(f);
    2743          18 :                 if (r < 0)
    2744           0 :                         goto fail;
    2745             : 
    2746          18 :                 r = journal_file_setup_data_hash_table(f);
    2747          18 :                 if (r < 0)
    2748           0 :                         goto fail;
    2749             : 
    2750             : #ifdef HAVE_GCRYPT
    2751          18 :                 r = journal_file_append_first_tag(f);
    2752          18 :                 if (r < 0)
    2753           0 :                         goto fail;
    2754             : #endif
    2755             :         }
    2756             : 
    2757         549 :         if (mmap_cache_got_sigbus(f->mmap, f->fd)) {
    2758           0 :                 r = -EIO;
    2759           0 :                 goto fail;
    2760             :         }
    2761             : 
    2762         549 :         *ret = f;
    2763         549 :         return 0;
    2764             : 
    2765             : fail:
    2766         816 :         if (f->fd >= 0 && mmap_cache_got_sigbus(f->mmap, f->fd))
    2767           0 :                 r = -EIO;
    2768             : 
    2769         816 :         journal_file_close(f);
    2770             : 
    2771         816 :         return r;
    2772             : }
    2773             : 
    2774           2 : int journal_file_rotate(JournalFile **f, bool compress, bool seal) {
    2775           4 :         _cleanup_free_ char *p = NULL;
    2776             :         size_t l;
    2777           2 :         JournalFile *old_file, *new_file = NULL;
    2778             :         int r;
    2779             : 
    2780           2 :         assert(f);
    2781           2 :         assert(*f);
    2782             : 
    2783           2 :         old_file = *f;
    2784             : 
    2785           2 :         if (!old_file->writable)
    2786           0 :                 return -EINVAL;
    2787             : 
    2788           2 :         if (!endswith(old_file->path, ".journal"))
    2789           0 :                 return -EINVAL;
    2790             : 
    2791           2 :         l = strlen(old_file->path);
    2792          38 :         r = asprintf(&p, "%.*s@" SD_ID128_FORMAT_STR "-%016"PRIx64"-%016"PRIx64".journal",
    2793           2 :                      (int) l - 8, old_file->path,
    2794          32 :                      SD_ID128_FORMAT_VAL(old_file->header->seqnum_id),
    2795           2 :                      le64toh((*f)->header->head_entry_seqnum),
    2796           2 :                      le64toh((*f)->header->head_entry_realtime));
    2797           2 :         if (r < 0)
    2798           0 :                 return -ENOMEM;
    2799             : 
    2800             :         /* Try to rename the file to the archived version. If the file
    2801             :          * already was deleted, we'll get ENOENT, let's ignore that
    2802             :          * case. */
    2803           2 :         r = rename(old_file->path, p);
    2804           2 :         if (r < 0 && errno != ENOENT)
    2805           0 :                 return -errno;
    2806             : 
    2807           2 :         old_file->header->state = STATE_ARCHIVED;
    2808             : 
    2809             :         /* Currently, btrfs is not very good with out write patterns
    2810             :          * and fragments heavily. Let's defrag our journal files when
    2811             :          * we archive them */
    2812           2 :         old_file->defrag_on_close = true;
    2813             : 
    2814           2 :         r = journal_file_open(old_file->path, old_file->flags, old_file->mode, compress, seal, NULL, old_file->mmap, old_file, &new_file);
    2815           2 :         journal_file_close(old_file);
    2816             : 
    2817           2 :         *f = new_file;
    2818           2 :         return r;
    2819             : }
    2820             : 
    2821           0 : int journal_file_open_reliably(
    2822             :                 const char *fname,
    2823             :                 int flags,
    2824             :                 mode_t mode,
    2825             :                 bool compress,
    2826             :                 bool seal,
    2827             :                 JournalMetrics *metrics,
    2828             :                 MMapCache *mmap_cache,
    2829             :                 JournalFile *template,
    2830             :                 JournalFile **ret) {
    2831             : 
    2832             :         int r;
    2833             :         size_t l;
    2834           0 :         _cleanup_free_ char *p = NULL;
    2835             : 
    2836           0 :         r = journal_file_open(fname, flags, mode, compress, seal,
    2837             :                               metrics, mmap_cache, template, ret);
    2838           0 :         if (!IN_SET(r,
    2839             :                     -EBADMSG,           /* corrupted */
    2840             :                     -ENODATA,           /* truncated */
    2841             :                     -EHOSTDOWN,         /* other machine */
    2842             :                     -EPROTONOSUPPORT,   /* incompatible feature */
    2843             :                     -EBUSY,             /* unclean shutdown */
    2844             :                     -ESHUTDOWN,         /* already archived */
    2845             :                     -EIO,               /* IO error, including SIGBUS on mmap */
    2846             :                     -EIDRM              /* File has been deleted */))
    2847           0 :                 return r;
    2848             : 
    2849           0 :         if ((flags & O_ACCMODE) == O_RDONLY)
    2850           0 :                 return r;
    2851             : 
    2852           0 :         if (!(flags & O_CREAT))
    2853           0 :                 return r;
    2854             : 
    2855           0 :         if (!endswith(fname, ".journal"))
    2856           0 :                 return r;
    2857             : 
    2858             :         /* The file is corrupted. Rotate it away and try it again (but only once) */
    2859             : 
    2860           0 :         l = strlen(fname);
    2861           0 :         if (asprintf(&p, "%.*s@%016"PRIx64 "-%016"PRIx64 ".journal~",
    2862           0 :                      (int) l - 8, fname,
    2863             :                      now(CLOCK_REALTIME),
    2864             :                      random_u64()) < 0)
    2865           0 :                 return -ENOMEM;
    2866             : 
    2867           0 :         r = rename(fname, p);
    2868           0 :         if (r < 0)
    2869           0 :                 return -errno;
    2870             : 
    2871             :         /* btrfs doesn't cope well with our write pattern and
    2872             :          * fragments heavily. Let's defrag all files we rotate */
    2873             : 
    2874           0 :         (void) chattr_path(p, false, FS_NOCOW_FL);
    2875           0 :         (void) btrfs_defrag(p);
    2876             : 
    2877           0 :         log_warning("File %s corrupted or uncleanly shut down, renaming and replacing.", fname);
    2878             : 
    2879           0 :         return journal_file_open(fname, flags, mode, compress, seal,
    2880             :                                  metrics, mmap_cache, template, ret);
    2881             : }
    2882             : 
    2883       10001 : int journal_file_copy_entry(JournalFile *from, JournalFile *to, Object *o, uint64_t p, uint64_t *seqnum, Object **ret, uint64_t *offset) {
    2884             :         uint64_t i, n;
    2885       10001 :         uint64_t q, xor_hash = 0;
    2886             :         int r;
    2887             :         EntryItem *items;
    2888             :         dual_timestamp ts;
    2889             : 
    2890       10001 :         assert(from);
    2891       10001 :         assert(to);
    2892       10001 :         assert(o);
    2893       10001 :         assert(p);
    2894             : 
    2895       10001 :         if (!to->writable)
    2896           0 :                 return -EPERM;
    2897             : 
    2898       10001 :         ts.monotonic = le64toh(o->entry.monotonic);
    2899       10001 :         ts.realtime = le64toh(o->entry.realtime);
    2900             : 
    2901       10001 :         n = journal_file_entry_n_items(o);
    2902             :         /* alloca() can't take 0, hence let's allocate at least one */
    2903       10001 :         items = alloca(sizeof(EntryItem) * MAX(1u, n));
    2904             : 
    2905      325550 :         for (i = 0; i < n; i++) {
    2906             :                 uint64_t l, h;
    2907             :                 le64_t le_hash;
    2908             :                 size_t t;
    2909             :                 void *data;
    2910             :                 Object *u;
    2911             : 
    2912      152774 :                 q = le64toh(o->entry.items[i].object_offset);
    2913      152774 :                 le_hash = o->entry.items[i].hash;
    2914             : 
    2915      152774 :                 r = journal_file_move_to_object(from, OBJECT_DATA, q, &o);
    2916      152774 :                 if (r < 0)
    2917           0 :                         return r;
    2918             : 
    2919      152774 :                 if (le_hash != o->data.hash)
    2920           0 :                         return -EBADMSG;
    2921             : 
    2922      152774 :                 l = le64toh(o->object.size) - offsetof(Object, data.payload);
    2923      152774 :                 t = (size_t) l;
    2924             : 
    2925             :                 /* We hit the limit on 32bit machines */
    2926      152774 :                 if ((uint64_t) t != l)
    2927           0 :                         return -E2BIG;
    2928             : 
    2929      152774 :                 if (o->object.flags & OBJECT_COMPRESSION_MASK) {
    2930             : #if defined(HAVE_XZ) || defined(HAVE_LZ4)
    2931           1 :                         size_t rsize = 0;
    2932             : 
    2933           2 :                         r = decompress_blob(o->object.flags & OBJECT_COMPRESSION_MASK,
    2934           1 :                                             o->data.payload, l, &from->compress_buffer, &from->compress_buffer_size, &rsize, 0);
    2935           1 :                         if (r < 0)
    2936           0 :                                 return r;
    2937             : 
    2938           1 :                         data = from->compress_buffer;
    2939           1 :                         l = rsize;
    2940             : #else
    2941             :                         return -EPROTONOSUPPORT;
    2942             : #endif
    2943             :                 } else
    2944      152773 :                         data = o->data.payload;
    2945             : 
    2946      152774 :                 r = journal_file_append_data(to, data, l, &u, &h);
    2947      152774 :                 if (r < 0)
    2948           0 :                         return r;
    2949             : 
    2950      152774 :                 xor_hash ^= le64toh(u->data.hash);
    2951      152774 :                 items[i].object_offset = htole64(h);
    2952      152774 :                 items[i].hash = u->data.hash;
    2953             : 
    2954      152774 :                 r = journal_file_move_to_object(from, OBJECT_ENTRY, p, &o);
    2955      152774 :                 if (r < 0)
    2956           0 :                         return r;
    2957             :         }
    2958             : 
    2959       10001 :         r = journal_file_append_entry_internal(to, &ts, xor_hash, items, n, seqnum, ret, offset);
    2960             : 
    2961       10001 :         if (mmap_cache_got_sigbus(to->mmap, to->fd))
    2962           0 :                 return -EIO;
    2963             : 
    2964       10001 :         return r;
    2965             : }
    2966             : 
    2967           0 : void journal_default_metrics(JournalMetrics *m, int fd) {
    2968           0 :         uint64_t fs_size = 0;
    2969             :         struct statvfs ss;
    2970             :         char a[FORMAT_BYTES_MAX], b[FORMAT_BYTES_MAX], c[FORMAT_BYTES_MAX], d[FORMAT_BYTES_MAX];
    2971             : 
    2972           0 :         assert(m);
    2973           0 :         assert(fd >= 0);
    2974             : 
    2975           0 :         if (fstatvfs(fd, &ss) >= 0)
    2976           0 :                 fs_size = ss.f_frsize * ss.f_blocks;
    2977             : 
    2978           0 :         if (m->max_use == (uint64_t) -1) {
    2979             : 
    2980           0 :                 if (fs_size > 0) {
    2981           0 :                         m->max_use = PAGE_ALIGN(fs_size / 10); /* 10% of file system size */
    2982             : 
    2983           0 :                         if (m->max_use > DEFAULT_MAX_USE_UPPER)
    2984           0 :                                 m->max_use = DEFAULT_MAX_USE_UPPER;
    2985             : 
    2986           0 :                         if (m->max_use < DEFAULT_MAX_USE_LOWER)
    2987           0 :                                 m->max_use = DEFAULT_MAX_USE_LOWER;
    2988             :                 } else
    2989           0 :                         m->max_use = DEFAULT_MAX_USE_LOWER;
    2990             :         } else {
    2991           0 :                 m->max_use = PAGE_ALIGN(m->max_use);
    2992             : 
    2993           0 :                 if (m->max_use < JOURNAL_FILE_SIZE_MIN*2)
    2994           0 :                         m->max_use = JOURNAL_FILE_SIZE_MIN*2;
    2995             :         }
    2996             : 
    2997           0 :         if (m->max_size == (uint64_t) -1) {
    2998           0 :                 m->max_size = PAGE_ALIGN(m->max_use / 8); /* 8 chunks */
    2999             : 
    3000           0 :                 if (m->max_size > DEFAULT_MAX_SIZE_UPPER)
    3001           0 :                         m->max_size = DEFAULT_MAX_SIZE_UPPER;
    3002             :         } else
    3003           0 :                 m->max_size = PAGE_ALIGN(m->max_size);
    3004             : 
    3005           0 :         if (m->max_size < JOURNAL_FILE_SIZE_MIN)
    3006           0 :                 m->max_size = JOURNAL_FILE_SIZE_MIN;
    3007             : 
    3008           0 :         if (m->max_size*2 > m->max_use)
    3009           0 :                 m->max_use = m->max_size*2;
    3010             : 
    3011           0 :         if (m->min_size == (uint64_t) -1)
    3012           0 :                 m->min_size = JOURNAL_FILE_SIZE_MIN;
    3013             :         else {
    3014           0 :                 m->min_size = PAGE_ALIGN(m->min_size);
    3015             : 
    3016           0 :                 if (m->min_size < JOURNAL_FILE_SIZE_MIN)
    3017           0 :                         m->min_size = JOURNAL_FILE_SIZE_MIN;
    3018             : 
    3019           0 :                 if (m->min_size > m->max_size)
    3020           0 :                         m->max_size = m->min_size;
    3021             :         }
    3022             : 
    3023           0 :         if (m->keep_free == (uint64_t) -1) {
    3024             : 
    3025           0 :                 if (fs_size > 0) {
    3026           0 :                         m->keep_free = PAGE_ALIGN(fs_size * 3 / 20); /* 15% of file system size */
    3027             : 
    3028           0 :                         if (m->keep_free > DEFAULT_KEEP_FREE_UPPER)
    3029           0 :                                 m->keep_free = DEFAULT_KEEP_FREE_UPPER;
    3030             : 
    3031             :                 } else
    3032           0 :                         m->keep_free = DEFAULT_KEEP_FREE;
    3033             :         }
    3034             : 
    3035           0 :         log_debug("Fixed max_use=%s max_size=%s min_size=%s keep_free=%s",
    3036             :                   format_bytes(a, sizeof(a), m->max_use),
    3037             :                   format_bytes(b, sizeof(b), m->max_size),
    3038             :                   format_bytes(c, sizeof(c), m->min_size),
    3039             :                   format_bytes(d, sizeof(d), m->keep_free));
    3040           0 : }
    3041             : 
    3042           0 : int journal_file_get_cutoff_realtime_usec(JournalFile *f, usec_t *from, usec_t *to) {
    3043           0 :         assert(f);
    3044           0 :         assert(from || to);
    3045             : 
    3046           0 :         if (from) {
    3047           0 :                 if (f->header->head_entry_realtime == 0)
    3048           0 :                         return -ENOENT;
    3049             : 
    3050           0 :                 *from = le64toh(f->header->head_entry_realtime);
    3051             :         }
    3052             : 
    3053           0 :         if (to) {
    3054           0 :                 if (f->header->tail_entry_realtime == 0)
    3055           0 :                         return -ENOENT;
    3056             : 
    3057           0 :                 *to = le64toh(f->header->tail_entry_realtime);
    3058             :         }
    3059             : 
    3060           0 :         return 1;
    3061             : }
    3062             : 
    3063           0 : int journal_file_get_cutoff_monotonic_usec(JournalFile *f, sd_id128_t boot_id, usec_t *from, usec_t *to) {
    3064             :         Object *o;
    3065             :         uint64_t p;
    3066             :         int r;
    3067             : 
    3068           0 :         assert(f);
    3069           0 :         assert(from || to);
    3070             : 
    3071           0 :         r = find_data_object_by_boot_id(f, boot_id, &o, &p);
    3072           0 :         if (r <= 0)
    3073           0 :                 return r;
    3074             : 
    3075           0 :         if (le64toh(o->data.n_entries) <= 0)
    3076           0 :                 return 0;
    3077             : 
    3078           0 :         if (from) {
    3079           0 :                 r = journal_file_move_to_object(f, OBJECT_ENTRY, le64toh(o->data.entry_offset), &o);
    3080           0 :                 if (r < 0)
    3081           0 :                         return r;
    3082             : 
    3083           0 :                 *from = le64toh(o->entry.monotonic);
    3084             :         }
    3085             : 
    3086           0 :         if (to) {
    3087           0 :                 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
    3088           0 :                 if (r < 0)
    3089           0 :                         return r;
    3090             : 
    3091           0 :                 r = generic_array_get_plus_one(f,
    3092           0 :                                                le64toh(o->data.entry_offset),
    3093           0 :                                                le64toh(o->data.entry_array_offset),
    3094           0 :                                                le64toh(o->data.n_entries)-1,
    3095             :                                                &o, NULL);
    3096           0 :                 if (r <= 0)
    3097           0 :                         return r;
    3098             : 
    3099           0 :                 *to = le64toh(o->entry.monotonic);
    3100             :         }
    3101             : 
    3102           0 :         return 1;
    3103             : }
    3104             : 
    3105           6 : bool journal_file_rotate_suggested(JournalFile *f, usec_t max_file_usec) {
    3106           6 :         assert(f);
    3107             : 
    3108             :         /* If we gained new header fields we gained new features,
    3109             :          * hence suggest a rotation */
    3110           6 :         if (le64toh(f->header->header_size) < sizeof(Header)) {
    3111           0 :                 log_debug("%s uses an outdated header, suggesting rotation.", f->path);
    3112           0 :                 return true;
    3113             :         }
    3114             : 
    3115             :         /* Let's check if the hash tables grew over a certain fill
    3116             :          * level (75%, borrowing this value from Java's hash table
    3117             :          * implementation), and if so suggest a rotation. To calculate
    3118             :          * the fill level we need the n_data field, which only exists
    3119             :          * in newer versions. */
    3120             : 
    3121           6 :         if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
    3122           6 :                 if (le64toh(f->header->n_data) * 4ULL > (le64toh(f->header->data_hash_table_size) / sizeof(HashItem)) * 3ULL) {
    3123           0 :                         log_debug("Data hash table of %s has a fill level at %.1f (%"PRIu64" of %"PRIu64" items, %llu file size, %"PRIu64" bytes per hash table item), suggesting rotation.",
    3124             :                                   f->path,
    3125             :                                   100.0 * (double) le64toh(f->header->n_data) / ((double) (le64toh(f->header->data_hash_table_size) / sizeof(HashItem))),
    3126             :                                   le64toh(f->header->n_data),
    3127             :                                   le64toh(f->header->data_hash_table_size) / sizeof(HashItem),
    3128             :                                   (unsigned long long) f->last_stat.st_size,
    3129             :                                   f->last_stat.st_size / le64toh(f->header->n_data));
    3130           0 :                         return true;
    3131             :                 }
    3132             : 
    3133           6 :         if (JOURNAL_HEADER_CONTAINS(f->header, n_fields))
    3134           6 :                 if (le64toh(f->header->n_fields) * 4ULL > (le64toh(f->header->field_hash_table_size) / sizeof(HashItem)) * 3ULL) {
    3135           0 :                         log_debug("Field hash table of %s has a fill level at %.1f (%"PRIu64" of %"PRIu64" items), suggesting rotation.",
    3136             :                                   f->path,
    3137             :                                   100.0 * (double) le64toh(f->header->n_fields) / ((double) (le64toh(f->header->field_hash_table_size) / sizeof(HashItem))),
    3138             :                                   le64toh(f->header->n_fields),
    3139             :                                   le64toh(f->header->field_hash_table_size) / sizeof(HashItem));
    3140           0 :                         return true;
    3141             :                 }
    3142             : 
    3143             :         /* Are the data objects properly indexed by field objects? */
    3144          12 :         if (JOURNAL_HEADER_CONTAINS(f->header, n_data) &&
    3145          12 :             JOURNAL_HEADER_CONTAINS(f->header, n_fields) &&
    3146           8 :             le64toh(f->header->n_data) > 0 &&
    3147           2 :             le64toh(f->header->n_fields) == 0)
    3148           0 :                 return true;
    3149             : 
    3150           6 :         if (max_file_usec > 0) {
    3151             :                 usec_t t, h;
    3152             : 
    3153           0 :                 h = le64toh(f->header->head_entry_realtime);
    3154           0 :                 t = now(CLOCK_REALTIME);
    3155             : 
    3156           0 :                 if (h > 0 && t > h + max_file_usec)
    3157           0 :                         return true;
    3158             :         }
    3159             : 
    3160           6 :         return false;
    3161             : }

Generated by: LCOV version 1.11