From 9febff20c71f7112a3652cba81d41b260c178716 Mon Sep 17 00:00:00 2001 From: Emmanuele Bassi Date: Tue, 19 Sep 2017 17:31:27 +0100 Subject: Add initial infrastructure for Profiling The EosProfileProbe API allows defining profiling probes that can be used to efficiently measure the time spent in a critical section. The Profiling API is meant to collect samples and generate a report at the end of the lifetime of the process, either by printing out the results once the process terminates; or by saving the raw data in a binary file that can be loaded at a later date. This profiling API is meant to be as close as possible to a zero cost abstraction: - probes are only allocated if profiling is enabled - all profiling API is a no-op if profiling isn't enabled - the C API is meant to be easily tied to a scope, through the use of auto-cleanup macros provided by GLib This allows projects using the Endless SDK to keep the profiling probes in place, instead of conditionally compile them in. https://phabricator.endlessm.com/T18514 --- endless/Makefile.am | 10 + endless/endless.h | 1 + endless/eosinit.c | 12 +- endless/eosprofile-private.h | 36 +++ endless/eosprofile.c | 610 ++++++++++++++++++++++++++++++++++++ endless/eosprofile.h | 48 +++ endless/gvdb/gvdb-builder.c | 545 ++++++++++++++++++++++++++++++++ endless/gvdb/gvdb-builder.h | 60 ++++ endless/gvdb/gvdb-format.h | 87 ++++++ endless/gvdb/gvdb-reader.c | 720 +++++++++++++++++++++++++++++++++++++++++++ endless/gvdb/gvdb-reader.h | 65 ++++ 11 files changed, 2193 insertions(+), 1 deletion(-) create mode 100644 endless/eosprofile-private.h create mode 100644 endless/eosprofile.c create mode 100644 endless/eosprofile.h create mode 100644 endless/gvdb/gvdb-builder.c create mode 100644 endless/gvdb/gvdb-builder.h create mode 100644 endless/gvdb/gvdb-format.h create mode 100644 endless/gvdb/gvdb-reader.c create mode 100644 endless/gvdb/gvdb-reader.h diff --git a/endless/Makefile.am b/endless/Makefile.am index d4d4fea..a536335 100644 --- a/endless/Makefile.am +++ b/endless/Makefile.am @@ -33,11 +33,14 @@ endless_private_installed_headers = \ endless/eoslicense.h \ endless/eosmacros.h \ endless/eospagemanager.h \ + endless/eosprofile.h \ endless/eostypes.h \ endless/eoswindow.h \ endless/eosflexygrid.h endless_library_sources = \ + endless/gvdb/gvdb-builder.c \ + endless/gvdb/gvdb-reader.c \ endless/eosapplication.c \ endless/eosattribution.c endless/eosattribution-private.h \ endless/eoscellrendererpixbuflink.c endless/eoscellrendererpixbuflink-private.h \ @@ -47,12 +50,18 @@ endless_library_sources = \ endless/eosinit.c endless/eosinit-private.h \ endless/eoslicense.c \ endless/eospagemanager.c \ + endless/eosprofile.c endless/eosprofile-private.h \ endless/eosresource.c endless/eosresource-private.h \ endless/eostopbar.c endless/eostopbar-private.h \ endless/eosutil.c \ endless/eoswindow.c \ endless/eosflexygrid.c endless/eosflexygridcell.c endless/eosflexygrid-private.h +EXTRA_DIST += \ + endless/gvdb/gvdb-builder.h \ + endless/gvdb/gvdb-format.h \ + endless/gvdb/gvdb-reader.h + # Endless GUI library lib_LTLIBRARIES += libendless-@EOS_SDK_API_VERSION@.la libendless_@EOS_SDK_API_VERSION@_la_SOURCES = \ @@ -65,6 +74,7 @@ libendless_@EOS_SDK_API_VERSION@_la_SOURCES = \ # and turn on debug messages libendless_@EOS_SDK_API_VERSION@_la_CPPFLAGS = \ -I$(builddir)/endless \ + -I$(builddir)/endless/gvdb \ @EOS_SDK_CFLAGS@ \ -DG_LOG_DOMAIN=\"EndlessSDK\" \ -DCOMPILING_EOS_SDK \ diff --git a/endless/endless.h b/endless/endless.h index 9d0ceac..6fe624b 100644 --- a/endless/endless.h +++ b/endless/endless.h @@ -18,6 +18,7 @@ G_BEGIN_DECLS #include "eospagemanager.h" #include "eoswindow.h" #include "eoscustomcontainer.h" +#include "eosprofile.h" #undef _EOS_SDK_INSIDE_ENDLESS_H diff --git a/endless/eosinit.c b/endless/eosinit.c index 19d6d9b..f5c6185 100644 --- a/endless/eosinit.c +++ b/endless/eosinit.c @@ -6,13 +6,14 @@ #include "endless.h" #include "eosinit-private.h" +#include "eosprofile-private.h" /* Constructors supported since GCC 2.7; I have this on GLib's authority. This should also work on Clang. */ #if __GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 7) #define _EOS_CONSTRUCTOR(func) static void __attribute__((constructor)) func (void); -#define _EOS_DESTRUCTOR(func) static void __atrribute__((destructor)) func (void); +#define _EOS_DESTRUCTOR(func) static void __attribute__((destructor)) func (void); #else @@ -38,10 +39,19 @@ _eos_init (void) bindtextdomain (GETTEXT_PACKAGE, LOCALEDIR); bind_textdomain_codeset (GETTEXT_PACKAGE, "UTF-8"); + eos_profile_state_init (); + _eos_initialized = TRUE; } } +_EOS_DESTRUCTOR(_eos_fini); +static void +_eos_fini (void) +{ + eos_profile_state_dump (); +} + /* * eos_is_inited: * diff --git a/endless/eosprofile-private.h b/endless/eosprofile-private.h new file mode 100644 index 0000000..a07de0e --- /dev/null +++ b/endless/eosprofile-private.h @@ -0,0 +1,36 @@ +/* Copyright 2017 Endless */ + +#pragma once + +#include "eosprofile.h" + +G_BEGIN_DECLS + +/* Increase every time the probe format changes */ +#define PROBE_DB_VERSION 1 + +#define PROBE_DB_META_BASE_KEY "/com/endlessm/Sdk/meta" +#define PROBE_DB_META_VERSION_KEY PROBE_DB_META_BASE_KEY "/db_version" + +typedef struct { + GHashTable *probes; + + gboolean capture; + char *capture_file; +} ProfileState; + +G_LOCK_DEFINE_STATIC (profile_state); +static ProfileState *profile_state; + +typedef struct { + gint64 start_time; + gint64 end_time; +} ProfileSample; + +void +eos_profile_state_init (void); + +void +eos_profile_state_dump (void); + +G_END_DECLS diff --git a/endless/eosprofile.c b/endless/eosprofile.c new file mode 100644 index 0000000..1f104d9 --- /dev/null +++ b/endless/eosprofile.c @@ -0,0 +1,610 @@ +/* Copyright 2017 Endless Mobile, Inc. */ + +#include "config.h" + +#include "eosprofile-private.h" + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "gvdb/gvdb-builder.h" + +/** + * SECTION:profiling + * @Title: Profiling + * @Short_description: Profiling tools for applications + * + * The profiling API provided by the Endless SDK is a simple tool for defining + * profiling probes and collecting data over multiple calls. + * + * ### Enabling profiling + * + * Profile probes try to be as close to zero-cost as possible; they are only + * enabled if the `EOS_PROFILE` environment variable is set. This means that + * you can leave the profile probes in your code, and they will be inert until + * the environment is set up for profiling. + * + * ### Using profiling probes + * + * Typically, you want to declare a profiling probe at the beginning of the + * section of the code you wish to measure, and stop it at the end. + * + * The profiling probes are identified by a unique name, typically expressed + * as a path, like `/com/example/ExampleProbe`; this allows creating a tree of + * probes. + * + * If you are using the C API and a GCC-compatible compiler, you will want to + * use the `g_autoptr()` macro to declare the profiling probe, and have it + * automatically collected at the end of the scope, for instance: + * + * |[ + * static void + * some_function (SomeObject *obj) + * { + * some_set_up (obj); + * + * // Here begins the section we wish to profile + * g_autoptr(EosProfileProbe) outer = EOS_PROFILE ("/com/example/some-function"); + * + * some_expensive_computation (obj); + * some_additional_work (obj); + * + * if (some_state (obj)) + * { + * g_autoptr(EosProfileProbe) inner = EOS_PROFILE ("/com/example/some-function/state"); + * + * some_more_computation (obj); + * } + * } + * ]| + * + * In the example above, the `outer` probe is created after we performed some + * operation; since we are not interested into its cost, we are going to + * ignore it. Additionally, the `inner` probe is created conditionally on some + * state, so we can also gather information on the actual number of times the + * inner function is called. In either cases, both the `outer` and `inner` + * probes are automatically stopped once they get out of scope. + * + * ### Capturing profiling data + * + * By default, when the `EOS_PROFILE` environment variable is set, you will + * get a summary at the end of the process, sent to the standard output. + * + * It is also possible to redirect the profiling data to a capture file, by + * setting the `EOS_PROFILE` environment variable to the `capture` value. In + * that case, the profiling data will be stored in a binary format at the + * end of the process, and you can use the `eos-profile` tool to extract the + * probes, timings, and generate a summary. The default filename for the + * captured data is based on the name of the binary and the process ID, and + * it's saved under the `$XDG_CACHE_HOME` directory (see: g_get_user_cache_dir()). + * + * You can also specify the name of the capture file, by setting the + * `EOS_PROFILE` environment variable to `capture:/path/to/file`. + */ + +static int +sample_compare (gconstpointer a, + gconstpointer b) +{ + const ProfileSample *sample_a = a; + const ProfileSample *sample_b = b; + + /* Times are monotonic, so this is always positive */ + gint64 delta_a = sample_a->end_time - sample_a->start_time; + gint64 delta_b = sample_b->end_time - sample_b->start_time; + + if (delta_a < delta_b) + return -1; + + if (delta_a > delta_b) + return 1; + + return 0; +} + +#define N_SAMPLES 64 + +struct _EosProfileProbe { + volatile int ref_count; + + char *file; + gint32 line; + char *function; + char *name; + + GArray *samples; + + GMutex probe_lock; +}; + +static EosProfileProbe eos_profile_dummy_probe; + +static EosProfileProbe * +eos_profile_probe_new (const char *file, + gsize line, + const char *function, + const char *name) +{ + EosProfileProbe *res = g_new0 (EosProfileProbe, 1); + + res->ref_count = 1; + + res->name = g_strdup (name); + res->function = g_strdup (function); + res->file = g_strdup (file); + res->line = line; + + res->samples = g_array_sized_new (FALSE, FALSE, sizeof (ProfileSample), N_SAMPLES); + + g_mutex_init (&res->probe_lock); + + return res; +} + +static void +eos_profile_probe_destroy (gpointer data) +{ + EosProfileProbe *probe = data; + + g_hash_table_remove (profile_state->probes, probe->name); + + g_array_unref (probe->samples); + g_free (probe->name); + g_free (probe->function); + g_free (probe->file); + + g_free (probe); +} + +static EosProfileProbe * +eos_profile_probe_copy (EosProfileProbe *probe) +{ + return probe; +} + +static void +eos_profile_probe_free (EosProfileProbe *probe) +{ + /* no-op */ +} + +G_DEFINE_BOXED_TYPE (EosProfileProbe, eos_profile_probe, + eos_profile_probe_copy, + eos_profile_probe_free) + +/** + * eos_profile_probe_start: + * @file: the source file for the probe, typically represented by %__FILE__ + * @line: the line in the source @file, typically represented by %__LINE__ + * @function: the function for the probe, typically represented by %G_STRFUNC + * @name: a unique name for the probe + * + * Starts a profiling probe for @name, creating it if necessary. + * + * Returns: (transfer none): a profile probe identifier; use eos_profile_probe_stop() + * to stop the profiling on the returned probe + * + * Since: 0.6 + */ +EosProfileProbe * +eos_profile_probe_start (const char *file, + gsize line, + const char *function, + const char *name) +{ + /* Don't measure the lock */ + gint64 sample_time = g_get_monotonic_time (); + + /* We can take this out of the lock because by the time we reach + * eos_profile_probe_stop() the profile state is guaranteed to + * either always exist, or not + */ + if (profile_state == NULL) + return &eos_profile_dummy_probe; + + G_LOCK (profile_state); + + EosProfileProbe *res = g_hash_table_lookup (profile_state->probes, name); + if (res == NULL) + { + res = eos_profile_probe_new (file, line, function, name); + + g_hash_table_insert (profile_state->probes, res->name, res); + } + + g_array_append_vals (res->samples, + &(ProfileSample) { + .start_time = sample_time, + .end_time = -1 + }, + 1); + + G_UNLOCK (profile_state); + + return (EosProfileProbe *) res; +} + +/** + * eos_profile_probe_stop: + * @probe: a #EosProfileProbe + * + * Stops a profiling probe started using eos_profile_probe_start(). + * + * Since: 0.6 + */ +void +eos_profile_probe_stop (EosProfileProbe *probe) +{ + if (probe == &eos_profile_dummy_probe) + return; + + /* Don't measure the lock */ + gint64 sample_time = g_get_monotonic_time (); + + g_autoptr(GMutexLocker) locker = g_mutex_locker_new (&probe->probe_lock); + + /* Ideally, we just want to update the sample we just created, which means + * picking the last slot in the samples array; in practice, this is what + * should happen most of the time, unless we end up recursing. In that case + * we end up with creating a sample while the outer sample is still in + * flight. + * + * If we just recorded the inner samples, we'd end up with a skewed capture, + * as those samples would inevitably be masking the timing of the outer + * samples. + * + * The easiest approach is to discard the inner samples until we reach the + * outermost live sample + */ + int first_in_flight = probe->samples->len - 1; + for (int i = probe->samples->len - 1; i >= 0; i--) + { + const ProfileSample *sample = &g_array_index (probe->samples, ProfileSample, i); + + if (sample->end_time > 0) + break; + + first_in_flight = i; + } + + ProfileSample *sample = &g_array_index (probe->samples, ProfileSample, first_in_flight); + + sample->end_time = sample_time; + + if (first_in_flight != probe->samples->len - 1) + { + int range = probe->samples->len - first_in_flight - 1; + + g_array_remove_range (probe->samples, first_in_flight + 1, range); + } +} + +void +eos_profile_state_init (void) +{ + static gboolean profile_state_inited; + + if (profile_state_inited) + return; + + profile_state_inited = TRUE; + + const char *str = getenv ("EOS_PROFILE"); + if (str != NULL) + { + profile_state = g_new0 (ProfileState, 1); + profile_state->probes = g_hash_table_new_full (g_str_hash, g_str_equal, + NULL, + eos_profile_probe_destroy); + + int capture_prefix_len = strlen ("capture"); + + if (g_ascii_strncasecmp (str, "capture", capture_prefix_len) == 0) + { + profile_state->capture = TRUE; + + const char *filename = str + capture_prefix_len; + if (*filename == ':') + { + filename += 1; + + if (*filename != '\0') + profile_state->capture_file = g_strdup (filename); + } + + if (profile_state->capture_file == NULL || *profile_state->capture_file == '\0') + { + g_autofree char *capture_dir = g_build_filename (g_get_user_cache_dir (), + "com.endlessm.Sdk.Profile", + NULL); + + if (g_mkdir_with_parents (capture_dir, 0700) < 0) + capture_dir = g_get_current_dir (); + + profile_state->capture_file = g_strdup_printf ("%s%s%s.db", + capture_dir, + G_DIR_SEPARATOR_S, + g_get_prgname ()); + } + } + } +} + +static const double +scale_val (double val) +{ + if (val >= G_USEC_PER_SEC) + return val / G_USEC_PER_SEC; + + if (val >= 1000) + return val / 1000.0; + + return val; +} + +static const char * +unit_for (double val) +{ + enum { + SECONDS, + MILLISECONDS, + MICROSECONDS + }; + + const char *units[] = { + [SECONDS] = "s", + [MILLISECONDS] = "ms", + [MICROSECONDS] = "µs", + }; + + if (val >= G_USEC_PER_SEC) + return units[SECONDS]; + + if (val >= 1000) + return units[MILLISECONDS]; + + return units[MICROSECONDS]; +} + +static void +profile_state_dump_to_console (void) +{ + gushort max_columns = 256; + + if (isatty (STDOUT_FILENO)) + { + struct winsize w; + + ioctl (STDOUT_FILENO, TIOCGWINSZ, &w); + + max_columns = w.ws_col; + } + + GHashTableIter iter; + gpointer value; + + g_hash_table_iter_init (&iter, profile_state->probes); + while (g_hash_table_iter_next (&iter, NULL, &value)) + { + EosProfileProbe *probe = value; + + /* Take ownership of the samples in order to sort them; we want to + * pre-sort so that we can easily discard the outliers when doing + * our analysis, later on + */ + GArray *sorted_samples = g_steal_pointer (&probe->samples); + g_array_sort (sorted_samples, sample_compare); + + gint64 min_sample = G_MAXINT64, max_sample = 0; + gint64 total = 0; + + g_autoptr(GArray) valid_samples = g_array_new (FALSE, FALSE, sizeof (guint)); + + for (int i = 0; i < sorted_samples->len; i++) + { + const ProfileSample *sample = &g_array_index (sorted_samples, ProfileSample, i); + + gint64 delta = sample->end_time - sample->start_time; + + /* If the probe never got stopped we need to skip this sample */ + if (delta < 0) + continue; + + g_array_append_val (valid_samples, i); + + if (delta < min_sample) + min_sample = delta; + if (delta > max_sample) + max_sample = delta; + + total += delta; + } + + g_autofree char *msg = NULL; + + if (valid_samples->len > 1) + { + double avg = total / (double) valid_samples->len; + double s = 0; + double s_part = 0; + + for (int i = 1; i < valid_samples->len - 1; i++) + { + guint idx = g_array_index (valid_samples, guint, i); + const ProfileSample *sample = &g_array_index (sorted_samples, ProfileSample, idx); + + gint64 delta = sample->end_time - sample->start_time; + g_assert (delta >= 0); + + double deviation = delta - avg; + s_part += (deviation * deviation); + } + + if (valid_samples->len > 1) + s = sqrt (s_part / (double) valid_samples->len - 1); + else + s = 0.0; + + g_autofree char *stddev = g_strdup_printf (", σ:%g", s); + + msg = + g_strdup_printf ("%d samples: avg:%g %s, min:%d %s, max:%d %s%s)", + valid_samples->len, + scale_val (avg), unit_for (avg), + (int) scale_val (min_sample), unit_for (min_sample), + (int) scale_val (max_sample), unit_for (max_sample), + s == 0.0 ? "" : stddev); + } + else + { + msg = g_strdup ("not enough valid samples found"); + } + + g_autofree char *probe_name = NULL; + + int probe_len = strlen (probe->name); + int msg_len = strlen (msg); + if (probe_len + msg_len >= (int) max_columns - 2) + { + gsize name_len = MAX ((int) max_columns - msg_len, 2); + probe_name = g_strndup (probe->name, name_len); + probe_name[name_len - 1] = '~'; + } + else + probe_name = g_strdup (probe->name); + + g_print ("%s%*c%s\n", + probe_name, + max_columns - strlen (probe_name) - msg_len, ' ', + msg); + g_print (" %s at %s:%d\n\n", + probe->function, + probe->file, probe->line); + } +} + +/* Get the immediate parent table in the GVDB table, using the + * key separator '/' to determine the nesting level. If needed, + * this function will create the intermediate tables + */ +static GvdbItem * +get_parent (GHashTable *table, + char *key, + int length) +{ + GvdbItem *grandparent, *parent; + + if (length == 1) + return NULL; + + while (key[--length - 1] != '/') + ; + + key[length] = '\0'; + + parent = g_hash_table_lookup (table, key); + + if (parent == NULL) + { + parent = gvdb_hash_table_insert (table, key); + + grandparent = get_parent (table, key, length); + + if (grandparent != NULL) + gvdb_item_set_parent (parent, grandparent); + } + + return parent; +} + +void +eos_profile_state_dump (void) +{ + if (profile_state == NULL) + return; + + if (!profile_state->capture) + { + profile_state_dump_to_console (); + return; + } + + g_autoptr(GHashTable) db_table = gvdb_hash_table_new (NULL, NULL); + + /* Metadata for the DB */ + g_autofree char *version_key = g_strdup (PROBE_DB_META_VERSION_KEY); + gsize version_key_len = strlen (version_key); + GvdbItem *meta = gvdb_hash_table_insert (db_table, PROBE_DB_META_VERSION_KEY); + gvdb_item_set_parent (meta, get_parent (db_table, version_key, version_key_len)); + gvdb_item_set_value (meta, g_variant_new_int32 (PROBE_DB_VERSION)); + + /* Iterate over the probes */ + GHashTableIter iter; + gpointer value; + g_hash_table_iter_init (&iter, profile_state->probes); + while (g_hash_table_iter_next (&iter, NULL, &value)) + { + EosProfileProbe *probe = value; + + g_autofree char *key = g_strdup (probe->name); + gsize key_len = strlen (probe->name); + + GvdbItem *item = gvdb_hash_table_insert (db_table, key); + gvdb_item_set_parent (item, get_parent (db_table, key, key_len)); + + GVariantBuilder builder; + + g_variant_builder_init (&builder, G_VARIANT_TYPE ("(sssuua(xx))")); + + g_variant_builder_add (&builder, "s", probe->name); + g_variant_builder_add (&builder, "s", probe->function); + g_variant_builder_add (&builder, "s", probe->file); + g_variant_builder_add (&builder, "u", probe->line); + + g_variant_builder_add (&builder, "u", probe->samples->len); + + /* Take ownership of the samples in order to sort them; we want to + * pre-sort so that we can easily discard the outliers when doing + * our analysis, later on + */ + GArray *sorted_samples = g_steal_pointer (&(probe->samples)); + g_array_sort (sorted_samples, sample_compare); + + g_variant_builder_open (&builder, G_VARIANT_TYPE ("a(xx)")); + + for (int i = 0; i < sorted_samples->len; i++) + { + const ProfileSample *sample = &g_array_index (sorted_samples, ProfileSample, i); + + g_variant_builder_open (&builder, G_VARIANT_TYPE ("(xx)")); + g_variant_builder_add (&builder, "x", sample->start_time); + g_variant_builder_add (&builder, "x", sample->end_time); + g_variant_builder_close (&builder); + } + + g_variant_builder_close (&builder); + + g_array_free (sorted_samples, TRUE); + + gvdb_item_set_value (item, g_variant_builder_end (&builder)); + } + + /* Clean up */ + g_hash_table_unref (profile_state->probes); + g_free (profile_state->capture_file); + g_free (profile_state); + + g_autoptr(GError) error = NULL; + gvdb_table_write_contents (db_table, profile_state->capture_file, + G_BYTE_ORDER != G_LITTLE_ENDIAN, + &error); + + if (error != NULL) + g_printerr ("PROFILE: %s\n", error->message); +} diff --git a/endless/eosprofile.h b/endless/eosprofile.h new file mode 100644 index 0000000..9dacb50 --- /dev/null +++ b/endless/eosprofile.h @@ -0,0 +1,48 @@ +/* Copyright 2017 Endless Mobile, Inc. */ + +#pragma once + +#if !(defined(_EOS_SDK_INSIDE_ENDLESS_H) || defined(COMPILING_EOS_SDK)) +#error "Please do not include this header file directly." +#endif + +#include "eostypes.h" +#include + +G_BEGIN_DECLS + +/** + * EosProfileProbe: + * + * An opaque identifier for a profiling probe. + * + * Since: 0.6 + */ +typedef struct _EosProfileProbe EosProfileProbe; + +/** + * EOS_PROFILE_PROBE: + * @name: the name of the profiling probe + * + * A convenience macro that creates a profiling probe at the given + * location. + * + * Since: 0.6 + */ +#define EOS_PROFILE_PROBE(name) \ + eos_profile_probe_start (__FILE__, __LINE__, G_STRFUNC, name) + +EOS_SDK_AVAILABLE_IN_0_6 +GType eos_profile_probe_get_type (void) G_GNUC_CONST; + +EOS_SDK_AVAILABLE_IN_0_6 +EosProfileProbe * eos_profile_probe_start (const char *file, + gsize line, + const char *function, + const char *name); +EOS_SDK_AVAILABLE_IN_0_6 +void eos_profile_probe_stop (EosProfileProbe *probe); + +G_DEFINE_AUTOPTR_CLEANUP_FUNC(EosProfileProbe, eos_profile_probe_stop) + +G_END_DECLS diff --git a/endless/gvdb/gvdb-builder.c b/endless/gvdb/gvdb-builder.c new file mode 100644 index 0000000..8ed4410 --- /dev/null +++ b/endless/gvdb/gvdb-builder.c @@ -0,0 +1,545 @@ +/* + * Copyright © 2010 Codethink Limited + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the licence, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 02111-1307, USA. + * + * Author: Allison Lortie + */ + +#include "gvdb-builder.h" +#include "gvdb-format.h" + +#include +#include +#if !defined(G_OS_WIN32) || !defined(_MSC_VER) +#include +#endif +#include + + +struct _GvdbItem +{ + gchar *key; + guint32 hash_value; + guint32_le assigned_index; + GvdbItem *parent; + GvdbItem *sibling; + GvdbItem *next; + + /* one of: + * this: + */ + GVariant *value; + + /* this: */ + GHashTable *table; + + /* or this: */ + GvdbItem *child; +}; + +static void +gvdb_item_free (gpointer data) +{ + GvdbItem *item = data; + + g_free (item->key); + + if (item->value) + g_variant_unref (item->value); + + if (item->table) + g_hash_table_unref (item->table); + + g_slice_free (GvdbItem, item); +} + +GHashTable * +gvdb_hash_table_new (GHashTable *parent, + const gchar *name_in_parent) +{ + GHashTable *table; + + table = g_hash_table_new_full (g_str_hash, g_str_equal, + g_free, gvdb_item_free); + + if (parent) + { + GvdbItem *item; + + item = gvdb_hash_table_insert (parent, name_in_parent); + gvdb_item_set_hash_table (item, table); + } + + return table; +} + +static guint32 +djb_hash (const gchar *key) +{ + guint32 hash_value = 5381; + + while (*key) + hash_value = hash_value * 33 + *(signed char *)key++; + + return hash_value; +} + +GvdbItem * +gvdb_hash_table_insert (GHashTable *table, + const gchar *key) +{ + GvdbItem *item; + + item = g_slice_new0 (GvdbItem); + item->key = g_strdup (key); + item->hash_value = djb_hash (key); + + g_hash_table_insert (table, g_strdup (key), item); + + return item; +} + +void +gvdb_hash_table_insert_string (GHashTable *table, + const gchar *key, + const gchar *value) +{ + GvdbItem *item; + + item = gvdb_hash_table_insert (table, key); + gvdb_item_set_value (item, g_variant_new_string (value)); +} + +void +gvdb_item_set_value (GvdbItem *item, + GVariant *value) +{ + g_return_if_fail (!item->value && !item->table && !item->child); + + item->value = g_variant_ref_sink (value); +} + +void +gvdb_item_set_hash_table (GvdbItem *item, + GHashTable *table) +{ + g_return_if_fail (!item->value && !item->table && !item->child); + + item->table = g_hash_table_ref (table); +} + +void +gvdb_item_set_parent (GvdbItem *item, + GvdbItem *parent) +{ + GvdbItem **node; + + g_return_if_fail (g_str_has_prefix (item->key, parent->key)); + g_return_if_fail (!parent->value && !parent->table); + g_return_if_fail (!item->parent && !item->sibling); + + for (node = &parent->child; *node; node = &(*node)->sibling) + if (strcmp ((*node)->key, item->key) > 0) + break; + + item->parent = parent; + item->sibling = *node; + *node = item; +} + +typedef struct +{ + GvdbItem **buckets; + gint n_buckets; +} HashTable; + +static HashTable * +hash_table_new (gint n_buckets) +{ + HashTable *table; + + table = g_slice_new (HashTable); + table->buckets = g_new0 (GvdbItem *, n_buckets); + table->n_buckets = n_buckets; + + return table; +} + +static void +hash_table_free (HashTable *table) +{ + g_free (table->buckets); + + g_slice_free (HashTable, table); +} + +static void +hash_table_insert (gpointer key, + gpointer value, + gpointer data) +{ + guint32 hash_value, bucket; + HashTable *table = data; + GvdbItem *item = value; + + hash_value = djb_hash (key); + bucket = hash_value % table->n_buckets; + item->next = table->buckets[bucket]; + table->buckets[bucket] = item; +} + +static guint32_le +item_to_index (GvdbItem *item) +{ + if (item != NULL) + return item->assigned_index; + + return guint32_to_le (-1u); +} + +typedef struct +{ + GQueue *chunks; + guint64 offset; + gboolean byteswap; +} FileBuilder; + +typedef struct +{ + gsize offset; + gsize size; + gpointer data; +} FileChunk; + +static gpointer +file_builder_allocate (FileBuilder *fb, + guint alignment, + gsize size, + struct gvdb_pointer *pointer) +{ + FileChunk *chunk; + + if (size == 0) + return NULL; + + fb->offset += (-fb->offset) & (alignment - 1); + chunk = g_slice_new (FileChunk); + chunk->offset = fb->offset; + chunk->size = size; + chunk->data = g_malloc (size); + + pointer->start = guint32_to_le (fb->offset); + fb->offset += size; + pointer->end = guint32_to_le (fb->offset); + + g_queue_push_tail (fb->chunks, chunk); + + return chunk->data; +} + +static void +file_builder_add_value (FileBuilder *fb, + GVariant *value, + struct gvdb_pointer *pointer) +{ + GVariant *variant, *normal; + gpointer data; + gsize size; + + if (fb->byteswap) + { + value = g_variant_byteswap (value); + variant = g_variant_new_variant (value); + g_variant_unref (value); + } + else + variant = g_variant_new_variant (value); + + normal = g_variant_get_normal_form (variant); + g_variant_unref (variant); + + size = g_variant_get_size (normal); + data = file_builder_allocate (fb, 8, size, pointer); + g_variant_store (normal, data); + g_variant_unref (normal); +} + +static void +file_builder_add_string (FileBuilder *fb, + const gchar *string, + guint32_le *start, + guint16_le *size) +{ + FileChunk *chunk; + gsize length; + + length = strlen (string); + + chunk = g_slice_new (FileChunk); + chunk->offset = fb->offset; + chunk->size = length; + chunk->data = g_malloc (length); + if (length != 0) + memcpy (chunk->data, string, length); + + *start = guint32_to_le (fb->offset); + *size = guint16_to_le (length); + fb->offset += length; + + g_queue_push_tail (fb->chunks, chunk); +} + +static void +file_builder_allocate_for_hash (FileBuilder *fb, + gsize n_buckets, + gsize n_items, + guint bloom_shift, + gsize n_bloom_words, + guint32_le **bloom_filter, + guint32_le **hash_buckets, + struct gvdb_hash_item **hash_items, + struct gvdb_pointer *pointer) +{ + guint32_le bloom_hdr, table_hdr; + guchar *data; + gsize size; + + g_assert (n_bloom_words < (1u << 27)); + + bloom_hdr = guint32_to_le (bloom_shift << 27 | n_bloom_words); + table_hdr = guint32_to_le (n_buckets); + + size = sizeof bloom_hdr + sizeof table_hdr + + n_bloom_words * sizeof (guint32_le) + + n_buckets * sizeof (guint32_le) + + n_items * sizeof (struct gvdb_hash_item); + + data = file_builder_allocate (fb, 4, size, pointer); + +#define chunk(s) (size -= (s), data += (s), data - (s)) + memcpy (chunk (sizeof bloom_hdr), &bloom_hdr, sizeof bloom_hdr); + memcpy (chunk (sizeof table_hdr), &table_hdr, sizeof table_hdr); + *bloom_filter = (guint32_le *) chunk (n_bloom_words * sizeof (guint32_le)); + *hash_buckets = (guint32_le *) chunk (n_buckets * sizeof (guint32_le)); + *hash_items = (struct gvdb_hash_item *) chunk (n_items * + sizeof (struct gvdb_hash_item)); + g_assert (size == 0); +#undef chunk + + memset (*bloom_filter, 0, n_bloom_words * sizeof (guint32_le)); + + /* NOTE - the code to actually fill in the bloom filter here is missing. + * Patches welcome! + * + * http://en.wikipedia.org/wiki/Bloom_filter + * http://0pointer.de/blog/projects/bloom.html + */ +} + +static void +file_builder_add_hash (FileBuilder *fb, + GHashTable *table, + struct gvdb_pointer *pointer) +{ + guint32_le *buckets, *bloom_filter; + struct gvdb_hash_item *items; + HashTable *mytable; + GvdbItem *item; + guint32 index; + gint bucket; + + mytable = hash_table_new (g_hash_table_size (table)); + g_hash_table_foreach (table, hash_table_insert, mytable); + index = 0; + + for (bucket = 0; bucket < mytable->n_buckets; bucket++) + for (item = mytable->buckets[bucket]; item; item = item->next) + item->assigned_index = guint32_to_le (index++); + + file_builder_allocate_for_hash (fb, mytable->n_buckets, index, 5, 0, + &bloom_filter, &buckets, &items, pointer); + + index = 0; + for (bucket = 0; bucket < mytable->n_buckets; bucket++) + { + buckets[bucket] = guint32_to_le (index); + + for (item = mytable->buckets[bucket]; item; item = item->next) + { + struct gvdb_hash_item *entry = items++; + const gchar *basename; + + g_assert (index == guint32_from_le (item->assigned_index)); + entry->hash_value = guint32_to_le (item->hash_value); + entry->parent = item_to_index (item->parent); + entry->unused = 0; + + if (item->parent != NULL) + basename = item->key + strlen (item->parent->key); + else + basename = item->key; + + file_builder_add_string (fb, basename, + &entry->key_start, + &entry->key_size); + + if (item->value != NULL) + { + g_assert (item->child == NULL && item->table == NULL); + + file_builder_add_value (fb, item->value, &entry->value.pointer); + entry->type = 'v'; + } + + if (item->child != NULL) + { + guint32 children = 0, i = 0; + guint32_le *offsets; + GvdbItem *child; + + g_assert (item->table == NULL); + + for (child = item->child; child; child = child->sibling) + children++; + + offsets = file_builder_allocate (fb, 4, 4 * children, + &entry->value.pointer); + entry->type = 'L'; + + for (child = item->child; child; child = child->sibling) + offsets[i++] = child->assigned_index; + + g_assert (children == i); + } + + if (item->table != NULL) + { + entry->type = 'H'; + file_builder_add_hash (fb, item->table, &entry->value.pointer); + } + + index++; + } + } + + hash_table_free (mytable); +} + +static FileBuilder * +file_builder_new (gboolean byteswap) +{ + FileBuilder *builder; + + builder = g_slice_new (FileBuilder); + builder->chunks = g_queue_new (); + builder->offset = sizeof (struct gvdb_header); + builder->byteswap = byteswap; + + return builder; +} + +static GString * +file_builder_serialise (FileBuilder *fb, + struct gvdb_pointer root) +{ + struct gvdb_header header; + GString *result; + + memset (&header, 0, sizeof (struct gvdb_header)); + + if (fb->byteswap) + { + header.signature[0] = GVDB_SWAPPED_SIGNATURE0; + header.signature[1] = GVDB_SWAPPED_SIGNATURE1; + } + else + { + header.signature[0] = GVDB_SIGNATURE0; + header.signature[1] = GVDB_SIGNATURE1; + } + + result = g_string_new (NULL); + + header.root = root; + g_string_append_len (result, (gpointer) &header, sizeof header); + + while (!g_queue_is_empty (fb->chunks)) + { + FileChunk *chunk = g_queue_pop_head (fb->chunks); + + if (result->len != chunk->offset) + { + gchar zero[8] = { 0, }; + + g_assert (chunk->offset > result->len); + g_assert (chunk->offset - result->len < 8); + + g_string_append_len (result, zero, chunk->offset - result->len); + g_assert (result->len == chunk->offset); + } + + g_string_append_len (result, chunk->data, chunk->size); + g_free (chunk->data); + + g_slice_free (FileChunk, chunk); + } + + g_queue_free (fb->chunks); + g_slice_free (FileBuilder, fb); + + return result; +} + +GBytes * +gvdb_table_write_bytes (GHashTable *table, + gboolean byteswap) +{ + struct gvdb_pointer root; + FileBuilder *fb; + GString *str; + GBytes *retval; + + fb = file_builder_new (byteswap); + file_builder_add_hash (fb, table, &root); + str = file_builder_serialise (fb, root); + + retval = g_bytes_new_take (str->str, str->len); + g_string_free (str, FALSE); + + return retval; +} + +gboolean +gvdb_table_write_contents (GHashTable *table, + const gchar *filename, + gboolean byteswap, + GError **error) +{ + struct gvdb_pointer root; + gboolean status; + FileBuilder *fb; + GString *str; + + fb = file_builder_new (byteswap); + file_builder_add_hash (fb, table, &root); + str = file_builder_serialise (fb, root); + + status = g_file_set_contents (filename, str->str, str->len, error); + g_string_free (str, TRUE); + + return status; +} diff --git a/endless/gvdb/gvdb-builder.h b/endless/gvdb/gvdb-builder.h new file mode 100644 index 0000000..3a486a9 --- /dev/null +++ b/endless/gvdb/gvdb-builder.h @@ -0,0 +1,60 @@ +/* + * Copyright © 2010 Codethink Limited + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the licence, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 02111-1307, USA. + * + * Author: Allison Lortie + */ + +#ifndef __gvdb_builder_h__ +#define __gvdb_builder_h__ + +#include + +typedef struct _GvdbItem GvdbItem; + +G_GNUC_INTERNAL +GHashTable * gvdb_hash_table_new (GHashTable *parent, + const gchar *key); + +G_GNUC_INTERNAL +GvdbItem * gvdb_hash_table_insert (GHashTable *table, + const gchar *key); +G_GNUC_INTERNAL +void gvdb_hash_table_insert_string (GHashTable *table, + const gchar *key, + const gchar *value); + +G_GNUC_INTERNAL +void gvdb_item_set_value (GvdbItem *item, + GVariant *value); +G_GNUC_INTERNAL +void gvdb_item_set_hash_table (GvdbItem *item, + GHashTable *table); +G_GNUC_INTERNAL +void gvdb_item_set_parent (GvdbItem *item, + GvdbItem *parent); + +G_GNUC_INTERNAL +gboolean gvdb_table_write_contents (GHashTable *table, + const gchar *filename, + gboolean byteswap, + GError **error); +G_GNUC_INTERNAL +GBytes * gvdb_table_write_bytes (GHashTable *table, + gboolean byteswap); + +#endif /* __gvdb_builder_h__ */ diff --git a/endless/gvdb/gvdb-format.h b/endless/gvdb/gvdb-format.h new file mode 100644 index 0000000..9efc48d --- /dev/null +++ b/endless/gvdb/gvdb-format.h @@ -0,0 +1,87 @@ +/* + * Copyright © 2010 Codethink Limited + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the licence, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 02111-1307, USA. + * + * Author: Allison Lortie + */ + +#ifndef __gvdb_format_h__ +#define __gvdb_format_h__ + +#include + +typedef struct { guint16 value; } guint16_le; +typedef struct { guint32 value; } guint32_le; + +struct gvdb_pointer { + guint32_le start; + guint32_le end; +}; + +struct gvdb_hash_header { + guint32_le n_bloom_words; + guint32_le n_buckets; +}; + +struct gvdb_hash_item { + guint32_le hash_value; + guint32_le parent; + + guint32_le key_start; + guint16_le key_size; + gchar type; + gchar unused; + + union + { + struct gvdb_pointer pointer; + gchar direct[8]; + } value; +}; + +struct gvdb_header { + guint32 signature[2]; + guint32_le version; + guint32_le options; + + struct gvdb_pointer root; +}; + +static inline guint32_le guint32_to_le (guint32 value) { + guint32_le result = { GUINT32_TO_LE (value) }; + return result; +} + +static inline guint32 guint32_from_le (guint32_le value) { + return GUINT32_FROM_LE (value.value); +} + +static inline guint16_le guint16_to_le (guint16 value) { + guint16_le result = { GUINT16_TO_LE (value) }; + return result; +} + +static inline guint16 guint16_from_le (guint16_le value) { + return GUINT16_FROM_LE (value.value); +} + +#define GVDB_SIGNATURE0 1918981703 +#define GVDB_SIGNATURE1 1953390953 +#define GVDB_SWAPPED_SIGNATURE0 GUINT32_SWAP_LE_BE (GVDB_SIGNATURE0) +#define GVDB_SWAPPED_SIGNATURE1 GUINT32_SWAP_LE_BE (GVDB_SIGNATURE1) + +#endif /* __gvdb_format_h__ */ diff --git a/endless/gvdb/gvdb-reader.c b/endless/gvdb/gvdb-reader.c new file mode 100644 index 0000000..9fa3310 --- /dev/null +++ b/endless/gvdb/gvdb-reader.c @@ -0,0 +1,720 @@ +/* + * Copyright © 2010 Codethink Limited + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the licence, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 02111-1307, USA. + * + * Author: Allison Lortie + */ + +#include "gvdb-reader.h" +#include "gvdb-format.h" + +#include + +struct _GvdbTable { + GBytes *bytes; + + const gchar *data; + gsize size; + + gboolean byteswapped; + gboolean trusted; + + const guint32_le *bloom_words; + guint32 n_bloom_words; + guint bloom_shift; + + const guint32_le *hash_buckets; + guint32 n_buckets; + + struct gvdb_hash_item *hash_items; + guint32 n_hash_items; +}; + +static const gchar * +gvdb_table_item_get_key (GvdbTable *file, + const struct gvdb_hash_item *item, + gsize *size) +{ + guint32 start, end; + + start = guint32_from_le (item->key_start); + *size = guint16_from_le (item->key_size); + end = start + *size; + + if G_UNLIKELY (start > end || end > file->size) + return NULL; + + return file->data + start; +} + +static gconstpointer +gvdb_table_dereference (GvdbTable *file, + const struct gvdb_pointer *pointer, + gint alignment, + gsize *size) +{ + guint32 start, end; + + start = guint32_from_le (pointer->start); + end = guint32_from_le (pointer->end); + + if G_UNLIKELY (start > end || end > file->size || start & (alignment - 1)) + return NULL; + + *size = end - start; + + return file->data + start; +} + +static void +gvdb_table_setup_root (GvdbTable *file, + const struct gvdb_pointer *pointer) +{ + const struct gvdb_hash_header *header; + guint32 n_bloom_words; + guint32 n_buckets; + gsize size; + + header = gvdb_table_dereference (file, pointer, 4, &size); + + if G_UNLIKELY (header == NULL || size < sizeof *header) + return; + + size -= sizeof *header; + + n_bloom_words = guint32_from_le (header->n_bloom_words); + n_buckets = guint32_from_le (header->n_buckets); + n_bloom_words &= (1u << 27) - 1; + + if G_UNLIKELY (n_bloom_words * sizeof (guint32_le) > size) + return; + + file->bloom_words = (gpointer) (header + 1); + size -= n_bloom_words * sizeof (guint32_le); + file->n_bloom_words = n_bloom_words; + + if G_UNLIKELY (n_buckets > G_MAXUINT / sizeof (guint32_le) || + n_buckets * sizeof (guint32_le) > size) + return; + + file->hash_buckets = file->bloom_words + file->n_bloom_words; + size -= n_buckets * sizeof (guint32_le); + file->n_buckets = n_buckets; + + if G_UNLIKELY (size % sizeof (struct gvdb_hash_item)) + return; + + file->hash_items = (gpointer) (file->hash_buckets + n_buckets); + file->n_hash_items = size / sizeof (struct gvdb_hash_item); +} + +/** + * gvdb_table_new_from_bytes: + * @bytes: the #GBytes with the data + * @trusted: if the contents of @bytes are trusted + * @error: %NULL, or a pointer to a %NULL #GError + * @returns: a new #GvdbTable + * + * Creates a new #GvdbTable from the contents of @bytes. + * + * This call can fail if the header contained in @bytes is invalid. + * + * You should call gvdb_table_free() on the return result when you no + * longer require it. + **/ +GvdbTable * +gvdb_table_new_from_bytes (GBytes *bytes, + gboolean trusted, + GError **error) +{ + const struct gvdb_header *header; + GvdbTable *file; + + file = g_slice_new0 (GvdbTable); + file->bytes = g_bytes_ref (bytes); + file->data = g_bytes_get_data (bytes, &file->size); + file->trusted = trusted; + + if (file->size < sizeof (struct gvdb_header)) + goto invalid; + + header = (gpointer) file->data; + + if (header->signature[0] == GVDB_SIGNATURE0 && + header->signature[1] == GVDB_SIGNATURE1 && + guint32_from_le (header->version) == 0) + file->byteswapped = FALSE; + + else if (header->signature[0] == GVDB_SWAPPED_SIGNATURE0 && + header->signature[1] == GVDB_SWAPPED_SIGNATURE1 && + guint32_from_le (header->version) == 0) + file->byteswapped = TRUE; + + else + goto invalid; + + gvdb_table_setup_root (file, &header->root); + + return file; + +invalid: + g_set_error_literal (error, G_FILE_ERROR, G_FILE_ERROR_INVAL, "invalid gvdb header"); + + g_bytes_unref (file->bytes); + + g_slice_free (GvdbTable, file); + + return NULL; +} + +/** + * gvdb_table_new: + * @filename: a filename + * @trusted: if the contents of @bytes are trusted + * @error: %NULL, or a pointer to a %NULL #GError + * @returns: a new #GvdbTable + * + * Creates a new #GvdbTable using the #GMappedFile for @filename as the + * #GBytes. + **/ +GvdbTable * +gvdb_table_new (const gchar *filename, + gboolean trusted, + GError **error) +{ + GMappedFile *mapped; + GvdbTable *table; + GBytes *bytes; + + mapped = g_mapped_file_new (filename, FALSE, error); + if (!mapped) + return NULL; + + bytes = g_mapped_file_get_bytes (mapped); + table = gvdb_table_new_from_bytes (bytes, trusted, error); + g_mapped_file_unref (mapped); + g_bytes_unref (bytes); + + g_prefix_error (error, "%s: ", filename); + + return table; +} + +static gboolean +gvdb_table_bloom_filter (GvdbTable *file, + guint32 hash_value) +{ + guint32 word, mask; + + if (file->n_bloom_words == 0) + return TRUE; + + word = (hash_value / 32) % file->n_bloom_words; + mask = 1 << (hash_value & 31); + mask |= 1 << ((hash_value >> file->bloom_shift) & 31); + + return (guint32_from_le (file->bloom_words[word]) & mask) == mask; +} + +static gboolean +gvdb_table_check_name (GvdbTable *file, + struct gvdb_hash_item *item, + const gchar *key, + guint key_length) +{ + const gchar *this_key; + gsize this_size; + guint32 parent; + + this_key = gvdb_table_item_get_key (file, item, &this_size); + + if G_UNLIKELY (this_key == NULL || this_size > key_length) + return FALSE; + + key_length -= this_size; + + if G_UNLIKELY (memcmp (this_key, key + key_length, this_size) != 0) + return FALSE; + + parent = guint32_from_le (item->parent); + if (key_length == 0 && parent == 0xffffffffu) + return TRUE; + + if G_LIKELY (parent < file->n_hash_items && this_size > 0) + return gvdb_table_check_name (file, + &file->hash_items[parent], + key, key_length); + + return FALSE; +} + +static const struct gvdb_hash_item * +gvdb_table_lookup (GvdbTable *file, + const gchar *key, + gchar type) +{ + guint32 hash_value = 5381; + guint key_length; + guint32 bucket; + guint32 lastno; + guint32 itemno; + + if G_UNLIKELY (file->n_buckets == 0 || file->n_hash_items == 0) + return NULL; + + for (key_length = 0; key[key_length]; key_length++) + hash_value = (hash_value * 33) + ((signed char *) key)[key_length]; + + if (!gvdb_table_bloom_filter (file, hash_value)) + return NULL; + + bucket = hash_value % file->n_buckets; + itemno = guint32_from_le (file->hash_buckets[bucket]); + + if (bucket == file->n_buckets - 1 || + (lastno = guint32_from_le(file->hash_buckets[bucket + 1])) > file->n_hash_items) + lastno = file->n_hash_items; + + while G_LIKELY (itemno < lastno) + { + struct gvdb_hash_item *item = &file->hash_items[itemno]; + + if (hash_value == guint32_from_le (item->hash_value)) + if G_LIKELY (gvdb_table_check_name (file, item, key, key_length)) + if G_LIKELY (item->type == type) + return item; + + itemno++; + } + + return NULL; +} + +static gboolean +gvdb_table_list_from_item (GvdbTable *table, + const struct gvdb_hash_item *item, + const guint32_le **list, + guint *length) +{ + gsize size; + + *list = gvdb_table_dereference (table, &item->value.pointer, 4, &size); + + if G_LIKELY (*list == NULL || size % 4) + return FALSE; + + *length = size / 4; + + return TRUE; +} + +/** + * gvdb_table_get_names: + * @table: a #GvdbTable + * @length: the number of items returned, or %NULL + * + * Gets a list of all names contained in @table. + * + * No call to gvdb_table_get_table(), gvdb_table_list() or + * gvdb_table_get_value() will succeed unless it is for one of the + * names returned by this function. + * + * Note that some names that are returned may still fail for all of the + * above calls in the case of the corrupted file. Note also that the + * returned strings may not be utf8. + * + * Returns: a %NULL-terminated list of strings, of length @length + **/ +gchar ** +gvdb_table_get_names (GvdbTable *table, + gint *length) +{ + gchar **names; + guint n_names; + guint filled; + guint total; + guint i; + + /* We generally proceed by iterating over the list of items in the + * hash table (in order of appearance) recording them into an array. + * + * Each item has a parent item (except root items). The parent item + * forms part of the name of the item. We could go fetching the + * parent item chain at the point that we encounter each item but then + * we would need to implement some sort of recursion along with checks + * for self-referential items. + * + * Instead, we do a number of passes. Each pass will build up one + * level of names (starting from the root). We continue to do passes + * until no more items are left. The first pass will only add root + * items and each further pass will only add items whose direct parent + * is an item added in the immediately previous pass. It's also + * possible that items get filled if they follow their parent within a + * particular pass. + * + * At most we will have a number of passes equal to the depth of the + * tree. Self-referential items will never be filled in (since their + * parent will have never been filled in). We continue until we have + * a pass that fills in no additional items. + * + * This takes an O(n) algorithm and turns it into O(n*m) where m is + * the depth of the tree, but in all sane cases the tree won't be very + * deep and the constant factor of this algorithm is lower (and the + * complexity of coding it, as well). + */ + + n_names = table->n_hash_items; + names = g_new0 (gchar *, n_names + 1); + + /* 'names' starts out all-NULL. On each pass we record the number + * of items changed from NULL to non-NULL in 'filled' so we know if we + * should repeat the loop. 'total' counts the total number of items + * filled. If 'total' ends up equal to 'n_names' then we know that + * 'names' has been completely filled. + */ + + total = 0; + do + { + /* Loop until we have filled no more entries */ + filled = 0; + + for (i = 0; i < n_names; i++) + { + const struct gvdb_hash_item *item = &table->hash_items[i]; + const gchar *name; + gsize name_length; + guint32 parent; + + /* already got it on a previous pass */ + if (names[i] != NULL) + continue; + + parent = guint32_from_le (item->parent); + + if (parent == 0xffffffffu) + { + /* it's a root item */ + name = gvdb_table_item_get_key (table, item, &name_length); + + if (name != NULL) + { + names[i] = g_strndup (name, name_length); + filled++; + } + } + + else if (parent < n_names && names[parent] != NULL) + { + /* It's a non-root item whose parent was filled in already. + * + * Calculate the name of this item by combining it with + * its parent name. + */ + name = gvdb_table_item_get_key (table, item, &name_length); + + if (name != NULL) + { + const gchar *parent_name = names[parent]; + gsize parent_length; + gchar *fullname; + + parent_length = strlen (parent_name); + fullname = g_malloc (parent_length + name_length + 1); + memcpy (fullname, parent_name, parent_length); + memcpy (fullname + parent_length, name, name_length); + fullname[parent_length + name_length] = '\0'; + names[i] = fullname; + filled++; + } + } + } + + total += filled; + } + while (filled && total < n_names); + + /* If the table was corrupted then 'names' may have holes in it. + * Collapse those. + */ + if G_UNLIKELY (total != n_names) + { + GPtrArray *fixed_names; + + fixed_names = g_ptr_array_new (); + for (i = 0; i < n_names; i++) + if (names[i] != NULL) + g_ptr_array_add (fixed_names, names[i]); + + g_free (names); + n_names = fixed_names->len; + g_ptr_array_add (fixed_names, NULL); + names = (gchar **) g_ptr_array_free (fixed_names, FALSE); + } + + if (length) + *length = n_names; + + return names; +} + +/** + * gvdb_table_list: + * @file: a #GvdbTable + * @key: a string + * @returns: a %NULL-terminated string array + * + * List all of the keys that appear below @key. The nesting of keys + * within the hash file is defined by the program that created the hash + * file. One thing is constant: each item in the returned array can be + * concatenated to @key to obtain the full name of that key. + * + * It is not possible to tell from this function if a given key is + * itself a path, a value, or another hash table; you are expected to + * know this for yourself. + * + * You should call g_strfreev() on the return result when you no longer + * require it. + **/ +gchar ** +gvdb_table_list (GvdbTable *file, + const gchar *key) +{ + const struct gvdb_hash_item *item; + const guint32_le *list; + gchar **strv; + guint length; + guint i; + + if ((item = gvdb_table_lookup (file, key, 'L')) == NULL) + return NULL; + + if (!gvdb_table_list_from_item (file, item, &list, &length)) + return NULL; + + strv = g_new (gchar *, length + 1); + for (i = 0; i < length; i++) + { + guint32 itemno = guint32_from_le (list[i]); + + if (itemno < file->n_hash_items) + { + const struct gvdb_hash_item *item; + const gchar *string; + gsize strsize; + + item = file->hash_items + itemno; + + string = gvdb_table_item_get_key (file, item, &strsize); + + if (string != NULL) + strv[i] = g_strndup (string, strsize); + else + strv[i] = g_malloc0 (1); + } + else + strv[i] = g_malloc0 (1); + } + + strv[i] = NULL; + + return strv; +} + +/** + * gvdb_table_has_value: + * @file: a #GvdbTable + * @key: a string + * @returns: %TRUE if @key is in the table + * + * Checks for a value named @key in @file. + * + * Note: this function does not consider non-value nodes (other hash + * tables, for example). + **/ +gboolean +gvdb_table_has_value (GvdbTable *file, + const gchar *key) +{ + static const struct gvdb_hash_item *item; + gsize size; + + item = gvdb_table_lookup (file, key, 'v'); + + if (item == NULL) + return FALSE; + + return gvdb_table_dereference (file, &item->value.pointer, 8, &size) != NULL; +} + +static GVariant * +gvdb_table_value_from_item (GvdbTable *table, + const struct gvdb_hash_item *item) +{ + GVariant *variant, *value; + gconstpointer data; + GBytes *bytes; + gsize size; + + data = gvdb_table_dereference (table, &item->value.pointer, 8, &size); + + if G_UNLIKELY (data == NULL) + return NULL; + + bytes = g_bytes_new_from_bytes (table->bytes, ((gchar *) data) - table->data, size); + variant = g_variant_new_from_bytes (G_VARIANT_TYPE_VARIANT, bytes, table->trusted); + value = g_variant_get_variant (variant); + g_variant_unref (variant); + g_bytes_unref (bytes); + + return value; +} + +/** + * gvdb_table_get_value: + * @file: a #GvdbTable + * @key: a string + * @returns: a #GVariant, or %NULL + * + * Looks up a value named @key in @file. + * + * If the value is not found then %NULL is returned. Otherwise, a new + * #GVariant instance is returned. The #GVariant does not depend on the + * continued existence of @file. + * + * You should call g_variant_unref() on the return result when you no + * longer require it. + **/ +GVariant * +gvdb_table_get_value (GvdbTable *file, + const gchar *key) +{ + const struct gvdb_hash_item *item; + GVariant *value; + + if ((item = gvdb_table_lookup (file, key, 'v')) == NULL) + return NULL; + + value = gvdb_table_value_from_item (file, item); + + if (value && file->byteswapped) + { + GVariant *tmp; + + tmp = g_variant_byteswap (value); + g_variant_unref (value); + value = tmp; + } + + return value; +} + +/** + * gvdb_table_get_raw_value: + * @table: a #GvdbTable + * @key: a string + * @returns: a #GVariant, or %NULL + * + * Looks up a value named @key in @file. + * + * This call is equivalent to gvdb_table_get_value() except that it + * never byteswaps the value. + **/ +GVariant * +gvdb_table_get_raw_value (GvdbTable *table, + const gchar *key) +{ + const struct gvdb_hash_item *item; + + if ((item = gvdb_table_lookup (table, key, 'v')) == NULL) + return NULL; + + return gvdb_table_value_from_item (table, item); +} + +/** + * gvdb_table_get_table: + * @file: a #GvdbTable + * @key: a string + * @returns: a new #GvdbTable, or %NULL + * + * Looks up the hash table named @key in @file. + * + * The toplevel hash table in a #GvdbTable can contain reference to + * child hash tables (and those can contain further references...). + * + * If @key is not found in @file then %NULL is returned. Otherwise, a + * new #GvdbTable is returned, referring to the child hashtable as + * contained in the file. This newly-created #GvdbTable does not depend + * on the continued existence of @file. + * + * You should call gvdb_table_free() on the return result when you no + * longer require it. + **/ +GvdbTable * +gvdb_table_get_table (GvdbTable *file, + const gchar *key) +{ + const struct gvdb_hash_item *item; + GvdbTable *new; + + item = gvdb_table_lookup (file, key, 'H'); + + if (item == NULL) + return NULL; + + new = g_slice_new0 (GvdbTable); + new->bytes = g_bytes_ref (file->bytes); + new->byteswapped = file->byteswapped; + new->trusted = file->trusted; + new->data = file->data; + new->size = file->size; + + gvdb_table_setup_root (new, &item->value.pointer); + + return new; +} + +/** + * gvdb_table_free: + * @file: a #GvdbTable + * + * Frees @file. + **/ +void +gvdb_table_free (GvdbTable *file) +{ + g_bytes_unref (file->bytes); + g_slice_free (GvdbTable, file); +} + +/** + * gvdb_table_is_valid: + * @table: a #GvdbTable + * @returns: %TRUE if @table is still valid + * + * Checks if the table is still valid. + * + * An on-disk GVDB can be marked as invalid. This happens when the file + * has been replaced. The appropriate action is typically to reopen the + * file. + **/ +gboolean +gvdb_table_is_valid (GvdbTable *table) +{ + return !!*table->data; +} diff --git a/endless/gvdb/gvdb-reader.h b/endless/gvdb/gvdb-reader.h new file mode 100644 index 0000000..398824a --- /dev/null +++ b/endless/gvdb/gvdb-reader.h @@ -0,0 +1,65 @@ +/* + * Copyright © 2010 Codethink Limited + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the licence, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 02111-1307, USA. + * + * Author: Allison Lortie + */ + +#ifndef __gvdb_reader_h__ +#define __gvdb_reader_h__ + +#include + +typedef struct _GvdbTable GvdbTable; + +G_BEGIN_DECLS + +G_GNUC_INTERNAL +GvdbTable * gvdb_table_new_from_bytes (GBytes *bytes, + gboolean trusted, + GError **error); +G_GNUC_INTERNAL +GvdbTable * gvdb_table_new (const gchar *filename, + gboolean trusted, + GError **error); +G_GNUC_INTERNAL +void gvdb_table_free (GvdbTable *table); +G_GNUC_INTERNAL +gchar ** gvdb_table_get_names (GvdbTable *table, + gint *length); +G_GNUC_INTERNAL +gchar ** gvdb_table_list (GvdbTable *table, + const gchar *key); +G_GNUC_INTERNAL +GvdbTable * gvdb_table_get_table (GvdbTable *table, + const gchar *key); +G_GNUC_INTERNAL +GVariant * gvdb_table_get_raw_value (GvdbTable *table, + const gchar *key); +G_GNUC_INTERNAL +GVariant * gvdb_table_get_value (GvdbTable *table, + const gchar *key); + +G_GNUC_INTERNAL +gboolean gvdb_table_has_value (GvdbTable *table, + const gchar *key); +G_GNUC_INTERNAL +gboolean gvdb_table_is_valid (GvdbTable *table); + +G_END_DECLS + +#endif /* __gvdb_reader_h__ */ -- cgit v1.2.3