Mnesia Schema Maintenance
Table of Contents
Introduction
Any database-driven application faces the problem of initialization and maintenance of the data store in conjunction with the rest of the application, and Mnesia-driven applications are no exception. Consequently, the following type of inquiry is periodically directed to the Erlang Questions mailing list:
Hi all,
I have a question about how folks bootstrap an OTP application that
depends on mnesia. Consider an OTP application foo that uses mnesia.
Since mnesia is specified in the needed applications list, mnesia must
be started before you start foo.
For the first run of foo, one needs to call mnesia:create_schema/1
_before_ mnesia is started.
I'd like to be able to start foo on a clean system and have the schema
created if needed, but don't see a way to handle this. I suspect that
I need to adjust my expectations :-)
Is there a recommended way to handle mnesia initialization? Is the
standard practice to create the schema outside of the application
start up flow or am I missing a way to handle this as part of foo's
application initialization?
Thanks,
+ seth
Digging into the thread there are actually two questions here:
- How to initialize and maintain the (distributed) schema.
- How to initialize and maintain application specific tables.
The first question was answered in another article, Mnesia Distributed Hello World, so I'll focus on the second question.
Basics: Making Sure Tables are There
My favorite solution is to have a gen_server in the application supervision tree which owns the schema, and I use this strategy for erlanganswers.com. The init callback for the gen_server owning the schema for this site is
%% @doc gen_server init callback for the process that owns the schema.
%% ensure_schema/0 is run to completion before this callback returns.
%% The initial state is a #statev4{} record whose backup field is the
%% atom none.
init ([]) ->
  ensure_schema (),
  InitialState = #statev4 { backup = none },
  { ok, InitialState }.
All the heavy lifting is in the ensure_schema/0 function. Note: if you adapt this code for your own purposes, you will find that unless you are using mnesiaex the calls mentioning external_copies will fail, since vanilla mnesia lacks the concept.
%% @doc Bring the local database up to the layout this code expects.
%%
%% Steps, in order:
%%   1. make the schema table on this node disc_copies (an
%%      "already disc_copies" answer is accepted as success);
%%   2. ensure the mcedoc table exists, then transform it if its stored
%%      attributes differ from the current record definition;
%%   3. ensure the mcedoctime and mcecomment tables exist.
%%
%% Any other answer from fast_change_table_copy_type/3 is deliberately
%% left unmatched so the caller crashes with a case_clause error.
%% Always returns ok when it returns at all.
ensure_schema () ->
  case fast_change_table_copy_type (schema, node (), disc_copies) of
    { atomic, ok } -> ok;
    { aborted, { already_exists, schema, _, disc_copies } } -> ok
  end,
  ensure_table (mcedoc,
                tcbdb_table_def (record_info (fields, mcedoc))),
  %% Must run after mcedoc is known to exist and before anything uses it.
  maybe_transform_mcedoc (),
  ensure_table (mcedoctime,
                tcbdb_table_def (record_info (fields, mcedoctime))),
  ensure_table (mcecomment,
                tcbdb_table_def (record_info (fields, mcecomment))),
  ok.

%% Table definition shared by every table created in ensure_schema/0:
%% an external ordered_set tcbdbtab table with one copy on this node.
%% Fields is the attribute list for the table; record_info/2 has to be
%% expanded at the call site because it needs a literal record name.
%% The external type and external_copies options are not understood by
%% an unmodified mnesia.
tcbdb_table_def (Fields) ->
  [ { attributes, Fields },
    { type, { external, ordered_set, tcbdbtab } },
    { external_copies, [ node () ] },
    { user_properties, [
        { deflate, true },
        { async_write, true },
        { bucket_array_size, 101 },
        { leaf_node_cache, 1 },
        { nonleaf_node_cache, 1 },
        { leaf_members, 256 },
        { non_leaf_members, 512 }
      ] }
  ].
%% @doc Create TableName with definition TabDef unless it is already
%% present, then block until the table and every additional fragment of
%% it (fragments 2..N, as reported by num_frags/1) are loaded.
%% Returns ok. Any unexpected reply from fast_create_table/2 crashes
%% with a case_clause error.
ensure_table (TableName, TabDef) ->
  case fast_create_table (TableName, TabDef) of
    { atomic, ok } -> ok;
    { aborted, { already_exists, _ } } -> ok;
    %% Treated as success as well. Open question from the original
    %% author: why is this reported instead of already_exists?
    { aborted, { combine_error, _, _ } } -> ok
  end,
  ok = mnesia:wait_for_tables ([ TableName ], infinity),
  %% Fragment 1 is the base table itself and has already been waited for.
  ExtraFragNums = lists:filter (fun (N) -> N > 1 end,
                                lists:seq (1, num_frags (TableName))),
  ExtraFragTabs = lists:map (fun (N) -> frag_table_name (TableName, N) end,
                             ExtraFragNums),
  ok = mnesia:wait_for_tables (ExtraFragTabs, infinity),
  ok.
%% @doc Change the storage type of TableName on Node to CopyType, but
%% only call mnesia:change_table_copy_type/3 when the table_info reads
%% say a change is needed.
%%
%% Returns:
%%   { aborted, { already_exists, TableName, Node, CopyType } } when Node
%%     already holds the table with that copy type;
%%   the result of mnesia:change_table_copy_type/3 when Node holds the
%%     table with some other copy type;
%%   { aborted, { no_exists, TableName, Node } } when Node holds no copy;
%%   { aborted, { no_exists, TableName } } when the lookups themselves
%%     raise any exception.
%% Exceptions from mnesia:change_table_copy_type/3 are not caught.
fast_change_table_copy_type (TableName, Node, CopyType) ->
  Membership =
    try
      AnyCopyNodes = used_nodes (TableName),
      TypedCopyNodes = used_nodes (TableName, CopyType),
      { lists:member (Node, AnyCopyNodes),
        lists:member (Node, TypedCopyNodes) }
    catch
      _ : _ -> lookup_failed
    end,
  case Membership of
    lookup_failed ->
      { aborted, { no_exists, TableName } };
    { false, _ } ->
      { aborted, { no_exists, TableName, Node } };
    { true, true } ->
      { aborted, { already_exists, TableName, Node, CopyType } };
    { true, false } ->
      mnesia:change_table_copy_type (TableName, Node, CopyType)
  end.
%% @doc Create TableName with definition TabDef unless mnesia already
%% knows the table. Existence is probed with mnesia:table_info/2; if
%% that call raises anything, the table is assumed to be missing and
%% mnesia:create_table/2 is called.
%% Returns { aborted, { already_exists, TableName } } when the probe
%% succeeds, otherwise whatever mnesia:create_table/2 returns.
fast_create_table (TableName, TabDef) ->
  IsKnown =
    try mnesia:table_info (TableName, type) of
      _Type -> true
    catch
      _ : _ -> false
    end,
  case IsKnown of
    true -> { aborted, { already_exists, TableName } };
    false -> mnesia:create_table (TableName, TabDef)
  end.
%% @doc Name of fragment number FragNum of TableName.
%% Fragment 1 is the base table itself; fragment N > 1 is the atom made
%% of the base name followed by "_frag" and N. Fragment numbers below 1
%% are not handled and raise function_clause.
frag_table_name (TableName, 1) ->
  TableName;
frag_table_name (TableName, FragNum) when FragNum > 1 ->
  BaseName = atom_to_list (TableName),
  Digits = integer_to_list (FragNum),
  list_to_atom (lists:append ([ BaseName, "_frag", Digits ])).
%% @doc Transform the mcedoc table only when needed: if the attribute
%% list mnesia has stored for the table is exactly the field list of
%% the current mcedoc record, return ok; otherwise return the result of
%% transform_mcedoc/0.
maybe_transform_mcedoc () ->
  StoredFields = mnesia:table_info (mcedoc, attributes),
  CurrentFields = record_info (fields, mcedoc),
  case StoredFields of
    CurrentFields -> ok;
    _ -> transform_mcedoc ()
  end.
%% @doc Number of fragments of TableName: the n_fragments entry of the
%% table's frag_properties, or 1 when there is no such two-element entry.
num_frags (TableName) ->
  Props = mnesia:table_info (TableName, frag_properties),
  case lists:keyfind (n_fragments, 1, Props) of
    { n_fragments, Count } -> Count;
    _ -> 1
  end.
%% @doc Rewrite every row of the mcedoc table into the current mcedoc
%% record via mnesia:transform_table/4, and assert that it succeeded.
%% Rows are matched as plain tuples because the record syntax only
%% describes the current layout:
%%   a 3-element row has no draft value and is given draft = false;
%%   a 4-element row keeps its draft value.
%% Record fields not set here take the defaults of the record
%% definition. Returns { atomic, ok }; anything else is a badmatch.
transform_mcedoc () ->
  Upgrade =
    fun ({ mcedoc, PathTime, Contents }) ->
          #mcedoc { path_time = PathTime,
                    contents = Contents,
                    draft = false };
        ({ mcedoc, PathTime, Contents, Draft }) ->
          #mcedoc { path_time = PathTime,
                    contents = Contents,
                    draft = Draft }
    end,
  NewFields = record_info (fields, mcedoc),
  { atomic, ok } = mnesia:transform_table (mcedoc, Upgrade, NewFields, mcedoc).
%% @doc Sorted, duplicate-free list of the nodes that hold TableName
%% under any of the copy types ram_copies, disc_copies, external_copies
%% or disc_only_copies.
used_nodes (TableName) ->
  CopyTypes = [ ram_copies, disc_copies, external_copies, disc_only_copies ],
  PerType = [ used_nodes (TableName, CopyType) || CopyType <- CopyTypes ],
  lists:usort (lists:append (PerType)).
%% @doc Whatever mnesia:table_info/2 reports for TableName under the
%% item CopyType (used by callers as the list of nodes holding that
%% kind of copy).
used_nodes (TableName, CopyType) ->
  Nodes = mnesia:table_info (TableName, CopyType),
  Nodes.
Let's go through it step-by-step.
- First I ensure that the local schema is disc-based by essentially calling mnesia:change_table_copy_type/3. fast_change_table_copy_type/3 is a local function that I wrote for schemafinder to prevent unnecessary calls to mnesia:change_table_copy_type/3. Here it is overkill, but in a large distributed configuration under load the latency associated with acquiring a schema transaction can be sizeable so using "dirty schema reads" (mnesia:table_info/2, mnesia:system_info/1) can save time.
- Next, via ensure_table/2, I ensure that the mcedoc table exists, and otherwise create it with the indicated parameters. (These are tcerl tables which leverage mnesiaex; you should modify them to be ets or dets tables if you are using standard mnesia.) The ensure_table/2 function creates the table if it does not already exist, and then calls mnesia:wait_for_tables/2 to guarantee the table (and any associated fragments) are ready to be used. Note that ensure_table/2 is idempotent.
- Ignore the maybe_transform_mcedoc/0 call for now, we'll cover it in the next section.
- The mcedoctime and mcecomment tables are similarly treated via ensure_table/2.
Since this is called in the init callback of a gen_server which is in the application hierarchy of the application providing the API to the document store for the site, we are assured that the application is not started until the database is ready. In addition, we can list this gen_server as the first child in the childspec for the application, to ensure that no other processes associated with the document store attempt to access the schema prematurely.
If you do time-consuming initialization of the database during the init/1 callback (e.g., loading in a large starting dataset), you should increase the timeout passed to gen_server:start_link/3,4 appropriately (or, alternatively, do not specify a timeout).
Advanced: Schema Migrations
Rarely is the initial schema associated with the first version of the application sufficient as the application evolves (unless an essentially schema-free approach is taken). One of the nice features of Mnesia is the ability to migrate the schema while the database is live, via mnesia:transform_table/4. Since we have delegated ownership of our schema to a gen_server, we can migrate the schema as part of the code upgrade process.
In particular the relevant section of code for erlanganswers.com is
%% Server state records. Each released version of this gen_server uses
%% its own record name, so code_change/3 can tell which version it is
%% upgrading from by matching on the record.
%% state has no fields; the later versions each carry a backup field.
-record (state, {}).
-record (statev2, { backup }).
-record (statev3, { backup }).
%% Current version: the state built by init/1 and by every upgrade path.
-record (statev4, { backup }).
%% @doc gen_server code_change callback. The version being upgraded
%% from is recognised by the record used for the old state:
%%   #state{}   - registers this process under ?MODULE, then upgrades
%%                with backup = none;
%%   #statev2{} - upgrades, carrying the backup value over;
%%   #statev3{} - upgrades, carrying the backup value over;
%%   anything else is returned unchanged and the schema is not touched.
code_change (_OldVsn, #state{}, _Extra) ->
  register (?MODULE, self ()),
  upgrade_to_statev4 (none);
code_change (_OldVsn, OldState = #statev2{}, _Extra) ->
  upgrade_to_statev4 (OldState#statev2.backup);
code_change (_OldVsn, OldState = #statev3{}, _Extra) ->
  upgrade_to_statev4 (OldState#statev3.backup);
code_change (_OldVsn, CurrentState, _Extra) ->
  { ok, CurrentState }.

%% Run ensure_schema/0, then build the code_change reply holding a
%% #statev4{} state with the given backup value.
upgrade_to_statev4 (Backup) ->
  ensure_schema (),
  { ok, #statev4 { backup = Backup } }.
This is the code change handler for the gen_server. There have been 3 hot upgrades of this server since it was launched, and by convention I represent these as different records.
- #state{}: This was the original version of the server.
- #statev2{}: For this upgrade, the mcedoc table was modified. In addition, I had forgotten to register the server (I used gen_server:start_link/3 instead of gen_server:start_link/4), which was frustrating the implementation of the backup system. Therefore I register the server in the code change handler. Newly started instances of the server will be registered, so this is not required for subsequent code changes.
- #statev3{}: Another change to the mcedoc table was introduced.
- #statev4{}: The mcecomment table was introduced as part of the launch of the comments system.
In all cases I write ensure_schema/0 to take any version of the database and bring it all the way up to date. In most cases this is the easiest way to reason about the system, but sometimes migration must take a certain path through database versions (this is more likely on a commercial project where only certain database state transitions have been tested and quality assurance is paramount).
We have already met much of the ensure_schema/0 function, with the exception of the maybe_transform_mcedoc/0 function. Let's analyze that now.
%% @doc Transform the mcedoc table only when needed: if the attribute
%% list mnesia has stored for the table is exactly the field list of
%% the current mcedoc record, return ok; otherwise return the result of
%% transform_mcedoc/0.
maybe_transform_mcedoc () ->
  StoredFields = mnesia:table_info (mcedoc, attributes),
  CurrentFields = record_info (fields, mcedoc),
  case StoredFields of
    CurrentFields -> ok;
    _ -> transform_mcedoc ()
  end.
%% @doc Rewrite every row of the mcedoc table into the current mcedoc
%% record via mnesia:transform_table/4, and assert that it succeeded.
%% Rows are matched as plain tuples because the record syntax only
%% describes the current layout:
%%   a 3-element row has no draft value and is given draft = false;
%%   a 4-element row keeps its draft value.
%% Record fields not set here take the defaults of the record
%% definition. Returns { atomic, ok }; anything else is a badmatch.
transform_mcedoc () ->
  Upgrade =
    fun ({ mcedoc, PathTime, Contents }) ->
          #mcedoc { path_time = PathTime,
                    contents = Contents,
                    draft = false };
        ({ mcedoc, PathTime, Contents, Draft }) ->
          #mcedoc { path_time = PathTime,
                    contents = Contents,
                    draft = Draft }
    end,
  NewFields = record_info (fields, mcedoc),
  { atomic, ok } = mnesia:transform_table (mcedoc, Upgrade, NewFields, mcedoc).
The decision to transform is driven by whether the record fields associated with the table match the current definition of the record in the software. If there is a mismatch, mnesia:transform_table/4 is invoked. Importantly, we cannot use the record syntax to access an old version of the record, as this will lead to a run-time error. Instead we use the correspondence between records and tuples and access the old versions of the record as tuples. In this case, there was originally a definition of the mcedoc record, #mcedoc{path_time, contents=""}, which was then augmented with a draft column, #mcedoc{path_time, contents="", draft=true}, when I introduced the ability to have work-in-progress articles. This was further augmented with a comments_ok column, #mcedoc{path_time, contents="", draft=true, comments_ok=true}, when I added the comments system. The transform function can map any version of the record that has ever been used into the latest version.
Since the default timeout on a code change handler is 5 seconds, you should use one of the high level release instructions in your appup file to increase the timeout when doing nontrivial amounts of work in a code change handler.
Share this articleComments
Post a Comment
About Me
My name is Paul Mineiro. I'm an avid user of Erlang and an avid reader of the Erlang Questions mailing list. I am available for consulting work. I use purple a lot on this site because it is my daughter's favorite color.




