 |
C++Talk.NET C++ language newsgroups
|
| View previous topic :: View next topic |
| Author |
Message |
David Turner Guest
|
Posted: Sat Sep 06, 2003 12:38 pm Post subject: Ternary search tree (string_map) implementation; comments? |
|
|
I've implement a ternary search tree after the fashion of the standard
library. I'd like to hear any comments about C++ style, consistency, etc.
The usage style is reminiscent of std::map:
string_map<int> t;
t["Hello"] = 1;
t["World"] = 2;
string_map<int>::const_iterator i = t.find("Hello");
if(i != t.end())
cout << i->key() << " has value " << i->value() << endl;
Ternary search trees are described
http://www.javaworld.com/javaworld/jw-02-2001/jw-0216-ternary.html (or you
could google for a link like I did ).
No implementation yet of partial_find or anything like that; but I'd like to
hear suggestions about features that would be nice, too.
Regards
David Turner
--------------- Code paste begins ------------------
#include
template<typename CharType, typename DataType>
class basic_string_map
{
class Node
{
friend class basic_string_map;
CharType ch;
DataType* data;
Node *l, *r, *n, *p;
Node(const CharType& c, Node* parent):
ch(c), l(0), r(0), n(0), p(parent), data(0)
{
}
~Node()
{
delete l;
delete r;
delete n;
delete data;
}
public:
::std::basic_string<CharType> key() const
{
int count = 1;
const Node *i = p, *j = this;
while(i) {
if(i->n == j)
++ count;
j = i;
i = i->p;
}
::std::basic_string<CharType> result(count, 0);
for(i=this, j = n; i; j=i, i=i->p) {
if(i->n == j)
result[--count] = i->ch;
}
return result;
}
const DataType& value() const
{
return *data;
}
DataType& value()
{
return *data;
}
} *root;
Node* _Insertion(const CharType* key)
{
Node **idx = &root, *parent = 0;
if(key == 0 || *key == 0)
return 0;
while(*idx) {
if(*key < (*idx)->ch)
{
parent = *idx;
idx = &((*idx)->l);
}
else if((*idx)->ch < *key)
{
parent = *idx;
idx = &((*idx)->r);
}
else if(*(++key) == 0)
break;
else
{
parent = *idx;
idx = &((*idx)->n);
}
}
if(*idx == 0)
{
*idx = new Node(*key, parent);
while(*++key != 0)
{
(*idx)->n = new Node(*key, *idx);
idx = &((*idx)->n);
}
}
return *idx;
}
static Node* _Leftmost(Node* n)
{
while(n)
{
if(n->l)
n = n->l;
else if(n->data)
break;
else
n = n->n;
}
return n;
}
static Node* _Next(Node* n)
{
if(n->n)
return _Leftmost(n->n);
else
{
while(n)
{
if(n->p && n->p->l == n)
{
n = n->p;
if(n->data)
return n;
if(n->n)
return _Leftmost(n->n);
if(n->r)
return _Leftmost(n->r);
}
else if(n->p && n->p->n == n)
{
n = n->p;
if(n->r)
return _Leftmost(n->r);
}
else
n = n->p;
}
}
return n;
}
public:
class const_iterator;
class iterator
{
friend class basic_string_map;
friend class const_iterator;
Node* ref;
iterator(Node* n): ref(n) { }
public:
iterator(const iterator& rhs): ref(rhs.ref) { }
Node* operator ->() { return ref; }
const Node* operator ->() const { return ref; }
Node& operator *() { return *ref; }
const Node& operator *() const { return *ref; }
iterator& operator ++()
{
ref = _Next(ref);
return *this;
}
iterator operator ++(int)
{
Node* oldref = ref;
ref = _Next(ref);
return iterator(oldref);
}
bool operator == (const iterator& rhs) const
{
return ref == rhs.ref;
}
bool operator != (const iterator& rhs) const
{
return ref != rhs.ref;
}
};
class const_iterator
{
friend class basic_string_map;
Node* ref;
const_iterator(Node* n): ref(n) { }
public:
const_iterator(const const_iterator& rhs): ref(rhs.ref) { }
const_iterator(const iterator& rhs): ref(rhs.ref) { }
const Node* operator ->() const { return ref; }
const Node& operator *() const { return *ref; }
const_iterator& operator ++()
{
ref = _Next(ref);
return *this;
}
const_iterator operator ++(int)
{
Node* oldref = ref;
ref = _Next(ref);
return iterator(oldref);
}
bool operator == (const const_iterator& rhs) const
{
return ref == rhs.ref;
}
bool operator != (const const_iterator& rhs) const
{
return ref != rhs.ref;
}
};
basic_string_map(): root(0) { }
~basic_string_map()
{
delete root;
}
const_iterator find(const CharType* value) const
{
Node* idx = root;
while(idx) {
if(*value == 0)
idx = 0;
else if(*value < idx->ch)
idx = idx->l;
else if(idx->ch < *value)
idx = idx->r;
else if(*(++value) == 0)
break;
else
idx = idx->n;
}
return idx;
}
iterator find(const CharType* value)
{
Node* idx = root;
while(idx) {
if(*value == 0)
idx = 0;
else if(*value < idx->ch)
idx = idx->l;
else if(idx->ch < *value)
idx = idx->r;
else if(*(++value) == 0)
break;
else
idx = idx->n;
}
return idx;
}
const_iterator find(const ::std::basic_string<CharType>& value)
const
{
return find(value.c_str());
}
iterator find(const ::std::basic_string<CharType>& value)
{
return find(value.c_str());
}
iterator insert(const CharType* key, const DataType& data)
{
Node* idx = _Insertion(key);
delete idx->data;
idx->data = new DataType(data);
return idx;
}
iterator insert(const ::std::basic_string<CharType>& key,
const DataType& data)
{
return insert(key.c_str(), data);
}
DataType& operator [](const CharType* key)
{
Node* idx = _Insertion(key);
if(idx->data == 0)
idx->data = new DataType();
return *idx->data;
}
iterator begin()
{
if(root == 0)
return 0;
return _Leftmost(root);
}
iterator end()
{
return 0;
}
};
template<typename Data>
class string_map: public basic_string_map<char, Data>
{
};
template<typename Data>
class wstring_map: public basic_string_map<wchar_t, Data>
{
};
[ See http://www.gotw.ca/resources/clcm.htm for info about ]
[ comp.lang.c++.moderated. First time posters: Do this! ]
|
|
| Back to top |
|
 |
Siemel Naran Guest
|
Posted: Thu Sep 11, 2003 10:29 am Post subject: Re: Ternary search tree (string_map) implementation; comment |
|
|
"David Turner" <david (AT) firepro (DOT) co.za> wrote
This is a nice data structure. I'm sure there exists a concept of a
balanced ternary tree. Anyway, when is the regular std::map a better choice
than your ternary_tree?
| Quote: | template<typename CharType, typename DataType
class basic_string_map
|
Fine, but consider changing this to
template
Less=std::less
class basic_ternary_tree
| Quote: | ::std::basic_string<CharType> key() const
{
int count = 1;
const Node *i = p, *j = this;
while(i) {
if(i->n == j)
++ count;
j = i;
i = i->p;
}
::std::basic_string<CharType> result(count, 0);
for(i=this, j = n; i; j=i, i=i->p) {
if(i->n == j)
result[--count] = i->ch;
}
return result;
}
|
What if we want a std::vector<CharType>, std::list, or something else? In
functions like copy we accomplish this by passing in an output iterator as a
function argument, and the iterator may be a
front_insert_iterator<std::basic_string.
template <class OutputIter>
OutputIter key(OutputIter out) const
{
for(Node * i=this, j = n; i; j=i, i=i->p) {
if (i->n == j) {
*out = i->ch;
++out;
}
}
return out;
}
Only problem is that std::basic_string and std::vector don't have
push_front.
| Quote: | const DataType& value() const
{
return *data;
}
|
From the article it appears data could be NULL. You could document that one
should not dereference a node with null data. Or to be safe
const DataType& value() const {
if (!data) throw null_value();
return *data;
}
| Quote: | Node* _Insertion(const CharType* key)
{
Node **idx = &root, *parent = 0;
if(key == 0 || *key == 0)
return 0;
while(*idx) {
if(*key < (*idx)->ch)
{
parent = *idx;
idx = &((*idx)->l);
}
else if((*idx)->ch < *key)
{
parent = *idx;
idx = &((*idx)->r);
}
else if(*(++key) == 0)
break;
else
{
parent = *idx;
idx = &((*idx)->n);
}
}
if(*idx == 0)
{
*idx = new Node(*key, parent);
while(*++key != 0)
{
(*idx)->n = new Node(*key, *idx);
idx = &((*idx)->n);
}
}
return *idx;
}
|
This is good, but to make it more general change the signature to
template <class InputIter>
Node* _Insertion(InputIter begin, const InputIter end) {
The one argument _Insertion is still useful because we'll often use raw char
arrays as in your example usage. We can implement it as follows:
Node* _Insertion(const CharType* key) {
return _Insertion(key, key+std::char_traits<CharType>::length(key));
}
The call to traits::length will likely not result in poor performance, but
if it does, then keep your original function -- but try to factor the common
sections of the two functions into one.
| Quote: | static Node* _Next(Node* n)
|
Very good. Is it possible to write _Prev too?
| Quote: | class iterator
{
friend class basic_string_map;
friend class const_iterator;
Node* ref;
iterator(Node* n): ref(n) { }
public:
iterator(const iterator& rhs): ref(rhs.ref) { }
|
Isn't the compiler generated copy constructor the same as what you have?
Also make your private constructor explicit. Most importantly, don't forget
to define the 5 typedefs (or nested classes) for iterators otherwise you
can't use them in many common algorithms:
iterator_category (eg. std::bidirectional_iterator_tag if you are able to
define operator--)
value_type
reference
pointer
difference_type (I use void sometimes, and it seems appropriate here, and so
far it hasn't given me trouble).
| Quote: | Node* operator ->() { return ref; }
const Node* operator ->() const { return ref; }
Node& operator *() { return *ref; }
const Node& operator *() const { return *ref; }
|
Class Node is a private class. Does it make sense to return a pointer or
reference to it? How about we return a DataType* rather than Node*. Also
probably supply a member function key() that returns ref->key(). Also there
should be just two functions:
pointer operator ->() const { return ref->data; }
reference operator *() const { pointer out = ref->data; if (!out)
throw null_value(); return *out; }
| Quote: | basic_string_map(): root(0) { }
~basic_string_map()
|
Good. But it appears the copy constructor and operator= are missing. Oops.
| Quote: | const_iterator find(const CharType* value) const
iterator find(const CharType* value)
|
I think it's standard to implement one function in terms of the other using
const_cast. Say you implement the second non-const function. Then
const_iterator find(const CharType* value) const {
return
const_iterator(const_cast<basic_string_map*>(this)->find(value));
}
| Quote: | iterator insert(const CharType* key, const DataType& data)
{
Node* idx = _Insertion(key);
delete idx->data;
idx->data = new DataType(data);
return idx;
}
|
As the new DataType(data) may throw, it's good to set idx->data = NULL after
deleting it in order to make your code extremely exception safe. Or try
this,
iterator insert(const CharType* key, const DataType& data)
{
Node* idx = _Insertion(key);
DataType * & treedata = idx->data;
if (treedata) {
*treedata = data;
}
else {
treedata = new DataType(data);
}
return iterator(idx);
}
| Quote: | DataType& operator [](const CharType* key)
|
Good. I wish std::map had the const function too, as it would make the
class much easier to use. Well, you can add it to yours
const DataType& operator [](const CharType* key) const;
// may throw nested class not_found
| Quote: | template
class string_map: public basic_string_map
{
};
template
class wstring_map: public basic_string_map
{
};
|
Fine, though this is where template typedefs (an addition to the language)
would be a good idea.
--
+++++++++++
Siemel Naran
[ See http://www.gotw.ca/resources/clcm.htm for info about ]
[ comp.lang.c++.moderated. First time posters: Do this! ]
|
|
| Back to top |
|
 |
David Turner Guest
|
Posted: Fri Sep 12, 2003 10:30 am Post subject: Re: Ternary search tree (string_map) implementation; comment |
|
|
Hi Siemel
I think you missed the point slightly. A TST acts like a map FROM a string
TO another structure. As to balancing them, yes, this is possible, but the
opinion of most of the articles I have read is that it's not worthwhile.
For example, string_map<int> is [almost] semantically equivalent to
map<string, int>.
| Quote: | template<typename CharType, typename DataType
class basic_string_map
Fine, but consider changing this to
template
Less=std::less
|
Surely you mean std::less
| Quote: | ::std::basic_string<CharType> key() const
What if we want a std::vector<CharType>, std::list, or something else? In
|
Interesting. Note that key() returns the (immutable) key value of the node
the iterator points to, which by definition is a string. It seemed
reasonably to return this as a basic_string<CharType>.
| Quote: | const DataType& value() const
{
return *data;
}
From the article it appears data could be NULL. You could document that
one
should not dereference a node with null data. Or to be safe
|
I don't think so, since the iterators never stop at a node where data == 0.
The data member serves two purposes: it marks nodes which are intermediate
by the fact that it is null, and it stores a pointer to the data value when
the node is the end of a string. For example:
a* r o l d*
/
H e l l o*
This represents a TST containing three strings, Ha, Harold and Hello. The *
indicates nodes where the data member is not null; these are the ends of the
strings in the tree.
| Quote: | Node* _Insertion(const CharType* key)
This is good, but to make it more general change the signature to
template <class InputIter
Node* _Insertion(InputIter begin, const InputIter end) {
|
Good idea . No need for the const CharType* key specialization as you go
on to suggest, though.
| Quote: | static Node* _Next(Node* n)
Very good. Is it possible to write _Prev too?
|
I had considered it. Why not? std::map supplies bidirectional iterators,
so string_map should too.
| Quote: | class iterator
Isn't the compiler generated copy constructor the same as what you have?
Also make your private constructor explicit. Most importantly, don't
forget
to define the 5 typedefs (or nested classes) for iterators otherwise you
can't use them in many common algorithms:
iterator_category (eg. std::bidirectional_iterator_tag if you are able to
define operator--)
value_type
reference
pointer
difference_type (I use void sometimes, and it seems appropriate here, and
so
far it hasn't given me trouble).
|
True, true, and true Thanks for pointing those out.
| Quote: | Node* operator ->() { return ref; }
const Node* operator ->() const { return ref; }
Node& operator *() { return *ref; }
const Node& operator *() const { return *ref; }
Class Node is a private class. Does it make sense to return a pointer or
reference to it? How about we return a DataType* rather than Node*. Also
|
Does it make sense to return Node*? I think it does. Compare with
std::map:
map<string, int>::iterator i = m.find("abc");
assert(i->first == "abc" && typeid(i->second) = typeid(int));
string_map<int>::iterator i = sm.find("abc");
assert(i->key() == "abc" && typeid(i->value()) == typeid(int));
| Quote: | basic_string_map(): root(0) { }
~basic_string_map()
Good. But it appears the copy constructor and operator= are missing.
Oops. |
Oops.
| Quote: | const_iterator find(const CharType* value) const
iterator find(const CharType* value)
I think it's standard to implement one function in terms of the other
using
const_cast. Say you implement the second non-const function. Then
|
Okay.
| Quote: | As the new DataType(data) may throw, it's good to set idx->data = NULL
after
deleting it in order to make your code extremely exception safe. Or try
this,
|
Well spotted.
| Quote: | iterator insert(const CharType* key, const DataType& data)
{
Node* idx = _Insertion(key);
DataType * & treedata = idx->data;
if (treedata) {
*treedata = data;
|
I have a problem with this line. Most of the STL structures don't use
operator = if they can avoid it. I'd rather use placement new here, as in
new (treedata) DataType(data);. Is that a good idea?
| Quote: | I wish std::map had the const function too, as it would make the
class much easier to use. Well, you can add it to yours
|
Debatable. Do you really want a find operation that throws an exception?
| Quote: | template
class string_map: public basic_string_map
{
};
Fine, though this is where template typedefs (an addition to the language)
would be a good idea.
|
Tell me about it....
Regards
David Turner
[ See http://www.gotw.ca/resources/clcm.htm for info about ]
[ comp.lang.c++.moderated. First time posters: Do this! ]
|
|
| Back to top |
|
 |
Siemel Naran Guest
|
Posted: Sat Sep 13, 2003 8:22 am Post subject: Re: Ternary search tree (string_map) implementation; comment |
|
|
"David Turner"
| Quote: | Siemel Naran
This is a nice data structure. I'm sure there exists a concept of a
balanced ternary tree. Anyway, when is the regular std::map a better
choice than your ternary_tree?
I think you missed the point slightly. A TST acts like a map FROM a string
TO another structure. As to balancing them, yes, this is possible, but the
opinion of most of the articles I have read is that it's not worthwhile.
For example, string_map<int> is [almost] semantically equivalent to
map<string, int>.
|
So when is string_map<int> a better choice than map<string, int>.
| Quote: | template<typename CharType, typename DataType, typename
Less=std::less
Surely you mean std::less
|
Yes.
| Quote: | ::std::basic_string<CharType> key() const
What if we want a std::vector<CharType>, std::list, or something else? In
Interesting. Note that key() returns the (immutable) key value of the node
the iterator points to, which by definition is a string. It seemed
reasonably to return this as a basic_string<CharType>.
Node* _Insertion(const CharType* key)
This is good, but to make it more general change the signature to
template <class InputIter
Node* _Insertion(InputIter begin, const InputIter end) {
Good idea . No need for the const CharType* key specialization as you go
on to suggest, though.
|
It doesn't make sense to make one function general but not the other.
Either make both general, or leave both as is.
| Quote: | Class Node is a private class. Does it make sense to return a pointer or
reference to it? How about we return a DataType* rather than Node*. Also
Does it make sense to return Node*? I think it does. Compare with
std::map:
map
assert(i->first == "abc" && typeid(i->second) = typeid(int));
string_map<int>::iterator i = sm.find("abc");
assert(i->key() == "abc" && typeid(i->value()) == typeid(int));
|
True, but since class basic_ternary_tree::Node is a private class (see
your original class definition) then the statement i->key() or
i->value() should fail compile because you can't access class Node.
| Quote: | iterator insert(const CharType* key, const DataType& data)
{
Node* idx = _Insertion(key);
DataType * & treedata = idx->data;
if (treedata) {
*treedata = data;
I have a problem with this line. Most of the STL structures don't use
operator = if they can avoid it. I'd rather use placement new here, as in
new (treedata) DataType(data);. Is that a good idea?
|
The std containers require the type to have an operator=, so I guess
library implementors are free to use it. In your function you either
insert a data element (if the key does not exist) or replace it (if it
does). In functions like vector::push_back they're always inserting a
new row.
| Quote: | I wish std::map had the const function too, as it would make the
class much easier to use. Well, you can add it to yours
Debatable. Do you really want a find operation that throws an exception?
|
Yes.
| Quote: | Fine, though this is where template typedefs (an addition to the language)
would be a good idea.
Tell me about it....
|
The proposal is
template <class Data>
typedef basic_ternary_map<char, Data> string_ternary_map;
string_ternary_map<int> myMap;
myMap["hello"] = 3;
I don't know if anyone has actually requested this though.
[ See http://www.gotw.ca/resources/clcm.htm for info about ]
[ comp.lang.c++.moderated. First time posters: Do this! ]
|
|
| Back to top |
|
 |
David Turner Guest
|
Posted: Mon Sep 15, 2003 10:01 am Post subject: Re: Ternary search tree (string_map) implementation; comment |
|
|
Hi
| Quote: | For example, string_map<int> is [almost] semantically equivalent to
map<string, int>.
So when is string_map<int> a better choice than map<string, int>.
|
Always, I would imagine... string_map<int> performs far fewer operations to
find a given entry than map<string, int>. Google for "ternary search tree".
Regards
David Turner
[ See http://www.gotw.ca/resources/clcm.htm for info about ]
[ comp.lang.c++.moderated. First time posters: Do this! ]
|
|
| Back to top |
|
 |
Allan Odgaard Guest
|
Posted: Wed Sep 17, 2003 7:49 pm Post subject: Re: Ternary search tree (string_map) implementation; comment |
|
|
David Turner:
| Quote: | I'm sure there exists a concept of a balanced ternary tree. [...]
[...] As to balancing them, yes, this is possible, but the
opinion of most of the articles I have read is that it's not worthwhile.
|
Worst case for an unbalanced TST is O(m x n), where m is the alphabet
size and n is the key length. If we balance the tree, we reduce it to
O(log(m) x n),
If we assume that m is 60 (a-z, A-Z, 0-9) then that would mean a
speedup of about 10 for the balanced versus a worst-case unbalanced
(which is likely to appear, as much data comes from a sorted source).
If the class is intended for others to use, then I think it would be
worthwhile to obtain this speedup.
Btw: why do you "hardcode" this class to strings? Let it be templated
on the value type instead and work with iterator ranges.
I would further suggest to add a "find prefix of range" which return
the longest key in the tree which is a prefix of the range passed in.
That way I can insert a dozen strings as keys and let their value be a
function pointer. This gives me a very cheap parser, which can be used
something like this:
while(first != last)
{
value_type const& r = t.find_prefix_of(first, last);
if(r != t.end())
first = (*r.second)(first, last);
else ++first;
}
Here (first, last) is the sequence (of text) to be parsed, value_type
is the value_type of the TST, which I expect to be a std::pair (key,
value), where the value (second in the pair) is a function pointer --
so basically it will find all the keywords in the sequence and call
the appropriate function to handle each of them.
This is IMHO a great advantage of ternary search trees, as the above
is not possible with standard binary search trees, since we do not
know the length of the key before we have actually found it in the
tree.
It also removes the need to actually check the length of the range.
E.g. if we meet "align=", we expect to find "left", "right" or
"center", but we cannot do a std::equal() with the three strings,
because we would need to ensure that the range is actually long enough
to hold the string, and when working with non-random-access iterators,
such a check is expensive.
[ See http://www.gotw.ca/resources/clcm.htm for info about ]
[ comp.lang.c++.moderated. First time posters: Do this! ]
|
|
| Back to top |
|
 |
Siemel Naran Guest
|
Posted: Fri Sep 19, 2003 11:07 am Post subject: Re: Ternary search tree (string_map) implementation; comment |
|
|
"Allan Odgaard" <Duff (AT) DIKU (DOT) DK> wrote in message
| Quote: | Worst case for an unbalanced TST is O(m x n), where m is the alphabet
size and n is the key length. If we balance the tree, we reduce it to
O(log(m) x n),
|
Is this the time to find a key? How do you get O(m * n)?
| Quote: | If we assume that m is 60 (a-z, A-Z, 0-9) then that would mean a
speedup of about 10 for the balanced versus a worst-case unbalanced
(which is likely to appear, as much data comes from a sorted source).
|
The ratio of 10 does not follow. It is true that (m*n)/(ln(m)*n) = 14 for
m=60. But the actual time of the algorithm may be something like
4*ln(m)*n+d. With m=60 and d=0 for simplicity, the ratio is 3.6, all thanks
to the 4*. So we have to do actual experiments to determine the running
time.
| Quote: | I would further suggest to add a "find prefix of range" which return
the longest key in the tree which is a prefix of the range passed in.
|
Which reminds me. The original article mentioned the function matchAlmost,
which to me seemed to be one of the most useful features of ternary_tree.
Yet the OP's class did not have this function.
| Quote: | That way I can insert a dozen strings as keys and let their value be a
function pointer. This gives me a very cheap parser, which can be used
something like this:
while(first != last)
{
value_type const& r = t.find_prefix_of(first, last);
if(r != t.end())
first = (*r.second)(first, last);
else ++first;
}
Here (first, last) is the sequence (of text) to be parsed, value_type
is the value_type of the TST, which I expect to be a std::pair (key,
value), where the value (second in the pair) is a function pointer --
so basically it will find all the keywords in the sequence and call
the appropriate function to handle each of them.
This is IMHO a great advantage of ternary search trees, as the above
is not possible with standard binary search trees, since we do not
know the length of the key before we have actually found it in the
tree.
It also removes the need to actually check the length of the range.
E.g. if we meet "align=", we expect to find "left", "right" or
"center", but we cannot do a std::equal() with the three strings,
because we would need to ensure that the range is actually long enough
to hold the string, and when working with non-random-access iterators,
such a check is expensive.
|
This is also a great idea.
--
+++++++++++
Siemel Naran
[ See http://www.gotw.ca/resources/clcm.htm for info about ]
[ comp.lang.c++.moderated. First time posters: Do this! ]
|
|
| Back to top |
|
 |
Allan Odgaard Guest
|
Posted: Sat Sep 20, 2003 3:07 pm Post subject: Re: Ternary search tree (string_map) implementation; comment |
|
|
Siemel Naran:
| Quote: | Worst case for an unbalanced TST is O(m x n), where m is the alphabet
size and n is the key length. If we balance the tree, we reduce it to
O(log(m) x n),
Is this the time to find a key? How do you get O(m * n)?
|
Yes -- for each character in the key (of which we have n), we need to
find the node which holds that same character, and as there are m
different characters, we would need to compare the (possibly) m nodes
n times, yielding O(m x n),
Example:
Alphabet: {a, b, c, d, e, f }, thus m = 6.
Tree contains "aaa", "aab", ..., "ffe", "fff" (216 nodes).
We wish to find "fff".
We take the first character from our key ("f") and the first character
from the root node ("a"), a compare reveals that our character is
higher, thus we must advance left child, where we find "b", again we
must advance to the left and find "c", ... and after m times, we find
"f". Now we can read the next character from our key (another "f") and
advance down the middle of our "f"-node, where we find "a", and again
we need to advance to the left m times, before we can take the middle
branch -- we must do this for every character in the key.
So all in all we do m x n character comparisons, giving 6 x 3 = 18
actual comparisons.
Had the tree been balanced, we would at most do lg(m) comparisons pr.
character, i.e. "lg(m) x n" which amounts to: ~7.8 comparisons.
The general speedup here is m / lg(m) => 6 / lg(6) => 2.32... and
18/7.8 is 2.3.
So with an alphabet size of 6, you would double the performance (at
best).
| Quote: | If we assume that m is 60 (a-z, A-Z, 0-9) then that would mean a
speedup of about 10 for the balanced versus a worst-case unbalanced
The ratio of 10 does not follow.
|
I obtained this with the formula above, i.e. we do lg(m) comparisons
pr. character in the key, rather than m. So pr. character (in the key)
the speedup is "m / lg(m)", which here is 60 / lg(60) = 10.2...
As an example, imagine a key size of 10, then using the unbalanced
tree we would at most do 60 x 10 = 600 comparisons, where in contrast,
lg(60) x 10 is only 59.06... so, ten times as few.
| Quote: | But the actual time of the algorithm may be something like
4*ln(m)*n+d.
|
What is this? Are the "times 4" there to hint that searching the
balanced tree has 4 times as much overhead?
Although I was talking about the theoretical time complexity, not the
practical running time, then the code which searches the tree is
exactly the same, it is the layout of the tree which changes. So there
are no hidden costs in searching a balanced versus unbalanced tree.
| Quote: | So we have to do actual experiments to determine the running time.
|
Which might depend on the architecture, skewed input data, the
implementation, caches and similar.
My "10 times faster" was definitely to be taken with a grain of salt
-- but important to notice that the speedup is a function of the
alphabet size, not the key length, nor the number of nodes in the
tree.
So if the authors that the OP referred to did test the balanced
implementation by adding a lot of strings to the tree, they would find
that the balanced didn't scale any better than the unbalanced, nor
should the key length matter in this.
But had they instead turned to e.g. simplified Chinese for the keys,
then they would most likely have seen *much* worse performance for the
unbalanced (with the same number of strings), where the balanced would
be roughly the same.
This is why I said I'd opt for making it balanced if it were to go in
a library, since you can never know what it will be used for -- it is
a great tool to manage a symbol tables, and many languages today can
be written in unicode, so the alphabet is not necessarily [a-zA-Z0-9].
| Quote: | That way I can insert a dozen strings as keys and let their value be a
function pointer. [...]
This is also a great idea.
|
Thanks, I refined it slightly by adding a templated constructor which
takes an array of key/value pairs for easy filling of the tree, e.g.:
template <typename T, int N>
scanner (T(&values)[N])
{
for(int i = 0; i < N; ++i)
insert(values[i].Key, values[i].Value);
}
That way we can declare our keywords like this:
struct { char const* Key; align_t Value; } AlignmentKeywords[] =
{
{ "left", align_left },
{ "right", align_right },
{ "center", align_center },
{ "justified", align_justified },
};
...
static const scanner
Meaning we wouldn't need to insert the keys manually. Unfortunately
the key/value declaration cannot be in the functionn scope due to
template rules.
[ See http://www.gotw.ca/resources/clcm.htm for info about ]
[ comp.lang.c++.moderated. First time posters: Do this! ]
|
|
| Back to top |
|
 |
|
|
You cannot post new topics in this forum You cannot reply to topics in this forum You cannot edit your posts in this forum You cannot delete your posts in this forum You cannot vote in polls in this forum
|
|