From eec07955cca26b3ed26e00751b4249b7b2c0d847 Mon Sep 17 00:00:00 2001 From: Tessa Nordgren Date: Wed, 25 Mar 2015 09:53:31 -0700 Subject: [PATCH] First release. --- LICENSE | 165 +++++++++++++++++++++++++++++++++++++ README.md | 47 +++++++++++ ntree.go | 221 ++++++++++++++++++++++++++++++++++++++++++++++++++ ntree_test.go | 195 ++++++++++++++++++++++++++++++++++++++++++++ point.go | 8 ++ search.go | 59 ++++++++++++++ 6 files changed, 695 insertions(+) create mode 100644 LICENSE create mode 100644 README.md create mode 100644 ntree.go create mode 100644 ntree_test.go create mode 100644 point.go create mode 100644 search.go diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..65c5ca8 --- /dev/null +++ b/LICENSE @@ -0,0 +1,165 @@ + GNU LESSER GENERAL PUBLIC LICENSE + Version 3, 29 June 2007 + + Copyright (C) 2007 Free Software Foundation, Inc. + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + + This version of the GNU Lesser General Public License incorporates +the terms and conditions of version 3 of the GNU General Public +License, supplemented by the additional permissions listed below. + + 0. Additional Definitions. + + As used herein, "this License" refers to version 3 of the GNU Lesser +General Public License, and the "GNU GPL" refers to version 3 of the GNU +General Public License. + + "The Library" refers to a covered work governed by this License, +other than an Application or a Combined Work as defined below. + + An "Application" is any work that makes use of an interface provided +by the Library, but which is not otherwise based on the Library. +Defining a subclass of a class defined by the Library is deemed a mode +of using an interface provided by the Library. + + A "Combined Work" is a work produced by combining or linking an +Application with the Library. The particular version of the Library +with which the Combined Work was made is also called the "Linked +Version". + + The "Minimal Corresponding Source" for a Combined Work means the +Corresponding Source for the Combined Work, excluding any source code +for portions of the Combined Work that, considered in isolation, are +based on the Application, and not on the Linked Version. + + The "Corresponding Application Code" for a Combined Work means the +object code and/or source code for the Application, including any data +and utility programs needed for reproducing the Combined Work from the +Application, but excluding the System Libraries of the Combined Work. + + 1. Exception to Section 3 of the GNU GPL. + + You may convey a covered work under sections 3 and 4 of this License +without being bound by section 3 of the GNU GPL. + + 2. Conveying Modified Versions. + + If you modify a copy of the Library, and, in your modifications, a +facility refers to a function or data to be supplied by an Application +that uses the facility (other than as an argument passed when the +facility is invoked), then you may convey a copy of the modified +version: + + a) under this License, provided that you make a good faith effort to + ensure that, in the event an Application does not supply the + function or data, the facility still operates, and performs + whatever part of its purpose remains meaningful, or + + b) under the GNU GPL, with none of the additional permissions of + this License applicable to that copy. + + 3. Object Code Incorporating Material from Library Header Files. + + The object code form of an Application may incorporate material from +a header file that is part of the Library. You may convey such object +code under terms of your choice, provided that, if the incorporated +material is not limited to numerical parameters, data structure +layouts and accessors, or small macros, inline functions and templates +(ten or fewer lines in length), you do both of the following: + + a) Give prominent notice with each copy of the object code that the + Library is used in it and that the Library and its use are + covered by this License. + + b) Accompany the object code with a copy of the GNU GPL and this license + document. + + 4. Combined Works. + + You may convey a Combined Work under terms of your choice that, +taken together, effectively do not restrict modification of the +portions of the Library contained in the Combined Work and reverse +engineering for debugging such modifications, if you also do each of +the following: + + a) Give prominent notice with each copy of the Combined Work that + the Library is used in it and that the Library and its use are + covered by this License. + + b) Accompany the Combined Work with a copy of the GNU GPL and this license + document. + + c) For a Combined Work that displays copyright notices during + execution, include the copyright notice for the Library among + these notices, as well as a reference directing the user to the + copies of the GNU GPL and this license document. + + d) Do one of the following: + + 0) Convey the Minimal Corresponding Source under the terms of this + License, and the Corresponding Application Code in a form + suitable for, and under terms that permit, the user to + recombine or relink the Application with a modified version of + the Linked Version to produce a modified Combined Work, in the + manner specified by section 6 of the GNU GPL for conveying + Corresponding Source. + + 1) Use a suitable shared library mechanism for linking with the + Library. A suitable mechanism is one that (a) uses at run time + a copy of the Library already present on the user's computer + system, and (b) will operate properly with a modified version + of the Library that is interface-compatible with the Linked + Version. + + e) Provide Installation Information, but only if you would otherwise + be required to provide such information under section 6 of the + GNU GPL, and only to the extent that such information is + necessary to install and execute a modified version of the + Combined Work produced by recombining or relinking the + Application with a modified version of the Linked Version. (If + you use option 4d0, the Installation Information must accompany + the Minimal Corresponding Source and Corresponding Application + Code. If you use option 4d1, you must provide the Installation + Information in the manner specified by section 6 of the GNU GPL + for conveying Corresponding Source.) + + 5. Combined Libraries. + + You may place library facilities that are a work based on the +Library side by side in a single library together with other library +facilities that are not Applications and are not covered by this +License, and convey such a combined library under terms of your +choice, if you do both of the following: + + a) Accompany the combined library with a copy of the same work based + on the Library, uncombined with any other library facilities, + conveyed under the terms of this License. + + b) Give prominent notice with the combined library that part of it + is a work based on the Library, and explaining where to find the + accompanying uncombined form of the same work. + + 6. Revised Versions of the GNU Lesser General Public License. + + The Free Software Foundation may publish revised and/or new versions +of the GNU Lesser General Public License from time to time. Such new +versions will be similar in spirit to the present version, but may +differ in detail to address new problems or concerns. + + Each version is given a distinguishing version number. If the +Library as you received it specifies that a certain numbered version +of the GNU Lesser General Public License "or any later version" +applies to it, you have the option of following the terms and +conditions either of that published version or of any later version +published by the Free Software Foundation. If the Library as you +received it does not specify a version number of the GNU Lesser +General Public License, you may choose any version of the GNU Lesser +General Public License ever published by the Free Software Foundation. + + If the Library as you received it specifies that a proxy can decide +whether future versions of the GNU Lesser General Public License shall +apply, that proxy's public statement of acceptance of any version is +permanent authorization for you to choose that version for the +Library. diff --git a/README.md b/README.md new file mode 100644 index 0000000..9befc03 --- /dev/null +++ b/README.md @@ -0,0 +1,47 @@ +# ntree - an n-dimensional tree structure in Go. + +This library is an abstraction of the design behind +[Quadtrees](https://en.wikipedia.org/wiki/Quadtree) and +[Octrees](https://en.wikipedia.org/wiki/Octree), useful data structures in 2d +and 3d graphics. Those cases are n=2 and n=3 specific cases for this library, +which should handle any case up to 63 dimensions. + +Note that each additional dimension added makes the tree structure exponentially +slower, so cases over about 16 dimensions are not recommended. + +## Install + +This library is fully installable via go get: + + go get -u -v github.com/nergdron/ntree + +## Tests and Benchmarks + +There are useful unit tests and benchmarks included. Run them with the +standard go test command: + + go test -bench=.* + +You can also verify thread safety by running the same tests with more +active CPUs: + + go test -bench=.* -cpu 4 + +You should find that Add() becomes slightly slower per op with more concurrency +due to lock contention, but Search() becomes linearly faster due to parallelism. + +## License + +NTree is free software: you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +kdtree is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with kdtree. If not, see +[http://www.gnu.org/licenses/](http://www.gnu.org/licenses/). diff --git a/ntree.go b/ntree.go new file mode 100644 index 0000000..3f7126e --- /dev/null +++ b/ntree.go @@ -0,0 +1,221 @@ +// NTrees are an N-dimensional subdividing spacial representation. +// Common dimension-specific types are the 2-dimensional (quadtree) and +// 3-dimensional (octree) variants. This library supports an arbitrary number of +// dimensions, implemented in the same manner as those specific cases. + +package ntree + +import ( + "errors" + "strconv" + "sync" + + "github.com/cznic/mathutil" +) + +// Maximum number of dimensions handled by this lib. Currently restricted by +// number of usable bits in Go's int type. +const MaxN = 63 + +// NTree is a bounding box in N-dimensional space, along with optional data +// and children. +type NTree struct { + // The bounding n-dimensional box for this ntree. It should always + // be true that origin[i] += bounds[i] contains p.coords[i]. + center, bounds []float64 + // Optional piece of data to associate with this node. Location may be + // imprecise on leaf nodes. + p *Point + // Slice for child storage, should be 2^n if initialized. + children []*NTree + // Used to coordinate write operations, concurrency. + mutex sync.RWMutex + // keep track of child point counts under each node, useful for + // histograms, density predictions, etc. + count uint64 +} + +// New creates an ntree root node, using N dimensional slices for +// the center coordinates and relative bounds of the tree space. +// Bounds slice values must be positive, as they define +// a range of center[i] +- bounds[i] for each dimension. +// +// Returns an error if center and bounds don't have the same cardinality, +// or a bounds dimension is <= 0. +func New(center, bounds []float64) (nt *NTree, err error) { + if len(center) > MaxN { + return nil, errors.New("64 bit ints limit this library to <= 63 dimensions") + } + if len(center) != len(bounds) { + return nil, errors.New("center and bounds have mismatched lengths") + } + if len(center) == 0 { + return nil, errors.New("Can't have 0-dimensional ntree") + } + for i := range bounds { + if bounds[i] <= 0 { + return nil, errors.New("Dimension " + strconv.FormatInt(int64(i), 10) + + " has bounding size <= 0.") + } + } + nt = new(NTree) + nt.center = center + nt.bounds = bounds + return nt, nil +} + +// N returns the number of dimensions (N) for this NTree. +func (nt *NTree) N() int { + return len(nt.center) +} + +// Center returns the center coordinates for this NTree node. +func (nt *NTree) Center() []float64 { + nt.mutex.RLock() + defer nt.mutex.RUnlock() + return nt.center +} + +// Bounds returns the positive bounding dimensions from center for this NTree +// node. This node covers the entire space of Center() +- Bounds(). +func (nt *NTree) Bounds() []float64 { + nt.mutex.RLock() + defer nt.mutex.RUnlock() + return nt.bounds +} + +// BoundPoints returns the min and max points for this NTree node. +// This is a shortcut instead of doing the Center() +- Bounds() math manually. +func (nt *NTree) BoundPoints() (min, max []float64) { + nt.mutex.RLock() + defer nt.mutex.RUnlock() + min = make([]float64, len(nt.center)) + max = make([]float64, len(nt.center)) + for i := range nt.center { + min[i] = nt.center[i] - nt.bounds[i] + max[i] = nt.center[i] + nt.bounds[i] + } + return min, max +} + +// Point returns the optional data chunk associated with this NTree nodes. +// This will return null if there's no Point on this node, which should happen +// on any non-leaf node. +func (nt *NTree) Point() *Point { + nt.mutex.RLock() + defer nt.mutex.RUnlock() + return nt.p +} + +// Count returns this node's estimate of how many Points lie within it. +func (nt *NTree) Count() uint64 { + nt.mutex.RLock() + defer nt.mutex.RUnlock() + return nt.count +} + +// Contains checks if point p is within the bounds of the ntree. +// Returns an error if len(p) != nt.N(). +func (nt *NTree) Contains(p *Point) (bool, error) { + nt.mutex.RLock() + defer nt.mutex.RUnlock() + if p == nil { + return false, errors.New("Point is nil") + } + if len(p.Coords) != nt.N() { + return false, errors.New("Point is " + strconv.FormatUint(uint64(len(p.Coords)), 10) + + " dimensional, NTree is " + strconv.FormatUint(uint64(nt.N()), 10)) + } + // fmt.Println(p) + for i := range p.Coords { + if (nt.center[i]-nt.bounds[i]) > p.Coords[i] || (nt.center[i]+nt.bounds[i]) < p.Coords[i] { + return false, nil + } + } + return true, nil +} + +// Bitwise operations on array indices are used to keep track of what subset of +// space each child occupies, as described here: +// http://www.brandonpelfrey.com/blog/coding-a-simple-octree/ +func hasBit(n int, pos uint) bool { + val := n & (1 << pos) + return (val > 0) +} + +func setBit(n int, pos uint) int { + n |= (1 << pos) + return n +} + +// Add inserts a new Point into the NTree. Returns an error on any failure, +// or nil. +func (nt *NTree) Add(p *Point) error { + in, err := nt.Contains(p) + if err != nil { + return err + } + if !in { + return errors.New("Point doesn't fall within bounds of NTree.") + } + nt.mutex.Lock() + if nt.p == nil && nt.children == nil { + defer nt.mutex.Unlock() + // simplest case, add to current node + nt.p = p + nt.count++ + return nil + } + if nt.children != nil { + defer nt.mutex.Unlock() + // recurse into children by generating child bounding bitmask + var target int + for j := range nt.center { + if p.Coords[j] > nt.center[j] { + target = setBit(target, uint(j)) + } + } + err = nt.children[target].Add(p) + if err == nil { + nt.count++ + } + return err + } + if nt.p != nil && nt.children == nil { + // create children, re-add current node's Point data, then add Point p. + size := mathutil.ModPowUint64(2, uint64(nt.N()), mathutil.MaxInt) + nt.children = make([]*NTree, size) + // create new child nodes with correct bounds + for i := range nt.children { + // determine child dimensions + center := make([]float64, nt.N()) + bounds := make([]float64, nt.N()) + for j := range center { + // use bitmask of child index to determine dimension range for child. + // positive bit means positive range, otherwise negative range. + if hasBit(i, uint(j)) { + bounds[j] = nt.bounds[j] / 2.0 + center[j] = nt.center[j] + bounds[j] + } else { + bounds[j] = nt.bounds[j] / 2.0 + center[j] = nt.center[j] - bounds[j] + } + } + if nt.children[i], err = New(center, bounds); err != nil { + return err + } + } + // remove current Point data and re-add so it cascades into child nodes. + // need to bounce the mutex for this, prossible race condition? + curP := nt.p + nt.p = nil + nt.count-- + nt.mutex.Unlock() + if err = nt.Add(curP); err != nil { + return err + } + // now add new Point + return nt.Add(p) + } + return nil +} diff --git a/ntree_test.go b/ntree_test.go new file mode 100644 index 0000000..62969f8 --- /dev/null +++ b/ntree_test.go @@ -0,0 +1,195 @@ +package ntree + +import ( + "math/rand" + "runtime" + "sync" + "testing" + "time" +) + +var wg sync.WaitGroup + +func init() { + // Use different values every test to try and catch edge cases. + rand.Seed(time.Now().Unix()) +} + +func TestInit(t *testing.T) { + // test all available dimensional combinations + for n := 1; n <= MaxN; n++ { + t.Log("Testing", n, "dimension(s).") + center := make([]float64, n) + bounds := make([]float64, n) + for i := range center { + center[i] = 0 + bounds[i] = 1 + } + nt, err := New(center, bounds) + if err != nil { + t.Fatal(err) + } + if nt.N() != n { + t.Fatal(n, "dimensional tree returned n=", nt.N()) + } + } +} + +func TestAdd(t *testing.T) { + count := 500 + max := runtime.GOMAXPROCS(-1) + // > 16 dimensions quickly becomes impractical for memory and processing reasons. + for n := 1; n <= 16; n++ { + t.Log("Testing", n, "dimension(s).") + center := make([]float64, n) + bounds := make([]float64, n) + p1 := make([]float64, n) + for i := range center { + center[i] = 0.0 + bounds[i] = 1.0 + p1[i] = -1.0 + } + nt, err := New(center, bounds) + if err != nil { + t.Fatal(err, n) + } + wg.Add(max) + for i := 0; i < max; i++ { + go func() { + for j := 0; j < count/max; j++ { + p := new(Point) + p.Coords = make([]float64, n) + for j := range p.Coords { + p.Coords[j] = (rand.Float64() * 2.0) - 1.0 + } + if err = nt.Add(p); err != nil { + t.Error(err, n, i) + } + } + wg.Done() + }() + } + count = (count / max) * max + + wg.Wait() + points, err := nt.Search(p1, bounds) + if err != nil { + t.Fatal(err) + } + if len(points) != count { + t.Fatal("Search returned", len(points), "points instead of", count) + } + if nt.Count() != uint64(count) { + t.Error("Tree estimated incorrect, sees", nt.Count(), "points instead of", count) + } + } +} + +func benchAdd(b *testing.B, n int) { + b.StopTimer() + max := runtime.GOMAXPROCS(-1) + center := make([]float64, n) + bounds := make([]float64, n) + for i := range center { + center[i] = 0 + bounds[i] = 1 + } + nt, err := New(center, bounds) + if err != nil { + b.Fatal(err) + } + wg.Add(max) + b.StartTimer() + for i := 0; i < max; i++ { + go func() { + for j := 0; j < b.N/max; j++ { + p := new(Point) + p.Coords = make([]float64, n) + for j := range p.Coords { + p.Coords[j] = (rand.Float64() * 2.0) - 1.0 + } + if err = nt.Add(p); err != nil { + b.Error(err) + } + } + wg.Done() + }() + } + wg.Wait() +} + +func benchSearch(b *testing.B, n int) { + b.StopTimer() + max := runtime.GOMAXPROCS(-1) + center := make([]float64, n) + bounds := make([]float64, n) + for i := range center { + center[i] = 0 + bounds[i] = 1 + } + nt, err := New(center, bounds) + if err != nil { + b.Fatal(err) + } + // 10k points gives a reasonable search space. + for i := 0; i < 10000; i++ { + p := new(Point) + p.Coords = make([]float64, n) + for j := range p.Coords { + p.Coords[j] = (rand.Float64() * 2.0) - 1.0 + } + if err = nt.Add(p); err != nil { + b.Fatal(err) + } + } + var swap float64 + var count uint64 + wg.Add(max) + + b.StartTimer() + for i := 0; i < max; i++ { + go func() { + p1 := make([]float64, n) + p2 := make([]float64, n) + for j := 0; j < (b.N / max); j++ { + // generate random search area + for j := range p1 { + p1[j] = (rand.Float64() * 2.0) - 1.0 + p2[j] = (rand.Float64() * 2.0) - 1.0 + if p1[j] > p2[j] { + swap = p1[j] + p1[j] = p2[j] + p2[j] = swap + } + } + // find points within it! + points, err := nt.Search(p1, p2) + if err != nil { + b.Fatal(err) + } + count += uint64(len(points)) + } + wg.Done() + }() + } + wg.Wait() + if count == 0 && b.N > 100 { + b.Log("Failed to find any points after", b.N, "searches.") + } +} + +func BenchmarkQuadtreeAdd(b *testing.B) { + benchAdd(b, 2) +} + +func BenchmarkQuadtreeSearch(b *testing.B) { + benchSearch(b, 2) +} + +func BenchmarkOctreeAdd(b *testing.B) { + benchAdd(b, 3) +} + +func BenchmarkOcttreeSearch(b *testing.B) { + benchSearch(b, 3) +} diff --git a/point.go b/point.go new file mode 100644 index 0000000..806427f --- /dev/null +++ b/point.go @@ -0,0 +1,8 @@ +package ntree + +// Point information to be stored in an NTree leaf node. +type Point struct { + Coords []float64 + // Arbitrary data attached to this Point. + Data *interface{} +} diff --git a/search.go b/search.go new file mode 100644 index 0000000..a2e0c38 --- /dev/null +++ b/search.go @@ -0,0 +1,59 @@ +package ntree + +import "errors" + +// Iter runs function f on every node in the tree. +func (nt *NTree) Iter(f func(n *NTree)) { + nt.mutex.Lock() + defer nt.mutex.Unlock() + f(nt) + if nt.children != nil { + for i := range nt.children { + f(nt.children[i]) + } + } +} + +// Search finds all Points falling within the bounding box between p1 and p2. +// Note that it is assumed for every dimension i, p1[i] <= p2[i]. +// This is an inclusive search, so Points whose coordinates are equal to +// the supplied bounds in a given dimension will match. +// +// Returns nil, error if the length of p1, p2 don't match +// nt.N(). +func (nt *NTree) Search(p1, p2 []float64) (points []*Point, err error) { + nt.mutex.RLock() + defer nt.mutex.RUnlock() + if len(p1) != len(p2) || len(p2) != nt.N() { + return nil, errors.New("Bounding points have different dimensions than tree.") + } + if nt.children == nil && nt.p != nil { + // check local Point for leaf node + for i := range nt.p.Coords { + if nt.p.Coords[i] < p1[i] || nt.p.Coords[i] > p2[i] { + return nil, nil + } + } + return []*Point{nt.p}, nil + } + if nt.children != nil { + // check children for matching bounds, and collect matching points from them. + points = make([]*Point, 0) + for _, child := range nt.children { + for i := range p1 { + s1 := child.center[i] - child.bounds[i] + s2 := child.center[i] + child.bounds[i] + // skip this child if outside this dimension's bounds + if (s1 < p1[i] && s2 < p1[i]) || (s1 > p2[i] && s2 > p2[i]) { + continue + } + } + p, err := child.Search(p1, p2) + if err != nil { + return nil, err + } + points = append(points, p...) + } + } + return points, nil +}