1 /** Interval Tree backed by Implicit Augmented Interval Tree, by Heng Li
2 
3     See: https://github.com/lh3/cgranges/
4 
5     Wrapper copyright: Copyright 2019 James S Blachly, MD
6     Wrapper license: MIT
7 */
8 module intervaltree.iitree;
9 
10 import core.stdc.stdlib;    // malloc
11 import core.stdc.string;    // memcpy
12 import core.stdc.stdint;
13 
14 import core.memory;
15 
16 import std.bitmanip;
17 import std.string : toStringz;
18 import std.traits;
19 
20 debug import std.stdio;
21 
22 import intervaltree.cgranges;
23 version(instrument) public import intervaltree.cgranges: _iitree_visited, _iitree_visited_size, _iitree_visited_capacity;
24 
25 /** Implicit Interval Tree */
26 struct IITree(IntervalType)
27 if (__traits(hasMember, IntervalType, "start") &&
28     __traits(hasMember, IntervalType, "end"))
29 {
30     cgranges_t* cr;     /// encapsulated range
31     debug bool indexed; /// ensure the IITree is indexed before query
32 
33     invariant
34     {
35         assert(this.cr !is null);
36     }
37     ~this()
38     {
39         if (this.cr !is null)
40             cr_destroy(this.cr);
41     }
42     @disable this(this);    /// if reenable, must add refcounting and check to destructor
43 
44     alias add = insert; // prior API 
45 
46     /// Insert interval for contig
47     ///
48     /// Note this differs from the other trees in the package in inclusion of "contig" parameter,
49     /// because underlying cgranges has built-in hashmap and essentially stores multiple trees.
50     /// Passing a \0-terminated C string contig will be faster than passing a D string or char[],
51     /// due to the need to call toStringz before calling the C API.
52     ///
53     /// last param "label" of cr_add not used by cgranges as of 2019-05-04
54     ///
55     /// Params:
56     ///     S = string-like contig name (if absent will pass "default" to cr_add)
57     ///     i = IntervalType or IntervalType*
58     ///     trackGC = (Default true) add i to the GC scanned ranges. see discussion
59     ///     GCptr   = (Default true) when i is IntervalType*, is the pointer to GC-mgd memory?
60     ///
61     /// Non-payload variant: params contig, start, end
62     ///
63     /// Discussion:
64     ///     This container structure may store an IntervalType in one of two ways.
65     ///     First, you can pass a pointer which will be stored directly without additional
66     ///     memory allocations. Note however that if this pointer is to garbage collected
67     ///     memory and no other reference exists in the D code, it could be swept away.
68     ///     Even if it is not to GC memory, if it encapsulates a pointer (e.g., a string )
69     ///     you can still be bitten. Thus trackGC defaults to true. If unneeded, you might
70     ///     obtain a marginal speedup by setting trackGC = false.
71     ///
72     ///     The interval to be contained may also be passed by value. In this case,
73     ///     memory will be allocated with malloc and the pointer will be stored. In this case
74     ///     the same caveats regarding contained GC-pointing objects like D-arrays and strings
75     ///     still apply. For example, passing a struct by value may not necessitate tracking
76     ///     the memory in the GC if it contains only value types, but if the struct contains
77     ///     a D-style array or string, it must be tracked
78     ///
79     ///     Finally, there is a variant missing the first contig parameter. Unlike other trees,
80     ///     cgranges requires a string contig parameter, so a default value of "default"
81     ///     will be supplied.
82     cr_intv_t* insert(S)(S contig, IntervalType* i, bool trackGC = true, bool GCptr = true)
83     if(isSomeString!S || is(S: const(char)*))
84     in ( i !is null )
85     {
86         if (GCptr)  // i is ptr to GC-managed memory
87             GC.addRoot(i);
88         else        // i is ptr to non-GC heap mem but may contain ptr to GC-mem
89             if (trackGC) GC.addRange(i, IntervalType.sizeof, typeid(IntervalType));
90 
91         static if (isSomeString!S)
92             return cr_add(this.cr, toStringz(contig), cast(int) i.start, cast(int) i.end, 0, i);
93         else
94             return cr_add(this.cr, contig, cast(int) i.start, cast(int) i.end, 0, i);
95     }
96     /// ditto
97     cr_intv_t* insert(S)(S contig, IntervalType i, bool trackGC = true)
98     if(isSomeString!S || is(S: const(char)*))
99     {
100         // TODO: this is a direct leak reported by LSAN (was indirect until i added free(cr->r) in cgranges.c)
101         IntervalType* iheap = cast(IntervalType *) malloc(IntervalType.sizeof);
102         memcpy(iheap, &i, IntervalType.sizeof);
103         if (trackGC) GC.addRange(iheap, IntervalType.sizeof, typeid(IntervalType));
104         
105         static if (isSomeString!S)
106             return cr_add(this.cr, toStringz(contig), cast(int) i.start, cast(int) i.end, 0, iheap);
107         else
108             return cr_add(this.cr, contig, cast(int) i.start, cast(int) i.end, 0, iheap);
109     }
110     /// ditto
111     cr_intv_t* insert(I)(I i, bool trackGC = true)
112     {
113         pragma(inline,true);
114         immutable(char) *contig = "default";
115         return insert(contig, i, trackGC);
116     }
117     /// ditto
118     cr_intv_t* insert(T)(const(char)* contig, T start, T end)
119     if (is(T == int) || is(T == uint) || is(T == long) || is(T == ulong))
120     {
121         return cr_add(this.cr, cast(int) contig, cast(int) start, end, 0, null);
122     }
123 
124     /// Index the data structure -- required after all inserts completed, before query
125     @nogc nothrow
126     void index()
127     {
128         cr_index(this.cr);
129         debug { this.indexed = true; }
130     }
131 
132     /// Locate and return intervals overlapping parameter qinterval in contig
133     ///
134     /// qinterval must have members "start" and "end" (just like IntervalType
135     /// stored in the tree, but the types needn't be the same).
136     ///
137     /// Note that because the cgranges IITree stores contig this must be included,
138     /// unlike the other interval tree implementations.
139     ///
140     /// findOverlapsWith may also be called with \0-terminated contig, integer start, end
141     auto findOverlapsWith(T)(const(char)[] contig, T qinterval)
142     if (__traits(hasMember, T, "start") &&
143     __traits(hasMember, T, "end"))
144     {
145         version(DigitalMars) pragma(inline);
146         version(GNU) pragma(inline, true);
147         version(LDC) pragma(inline, true);
148         return findOverlapsWith(toStringz(contig), qinterval.start, qinterval.end);
149     }
150     /// ditto
151     const(cr_intv_t)[] findOverlapsWith(T)(const(char)* contig, T start, T end)
152     if (is(T == int) || is(T == uint) || is(T == long) || is(T == ulong))
153     {
154         debug
155         {
156             if (!this.indexed) {
157                 stderr.writeln("WARNING cgranges: query before index!");
158                 this.index();
159             }
160         }
161 
162         // static to prevent cr_overlap from calling malloc/realloc every time
163         // TODO should move to struct level then free(b) in destructor
164         static int64_t *b;
165         static int64_t m_b;
166         const auto n_b = cr_overlap(this.cr, contig, cast(int) start, cast(int) end, &b, &m_b);
167         if (!n_b) return [];
168 
169         if (n_b == 1)
170             return this.cr.r[b[0] .. (b[0] + 1)];
171         else {
172             // can't do this.cr.r[b[0] .. (b[0] + n_b)] because elements of b[] may not be contiguous throughout r
173             // PROOF:
174             // consider intervals (3,10) (4, 6) (5, 12) (6, 20) (7,15) which lie in r[] sorted by start coord
175             // coordinate 7 overlaps first and last
176             /+ WORKS +/
177             cr_intv_t[] ret;
178             ret.length = n_b;
179             for(int i; i<n_b; i++)
180             {
181                 ret[i] = this.cr.r[b[i]];
182             }
183             return ret;
184         }
185         //const(cr_intv_t)[] ret = this.cr.r[b[0] .. (b[0] + n_b)];
186         //return ret;
187     }
188 }