1 /** Interval Tree backed by Implicit Augmented Interval Tree, by Heng Li
2 
3     See: https://github.com/lh3/cgranges/
4 
5     Wrapper copyright: Copyright 2019 James S Blachly, MD
6     Wrapper license: MIT
7 */
8 module intervaltree.iitree;
9 
10 import core.stdc.stdlib;    // malloc
11 import core.stdc.string;    // memcpy
12 import core.stdc.stdint;
13 
14 import core.memory;
15 
16 import std.bitmanip;
17 import std.string : toStringz;
18 import std.traits;
19 
20 debug import std.stdio;
21 
22 import intervaltree.cgranges;
23 version(instrument) public import intervaltree.cgranges: _iitree_visited, _iitree_visited_size, _iitree_visited_capacity;
24 
25 /** Implicit Interval Tree */
26 struct IITree(IntervalType)
27 if (__traits(hasMember, IntervalType, "start") &&
28     __traits(hasMember, IntervalType, "end"))
29 {
30     cgranges_t* cr;     /// encapsulated range
31     debug bool indexed; /// ensure the IITree is indexed before query
32 
33     invariant
34     {
35         assert(this.cr !is null);
36     }
37     ~this()
38     {
39         if (this.cr !is null)
40             cr_destroy(this.cr);
41     }
42     @disable this(this);    /// if reenable, must add refcounting and check to destructor
43 
44     alias add = insert; // prior API 
45 
46     /// Insert interval for contig
47     ///
48     /// Note this differs from the other trees in the package in inclusion of "contig" parameter,
49     /// because underlying cgranges has built-in hashmap and essentially stores multiple trees.
50     /// Passing a \0-terminated C string contig will be faster than passing a D string or char[],
51     /// due to the need to call toStringz before calling the C API.
52     ///
53     /// last param "label" of cr_add not used by cgranges as of 2019-05-04
54     ///
55     /// Params:
56     ///     S = string-like contig name (if absent will pass "default" to cr_add)
57     ///     i = IntervalType or IntervalType*
58     ///     trackGC = (Default true) add i to the GC scanned ranges. see discussion
59     ///     GCptr   = (Default true) when i is IntervalType*, is the pointer to GC-mgd memory?
60     ///
61     /// Non-payload variant: params contig, start, end
62     ///
63     /// Discussion:
64     ///     This container structure may store an IntervalType in one of two ways.
65     ///     First, you can pass a pointer which will be stored directly without additional
66     ///     memory allocations. Note however that if this pointer is to garbage collected
67     ///     memory and no other reference exists in the D code, it could be swept away.
68     ///     Even if it is not to GC memory, if it encapsulates a pointer (e.g., a string )
69     ///     you can still be bitten. Thus trackGC defaults to true. If unneeded, you might
70     ///     obtain a marginal speedup by setting trackGC = false.
71     ///
72     ///     The interval to be contained may also be passed by value. In this case,
73     ///     memory will be allocated with malloc and the pointer will be stored. In this case
74     ///     the same caveats regarding contained GC-pointing objects like D-arrays and strings
75     ///     still apply. For example, passing a struct by value may not necessitate tracking
76     ///     the memory in the GC if it contains only value types, but if the struct contains
77     ///     a D-style array or string, it must be tracked
78     ///
79     ///     Finally, there is a variant missing the first contig parameter. Unlike other trees,
80     ///     cgranges requires a string contig parameter, so a default value of "default"
81     ///     will be supplied.
82     cr_intv_t* insert(S)(S contig, IntervalType* i, bool trackGC = true, bool GCptr = true)
83     if(isSomeString!S || is(S: const(char)*))
84     in ( i !is null )
85     {
86         if (GCptr)  // i is ptr to GC-managed memory
87             GC.addRoot(i);
88         else        // i is ptr to non-GC heap mem but may contain ptr to GC-mem
89             if (trackGC) GC.addRange(i, IntervalType.sizeof, typeid(IntervalType));
90 
91         static if (isSomeString!S)
92             return cr_add(this.cr, toStringz(contig), i.start, i.end, 0, i);
93         else
94             return cr_add(this.cr, contig, i.start, i.end, 0, i);
95     }
96     /// ditto
97     cr_intv_t* insert(S)(S contig, IntervalType i, bool trackGC = true)
98     if(isSomeString!S || is(S: const(char)*))
99     {
100         // TODO: this is a direct leak reported by LSAN (was indirect until i added free(cr->r) in cgranges.c)
101         IntervalType* iheap = cast(IntervalType *) malloc(IntervalType.sizeof);
102         memcpy(iheap, &i, IntervalType.sizeof);
103         if (trackGC) GC.addRange(iheap, IntervalType.sizeof, typeid(IntervalType));
104         
105         static if (isSomeString!S)
106             return cr_add(this.cr, toStringz(contig), i.start, i.end, 0, iheap);
107         else
108             return cr_add(this.cr, contig, i.start, i.end, 0, iheap);
109     }
110     /// ditto
111     cr_intv_t* insert(I)(I i, bool trackGC = true)
112     {
113         pragma(inline,true);
114         immutable(char) *contig = "default";
115         return insert(contig, i, trackGC);
116     }
117     /// ditto
118     cr_intv_t* insert(const(char)* contig, int start, int end)
119     {
120         return cr_add(this.cr, contig, start, end, 0, null);
121     }
122 
123     /// Index the data structure -- required after all inserts completed, before query
124     @nogc nothrow
125     void index()
126     {
127         cr_index(this.cr);
128         debug { this.indexed = true; }
129     }
130 
131     /// Locate and return intervals overlapping parameter qinterval in contig
132     ///
133     /// qinterval must have members "start" and "end" (just like IntervalType
134     /// stored in the tree, but the types needn't be the same).
135     ///
136     /// Note that because the cgranges IITree stores contig this must be included,
137     /// unlike the other interval tree implementations.
138     ///
139     /// findOverlapsWith may also be called with \0-terminated contig, integer start, end
140     auto findOverlapsWith(T)(const(char)[] contig, T qinterval)
141     if (__traits(hasMember, T, "start") &&
142     __traits(hasMember, T, "end"))
143     {
144         version(DigitalMars) pragma(inline);
145         version(GNU) pragma(inline, true);
146         version(LDC) pragma(inline, true);
147         return findOverlapsWith(toStringz(contig), qinterval.start, qinterval.end);
148     }
149     /// ditto
150     const(cr_intv_t)[] findOverlapsWith(const(char)* contig, int start, int end)
151     {
152         debug
153         {
154             if (!this.indexed) {
155                 stderr.writeln("WARNING cgranges: query before index!");
156                 this.index();
157             }
158         }
159 
160         // static to prevent cr_overlap from calling malloc/realloc every time
161         // TODO should move to struct level then free(b) in destructor
162         static int64_t *b;
163         static int64_t m_b;
164         const auto n_b = cr_overlap(this.cr, contig, start, end, &b, &m_b);
165         if (!n_b) return [];
166 
167         if (n_b == 1)
168             return this.cr.r[b[0] .. (b[0] + 1)];
169         else {
170             // can't do this.cr.r[b[0] .. (b[0] + n_b)] because elements of b[] may not be contiguous throughout r
171             // PROOF:
172             // consider intervals (3,10) (4, 6) (5, 12) (6, 20) (7,15) which lie in r[] sorted by start coord
173             // coordinate 7 overlaps first and last
174             /+ WORKS +/
175             cr_intv_t[] ret;
176             ret.length = n_b;
177             for(int i; i<n_b; i++)
178             {
179                 ret[i] = this.cr.r[b[i]];
180             }
181             return ret;
182         }
183         //const(cr_intv_t)[] ret = this.cr.r[b[0] .. (b[0] + n_b)];
184         //return ret;
185     }
186 }