1 /** Interval Tree backed by Implicit Augmented Interval Tree, by Heng Li 2 3 See: https://github.com/lh3/cgranges/ 4 5 Wrapper copyright: Copyright 2019 James S Blachly, MD 6 Wrapper license: MIT 7 */ 8 module intervaltree.iitree; 9 10 import core.stdc.stdlib; // malloc 11 import core.stdc.string; // memcpy 12 import core.stdc.stdint; 13 14 import core.memory; 15 16 import std.bitmanip; 17 import std.string : toStringz; 18 import std.traits; 19 20 debug import std.stdio; 21 22 import intervaltree.cgranges; 23 version(instrument) public import intervaltree.cgranges: _iitree_visited, _iitree_visited_size, _iitree_visited_capacity; 24 25 /** Implicit Interval Tree */ 26 struct IITree(IntervalType) 27 if (__traits(hasMember, IntervalType, "start") && 28 __traits(hasMember, IntervalType, "end")) 29 { 30 cgranges_t* cr; /// encapsulated range 31 debug bool indexed; /// ensure the IITree is indexed before query 32 33 invariant 34 { 35 assert(this.cr !is null); 36 } 37 ~this() 38 { 39 if (this.cr !is null) 40 cr_destroy(this.cr); 41 } 42 @disable this(this); /// if reenable, must add refcounting and check to destructor 43 44 alias add = insert; // prior API 45 46 /// Insert interval for contig 47 /// 48 /// Note this differs from the other trees in the package in inclusion of "contig" parameter, 49 /// because underlying cgranges has built-in hashmap and essentially stores multiple trees. 50 /// Passing a \0-terminated C string contig will be faster than passing a D string or char[], 51 /// due to the need to call toStringz before calling the C API. 52 /// 53 /// last param "label" of cr_add not used by cgranges as of 2019-05-04 54 /// 55 /// Params: 56 /// S = string-like contig name (if absent will pass "default" to cr_add) 57 /// i = IntervalType or IntervalType* 58 /// trackGC = (Default true) add i to the GC scanned ranges. see discussion 59 /// GCptr = (Default true) when i is IntervalType*, is the pointer to GC-mgd memory? 60 /// 61 /// Non-payload variant: params contig, start, end 62 /// 63 /// Discussion: 64 /// This container structure may store an IntervalType in one of two ways. 65 /// First, you can pass a pointer which will be stored directly without additional 66 /// memory allocations. Note however that if this pointer is to garbage collected 67 /// memory and no other reference exists in the D code, it could be swept away. 68 /// Even if it is not to GC memory, if it encapsulates a pointer (e.g., a string ) 69 /// you can still be bitten. Thus trackGC defaults to true. If unneeded, you might 70 /// obtain a marginal speedup by setting trackGC = false. 71 /// 72 /// The interval to be contained may also be passed by value. In this case, 73 /// memory will be allocated with malloc and the pointer will be stored. In this case 74 /// the same caveats regarding contained GC-pointing objects like D-arrays and strings 75 /// still apply. For example, passing a struct by value may not necessitate tracking 76 /// the memory in the GC if it contains only value types, but if the struct contains 77 /// a D-style array or string, it must be tracked 78 /// 79 /// Finally, there is a variant missing the first contig parameter. Unlike other trees, 80 /// cgranges requires a string contig parameter, so a default value of "default" 81 /// will be supplied. 82 cr_intv_t* insert(S)(S contig, IntervalType* i, bool trackGC = true, bool GCptr = true) 83 if(isSomeString!S || is(S: const(char)*)) 84 in ( i !is null ) 85 { 86 if (GCptr) // i is ptr to GC-managed memory 87 GC.addRoot(i); 88 else // i is ptr to non-GC heap mem but may contain ptr to GC-mem 89 if (trackGC) GC.addRange(i, IntervalType.sizeof, typeid(IntervalType)); 90 91 static if (isSomeString!S) 92 return cr_add(this.cr, toStringz(contig), cast(int) i.start, cast(int) i.end, 0, i); 93 else 94 return cr_add(this.cr, contig, cast(int) i.start, cast(int) i.end, 0, i); 95 } 96 /// ditto 97 cr_intv_t* insert(S)(S contig, IntervalType i, bool trackGC = true) 98 if(isSomeString!S || is(S: const(char)*)) 99 { 100 // TODO: this is a direct leak reported by LSAN (was indirect until i added free(cr->r) in cgranges.c) 101 IntervalType* iheap = cast(IntervalType *) malloc(IntervalType.sizeof); 102 memcpy(iheap, &i, IntervalType.sizeof); 103 if (trackGC) GC.addRange(iheap, IntervalType.sizeof, typeid(IntervalType)); 104 105 static if (isSomeString!S) 106 return cr_add(this.cr, toStringz(contig), cast(int) i.start, cast(int) i.end, 0, iheap); 107 else 108 return cr_add(this.cr, contig, cast(int) i.start, cast(int) i.end, 0, iheap); 109 } 110 /// ditto 111 cr_intv_t* insert(I)(I i, bool trackGC = true) 112 { 113 pragma(inline,true); 114 immutable(char) *contig = "default"; 115 return insert(contig, i, trackGC); 116 } 117 /// ditto 118 cr_intv_t* insert(T)(const(char)* contig, T start, T end) 119 if (is(T == int) || is(T == uint) || is(T == long) || is(T == ulong)) 120 { 121 return cr_add(this.cr, cast(int) contig, cast(int) start, end, 0, null); 122 } 123 124 /// Index the data structure -- required after all inserts completed, before query 125 @nogc nothrow 126 void index() 127 { 128 cr_index(this.cr); 129 debug { this.indexed = true; } 130 } 131 132 /// Locate and return intervals overlapping parameter qinterval in contig 133 /// 134 /// qinterval must have members "start" and "end" (just like IntervalType 135 /// stored in the tree, but the types needn't be the same). 136 /// 137 /// Note that because the cgranges IITree stores contig this must be included, 138 /// unlike the other interval tree implementations. 139 /// 140 /// findOverlapsWith may also be called with \0-terminated contig, integer start, end 141 auto findOverlapsWith(T)(const(char)[] contig, T qinterval) 142 if (__traits(hasMember, T, "start") && 143 __traits(hasMember, T, "end")) 144 { 145 version(DigitalMars) pragma(inline); 146 version(GNU) pragma(inline, true); 147 version(LDC) pragma(inline, true); 148 return findOverlapsWith(toStringz(contig), qinterval.start, qinterval.end); 149 } 150 /// ditto 151 const(cr_intv_t)[] findOverlapsWith(T)(const(char)* contig, T start, T end) 152 if (is(T == int) || is(T == uint) || is(T == long) || is(T == ulong)) 153 { 154 debug 155 { 156 if (!this.indexed) { 157 stderr.writeln("WARNING cgranges: query before index!"); 158 this.index(); 159 } 160 } 161 162 // static to prevent cr_overlap from calling malloc/realloc every time 163 // TODO should move to struct level then free(b) in destructor 164 static int64_t *b; 165 static int64_t m_b; 166 const auto n_b = cr_overlap(this.cr, contig, cast(int) start, cast(int) end, &b, &m_b); 167 if (!n_b) return []; 168 169 if (n_b == 1) 170 return this.cr.r[b[0] .. (b[0] + 1)]; 171 else { 172 // can't do this.cr.r[b[0] .. (b[0] + n_b)] because elements of b[] may not be contiguous throughout r 173 // PROOF: 174 // consider intervals (3,10) (4, 6) (5, 12) (6, 20) (7,15) which lie in r[] sorted by start coord 175 // coordinate 7 overlaps first and last 176 /+ WORKS +/ 177 cr_intv_t[] ret; 178 ret.length = n_b; 179 for(int i; i<n_b; i++) 180 { 181 ret[i] = this.cr.r[b[i]]; 182 } 183 return ret; 184 } 185 //const(cr_intv_t)[] ret = this.cr.r[b[0] .. (b[0] + n_b)]; 186 //return ret; 187 } 188 }