1 module intervaltree.cgranges;
2 
3 import core.stdc.stdint;
4 import std.bitmanip;
5 
6 extern(C):
7 @nogc:
8 nothrow:
9 /* The MIT License
10    Copyright (c) 2019 Dana-Farber Cancer Institute
11    Permission is hereby granted, free of charge, to any person obtaining
12    a copy of this software and associated documentation files (the
13    "Software"), to deal in the Software without restriction, including
14    without limitation the rights to use, copy, modify, merge, publish,
15    distribute, sublicense, and/or sell copies of the Software, and to
16    permit persons to whom the Software is furnished to do so, subject to
17    the following conditions:
18    The above copyright notice and this permission notice shall be
19    included in all copies or substantial portions of the Software.
20    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
21    EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
22    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
23    NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
24    BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
25    ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
26    CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
27    SOFTWARE.
28 */
29 
// Instrumentation globals, declared only when compiling with -version=instrument.
// They are `extern`: this module merely declares them; storage is defined in
// another object file.
// NOTE(review): judging by the names these look like a growable buffer
// (pointer, used size, capacity) of visited tree nodes — confirm against the
// defining C/D source.
version (instrument)
{
    extern __gshared
    {
        int32_t* _iitree_visited;
        int32_t  _iitree_visited_size;
        int32_t  _iitree_visited_capacity;
    }
}
35 
/// A contig. Field order and types mirror the C struct of the same name.
struct cr_ctg_t
{
    /// name of the contig
    char* name;

    /// max length seen in data
    int32_t len;

    /// Undocumented in this binding.
    /// NOTE(review): presumably the level of the root of this contig's
    /// implicit interval tree — confirm against cgranges.c
    int32_t root_k;

    /// NOTE(review): the header comments both `n` and `off` as "sum of lengths
    /// of previous contigs"; in upstream cgranges.c `n` appears to be the
    /// number of intervals on this contig — confirm.
    int64_t n;

    /// NOTE(review): see the note on `n`; upstream this appears to be the
    /// offset of the contig's first interval in cgranges_t.r — confirm.
    int64_t off;
}
44 
/// An interval. Field order and widths mirror the C struct of the same name.
struct cr_intv_t
{
    /// Packed coordinates.
    /// Prior to cr_index(): x = ctg_id<<32|start_pos;
    /// after cr_index():    x = start_pos<<32|end_pos
    uint64_t x;

    // Stand-in for the C bitfield declaration `uint32_t y:31, rev:1;`:
    // a 31-bit `y` and a 1-bit `rev`, together 32 bits.
    // NOTE(review): the meaning of `y` and `rev` is not documented here —
    // confirm against cgranges.c before relying on either.
    mixin(bitfields!(uint32_t, "y",   31,
                     uint32_t, "rev",  1));

    /// NOT used
    int32_t label;

    /// Data payload / encapsulated object (modified also in cgranges.h/.c by JSB)
    void* data;

    /// Since we are building interval trees, expose the payload as "interval"
    /// for consistency with avltree and splaytree (although the use of a
    /// pointer here is itself inconsistent with them).
    alias interval = data;
}
60 
/// Genomic ranges container. Field order and types mirror the C struct.
struct cgranges_t
{
    /// number of intervals
    int64_t n_r;
    /// max number of intervals
    int64_t m_r;
    /// list of intervals (of size _n_r_)
    cr_intv_t* r;

    /// number of contigs
    int32_t n_ctg;
    /// max number of contigs
    int32_t m_ctg;
    /// list of contigs (of size _n_ctg_)
    cr_ctg_t* ctg;

    /// dictionary for converting contig names to integers
    void* hc;
}
71 
pragma(inline, true)
{
/// Start position of an interval: the upper 32 bits of `x`.
/// Only meaningful once cr_index() has rewritten x as start_pos<<32|end_pos.
int32_t cr_st(const(cr_intv_t) *r)
{
    const uint64_t packed = r.x;
    return cast(int32_t)(packed >> 32);
}

/// End position of an interval: the lower 32 bits of `x` (truncating cast).
/// Only meaningful once cr_index() has rewritten x as start_pos<<32|end_pos.
int32_t cr_en(const(cr_intv_t) *r)
{
    const uint64_t packed = r.x;
    return cast(int32_t) packed;
}

/// Start position of the i-th interval in `cr.r` (no bounds check).
int32_t cr_start(const(cgranges_t) *cr, int64_t i)
{
    const(cr_intv_t)* intv = &cr.r[i];
    return cr_st(intv);
}

/// End position of the i-th interval in `cr.r` (no bounds check).
int32_t cr_end(const(cgranges_t) *cr, int64_t i)
{
    const(cr_intv_t)* intv = &cr.r[i];
    return cr_en(intv);
}

/// Label of the i-th interval in `cr.r` (no bounds check).
int32_t cr_label(const(cgranges_t) *cr, int64_t i)
{
    const(cr_intv_t)* intv = &cr.r[i];
    return intv.label;
}
}
85 
/// Initialize a cgranges_t and return a pointer to it.
///
/// NOTE(review): implemented on the C side; presumably the result is
/// heap-allocated and must be released with cr_destroy() — confirm.
cgranges_t *cr_init();
88 
/// Deallocate a cgranges_t.
///
/// NOTE(review): presumably pairs with cr_init(); whether the `data`
/// payloads attached to intervals are freed as well is not visible here —
/// confirm against the C implementation.
void cr_destroy(cgranges_t *cr);
91 
/** Add an interval (JSB: data param)

    Params:
        cr        =  cgranges struct
        ctg       =  contig name, \0 terminated C string
        st        =  start coord
        en        =  end coord
        label_int =  integer label (the `label` field of cr_intv_t is marked "NOT used")
        data      =  payload pointer (NOTE(review): presumably stored in the interval's `data` field — confirm)

    Returns: pointer to a cr_intv_t. NOTE(review): presumably the newly added
    interval, which may stop being valid once the interval list is grown or
    sorted by later calls — confirm.
*/
cr_intv_t *cr_add(cgranges_t *cr, const(char) *ctg, int32_t st, int32_t en, int32_t label_int, void * data);
94 
/// Sort and index intervals.
///
/// Per the comment on cr_intv_t.x, this call changes the packing of `x`
/// from ctg_id<<32|start_pos to start_pos<<32|end_pos; the cr_st()/cr_en()
/// accessors decode the latter layout.
void cr_index(cgranges_t *cr);
97 
/** Find (and count) overlaps

    Params:
        cr   =  cgranges struct
        ctg  =  contig \0 term Cstring
        st   =  start coord
        en   =  end coord
        b_   =  array (returned); passed by address so the callee can set it
        m_b_ =  max b; passed by address

    Returns: an int64_t. NOTE(review): presumably the number of overlaps found,
    with `*b_` holding that many indices into `cr.r` and `*m_b_` the allocated
    capacity of `*b_` (buffer owned and freed by the caller) — confirm against
    the C implementation.
*/
int64_t cr_overlap(const(cgranges_t) *cr, const(char) *ctg, int32_t st, int32_t en, int64_t **b_, int64_t *m_b_);
109 
/** Add a contig and length. Call this for desired contig ordering. _len_ can be 0.

    Params:
        cr  =  cgranges struct
        ctg =  contig name, \0 terminated C string
        len =  contig length (may be 0)

    Returns: an int32_t. NOTE(review): presumably the integer ID of the contig — confirm.
*/
int32_t cr_add_ctg(cgranges_t *cr, const(char) *ctg, int32_t len);
112 
/** Get the contig ID given its name

    Params:
        cr  =  cgranges struct (not modified)
        ctg =  contig name, \0 terminated C string

    Returns: the contig ID as int32_t. NOTE(review): presumably negative when
    the name is unknown — confirm against the C implementation.
*/
// Tail-const pointer spelling, matching the sibling prototypes; the C ABI is
// identical to the former head-const `const cgranges_t *cr`.
int32_t cr_get_ctg(const(cgranges_t) *cr, const(char) *ctg);