Skip to content

Wrong Output when using granges filter #13

@riasc

Description

@riasc

Hi Vince,

Thanks for this great library, which offers a fantastic speed-up. Although it is only intended for benchmarking, I used `granges filter` (via the command line) on a small dataset with partial overlaps; I haven't tested this using the library directly. I want to filter out the intervals in left that have no overlaps in right (e.g., filter --genome --left --right).

genome

chr14	251744
chrX	221185
chr18	138573
chr8	101337
chr1	60131

left:

chr1	3278	12709	intvl_83
chr1	13542	16554	intvl_88
chr1	19473	20158	intvl_89
chr1	24738	33373	intvl_94
chr1	37806	41968	intvl_95
chr1	42231	49198	intvl_96
chr1	50917	59900	intvl_98
chr14	4612	5516	intvl_1
chr14	6298	14399	intvl_2
chr14	17468	24457	intvl_3
chr14	25850	32491	intvl_4
chr14	35745	37396	intvl_5
chr14	40978	42521	intvl_6
chr14	44301	47325	intvl_7
chr14	48365	52757	intvl_8
chr14	56885	60612	intvl_9
chr14	64799	70360	intvl_10
chr14	72108	72921	intvl_11
chr14	74661	76902	intvl_14
chr14	81518	86267	intvl_19
chr14	86401	93780	intvl_20
chr14	94121	96687	intvl_21
chr14	98862	107238	intvl_28
chr14	111403	112345	intvl_30
chr14	112992	115624	intvl_33
chr14	120325	128338	intvl_41
chr14	133162	139280	intvl_42
chr14	141008	146707	intvl_43
chr14	149356	155488	intvl_45
chr14	156661	156786	intvl_47
chr14	161666	171494	intvl_48
chr14	175297	176195	intvl_51
chr14	177963	186769	intvl_54
chr14	191180	195284	intvl_66
chr14	196416	202921	intvl_72
chr14	206110	209561	intvl_74
chr14	213977	216445	intvl_76
chr14	218178	224565	intvl_80
chr14	228828	230125	intvl_84
chr14	233387	239248	intvl_85
chr14	240765	249646	intvl_97
chr18	620	3295	intvl_26
chr18	8118	17043	intvl_29
chr18	19129	25587	intvl_31
chr18	28462	36626	intvl_34
chr18	39621	48986	intvl_35
chr18	52910	61822	intvl_39
chr18	65902	74865	intvl_40
chr18	78965	81118	intvl_44
chr18	84550	88421	intvl_46
chr18	89154	96102	intvl_49
chr18	100024	100512	intvl_53
chr18	101234	103890	intvl_55
chr18	107168	114098	intvl_64
chr18	115888	118232	intvl_65
chr18	122338	130188	intvl_71
chr18	131354	133998	intvl_100
chr8	893	1504	intvl_50
chr8	4341	8598	intvl_52
chr8	9181	15018	intvl_56
chr8	19370	28461	intvl_57
chr8	32532	40382	intvl_58
chr8	42610	51910	intvl_68
chr8	56251	60320	intvl_69
chr8	61523	64226	intvl_77
chr8	65064	66768	intvl_78
chr8	67229	70104	intvl_79
chr8	72048	78333	intvl_81
chr8	83261	87591	intvl_87
chr8	89168	93727	intvl_91
chr8	97701	100625	intvl_99
chrX	314	930	intvl_12
chrX	4499	13333	intvl_13
chrX	16055	17261	intvl_15
chrX	18347	25316	intvl_16
chrX	29831	34220	intvl_17
chrX	34414	40682	intvl_18
chrX	40817	46927	intvl_22
chrX	50663	54980	intvl_23
chrX	56366	64902	intvl_24
chrX	65203	69929	intvl_25
chrX	70667	78223	intvl_27
chrX	80105	87922	intvl_32
chrX	88320	97866	intvl_36
chrX	99354	102559	intvl_37
chrX	107355	108213	intvl_38
chrX	109079	113346	intvl_59
chrX	116030	121324	intvl_60
chrX	124967	125450	intvl_61
chrX	126947	133101	intvl_62
chrX	137438	145668	intvl_63
chrX	150354	158187	intvl_67
chrX	161569	170514	intvl_70
chrX	171434	174005	intvl_73
chrX	174498	180190	intvl_75
chrX	180586	185816	intvl_82
chrX	186729	191388	intvl_86
chrX	194540	194872	intvl_90
chrX	197199	205217	intvl_92
chrX	209095	217194	intvl_93

right

chr1	2436	6078	intvl_83_5p
chr1	13340	14735	intvl_88_5p
chr1	18834	19488	intvl_89_5p
chr1	23054	25682	intvl_94_5p
chr1	37741	38490	intvl_95_5p
chr1	42152	45215	intvl_96_5p
chr1	50723	53317	intvl_98_5p
chr14	2322	4948	intvl_1_5p
chr14	6067	8118	intvl_2_5p
chr14	17092	19682	intvl_3_5p
chr14	25535	27832	intvl_4_5p
chr14	34991	36420	intvl_5_5p
chr14	39999	41072	intvl_6_5p
chr14	43737	45540	intvl_7_5p
chr14	48336	50393	intvl_8_5p
chr14	54896	58167	intvl_9_5p
chr14	63414	66466	intvl_10_5p
chr14	71847	72392	intvl_11_5p
chr14	74408	75291	intvl_14_5p
chr14	81107	81735	intvl_19_5p
chr14	86347	88149	intvl_20_5p
chr14	94018	94875	intvl_21_5p
chr14	98294	101816	intvl_28_5p
chr14	110496	111723	intvl_30_5p
chr14	112974	114126	intvl_33_5p
chr14	118947	124165	intvl_41_5p
chr14	130860	134109	intvl_42_5p
chr14	140864	143439	intvl_43_5p
chr14	148052	150350	intvl_45_5p
chr14	156332	156667	intvl_47_5p
chr14	159564	162410	intvl_48_5p
chr14	174008	175662	intvl_51_5p
chr14	177106	178883	intvl_54_5p
chr14	189029	193217	intvl_66_5p
chr14	196371	197736	intvl_72_5p
chr14	205758	207288	intvl_74_5p
chr14	211975	214379	intvl_76_5p
chr14	217617	220748	intvl_80_5p
chr14	228508	229099	intvl_84_5p
chr14	233352	236207	intvl_85_5p
chr14	240397	245173	intvl_97_5p
chr18	530	1736	intvl_26_5p
chr18	7474	9208	intvl_29_5p
chr18	18374	20070	intvl_31_5p
chr18	27338	32333	intvl_34_5p
chr18	39399	41016	intvl_35_5p
chr18	52594	54804	intvl_39_5p
chr18	64876	69784	intvl_40_5p
chr18	78154	79036	intvl_44_5p
chr18	84026	85118	intvl_46_5p
chr18	89103	92191	intvl_49_5p
chr18	98471	100107	intvl_53_5p
chr18	101082	101641	intvl_55_5p
chr18	106442	108286	intvl_64_5p
chr18	115654	115997	intvl_65_5p
chr18	121798	125531	intvl_71_5p
chr18	131233	132134	intvl_100_5p
chr8	782	1024	intvl_50_5p
chr8	3329	5049	intvl_52_5p
chr8	8927	10882	intvl_56_5p
chr8	19114	21355	intvl_57_5p
chr8	30793	35885	intvl_58_5p
chr8	42306	45662	intvl_68_5p
chr8	55850	57123	intvl_69_5p
chr8	60997	62392	intvl_77_5p
chr8	64932	65706	intvl_78_5p
chr8	67102	67470	intvl_79_5p
chr8	71951	74878	intvl_81_5p
chr8	81274	83810	intvl_87_5p
chr8	88704	90285	intvl_91_5p
chr8	96087	97831	intvl_99_5p
chrX	252	534	intvl_12_5p
chrX	4293	8440	intvl_13_5p
chrX	15331	16066	intvl_15_5p
chrX	18289	18949	intvl_16_5p
chrX	29178	30174	intvl_17_5p
chrX	34324	36497	intvl_18_5p
chrX	40775	41962	intvl_22_5p
chrX	49765	52739	intvl_23_5p
chrX	 56052	56921	intvl_24_5p
chrX	65199	65384	intvl_25_5p
chrX	70478	73534	intvl_27_5p
chrX	80073	82712	intvl_32_5p
chrX	88284	90735	intvl_36_5p
chrX	99342	100325	intvl_37_5p
chrX	105339	107384	intvl_38_5p
chrX	108727	110915	intvl_59_5p
chrX	115785	117445	intvl_60_5p
chrX	124573	125121	intvl_61_5p
chrX	126822	128647	intvl_62_5p
chrX	135597	137569	intvl_63_5p
chrX	150302	153749	intvl_67_5p
chrX	161267	162707	intvl_70_5p
chrX	171367	171541	intvl_73_5p
chrX	174427	175390	intvl_75_5p
chrX	180533	183172	intvl_82_5p
chrX	186408	188128	intvl_86_5p
chrX	193490	194699	intvl_90_5p
chrX	196241	198546	intvl_92_5p
chrX	208358	210859	intvl_93_5p

However, this gives me:

chr1	3278	12709	intvl_2
chr1	19473	20158	intvl_3
chr1	24738	33373	intvl_4
chr1	37806	41968	intvl_6
chr1	42231	49198	intvl_8
chr1	50917	59900	intvl_10
chr8	4341	8598	intvl_13
chr8	32532	40382	intvl_16
chr8	42610	51910	intvl_18
chr8	56251	60320	intvl_19
chr8	65064	66768	intvl_20
chr8	72048	78333	intvl_22
chr8	89168	93727	intvl_23
chr8	97701	100625	intvl_26
chr14	6298	14399	intvl_28
chr14	17468	24457	intvl_29
chr14	25850	32491	intvl_31
chr14	40978	42521	intvl_34
chr14	48365	52757	intvl_35
chr14	64799	70360	intvl_39
chr14	81518	86267	intvl_40
chr14	86401	93780	intvl_41
chr14	98862	107238	intvl_49
chr14	120325	128338	intvl_52
chr18	620	3295	intvl_58
chr18	8118	17043	intvl_68
chr18	19129	25587	intvl_69
chr18	28462	36626	intvl_78
chr18	39621	48986	intvl_81
chr18	52910	61822	intvl_83
chr18	65902	74865	intvl_89
chr18	89154	96102	intvl_91
chrX	4499	13333	intvl_94
chrX	18347	25316	intvl_95
chrX	34414	40682	intvl_96
chrX	40817	46927	intvl_98
chrX	50663	54980	intvl_99

It reports only a fraction of the overlaps, and the names in the name column are wrong. For example, `intvl_83` becomes `intvl_2`.
What am I missing?

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions