summaryrefslogtreecommitdiff
path: root/api/aceread.h
blob: 982a304c5254609dcb756e277b876fce8d048a74 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
#ifndef API_ACEREAD__H
#define API_ACEREAD__H

/*
 * $Id: aceread.h,v 1.13 2010/03/03 18:46:08 bollin Exp $
 *
 * ===========================================================================
 *
 *                            PUBLIC DOMAIN NOTICE
 *               National Center for Biotechnology Information
 *
 *  This software/database is a "United States Government Work" under the
 *  terms of the United States Copyright Act.  It was written as part of
 *  the author's official duties as a United States Government employee and
 *  thus cannot be copyrighted.  This software/database is freely available
 *  to the public for use. The National Library of Medicine and the U.S.
 *  Government have not placed any restriction on its use or reproduction.
 *
 *  Although all reasonable efforts have been taken to ensure the accuracy
 *  and reliability of the software and data, the NLM and the U.S.
 *  Government do not and cannot warrant the performance or results that
 *  may be obtained by using this software or data. The NLM and the U.S.
 *  Government disclaim all warranties, express or implied, including
 *  warranties of performance, merchantability or fitness for any particular
 *  purpose.
 *
 *  Please cite the author in any work or product based on this material.
 *
 * ===========================================================================
 *
 * Authors:  Colleen Bollin
 *
 */

#include <stdio.h>

#include <util/creaders/creaders_export.h>

#ifdef __cplusplus
extern "C" {
#endif

/*
 * Fallback definitions for the decorators that ncbistd.h would otherwise
 * supply.  Each macro is defined as empty only when no earlier header has
 * already defined it, so including this file after ncbistd.h leaves that
 * header's definitions untouched.
 */
#ifndef FAR
#define FAR
#endif
#ifndef PASCAL
#define PASCAL
#endif
#ifndef EXPORT
#define EXPORT
#endif

/*
 * Calling-convention decoration for the string-to-read parser callbacks
 * declared in this header: __stdcall when WIN32 is defined, empty otherwise.
 */
#ifdef WIN32
#  define ASSEMBLY_CALLBACK __stdcall
#else
#  define ASSEMBLY_CALLBACK
#endif

/*
 * Gap bookkeeping: a count plus a pointer to int offsets.  Both SContigRead
 * and SContig carry one of these through their `gaps` member.
 *
 * NOTE(review): gap_offsets presumably holds num_gaps entries; whether each
 * offset is absolute or relative to the previous gap is not visible in this
 * header - confirm in the implementation file.
 */
typedef struct gapinfo {
    int num_gaps;
    int *gap_offsets;
} SGapInfo, * TGapInfoPtr;

/* Create / release an SGapInfo.
 * NOTE(review): whether GapInfoFree also releases gap_offsets is defined in
 * the implementation, not here. */
extern TGapInfoPtr GapInfoNew (void);
extern void GapInfoFree (TGapInfoPtr g);
/* Derive gap info from seq_str; gap_chars presumably lists the characters
 * that count as gaps - TODO confirm. */
extern TGapInfoPtr GapInfoFromSequenceString (char *seq_str, char *gap_chars);
/* Takes non-const seq_str and returns void, so the string is presumably
 * edited in place - TODO confirm. */
extern void RemoveGapCharsFromSequenceString (char *seq_str, char *gap_chars);
/* Coordinate conversions between a "tiling" position and a "sequence"
 * position, each driven by gap_info.
 * NOTE(review): presumably inverses of each other; base (0 vs 1) not stated. */
extern int SeqPosFromTilingPos (int tiling_pos, TGapInfoPtr gap_info);
extern int TilingPosFromSeqPos (int seq_pos, TGapInfoPtr gap_info);

/*
 * One read belonging to a contig (SContig holds an array of pointers to
 * these in its `reads` member).
 *
 * Members group into: identifiers (read_id, ti, srr), the sequence and its
 * length (read_seq, read_len), a char flag (is_complement), four start/stop
 * coordinate pairs (cons_*, read_*, read_assem_*, tiling_*), gap
 * information, two int flags (valid, local), a free-text tag and an
 * optional quality-score array.
 *
 * NOTE(review): this header does not state which coordinate system each
 * start/stop pair uses, nor whether positions are 0- or 1-based - confirm
 * against the implementation before relying on them.
 */
typedef struct SContigRead {
    char * read_id;
    int    ti;   /* NOTE(review): presumably a Trace Archive identifier - confirm */
    char * srr;  /* NOTE(review): presumably an SRR accession string - confirm */
    char * read_seq;
    int    read_len;
    char   is_complement; /* char used as a flag; presumably nonzero = complemented */
    int    cons_start;
    int    cons_stop;
    int    read_start;
    int    read_stop;
    int    read_assem_start;
    int    read_assem_stop;
    int    tiling_start;
    int    tiling_stop;
    TGapInfoPtr gaps;
    int    valid;
    int    local;
    char * tag; /* notes, comments, annotation for the read */
    /* quality scores - these are optional, used when recalculating consensus sequence */
    int  * qual_scores;
    int    num_qual_scores; /* presumably the element count of qual_scores - confirm */
} SContigRead, * TContigReadPtr;

/* Create / release an SContigRead.
 * NOTE(review): which members ContigReadFree releases is defined in the
 * implementation, not in this header. */
extern TContigReadPtr ContigReadNew (void);
extern void ContigReadFree (TContigReadPtr r);

/*
 * Segmented alignment between a consensus sequence and one read.
 * The revision log at the end of this file describes the pairwise
 * alignment work for consensus-read comparison as unfinished, so treat
 * this API with care.
 *
 * NOTE(review): cons_starts, read_starts and lens are presumably parallel
 * arrays of numseg entries each - confirm in the implementation.
 */
typedef struct SConsensusReadAln {
    int numseg;
    int *cons_starts;
    int *read_starts;
    int *lens;
    char is_complement; /* char used as a flag */
} SConsensusReadAln, * TConsensusReadAlnPtr;

/* Create an alignment for the given number of segments. */
extern TConsensusReadAlnPtr ConsensusReadAlnNew (int numseg);
/* Unlike the other *Free functions in this header (which return void),
 * this one returns a TConsensusReadAlnPtr.
 * NOTE(review): presumably NULL, allowing `a = ConsensusReadAlnFree (a);` -
 * confirm. */
extern TConsensusReadAlnPtr ConsensusReadAlnFree (TConsensusReadAlnPtr a);
/* Produce an alignment from a consensus sequence string and a read. */
extern TConsensusReadAlnPtr GetConsensusReadAln (char *consensus_seq, TContigReadPtr read);


/*
 * A consensus start/stop range paired with a read id (SContig keeps an
 * array of pointers to these in `base_segs`).
 *
 * NOTE(review): presumably corresponds to a base-segment record of the ACE
 * format naming the read that supplies that stretch of consensus - confirm.
 */
typedef struct SBaseSeg {
    char * read_id;
    int    cons_start;
    int    cons_stop;
} SBaseSeg, * TBaseSegPtr;

/* Create / release an SBaseSeg. */
extern TBaseSegPtr BaseSegNew (void);
extern void BaseSegFree (TBaseSegPtr b);

/*
 * One contig: its consensus sequence with optional quality scores and gap
 * information, plus the reads and base segments attached to it.
 *
 * NOTE(review): the difference between consensus_assem_len and
 * consensus_seq_len is not explained in this header (presumably length
 * with vs. without gap characters) - confirm in the implementation.
 */
typedef struct SContig {
    char  * consensus_id;
    char  * consensus_seq;
    int     consensus_assem_len;
    int     consensus_seq_len;
    char    is_complement; /* char used as a flag */
    int     num_qual_scores;
    int   * qual_scores;
    TGapInfoPtr gaps;
    int     num_reads;       /* presumably the element count of reads - confirm */
    TContigReadPtr * reads;
    int     num_base_segs;   /* presumably the element count of base_segs - confirm */
    TBaseSegPtr *base_segs;
    char  * tag; /* notes, comments, annotation for the contig */
} SContig, * TContigPtr;

/* Create / release an SContig.
 * NOTE(review): whether ContigFree also releases the reads and base
 * segments is defined in the implementation, not in this header. */
extern TContigPtr ContigNew (void);
extern void ContigFree (TContigPtr c);
   
/*
 * Top-level container: an array of contig pointers and its count.
 * num_contigs is unsigned int, unlike the int counters used by the other
 * structs in this header (revision 1.13 in the log below made that change).
 */
typedef struct SACEFile {
    unsigned int num_contigs;
    TContigPtr * contigs;
} SACEFile, * TACEFilePtr;

/* Create / release an SACEFile; both carry the library export decoration.
 * NOTE(review): whether ACEFileFree also releases the contained contigs is
 * defined in the implementation, not in this header. */
extern NCBI_CREADERS_EXPORT TACEFilePtr ACEFileNew (void);
extern NCBI_CREADERS_EXPORT void ACEFileFree (TACEFilePtr afp);

/*
 * Reads ACE-format input, pulling lines through readfunc, and returns the
 * result as a TACEFilePtr.
 *
 * NOTE(review): the value returned on failure (presumably NULL) and the
 * expectation that the caller releases the result with ACEFileFree are not
 * stated in this header - confirm in the implementation.
 * NOTE(review): the has_errors wording below ("starts false if errors have
 * already been reported") looks inverted; confirm the intended in/out
 * protocol before relying on it.
 */
extern NCBI_CREADERS_EXPORT TACEFilePtr ReadACEFile (
  FReadLineFunction    readfunc,      /* function for reading lines of 
                                       * alignment file
                                       */
  void *               fileuserdata,  /* data to be passed back each time
                                       * readfunc is invoked
                                       */
  char                 make_qual_scores, /* false if ignoring 
                                          * known-bad qual scores
                                          */
  char *               has_errors        /* starts false if errors have already been reported
                                          * set to true if errors are encountered
                                          */
);


/*
 * Reads MAQ-style input, pulling lines through readfunc, and returns the
 * result in the same TACEFilePtr container used for ACE files.
 *
 * NOTE(review): the value returned on failure and ownership of the result
 * are not stated in this header - confirm in the implementation.
 */
extern NCBI_CREADERS_EXPORT TACEFilePtr ReadMAQFile (
 FReadLineFunction    readfunc,      /* function for reading lines of 
                                       * alignment file
                                       */
 void *               fileuserdata  /* data to be passed back each time
                                       * readfunc is invoked
                                       */
);


/* Writes afp to the stream fp.
 * NOTE(review): output is presumably ACE format, going by the name - confirm. */
extern void WriteACEFile (FILE *fp, TACEFilePtr afp);

/* Builds a TAlignmentFilePtr from a single contig.
 * NOTE(review): TAlignmentFilePtr is not declared in this header; it must
 * come from a header included before this one. */
extern TAlignmentFilePtr AlignmentFileFromContig (TContigPtr contig);

/* Returns a char * derived from an ACE sequence string.
 * NOTE(review): presumably a newly allocated gap string the caller must
 * free - confirm in the implementation. */
extern char * TraceArchiveGapStringFromACESequence (char *seq_str);

/*
 * Parses one text record into a read; despite "Contig" in the name the
 * return type is TContigReadPtr.
 *
 * str          - the record to parse
 * consensus_id - out-parameter (char **)
 * *_col        - NOTE(review): presumably column positions within str for
 *                the corresponding fields; numbering base and the meaning
 *                of interpret_n_col are not visible here - confirm.
 */
extern TContigReadPtr 
ReadContigFromString 
(char  *str,
 char **consensus_id,
 int    id_col,
 int    seq_col, 
 int    contig_id_col,
 int    strand_col,
 int    start_col,
 int    interpret_n_col);
/*
 * Format-specific parsers.  All four share the (char *, char **) signature
 * and the ASSEMBLY_CALLBACK decoration of FReadFromStringFunction, so they
 * can be passed wherever that callback type is expected.
 */
extern TContigReadPtr ASSEMBLY_CALLBACK ReadFromMAQString (char *str, char **consensus_id);
extern TContigReadPtr ASSEMBLY_CALLBACK ReadFromElandMostCompressed (char *str, char **consensus_id);
extern TContigReadPtr ASSEMBLY_CALLBACK ReadFromElandSanger (char *str, char **consensus_id);
extern TContigReadPtr ASSEMBLY_CALLBACK ReadFromElandStandalone (char *str, char **consensus_id);

/*
 * Callback type: transforms one string into a read.
 *
 * str          - the text to parse
 * consensus_id - out-parameter (char **)
 *
 * The ReadFrom* parsers declared earlier in this header have exactly this
 * signature and calling convention.
 */
typedef TContigReadPtr (ASSEMBLY_CALLBACK *FReadFromStringFunction) (char *str, char **consensus_id);

/*
 * Reads an assembly file, pulling lines through readfunc and handing each
 * string to makeread_func to be turned into a read.
 *
 * NOTE(review): the value returned on failure and ownership of the result
 * are not stated in this header - confirm in the implementation.
 */
extern TACEFilePtr ReadAssemblyFile (
    FReadLineFunction       readfunc,       /* function for reading lines of
                                             * alignment file
                                             */
    void *                  fileuserdata,   /* data to be passed back each time
                                             * readfunc is invoked
                                             */
    FReadFromStringFunction makeread_func   /* function to transform a string
                                             * into a read
                                             */
);

/*
 * ReadMAQFile is intentionally not redeclared here.  Its one declaration,
 * carrying NCBI_CREADERS_EXPORT, appears earlier in this header; repeating
 * it without the export decoration gave the same function two declarations
 * with disagreeing linkage decoration.
 */

/*
 * Reads an Eland standalone file, pulling lines through readfunc.
 *
 * NOTE(review): the value returned on failure and ownership of the result
 * are not stated in this header - confirm in the implementation.
 */
extern TACEFilePtr ReadElandStandaloneFile (
    FReadLineFunction       readfunc,       /* function for reading lines of
                                             * alignment file
                                             */
    void *                  fileuserdata    /* data to be passed back each time
                                             * readfunc is invoked
                                             */
);


/*
 * Writes a trace assembly for every contig of afp to fp, with the given
 * submitter reference, center name, taxonomy id and description.
 * NOTE(review): the exact output layout is defined in the implementation.
 */
extern void 
WriteTraceAssemblyFromAceFile 
(TACEFilePtr afp,
 char      * subref,
 char      * center_name, 
 int         taxid,
 char      * description,
 FILE      * fp);

/*
 * Writes only the header portion of a trace assembly to fp.
 * assembly_type was added in revision 1.12 (see the log below).
 * num_conbases and num_readbases are unsigned int, while the contig and
 * read counts are plain int.
 */
extern void
WriteTraceAssemblyHeader
(char * assembly_type,
 char * subref,
 char * center_name,
 int    taxid,
 char * description,
 char * assembly,
 int    num_contigs,
 unsigned int    num_conbases,
 int    num_reads,
 unsigned int    num_readbases,
 FILE * fp);

/* Writes the trailer to fp.
 * NOTE(review): presumably closes what WriteTraceAssemblyHeader opened -
 * confirm. */
extern void WriteTraceAssemblyTrailer (FILE *fp);


/* Writes the trace-assembly record for a single contig to fp. */
extern void WriteTraceAssemblyFromContig (TContigPtr contig, FILE *fp);

/* Writes a single read to fp.  Note the stream comes first here, unlike
 * the other Write* functions in this group. */
extern void WriteTraceArchiveRead (FILE *fp, TContigReadPtr read);

/* Writes FASTA output for afp to fp.
 * NOTE(review): presumably one record per contig consensus - confirm. */
extern void
WriteFASTAFromAceFile
(TACEFilePtr afp,
 FILE        *fp);

/*
 * XML-formatted error reporting used while reading ACE input (the revision
 * log notes that error messages were switched to XML and a wrapping header
 * was added).
 *
 * has_errors is an in/out flag pointer.
 * NOTE(review): presumably the Start wrapper is emitted only while
 * *has_errors is still false, after which the flag is set - confirm in the
 * implementation.
 */
extern void PrintACEFormatErrorXMLStart (char *id, char *has_errors);
extern void PrintACEFormatErrorXMLEnd (void);
extern void PrintACEFormatErrorXML (char *msg, char *id, char *has_errors);

/*
 * Attaches read quality scores to afp using two independent line readers:
 * readfunc/userdata and fasta_readfunc/fasta_userdata (the revision log
 * mentions reading read FASTA files and quality scores for the reads).
 * NOTE(review): the meaning of the int return value is not stated in this
 * header - confirm.
 */
extern int AddReadQualScores (TACEFilePtr afp, FReadLineFunction readfunc, void *userdata, FReadLineFunction fasta_readfunc, void *fasta_userdata);

/*
 * Recalculate consensus sequence(s) from the reads, for one contig or for
 * every contig of an ACE file.  only_ns is a char flag; per the revision
 * log it allows correcting just the Ns in the consensus.
 * NOTE(review): the meaning of ReplaceConsensusSequenceFromTraces' int
 * return value is not stated in this header - confirm.
 */
extern int ReplaceConsensusSequenceFromTraces (TContigPtr contig, char only_ns);
extern void RecalculateConsensusSequences (TACEFilePtr ace_file, char only_ns);

/* Per-contig writers: FASTA to fp, quality scores to out. */
extern void WriteFASTAFromContig (TContigPtr contig, FILE *fp);
extern void WriteContigQualScores (TContigPtr contig, FILE *out);

/*
 * Callback type: receives a contig and an opaque user pointer, returns a
 * char.
 * NOTE(review): the meaning of the return value (presumably success /
 * continue) is not stated in this header - confirm.
 */
typedef char (*ProcessContigFunc) (TContigPtr, void *);

/*
 * Reader for large ACE files (the revision log records changes for
 * handling large files): lines are pulled through readfunc and
 * process_func is invoked with process_data.
 *
 * make_qual_scores and has_errors use the same types as the matching
 * ReadACEFile parameters.
 *
 * NOTE(review): presumably process_func is called once per contig so the
 * whole file need not be held in memory, and ownership of each contig
 * passed to it is defined in the implementation - confirm.  The meaning of
 * the char return value is likewise not stated here.
 */
extern char
ProcessLargeACEFileForContigFastaAndQualScores
(FReadLineFunction    readfunc,
 void *               userdata,
 char                 make_qual_scores,
 char *               has_errors,
 ProcessContigFunc    process_func,
 void *               process_data);


#ifdef __cplusplus
}
#endif

/*
 * ==========================================================================
 *
 * $Log: aceread.h,v $
 * Revision 1.13  2010/03/03 18:46:08  bollin
 * use unsigned int to keep track of the number of contigs.
 *
 * Revision 1.12  2008/12/02 18:58:24  bollin
 * Added argument to WriteTraceAssemblyHeader for assembly type.
 *
 * Revision 1.11  2008/12/02 18:41:39  bollin
 * Checking in unfinished work on creating pairwise denseg alignment for consensus-read comparison.  Unfinished.
 *
 * Revision 1.10  2008/11/26 18:30:02  bollin
 * Changes to make aceread_tst more efficient when handling large ACE files,
 * added TSA field tags for assembly and taxid.
 *
 * Revision 1.9  2008/11/19 15:21:48  bollin
 * Changes for handling large files.
 *
 * Revision 1.8  2008/11/14 20:16:12  bollin
 * Allow correction of just Ns in consensus sequences.
 *
 * Revision 1.7  2008/11/07 18:28:00  bollin
 * Added functions for reading read FASTA files and quality scores for the read
 * sequences.
 * Also added functions for recalculating the consensus sequence and consensus
 * sequence quality scores based on the reads and read quality scores.
 *
 * Revision 1.6  2008/08/13 15:35:30  bollin
 * Added wrapping header for XML errors during ACE read.
 *
 * Revision 1.5  2008/08/13 14:37:23  bollin
 * Changed error messages to use XML format, removed some unused functions.
 *
 * Revision 1.4  2008/08/13 12:30:01  bollin
 * Changes to allow use of srr numbers in XML and suppress lookups.  Also fixes segfault.
 *
 * Revision 1.3  2008/07/22 19:40:25  kans
 * use brackets instead of quotes on includes, put void in parentheses for no argument prototypes
 *
 * Revision 1.2  2008/07/22 18:45:09  bollin
 * Added function declarations
 *
 * Revision 1.1  2008/07/22 18:10:33  bollin
 * New files for parsing ACE format files.
 *
 *
 * ==========================================================================
 */

#endif /* API_ACEREAD__H */