1 #define _FILE_OFFSET_BITS 64
2 #define _LARGEFILE64_SOURCE
11 #include <sys/types.h>
17 #include <libxml/xmlreader.h>
// NOTE(review): these look like member declarations of struct Input (the
// type the functions in this file operate on); the struct's opening and
// closing lines are not visible in this listing.
// which kind of file was opened; inputOpen sets this from the file name
// extension (".gz", ".bz2", anything else is a plain file).
25 enum { plainFile, gzipFile, bzip2File } type;
27 // needed by bzip2 when decompressing from multiple streams. other
28 // decompressors must ignore it.
// NOTE(review): the member described by the comment above is not visible
// here; presumably systemHandle, which bzReOpen passes to BZ2_bzReadOpen -
// confirm.
// buf_ptr is the index of the next byte inputGetChar returns from buf;
// buf_fill is the value readFile returned when buf was last filled. when
// the two are equal the buffer has been used up and is refilled.
32 int buf_ptr, buf_fill;
35 // tries to re-open the bz stream at the next stream start.
36 // returns 0 on success, -1 on failure.
//
// ctx:   input being read. its fileHandle (the current BZFILE reader) is
//        closed and replaced by a new reader opened on ctx->systemHandle.
// error: receives the bzlib status code of each bzlib call made here; on
//        a -1 return it holds the status of the call that failed.
37 int bzReOpen(struct Input *ctx, int *error)
39 // for copying out the last unused part of the block which
40 // has an EOS token in it. needed for re-initialising the
// reader: these bytes are handed to BZ2_bzReadOpen below so the new reader
// starts with the data that was already consumed from the file.
42 unsigned char unused[BZ_MAX_UNUSED];
43 void *unused_tmp_ptr = NULL;
// NOTE(review): the declarations of nUnused and i are not visible in this
// listing.
// ask bzlib for the bytes it read beyond the end of the finished stream:
// unused_tmp_ptr is pointed at them and nUnused receives their count.
46 BZ2_bzReadGetUnused(error, (BZFILE *)(ctx->fileHandle), &unused_tmp_ptr, &nUnused);
47 if (*error != BZ_OK) return -1;
49 // when bzReadClose is called the unused buffer is deallocated,
50 // so it needs to be copied somewhere safe first.
51 for (i = 0; i < nUnused; ++i)
52 unused[i] = ((unsigned char *)unused_tmp_ptr)[i];
// close the old reader only; ctx->systemHandle is still used below.
54 BZ2_bzReadClose(error, (BZFILE *)(ctx->fileHandle));
55 if (*error != BZ_OK) return -1;
57 // reassign the file handle
58 ctx->fileHandle = BZ2_bzReadOpen(error, ctx->systemHandle, 0, 0, unused, nUnused);
59 if (ctx->fileHandle == NULL || *error != BZ_OK) return -1;
// read callback (handed to xmlReaderForIO by inputUTF8): reads up to len
// bytes of file content into buffer, going through zlib or bzlib for
// gzip and bzip2 input.
//
// context: the struct Input for the open file.
// buffer:  destination for the bytes read.
// len:     maximum number of bytes to read.
//
// NOTE(review): the dispatch on the file type, the declarations of l and
// error, and the return statements are not visible in this listing; l is
// presumably the value returned.
64 int readFile(void *context, char * buffer, int len)
66 struct Input *ctx = context;
67 void *f = ctx->fileHandle;
// checked before any read is attempted: end of input already flagged, or
// the caller asked for no bytes.
70 if (ctx->eof || (len == 0))
// plain file: f points at an int file descriptor (see inputOpen).
76 l = read(*(int *)f, buffer, len);
// 0 means end of file and a negative value an error; either way flag eof.
77 if (l <= 0) ctx->eof = 1;
// gzip file: f is the handle returned by gzopen.
80 l = gzread((gzFile)f, buffer, len);
81 if (l <= 0) ctx->eof = 1;
// bzip2 file: the status comes back through error, not the return value.
84 l = BZ2_bzRead(&error, (BZFILE *)f, buffer, len);
86 // error codes BZ_OK and BZ_STREAM_END are both "OK", but the stream
87 // end means the reader needs to be reset from the original handle.
90 // for stream errors, try re-opening the stream before admitting defeat.
// true when the status is anything other than end-of-stream, or when
// end-of-stream was reached but re-opening at the next stream failed.
91 if (error != BZ_STREAM_END || bzReOpen(ctx, &error) != 0)
// NOTE(review): presumably the branch for an unrecognised ctx->type; the
// enclosing construct is not visible.
99 fprintf(stderr, "Bad file type\n");
// reports the read result l together with the bzlib status in error.
105 fprintf(stderr, "File reader received error %d (%d)\n", l, error);
// returns the next byte of the input from ctx->buf, refilling the buffer
// through readFile once every buffered byte has been handed out.
//
// context: the struct Input for the open file.
112 char inputGetChar(void *context)
114 struct Input *ctx = context;
// buffer used up: every byte fetched so far has been returned.
116 if (ctx->buf_ptr == ctx->buf_fill)
118 ctx->buf_fill = readFile(context, &ctx->buf[0], sizeof(ctx->buf));
// NOTE(review): buf_ptr is presumably reset to 0 on a line not visible in
// this listing - confirm.
// readFile delivered no data (the statement this guards is not visible).
120 if (ctx->buf_fill == 0)
// readFile reported a failure.
122 if (ctx->buf_fill < 0)
124 perror("Error while reading file");
128 //readFile(context, &c, 1);
// hand out the byte at buf_ptr and advance past it.
129 return ctx->buf[ctx->buf_ptr++];
// returns the eof flag of the input: non-zero once readFile has flagged
// the end of the input (or a failed read), zero before that.
//
// context: the struct Input for the open file.
132 int inputEof(void *context)
134 return ((struct Input *)context)->eof;
// opens the named input and sets up a newly allocated struct Input for it.
// the reader is chosen from the file name: a ".gz" extension uses zlib, a
// ".bz2" extension uses bzlib on top of a stdio FILE, and anything else is
// read through a plain file descriptor.
//
// name: path of the file to open. "-" is special-cased in the plain-file
//       branch (NOTE(review): presumably standard input; the statement it
//       guards is not visible in this listing).
//
// NOTE(review): the return statements and the failure clean-up are not
// visible here; callers treat the result as the context pointer passed to
// readFile / inputClose.
137 void *inputOpen(const char *name)
// ext points at the last '.' in name, or is NULL when there is none.
139 const char *ext = strrchr(name, '.');
140 struct Input *ctx = malloc (sizeof(*ctx));
// start with every member zeroed.
145 memset(ctx, 0, sizeof(*ctx));
// the context keeps its own copy of the name.
147 ctx->name = strdup(name);
149 if (ext && !strcmp(ext, ".gz"))
151 ctx->fileHandle = (void *)gzopen(name, "rb");
152 ctx->type = gzipFile;
154 else if (ext && !strcmp(ext, ".bz2"))
// bzlib reads from a stdio FILE; it is kept in systemHandle so that
// bzReOpen can open a new reader on it later.
157 ctx->systemHandle = fopen(name, "rb");
158 if (!ctx->systemHandle)
160 fprintf(stderr, "error while opening file %s\n", name);
164 ctx->fileHandle = (void *)BZ2_bzReadOpen(&error, ctx->systemHandle, 0, 0, NULL, 0);
165 ctx->type = bzip2File;
// plain file: the descriptor lives in a heap-allocated int so that it can
// be stored in the void * fileHandle.
// NOTE(review): sizeof(pfd) is the size of the pointer, not of the int it
// points to; sizeof(*pfd) is presumably what was intended - confirm.
170 int *pfd = malloc(sizeof(pfd));
173 if (!strcmp(name, "-"))
179 int flags = O_RDONLY;
181 flags |= O_LARGEFILE;
183 *pfd = open(name, flags);
191 ctx->fileHandle = (void *)pfd;
192 ctx->type = plainFile;
// no handle was obtained by the branch taken above.
194 if (!ctx->fileHandle)
196 fprintf(stderr, "error while opening file %s\n", name);
// close callback (handed to xmlReaderForIO by inputUTF8): shuts down the
// reader held in the context.
//
// context: the struct Input for the open file.
//
// NOTE(review): only the bzip2 call and the unknown-type message are
// visible in this listing; the other branches, any freeing of the context
// and the return value are not.
204 int inputClose(void *context)
206 struct Input *ctx = context;
207 void *f = ctx->fileHandle;
// NOTE(review): this handle was created with BZ2_bzReadOpen (see inputOpen
// and bzReOpen), while BZ2_bzclose belongs to the BZ2_bzopen family of
// calls - confirm that this pairing is intended.
219 BZ2_bzclose((BZFILE *)f);
222 fprintf(stderr, "Bad file type\n");
// creates a libxml2 text reader that pulls its data from the named file
// through readFile and closes it through inputClose.
//
// name: file to open with inputOpen; also used in the failure message.
231 xmlTextReaderPtr inputUTF8(const char *name)
233 void *ctx = inputOpen(name);
// NOTE(review): presumably guarded by a check that inputOpen failed; the
// check itself is not visible in this listing.
237 fprintf(stderr, "Input reader create failed for: %s\n", name);
// the three trailing arguments are the URL, the encoding and the parser
// options: none are supplied.
241 return xmlReaderForIO(readFile, inputClose, (void *)ctx, NULL, NULL, 0);