付録:csv2tsvソースコードほか
tasks.json
{
    "version": "2.0.0",
    "tasks": [
        {
            "label": "Clang",
            "type": "process",
            "command": "make",
            "args": [
                "build"
            ],
            "problemMatcher": [],
            "group": {
                "kind": "build",
                "isDefault": true
            }
        },
        {
            "label": "Test",
            "type": "process",
            "command": "make",
            "args": [
                "test"
            ],
            "problemMatcher": [],
            "group": {
                "kind": "test",
                "isDefault": true
            }
        }
    ]
}
Makefile
# Build rules for the csv2tsv command-line tool.
CMD= csv2tsv.exe
SRCS= $(wildcard *.c)
OBJS= $(SRCS:.c=.o)
CC= clang
CFLAGS+=-g

# These targets name actions, not files on disk.
.PHONY: build test clean

build: $(CMD)

$(CMD): $(OBJS)
	$(CC) $(CFLAGS) -o $(CMD) $(OBJS)

# Compile with the same CFLAGS used at link time so that -g (and any flags
# supplied by the caller) actually reach the object files.
.c.o:
	$(CC) $(CFLAGS) -c $< -o $@

# main.c includes main.h, so rebuild it when the header changes.
main.o: main.h

test: $(CMD)
	pwsh .\tests\test.ps1

clean:
	rm -f *.exe
	rm -f *.o
	rm -f *.ilk
	rm -f *.pdb
main.h
#ifndef MAIN_H
#define MAIN_H

// Converts the CSV text in ibuf (ibufsize bytes) into TSV text stored in
// obuf, which has room for obufsize bytes.
// Returns the number of bytes stored in obuf. The output is not
// NUL-terminated by this function.
int csv2tsv(const char *ibuf, int ibufsize, char *obuf, int obufsize);

// Reads the file at filepath into a heap-allocated, NUL-terminated buffer
// and returns it. The caller owns the buffer and releases it with free().
char *file2str(const char *filepath);

#endif // MAIN_H
main.c
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "main.h"
// Entry point: reads the CSV file named by argv[1], converts it to TSV and
// writes the result to standard output.
// Returns EXIT_SUCCESS on success, or EXIT_FAILURE when the argument is
// missing, the input could not be loaded, or memory could not be allocated.
int main(int argc, char *argv[]) {
    if (argc < 2) {
        fprintf(stderr, "usage: %s <csv-file>\n",
                argc > 0 ? argv[0] : "csv2tsv");
        return EXIT_FAILURE;
    }

    char *csvdata = file2str(argv[1]);
    if (!csvdata) {
        // Guard against a NULL result so strlen() is never handed NULL.
        fprintf(stderr, "csv2tsv: cannot read '%s'\n", argv[1]);
        return EXIT_FAILURE;
    }

    // csv2tsv stops once obufsize bytes have been produced, so an output
    // buffer as large as the input bounds the conversion.
    int csvdata_bytes = (int)strlen(csvdata);
    int tsvdata_bytes = csvdata_bytes;
    char *tsvdata = calloc((size_t)tsvdata_bytes + 1, sizeof *tsvdata);
    if (!tsvdata) {
        fprintf(stderr, "csv2tsv: out of memory\n");
        free(csvdata);
        return EXIT_FAILURE;
    }

    // Emit exactly the bytes the converter reported instead of relying on
    // a terminator being present in the buffer.
    int written = csv2tsv(csvdata, csvdata_bytes, tsvdata, tsvdata_bytes);
    if (written > 0) {
        fwrite(tsvdata, sizeof *tsvdata, (size_t)written, stdout);
    }

    free(tsvdata);
    free(csvdata);
    return EXIT_SUCCESS;
}
util_csv.c
#include <stdbool.h>
// Parser state: where the scanner currently is relative to a CSV field.
typedef enum {
    FIELD_END,       // between fields (start of record or just after a comma)
    IN_FIELD,        // inside an unquoted field
    IN_QUOTED_FIELD  // inside a double-quoted field
} field_state;

// Maps an input byte to the byte stored in the TSV output. A literal TAB
// would be mistaken for a field separator, so it is replaced by a space.
static char gettsvchar(const char c) {
    return ('\t' == c) ? ' ' : c;
}

// Converts CSV text to TSV text.
//
//   ibuf, ibufsize  input bytes and their count
//   obuf, obufsize  output buffer and its capacity in bytes
//
// Commas that separate fields become TABs, the quotes surrounding a quoted
// field are dropped, a doubled quote ("") inside a quoted field becomes a
// single quote, and TABs in the data become spaces. A newline always ends
// the current field and is copied through.
//
// Returns the number of bytes stored in obuf. Conversion stops as soon as
// obufsize bytes have been produced. The output is not NUL-terminated.
// Returns 0 without touching obuf when either buffer is missing or either
// size is not positive.
int csv2tsv(const char *ibuf, int ibufsize, char *obuf, int obufsize) {
    // Nothing to read, or nowhere to write: refuse before touching memory.
    if (!ibuf || !obuf || ibufsize <= 0 || obufsize <= 0) {
        return 0;
    }

    field_state state = FIELD_END;
    int tsv_len = 0;

    // Each iteration stores at most one byte, so testing the capacity once
    // per iteration is enough to stay inside obuf.
    for (int i = 0; i < ibufsize && tsv_len < obufsize; ++i) {
        const char c = ibuf[i];

        // A newline terminates the record regardless of the current state.
        if ('\n' == c) {
            state = FIELD_END;
            obuf[tsv_len++] = c;
            continue;
        }

        switch (state) {
        case FIELD_END:
            if ('"' == c) {
                state = IN_QUOTED_FIELD;
            } else if (',' != c) {
                state = IN_FIELD;
                obuf[tsv_len++] = gettsvchar(c);
            }
            // A comma here means an empty field; the separator is emitted
            // below because the state is still FIELD_END.
            break;

        case IN_FIELD:
            if (',' == c) {
                state = FIELD_END;
            } else {
                obuf[tsv_len++] = gettsvchar(c);
            }
            break;

        case IN_QUOTED_FIELD:
            if ('"' != c) {
                obuf[tsv_len++] = gettsvchar(c);
            } else if (i + 1 == ibufsize) {
                // Closing quote is the last input byte: the field simply
                // ends, and no separator follows the final field.
                return tsv_len;
            } else if (',' == ibuf[i + 1]) {
                // Closing quote followed by a comma: consume both.
                state = FIELD_END;
                ++i;
            } else if ('"' == ibuf[i + 1]) {
                // Doubled quote: emit one quote and skip the second.
                obuf[tsv_len++] = c;
                ++i;
            }
            // Any other follower: the quote is dropped and scanning
            // continues inside the quoted field.
            break;

        default:
            break;
        }

        // A field just ended on a comma: write the TSV separator.
        if (FIELD_END == state) {
            obuf[tsv_len++] = '\t';
        }
    }

    return tsv_len;
}
util_file.c
#include <stdio.h>
#include <stdlib.h>
#include <sys/stat.h>
// Loads the whole file at filepath into memory.
//
// Returns a heap-allocated, NUL-terminated buffer holding the bytes read;
// the caller owns it and must release it with free(). Returns NULL when
// filepath is NULL, the file cannot be stat'ed or opened, or the buffer
// cannot be allocated.
char *file2str(const char *filepath) {
    struct stat st;

    // st is only meaningful when stat() succeeds.
    if (!filepath || stat(filepath, &st) != 0 || st.st_size < 0) {
        return NULL;
    }
    size_t filesize = (size_t)st.st_size;

    FILE *fp = fopen(filepath, "r");
    if (!fp) {
        return NULL;
    }

    // One extra zeroed byte guarantees the terminator even when fewer than
    // filesize bytes come back (e.g. text-mode newline translation).
    char *buf = calloc(filesize + 1, sizeof *buf);
    if (buf) {
        size_t nread = fread(buf, sizeof *buf, filesize, fp);
        buf[nread] = '\0';
    }

    fclose(fp);
    return buf;
}
参考