makedefs: check input files for non-ASCII chars

If MAKEDEFS_FILTER_NONASCII is defined (which config.h now does by
default), it will check data.base, rumors.*, and {various}.txt for
characters outside the range of ' ' through '~'.  If it finds any, it
will warn about them and change them to '#'.

Tab handling is incomplete; the files that use tabs for indentation
will allow tabs anywhere, even though that's not wanted.  That could
be fixed but doesn't seem particularly urgent.  This is more about
spotting and repairing the special 3-char punctuation characters that
crept into data.base fairly recently.
This commit is contained in:
PatR
2023-12-18 17:16:33 -08:00
parent 90fcce8903
commit 5e9358b331
2 changed files with 131 additions and 13 deletions

View File

@@ -1,4 +1,4 @@
/* NetHack 3.7 config.h $NHDT-Date: 1693359531 2023/08/30 01:38:51 $ $NHDT-Branch: keni-crashweb2 $:$NHDT-Revision: 1.175 $ */ /* NetHack 3.7 config.h $NHDT-Date: 1702948586 2023/12/19 01:16:26 $ $NHDT-Branch: NetHack-3.7 $:$NHDT-Revision: 1.179 $ */
/* Copyright (c) Stichting Mathematisch Centrum, Amsterdam, 1985. */ /* Copyright (c) Stichting Mathematisch Centrum, Amsterdam, 1985. */
/*-Copyright (c) Robert Patrick Rankin, 2016. */ /*-Copyright (c) Robert Patrick Rankin, 2016. */
/* NetHack may be freely redistributed. See license for details. */ /* NetHack may be freely redistributed. See license for details. */
@@ -24,7 +24,7 @@
/* #define TOS */ /* define for Atari ST/TT */ /* #define TOS */ /* define for Atari ST/TT */
/* #define STUPID */ /* avoid some complicated expressions if /* #define STUPID */ /* avoid some complicated expressions if
your C compiler chokes on them */ * your C compiler chokes on them */
/* #define MINIMAL_TERM */ /* #define MINIMAL_TERM */
/* if a terminal handles highlighting or tabs poorly, /* if a terminal handles highlighting or tabs poorly,
try this define, used in pager.c and termcap.c */ try this define, used in pager.c and termcap.c */
@@ -96,8 +96,8 @@
#ifdef QT_GRAPHICS #ifdef QT_GRAPHICS
#ifndef DEFAULT_WC_TILED_MAP #ifndef DEFAULT_WC_TILED_MAP
#define DEFAULT_WC_TILED_MAP /* Default to tiles if users doesn't say \ #define DEFAULT_WC_TILED_MAP /* Default to tiles if users doesn't request
wc_ascii_map */ * wc_ascii_map */
#endif #endif
#ifndef USE_XPM #ifndef USE_XPM
#define USE_XPM /* Use XPM format for images (required) */ #define USE_XPM /* Use XPM format for images (required) */
@@ -243,11 +243,11 @@
#ifndef CRASHREPORT #ifndef CRASHREPORT
# ifdef MACOS # ifdef MACOS
/* NB: This needs to be a full path unless it's in the playground. */ /* NB: This needs to be a full path unless it's in the playground. */
//#define CRASHREPORT "NetHackCrashReport.JavaScript" /*#define CRASHREPORT "NetHackCrashReport.JavaScript"*/
# endif # endif
# ifdef __linux__ # ifdef __linux__
/* NB: This expects to find the nhlua binary as "./nhlua" */ /* NB: This expects to find the nhlua binary as "./nhlua" */
//#define CRASHREPORT "nhcrashreport.lua" /*#define CRASHREPORT "nhcrashreport.lua"*/
# endif # endif
#endif #endif
@@ -262,6 +262,21 @@
many players, as it saves the player name and the game start time */ many players, as it saves the player name and the game start time */
/* #define PANICLOG_FMT2 */ /* #define PANICLOG_FMT2 */
/*
* Controls whether the 'makedefs' utility, when building the program,
* checks for non-ASCII or non-printable (control) characters in
* various data files (data.base, rumors.tru, rumors.fal, and
* {oracles,epitaphs,engravings,bogusmons}.txt) and warns about them.
* They also get changed to '#' instead of possibly remaining
* unprintable.
*
* If you modify the data files to intentionally add accented
* letters or something comparable, comment this out. (Such things
* won't necessarily work as intended within nethack but at least
* makedefs wouldn't reject them.)
*/
#define MAKEDEFS_FILTER_NONASCII
/* /*
* PERSMAX, POINTSMIN, ENTRYMAX, PERS_IS_UID: * PERSMAX, POINTSMIN, ENTRYMAX, PERS_IS_UID:
* These control the contents of 'record', the high-scores file. * These control the contents of 'record', the high-scores file.
@@ -599,13 +614,13 @@ typedef unsigned char uchar;
#if defined(DEBUG) && !defined(DEBUG_MIGRATING_MONS) #if defined(DEBUG) && !defined(DEBUG_MIGRATING_MONS)
#define DEBUG_MIGRATING_MONS /* add a wizard-mode command to help debug #define DEBUG_MIGRATING_MONS /* add a wizard-mode command to help debug
migrating monsters */ * migrating monsters */
#endif #endif
/* SCORE_ON_BOTL is neither experimental nor inadequately tested, /* SCORE_ON_BOTL is neither experimental nor inadequately tested,
but doesn't seem to fit in any other section... */ but doesn't seem to fit in any other section... */
/* #define SCORE_ON_BOTL */ /* enable the 'showscore' option to /* #define SCORE_ON_BOTL */ /* enable the 'showscore' option to
show estimated score on status line */ * show estimated score on status line */
/* FREE_ALL_MEMORY is neither experimental nor inadequately tested, /* FREE_ALL_MEMORY is neither experimental nor inadequately tested,
but it isn't necessary for successful operation of the program */ but it isn't necessary for successful operation of the program */

View File

@@ -1,4 +1,4 @@
/* NetHack 3.7 makedefs.c $NHDT-Date: 1693083328 2023/08/26 20:55:28 $ $NHDT-Branch: keni-crashweb2 $:$NHDT-Revision: 1.226 $ */ /* NetHack 3.7 makedefs.c $NHDT-Date: 1702948590 2023/12/19 01:16:30 $ $NHDT-Branch: NetHack-3.7 $:$NHDT-Revision: 1.233 $ */
/* Copyright (c) Stichting Mathematisch Centrum, Amsterdam, 1985. */ /* Copyright (c) Stichting Mathematisch Centrum, Amsterdam, 1985. */
/*-Copyright (c) Kenneth Lorber, Kensington, Maryland, 2015. */ /*-Copyright (c) Kenneth Lorber, Kensington, Maryland, 2015. */
/* Copyright (c) M. Stephenson, 1990, 1991. */ /* Copyright (c) M. Stephenson, 1990, 1991. */
@@ -174,7 +174,10 @@ static void do_rnd_access_file(const char *, const char *, unsigned);
static boolean d_filter(char *); static boolean d_filter(char *);
static boolean h_filter(char *); static boolean h_filter(char *);
static void opt_out_words(char *, int *); static void opt_out_words(char *, int *);
static char *fgetline(FILE*); static char *fgetline(FILE *);
/* doesn't do much (counts lines) if MAKEDEFS_FILTER_NONASCII isn't enabled */
static void filter_nonascii(char *);
static void set_fgetline_context(const char *, boolean, boolean);
#if defined(OLD_MAKEDEFS_OPTIONS) #if defined(OLD_MAKEDEFS_OPTIONS)
static char *tmpdup(const char *); static char *tmpdup(const char *);
@@ -184,6 +187,15 @@ static boolean get_gitinfo(char *, char *);
static boolean use_enum = TRUE; static boolean use_enum = TRUE;
#endif #endif
/*
 * Extra state consulted by fgetline() via filter_nonascii().
 * It exists for MAKEDEFS_FILTER_NONASCII but is deliberately not
 * conditionalized; files that aren't read with fgetline() don't need it.
 */
struct ascii_filter {
    const char *filename; /* name shown in warnings */
    int linenum;          /* count of lines read since the context was set */
    int warncnt;          /* count of lines which produced a warning */
    boolean dofilter;     /* whether lines are examined at all */
    boolean tabok;        /* whether <tab> is accepted without complaint */
};

/* the one and only filtering context; set by set_fgetline_context() */
static struct ascii_filter ascii_ctx;
/* input, output, tmp */ /* input, output, tmp */
static FILE *ifp, *ofp, *tfp; static FILE *ifp, *ofp, *tfp;
@@ -1036,6 +1048,7 @@ read_rumors_file(
perror(infile); perror(infile);
return 0L; return 0L;
} }
set_fgetline_context(infile, TRUE, FALSE);
/* copy the rumors */ /* copy the rumors */
while ((line = fgetline(ifp)) != 0) { while ((line = fgetline(ifp)) != 0) {
@@ -1104,6 +1117,8 @@ do_rnd_access_file(
#else #else
ifp = getfp(DATA_TEMPLATE, "grep.tmp", RDTMODE, 0); ifp = getfp(DATA_TEMPLATE, "grep.tmp", RDTMODE, 0);
#endif #endif
set_fgetline_context(NULL, FALSE, FALSE);
while ((line = fgetline(ifp)) != 0) { while ((line = fgetline(ifp)) != 0) {
if (line[0] != '#' && line[0] != '\n') { if (line[0] != '#' && line[0] != '\n') {
(void) padline(line, padlength); (void) padline(line, padlength);
@@ -1162,9 +1177,10 @@ do_rumors(void)
/* record the current position; true rumors will start here */ /* record the current position; true rumors will start here */
true_rumor_offset = (unsigned long) ftell(tfp); true_rumor_offset = (unsigned long) ftell(tfp);
false_rumor_offset = (unsigned long) read_rumors_file(".tru", &true_rumor_count, false_rumor_offset
&true_rumor_size, true_rumor_offset, = (unsigned long) read_rumors_file(".tru", &true_rumor_count,
MD_PAD_RUMORS); &true_rumor_size, true_rumor_offset,
MD_PAD_RUMORS);
if (!false_rumor_offset) if (!false_rumor_offset)
goto rumors_failure; goto rumors_failure;
@@ -1188,6 +1204,8 @@ do_rumors(void)
Fprintf(ofp, rumors_header, Dont_Edit_Data, true_rumor_count, Fprintf(ofp, rumors_header, Dont_Edit_Data, true_rumor_count,
true_rumor_size, true_rumor_offset, false_rumor_count, true_rumor_size, true_rumor_offset, false_rumor_count,
false_rumor_size, false_rumor_offset, eof_offset); false_rumor_size, false_rumor_offset, eof_offset);
set_fgetline_context(NULL, FALSE, FALSE);
/* skip the temp file's dummy header */ /* skip the temp file's dummy header */
if (!(line = fgetline(tfp))) { /* "Don't Edit" */ if (!(line = fgetline(tfp))) { /* "Don't Edit" */
perror(tempfile); perror(tempfile);
@@ -1265,6 +1283,7 @@ do_data(void)
Fprintf(ofp, "%s%08lx\n", Dont_Edit_Data, 0L); Fprintf(ofp, "%s%08lx\n", Dont_Edit_Data, 0L);
entry_cnt = line_cnt = 0; entry_cnt = line_cnt = 0;
set_fgetline_context(infile, TRUE, TRUE);
/* read through the input file and split it into two sections */ /* read through the input file and split it into two sections */
while ((line = fgetline(ifp)) != 0) { while ((line = fgetline(ifp)) != 0) {
if (d_filter(line)) { if (d_filter(line)) {
@@ -1301,6 +1320,7 @@ do_data(void)
if (rewind(tfp) != 0) if (rewind(tfp) != 0)
goto dead_data; goto dead_data;
free((genericptr_t) line); free((genericptr_t) line);
set_fgetline_context(NULL, FALSE, TRUE);
/* copy all lines of text from the scratch file into the output file */ /* copy all lines of text from the scratch file into the output file */
while ((line = fgetline(tfp)) != 0) { while ((line = fgetline(tfp)) != 0) {
(void) fputs(line, ofp); (void) fputs(line, ofp);
@@ -1448,6 +1468,7 @@ do_oracles(void)
Fprintf(ofp, "%05lx\n", offset); /* start pos of first oracle */ Fprintf(ofp, "%05lx\n", offset); /* start pos of first oracle */
in_oracle = FALSE; in_oracle = FALSE;
set_fgetline_context(infile, TRUE, FALSE);
while ((line = fgetline(ifp)) != 0) { while ((line = fgetline(ifp)) != 0) {
SpinCursor(3); SpinCursor(3);
@@ -1490,6 +1511,7 @@ do_oracles(void)
goto dead_data; goto dead_data;
free((genericptr_t) line); free((genericptr_t) line);
/* copy all lines of text from the scratch file into the output file */ /* copy all lines of text from the scratch file into the output file */
set_fgetline_context(tempfile, FALSE, FALSE);
while ((line = fgetline(tfp)) != 0) { while ((line = fgetline(tfp)) != 0) {
(void) fputs(line, ofp); (void) fputs(line, ofp);
free((genericptr_t) line); free((genericptr_t) line);
@@ -1598,11 +1620,90 @@ fgetline(FILE *fd)
(void) memcpy(cprime, c, len); (void) memcpy(cprime, c, len);
free((genericptr_t) c); free((genericptr_t) c);
c = cprime; c = cprime;
*(c + len) = '\0';
len = newlen; len = newlen;
} }
filter_nonascii(c);
return c; return c;
} }
/*
 * filter_nonascii() - examine one line that fgetline() is about to return.
 *
 * 'line' is the NUL-terminated text just read, or NULL at end of file
 * (in which case nothing happens).  The line counter in ascii_ctx is
 * always advanced; the rest only happens when ascii_ctx.dofilter is set
 * and MAKEDEFS_FILTER_NONASCII is defined.
 *
 * Scanning stops at the first newline or at the terminating NUL.
 * Every character outside ' ' through '~' is overwritten in place with
 * '#', except that <tab> is left alone when ascii_ctx.tabok is set.
 * Up to three offenders per line are reported on stderr in the form
 *     ? file: line.col reason 'ooo', line.col 'ooo', ...
 * where col is the 0-based offset into the line and ooo is the octal
 * value of the original character; a reason is only repeated when it
 * differs from the previous one.  Further offenders on the same line
 * are summarized by a single ", ..." indicator.
 *
 * The warning is written straight to stderr in pieces rather than being
 * assembled in a fixed-size buffer first, so an unusually long file name
 * can't overflow anything.
 */
static void
filter_nonascii(char *line)
{
#ifdef MAKEDEFS_FILTER_NONASCII
    unsigned char *p;
    int warned = 0, prevreason = -1, reason;
#endif

    if (!line) /* end of file; uses 'line' for !MAKEDEFS_FILTER_NONASCII */
        return;
    ascii_ctx.linenum += 1;
    if (!ascii_ctx.dofilter)
        return;

#ifdef MAKEDEFS_FILTER_NONASCII
    for (p = (unsigned char *) line; *p; ++p) {
        if (*p == '\n')
            break;
        if (*p == '\t' && ascii_ctx.tabok)
            continue;
        /* 0: acceptable; 1: control character, including DEL (0177)
           which is 7-bit ASCII but not printable; 2: unwanted tab;
           3: high bit set, so not ASCII at all */
        reason = (*p > 127) ? 3
                 : (*p == '\t') ? 2
                   : (*p < ' ' || *p == 127);
        if (reason == 0)
            continue;

        if (!warned)
            ascii_ctx.warncnt += 1; /* number of lines warned about */
        if (++warned <= 3) { /* show up to 3 warnings for this line */
            if (warned == 1) {
                /*assert(ascii_ctx.filename != NULL);*/
                Fprintf(stderr, "? %s:", ascii_ctx.filename);
            } else {
                Fprintf(stderr, ",");
            }
            Fprintf(stderr, " %d.%ld", ascii_ctx.linenum,
                    (long) ((char *) p - line)); /* column */
            if (reason != prevreason) {
                Fprintf(stderr, "%s", (reason == 1) ? " non-printable"
                                      : (reason == 3) ? " non-ascii"
                                        : " <tab>"); /* (reason == 2) */
                prevreason = reason;
            }
            Fprintf(stderr, " '%03o'", (unsigned) *p);
        } else if (warned == 3 + 1) { /* when more than 3 */
            Fprintf(stderr, ", ..."); /* show an indicator */
        }
        *p = '#';
    }
    if (warned > 0)
        Fprintf(stderr, "\n");
#endif
    return;
}
/*
 * set_fgetline_context() - reset the state used by filter_nonascii()
 * before a file starts being read with fgetline().
 *
 * current_filename: name to show in warnings; NULL selects a
 *                   placeholder.  A leading "../" is skipped.
 * do_filtering:     whether lines should be checked; forced off when
 *                   MAKEDEFS_FILTER_NONASCII isn't defined.
 * tabs_are_ok:      whether <tab> is acceptable; moot for !do_filtering.
 *
 * The line and warning counters are both set back to zero.
 */
static void
set_fgetline_context(
    const char *current_filename,
    boolean do_filtering,
    boolean tabs_are_ok) /* moot for !do_filtering */
{
    static const char dummyname[] = "[makedefs input]";
    const char *shown_name = current_filename ? current_filename : dummyname;

#ifndef MAKEDEFS_FILTER_NONASCII
    do_filtering = FALSE;
#endif

    /* change from relative-to-dat to be relative-to-top, iff that's easy */
    if (strncmp(shown_name, "../", 3) == 0)
        shown_name += 3;

    ascii_ctx.filename = shown_name;
    ascii_ctx.linenum = 0;
    ascii_ctx.warncnt = 0;
    ascii_ctx.dofilter = do_filtering;
    ascii_ctx.tabok = tabs_are_ok;
}
#if defined(OLD_MAKEDEFS_OPTIONS) #if defined(OLD_MAKEDEFS_OPTIONS)
void void
do_date(void) do_date(void)
@@ -1822,6 +1923,7 @@ get_gitinfo(char *githash, char *gitbranch)
/* perror(infile); */ /* perror(infile); */
return FALSE; return FALSE;
} }
set_fgetline_context(infile, TRUE, TRUE);
/* read the gitinfo file */ /* read the gitinfo file */
while ((line = fgetline(gifp)) != 0) { while ((line = fgetline(gifp)) != 0) {
@@ -1983,6 +2085,7 @@ do_dungeon(void)
#else #else
ifp = getfp(DATA_TEMPLATE, "grep.tmp", RDTMODE, 0); ifp = getfp(DATA_TEMPLATE, "grep.tmp", RDTMODE, 0);
#endif #endif
set_fgetline_context(NULL, FALSE, TRUE);
while ((line = fgetline(ifp)) != 0) { while ((line = fgetline(ifp)) != 0) {
SpinCursor(3); SpinCursor(3);