From 03476a7c78f267934a6a527699bdb3cdf9d022cf Mon Sep 17 00:00:00 2001 From: PatR Date: Sat, 27 Nov 2021 12:23:01 -0800 Subject: [PATCH] get_rnd_text() for rumors and other stuff Solve the uneven distribution situation that has been present for picking random rumors for a long time and for random engravings, epitaphs, and hallucinatory monster names since 3.6.0. This relies on the previous partial solution where short lines have been padded to a longer length. When that length is N and random seek lands in a long line of length L, retry if the position is in the first L-N characters. Put differently, it if takes more than N characters to reach the next newline, reject that random seek and try again. This effectively makes long lines behave as if they had the same length of N as the short lines have been padded to and when all lines are the same length, all entries have the same chance to be chosen. --- doc/fixes37.0 | 2 ++ include/extern.h | 2 +- include/global.h | 11 ++++++++++- src/do_name.c | 2 +- src/engrave.c | 4 ++-- src/rumors.c | 34 +++++++++++++++++++++++++++------- util/makedefs.c | 27 +++++++-------------------- 7 files changed, 50 insertions(+), 32 deletions(-) diff --git a/doc/fixes37.0 b/doc/fixes37.0 index 1c6d86c9f..7fd54eb87 100644 --- a/doc/fixes37.0 +++ b/doc/fixes37.0 @@ -701,6 +701,8 @@ selection of random engravings, epitaphs, and hallucinatory monster names had ones which follow shorter than average lines are least likely; use same workaround as for rumors: pad the shortest lines; result isn't uniforn distribution but is better (tradeoff vs size; see makedefs) +make selection of random rumors, engravings, epitaphs, and hallucinatory monst + names have uniform distribution by handling long lines specially Fixes to 3.7.0-x Problems that Were Exposed Via git Repository diff --git a/include/extern.h b/include/extern.h index 16e6482eb..a8bedf7ef 100644 --- a/include/extern.h +++ b/include/extern.h @@ -2263,7 +2263,7 @@ extern const char *Goodbye(void); /* ### rumors.c ### */ extern char *getrumor(int, char *, boolean); -extern char *get_rnd_text(const char *, char *, int(*)(int)); +extern char *get_rnd_text(const char *, char *, int(*)(int), unsigned); extern void outrumor(int, int); extern void outoracle(boolean, boolean); extern void save_oracles(NHFILE *); diff --git a/include/global.h b/include/global.h index 2f38bdd1f..1f510c78a 100644 --- a/include/global.h +++ b/include/global.h @@ -9,7 +9,8 @@ #include /* - * Files expected to exist in the playground directory. + * Files expected to exist in the playground directory (possibly inside + * a dlb container file). */ #define RECORD "record" /* file containing list of topscorers */ @@ -32,6 +33,14 @@ #define TRIBUTEFILE "tribute" /* 3.6 tribute to Terry Pratchett */ #define LEV_EXT ".lua" /* extension for special level files */ +/* padding amounts for files that have lines chosen by fseek to random spot, + advancing to the next line, and using that line; makedefs forces shorter + lines to be padded to these lengths; value of 0 will inhibit any padding, + avoiding an increase in files' sizes, but resulting in biased selection; + used by makedefs while building and by core's callers of get_rnd_text() */ +#define MD_PAD_RUMORS 60u /* for RUMORFILE, EPITAPHFILE, and ENGRAVEFILE */ +#define MD_PAD_BOGONS 20u /* for BOGUSMONFILE */ + /* Assorted definitions that may depend on selections in config.h. */ /* diff --git a/src/do_name.c b/src/do_name.c index f42251f25..b355d30ce 100644 --- a/src/do_name.c +++ b/src/do_name.c @@ -2204,7 +2204,7 @@ bogusmon(char *buf, char *code) if (code) *code = '\0'; /* might fail (return empty buf[]) if the file isn't available */ - get_rnd_text(BOGUSMONFILE, buf, rn2_on_display_rng); + get_rnd_text(BOGUSMONFILE, buf, rn2_on_display_rng, MD_PAD_BOGONS); if (!*mnam) { Strcpy(buf, "bogon"); } else if (index(bogon_codes, *mnam)) { /* strip prefix if present */ diff --git a/src/engrave.c b/src/engrave.c index 9d39b0bda..c9cb75092 100644 --- a/src/engrave.c +++ b/src/engrave.c @@ -18,7 +18,7 @@ random_engraving(char *outbuf) /* a random engraving may come from the "rumors" file, or from the "engrave" file (formerly in an array here) */ if (!rn2(4) || !(rumor = getrumor(0, outbuf, TRUE)) || !*rumor) - (void) get_rnd_text(ENGRAVEFILE, outbuf, rn2); + (void) get_rnd_text(ENGRAVEFILE, outbuf, rn2, MD_PAD_RUMORS); wipeout_text(outbuf, (int) (strlen(outbuf) / 4), 0); return outbuf; @@ -1446,7 +1446,7 @@ make_grave(int x, int y, const char *str) /* Engrave the headstone */ del_engr_at(x, y); if (!str) - str = get_rnd_text(EPITAPHFILE, buf, rn2); + str = get_rnd_text(EPITAPHFILE, buf, rn2, MD_PAD_RUMORS); make_engr_at(x, y, str, 0L, HEADSTONE); return; } diff --git a/src/rumors.c b/src/rumors.c index bd68faac0..347009b51 100644 --- a/src/rumors.c +++ b/src/rumors.c @@ -395,7 +395,11 @@ RESTORE_WARNING_FORMAT_NONLITERAL /* Gets a random line of text from file 'fname', and returns it. rng is the random number generator to use, and should act like rn2 does. */ char * -get_rnd_text(const char* fname, char* buf, int (*rng)(int)) +get_rnd_text( + const char *fname, + char *buf, + int (*rng)(int), + unsigned padlength) { dlb *fh; @@ -404,6 +408,7 @@ get_rnd_text(const char* fname, char* buf, int (*rng)(int)) if (fh) { /* TODO: cache sizetxt, starttxt, endtxt. maybe cache file contents? */ long sizetxt = 0L, starttxt = 0L, endtxt = 0L, tidbit = 0L; + int trylimit; char *endp, line[BUFSZ], xbuf[BUFSZ]; /* skip "don't edit" comment */ @@ -419,14 +424,29 @@ get_rnd_text(const char* fname, char* buf, int (*rng)(int)) that save and restore might fix the problem wouldn't be useful */ if (sizetxt < 1L) return buf; - tidbit = (*rng)(sizetxt); + /* 'rumors' is about 3/4 of the way to the limit on a 16-bit config */ + nhassert(sizetxt <= INT_MAX); /* essential for rn2(sizetxt) */ - /* position randomly which will probably be in the middle of a line; - read the rest of that line, then use the next one; if there's no - next one (ie, end of file), go back to beginning and use first */ - (void) dlb_fseek(fh, starttxt + tidbit, SEEK_SET); - (void) dlb_fgets(line, sizeof line, fh); + /* + * Position randomly which will probably be in the middle of a line. + * Read the rest of that line, then use the next one. If there's no + * next line (ie, end of file), go back to beginning and use first. + * + * When short lines have been padded to length N, only accept long + * lines if we land within last N+1 characters (+1 is for newline + * which hasn't been stripped away yet), effectively shortening + * them to normal length. That yields even selection distribution. + */ + for (trylimit = 5; trylimit > 0; --trylimit) { + tidbit = (long) (*rng)((int) sizetxt); + (void) dlb_fseek(fh, starttxt + tidbit, SEEK_SET); + (void) dlb_fgets(line, sizeof line, fh); + if (!padlength || (unsigned) strlen(line) <= padlength + 1) + break; + } + /* use next line */ if (!dlb_fgets(line, sizeof line, fh)) { + /* assume failure is due to end-of-file; go back to start */ (void) dlb_fseek(fh, starttxt, SEEK_SET); (void) dlb_fgets(line, sizeof line, fh); } diff --git a/util/makedefs.c b/util/makedefs.c index 800d2f935..3316f5613 100644 --- a/util/makedefs.c +++ b/util/makedefs.c @@ -104,21 +104,6 @@ static const char SCCS_Id[] UNUSED = "@(#)makedefs.c\t3.7\t2020/01/18"; #endif /* else !MAC */ #endif /* else !AMIGA */ -/* - * For files where entries are selected by seeking to a random position, - * skipping to newline, and then using the next line, lines that follow - * long ones are more likely to be selected than average and lines that - * follow short ones are less likely to be selected than average. - * We make selection be more evenly distributed by padding the shortest - * lines, at the cost of making the data files bigger. The larger these - * values are, the more uniform the selection will become but the more - * space will be needed for data used for something which is relatively - * inconsequential to actual game play. A value of 0 would suppress the - * padding because every line is already at least that long. - */ -#define PAD_RUMORS_TO 60u /* also used for epitaphs and engravings */ -#define PAD_BOGONS_TO 20u /* hallucinatory monsters */ - static const char *Dont_Edit_Code = "/* This source file is generated by 'makedefs'. Do not edit. */\n", @@ -329,16 +314,16 @@ do_makedefs(char *options) do_rnd_access_file(EPITAPHFILE, /* default epitaph: parody of the default engraving */ "No matter where I went, here I am.", - PAD_RUMORS_TO); + MD_PAD_RUMORS); /* '_RUMORS' is correct here */ do_rnd_access_file(ENGRAVEFILE, /* default engraving: popularized by "The Adventures of Buckaroo Bonzai Across the 8th Dimenstion" but predates that 1984 movie; some attribute it to Confucius */ "No matter where you go, there you are.", - PAD_RUMORS_TO); + MD_PAD_RUMORS); /* '_RUMORS' used here too */ do_rnd_access_file(BOGUSMONFILE, /* default bogusmon: iconic monster that isn't in nethack */ - "grue", PAD_BOGONS_TO); + "grue", MD_PAD_BOGONS); break; case 'h': case 'H': @@ -896,6 +881,8 @@ padline(char *line, unsigned padlength) * follow short-line rumors are least likely to be chosen. * We ameliorate the latter by padding the shortest lines, * increasing the chance of the random seek landing in them. + * The core's get_rnd_text() handles long lines in a way + * that results in even selection distribution. * * Random epitaphs, engravings, and hallucinatory monster * names are in the same boat. @@ -1059,13 +1046,13 @@ do_rumors(void) false_rumor_offset = read_rumors_file(".tru", &true_rumor_count, &true_rumor_size, true_rumor_offset, - PAD_RUMORS_TO); + MD_PAD_RUMORS); if (!false_rumor_offset) goto rumors_failure; eof_offset = read_rumors_file(".fal", &false_rumor_count, &false_rumor_size, false_rumor_offset, - PAD_RUMORS_TO); + MD_PAD_RUMORS); if (!eof_offset) goto rumors_failure;