get_rnd_text() for rumors and other stuff

Fix the uneven selection distribution that has been present for a
long time when picking random rumors, and since 3.6.0 when picking
random engravings, epitaphs, and hallucinatory monster names.  This relies
on the previous partial solution where short lines have been padded
to a longer length.  When that length is N and random seek lands in
a long line of length L, retry if the position is in the first L-N
characters.  Put differently, if it takes more than N characters to
reach the next newline, reject that random seek and try again.  This
effectively makes long lines behave as if they had the same length N
that the short lines have been padded to; when all lines are the
same length, all entries have the same chance of being chosen.
This commit is contained in:
PatR
2021-11-27 12:23:01 -08:00
parent 629c2c4094
commit 03476a7c78
7 changed files with 50 additions and 32 deletions

View File

@@ -2204,7 +2204,7 @@ bogusmon(char *buf, char *code)
if (code)
*code = '\0';
/* might fail (return empty buf[]) if the file isn't available */
get_rnd_text(BOGUSMONFILE, buf, rn2_on_display_rng);
get_rnd_text(BOGUSMONFILE, buf, rn2_on_display_rng, MD_PAD_BOGONS);
if (!*mnam) {
Strcpy(buf, "bogon");
} else if (index(bogon_codes, *mnam)) { /* strip prefix if present */

View File

@@ -18,7 +18,7 @@ random_engraving(char *outbuf)
/* a random engraving may come from the "rumors" file,
or from the "engrave" file (formerly in an array here) */
if (!rn2(4) || !(rumor = getrumor(0, outbuf, TRUE)) || !*rumor)
(void) get_rnd_text(ENGRAVEFILE, outbuf, rn2);
(void) get_rnd_text(ENGRAVEFILE, outbuf, rn2, MD_PAD_RUMORS);
wipeout_text(outbuf, (int) (strlen(outbuf) / 4), 0);
return outbuf;
@@ -1446,7 +1446,7 @@ make_grave(int x, int y, const char *str)
/* Engrave the headstone */
del_engr_at(x, y);
if (!str)
str = get_rnd_text(EPITAPHFILE, buf, rn2);
str = get_rnd_text(EPITAPHFILE, buf, rn2, MD_PAD_RUMORS);
make_engr_at(x, y, str, 0L, HEADSTONE);
return;
}

View File

@@ -395,7 +395,11 @@ RESTORE_WARNING_FORMAT_NONLITERAL
/* Gets a random line of text from file 'fname', and returns it.
rng is the random number generator to use, and should act like rn2 does. */
char *
get_rnd_text(const char* fname, char* buf, int (*rng)(int))
get_rnd_text(
const char *fname,
char *buf,
int (*rng)(int),
unsigned padlength)
{
dlb *fh;
@@ -404,6 +408,7 @@ get_rnd_text(const char* fname, char* buf, int (*rng)(int))
if (fh) {
/* TODO: cache sizetxt, starttxt, endtxt. maybe cache file contents? */
long sizetxt = 0L, starttxt = 0L, endtxt = 0L, tidbit = 0L;
int trylimit;
char *endp, line[BUFSZ], xbuf[BUFSZ];
/* skip "don't edit" comment */
@@ -419,14 +424,29 @@ get_rnd_text(const char* fname, char* buf, int (*rng)(int))
that save and restore might fix the problem wouldn't be useful */
if (sizetxt < 1L)
return buf;
tidbit = (*rng)(sizetxt);
/* 'rumors' is about 3/4 of the way to the limit on a 16-bit config */
nhassert(sizetxt <= INT_MAX); /* essential for rn2(sizetxt) */
/* position randomly which will probably be in the middle of a line;
read the rest of that line, then use the next one; if there's no
next one (ie, end of file), go back to beginning and use first */
(void) dlb_fseek(fh, starttxt + tidbit, SEEK_SET);
(void) dlb_fgets(line, sizeof line, fh);
/*
* Position randomly which will probably be in the middle of a line.
* Read the rest of that line, then use the next one. If there's no
* next line (ie, end of file), go back to beginning and use first.
*
* When short lines have been padded to length N, only accept long
* lines if we land within last N+1 characters (+1 is for newline
* which hasn't been stripped away yet), effectively shortening
* them to normal length. That yields even selection distribution.
*/
for (trylimit = 5; trylimit > 0; --trylimit) {
tidbit = (long) (*rng)((int) sizetxt);
(void) dlb_fseek(fh, starttxt + tidbit, SEEK_SET);
(void) dlb_fgets(line, sizeof line, fh);
if (!padlength || (unsigned) strlen(line) <= padlength + 1)
break;
}
/* use next line */
if (!dlb_fgets(line, sizeof line, fh)) {
/* assume failure is due to end-of-file; go back to start */
(void) dlb_fseek(fh, starttxt, SEEK_SET);
(void) dlb_fgets(line, sizeof line, fh);
}