get_rnd_text() for rumors and other stuff
Solve the uneven distribution situation that has been present for picking random rumors for a long time and for random engravings, epitaphs, and hallucinatory monster names since 3.6.0. This relies on the previous partial solution where short lines have been padded to a longer length. When that length is N and a random seek lands in a long line of length L, retry if the position is in the first L-N characters. Put differently, if it takes more than N characters to reach the next newline, reject that random seek and try again. This effectively makes long lines behave as if they had the same length of N as the short lines have been padded to, and when all lines are the same length, all entries have the same chance to be chosen.
This commit is contained in:
@@ -2204,7 +2204,7 @@ bogusmon(char *buf, char *code)
|
||||
if (code)
|
||||
*code = '\0';
|
||||
/* might fail (return empty buf[]) if the file isn't available */
|
||||
get_rnd_text(BOGUSMONFILE, buf, rn2_on_display_rng);
|
||||
get_rnd_text(BOGUSMONFILE, buf, rn2_on_display_rng, MD_PAD_BOGONS);
|
||||
if (!*mnam) {
|
||||
Strcpy(buf, "bogon");
|
||||
} else if (index(bogon_codes, *mnam)) { /* strip prefix if present */
|
||||
|
||||
@@ -18,7 +18,7 @@ random_engraving(char *outbuf)
|
||||
/* a random engraving may come from the "rumors" file,
|
||||
or from the "engrave" file (formerly in an array here) */
|
||||
if (!rn2(4) || !(rumor = getrumor(0, outbuf, TRUE)) || !*rumor)
|
||||
(void) get_rnd_text(ENGRAVEFILE, outbuf, rn2);
|
||||
(void) get_rnd_text(ENGRAVEFILE, outbuf, rn2, MD_PAD_RUMORS);
|
||||
|
||||
wipeout_text(outbuf, (int) (strlen(outbuf) / 4), 0);
|
||||
return outbuf;
|
||||
@@ -1446,7 +1446,7 @@ make_grave(int x, int y, const char *str)
|
||||
/* Engrave the headstone */
|
||||
del_engr_at(x, y);
|
||||
if (!str)
|
||||
str = get_rnd_text(EPITAPHFILE, buf, rn2);
|
||||
str = get_rnd_text(EPITAPHFILE, buf, rn2, MD_PAD_RUMORS);
|
||||
make_engr_at(x, y, str, 0L, HEADSTONE);
|
||||
return;
|
||||
}
|
||||
|
||||
34
src/rumors.c
34
src/rumors.c
@@ -395,7 +395,11 @@ RESTORE_WARNING_FORMAT_NONLITERAL
|
||||
/* Gets a random line of text from file 'fname', and returns it.
|
||||
rng is the random number generator to use, and should act like rn2 does. */
|
||||
char *
|
||||
get_rnd_text(const char* fname, char* buf, int (*rng)(int))
|
||||
get_rnd_text(
|
||||
const char *fname,
|
||||
char *buf,
|
||||
int (*rng)(int),
|
||||
unsigned padlength)
|
||||
{
|
||||
dlb *fh;
|
||||
|
||||
@@ -404,6 +408,7 @@ get_rnd_text(const char* fname, char* buf, int (*rng)(int))
|
||||
if (fh) {
|
||||
/* TODO: cache sizetxt, starttxt, endtxt. maybe cache file contents? */
|
||||
long sizetxt = 0L, starttxt = 0L, endtxt = 0L, tidbit = 0L;
|
||||
int trylimit;
|
||||
char *endp, line[BUFSZ], xbuf[BUFSZ];
|
||||
|
||||
/* skip "don't edit" comment */
|
||||
@@ -419,14 +424,29 @@ get_rnd_text(const char* fname, char* buf, int (*rng)(int))
|
||||
that save and restore might fix the problem wouldn't be useful */
|
||||
if (sizetxt < 1L)
|
||||
return buf;
|
||||
tidbit = (*rng)(sizetxt);
|
||||
/* 'rumors' is about 3/4 of the way to the limit on a 16-bit config */
|
||||
nhassert(sizetxt <= INT_MAX); /* essential for rn2(sizetxt) */
|
||||
|
||||
/* position randomly which will probably be in the middle of a line;
|
||||
read the rest of that line, then use the next one; if there's no
|
||||
next one (ie, end of file), go back to beginning and use first */
|
||||
(void) dlb_fseek(fh, starttxt + tidbit, SEEK_SET);
|
||||
(void) dlb_fgets(line, sizeof line, fh);
|
||||
/*
|
||||
* Position randomly which will probably be in the middle of a line.
|
||||
* Read the rest of that line, then use the next one. If there's no
|
||||
* next line (ie, end of file), go back to beginning and use first.
|
||||
*
|
||||
* When short lines have been padded to length N, only accept long
|
||||
* lines if we land within last N+1 characters (+1 is for newline
|
||||
* which hasn't been stripped away yet), effectively shortening
|
||||
* them to normal length. That yields even selection distribution.
|
||||
*/
|
||||
for (trylimit = 5; trylimit > 0; --trylimit) {
|
||||
tidbit = (long) (*rng)((int) sizetxt);
|
||||
(void) dlb_fseek(fh, starttxt + tidbit, SEEK_SET);
|
||||
(void) dlb_fgets(line, sizeof line, fh);
|
||||
if (!padlength || (unsigned) strlen(line) <= padlength + 1)
|
||||
break;
|
||||
}
|
||||
/* use next line */
|
||||
if (!dlb_fgets(line, sizeof line, fh)) {
|
||||
/* assume failure is due to end-of-file; go back to start */
|
||||
(void) dlb_fseek(fh, starttxt, SEEK_SET);
|
||||
(void) dlb_fgets(line, sizeof line, fh);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user