mirror of
https://github.com/ldc-developers/ldc.git
synced 2025-05-11 05:16:19 +03:00
261 lines
7.8 KiB
D
261 lines
7.8 KiB
D
// Compiler implementation of the D programming language
|
|
// Copyright (c) 1999-2015 by Digital Mars
|
|
// All Rights Reserved
|
|
// written by Walter Bright
|
|
// http://www.digitalmars.com
|
|
// Distributed under the Boost Software License, Version 1.0.
|
|
// http://www.boost.org/LICENSE_1_0.txt
|
|
|
|
module ddmd.root.speller;
|
|
|
|
import core.stdc.limits, core.stdc.stdlib, core.stdc.string;
|
|
|
|
/// Search delegate used by the speller routines in this module. It is called
/// with a 0-terminated candidate spelling and an `int` passed by reference that
/// receives the candidate's cost. A null return means "no match"; a non-null
/// return is kept by combineSpellerResult() when its cost is strictly lower
/// than the best cost so far.
alias dg_speller_t = void* delegate(const(char)*, ref int);
|
|
|
|
/// 0-terminated string of ASCII letters, digits and underscore. The unit test
/// in this module passes it as the `charset` argument of speller().
__gshared const(char)* idchars = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_";
|
|
|
|
/**************************************************
 * Merge a new candidate from the spell checker into the best result
 * found so far, keeping whichever has the lower cost as reported by
 * the search function.
 * Input/Output:
 *      p       best spelling found so far (null if none found yet)
 *      cost    cost of p (INT_MAX if none found yet)
 * Input:
 *      np      newly found spelling (null if none found)
 *      ncost   cost of np; only examined when np is non-null
 * Returns:
 *      true    np replaced p and its cost is less than or equal to 0
 *      false   otherwise (p and cost may or may not have been updated)
 */
bool combineSpellerResult(ref void* p, ref int cost, void* np, int ncost)
{
    // Nothing to merge: no candidate, or it is not strictly cheaper than the current best.
    if (!np || ncost >= cost)
        return false;

    p = np;
    cost = ncost;

    // A non-positive cost tells the caller to stop searching.
    return cost <= 0;
}
|
|
|
|
/**************************************************
 * Try the single-character edits of seed[] located at position `index`
 * (one deletion, then substitutions, then insertions) and pass each
 * trial string to dg.
 * Input:
 *      seed     0-terminated string to edit
 *      seedlen  length of seed, excluding the terminating 0
 *      dg       search delegate
 *      charset  0-terminated set of characters to substitute/insert;
 *               null or empty disables substitutions and insertions
 *      index    position of the edit; may equal seedlen, in which case
 *               only insertions are tried
 * Output:
 *      *cost    cost of the returned result, INT_MAX if nothing matched;
 *               not written when seedlen is 0 or the buffer allocation fails
 * Returns:
 *      the lowest-cost value returned by dg, or the first one whose
 *      cost is <= 0; null if nothing matched
 */
void* spellerY(const(char)* seed, size_t seedlen, dg_speller_t dg, const(char)* charset, size_t index, int* cost)
{
    if (seedlen == 0)
        return null;
    assert(seed[seedlen] == 0);

    // A trial holds at most seedlen + 1 characters plus the terminating 0.
    // Short seeds use the fixed array; longer ones take space from alloca().
    char[30] stackBuf;
    char* trial = stackBuf.ptr;
    if (seedlen > stackBuf.sizeof - 2)
    {
        trial = cast(char*)alloca(seedlen + 2); // leave space for extra char
        if (trial is null)
            return null; // no matches
    }

    *cost = INT_MAX;
    void* best = null;
    int trialCost; // shared by every dg call below
    const bool inside = index < seedlen; // is there a character at seed[index]?

    // The first `index` characters are the same for the deletion and insertion trials.
    memcpy(trial, seed, index);

    /* Deletion: drop seed[index] */
    if (inside)
    {
        // Copies the tail after seed[index], including the terminating 0.
        memcpy(trial + index, seed + index + 1, seedlen - index);
        assert(trial[seedlen - 1] == 0);
        void* found = dg(trial, trialCost);
        if (combineSpellerResult(best, *cost, found, trialCost))
            return best;
    }

    // Without a character set there is nothing to substitute or insert.
    if (!charset || !*charset)
        return best;

    /* Substitutions: replace seed[index] by each character of charset */
    if (inside)
    {
        memcpy(trial, seed, seedlen + 1);
        for (size_t k = 0; charset[k]; ++k)
        {
            trial[index] = charset[k];
            //printf("sub buf = '%s'\n", buf);
            void* found = dg(trial, trialCost);
            if (combineSpellerResult(best, *cost, found, trialCost))
                return best;
        }
        assert(trial[seedlen] == 0);
    }

    /* Insertions: shift the tail right by one and fill the gap at trial[index] */
    memcpy(trial + index + 1, seed + index, seedlen + 1 - index);
    for (size_t k = 0; charset[k]; ++k)
    {
        trial[index] = charset[k];
        //printf("ins buf = '%s'\n", buf);
        void* found = dg(trial, trialCost);
        if (combineSpellerResult(best, *cost, found, trialCost))
            return best;
    }
    assert(trial[seedlen + 1] == 0);

    return best; // return "best" result
}
|
|
|
|
/**************************************************
 * Generate the spellings that differ from seed[] by one deletion,
 * transposition, substitution or insertion, and search each of them.
 * Input:
 *      seed     0-terminated string to edit
 *      seedlen  length of seed, excluding the terminating 0
 *      dg       search delegate
 *      charset  0-terminated set of characters to substitute/insert;
 *               null or empty disables substitutions and insertions
 *      flag     zero: every trial is passed straight to dg.
 *               non-zero: every trial is passed to spellerY() for one
 *               further edit, and transpositions are skipped.
 * Returns:
 *      the lowest-cost result found, or the first one whose cost is <= 0;
 *      null if nothing matched, seedlen is 0, or the buffer allocation fails
 */
void* spellerX(const(char)* seed, size_t seedlen, dg_speller_t dg, const(char)* charset, int flag)
{
    if (seedlen == 0)
        return null;

    // A trial holds at most seedlen + 1 characters plus the terminating 0.
    // Short seeds use the fixed array; longer ones take space from alloca().
    char[30] stackBuf;
    char* trial = stackBuf.ptr;
    if (seedlen > stackBuf.sizeof - 2)
    {
        trial = cast(char*)alloca(seedlen + 2); // leave space for extra char
        if (trial is null)
            return null; // no matches
    }

    void* best = null;
    void* found;
    int bestCost = INT_MAX;
    int trialCost; // shared by every search call below

    /* Deletions: trial starts as seed without seed[0]; restoring trial[pos]
     * after each step moves the gap one position to the right.
     */
    memcpy(trial, seed + 1, seedlen);
    for (size_t pos = 0; pos < seedlen; ++pos)
    {
        //printf("del buf = '%s'\n", buf);
        found = flag ? spellerY(trial, seedlen - 1, dg, charset, pos, &trialCost)
                     : dg(trial, trialCost);
        if (combineSpellerResult(best, bestCost, found, trialCost))
            return best;
        trial[pos] = seed[pos];
    }

    /* Transpositions: only tried in the single-edit pass */
    if (flag == 0)
    {
        memcpy(trial, seed, seedlen + 1);
        for (size_t right = 1; right < seedlen; ++right)
        {
            const size_t left = right - 1;

            // swap [left] and [right]
            trial[left] = seed[right];
            trial[right] = seed[left];
            //printf("tra buf = '%s'\n", buf);
            void* swapped = dg(trial, trialCost);
            if (combineSpellerResult(best, bestCost, swapped, trialCost))
                return best;

            // trial[right] is overwritten by the next iteration, so only [left] is restored.
            trial[left] = seed[left];
        }
    }

    // Without a character set there is nothing to substitute or insert.
    if (!charset || !*charset)
        return best;

    /* Substitutions */
    memcpy(trial, seed, seedlen + 1);
    for (size_t pos = 0; pos < seedlen; ++pos)
    {
        for (size_t k = 0; charset[k]; ++k)
        {
            trial[pos] = charset[k];
            //printf("sub buf = '%s'\n", buf);
            found = flag ? spellerY(trial, seedlen, dg, charset, pos + 1, &trialCost)
                         : dg(trial, trialCost);
            if (combineSpellerResult(best, bestCost, found, trialCost))
                return best;
        }
        trial[pos] = seed[pos];
    }

    /* Insertions: trial is seed shifted right by one; the free slot starts at
     * trial[0] and moves right as each trial[pos] is restored.
     */
    memcpy(trial + 1, seed, seedlen + 1);
    for (size_t pos = 0; pos <= seedlen; ++pos) // yes, do seedlen+1 iterations
    {
        for (size_t k = 0; charset[k]; ++k)
        {
            trial[pos] = charset[k];
            //printf("ins buf = '%s'\n", buf);
            found = flag ? spellerY(trial, seedlen + 1, dg, charset, pos + 1, &trialCost)
                         : dg(trial, trialCost);
            if (combineSpellerResult(best, bestCost, found, trialCost))
                return best;
        }
        trial[pos] = seed[pos]; // going past end of seed[] is ok, as we hit the 0
    }

    return best; // return "best" result
}
|
|
|
|
/**************************************************
 * Looks for correct spelling.
 * Makes up to two passes over the seed[]: the first offers every
 * single-edit variant to dg; the second, made only for seeds of four
 * or more characters when the first pass found nothing, applies one
 * further edit to each variant. Seeds shorter than two characters are
 * not searched at all.
 * This does an exhaustive search, so can potentially be very slow.
 * Input:
 *      seed            wrongly spelled word (0-terminated)
 *      dg              search delegate
 *      charset         character set
 * Returns:
 *      null            no correct spellings found
 *      void*           value returned by dg() for the spelling chosen by
 *                      the first pass that found any match
 */
void* speller(const(char)* seed, scope dg_speller_t dg, const(char)* charset)
{
    const size_t seedlen = strlen(seed);

    // Number of passes: two for seeds of length >= 4, otherwise seedlen / 2.
    const size_t maxdist = (seedlen >= 4) ? 2 : seedlen / 2;

    int distance = 0;
    while (distance < maxdist)
    {
        // `distance` doubles as spellerX's flag: 0 = single edits, 1 = double edits.
        if (auto p = spellerX(seed, seedlen, dg, charset, distance))
            return p;
        ++distance;
    }
    return null; // didn't find it
}
|
|
|
|
unittest
{
    // Each row is [seed, spelling the delegate accepts, "y" if speller() is
    // expected to find it / "n" if not]. A row of nulls terminates the table.
    static __gshared const(char)*** cases =
    [
        ["hello", "hell", "y"],
        ["hello", "hel", "y"],
        ["hello", "ello", "y"],
        ["hello", "llo", "y"],
        ["hello", "hellox", "y"],
        ["hello", "helloxy", "y"],
        ["hello", "xhello", "y"],
        ["hello", "xyhello", "y"],
        ["hello", "ehllo", "y"],
        ["hello", "helol", "y"],
        ["hello", "abcd", "n"],
        ["hello", "helxxlo", "y"],
        ["hello", "ehlxxlo", "n"],
        ["hello", "heaao", "y"],
        ["_123456789_123456789_123456789_123456789", "_123456789_123456789_123456789_12345678", "y"],
        [null, null, null]
    ];
    //printf("unittest_speller()\n");

    // The one spelling the delegate below accepts; reassigned for every case.
    void* target;

    // Search delegate: accepts exactly `target`, always reporting a cost of 0.
    void* matchTarget(const(char)* s, ref int cost)
    {
        //printf("speller_test(%s, %s)\n", dgarg, s);
        cost = 0;
        return strcmp(cast(char*)target, s) == 0 ? target : null;
    }

    // Smoke test before walking the table.
    target = cast(char*)"hell";
    const(void)* hit = speller(cast(const(char)*)"hello", &matchTarget, idchars);
    assert(hit !is null);

    for (int row = 0; cases[row][0]; row++)
    {
        //printf("case [%d]\n", i);
        target = cast(void*)cases[row][1];
        void* result = speller(cases[row][0], &matchTarget, idchars);

        // A non-null result must be flagged 'y' in the table, a null one 'n'.
        assert(cases[row][2][0] == (result ? 'y' : 'n'));
    }
    //printf("unittest_speller() success\n");
}
|