提问人:rvevau 提问时间:10/19/2023 更新时间:10/20/2023 访问量:103
在 C 中将两个单词合并为一个单词
Combine two words into one in C
问:
任务: 从输入流中读取几行。 第一行包含数字 N - 剩余行数(测试),N < 32。 接下来的 N 行中的每一行都包含两个用空格分隔的单词。(每个单词的长度不超过32个)。 有必要为每对单词获取一个新单词,以便第一个单词的结尾与第二个单词的开头重合,例如,mountain + insane = mountainsane。此字应输出到标准输出流。 如果可以通过多种方式连接单词,则应选择提供最大公共部分的一种,例如 papa + papaha = papaha(不是 papapapaha)。
注意:使用“%c”格式逐个字符输出最终字符串是不可接受的。必须使用“%s”格式形成并输出整个字符串。
代码:
#include <stdio.h>
#include <string.h>
int word_length(char* str)
{
unsigned short int i;
for (i = 0; str[i] != '\0'; i++);
return i;
}
int main() {
char word_1[32], word_2[32], line[64][64];
unsigned short int n, line_number, i, j, k;
scanf("%hu", &n);
for(line_number = 0; line_number < n; line_number++){
scanf("%s %s", word_1, word_2);
if(word_2[0] == word_1[0] && word_2[1] == word_1[1]){
for(i = 0; i < word_length(word_2); i++){
line[line_number][i] = word_2[i];
}
}
else{
for(j = 0; j < word_length(word_1); j++){
if(word_2[0] != word_1[j]){
line[line_number][j] = word_1[j];
}
else{
line[line_number][j] = word_2[j];
break;
}
}
for(k = j; k <= word_length(word_2); k++){
line[line_number][k] = word_2[k - 1];
}
}
}
printf("\n");
for(line_number = 0; line_number < n; line_number++){
printf("%s\n", line[line_number]);
}
return 0;
}
基本思想是按照第一个单词,寻找与第二个单词的第一个字母的匹配。如果字母不同,则写下单词 1 中的字母,如果它们匹配,则写下单词 2 中的所有字母。但是,我不确定这个逻辑是否正确。
尝试 | 期望 | 有 |
---|---|---|
爸爸妈妈 | 爸爸妈妈 | 爸爸 |
山疯了 | 山地 | 山 |
玩瑜伽 | 普拉奥加 | 普拉加 |
爸爸帕帕哈 | 木瓜 | 木瓜 |
答:
我不知道它是否 100% 正确(我不确定任务本身 [描述太简短,没有解释很多极端情况])并且没有太多时间检查,但它适用于您的测试用例。
- 使用函数。
- 不要使简单的任务过于复杂。
char *combine(char *w1, const char *w2)
{
char *dest = w1;
while(*w1)
{
if(*w1 && *w2)
{
if(*w1 == *w2) w2++;
}
if(*w1) w1++;
if(!*w1) strcpy(w1, w2);
}
return dest;
}
int main(void)
{
printf("`%s`\n", combine((char[100]){"papa"}, "mama"));
printf("`%s`\n", combine((char[100]){"mountain"}, "insane"));
printf("`%s`\n", combine((char[100]){"play"}, "yoga"));
printf("`%s`\n", combine((char[100]){"papa"}, "papaha"));
printf("`%s`\n", combine((char[100]){"papa"}, ""));
printf("`%s`\n", combine((char[100]){""}, "papaha"));
printf("`%s`\n", combine((char[100]){""}, ""));
}
https://godbolt.org/z/xP1nsazMG
输出:
`papamama`
`mountainsane`
`playoga`
`papaha`
`papa`
`papaha`
``
评论
if(*w1 && *w2)
简化为 和 --> 。if(*w2)
if(*w1) w1++;
w1++;
关于OP代码:
- 垂直空间过多(缩进 2xSP)使其难以阅读。
- 无缘无故地重写。( 已被收录。
strlen()
string.h
char w1[32];
(和 ) 不考虑尾随的 NUL。w2
char line[64][64];
是完全不合适的。- 不验证来自 的返回代码。
scanf()
- 循环似乎只是对解决方案的猜测。
没有规定在端接之前必须对所有线路进行缓冲才能输出。每行都可以/应该输出,因为它是由两个单词组成的。
下面是有效的带注释的代码(使用编译的“字符串”,而不是从文件中读取输入):
#include <stdio.h>
#include <string.h>
char *combine( char *dest, char *w1, const char *w2 ) {
/* omitting checks for NULL pointers for brevity */
/* all string buffers presumed to be of sufficient size */
strcpy( dest, w1 ); // "word1" is always present in output
size_t len1 = strlen( w1 ); // get both lengths
size_t len2 = strlen( w2 );
// determine minimum of lengths
size_t len = len1 < len2 ? len1 : len2;
// point into destination at that negative offset from trailing end
// this is the point of maximum possible "overlap"
char *cp = dest + len1 - len;
// loop attempting to match that many characters
while( *cp ) {
if( strncmp( cp, w2, len ) == 0 ) {
// matched "suffix" with "prefix"!!
strcpy( cp, w2 ); // overlay all of "word2"
return dest; // all done
}
cp++; // shorten suffix being tested
len--; // reduce length being compared
}
strcat( dest, w2 ); // no commonality, so concatenate
return dest;
}
int main(void) {
char *tests[][3] = {
{ "papa", "mama", "papamama" }, // w1, w2 and expected result
{ "mountain", "insane", "mountainsane" },
{ "play", "yoga", "playoga" },
{ "papa", "papaha", "papaha" },
{ "", "foobar", "foobar" },
{ "foobar", "", "foobar" },
{ "foobarfoo", "bar", "foobarfoobar" },
};
size_t nTest = sizeof tests/sizeof tests[0];
char dest[ 64 + 1 ]; // Account for trailing NUL
for( size_t i = 0; i < nTest; i++ )
printf( "'%s' + '%s'\n\t'%s'\n\t[%s]\n",
tests[i][0], tests[i][1],
combine( dest, tests[i][0], tests[i][1] ),
tests[i][2] );
return 0;
}
输出:
'papa' + 'mama'
'papamama'
[papamama]
'mountain' + 'insane'
'mountainsane'
[mountainsane]
'play' + 'yoga'
'playoga'
[playoga]
'papa' + 'papaha'
'papaha'
[papaha]
'' + 'foobar'
'foobar'
[foobar]
'foobar' + ''
'foobar'
[foobar]
'foobarfoo' + 'bar'
'foobarfoobar'
[foobarfoobar]
挑战的拐杖是识别并从“最大可能的重叠”开始,并从那里开始工作......
新的一天,代码调整:可以缩短函数的循环以简化代码:while()
// while not matching remaining characters
while( *cp && strncmp( cp, w2, len ) ) {
cp++; // shorten suffix being tested
len--; // reduce length being compared
}
strcpy( cp, w2 ); // put w2 here
return dest;
最后我想出了这个程序
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
char* mergeWords(char* word1, char* word2) {
int len1 = strlen(word1);
int len2 = strlen(word2);
int maxSize = len1 + len2 + 1;
char* merged = (char*)malloc(maxSize * sizeof(char));
int commonLen = 0;
for (int i = 0; i < len1; i++) {
int match = 1;
for (int j = 0; j < len1 - i; j++) {
if (word1[i + j] != word2[j]) {
match = 0;
break;
}
}
if (match) {
commonLen = len1 - i;
break;
}
}
strncpy(merged, word1, len1 - commonLen);
merged[len1 - commonLen] = '\0';
strcat(merged, word2);
return merged;
}
int main() {
int n;
scanf("%d", &n);
getchar();
for (int i = 0; i < n; i++) {
char word1[33], word2[33];
scanf("%s %s", word1, word2);
getchar();
char* merged = mergeWords(word1, word2);
printf("%s\n", merged);
free(merged);
}
return 0;
}
评论
strncpy
scanf("%s %s", word1, word2);
代码中存在多个问题:
word_length
是一个虚假的重新实现,在长度超过 的字符串上有一个无限循环。你为什么不使用?strlen
USHORT_MAX
strlen
char word_1[32], word_2[32]
定义对于长度为 '32 个字符的单词来说太短的数组,则 null 终止符至少需要一个额外的元素。char line[64][64]
定义了一个二维数组,该数组对于该程序来说不是必需的,因为您可以在读取单词后立即输出组合的单词。此外,第二个维度至少应该处理每个 32 个字符的非重叠单词的串联(你需要空终止符的空间),并且任务规范说你最多有测试用例,因此第一个维度不需要超过 .65
32
32
您应该测试是否无法根据格式字符串转换输入并退出程序。
scanf()
scanf("%s %s", word_1, word_2);
如果输入包含长度超过 32 个字节的字,则不防止缓冲区溢出。假设数组的长度为 ,则应按以下方式指定要存储到目标数组中的最大字符数:33
scanf("%32s %32s", word_1, word_2);
重叠检测环路不正确:
如果前 2 个字节相同,则只需复制并省略 null 终止符,因此组合 and 将产生 而不是 。
word_2
"aa1"
"aa"
"aa"
"aa1aa"
否则,您将复制与第一个字符不同的所有字符,然后尝试复制其余字符,而无需进一步测试。这也是不正确的,因为 和 导致 而不是 。
word_1
word_2
word_2
"ba1"
"ab"
"bab"
"ba1ab"
以下是单元测试的简单替代方案:
#include <stdio.h>
#include <string.h>
char *catenate_no_overlap(char *dest, const char *s1, const char *s2) {
if (!s1) s1 = "";
if (!s2) s2 = "";
size_t len1 = strlen(s1);
size_t len2 = strlen(s2);
// determine the maximum overlap between the end of s1 and the beginning of s2
size_t overlap = len1 < len2 ? len1 : len2;
while (overlap > 0 && memcmp(s1 + len1 - overlap, s2, overlap))
overlap--;
// copy the bytes from s1
memcpy(dest, s1, len1);
// copy the non overlapping part of s2 and the null terminator
memcpy(dest + len1, s2 + overlap, len2 - overlap + 1);
return dest;
}
int main(void) {
#ifdef TESTING
const char *tests[][3] = {
// tests from the OP question
{ "papa", "mama", "papamama" },
{ "mountain", "insane", "mountainsane" },
{ "play", "yoga", "playoga" },
{ "papa", "papaha", "papaha" },
// tests for corner cases
{ "", "", "" },
{ "", "hello", "hello" },
{ "hello", "", "hello" },
{ "hello", "hello", "hello" },
{ NULL, NULL, "" },
{ NULL, "hello", "hello" },
{ "hello", NULL, "hello" },
{ "ababa", "babab", "ababab" },
};
int numTests = sizeof(tests) / sizeof(tests[0]);
int fails = 0;
for (int i = 0; i < numTests; i++) {
char buf[64 + 1];
char *dest = catenate_no_overlap(buf, tests[i][0], tests[i][1]);
if (strcmp(dest, tests[i][2])) {
printf("test %d failed: \"%s\", \"%s\" -> \"%s\", expected \"%s\"\n",
i, tests[i][0], tests[i][1], dest, tests[i][2]);
fails++;
}
}
if (fails) {
printf("Test failures: %d / %d\n", fail, numTests);
return 1;
} else {
printf("All tests passed: %d / %d\n", numTests, numTests);
return 0;
}
#else
char word_1[32 + 1], word_2[32 + 1], dest[64 + 1];
int n;
if (scanf("%d", &n) != 1)
return 1;
while (n-- > 0) {
if (scanf("%32s %32s", word_1, word_2) != 2)
return 1;
printf("%s\n", catenate_no_overlap(dest, word_1, word_2));
}
return 0;
#endif
}
评论
char word_1[32], word_2[32]
if(word_2[0] == word_1[0] && word_2[1] == word_1[1])