提问人:Fakir 提问时间:11/16/2023 最后编辑:chux - Reinstate MonicaFakir 更新时间:11/16/2023 访问量:68
解析命令行输入文件名以检查内容的正确性
Parsing command line input filename to check the correctness of the content
问:
想要从命令行解析文件名并检查其正确性,例如 (1) 总长度、(2) 预期扩展名、(3) “_”位置和其他输入值。
顺序应如下:
$check.exe input_file L2A30000_0102051303042026_0001.dat
它应该检查输出文件 (L2A30000_0102051303042026_0001.dat) 是否按应有的方式键入(不是按确切的值,而是按类型和长度)。
// Function to check if a string consists of digits
int isNumeric(const char *str) {
while (*str) {
if (!isdigit(*str)) {
return 0; // Not a digit
}
str++;
}
return 1; // All characters are digits
}
int main(int argc, char *argv[]) {
// Check if the correct number of command line arguments is
provided
if (argc != 3) {
printf("Usage: %s inputfile outputfile\n", argv[0]);
return 1;
}
// Extract the output file name from the command line arguments
const char *outputFileName = argv[2];
// Define the expected format
char asciiChar1, numChar1, asciiChar2, numChar2, numChar3[5],
underscore1, numChar4[17], underscore2, numChar5[5],
numChar6[4], extension[4];
int result = sscanf(outputFileName,
"%c%c%c%c%4[0-9]%c%16[0-9]%c%1[0-9]%3[0-9]_%3[0-9]%4[.dat]",
&asciiChar1, &numChar1, &asciiChar2,
&numChar2, numChar3, &underscore1, numChar4, &underscore2,
numChar5, numChar6, extension);
// Debugging print statement
printf("Debug: sscanf result: %d\n", result);
printf("Debug: asciiChar1: %c\n", asciiChar1);
printf("Debug: numChar1: %c\n", numChar1);
printf("Debug: asciiChar2: %c\n", asciiChar2);
printf("Debug: numChar2: %c\n", numChar2);
printf("Debug: numChar3: %s\n", numChar3);
printf("Debug: underscore1: %c\n", underscore1);
printf("Debug: numChar4: %s\n", numChar4);
printf("Debug: underscore2: %c\n", underscore2);
printf("Debug: numChar5: %s\n", numChar5);
printf("Debug: numChar6: %s\n", numChar6);
printf("Debug: extension: %s\n", extension);
// Check if the extracted values match the expected format
if (result != 12 || !isalpha(asciiChar1) || !isdigit(numChar1) ||
!isalpha(asciiChar2) || !isdigit(numChar2) ||
strlen(numChar3) != 4 || !isNumeric(numChar3) ||
strlen(numChar4) != 16 || !isNumeric(numChar4) ||
strlen(numChar5) != 4 || !isNumeric(numChar5) ||
strlen(numChar6) != 3 || !isNumeric(numChar6) ||
strlen(extension) != 3 || strcmp(extension, ".dat") != 0) {
printf("Error: Output file format is incorrect.\n");
return 1;
}
// If all checks pass, the output file format is correct
printf("Output file format is correct.\n");
return 0;
}
命令行输入:
.\check.exe inputfile L2A30000_0102051303042026_0001.dat
这是我得到的输出:
Debug: sscanf result: 9
...
Debug: numChar5: 0001
Debug: extension:
Error: Output file format is incorrect.
这是我期望的输出:
Debug: extension:.dat
这部分不工作。其他部分都OK。 想要检查文件名是否为。如果没有,它将打印错误消息并退出。extension
.dat
答:
1赞
Allan Wind
11/16/2023
#1
我建议您在格式字符串中引入一些额外的空格,并按照以下行匹配参数:
int result = sscanf(outputFileName,
"%c%c"
"%c%c"
"%4[0-9]"
"%c"
"%16[0-9]"
"%c" // underscore2
"%1[0-9]"
"%3[0-9]_%3[0-9]%4[.dat]",
&asciiChar1, &numChar1,
&asciiChar2, &numChar2,
numChar3,
&underscore1,
numChar4,
&underscore2,
numChar5,
numChar6,
extension
);
因此,我们一直到第二个下划线。然后,您需要一个数字 (),但这与变量的大小不匹配。然后再增加 3 个数字 () 就可以了。然后是输入中没有的第三个下划线。还有 3 个没有匹配参数的数字。“%4[.dat]”,这会导致缓冲区溢出,因为扩展变量是 .总共有 12 个格式指令和 11 个参数,这是未定义的行为。char numChar5[5]
char numChat6[4]
char extension[4]
您可以通过对固定字符串进行硬编码来简化它:
#include <ctype.h>
#include <stdio.h>
#include <string.h>
int isNumeric(const char *str) {
for(; isdigit(*str); str++);
return !*str;
}
int main(int argc, char *argv[]) {
if (argc != 3) {
printf("Usage: %s inputfile outputfile\n", argv[0]);
return 1;
}
const char *outputFileName = argv[2];
char asciiChar1, numChar1, asciiChar2, numChar2, numChar3[5], numChar4[17], numChar5[5], extension[4];
int result = sscanf(outputFileName,
"%c%c"
"%c%c"
"%4[0-9]"
"_"
"%16[0-9]"
"_"
"%4[0-9]"
".dat",
&asciiChar1, &numChar1,
&asciiChar2, &numChar2,
numChar3,
numChar4,
numChar5
);
printf("Debug: sscanf result: %d\n", result);
printf("Debug: asciiChar1: %c\n", asciiChar1);
printf("Debug: numChar1: %c\n", numChar1);
printf("Debug: asciiChar2: %c\n", asciiChar2);
printf("Debug: numChar2: %c\n", numChar2);
printf("Debug: numChar3: %s\n", numChar3);
printf("Debug: numChar4: %s\n", numChar4);
printf("Debug: numChar5: %s\n", numChar5);
if (result != 7 || !isalpha(asciiChar1) || !isdigit(numChar1) ||
!isalpha(asciiChar2) || !isdigit(numChar2) ||
strlen(numChar3) != 4 || !isNumeric(numChar3) ||
strlen(numChar4) != 16 || !isNumeric(numChar4) ||
strlen(numChar5) != 4 || !isNumeric(numChar5)
) {
printf("Error: Output file format is incorrect.\n");
return 1;
}
printf("Output file format is correct.\n");
return 0;
}
使用示例运行:
./a.out input_file L2A30000_0102051303042026_0001.dat
Debug: sscanf result: 7
Debug: asciiChar1: L
Debug: numChar1: 2
Debug: asciiChar2: A
Debug: numChar2: 3
Debug: numChar3: 0000
Debug: numChar4: 0102051303042026
Debug: numChar5: 0001
Output file format is correct.
另一种方法是通过一个小解释器来解析文件名:is_valid_format()
is_valid_format2()
#include <ctype.h>
#include <stdio.h>
#include <string.h>
const char *alpha(const char *s) {
if(!s) return NULL;
if(!isalpha(*s)) return NULL;
return s + 1;
}
const char *digits(const char *s, size_t n) {
if(!s) return NULL;
for(size_t i = 0; i < n; i++)
if(!isdigit(s[i])) return NULL;
return s + n;
}
const char *str(const char *s, const char *s2) {
if(!s) return NULL;
size_t n = strlen(s2);
if(strncmp(s, s2, n)) return NULL;
return s + n;
}
int is_valid_filename(const char *s) {
s = alpha(s);
s = digits(s, 1);
s = alpha(s);
s = digits(s, 5);
s = str(s, "_");
s = digits(s, 16);
s = str(s, "_");
s = digits(s, 4);
s = str(s, ".dat");
return s && !*s;
}
int is_valid_filename2(const char *s) {
struct {
enum { ALPHA, DIGITS, STR } type;
union {
int n;
const char *s;
};
} format[] = {
{ ALPHA },
{ DIGITS, .n = 1 },
{ ALPHA },
{ DIGITS, .n = 5 },
{ STR, .s = "_" },
{ DIGITS, .n = 16 },
{ STR, .s = "_" },
{ DIGITS, .n = 4 },
{ STR, .s = ".dat" },
};
size_t n = sizeof format / sizeof *format;
for(size_t i = 0; s && i < n; i++) {
switch(format[i].type) {
case ALPHA:
s = alpha(s);
break;
case DIGITS:
s = digits(s, format[i].n);
break;
case STR:
s = str(s, format[i].s);
break;
}
}
return s && !*s;
}
int main(int argc, char *argv[]) {
if (argc != 3) {
printf("Usage: %s inputfile outputfile\n", argv[0]);
return 1;
}
char *result[] = { "invalid", "valid" };
printf("%s\n", result[is_valid_filename(argv[2])]);
printf("%s\n", result[is_valid_filename2(argv[2])]);
}
评论
0赞
Fakir
11/16/2023
非常感谢。经过测试,它们都工作正常。我更正了您在评论中提到的部分。但是我没有放置额外的字符 (%c) 以及 if (result != ) 值是错误的。我需要在获取数字后将数字转换为二进制。为此,您的第一个解决方案会更好吗?
0赞
Allan Wind
11/16/2023
确保通过单击复选标记来接受答案。不知道如何为您评估“更好”。您可以展开解析器以返回找到的二进制值。在解释器中,我可能会使参数使用您希望将其转换为的二进制值扩展结构。struct .. format
1赞
chux - Reinstate Monica
11/16/2023
#2
考虑简化:
"%[]"
以限制有效输入。保存到数组中。char
"%n"
保存扫描偏移量并确定字符串是否成功。- 使用字符串文字连接可以更清晰地显示复杂格式。
- 将名称更改为代码,在此处查找 A-Z、a-z。
asciiChar1
alphaChar1
- 除非首先确定扫描成功,否则不要尝试打印字符串。
请注意,要使扫描长度正确,唯一的方法是使所有字符串成功扫描到其最大宽度,并且要扫描的下一个字符为空字符。
这大大简化了测试。[]
char alphaChar1[2], numChar1[2], alphaChar2[2], numChar2[2], //
numChar3[5], numChar4[17], numChar5[5];
/* numChar6[4] Apparently not in OP's sample fileanme */
#define FMT_ALPHA "%1[A-Za-z]"
#define FMT_DIGIT "%1[0-9]"
#define FMT_EXT ".dat"
char sample[] = "L2A30000_0102051303042026_0001.dat";
#define FMT_N (sizeof sample - 1)
int n = 0;
sscanf(outputFileName, //
FMT_ALPHA FMT_DIGIT FMT_ALPHA FMT_DIGIT
"%4[0-9]" "_" "%16[0-9]" "_" "%4[0-9]" FMT_EXT "%n", //
alphaChar1, numChar1, alphaChar2, numChar2, //
numChar3, numChar4, numChar5, &n);
// Only this test needed.
if (n == FMT_N && outputFileName[FMT_N] == '\0') {
// Success
printf("Debug: alphaChar1: %c\n", alphaChar1[0]);
printf("Debug: numChar1: %c\n", numChar1[0]);
printf("Debug: alphaChar2: %c\n", alphaChar2[0]);
printf("Debug: numChar2: %c\n", numChar2[0]);
printf("Debug: numChar3: %s\n", numChar3);
printf("Debug: numChar4: %s\n", numChar4);
printf("Debug: numChar5: %s\n", numChar5);
} else {
puts("Failure");
}
输出
Debug: alphaChar1: L
Debug: numChar1: 2
Debug: alphaChar2: A
Debug: numChar2: 3
Debug: numChar3: 0000
Debug: numChar4: 0102051303042026
Debug: numChar5: 0001
评论
0赞
Fakir
11/16/2023
谢谢。测试过了。按照你说的去做。感谢你的所有建议,它也将帮助我学习。多年后使用 C。大部分都忘记了。
评论
numChar6
numChar5
001
numChar6
numChar4
4029
2026
main()
sscanf()
isalpha()