C（范围 1-100,000,000）最大循环的 Collatz 猜想-解网

问：

我正在制作一个程序，用于计算数字范围（1 - 100,000,000）之间 Collatz 猜想的最多循环，我需要它在 4 分钟内在 linux 系统上运行，并且

command gcc -O0 -m32 -Wall -Wextra -Werror -pedantic -o collatz collatz.c.

关于如何改进算法的任何想法，因为在 10,000,000 之后，程序需要很长时间才能完成，请记住，我不能使用多线程或预先计算的数据。

欢迎所有回答，感谢您抽出宝贵时间接受采访。 😀

// Lets find the longest collatz
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>

// This function finds how many times it took for a number to reach 1
unsigned long int collatz(uint32_t First_num) {
    // steps measures how many times a number needs to reach 1
    uint32_t steps = 1;
    // This is the loop that applies the rules of the collatz conjecture
    while (First_num != 1) {
        (First_num % 2 == 0) ? (First_numm >>= 1) : (First_num = (3 * First_num + 1) / 2);
        steps++;
    }
    return steps;
}

// This is the function that calculates the maximum loops a number needs to reach 1
int max(uint32_t First_num, uint32_t Second_num) {
    // This "if" determines if the number is correct
    if (First_num <= 0 || Second_num <= 0) {
        printf("0\n");
    }
    uint32_t max = 0;
    uint32_t max_num = 0;
    uint32_t i = First_num;
    // This loop is used to find the maximum times the collatz function needs to be used
    for (i; i <= Second_num; i++) {
        printf("The num %u took the most %ld!!!\n", i, collatz(i));
        // This block finds the number with the most loops
        if ((collatz(i)) >= max) {                                                                                                                                                                                                               
            max = collatz(i);                                                                                              
            max_num = i;
        }
    }
    printf("The num %u took the most %u !!!\n", max_num, max);                                                                                                                                                                        
    return 0;                                                                                                                                                                                                       
}
                                                                                                                                                                                                                               
// This is the main function in which the "max" function is used and the user gives his input and gets his output                                                                                                               
int main(int argc, char **argv) {                                                                                                                                                                                                        
    if (argc != 3) {                                                                                                                                                                                                                         
        printf("Program needs to be called as './prog num1 num2'\n");                                                                                                                                                                   
        return 1;                                                                                                                                                                                                               
    }                                                                                                                                                                                                                       
    //Users input                                                                                                                                                                                                                   
    uint32_t num1 = atoi(argv[1]);                                                                                                                                                                                                  
    uint32_t num2 = atoi(argv[2]);                                                                                                                                                                                                      
    // Users input                                                                                                                                                                                                                  
    max(num1, num2);                                                                                                                                                                                                                 
    return 0;                                                                                                                                                                                                                   
}                                                                                                                                                                                                                               
//End of the program

C 处理效率 Collatz

你怎么知道它花了太长时间太完整——你是真的运行它完成，还是你只是让它运行了一段时间，有时超过 4 分钟，然后决定它花了太长时间？因为许多 Collatz 序列在变小之前会变大，所以可以想象您的程序超出了，由于换行而得到了不正确的结果，并因此进入了无限循环。你检查过吗？uint32_t

2赞 Eric Postpischil 10/21/2023

if ((collatz(i)) >= max){                                                                                                                                                                                                                            max = collatz(i);

不好，因为它评估了两次，重复工作。记住第一次调用的结果。collatz(i)

1赞 Jonathan Leffler 10/22/2023

@EricPostpischil：我发现，令我完全懊恼的是，我使用的makefile根本没有优化代码。使用，我的代码和您的代码的运行时间分别约为 27-28 秒，而记忆版本大约需要 5 秒。您的代码通常快约 1 秒，我将这归因于我做了更多的工作，因为它跟踪了更多的统计数据。-O3

答：

5赞 chux - Reinstate Monica 10/21/2023 #1

错误

3 * First_num + 1容易溢出。需要更多的工作（更广泛的类型）。@Eric Postpischil

删除调试输出

for (i;i<=Second_num;i++){
  // printf("The num %u took the most %ld!!!\n",i,collatz(i))

关于如何改进算法的任何想法，因为在 10,000,000 之后，程序需要很长时间才能完成

避免重新计算

// if ((collatz(i)) >= max) {
//   max = collatz(i);
unsigned long cc = collatz(i);
if (cc >= max) {                                                                                                                                                                                                               
  max = cc

保存以前的工作

（或至少其中的一部分。

#define PRIOR_N (100u*1000*1000)
static uint32_t prior[PRIOR_N];

// This function finds how many times it took for a number to reach 1
unsigned long int collatz(uint32_t First_num) {
  if (First_num < PRIOR_N && prior[First_num] && First_num > 1) {
    return prior[First_num];
  }
  ...

返工后防止溢出：

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <limits.h>

#define PRIOR_N (100u*1000*1000)
static uint32_t prior[PRIOR_N];

// This function finds how many times it took for a number to reach 1
unsigned collatz(uint64_t n) {
  if (n < PRIOR_N && prior[n]) {
    return prior[n]; // Use prior work if available.
  }
  // Applies the rules of the Collatz conjecture.
  if (n > 1) {
    unsigned cc;
    if(n % 2 == 0) {
      cc = collatz(n >> 1) + 1;
    } else {
      if (n >= (ULLONG_MAX - 1)/3) {
        fprintf(stderr, "Overflow\n");
        return UINT_MAX/2;
      }
      cc = collatz((3 * n + 1) / 2) + 1;
    }
    if (n < PRIOR_N) {
      prior[n] = cc;  // Save work.
    }
    return cc;
  }
  return n;
}

// 4.5 seconds.
The num 63728127 took the most 593 !!!

经验：反向迭代顺序

我把顺序颠倒了，从多到少，花了 1/6 的时间。（总计 3.75 秒）待定原因。

题外话：OP 的代码因类型更改而松散。改进的代码可以避免这种情况。

用调试输出对程序进行计时是虚假的：生成输出可能需要比计算本身更长的时间。
如发布的那样，计算 3 次：每次迭代计算一次值并将其存储在临时变量中，将毫不费力地减少时间。collatz(i)
在禁用优化的情况下对程序进行计时仅具有教学价值：在我的 M2 macbook 上，1 亿的执行时间是 -O3 的 37.2 秒，而 -O0 的执行时间略长，这非常令人惊讶。当 gcc 生成 32 位代码时，时序差异可能更为重要。
你做了一个沉默的假设，即没有中间值超过的范围，这对于大于 1 亿的起点来说并不明显。事实上，不少于 961803 个数字会产生至少一个数字大于或等于 2³² 的链！uint32_t

使用 64 位算术会生成不同的输出：
```
The num 63728127 took the most 593 !!!
```
在奇数情况下要更新的公式可以简化为。First_numFirst_num += (First_num + 1) / 2
您甚至可以使用无分支公式来更新：First_num
```
First_num = (First_num & -(First_num % 2)) + (First_num + 1) / 2;
```
但是，这似乎不会产生任何运行时收益。

考虑到上述备注，以下修改版本大约在 28 秒内完成：

// This function finds how many times it took for a number to reach 1
unsigned long int collatz(uint32_t First_num) {
    // steps measures how many times a number needs to reach 1
    unsigned long int steps = 1;
    // This is the loop that applies the rules of the collatz conjecture
    unsigned long long n = First_num;
    while (n > 1) {
        if (n % 2 == 0) {
            n >>= 1;
        } else {
            // Detect potential overflow
            if (n > ULLONG_MAX - ULLONG_MAX / 2) {
                printf("overflow: %u -> %llu\n", First_num, n);
                return 0;
            }
            n += (n + 1) / 2;
        }
        steps++;
    }
    return steps;
}

添加数组来缓存以前的结果可将时间缩短 10 倍，即 1 亿：

// This function finds how many times it took for a number to reach 1
unsigned long int collatz(uint32_t First_num) {
#define CACHE_SIZE  100000000
#if CACHE_SIZE
    static uint32_t cache[CACHE_SIZE];
#endif
    // steps measures how many times a number needs to reach 1
    unsigned long int steps = 1;
    // This is the loop that applies the rules of the collatz conjecture
    unsigned long long n = First_num;
    while (n > 1) {
#if CACHE_SIZE
        if (n < CACHE_SIZE && cache[n]) {
            steps += cache[n] - 1;
            break;
        }
#endif
        if (n % 2 == 0) {
            n >>= 1;
        } else {
            // Detect potential overflow
            if (n > ULLONG_MAX - ULLONG_MAX / 2) {
                printf("overflow: %u -> %llu\n", First_num, n);
                return 0;
            }
            n += (n + 1) / 2;
        }
        steps++;
    }
#if CACHE_SIZE
    if (First_num < CACHE_SIZE) {
        cache[First_num] = steps;
    }
#endif
    return steps;
}

C（范围 1-100,000,000）最大循环的 Collatz 猜想

Collatz conjecture with C (range 1-100,000,000) maximum loops

评论

评论

评论