diff --git a/base.c b/base.c index 914bbfa..804d9e7 100644 --- a/base.c +++ b/base.c @@ -3,17 +3,18 @@ #include #include #include +#include typedef struct { char *content; double *weights; } Node; -int main(void) { +int main(int argc, char *argv[]) { // Count number of characters char cchar; int size = 0; - FILE *fptr = fopen("moby.txt", "r"); + FILE *fptr = fopen(argv[1], "r"); if (fptr == NULL) { perror("Error opening file"); return EXIT_FAILURE; @@ -25,7 +26,7 @@ int main(void) { fseek(fptr, 0, SEEK_SET); // Reset file pointer to the beginning // Load file - char *raw_file = malloc(size + 1); // +1 for null terminator + char *raw_file = malloc(size + 1); // <\0> if (raw_file == NULL) { perror("Memory allocation failed"); fclose(fptr); @@ -33,16 +34,16 @@ int main(void) { } fread(raw_file, 1, size, fptr); - raw_file[size] = '\0'; // Null terminate the string + raw_file[size] = '\0'; fclose(fptr); - for (int i = 0; i < size; i++) { - if (ispunct((unsigned char) raw_file[i]) && !(raw_file[i] == '\'')) { - raw_file[i] = ' '; - } - } + //for (int i = 0; i < size; i++) { + // if (ispunct((unsigned char) raw_file[i]) && !(raw_file[i] == '\'')) { + // raw_file[i] = ' '; + // } + //} - // Count number of words and longest word + // Get nb of words int nb_words = 0; int longest = 0; int c_size = 0; @@ -88,17 +89,6 @@ int main(void) { } // Count unique words - int *freq_words = calloc(nb_words, sizeof(int)); - if (freq_words == NULL) { - perror("Memory allocation failed"); - free(raw_file); - for (int j = 0; j < word_index; j++) { - free(words[j]); - } - free(words); - return EXIT_FAILURE; - } - int *uni_i = calloc(nb_words, sizeof(int)); if (uni_i == NULL) { perror("Memory allocation failed"); @@ -107,7 +97,6 @@ int main(void) { free(words[j]); } free(words); - free(freq_words); return EXIT_FAILURE; } @@ -116,11 +105,9 @@ int main(void) { if (uni_i[i] == 0) { uni_word++; uni_i[i] = uni_word; - freq_words[uni_word - 1] = 1; // Initialize frequency count for (int l = i + 1; l < nb_words; l++) { if (strcmp(words[i], words[l]) == 0) { - uni_i[l] = uni_word; // Mark as the same unique word - freq_words[uni_word - 1]++; // Increment frequency + uni_i[l] = uni_word; } } } @@ -135,15 +122,14 @@ int main(void) { free(words[j]); } free(words); - free(freq_words); free(uni_i); return EXIT_FAILURE; } // Initialize nodes for (int i = 0; i < uni_word; i++) { - nodes[i].content = NULL; // Initialize content to NULL - nodes[i].weights = calloc(uni_word, sizeof(double)); // Allocate weights + nodes[i].content = NULL; + nodes[i].weights = calloc(uni_word, sizeof(double)); if (nodes[i].weights == NULL) { perror("Memory allocation failed"); free(raw_file); @@ -151,7 +137,6 @@ int main(void) { free(words[j]); } free(words); - free(freq_words); free(uni_i); free(nodes); return EXIT_FAILURE; @@ -167,7 +152,6 @@ int main(void) { nodes[index].weights[uni_i[i + 1] - 1]++; } - // Normalize weights for (int i = 0; i < uni_word; i++) { double total = 0; for (int l = 0; l < uni_word; l++) { @@ -175,14 +159,16 @@ int main(void) { } if (total > 0) { for (int l = 0; l < uni_word; l++) { - nodes[i].weights[l] /= total; // Normalize weights + nodes[i].weights[l] /= total; } } } - srand(time(NULL)); int stop = 0; - int next_words = 15556; // Starting index for the next word + unsigned int next_words; + getentropy(&next_words, sizeof(next_words)); + next_words = (next_words % uni_word); + printf("Starting node: %d\n", next_words); double *working_memory = malloc(uni_word * sizeof(double)); int *context = malloc(50 * sizeof(int)); @@ -195,8 +181,7 @@ int main(void) { printf("Unique words: %d\n", uni_word); printf("Longest word length: %d\n", longest); - while (stop < 50) { - // Reset working_memory for the current iteration + while (stop < 100) { for (int i = 0; i < uni_word; i++) { working_memory[i] = 1.0; } @@ -204,44 +189,30 @@ int main(void) { context[stop] = next_words; printf("%s ", nodes[next_words].content); - // Update working_memory based on context for (int i = 0; i <= stop; i++) { for (int l = 0; l < uni_word; l++) { - working_memory[l] *= nodes[context[i]].weights[l]; + working_memory[l] += nodes[context[i]].weights[l]; + if (context[i] == l){ + working_memory[l] /= uni_word; + } } } - // Find the index of the maximum value in working_memory - int max = 0; // Reset max for each iteration - for (int i = 1; i < uni_word; i++) { // Start from 1 since max is initialized to 0 + int max = 0; + for (int i = 1; i < uni_word; i++) { if (working_memory[i] > working_memory[max]) { max = i; } } - next_words = max; // Update next_words to the index of the max value + if(working_memory[max] <= 0.0){ + break; + } + next_words = max; stop++; } printf("\n"); - // Free allocated memory - free(working_memory); - free(context); - - // Free allocated memory - free(raw_file); - for (int i = 0; i < word_index; i++) { - free(words[i]); - } - free(words); - free(freq_words); - free(uni_i); - for (int i = 0; i < uni_word; i++) { - free(nodes[i].weights); - free(nodes[i].content); // Free the content strings - } - free(nodes); - return 0; }