#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/random.h>
#include <unistd.h>

/*
 * One vertex of the word graph: a distinct word of the input plus the
 * weights of the edges leaving it.
 */
typedef struct {
    char *content;   /* text of the word; points into the loaded file buffer */
    double *weights; /* weights[j]: share of occurrences of this word that are
                        directly followed by word j (row sums to 1, or is all
                        zero when the word never has a successor) */
} Node;

/* Upper bound on the number of words printed by the generator. */
enum { MAX_OUTPUT_WORDS = 100 };

/* A byte separates words when it is whitespace or a NUL. */
static int is_separator(char c)
{
    return c == '\0' || isspace((unsigned char)c);
}

/*
 * Read the whole file at `path` into a freshly allocated, NUL-terminated
 * buffer. On success returns the buffer (caller frees) and stores the number
 * of bytes read in *out_len. On failure prints a diagnostic and returns NULL.
 */
static char *read_text_file(const char *path, size_t *out_len)
{
    FILE *fp = fopen(path, "r");
    if (fp == NULL) {
        perror("Error opening file");
        return NULL;
    }

    /* fgetc returns int: a plain char cannot represent EOF reliably. */
    size_t len = 0;
    int ch;
    while ((ch = fgetc(fp)) != EOF) {
        len++;
    }
    if (ferror(fp) || fseek(fp, 0, SEEK_SET) != 0) {
        perror("Error reading file");
        fclose(fp);
        return NULL;
    }

    char *buf = malloc(len + 1); /* +1 for the terminating NUL */
    if (buf == NULL) {
        perror("Memory allocation failed");
        fclose(fp);
        return NULL;
    }

    size_t got = fread(buf, 1, len, fp);
    if (ferror(fp)) {
        perror("Error reading file");
        free(buf);
        fclose(fp);
        return NULL;
    }
    fclose(fp);

    /* Trust what fread actually delivered, not the earlier count. */
    buf[got] = '\0';
    *out_len = got;
    return buf;
}

/*
 * Count the words in text[0..len) and report the longest word length through
 * *longest. text[len] must be NUL; the loop deliberately visits it so that a
 * final word without trailing whitespace is still counted.
 */
static size_t count_words(const char *text, size_t len, size_t *longest)
{
    size_t count = 0;
    size_t run = 0;
    size_t best = 0;

    for (size_t i = 0; i <= len; i++) {
        if (!is_separator(text[i])) {
            run++;
            continue;
        }
        if (run > 0) {
            count++;
            if (run > best) {
                best = run;
            }
            run = 0;
        }
    }

    *longest = best;
    return count;
}

/*
 * Split text[0..len) in place: every separator becomes NUL and words[k] is
 * set to the start of the k-th word. Uses the same separator rule as
 * count_words, so exactly count_words() entries are written.
 */
static void split_words(char *text, size_t len, char **words)
{
    size_t n = 0;
    int in_word = 0;

    for (size_t i = 0; i < len; i++) {
        if (is_separator(text[i])) {
            text[i] = '\0';
            in_word = 0;
        } else if (!in_word) {
            words[n++] = &text[i];
            in_word = 1;
        }
    }
}

/*
 * Give every word a 1-based id shared by all identical words, numbered in
 * order of first appearance. `ids` must be zero-filled on entry (0 means
 * "not assigned yet"). Returns the number of distinct words.
 */
static size_t assign_word_ids(char **words, size_t word_count, size_t *ids)
{
    size_t unique = 0;

    for (size_t i = 0; i < word_count; i++) {
        if (ids[i] != 0) {
            continue;
        }
        ids[i] = ++unique;
        for (size_t k = i + 1; k < word_count; k++) {
            if (strcmp(words[i], words[k]) == 0) {
                ids[k] = unique;
            }
        }
    }
    return unique;
}

/* Release a node array created by build_nodes (NULL is accepted). */
static void free_nodes(Node *nodes, size_t unique_count)
{
    if (nodes == NULL) {
        return;
    }
    for (size_t i = 0; i < unique_count; i++) {
        free(nodes[i].weights);
    }
    free(nodes);
}

/*
 * Build one node per distinct word and fill in the normalized transition
 * weights between consecutive words. Returns NULL (after printing a
 * diagnostic) if an allocation fails.
 */
static Node *build_nodes(char **words, const size_t *ids, size_t word_count,
                         size_t unique_count)
{
    /* calloc leaves content/weights NULL so a partial build can be freed. */
    Node *nodes = calloc(unique_count, sizeof *nodes);
    if (nodes == NULL) {
        perror("Memory allocation failed");
        return NULL;
    }
    for (size_t i = 0; i < unique_count; i++) {
        nodes[i].weights = calloc(unique_count, sizeof *nodes[i].weights);
        if (nodes[i].weights == NULL) {
            perror("Memory allocation failed");
            free_nodes(nodes, unique_count);
            return NULL;
        }
    }

    for (size_t i = 0; i < word_count; i++) {
        size_t from = ids[i] - 1;
        /* Every word gets its text, including the very last one, so no node
           is left with a NULL content. */
        if (nodes[from].content == NULL) {
            nodes[from].content = words[i];
        }
        if (i + 1 < word_count) {
            nodes[from].weights[ids[i + 1] - 1]++;
        }
    }

    /* Turn raw successor counts into per-row proportions. */
    for (size_t i = 0; i < unique_count; i++) {
        double total = 0;
        for (size_t k = 0; k < unique_count; k++) {
            total += nodes[i].weights[k];
        }
        if (total > 0) {
            for (size_t k = 0; k < unique_count; k++) {
                nodes[i].weights[k] /= total;
            }
        }
    }
    return nodes;
}

/*
 * Print up to MAX_OUTPUT_WORDS words starting at node `start`. At each step
 * every candidate starts with score 1.0, receives the outgoing weights of all
 * words emitted so far, and is divided by unique_count once for each time it
 * already appears in the history; the highest score is emitted next.
 * `scores` is caller-provided scratch space of unique_count doubles.
 */
static void generate(const Node *nodes, size_t unique_count, size_t start,
                     double *scores)
{
    size_t context[MAX_OUTPUT_WORDS]; /* ids of the words emitted so far */
    size_t current = start;

    for (size_t step = 0; step < MAX_OUTPUT_WORDS; step++) {
        for (size_t k = 0; k < unique_count; k++) {
            scores[k] = 1.0;
        }
        context[step] = current;
        printf("%s ", nodes[current].content);

        for (size_t i = 0; i <= step; i++) {
            const double *row = nodes[context[i]].weights;
            for (size_t k = 0; k < unique_count; k++) {
                scores[k] += row[k];
                if (context[i] == k) {
                    scores[k] /= (double)unique_count;
                }
            }
        }

        size_t best = 0;
        for (size_t k = 1; k < unique_count; k++) {
            if (scores[k] > scores[best]) {
                best = k;
            }
        }
        if (scores[best] <= 0.0) {
            break;
        }
        current = best;
    }
    printf("\n");
}

/*
 * Usage: <program> <text-file>
 *
 * Loads the file, builds a word-to-next-word transition table, prints a few
 * statistics and then a generated word sequence starting from a randomly
 * chosen word. Returns EXIT_SUCCESS, or EXIT_FAILURE on bad usage, I/O
 * errors, allocation failure or an input without any word.
 */
int main(int argc, char *argv[])
{
    int status = EXIT_FAILURE;
    char *text = NULL;
    char **words = NULL;
    size_t *ids = NULL;
    Node *nodes = NULL;
    double *scores = NULL;
    size_t text_len = 0;
    size_t longest = 0;
    size_t word_count = 0;
    size_t unique_count = 0;
    unsigned int seed = 0;

    if (argc < 2) {
        fprintf(stderr, "Usage: %s <text-file>\n",
                argc > 0 ? argv[0] : "program");
        return EXIT_FAILURE;
    }

    text = read_text_file(argv[1], &text_len);
    if (text == NULL) {
        return EXIT_FAILURE;
    }

    word_count = count_words(text, text_len, &longest);
    if (word_count == 0) {
        /* Nothing to pick a start word from; also avoids a modulo by zero. */
        fprintf(stderr, "No words found in %s\n", argv[1]);
        goto cleanup;
    }

    words = calloc(word_count, sizeof *words);
    ids = calloc(word_count, sizeof *ids);
    if (words == NULL || ids == NULL) {
        perror("Memory allocation failed");
        goto cleanup;
    }
    split_words(text, text_len, words);
    unique_count = assign_word_ids(words, word_count, ids);

    nodes = build_nodes(words, ids, word_count, unique_count);
    if (nodes == NULL) {
        goto cleanup;
    }

    if (getentropy(&seed, sizeof seed) != 0) {
        perror("getentropy");
        goto cleanup;
    }
    size_t start = seed % unique_count;
    printf("Starting node: %zu\n", start);

    scores = malloc(unique_count * sizeof *scores);
    if (scores == NULL) {
        fprintf(stderr, "Memory allocation failed\n");
        goto cleanup;
    }

    printf("Total words: %zu\n", word_count);
    printf("Unique words: %zu\n", unique_count);
    printf("Longest word length: %zu\n", longest);

    generate(nodes, unique_count, start, scores);
    status = EXIT_SUCCESS;

cleanup:
    free(scores);
    free_nodes(nodes, unique_count);
    free(ids);
    free(words); /* entries point into `text`, so only the array is freed */
    free(text);
    return status;
}