/*
 * base.c - builds a word-to-next-word transition table from a text file and
 * prints a short chain of words from it.
 *
 * Reconstructed from the post-patch side of commit f78215f4 ("Actualiser
 * base.c", Joachim, 2025-05-19).
 */
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>

typedef struct {
    char *content;   /* the word; borrowed pointer into the text buffer, never freed here */
    double *weights; /* weights[j]: share of this word's occurrences followed by word j */
} Node;

enum {
    GENERATED_WORDS = 50,      /* number of words printed by generate() */
    DEFAULT_START_WORD = 15556 /* preferred index of the first printed word */
};

#define INPUT_PATH "moby.txt"

/* Exactly the characters isspace() accepts in the "C" locale, so counting and
 * splitting can never disagree about where a word ends. */
#define WORD_DELIMS " \t\n\v\f\r"

/*
 * Read the whole file at `path` into a freshly allocated, NUL-terminated
 * buffer. On success returns the buffer (caller frees) and stores the number
 * of bytes read in *out_len. On failure prints a diagnostic and returns NULL.
 */
static char *read_text(const char *path, size_t *out_len) {
    FILE *fp = fopen(path, "r");
    if (fp == NULL) {
        perror("Error opening file");
        return NULL;
    }

    /* `ch` must be an int: a char cannot hold EOF distinctly from every byte. */
    size_t size = 0;
    int ch;
    while ((ch = fgetc(fp)) != EOF) {
        size++;
    }
    if (ferror(fp) || fseek(fp, 0, SEEK_SET) != 0) {
        perror("Error reading file");
        fclose(fp);
        return NULL;
    }

    char *buf = malloc(size + 1); /* +1 for the terminator */
    if (buf == NULL) {
        perror("Memory allocation failed");
        fclose(fp);
        return NULL;
    }

    size_t got = fread(buf, 1, size, fp);
    if (ferror(fp)) {
        perror("Error reading file");
        free(buf);
        fclose(fp);
        return NULL;
    }
    fclose(fp);

    buf[got] = '\0'; /* terminate at what was actually read */
    *out_len = got;
    return buf;
}

/*
 * Replace every punctuation character except the apostrophe with a space.
 * Embedded NUL bytes are also turned into spaces so that they act as word
 * breaks and the buffer remains a single C string.
 */
static void normalize_text(char *text, size_t len) {
    for (size_t i = 0; i < len; i++) {
        unsigned char c = (unsigned char)text[i];
        if (c == '\0' || (c != '\'' && ispunct(c))) {
            text[i] = ' ';
        }
    }
}

/*
 * Count the whitespace-separated words in `text` without modifying it and
 * store the length of the longest one in *longest.
 */
static int count_words(const char *text, int *longest) {
    int count = 0;
    const char *p = text;

    *longest = 0;
    for (;;) {
        p += strspn(p, WORD_DELIMS);
        if (*p == '\0') {
            break;
        }
        size_t len = strcspn(p, WORD_DELIMS);
        if ((int)len > *longest) {
            *longest = (int)len;
        }
        count++;
        p += len;
    }
    return count;
}

/*
 * Split `text` in place: each word is NUL-terminated inside the buffer and
 * its start is stored in `words`. At most `max_words` pointers are written.
 * Returns the number of words stored.
 */
static int split_words(char *text, char **words, int max_words) {
    int count = 0;
    char *p = text;

    while (count < max_words) {
        p += strspn(p, WORD_DELIMS);
        if (*p == '\0') {
            break;
        }
        size_t len = strcspn(p, WORD_DELIMS);
        words[count++] = p;
        if (p[len] == '\0') {
            break; /* last word ran up to the end of the buffer */
        }
        p[len] = '\0';
        p += len + 1;
    }
    return count;
}

/*
 * Give every word a 1-based id such that equal words share an id and ids are
 * handed out in order of first occurrence. `uni_i` must hold `nb_words`
 * zeros on entry. Returns the number of distinct words.
 */
static int index_unique(char *const *words, int nb_words, int *uni_i) {
    int uni_word = 0;

    for (int i = 0; i < nb_words; i++) {
        if (uni_i[i] != 0) {
            continue;
        }
        uni_i[i] = ++uni_word;
        for (int l = i + 1; l < nb_words; l++) {
            /* An already-labelled word cannot equal words[i]; skip the strcmp. */
            if (uni_i[l] == 0 && strcmp(words[i], words[l]) == 0) {
                uni_i[l] = uni_word;
            }
        }
    }
    return uni_word;
}

/* Release the weight rows and the node array. `nodes` may be NULL. */
static void free_nodes(Node *nodes, int uni_word) {
    if (nodes == NULL) {
        return;
    }
    for (int i = 0; i < uni_word; i++) {
        free(nodes[i].weights);
    }
    free(nodes); /* content is borrowed from the text buffer: not freed */
}

/*
 * Build one node per distinct word. Row i counts how often each word follows
 * word i and is then normalized so that a non-empty row sums to 1.
 * Allocates uni_word rows of uni_word doubles. Returns NULL if any allocation
 * fails, after releasing whatever was already allocated.
 */
static Node *build_nodes(char *const *words, const int *uni_i, int nb_words, int uni_word) {
    Node *nodes = malloc((size_t)uni_word * sizeof *nodes);
    if (nodes == NULL) {
        return NULL;
    }
    for (int i = 0; i < uni_word; i++) {
        nodes[i].content = NULL;
        nodes[i].weights = NULL;
    }
    for (int i = 0; i < uni_word; i++) {
        nodes[i].weights = calloc((size_t)uni_word, sizeof *nodes[i].weights);
        if (nodes[i].weights == NULL) {
            free_nodes(nodes, uni_word);
            return NULL;
        }
    }

    /* Every word gets its text assigned, including the very last one, which
     * has no successor and therefore contributes no transition. */
    for (int i = 0; i < nb_words; i++) {
        Node *node = &nodes[uni_i[i] - 1];
        if (node->content == NULL) {
            node->content = words[i];
        }
        if (i + 1 < nb_words) {
            node->weights[uni_i[i + 1] - 1] += 1.0;
        }
    }

    for (int i = 0; i < uni_word; i++) {
        double total = 0;
        for (int l = 0; l < uni_word; l++) {
            total += nodes[i].weights[l];
        }
        if (total > 0) {
            for (int l = 0; l < uni_word; l++) {
                nodes[i].weights[l] /= total;
            }
        }
    }
    return nodes;
}

/*
 * Print GENERATED_WORDS words starting at node `start`. For each step the
 * weight rows of all words printed so far are multiplied element-wise into
 * `working_memory` (uni_word doubles of scratch space) and the first index
 * holding the largest product becomes the next word.
 */
static void generate(const Node *nodes, int uni_word, int start, double *working_memory) {
    int context[GENERATED_WORDS];
    int next_words = start;

    for (int stop = 0; stop < GENERATED_WORDS; stop++) {
        for (int i = 0; i < uni_word; i++) {
            working_memory[i] = 1.0;
        }

        context[stop] = next_words;
        printf("%s ", nodes[next_words].content);

        for (int i = 0; i <= stop; i++) {
            const double *row = nodes[context[i]].weights;
            for (int l = 0; l < uni_word; l++) {
                working_memory[l] *= row[l];
            }
        }

        int max = 0;
        for (int i = 1; i < uni_word; i++) {
            if (working_memory[i] > working_memory[max]) {
                max = i;
            }
        }
        next_words = max;
    }
    printf("\n");
}

/*
 * Load INPUT_PATH, split it into words, build the transition table, print
 * the word statistics and a generated chain of words.
 * Returns EXIT_SUCCESS, or EXIT_FAILURE after printing a diagnostic.
 */
int main(void) {
    int status = EXIT_FAILURE;
    char *raw_file = NULL;
    char **words = NULL;
    int *uni_i = NULL;
    Node *nodes = NULL;
    double *working_memory = NULL;
    size_t size = 0;
    int nb_words = 0;
    int longest = 0;
    int uni_word = 0;
    int start = 0;

    raw_file = read_text(INPUT_PATH, &size);
    if (raw_file == NULL) {
        goto cleanup; /* read_text already reported the error */
    }
    normalize_text(raw_file, size);

    nb_words = count_words(raw_file, &longest);
    if (nb_words == 0) {
        fprintf(stderr, "No words found in input\n");
        goto cleanup;
    }

    words = malloc((size_t)nb_words * sizeof *words);
    if (words == NULL) {
        perror("Memory allocation failed");
        goto cleanup;
    }
    /* Words point into raw_file, so raw_file must outlive words and nodes. */
    nb_words = split_words(raw_file, words, nb_words);

    uni_i = calloc((size_t)nb_words, sizeof *uni_i);
    if (uni_i == NULL) {
        perror("Memory allocation failed");
        goto cleanup;
    }
    uni_word = index_unique(words, nb_words, uni_i);

    nodes = build_nodes(words, uni_i, nb_words, uni_word);
    if (nodes == NULL) {
        perror("Memory allocation failed");
        goto cleanup;
    }

    working_memory = malloc((size_t)uni_word * sizeof *working_memory);
    if (working_memory == NULL) {
        fprintf(stderr, "Memory allocation failed\n");
        goto cleanup;
    }

    /* Seeded as in the original; generate() itself never calls rand(). */
    srand((unsigned)time(NULL));

    printf("Total words: %d\n", nb_words);
    printf("Unique words: %d\n", uni_word);
    printf("Longest word length: %d\n", longest);

    /* Fall back to the first word when the text has too few distinct words
     * for the preferred starting index to exist. */
    start = DEFAULT_START_WORD < uni_word ? DEFAULT_START_WORD : 0;
    generate(nodes, uni_word, start, working_memory);

    status = EXIT_SUCCESS;

cleanup:
    free(working_memory);
    free_nodes(nodes, uni_word);
    free(uni_i);
    free(words);
    free(raw_file);
    return status;
}