/*
 * CAI/base.c
 * 2025-05-19 15:48:25 +02:00
 *
 * 247 lines
 * 6.6 KiB
 * C
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <time.h>
// One entry per distinct word of the input text; main() builds an array of
// these, indexed by the word's unique id minus one.
typedef struct {
// Text of the word. main() stores a pointer to an existing word string here
// rather than making a separate copy, so the string is shared, not owned.
char *content;
// Array with one slot per distinct word. Slot j first counts how often
// word j directly follows this word, then each row is divided by its total
// (when that total is non-zero) so the slots become relative frequencies.
double *weights;
} Node;
/* Generation parameters. */
enum {
    START_WORD = 15556, /* node index the generated text starts from */
    OUTPUT_WORDS = 50   /* number of words printed */
};

/*
 * Word boundary test shared by counting and splitting, so both passes always
 * agree on where words begin and end. NUL bytes count as separators too.
 */
static int is_separator(char c)
{
    return c == '\0' || isspace((unsigned char)c);
}

/*
 * Reads the whole of `path` (text mode) into a newly allocated,
 * NUL-terminated buffer.
 *
 * On success returns the buffer (caller frees it) and stores the number of
 * bytes read in *out_len. On failure prints a diagnostic and returns NULL.
 */
static char *load_text(const char *path, size_t *out_len)
{
    FILE *fp = fopen(path, "r");
    if (fp == NULL) {
        perror("Error opening file");
        return NULL;
    }

    /* fgetc() returns int so that EOF stays distinct from every byte value;
     * storing the result in a char would confuse EOF with the byte 0xFF. */
    size_t capacity = 0;
    int ch;
    while ((ch = fgetc(fp)) != EOF) {
        capacity++;
    }
    if (ferror(fp) || fseek(fp, 0, SEEK_SET) != 0) {
        perror("Error reading file");
        fclose(fp);
        return NULL;
    }

    char *text = malloc(capacity + 1); /* +1 for the terminator */
    if (text == NULL) {
        perror("Memory allocation failed");
        fclose(fp);
        return NULL;
    }

    /* Trust the byte count fread() reports, not the earlier counting pass. */
    size_t got = fread(text, 1, capacity, fp);
    if (ferror(fp)) {
        perror("Error reading file");
        free(text);
        fclose(fp);
        return NULL;
    }
    text[got] = '\0';
    fclose(fp);

    *out_len = got;
    return text;
}

/* Replaces every punctuation character except the apostrophe with a space. */
static void blank_punctuation(char *text, size_t len)
{
    for (size_t i = 0; i < len; i++) {
        if (text[i] != '\'' && ispunct((unsigned char)text[i])) {
            text[i] = ' ';
        }
    }
}

/*
 * Returns the number of words in text[0..len) and stores the length of the
 * longest one in *longest.
 */
static size_t count_words(const char *text, size_t len, size_t *longest)
{
    size_t count = 0;
    size_t run = 0;

    *longest = 0;
    /* The extra i == len step closes a word that runs to the end of the
     * buffer without trailing whitespace. */
    for (size_t i = 0; i <= len; i++) {
        if (i < len && !is_separator(text[i])) {
            run++;
            continue;
        }
        if (run > 0) {
            count++;
            if (run > *longest) {
                *longest = run;
            }
            run = 0;
        }
    }
    return count;
}

/*
 * Splits text[0..len) in place: every separator becomes a NUL and the start
 * of each word is stored in words[]. At most `capacity` words are stored.
 * The stored pointers alias `text`; they must not be freed individually.
 * Returns the number of words stored.
 */
static size_t split_words(char *text, size_t len, char **words, size_t capacity)
{
    size_t stored = 0;
    int in_word = 0;

    for (size_t i = 0; i < len; i++) {
        if (is_separator(text[i])) {
            text[i] = '\0';
            in_word = 0;
        } else if (!in_word) {
            if (stored == capacity) {
                break;
            }
            words[stored++] = &text[i];
            in_word = 1;
        }
    }
    return stored;
}

/*
 * Gives every word a 1-based id so that equal words share an id and ids are
 * handed out in order of first appearance. `ids` must hold nb_words zeros on
 * entry (0 means "not assigned yet"). Returns the number of distinct words.
 */
static size_t assign_word_ids(char **words, size_t nb_words, size_t *ids)
{
    size_t unique = 0;

    for (size_t i = 0; i < nb_words; i++) {
        if (ids[i] != 0) {
            continue;
        }
        ids[i] = ++unique;
        for (size_t j = i + 1; j < nb_words; j++) {
            /* A word that already has an id equals an earlier, different
             * word, so it cannot match words[i]. */
            if (ids[j] == 0 && strcmp(words[i], words[j]) == 0) {
                ids[j] = unique;
            }
        }
    }
    return unique;
}

/* Frees the weight rows of nodes[0..count) and the node array itself. */
static void free_nodes(Node *nodes, size_t count)
{
    if (nodes == NULL) {
        return;
    }
    for (size_t i = 0; i < count; i++) {
        free(nodes[i].weights);
    }
    free(nodes);
}

/*
 * Builds one node per distinct word. weights[j] of a node ends up as the
 * fraction of that word's occurrences that were directly followed by word j.
 * Node contents borrow the strings in words[]; they are not copies.
 *
 * Returns the node array (release with free_nodes(nodes, unique)), or NULL
 * after printing a diagnostic if an allocation fails.
 */
static Node *build_nodes(char **words, const size_t *ids, size_t nb_words,
                         size_t unique)
{
    Node *nodes = calloc(unique, sizeof *nodes);
    if (nodes == NULL) {
        perror("Memory allocation failed");
        return NULL;
    }
    for (size_t i = 0; i < unique; i++) {
        nodes[i].content = NULL;
        nodes[i].weights = calloc(unique, sizeof *nodes[i].weights);
        if (nodes[i].weights == NULL) {
            perror("Memory allocation failed");
            free_nodes(nodes, i); /* rows 0..i-1 were allocated */
            return NULL;
        }
    }

    for (size_t i = 0; i < nb_words; i++) {
        Node *node = &nodes[ids[i] - 1];
        /* Every word gets its text recorded, including the very last one,
         * so no node is left with a NULL content. */
        if (node->content == NULL) {
            node->content = words[i];
        }
        /* Only words that have a successor contribute a transition. */
        if (i + 1 < nb_words) {
            node->weights[ids[i + 1] - 1] += 1.0;
        }
    }

    /* Scale each row to sum to 1; rows without transitions stay all zero. */
    for (size_t i = 0; i < unique; i++) {
        double total = 0;
        for (size_t j = 0; j < unique; j++) {
            total += nodes[i].weights[j];
        }
        if (total > 0) {
            for (size_t j = 0; j < unique; j++) {
                nodes[i].weights[j] /= total;
            }
        }
    }
    return nodes;
}

/*
 * Reads "moby.txt", builds a word-to-next-word transition table and prints
 * OUTPUT_WORDS words chosen greedily from it, preceded by corpus statistics.
 *
 * Returns 0 on success and EXIT_FAILURE if the file cannot be read, contains
 * no words, or memory runs out.
 */
int main(void)
{
    int status = EXIT_FAILURE;
    size_t len = 0;
    size_t nb_words = 0;
    size_t longest = 0;
    size_t unique = 0;
    size_t current = 0;
    size_t context[OUTPUT_WORDS];
    char *text = NULL;
    char **words = NULL;
    size_t *ids = NULL;
    Node *nodes = NULL;
    double *scores = NULL;

    text = load_text("moby.txt", &len);
    if (text == NULL) {
        goto cleanup; /* load_text() already reported the reason */
    }
    blank_punctuation(text, len);

    nb_words = count_words(text, len, &longest);
    if (nb_words == 0) {
        fprintf(stderr, "No words found in input\n");
        goto cleanup;
    }

    words = calloc(nb_words, sizeof *words);
    if (words == NULL) {
        perror("Memory allocation failed");
        goto cleanup;
    }
    ids = calloc(nb_words, sizeof *ids);
    if (ids == NULL) {
        perror("Memory allocation failed");
        goto cleanup;
    }
    nb_words = split_words(text, len, words, nb_words);
    unique = assign_word_ids(words, nb_words, ids);

    nodes = build_nodes(words, ids, nb_words, unique);
    if (nodes == NULL) {
        goto cleanup; /* build_nodes() already reported the reason */
    }
    scores = calloc(unique, sizeof *scores);
    if (scores == NULL) {
        perror("Memory allocation failed");
        goto cleanup;
    }

    /* Seeded as in the original flow; the greedy selection below does not
     * call rand(). */
    srand((unsigned)time(NULL));

    printf("Total words: %zu\n", nb_words);
    printf("Unique words: %zu\n", unique);
    printf("Longest word length: %zu\n", longest);

    /* Fall back to the first node when the corpus has too few distinct
     * words for the preferred start index. */
    current = ((size_t)START_WORD < unique) ? (size_t)START_WORD : 0;

    for (size_t step = 0; step < OUTPUT_WORDS; step++) {
        for (size_t j = 0; j < unique; j++) {
            scores[j] = 1.0;
        }
        context[step] = current;
        printf("%s ", nodes[current].content);

        /* Score each candidate by the product of its transition weight from
         * every word emitted so far. */
        for (size_t k = 0; k <= step; k++) {
            const double *row = nodes[context[k]].weights;
            for (size_t j = 0; j < unique; j++) {
                scores[j] *= row[j];
            }
        }

        /* Pick the highest score; ties go to the lowest index. */
        size_t best = 0;
        for (size_t j = 1; j < unique; j++) {
            if (scores[j] > scores[best]) {
                best = j;
            }
        }
        current = best;
    }
    printf("\n");
    status = 0;

cleanup:
    free(scores);
    free_nodes(nodes, unique);
    free(ids);
    free(words); /* the word strings live inside `text` */
    free(text);
    return status;
}