Actualiser base.c
This commit is contained in:
parent
f50ab4512c
commit
f78215f4ca
1 changed files with 209 additions and 50 deletions
255
base.c
255
base.c
|
@ -1,88 +1,247 @@
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
|
#include <ctype.h>
|
||||||
|
#include <time.h>
|
||||||
|
|
||||||
int main(void){
|
typedef struct {
|
||||||
|
char *content;
|
||||||
|
double *weights;
|
||||||
|
} Node;
|
||||||
|
|
||||||
// Count nb of char
|
int main(void) {
|
||||||
|
// Count number of characters
|
||||||
char cchar;
|
char cchar;
|
||||||
int size = 0;
|
int size = 0;
|
||||||
FILE *fptr = fopen("test.txt", "r");
|
FILE *fptr = fopen("moby.txt", "r");
|
||||||
while ((cchar = fgetc(fptr)) != EOF){
|
if (fptr == NULL) {
|
||||||
size += 1;
|
perror("Error opening file");
|
||||||
|
return EXIT_FAILURE;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
while ((cchar = fgetc(fptr)) != EOF) {
|
||||||
|
size++;
|
||||||
|
}
|
||||||
|
fseek(fptr, 0, SEEK_SET); // Reset file pointer to the beginning
|
||||||
|
|
||||||
// Load file
|
// Load file
|
||||||
char raw_file[size];
|
char *raw_file = malloc(size + 1); // +1 for null terminator
|
||||||
|
if (raw_file == NULL) {
|
||||||
|
perror("Memory allocation failed");
|
||||||
fclose(fptr);
|
fclose(fptr);
|
||||||
fptr = fopen("test.txt", "r");
|
return EXIT_FAILURE;
|
||||||
int i = 0;
|
|
||||||
while ((cchar = fgetc(fptr)) != EOF){
|
|
||||||
raw_file[i] = cchar;
|
|
||||||
i++;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fread(raw_file, 1, size, fptr);
|
||||||
|
raw_file[size] = '\0'; // Null terminate the string
|
||||||
fclose(fptr);
|
fclose(fptr);
|
||||||
|
|
||||||
// Count nb of words and longest word
|
for (int i = 0; i < size; i++) {
|
||||||
|
if (ispunct((unsigned char) raw_file[i]) && !(raw_file[i] == '\'')) {
|
||||||
|
raw_file[i] = ' ';
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Count number of words and longest word
|
||||||
int nb_words = 0;
|
int nb_words = 0;
|
||||||
int longest = 0;
|
int longest = 0;
|
||||||
int c_size;
|
int c_size = 0;
|
||||||
for (i = 0; i < size - 1; i++){
|
|
||||||
if ( raw_file[i] == 0 && raw_file[i + 1] != ' ' ) {
|
for (int i = 0; i < size; i++) {
|
||||||
nb_words += 1;
|
if (isspace((unsigned char) raw_file[i]) || raw_file[i] == '\0') {
|
||||||
if (c_size > longest){
|
if (c_size > 0) {
|
||||||
|
nb_words++;
|
||||||
|
if (c_size > longest) {
|
||||||
longest = c_size;
|
longest = c_size;
|
||||||
}
|
}
|
||||||
c_size = 0;
|
c_size = 0;
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
c_size++;
|
c_size++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Load words
|
// Load words
|
||||||
char words[nb_words][longest];
|
char **words = malloc(nb_words * sizeof(char *));
|
||||||
int word = 0;
|
if (words == NULL) {
|
||||||
int chr = 0;
|
perror("Memory allocation failed");
|
||||||
for (i = 0; i < size - 1; i++){
|
free(raw_file);
|
||||||
if ( raw_file[i] == 0 && raw_file[i + 1] != ' ' ) {
|
return EXIT_FAILURE;
|
||||||
word++;
|
|
||||||
chr = 0;
|
|
||||||
} else {
|
|
||||||
char words[word][chr] = raw_file[i];
|
|
||||||
chr++;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int word_index = 0;
|
||||||
|
char *token = strtok(raw_file, " \n");
|
||||||
|
while (token != NULL) {
|
||||||
|
words[word_index] = malloc(strlen(token) + 1);
|
||||||
|
if (words[word_index] == NULL) {
|
||||||
|
perror("Memory allocation failed");
|
||||||
|
free(raw_file);
|
||||||
|
for (int j = 0; j < word_index; j++) {
|
||||||
|
free(words[j]);
|
||||||
|
}
|
||||||
|
free(words);
|
||||||
|
return EXIT_FAILURE;
|
||||||
|
}
|
||||||
|
strcpy(words[word_index], token);
|
||||||
|
word_index++;
|
||||||
|
token = strtok(NULL, " \n");
|
||||||
}
|
}
|
||||||
|
|
||||||
// Count unique words
|
// Count unique words
|
||||||
int freq_words[nb_words];
|
int *freq_words = calloc(nb_words, sizeof(int));
|
||||||
memset(freq_words, 0, nb_words * sizeof(int));
|
if (freq_words == NULL) {
|
||||||
int uni_word = 0;
|
perror("Memory allocation failed");
|
||||||
for (i = 0; i < nb_words; i++){
|
free(raw_file);
|
||||||
if (freq_words[i] == 0){
|
for (int j = 0; j < word_index; j++) {
|
||||||
uni_word++;
|
free(words[j]);
|
||||||
freq_words[i] = uni_word;
|
|
||||||
for (int l = 0; l < nb_words; l++){
|
|
||||||
int k = 0;
|
|
||||||
while ( words[i][k] == words[l][k] ){
|
|
||||||
k += 1;
|
|
||||||
}
|
}
|
||||||
if (k == longest){
|
free(words);
|
||||||
freq_words[l] = uni_word;
|
return EXIT_FAILURE;
|
||||||
|
}
|
||||||
|
|
||||||
|
int *uni_i = calloc(nb_words, sizeof(int));
|
||||||
|
if (uni_i == NULL) {
|
||||||
|
perror("Memory allocation failed");
|
||||||
|
free(raw_file);
|
||||||
|
for (int j = 0; j < word_index; j++) {
|
||||||
|
free(words[j]);
|
||||||
|
}
|
||||||
|
free(words);
|
||||||
|
free(freq_words);
|
||||||
|
return EXIT_FAILURE;
|
||||||
|
}
|
||||||
|
|
||||||
|
int uni_word = 0;
|
||||||
|
for (int i = 0; i < nb_words; i++) {
|
||||||
|
if (uni_i[i] == 0) {
|
||||||
|
uni_word++;
|
||||||
|
uni_i[i] = uni_word;
|
||||||
|
freq_words[uni_word - 1] = 1; // Initialize frequency count
|
||||||
|
for (int l = i + 1; l < nb_words; l++) {
|
||||||
|
if (strcmp(words[i], words[l]) == 0) {
|
||||||
|
uni_i[l] = uni_word; // Mark as the same unique word
|
||||||
|
freq_words[uni_word - 1]++; // Increment frequency
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Init node
|
// Init nodes
|
||||||
Node nodes[uni_word];
|
Node *nodes = malloc(uni_word * sizeof(Node));
|
||||||
int c = 0;
|
if (nodes == NULL) {
|
||||||
for (i = 0; i < uni_word; i++){
|
perror("Memory allocation failed");
|
||||||
nodes[uni_word].weights[uni_word];
|
free(raw_file);
|
||||||
memset(nodes[uni_word].weights, 0, uni_word * sizeof(int));
|
for (int j = 0; j < word_index; j++) {
|
||||||
|
free(words[j]);
|
||||||
}
|
}
|
||||||
for (i = 0; i < nb_words; i++){
|
free(words);
|
||||||
nodes[freq_words[i]].content = words[i];
|
free(freq_words);
|
||||||
|
free(uni_i);
|
||||||
|
return EXIT_FAILURE;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Initialize nodes
|
||||||
|
for (int i = 0; i < uni_word; i++) {
|
||||||
|
nodes[i].content = NULL; // Initialize content to NULL
|
||||||
|
nodes[i].weights = calloc(uni_word, sizeof(double)); // Allocate weights
|
||||||
|
if (nodes[i].weights == NULL) {
|
||||||
|
perror("Memory allocation failed");
|
||||||
|
free(raw_file);
|
||||||
|
for (int j = 0; j < word_index; j++) {
|
||||||
|
free(words[j]);
|
||||||
|
}
|
||||||
|
free(words);
|
||||||
|
free(freq_words);
|
||||||
|
free(uni_i);
|
||||||
|
free(nodes);
|
||||||
|
return EXIT_FAILURE;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Assign words to nodes
|
||||||
|
for (int i = 0; i < nb_words - 1; i++) {
|
||||||
|
int index = uni_i[i] - 1;
|
||||||
|
if (nodes[index].content == NULL) {
|
||||||
|
nodes[index].content = words[i];
|
||||||
|
}
|
||||||
|
nodes[index].weights[uni_i[i + 1] - 1]++;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Normalize weights
|
||||||
|
for (int i = 0; i < uni_word; i++) {
|
||||||
|
double total = 0;
|
||||||
|
for (int l = 0; l < uni_word; l++) {
|
||||||
|
total += nodes[i].weights[l];
|
||||||
|
}
|
||||||
|
if (total > 0) {
|
||||||
|
for (int l = 0; l < uni_word; l++) {
|
||||||
|
nodes[i].weights[l] /= total; // Normalize weights
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
srand(time(NULL));
|
||||||
|
int stop = 0;
|
||||||
|
int next_words = 15556; // Starting index for the next word
|
||||||
|
double *working_memory = malloc(uni_word * sizeof(double));
|
||||||
|
int *context = malloc(50 * sizeof(int));
|
||||||
|
|
||||||
|
if (working_memory == NULL || context == NULL) {
|
||||||
|
fprintf(stderr, "Memory allocation failed\n");
|
||||||
|
return EXIT_FAILURE;
|
||||||
|
}
|
||||||
|
|
||||||
|
printf("Total words: %d\n", nb_words);
|
||||||
|
printf("Unique words: %d\n", uni_word);
|
||||||
|
printf("Longest word length: %d\n", longest);
|
||||||
|
|
||||||
|
while (stop < 50) {
|
||||||
|
// Reset working_memory for the current iteration
|
||||||
|
for (int i = 0; i < uni_word; i++) {
|
||||||
|
working_memory[i] = 1.0;
|
||||||
|
}
|
||||||
|
|
||||||
|
context[stop] = next_words;
|
||||||
|
printf("%s ", nodes[next_words].content);
|
||||||
|
|
||||||
|
// Update working_memory based on context
|
||||||
|
for (int i = 0; i <= stop; i++) {
|
||||||
|
for (int l = 0; l < uni_word; l++) {
|
||||||
|
working_memory[l] *= nodes[context[i]].weights[l];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Find the index of the maximum value in working_memory
|
||||||
|
int max = 0; // Reset max for each iteration
|
||||||
|
for (int i = 1; i < uni_word; i++) { // Start from 1 since max is initialized to 0
|
||||||
|
if (working_memory[i] > working_memory[max]) {
|
||||||
|
max = i;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
next_words = max; // Update next_words to the index of the max value
|
||||||
|
stop++;
|
||||||
|
}
|
||||||
|
|
||||||
|
printf("\n");
|
||||||
|
|
||||||
|
// Free allocated memory
|
||||||
|
free(working_memory);
|
||||||
|
free(context);
|
||||||
|
|
||||||
|
// Free allocated memory
|
||||||
|
free(raw_file);
|
||||||
|
for (int i = 0; i < word_index; i++) {
|
||||||
|
free(words[i]);
|
||||||
|
}
|
||||||
|
free(words);
|
||||||
|
free(freq_words);
|
||||||
|
free(uni_i);
|
||||||
|
for (int i = 0; i < uni_word; i++) {
|
||||||
|
free(nodes[i].weights);
|
||||||
|
free(nodes[i].content); // Free the content strings
|
||||||
|
}
|
||||||
|
free(nodes);
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Reference in a new issue