You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

44 lines
1.2 KiB

import numpy as np
import json
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Embedding, Flatten, Dropout, LSTM
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.regularizers import l2
import pickle
from db import *
# Train a tiny binary word classifier and persist it together with its
# tokenizer.
#
# NOTE(review): assumes read() (star-imported from db) returns a mapping whose
# '0' and '1' entries are lists of strings, one list per class -- confirm
# against the db module.
db = read()
words = db['0'] + db['1']
labels = [0] * len(db['0']) + [1] * len(db['1'])
if not labels:
    # Fail early with a clear message instead of an opaque error from fit().
    raise ValueError("no training data: db['0'] and db['1'] are both empty")

# Hyperparameters. VOCAB_SIZE and SEQUENCE_LENGTH are each consumed in two
# places (tokenizer/padding and the Embedding layer) and must agree, so they
# are defined once here.
VOCAB_SIZE = 1000
SEQUENCE_LENGTH = 1
EMBEDDING_DIM = 8
EPOCHS = 30

# Tokenize the words: lower-case the text and cap the vocabulary by frequency.
tokenizer = Tokenizer(num_words=VOCAB_SIZE, lower=True)
tokenizer.fit_on_texts(words)
sequences = tokenizer.texts_to_sequences(words)

# Pad/truncate every sequence to a uniform length. With the default
# pre-padding and pre-truncation, an entry that tokenizes to several tokens
# keeps only its last one, and an entry with no in-vocabulary token becomes 0.
word_sequences = pad_sequences(sequences, maxlen=SEQUENCE_LENGTH)

# Define the model: embedding lookup -> flatten -> single sigmoid unit.
model = Sequential([
    Embedding(
        input_dim=VOCAB_SIZE,
        output_dim=EMBEDDING_DIM,
        input_length=SEQUENCE_LENGTH,
    ),
    Flatten(),
    Dense(1, activation='sigmoid'),
])

# Compile the model for binary classification.
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the model on the full dataset (no validation split).
model.fit(word_sequences, np.array(labels), epochs=EPOCHS, verbose=2)

# Save the tokenizer and model. The tokenizer must be stored alongside the
# model so that inference maps words to the same integer indices.
with open('tokenizer.pkl', 'wb') as handle:
    pickle.dump(tokenizer, handle, protocol=pickle.HIGHEST_PROTOCOL)
model.save('word_classifier_model.keras')