up

1 week ago · 1b4cea6739
parent 5d25f3ac8e
commit 1b4cea6739
7 changed files with 51536 additions and 0 deletions
--- a/.gitignore
+++ b/.gitignore
@ -0,0 +1,2 @@
+__pycache__
+
--- a/db.json
+++ b/db.json
--- a/db.py
+++ b/db.py
@ -0,0 +1,19 @@
+import json
+import os
+
+# First-run bootstrap: create db.json with the two empty lists '0' and '1'
+# so that read() always finds a file to open.
+if not os.path.exists('db.json'):
+    db = {'0': [], '1': []}
+    js = json.dumps(db, indent=2)
+    # Write as UTF-8 explicitly: read() and write() both open the file with
+    # encoding="utf-8", so the initial file must not depend on the platform's
+    # default encoding.
+    with open("db.json", "w", encoding="utf-8") as outfile:
+        outfile.write(js)
+    print('Created new db.json')
+
+def read(file='db.json'):
+    """Parse the UTF-8 JSON document stored at `file` and return the result."""
+    with open(file, "r", encoding="utf-8") as source:
+        return json.load(source)
+
+def write(db, file='db.json'):
+    """Save `db` to `file` as UTF-8 JSON, indented by 2, with non-ASCII kept as-is."""
+    # Serialize before opening: if `db` cannot be encoded, the existing file
+    # is left untouched instead of being truncated.
+    payload = json.dumps(db, ensure_ascii=False, indent=2)
+    with open(file, "w", encoding="utf-8") as sink:
+        sink.write(payload)
--- a/json_edit.py
+++ b/json_edit.py
@ -0,0 +1,103 @@
+import tkinter as tk
+from tkinter import messagebox, simpledialog, filedialog, CENTER, font
+import json
+from db import *
+
+FIRST = 1000  # maximum number of entries rendered in the listbox at once
+
+
+class JsonEditorApp:
+    """Tkinter window for browsing and editing the word lists in db.json.
+
+    The loaded database is a dict whose keys '0' ("Words") and '1'
+    ("Bad Words") each map to a list of words. The radio buttons choose
+    which list is shown. The entry box is used both to add a word (Return)
+    and to filter the list (Ctrl+S); Delete removes the word selected in the
+    listbox. Every add/delete is written back to disk immediately.
+    """
+
+    def __init__(self, root):
+        """Load the database via read() and build the widgets inside `root`."""
+        self.root = root
+        self.root.title("JSON Editor")
+
+        self.custom_font = font.Font(family="Helvetica", size=14)
+
+        self.data = read()
+        self.setup_ui()
+
+    def setup_ui(self):
+        """Create the radio buttons, search entry and listbox, and wire key bindings."""
+        self.key_var = tk.StringVar(value='0')
+        self.key_var.trace_add("write", self.on_key_change)
+
+        tk.Label(self.root, text="Select Type:", font=self.custom_font).pack(pady=5)
+        # Create and pack in two steps: pack() returns None, so chaining it
+        # onto the constructor would store None instead of the widget.
+        self.okButton = tk.Radiobutton(
+            self.root, text="Words", variable=self.key_var, value='0',
+            font=self.custom_font,
+        )
+        self.okButton.pack(side=tk.TOP)
+        self.badButton = tk.Radiobutton(
+            self.root, text="Bad Words", variable=self.key_var, value='1',
+            font=self.custom_font,
+        )
+        self.badButton.pack(side=tk.TOP)
+
+        self.search_var = tk.StringVar()
+        self.search_var.trace_add("write", self.on_search_change)
+        self.search_entry = tk.Entry(self.root, textvariable=self.search_var, font=self.custom_font)
+        self.search_entry.pack(pady=5)
+        self.search_entry.bind('<Control-a>', self.select_call)
+        self.search_entry.bind('<Return>', self.add_word)
+        self.search_entry.bind('<Control-s>', self.search_word)
+
+        self.root.bind('<Delete>', self.delete_word)
+
+        self.listbox = tk.Listbox(self.root, height=10, width=50, font=self.custom_font)
+        self.listbox.pack(pady=5, padx=30, fill=tk.BOTH, expand=True)
+        self.listbox.configure(justify=CENTER)
+        self.update_listbox()
+
+    def select_call(self, event):
+        """Clear the search entry (bound to Ctrl+A inside the entry)."""
+        self.search_var.set('')
+
+    def on_search_change(self, *args):
+        """Restore the unfiltered list once the search entry becomes empty."""
+        if self.search_var.get() == '':
+            self.update_listbox()
+
+    def on_key_change(self, *args):
+        """Refresh the listbox when the selected word list changes."""
+        self.update_listbox()
+
+    def update_listbox(self):
+        """Show the first FIRST words of the currently selected list."""
+        self.listbox.delete(0, tk.END)
+        words = self.data.get(self.key_var.get(), [])
+        for word in words[:FIRST]:
+            self.listbox.insert(tk.END, word)
+
+    def search_word(self, *args):
+        """Filter the listbox to words containing the entry text (case-insensitive).
+
+        Shows a warning dialog when the entry is empty. At most FIRST
+        matches are displayed.
+        """
+        query = self.search_entry.get().strip().lower()
+        if not query:
+            messagebox.showwarning("Warning", "Please enter a word to search.")
+            return
+
+        words = self.data.get(self.key_var.get(), [])
+        # The query is lowercased, so compare against lowercased words too;
+        # entries added outside this editor are not guaranteed to be lowercase.
+        results = [word for word in words if query in word.lower()]
+        self.listbox.delete(0, tk.END)
+        for result in results[:FIRST]:
+            self.listbox.insert(tk.END, result)
+
+    def add_word(self, *args):
+        """Append the entry text (stripped, lowercased) to the current list and save.
+
+        Shows a warning dialog when the entry is empty; does nothing when
+        the word is already present.
+        """
+        word = self.search_entry.get().strip().lower()
+        if not word:
+            messagebox.showwarning("Warning", "Please enter a word.")
+            return
+
+        # setdefault tolerates a database file that lacks the selected key,
+        # matching the .get(key, []) used when displaying the list.
+        words = self.data.setdefault(self.key_var.get(), [])
+        if word in words:
+            return
+        words.append(word)
+        write(self.data)
+        self.update_listbox()
+        self.search_var.set('')
+
+    def delete_word(self, *args):
+        """Remove the word selected in the listbox from the current list and save.
+
+        The handler is bound to <Delete> on the root window, so it also
+        fires while the search entry has focus. In that case the key press
+        is meant to edit the entry text and must not delete a stored word.
+        """
+        event = args[0] if args else None
+        if event is not None and getattr(event, "widget", None) is self.search_entry:
+            return
+
+        selection = self.listbox.curselection()
+        if not selection:
+            return
+
+        # curselection() returns a tuple of indices; use the first one.
+        index = selection[0]
+        word = self.listbox.get(index)
+        words = self.data.get(self.key_var.get(), [])
+        if word in words:
+            words.remove(word)
+            write(self.data)
+        self.listbox.delete(index)
+
+if __name__ == "__main__":
+    # Create the Tk root window, attach the editor to it, and run the event loop.
+    root = tk.Tk()
+    app = JsonEditorApp(root)
+    root.mainloop()
--- a/run.py
+++ b/run.py
@ -0,0 +1,27 @@
+import numpy as np
+from tensorflow.keras.models import load_model
+from tensorflow.keras.preprocessing.sequence import pad_sequences
+import pickle
+    
+# Restore the fitted tokenizer and the trained classifier from disk.
+# NOTE(review): pickle.load executes code embedded in the file, so
+# tokenizer.pkl must come from a trusted source (presumably train.py).
+with open('tokenizer.pkl', 'rb') as pickled:
+    tokenizer = pickle.load(pickled)
+
+model = load_model('word_classifier_model.keras')
+    
+def classify_word(word):
+    """Return the model's output for `word` as a percentage string, e.g. '12.345%'.
+
+    The word is converted to token ids with the module-level tokenizer,
+    padded/truncated to length 1, and passed to the module-level model.
+    The first output value is scaled to 0-100 and rounded to 3 decimals.
+    """
+    token_ids = tokenizer.texts_to_sequences([word])
+    model_input = pad_sequences(token_ids, maxlen=1)
+    score = model.predict(model_input)[0][0]
+    return f'{round(score*100,3)}%'
+    
+# Interactive prompt: classify each entered word until the user types 'exit'
+# or stdin is closed.
+while True:
+    try:
+        word = input('>> ')
+    except EOFError:
+        # Ctrl-D or the end of piped input: stop cleanly instead of crashing
+        # with a traceback.
+        break
+    if word == 'exit':
+        break
+    result = classify_word(word)
+    print(f"The word '{word}' is a: {result}")
--- a/train.py
+++ b/train.py
@ -0,0 +1,43 @@
+import numpy as np
+import json
+from tensorflow.keras.models import Sequential
+from tensorflow.keras.layers import Dense, Embedding, Flatten, Dropout, LSTM
+from tensorflow.keras.preprocessing.text import Tokenizer
+from tensorflow.keras.preprocessing.sequence import pad_sequences
+from tensorflow.keras.regularizers import l2
+import pickle    
+
+from db import *
+
+# Training data: every entry of db['0'] is labelled 0 and every entry of
+# db['1'] is labelled 1, in the same order as the concatenated word list.
+db = read()
+words = db['0'] + db['1']
+labels = [0] * len(db['0']) + [1] * len(db['1'])
+
+# Shared hyper-parameters. The tokenizer's vocabulary cap has to agree with
+# the embedding's input_dim, and MAX_LEN has to agree with the maxlen that
+# run.py passes to pad_sequences, so each value is defined exactly once.
+VOCAB_SIZE = 1000
+MAX_LEN = 1
+
+# Tokenize the words.
+# NOTE(review): per the Keras docs, num_words keeps only the most frequent
+# tokens; entries outside that set become empty sequences, which
+# pad_sequences fills with 0. Confirm this cap is intended for the size of
+# db.json.
+tokenizer = Tokenizer(num_words=VOCAB_SIZE, lower=True)
+tokenizer.fit_on_texts(words)
+sequences = tokenizer.texts_to_sequences(words)
+
+# Pad/truncate so every sample has exactly MAX_LEN token ids.
+word_sequences = pad_sequences(sequences, maxlen=MAX_LEN)
+
+# Define the model: token id -> 8-dim embedding -> single sigmoid output.
+model = Sequential([
+    Embedding(input_dim=VOCAB_SIZE, output_dim=8, input_length=MAX_LEN),
+    Flatten(),
+    Dense(1, activation='sigmoid')
+])
+
+# Compile the model
+model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
+
+# Train the model
+model.fit(word_sequences, np.array(labels), epochs=30, verbose=2)
+
+# Save the tokenizer and model; run.py loads both files by these names.
+# (pickle is already imported at the top of the file.)
+with open('tokenizer.pkl', 'wb') as handle:
+    pickle.dump(tokenizer, handle, protocol=pickle.HIGHEST_PROTOCOL)
+model.save('word_classifier_model.keras')
+
+
--- a/txt_export.py
+++ b/txt_export.py
@ -0,0 +1,19 @@
+from db import *
+
+# Interactive bulk import: ask which list to extend, read one word per line
+# until the sentinel line 'exit' (or end of input), then append the collected
+# words to that list in db.json.
+mode = input('mode: ')
+
+# Validate the key up front so a typo is reported before the user has typed
+# (and would otherwise lose) a whole batch of words.
+if mode not in read():
+    raise SystemExit(f"Unknown mode {mode!r}: not a key in db.json")
+
+words = []
+while True:
+    try:
+        temp = input()
+    except EOFError:
+        # Piped or redirected input may end without the sentinel; keep what
+        # has been collected so far.
+        break
+    if temp == 'exit':
+        break
+    words.append(temp)
+
+# Re-read just before writing so the merge is based on the current file.
+db = read()
+db[mode] = db.get(mode, []) + words
+write(db)
+
+print("--------------")
+print("ADDED")
+print("--------------")