diff --git a/capectracer/capec_specs_dict.py b/capectracer/capec_specs_dict.py
index 12aedcd32cc9e237ee3d814b48a13ee82007b41a..c3c414b49e733eba9e0f2b14fbb0cb2b71960026 100644
--- a/capectracer/capec_specs_dict.py
+++ b/capectracer/capec_specs_dict.py
@@ -1,72 +1,25 @@
-import xmltodict
-import json
 import re
+import xmltodict
 
 from tokens_dict import TokensDict
 
-from gensim.models.phrases import Phrases, ENGLISH_CONNECTOR_WORDS, Phraser
-
+# Parses the CAPEC catalog and a system specification into cleaned sentences and tokens.
 class CapecSpecsDict(TokensDict):
-    def __init__(self, capec_file, training_corpus_file):
+    def __init__(self, capec_file, spec_file):
         super().__init__()
-        
-        self.__capec_names = []
-        self.__capec_descriptions = []
-        self.__capec_execution_flows = []
-        self.__capec_mitigations = []
-        self.__capec_tokens = []
-        self.__capec_tokens_lemm = []
-        self.__system_spec_tokens = []
-        self.__system_spec_tokens_lemm = []
-        self.__training_tokens = []
-        # self.__bigram_model = None
-        # self.__trigram_model = None
-
-        self.parse_capecs(capec_file)
-        self.parse_training_corpus(training_corpus_file)
-
-    def get_capec_names(self):
-        return self.__capec_names
-
-    def get_capec_descs(self):
-        return self.__capec_descriptions
-    
-    def get_capec_execution_flows(self):
-        return self.__capec_execution_flows
-    
-    def get_capec_mitigations(self):
-        return self.__capec_mitigations
-    
-    def get_capec_tokens(self):
-        return self.__capec_tokens
-    
-    def get_capec_tokens_lemm(self):
-        return self.__capec_tokens_lemm
-    
-    def get_system_spec_tokens(self):
-        return self.__system_spec_tokens
-    
-    def get_system_spec_tokens_lemm(self):
-        return self.__system_spec_tokens_lemm
-    
-    def get_training_tokens(self):
-        return self.__training_tokens
-    
-    def get_training_word_doc_count(self):
-        word_document_count = {}  # Dictionary to store word-document counts
-
-        # Iterate over each document in the corpus
-        for document in self.__training_tokens:
-            unique_words_in_doc = set(document)
 
-            # Update the word-document count for each unique word in the document
-            for word in unique_words_in_doc:
-                # Increment the count for the word if it exists, otherwise initialize it to 1
-                word_document_count[word] = word_document_count.get(word, 0) + 1
+        self.capec_names = []
+        self.capec_descriptions = []
+        self.capec_execution_flows = []
+        self.capec_mitigations = []
+        self.capec_tokens = []
+        self.capec_sentences = []
 
-        sorted_counts = sorted(word_document_count.items(), key=lambda x: (-x[1], x[0]))
+        self.system_spec_tokens = []
+        self.system_spec_sentences = []
 
-        return sorted_counts
+        self.parse_capecs(capec_file)
+        self.parse_system_specs(spec_file)
     
     def parse_capecs(self, capec_file):
         capec_descriptions = []
@@ -83,33 +36,36 @@ class CapecSpecsDict(TokensDict):
         attack_patterns = capec_dict['Attack_Pattern_Catalog']['Attack_Patterns']['Attack_Pattern']
 
         for attack_pattern in attack_patterns:
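+            # Keep only active (non-obsolete, non-deprecated) "Detailed" patterns that have a description.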
-            if (attack_pattern['@Abstraction'] == 'Standard' or \
-                attack_pattern['@Abstraction'] == 'Detailed') and \
+            if attack_pattern['@Abstraction'] == 'Detailed' and \
                 attack_pattern['@Status'] != 'Obsolete' and \
                 attack_pattern['@Status'] !='Deprecated' and \
-                "Execution_Flow" in attack_pattern:
+                "Description" in attack_pattern and \
+                attack_pattern["Description"] != None:
                 
                 capec_names.append(attack_pattern['@Name'])
                 
-                description = ""
                 description = attack_pattern["Description"]
 
                 if "Extended_Description" in attack_pattern:
                     description = description + " " + attack_pattern["Extended_Description"]
-                
-                capec_descriptions.append(description)
+
+                cleaned_desc = super().clean_text(description)
+                capec_descriptions.append(cleaned_desc)
                 
                 attack_steps_list = []
-                attack_steps = attack_pattern["Execution_Flow"]["Attack_Step"]
+
+                if "Execution_Flow" in attack_pattern:
+                    attack_steps = attack_pattern["Execution_Flow"]["Attack_Step"]
                 
-                if isinstance(attack_steps, list):
-                    for idx, attack_step in enumerate(attack_steps):
-                        cleaned_attack_step = "Step " + str(idx + 1) + ". " + \
-                            super().clean_text(attack_step["Description"])
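+                    # "Attack_Step" may be a single dict or a list of dicts; handle both and number the steps.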
+                    if isinstance(attack_steps, list):
+                        for idx, attack_step in enumerate(attack_steps):
+                            attack_stp = "Step " + str(idx + 1) + ". " + attack_step["Description"] + "."
+                            cleaned_attack_step = super().clean_text(attack_stp)
+                            attack_steps_list.append(cleaned_attack_step)
+                    else:
+                        attack_stp = "Step 1. " + attack_steps["Description"] + "."
+                        cleaned_attack_step = super().clean_text(attack_stp)
                         attack_steps_list.append(cleaned_attack_step)
-                else:
-                    cleaned_attack_step = "Step 1. " + super().clean_text(attack_steps["Description"])
-                    attack_steps_list.append(cleaned_attack_step)
 
                 capec_execution_flows.append(attack_steps_list)
 
@@ -128,94 +84,34 @@ class CapecSpecsDict(TokensDict):
 
                 capec_mitigations.append(mitigations_list)
 
-        capec_descriptions = super().clean_texts(capec_descriptions)
-
-        self.__capec_names = capec_names
-        self.__capec_descriptions = capec_descriptions
-        self.__capec_execution_flows = capec_execution_flows
-        self.__capec_mitigations = capec_mitigations
-
-        capec_tokens = super().tokenize_text(capec_descriptions)
-        capec_tokens = super().lowercase_tokens(capec_tokens)
-        capec_tokens = super().remove_numbers(capec_tokens)
-        
-        capec_tokens_lemm = super().lemmatize_tokens(capec_tokens)
-
-        self.__capec_tokens = capec_tokens
-        self.__capec_tokens_lemm = capec_tokens_lemm
-
-        # bigram = Phrases(training_tokens, min_count=7, threshold=18, 
-        #                  connector_words=ENGLISH_CONNECTOR_WORDS)
-        # self.__bigram_model = Phraser(bigram)
-
-        # trigram = Phrases(bigram[training_tokens], min_count=16, threshold=26, 
-        #                   connector_words=ENGLISH_CONNECTOR_WORDS)
-        # self.__trigram_model = Phraser(trigram)
-
-        # # Apply the bigram model to each document in the training tokens corpus
-        # training_tokens = [self.__bigram_model[doc] for doc in training_tokens]
-        # # Apply the trigram model to the resulting bigram training tokens corpus
-        # training_tokens = [self.__trigram_model[doc] for doc in training_tokens]
-
-    def parse_training_corpus(self, training_corpus_file):
-        unduped_training_tokens = []
+        self.capec_names = capec_names
+        self.capec_descriptions = capec_descriptions
+        self.capec_execution_flows = capec_execution_flows
+        self.capec_mitigations = capec_mitigations
         
-        # Open the JSON file
-        with open(training_corpus_file, 'r') as file:
-            # Load the JSON data
-            data = json.load(file)
+        capec_data = []
 
-        for source in ["MS-Bulletin", "Metasploit", "NVD"]:
-            for _, value in data[source].items():
-                corpora_tokens = []
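+        # Merge each CAPEC description with its execution-flow steps (stripping "Step N." and phase prefixes).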
+        for capec_index, capec_description in enumerate(self.capec_descriptions):
+            capec_desc_ef = capec_description
 
-                for word_label in value:
-                    word = word_label[0]
+            for attack_step in self.capec_execution_flows[capec_index]:
+                attack_stp = re.sub(r'^Step [0-9]+\. ', '', attack_step)
+                attack_stp = re.sub(r'^\[.*\] ', '', attack_stp)
+                capec_desc_ef = capec_desc_ef + " " + attack_stp
 
-                    if word.isalnum():
-                        corpora_tokens.append(word)
+            capec_data.append(capec_desc_ef)
 
-                unduped_training_tokens.append(corpora_tokens)
+        capec_sentences, capec_tokens = super().preprocess(capec_data)
 
-        unduped_training_tokens = super().lowercase_tokens(unduped_training_tokens)
-        unduped_training_tokens = super().remove_numbers(unduped_training_tokens)
-
-        unduped_training_tokens += self.__capec_tokens
-
-        training_tokens_dict = {}
-
-        # Iterate through each array in the outer array
-        for index, token_set in enumerate(unduped_training_tokens):
-            # Sort the elements of the array to ignore the order
-            sorted_token_set = sorted(token_set)
-
-            # Convert the sorted array to a tuple to make it hashable
-            token_set_tuple = tuple(sorted_token_set)
-
-            # Compute the hash value of the sorted array
-            token_set_hash = hash(token_set_tuple)
-
-            # Check if the hash value already exists in the dictionary
-            if token_set_hash not in training_tokens_dict:
-                # If the hash value doesn't exist, add the sorted array to the dictionary
-                training_tokens_dict[token_set_hash] = index            
-
-        training_tokens = [unduped_training_tokens[index] for index in list(training_tokens_dict.values())]
-
-        self.__training_tokens = training_tokens
+        self.capec_tokens = capec_tokens
+        self.capec_sentences = capec_sentences
 
     def parse_system_specs(self, spec_file):
         with open(spec_file, 'r') as file:
-            # Read the contents of the file
             file_contents = file.read()
 
-        system_spec_texts = super().clean_texts([file_contents])
-
-        system_spec_tokens = super().tokenize_text(system_spec_texts)
-        system_spec_tokens = super().lowercase_tokens(system_spec_tokens)
-        system_spec_tokens = super().remove_numbers(system_spec_tokens)
-        
-        system_spec_tokens_lemm = super().lemmatize_tokens(system_spec_tokens)
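+        # Clean the raw specification text, then split it into sentences and preprocessed tokens.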
+        system_spec_texts = [super().clean_text(file_contents)]
+        system_spec_sentences, system_spec_tokens = super().preprocess(system_spec_texts)
 
-        self.__system_spec_tokens = system_spec_tokens[0]
-        self.__system_spec_tokens_lemm = system_spec_tokens_lemm[0]
\ No newline at end of file
+        self.system_spec_tokens = system_spec_tokens[0]
+        self.system_spec_sentences = system_spec_sentences[0]
diff --git a/capectracer/capec_tracer.py b/capectracer/capec_tracer.py
index e3f862936a3d64e86ab6d2d5cd7b85000b2821a9..2670d5e84aebdefaf15b387c2b72b7935764db79 100644
--- a/capectracer/capec_tracer.py
+++ b/capectracer/capec_tracer.py
@@ -1,14 +1,14 @@
 import requests
 import os
+import torch
+import decimal
 
 from capec_specs_dict import CapecSpecsDict
 
-from gensim.corpora import Dictionary
-from gensim.models import LdaModel
-from gensim.matutils import hellinger
 from gensim import similarities
+from gensim.corpora.dictionary import Dictionary
 from gensim.models import TfidfModel
-from sklearn.utils import shuffle
+from sentence_transformers import SentenceTransformer, util
 
 def get_capec_file(abs_path):
     # Send a GET request to the URL
@@ -22,36 +22,48 @@ def get_capec_file(abs_path):
 
     return response
 
+def round_dec(x, place, round_up):
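+    # Round x to the given number of decimal places, toward +infinity if round_up else toward -infinity.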
+    context = decimal.getcontext()
+    original_rounding = context.rounding
+
+    if round_up:
+        context.rounding = decimal.ROUND_CEILING
+    else:
+        context.rounding = decimal.ROUND_FLOOR
+
+    rounded = round(decimal.Decimal(str(x)), place)
+    context.rounding = original_rounding
+
+    return float(rounded)
+
 def trace_capecs(abs_path):
     try:
-        tokens_dicti = CapecSpecsDict(abs_path + "capec_latest.xml", abs_path + 'full_corpus.json')
+        dicti = CapecSpecsDict(abs_path + "capec_latest.xml", abs_path + "system_specs.txt")
             
-        capec_names = tokens_dicti.get_capec_names()
-        capec_descs = tokens_dicti.get_capec_descs()
-        capec_attack_steps = tokens_dicti.get_capec_execution_flows()
-        capec_mitigations = tokens_dicti.get_capec_mitigations()
-        capec_tokens = tokens_dicti.get_capec_tokens()
-        capec_tokens_lemm = tokens_dicti.get_capec_tokens_lemm()
-
-        tokens_dicti.parse_system_specs(abs_path + "system_specs.txt")
-        spec_tokens = tokens_dicti.get_system_spec_tokens()
-        spec_tokens_lemm = tokens_dicti.get_system_spec_tokens_lemm()
+        capec_names = dicti.capec_names
+        capec_descs = dicti.capec_descriptions
+        capec_attack_steps = dicti.capec_execution_flows
+        capec_mitigations = dicti.capec_mitigations
+        all_capec_sentences = dicti.capec_sentences
+        capec_tokens = dicti.capec_tokens
+        spec_sentences = dicti.system_spec_sentences
+        spec_tokens = dicti.system_spec_tokens
 
         # Create a dictionary from the corpus documents
-        capec_lemm_dictionary = Dictionary(capec_tokens_lemm)
+        capec_dictionary = Dictionary(capec_tokens)
 
         # Create a bag-of-words corpus from the corpus documents
-        capec_lemm_corpus = [capec_lemm_dictionary.doc2bow(doc) for doc in capec_tokens_lemm]
+        capec_corpus = [capec_dictionary.doc2bow(doc) for doc in capec_tokens]
 
         # Create a TF-IDF model
-        tfidf_model = TfidfModel(capec_lemm_corpus, id2word=capec_lemm_dictionary)
+        tfidf_model = TfidfModel(capec_corpus, id2word=capec_dictionary)
 
         # Convert the checked document to TF-IDF representation
-        spec_lemm_bow = capec_lemm_dictionary.doc2bow(spec_tokens_lemm)
-        spec_tfidf = tfidf_model[spec_lemm_bow]
+        spec_bow = capec_dictionary.doc2bow(spec_tokens)
+        spec_tfidf = tfidf_model[spec_bow]
 
         # Create a similarity index
-        index = similarities.MatrixSimilarity(tfidf_model[capec_lemm_corpus], num_features=len(capec_lemm_dictionary))
+        index = similarities.MatrixSimilarity(tfidf_model[capec_corpus], num_features=len(capec_dictionary))
 
         # Calculate similarity scores
         similarity_scores = index[spec_tfidf]
@@ -67,57 +79,51 @@ def trace_capecs(abs_path):
         #     print(score)
         #     print(capec_descs[idx])
 
-        if sorted_doc_similarity_pairs[1][1] > 0:
-            training_tokens = tokens_dicti.get_training_tokens()
-            training_tokens = shuffle(training_tokens)
-            
-            training_dictionary = Dictionary(training_tokens)
-            training_dictionary.filter_extremes(no_below=2, no_above=0.3)
-
-            training_corpus = [training_dictionary.doc2bow(text) for text in training_tokens]
-
-            lda = LdaModel(corpus=training_corpus,
-                            num_topics=18,
-                            id2word=training_dictionary,
-                            passes=12,
-                            alpha="auto",
-                            eta="auto",
-                            iterations=6,   
-                            update_every=1,
-                            decay=0.745786579640169,
-                            offset=3.20092862358715
-                            )
-            
-            system_specs_bow = training_dictionary.doc2bow(spec_tokens)
-            system_spec_topic_dist = lda[system_specs_bow]
-
-            difference_scores = []
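+        # Proceed only if the closest CAPEC clears a minimum TF-IDF similarity threshold.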
+        if sorted_doc_similarity_pairs[0][1] > 0.16:
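+            # Embed sentences with a security-domain sentence transformer and score each CAPEC by
+            # the mean cosine similarity between its sentence embeddings and the spec's.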
+            model = SentenceTransformer('basel/ATTACK-BERT')
 
-            for index, capec_token_set in enumerate(capec_tokens):
-                capec_desc_bow = training_dictionary.doc2bow(capec_token_set)
-                capec_topic_dist = lda[capec_desc_bow]
+            spec_embeddings = model.encode(spec_sentences, convert_to_tensor=True)
 
-                score = [index, hellinger(capec_topic_dist, system_spec_topic_dist)]
+            difference_scores = []
+            
+            for index, capec_sentences in enumerate(all_capec_sentences):
+                capec_embeddings = model.encode(capec_sentences, convert_to_tensor=True)
+                cos_scores = util.cos_sim(spec_embeddings, capec_embeddings)
+                mean_cos_score = torch.mean(cos_scores).item()
 
+                score = [index, mean_cos_score]
                 difference_scores.append(score)
 
             # Sort the arrays based on the value of the second index in each array
-            difference_scores = sorted(difference_scores, key=lambda x: x[1], reverse=False)
+            difference_scores = sorted(difference_scores, key=lambda x: x[1], reverse=True)
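+            # Bound the scores for min-max normalization: round the top score up and the lowest positive score down.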
+            max_score = round_dec(difference_scores[0][1], 2, True)
+            min_score = min(score[1] for score in difference_scores if score[1] > 0)
+            min_score = round_dec(min_score, 2, False)
 
             with open(abs_path + 'traced_capecs.txt', 'w') as output:
                 for score in difference_scores:
-                    output.write(f'Confidence score: {100 - int(100 * score[1])}%\n')
-                    output.write(f'Name: {capec_names[score[0]]}\n')
-                    output.write(f'Description:\n')
-                    output.write(f'{capec_descs[score[0]]}\n')
-                    output.write(f'Attack Steps:\n')
-                    for attack_step in capec_attack_steps[score[0]]:
-                        output.write(f'{attack_step}\n')
-                    if capec_mitigations[score[0]]:
-                        output.write(f"Mitigations:\n")
-                        for mitigation in capec_mitigations[score[0]]:
-                            output.write(f'{mitigation}\n')
-                    output.write('\n')
+                    if score[1] > 0:
+                        normalized_score = (score[1] - min_score) / (max_score - min_score)
+                        confidence_score = int(100 * normalized_score)
+
+                        if confidence_score > 0:
+                            output.write(f'Confidence score: {confidence_score}%\n')
+                            output.write(f'Name: {capec_names[score[0]]}\n')
+                            output.write(f'Description:\n')
+                            output.write(f'{capec_descs[score[0]]}\n')
+
+                            if capec_attack_steps[score[0]]:
+                                output.write(f'Attack Steps:\n')
+
+                                for attack_step in capec_attack_steps[score[0]]:
+                                    output.write(f'{attack_step}\n')
+
+                            if capec_mitigations[score[0]]:
+                                output.write(f"Mitigations:\n")
+
+                                for mitigation in capec_mitigations[score[0]]:
+                                    output.write(f'{mitigation}\n')
+                            output.write('\n')
         else:
             with open(abs_path + 'traced_capecs.txt', 'w') as output:
                 output.write("No attack patterns were able to be identified with the provided system specifications. " + 
@@ -138,3 +144,5 @@ if __name__ == "__main__":
     else:
         with open(abs_path + 'traced_capecs.txt', 'w') as output:
                 output.write(f"Failed to download the list of CAPECs from MITRE. Status code: {response.status_code}.")
+
+    print("Results (or errors if any were encountered) have been published to traced_capecs.txt.")
diff --git a/capectracer/full_corpus.json b/capectracer/full_corpus.json
deleted file mode 100755
index b7c038819c6f10f173b44b6a01e3da0c5c8b1086..0000000000000000000000000000000000000000
Binary files a/capectracer/full_corpus.json and /dev/null differ
diff --git a/capectracer/requirements_capec_tracer.txt b/capectracer/requirements_capec_tracer.txt
index 4e44f6d8edd320faa0753aec8e287bb940ec58c8..5ddadcd67395243087b307c71fa6dfbfe8148ebd 100644
--- a/capectracer/requirements_capec_tracer.txt
+++ b/capectracer/requirements_capec_tracer.txt
@@ -1,7 +1,7 @@
-scikit-learn==1.2
 gensim==4.1.2
 spacy==3.7.2
 xmltodict==0.13.0
 nltk==3.7
-scipy==1.10.1
 requests==2.25.1
+torch==2.3.0
+sentence_transformers==2.7.0
\ No newline at end of file
diff --git a/capectracer/requirements_model_training.txt b/capectracer/requirements_model_training.txt
deleted file mode 100644
index 9a9fd05fbada70484ea69ad1979dd039d47fb895..0000000000000000000000000000000000000000
--- a/capectracer/requirements_model_training.txt
+++ /dev/null
@@ -1,4 +0,0 @@
-tqdm==4.64.0
-numpy==1.22.4
-bayesian-optimization==1.4.3
-pandas==1.4.2
diff --git a/capectracer/tokens_dict.py b/capectracer/tokens_dict.py
index a2aa8ebeda84f2b7eca0d2260cea8d0ac86dd2aa..52204b6b531179eaa80ded364b399f9b3c6032e1 100644
--- a/capectracer/tokens_dict.py
+++ b/capectracer/tokens_dict.py
@@ -1,9 +1,10 @@
 import re
 import nltk
+import spacy
 
 from nltk.stem import WordNetLemmatizer
-from nltk.tokenize import word_tokenize
-import spacy
+from nltk.tokenize import word_tokenize, sent_tokenize
+from nltk.corpus import wordnet as wn
 
 class TokensDict:
     def __init__(self):
@@ -13,71 +14,73 @@ class TokensDict:
         nltk.download('averaged_perceptron_tagger')
         nltk.download('omw-1.4')
 
-    def tokenize_text(self, texts):
-        tokens_list = []
-        
-        for text in texts:
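+        # spaCy is loaded only for its English stop-word list; no pipeline components are needed.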
+        self.en = spacy.load('en_core_web_sm', enable=[""])
+        self.stop_words = set(self.en.Defaults.stop_words)
+        self.lemmatizer = WordNetLemmatizer()
+
+        wn.ensure_loaded()
+
+    def lemm_and_rem_sw(self, tokens):
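+        # Lemmatize each token according to its POS tag, dropping stop words and purely numeric tokens.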
+        preprocessed_tokens = []
+        tagged_tokens = nltk.pos_tag(tokens)
+
+        for token, pos in tagged_tokens:
+            if not token.isnumeric() and token not in self.stop_words:
+                if pos.startswith("N"):
+                    preprocessed_tokens.append(self.lemmatizer.lemmatize(token, "n"))
+                elif pos.startswith("V"):
+                    preprocessed_tokens.append(self.lemmatizer.lemmatize(token, "v"))
+                elif pos.startswith('J'):
+                    preprocessed_tokens.append(self.lemmatizer.lemmatize(token, "a"))
+                elif pos.startswith('R'):
+                    preprocessed_tokens.append(self.lemmatizer.lemmatize(token, "r"))
+                else:
+                    preprocessed_tokens.append(token)
+
+        return preprocessed_tokens
+
+    def tokenize_text(self, text):
+        sentences = sent_tokenize(text)
+        prepro_sentences = []
+
+        for sentence in sentences:
             # Remove punctuation using regular expression
-            prepro_text = re.sub(r'[^a-zA-Z0-9\s/-]', '', text)
+            prepro_text = re.sub(r'[^a-zA-Z0-9\s/-]', '', sentence)
             prepro_text = re.sub(r'[-/]', ' ', prepro_text)
-            # Tokenization using NLTK
-            tokens = word_tokenize(prepro_text)
+            prepro_text = re.sub(r'\s+', ' ', prepro_text)
+            prepro_text = prepro_text.strip()
+            prepro_sentences.append(prepro_text)
 
-            tokens_list.append(tokens)
-        
-        return tokens_list
-    
-    def remove_numbers(self, tokens_list):
-        filtered_tokens_list = [[token for token in token_set if not token.isnumeric()] \
-                                for token_set in tokens_list]
-
-        return filtered_tokens_list
-
-    def lemmatize_tokens(self, tokens_list):
-        en = spacy.load('en_core_web_sm')
-        stop_words = en.Defaults.stop_words
-
-        # Initialize WordNet Lemmatizer
-        lemmatizer = WordNetLemmatizer() 
-        lemmatized_token_list = []
-
-        for token_set in tokens_list:
-            lemmatized_tokens = []
-            tagged_tokens = nltk.pos_tag(token_set)
-
-            for token, pos in tagged_tokens:
-                if token not in stop_words:
-                    if pos.startswith("N"):
-                        lemmatized_tokens.append(lemmatizer.lemmatize(token, "n"))
-                    elif pos.startswith("V"):
-                        lemmatized_tokens.append(lemmatizer.lemmatize(token, "v"))
-                    elif pos.startswith('J'):
-                        lemmatized_tokens.append(lemmatizer.lemmatize(token, "a"))
-                    elif pos.startswith('R'):
-                        lemmatized_tokens.append(lemmatizer.lemmatize(token, "r"))
-                    else:
-                        lemmatized_tokens.append(token)
-
-            lemmatized_token_list.append(lemmatized_tokens)
+        word_tokens = ' '.join(prepro_sentences)
+        word_tokens = word_tokens.lower()
+        word_tokens = word_tokenize(word_tokens)
             
-        return lemmatized_token_list
-    
-    def lowercase_tokens(self, tokens_list):
-        return [[token.lower() for token in token_set] for token_set in tokens_list]
+        # Lemmatize the NLTK word tokens and remove stop words
+        preprocessed_tokens = self.lemm_and_rem_sw(word_tokens)
+
+        return prepro_sentences, preprocessed_tokens
+
+    def preprocess(self, corpus):
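+        # Preprocess a corpus of raw texts into per-document sentence lists and token lists.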
+        prepro_corpus_sentences = []
+        prepro_corpus_tokens = []
+        
+        for doc in corpus:
+            prepro_sentences, prepro_tokens = self.tokenize_text(doc)
+            prepro_corpus_sentences.append(prepro_sentences)
+            prepro_corpus_tokens.append(prepro_tokens)
+
+        return prepro_corpus_sentences, prepro_corpus_tokens
     
     def clean_text(self, text):
-        cleaned_text = re.sub(r'\n', '', text)
+        cleaned_text = text.strip()
+        cleaned_text = re.sub(r'\n', ' ', cleaned_text)
+        cleaned_text = re.sub(r'\.+', '.', cleaned_text)
         cleaned_text = re.sub(r'\s+', ' ', cleaned_text)
         cleaned_text = re.sub(r'^\[[^\]]*\]\s*', '', cleaned_text)
 
         return cleaned_text
-
-    def clean_texts(self, texts):
-        cleaned_texts = []
-
-        for text in texts:
-            cleaned_text = self.clean_text(text)
-
-            cleaned_texts.append(cleaned_text)
-
-        return cleaned_texts
\ No newline at end of file
diff --git a/capectracer/training_lda_model.py b/capectracer/training_lda_model.py
deleted file mode 100644
index 29e4d95b0e011463948bd04e5885d7b417d53e13..0000000000000000000000000000000000000000
--- a/capectracer/training_lda_model.py
+++ /dev/null
@@ -1,206 +0,0 @@
-import tqdm
-import logging
-import numpy as np
-import os.path
-import requests
-
-from capec_specs_dict import CapecSpecsDict
-
-from gensim.corpora import Dictionary
-from gensim.models import LdaModel
-from gensim.models.callbacks import CoherenceMetric, PerplexityMetric, ConvergenceMetric
-from bayes_opt import BayesianOptimization
-from pandas import DataFrame
-from bayes_opt.logger import JSONLogger
-from bayes_opt.event import Events
-from sklearn.model_selection import KFold
-from functools import partial
-    
-# Train model 
-def train_lda(training_tokens, dictionary, num_topics, passes, iterations, update_every, 
-              training_logs, training_results_csv, eval_every, decay, offset):
-    model_dict = {'Topics': [],
-                'Passes': [],
-                'Iterations': [],
-                'Update_Every': [],
-                'Decays': [],
-                'Offsets': [],
-                'Avg_Coherences': [],
-                'Avg_Perplexities': [],
-                'Avg_Convergences': [],
-                'Dev_Coherences': [],
-                'Dev_Perplexities': [],
-                'Dev_Convergences': [],
-                'Epochs': []}
-    
-    # Initialize k-fold cross-validation
-    k = 4
-    kf = KFold(n_splits=k, shuffle=True, random_state=1)
-
-    # Initialize a list to store evaluation metrics for each fold
-    coherence_scores = []
-    perplexity_scores = []
-    convergence_scores = []
-
-    k_index = 1
-
-    print("Training with", num_topics, "topics,", passes, 
-          "max passes,", iterations, "max iterations,", 
-          update_every, "update every,", decay, "decay,", offset, "offset:", sep=" ")
-
-    pbar = tqdm.trange(k)
-
-    # Iterate over each fold
-    for train_index, test_index in kf.split(training_tokens):
-        train_tokens = [training_tokens[i] for i in train_index]
-        val_tokens = [training_tokens[i] for i in test_index]
-        
-        train_corpus = [dictionary.doc2bow(doc) for doc in train_tokens]
-        val_corpus = [dictionary.doc2bow(doc) for doc in val_tokens]
-
-        # Configure logging
-        logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.DEBUG,
-                        filename=training_logs)
-        
-        logging.info(
-            "Training with %s topics, %s max passes, %s max iterations, %s update every, %s decay, %s offset at %s fold:", 
-            num_topics, passes, iterations, update_every, decay, offset, k_index)
-        
-        coherence_metric = CoherenceMetric(
-            texts=train_tokens, coherence='c_npmi', window_size=10)
-        perplexity_metric = PerplexityMetric(corpus=val_corpus)
-        convergence_metric = ConvergenceMetric(distance="hellinger")
-        
-        lda_model = LdaModel(corpus=train_corpus,
-                            num_topics=num_topics,
-                            id2word=dictionary,
-                            passes=passes,
-                            alpha="auto",
-                            eta="auto",
-                            random_state=1,
-                            iterations=iterations,   
-                            update_every=update_every,
-                            decay=decay,
-                            offset=offset,
-                            eval_every=eval_every,
-                            callbacks=[perplexity_metric, coherence_metric, convergence_metric],
-                            chunksize=2000)
-        
-        logging.shutdown()
-
-        coherence_scores.append(lda_model.metrics['Coherence'])
-        perplexity_scores.append(lda_model.metrics['Perplexity'])
-        convergence_scores.append(lda_model.metrics['Convergence'])
-
-        k_index += 1
-
-        pbar.update(1)
-    
-    pbar.close()
-
-    for epoch, _ in enumerate(coherence_scores[0]):
-        model_dict['Topics'].append(num_topics)
-        model_dict['Passes'].append(passes)
-        model_dict['Iterations'].append(iterations)
-        model_dict['Update_Every'].append(update_every)
-        model_dict['Decays'].append(decay)
-        model_dict['Offsets'].append(offset)
-        model_dict['Epochs'].append(epoch + 1)
-
-    coherence_score_averages = np.mean(coherence_scores, axis=0)
-    perplexity_score_averages = np.mean(perplexity_scores, axis=0)
-    convergence_score_averages = np.mean(convergence_scores, axis=0)
-
-    # Compute the absolute deviation of each element from its corresponding column mean
-    coherence_absolute_deviations = np.abs(coherence_scores - coherence_score_averages)
-    perplexity_absolute_deviations = np.abs(perplexity_scores - perplexity_score_averages)
-    convergence_absolute_deviations = np.abs(convergence_scores - convergence_score_averages)
-
-    # Compute the average of these absolute deviations for each column
-    coherece_absolute_average_deviations = np.mean(coherence_absolute_deviations, axis=0)
-    perplexity_absolute_average_deviations = np.mean(perplexity_absolute_deviations, axis=0)
-    convergence_absolute_average_deviations = np.mean(convergence_absolute_deviations, axis=0)
-
-    model_dict['Avg_Coherences'] = coherence_score_averages
-    model_dict['Avg_Perplexities'] = perplexity_score_averages
-    model_dict['Avg_Convergences'] = convergence_score_averages
-    model_dict['Dev_Coherences'] = coherece_absolute_average_deviations
-    model_dict['Dev_Perplexities'] = perplexity_absolute_average_deviations
-    model_dict['Dev_Convergences'] = convergence_absolute_average_deviations
-
-    model_df = DataFrame.from_dict(model_dict)
-
-    if os.path.isfile(training_results_csv):
-        model_df.to_csv(training_results_csv, mode='a', header=False)
-    else:
-        model_df.to_csv(training_results_csv, mode='w', header=True)
-
-    return coherence_score_averages[len(coherence_score_averages) - 1]
-    # return -perplexity_score_averages[len(perplexity_score_averages) - 1]
-
-def train_lda_wrapper(training_tokens, dictionary, num_topics, passes, iterations, update_every, 
-              training_logs, training_results_csv, eval_every, decay=0.5, offset=1):
-    num_topics = int(num_topics)
-    passes = int(passes)
-    iterations = int(iterations)
-    update_every = int(update_every)
-
-    return train_lda(training_tokens, dictionary, num_topics, passes, iterations, update_every, 
-              training_logs, training_results_csv, eval_every, decay, offset)
-
-def get_capec_file():
-    # Send a GET request to the URL
-    response = requests.get("https://capec.mitre.org/data/xml/capec_latest.xml")
-    
-    # Check if the request was successful (status code 200)
-    if response.status_code == 200:
-        # Open the file in binary write mode and write the content
-        with open("capec_latest.xml", 'wb') as file:
-            file.write(response.content)
-
-    return response
-
-if __name__ == "__main__": 
-    response = get_capec_file()
-
-    if response.status_code == 200:
-        tokens_dicti = CapecSpecsDict("capec_latest.xml", 'full_corpus.json')
-
-        training_tokens = tokens_dicti.get_training_tokens()
-
-        # Create dictionary and train/test corpus
-        dictionary = Dictionary(training_tokens)
-        dictionary.filter_extremes(no_below=2, no_above=0.3)
-
-        train_lda_partial = partial(train_lda_wrapper, 
-                                    training_tokens=training_tokens,
-                                    dictionary=dictionary,
-                                    training_logs="lda_training4.log",
-                                    training_results_csv="lda_training_results4.csv", 
-                                    eval_every=10
-                                    )
-
-        # train_lda_partial(num_topics=181, 
-        #                   passes=20, iterations=len(tokens_dicti.get_capec_names()), update_every=1, decay=0.7, offset=10)
-
-        optimizer = BayesianOptimization(
-            f=train_lda_partial,
-            pbounds={'num_topics': (2, 30), 
-                    'passes': (10, 20),
-                    'iterations': (1, 100),
-                    'update_every': (1, 1),
-                    'decay': (0.5, 1),
-                    'offset': (1, 16)},
-            verbose=2,
-            random_state=1,
-        )
-
-        optimizer.set_gp_params(alpha=1e-3)
-
-        logger = JSONLogger(path="./lda_optimization4_log")
-        optimizer.subscribe(Events.OPTIMIZATION_STEP, logger)
-
-        optimizer.maximize(init_points=10, n_iter=50)
-        print(optimizer.max)
-    else:
-        print(f"Failed to download the list of CAPECs from MITRE. Status code: {response.status_code}.")
\ No newline at end of file
diff --git a/src/main/java/ai/AIAttackPatternTree3.java b/src/main/java/ai/AIAttackPatternTree3.java
index 254981413e76ea5554ca5e778fd74a81c0e50a7f..c1d714bc90d732973b5e98fbad3233152871bcfc 100644
--- a/src/main/java/ai/AIAttackPatternTree3.java
+++ b/src/main/java/ai/AIAttackPatternTree3.java
@@ -29,9 +29,9 @@ public class AIAttackPatternTree3 extends AIInteract {
             "specification in \"description\". " +
             "# Respect: All words in \"description\" must be separated with spaces.";
 
-    private static final String KNOWLEDGE_ON_JSON_FOR_ATTACK_SCEN = "When you are asked to identify all the possible attack scenarios " +
-            "that an attacker needs to complete or could possibly perform to successfully achieve an attack, " +
-            "return them as a JSON specification formatted as follows: " +
+    private static final String KNOWLEDGE_ON_JSON_FOR_ATTACK_SCEN = "When you are asked to identify all the possible " +
+            "attack scenarios that an attacker would perform to successfully achieve an " +
+            "attack, return them as a JSON specification formatted as follows: " +
             "{\"attack\": \"NameOfAttack\", \"attackscenarios\": [{\"name\":  \"NameOfAttackScenario\", \"description\": \"" +
             "The description of the attack scenario and how it brings an attacker closer to the attack.\", " +
             "\"operator\": \"OR or AND\"} ...]} " +
@@ -45,7 +45,7 @@ public class AIAttackPatternTree3 extends AIInteract {
             "# Respect: \"operator\" must be only \"AND\" or \"OR\". Use \"AND\" to denote that an attacker must " +
             "complete all of the attack scenarios with an AND operator simultaneously to get " +
             "closer to the attack. Use \"OR\" to denote that an attacker only needs to complete one of the attack " +
-            "scenarios amongst all of the attack scenarios with an OR operator to get closer to the attack." +
+            "scenarios amongst all of the attack scenarios with an OR operator to get closer to the attack. " +
             "# Respect: The number of attack scenarios with the AND operator should be either zero, or greater than one. " +
             "# Respect: The number of attack scenarios with the OR operator should be either zero, or greater than one. " +
             "# Respect: If there are no attack scenarios that are able to be identified, have \"attackscenarios\" be " +
@@ -63,7 +63,7 @@ public class AIAttackPatternTree3 extends AIInteract {
             "# Respect: Include what the attack step is and how it needs to be completed by an attacker for " +
             "the attack scenario in \"description\". " +
             "# Respect: All words in \"description\" must be separated with spaces. " +
-            "# Respect: The elements of \"attacksteps\" need to be ordered sequentially. That is, the first indexed element" +
+            "# Respect: The elements of \"attacksteps\" need to be ordered sequentially. That is, the first indexed element " +
             "in \"attacksteps\" is the step that an attacker needs to complete first while the last indexed element is the step " +
             "that an attacker needs to complete last. " +
             "# Respect: There must be at least two attack steps in \"attacksteps\".";
@@ -102,15 +102,17 @@ public class AIAttackPatternTree3 extends AIInteract {
 
     private final String[] QUESTION_IDENTIFY_ATD = {"From the provided system specification " +
             "and using the specified JSON format, identify a possible objective that an attacker would " +
-            "want to and could feasibly achieve from exploiting the system using the list of provided attack " +
-            "patterns. Do respect the JSON format, and " +
-            "provide only JSON (no explanation before or after).\n",
-
-            "Using the list of attack patterns (if provided) and the specified JSON format, " +
-                    "identify all of the attack scenarios that an attacker can perform or needs to perform " +
-                    "simultaneously to achieve the provided attack. If provided with a system specification, " +
-                    "make sure to associate the specification with the attack scenarios. " +
-                    "Do respect the JSON format, and provide only JSON (no explanation before or after).\n",
+            "want to and could feasibly achieve from exploiting the system. If provided with a list of attack " +
+            "patterns, use at least one attack pattern from this list to identify a possible objective. " +
+            "Do respect the JSON format, and provide only JSON (no explanation before or after).\n",
+
+            "Using the specified JSON format, " +
+                    "identify all of the attack scenarios that an attacker would perform, " +
+                    "either simultaneously or separately, to achieve the provided attack. If provided with a " +
+                    "system specification, make sure to associate the specification with the attack scenarios. " +
+                    "In addition, if provided with a list of attack patterns, use at least one attack pattern " +
+                    "from this list to identify the attack scenarios. Do respect the JSON format, and provide " +
+                    "only JSON (no explanation before or after).\n",
 
             "Identify all of the attack steps that an attacker needs to conduct to " +
                     "achieve the provided attack scenario. Do respect the JSON format, and " +
@@ -181,7 +183,7 @@ public class AIAttackPatternTree3 extends AIInteract {
             chatData.aiinterface.addKnowledge("The system specification is: " + _spec, "ok");
         }
 
-        if (_attackPatterns != null) {
+        if (_attackPatterns != null && _attackPatterns.length > 0) {
             StringBuilder builder = new StringBuilder();
 
             for (int i = 0; i <= _attackPatterns.length - 1; i++) {
@@ -197,7 +199,7 @@ public class AIAttackPatternTree3 extends AIInteract {
             }
 
             TraceManager.addDev("\nKnowledge added: " + builder);
-            chatData.aiinterface.addKnowledge("The attack pattern is: " + builder, "ok");
+            chatData.aiinterface.addKnowledge("The attack patterns are: " + builder, "ok");
         }
 
         if (previousRootAtts != null && !previousRootAtts.isEmpty()) {
@@ -446,7 +448,7 @@ public class AIAttackPatternTree3 extends AIInteract {
     }
 
     private JSONArray checkAttackScenarios(String _spec, String attack, boolean isRoot,
-                                        Collection<String> _errors) throws org.json.JSONException {
+                                           Collection<String> _errors) throws org.json.JSONException {
         String attackName;
 
         if (isRoot) {
diff --git a/src/main/java/ai/CAPECTracer.java b/src/main/java/ai/CAPECTracer.java
index 931271fe85b5bd9fddd45bab63abe032ff307c15..bc76d56948e914ade45fa9ef9a3bf2a39d7c4407 100644
--- a/src/main/java/ai/CAPECTracer.java
+++ b/src/main/java/ai/CAPECTracer.java
@@ -89,9 +89,10 @@ public class CAPECTracer extends AIInteract {
                 characterInt = data.read();
             }
 
-            TraceManager.addDev(output.toString());
-            byte[] bytes = Files.readAllBytes(Path.of(capecTracerFolder + "/traced_capecs.txt"));
-            traces = new String(bytes);
+            if (!output.toString().contains("Error")) {
+                byte[] bytes = Files.readAllBytes(Path.of(capecTracerFolder + "/traced_capecs.txt"));
+                traces = new String(bytes);
+            }
         } catch (IOException | LauncherException e) {
             TraceManager.addDev(e.getMessage());
         }