diff --git a/.gitignore b/.gitignore
index e81733f..2c56417 100644
--- a/.gitignore
+++ b/.gitignore
@@ -4,6 +4,6 @@ checkpoints/*
 cache/*
 save/*
 log/*
-
+models/*
 # file
 *.pyc
diff --git a/0.1.3 b/0.1.3
new file mode 100644
index 0000000..2407f31
--- /dev/null
+++ b/0.1.3
@@ -0,0 +1,4 @@
+Requirement already satisfied: dgl in /home2/jainit/miniconda3/envs/mistral/lib/python3.10/site-packages (0.1.3)
+Requirement already satisfied: numpy>=1.14.0 in /home2/jainit/miniconda3/envs/mistral/lib/python3.10/site-packages (from dgl) (1.26.3)
+Requirement already satisfied: scipy>=1.1.0 in /home2/jainit/miniconda3/envs/mistral/lib/python3.10/site-packages (from dgl) (1.11.4)
+Requirement already satisfied: networkx>=2.1 in /home2/jainit/miniconda3/envs/mistral/lib/python3.10/site-packages (from dgl) (3.2.1)
diff --git a/HiGraph.py b/HiGraph.py
index 9548618..e76c87a 100644
--- a/HiGraph.py
+++ b/HiGraph.py
@@ -67,7 +67,7 @@ def __init__(self, hps, embed):
         # sent -> word
         self.sent2word = WSWGAT(in_dim=hps.hidden_size,
                                 out_dim=embed_size,
-                                num_heads=6,
+                                num_heads=hps.n_head,
                                 attn_drop_out=hps.atten_dropout_prob,
                                 ffn_inner_hidden_size=hps.ffn_inner_hidden_size,
                                 ffn_drop_out=hps.ffn_dropout_prob,
diff --git a/evaluation.py b/evaluation.py
index f22cef7..e0a41b2 100644
--- a/evaluation.py
+++ b/evaluation.py
@@ -84,7 +84,7 @@ def run_test(model, dataset, loader, model_name, hps):
 
         for i, (G, index) in enumerate(loader):
             if hps.cuda:
-                G.to(torch.device("cuda"))
+                G.to(torch.device(0))
             tester.evaluation(G, index, dataset, blocking=hps.blocking)
 
     running_avg_loss = tester.running_avg_loss
diff --git a/logs/train_20240304_232832 b/logs/train_20240304_232832
new file mode 100644
index 0000000..0aad79c
--- /dev/null
+++ b/logs/train_20240304_232832
@@ -0,0 +1,12 @@
+2024-03-04 23:28:32,376 INFO    : Pytorch 2.1.2+cu121
+2024-03-04 23:28:32,376 INFO    : [INFO] Create Vocab, vocab path is /scratch/jainit/cache/CNNDM/vocab
+2024-03-04 23:28:32,441 INFO    : [INFO] max_size of vocab was specified as 50000; we now have 50000 words. Stopping reading.
+2024-03-04 23:28:32,442 INFO    : [INFO] Finished constructing vocabulary of 50000 total words. Last word added: chaudhary
+2024-03-04 23:28:33,288 INFO    : [INFO] Loading external word embedding...
+2024-03-04 23:28:55,669 INFO    : [INFO] External Word Embedding iov count: 48427, oov count: 1573
+2024-03-04 23:28:56,428 INFO    : Namespace(data_dir='/scratch/jainit/cnndm', cache_dir='/scratch/jainit/cache/CNNDM', embedding_path='/scratch/jainit/glove.6B.300d.txt', model='HSG', restore_model='None', save_root='model', log_root='logs', seed=666, gpu='0', cuda=True, vocab_size=50000, n_epochs=20, batch_size=32, n_iter=1, word_embedding=True, word_emb_dim=300, embed_train=False, feat_embed_size=50, n_layers=1, lstm_hidden_state=128, lstm_layers=2, bidirectional=True, n_feature_size=128, hidden_size=64, ffn_inner_hidden_size=512, n_head=8, recurrent_dropout_prob=0.1, atten_dropout_prob=0.1, ffn_dropout_prob=0.1, use_orthnormal_init=True, sent_max_len=100, doc_max_timesteps=50, lr=0.0005, lr_descent=True, grad_clip=True, max_grad_norm=1.0, m=3)
+2024-03-04 23:28:56,521 INFO    : [MODEL] HeterSumGraph 
+2024-03-04 23:28:56,521 INFO    : [INFO] Start reading ExampleSet
+2024-03-04 23:29:04,974 INFO    : [INFO] Finish reading ExampleSet. Total time is 8.453032, Total size is 287084
+2024-03-04 23:29:04,975 INFO    : [INFO] Loading filter word File /scratch/jainit/cache/CNNDM/filter_word.txt
+2024-03-04 23:29:05,195 INFO    : [INFO] Loading word2sent TFIDF file from /scratch/jainit/cache/CNNDM/train.w2s.tfidf.jsonl!
diff --git a/logs/train_20240304_233425 b/logs/train_20240304_233425
new file mode 100644
index 0000000..3f1d616
--- /dev/null
+++ b/logs/train_20240304_233425
@@ -0,0 +1,12 @@
+2024-03-04 23:34:25,750 INFO    : Pytorch 2.1.2+cu121
+2024-03-04 23:34:25,750 INFO    : [INFO] Create Vocab, vocab path is /scratch/jainit/cache/CNNDM/vocab
+2024-03-04 23:34:25,820 INFO    : [INFO] max_size of vocab was specified as 50000; we now have 50000 words. Stopping reading.
+2024-03-04 23:34:25,820 INFO    : [INFO] Finished constructing vocabulary of 50000 total words. Last word added: chaudhary
+2024-03-04 23:34:26,728 INFO    : [INFO] Loading external word embedding...
+2024-03-04 23:34:48,983 INFO    : [INFO] External Word Embedding iov count: 48427, oov count: 1573
+2024-03-04 23:34:49,800 INFO    : Namespace(data_dir='/scratch/jainit/cnndm', cache_dir='/scratch/jainit/cache/CNNDM', embedding_path='/scratch/jainit/glove.6B.300d.txt', model='HSG', restore_model='None', save_root='model', log_root='logs', seed=666, gpu='0', cuda=True, vocab_size=50000, n_epochs=20, batch_size=32, n_iter=1, word_embedding=True, word_emb_dim=300, embed_train=False, feat_embed_size=50, n_layers=1, lstm_hidden_state=128, lstm_layers=2, bidirectional=True, n_feature_size=128, hidden_size=64, ffn_inner_hidden_size=512, n_head=8, recurrent_dropout_prob=0.1, atten_dropout_prob=0.1, ffn_dropout_prob=0.1, use_orthnormal_init=True, sent_max_len=100, doc_max_timesteps=50, lr=0.0005, lr_descent=True, grad_clip=True, max_grad_norm=1.0, m=3)
+2024-03-04 23:34:49,894 INFO    : [MODEL] HeterSumGraph 
+2024-03-04 23:34:49,894 INFO    : [INFO] Start reading ExampleSet
+2024-03-04 23:34:58,247 INFO    : [INFO] Finish reading ExampleSet. Total time is 8.353097, Total size is 287084
+2024-03-04 23:34:58,247 INFO    : [INFO] Loading filter word File /scratch/jainit/cache/CNNDM/filter_word.txt
+2024-03-04 23:34:58,468 INFO    : [INFO] Loading word2sent TFIDF file from /scratch/jainit/cache/CNNDM/train.w2s.tfidf.jsonl!
diff --git a/logs/train_20240304_233757 b/logs/train_20240304_233757
new file mode 100644
index 0000000..a57693b
--- /dev/null
+++ b/logs/train_20240304_233757
@@ -0,0 +1,12 @@
+2024-03-04 23:37:57,657 INFO    : Pytorch 2.1.2+cu121
+2024-03-04 23:37:57,657 INFO    : [INFO] Create Vocab, vocab path is /scratch/jainit/cache/CNNDM/vocab
+2024-03-04 23:37:57,709 INFO    : [INFO] max_size of vocab was specified as 50000; we now have 50000 words. Stopping reading.
+2024-03-04 23:37:57,709 INFO    : [INFO] Finished constructing vocabulary of 50000 total words. Last word added: chaudhary
+2024-03-04 23:37:58,621 INFO    : [INFO] Loading external word embedding...
+2024-03-04 23:38:20,757 INFO    : [INFO] External Word Embedding iov count: 48427, oov count: 1573
+2024-03-04 23:38:21,573 INFO    : Namespace(data_dir='/scratch/jainit/cnndm', cache_dir='/scratch/jainit/cache/CNNDM', embedding_path='/scratch/jainit/glove.6B.300d.txt', model='HSG', restore_model='None', save_root='model', log_root='logs', seed=666, gpu='0', cuda=True, vocab_size=50000, n_epochs=20, batch_size=32, n_iter=1, word_embedding=True, word_emb_dim=300, embed_train=False, feat_embed_size=50, n_layers=1, lstm_hidden_state=128, lstm_layers=2, bidirectional=True, n_feature_size=128, hidden_size=64, ffn_inner_hidden_size=512, n_head=8, recurrent_dropout_prob=0.1, atten_dropout_prob=0.1, ffn_dropout_prob=0.1, use_orthnormal_init=True, sent_max_len=100, doc_max_timesteps=50, lr=0.0005, lr_descent=True, grad_clip=True, max_grad_norm=1.0, m=3)
+2024-03-04 23:38:21,676 INFO    : [MODEL] HeterSumGraph 
+2024-03-04 23:38:21,676 INFO    : [INFO] Start reading ExampleSet
+2024-03-04 23:38:30,203 INFO    : [INFO] Finish reading ExampleSet. Total time is 8.526127, Total size is 287084
+2024-03-04 23:38:30,203 INFO    : [INFO] Loading filter word File /scratch/jainit/cache/CNNDM/filter_word.txt
+2024-03-04 23:38:30,435 INFO    : [INFO] Loading word2sent TFIDF file from /scratch/jainit/cache/CNNDM/train.w2s.tfidf.jsonl!
diff --git a/logs/train_20240307_224013 b/logs/train_20240307_224013
new file mode 100644
index 0000000..7ed0165
--- /dev/null
+++ b/logs/train_20240307_224013
@@ -0,0 +1,19 @@
+2024-03-07 22:40:13,242 INFO    : Pytorch 2.1.2+cu121
+2024-03-07 22:40:13,242 INFO    : [INFO] Create Vocab, vocab path is /scratch/jainit/cache/CNNDM/vocab
+2024-03-07 22:40:13,313 INFO    : [INFO] max_size of vocab was specified as 50000; we now have 50000 words. Stopping reading.
+2024-03-07 22:40:13,313 INFO    : [INFO] Finished constructing vocabulary of 50000 total words. Last word added: chaudhary
+2024-03-07 22:40:13,619 INFO    : [INFO] Loading external word embedding...
+2024-03-07 22:40:32,029 INFO    : [INFO] External Word Embedding iov count: 48427, oov count: 1573
+2024-03-07 22:40:32,809 INFO    : Namespace(data_dir='/scratch/jainit/cnndm', cache_dir='/scratch/jainit/cache/CNNDM', embedding_path='/scratch/jainit/glove.6B.300d.txt', model='HSG', restore_model='None', save_root='models', log_root='logs', seed=666, gpu='0', cuda=True, vocab_size=50000, n_epochs=20, batch_size=32, n_iter=1, word_embedding=True, word_emb_dim=300, embed_train=False, feat_embed_size=50, n_layers=1, lstm_hidden_state=128, lstm_layers=2, bidirectional=True, n_feature_size=128, hidden_size=64, ffn_inner_hidden_size=512, n_head=8, recurrent_dropout_prob=0.1, atten_dropout_prob=0.1, ffn_dropout_prob=0.1, use_orthnormal_init=True, sent_max_len=100, doc_max_timesteps=50, lr=0.0005, lr_descent=True, grad_clip=True, max_grad_norm=1.0, m=3)
+2024-03-07 22:40:32,906 INFO    : [MODEL] HeterSumGraph 
+2024-03-07 22:40:32,906 INFO    : [INFO] Start reading ExampleSet
+2024-03-07 22:40:39,757 INFO    : [INFO] Finish reading ExampleSet. Total time is 6.850199, Total size is 287084
+2024-03-07 22:40:39,757 INFO    : [INFO] Loading filter word File /scratch/jainit/cache/CNNDM/filter_word.txt
+2024-03-07 22:40:39,965 INFO    : [INFO] Loading word2sent TFIDF file from /scratch/jainit/cache/CNNDM/train.w2s.tfidf.jsonl!
+2024-03-07 22:42:27,874 INFO    : [INFO] Start reading ExampleSet
+2024-03-07 22:42:28,182 INFO    : [INFO] Finish reading ExampleSet. Total time is 0.307474, Total size is 13367
+2024-03-07 22:42:28,182 INFO    : [INFO] Loading filter word File /scratch/jainit/cache/CNNDM/filter_word.txt
+2024-03-07 22:42:28,436 INFO    : [INFO] Loading word2sent TFIDF file from /scratch/jainit/cache/CNNDM/val.w2s.tfidf.jsonl!
+2024-03-07 22:42:41,313 INFO    : [INFO] Use cuda
+2024-03-07 22:42:41,314 INFO    : [INFO] Create new model for training...
+2024-03-07 22:42:41,314 INFO    : [INFO] Starting run_training
diff --git a/logs/train_20240307_224949 b/logs/train_20240307_224949
new file mode 100644
index 0000000..f99b531
--- /dev/null
+++ b/logs/train_20240307_224949
@@ -0,0 +1,19 @@
+2024-03-07 22:49:49,256 INFO    : Pytorch 2.1.2+cu121
+2024-03-07 22:49:49,256 INFO    : [INFO] Create Vocab, vocab path is /scratch/jainit/cache/CNNDM/vocab
+2024-03-07 22:49:49,322 INFO    : [INFO] max_size of vocab was specified as 50000; we now have 50000 words. Stopping reading.
+2024-03-07 22:49:49,322 INFO    : [INFO] Finished constructing vocabulary of 50000 total words. Last word added: chaudhary
+2024-03-07 22:49:49,519 INFO    : [INFO] Loading external word embedding...
+2024-03-07 22:50:07,671 INFO    : [INFO] External Word Embedding iov count: 48427, oov count: 1573
+2024-03-07 22:50:08,462 INFO    : Namespace(data_dir='/scratch/jainit/cnndm', cache_dir='/scratch/jainit/cache/CNNDM', embedding_path='/scratch/jainit/glove.6B.300d.txt', model='HSG', restore_model='None', save_root='models', log_root='logs', seed=666, gpu='0', cuda=True, vocab_size=50000, n_epochs=20, batch_size=32, n_iter=1, word_embedding=True, word_emb_dim=300, embed_train=False, feat_embed_size=50, n_layers=1, lstm_hidden_state=128, lstm_layers=2, bidirectional=True, n_feature_size=128, hidden_size=64, ffn_inner_hidden_size=512, n_head=8, recurrent_dropout_prob=0.1, atten_dropout_prob=0.1, ffn_dropout_prob=0.1, use_orthnormal_init=True, sent_max_len=100, doc_max_timesteps=50, lr=0.0005, lr_descent=True, grad_clip=True, max_grad_norm=1.0, m=3)
+2024-03-07 22:50:08,564 INFO    : [MODEL] HeterSumGraph 
+2024-03-07 22:50:08,565 INFO    : [INFO] Start reading ExampleSet
+2024-03-07 22:50:15,644 INFO    : [INFO] Finish reading ExampleSet. Total time is 7.079171, Total size is 287084
+2024-03-07 22:50:15,644 INFO    : [INFO] Loading filter word File /scratch/jainit/cache/CNNDM/filter_word.txt
+2024-03-07 22:50:15,854 INFO    : [INFO] Loading word2sent TFIDF file from /scratch/jainit/cache/CNNDM/train.w2s.tfidf.jsonl!
+2024-03-07 22:51:59,319 INFO    : [INFO] Start reading ExampleSet
+2024-03-07 22:51:59,541 INFO    : [INFO] Finish reading ExampleSet. Total time is 0.220881, Total size is 13367
+2024-03-07 22:51:59,541 INFO    : [INFO] Loading filter word File /scratch/jainit/cache/CNNDM/filter_word.txt
+2024-03-07 22:51:59,754 INFO    : [INFO] Loading word2sent TFIDF file from /scratch/jainit/cache/CNNDM/val.w2s.tfidf.jsonl!
+2024-03-07 22:52:09,072 INFO    : [INFO] Use cuda
+2024-03-07 22:52:09,073 INFO    : [INFO] Create new model for training...
+2024-03-07 22:52:09,073 INFO    : [INFO] Starting run_training
diff --git a/logs/train_20240307_230618 b/logs/train_20240307_230618
new file mode 100644
index 0000000..a71b3f2
--- /dev/null
+++ b/logs/train_20240307_230618
@@ -0,0 +1,19 @@
+2024-03-07 23:06:18,345 INFO    : Pytorch 2.1.2+cu121
+2024-03-07 23:06:18,346 INFO    : [INFO] Create Vocab, vocab path is /scratch/jainit/cache/CNNDM/vocab
+2024-03-07 23:06:18,412 INFO    : [INFO] max_size of vocab was specified as 50000; we now have 50000 words. Stopping reading.
+2024-03-07 23:06:18,412 INFO    : [INFO] Finished constructing vocabulary of 50000 total words. Last word added: chaudhary
+2024-03-07 23:06:18,591 INFO    : [INFO] Loading external word embedding...
+2024-03-07 23:06:37,131 INFO    : [INFO] External Word Embedding iov count: 48427, oov count: 1573
+2024-03-07 23:06:37,935 INFO    : Namespace(data_dir='/scratch/jainit/cnndm', cache_dir='/scratch/jainit/cache/CNNDM', embedding_path='/scratch/jainit/glove.6B.300d.txt', model='HSG', restore_model='None', save_root='models', log_root='logs', seed=666, gpu='0', cuda=True, vocab_size=50000, n_epochs=20, batch_size=32, n_iter=1, word_embedding=True, word_emb_dim=300, embed_train=False, feat_embed_size=50, n_layers=1, lstm_hidden_state=128, lstm_layers=2, bidirectional=True, n_feature_size=128, hidden_size=64, ffn_inner_hidden_size=512, n_head=8, recurrent_dropout_prob=0.1, atten_dropout_prob=0.1, ffn_dropout_prob=0.1, use_orthnormal_init=True, sent_max_len=100, doc_max_timesteps=50, lr=0.0005, lr_descent=True, grad_clip=True, max_grad_norm=1.0, m=3)
+2024-03-07 23:06:38,036 INFO    : [MODEL] HeterSumGraph 
+2024-03-07 23:06:38,037 INFO    : [INFO] Start reading ExampleSet
+2024-03-07 23:06:45,035 INFO    : [INFO] Finish reading ExampleSet. Total time is 6.998097, Total size is 287084
+2024-03-07 23:06:45,035 INFO    : [INFO] Loading filter word File /scratch/jainit/cache/CNNDM/filter_word.txt
+2024-03-07 23:06:45,273 INFO    : [INFO] Loading word2sent TFIDF file from /scratch/jainit/cache/CNNDM/train.w2s.tfidf.jsonl!
+2024-03-07 23:08:30,094 INFO    : [INFO] Start reading ExampleSet
+2024-03-07 23:08:30,322 INFO    : [INFO] Finish reading ExampleSet. Total time is 0.227785, Total size is 13367
+2024-03-07 23:08:30,322 INFO    : [INFO] Loading filter word File /scratch/jainit/cache/CNNDM/filter_word.txt
+2024-03-07 23:08:30,548 INFO    : [INFO] Loading word2sent TFIDF file from /scratch/jainit/cache/CNNDM/val.w2s.tfidf.jsonl!
+2024-03-07 23:08:37,672 INFO    : [INFO] Use cuda
+2024-03-07 23:08:37,673 INFO    : [INFO] Create new model for training...
+2024-03-07 23:08:37,673 INFO    : [INFO] Starting run_training
diff --git a/logs/train_20240307_231149 b/logs/train_20240307_231149
new file mode 100644
index 0000000..6a97aa7
--- /dev/null
+++ b/logs/train_20240307_231149
@@ -0,0 +1,19 @@
+2024-03-07 23:11:49,495 INFO    : Pytorch 2.1.2+cu121
+2024-03-07 23:11:49,496 INFO    : [INFO] Create Vocab, vocab path is /scratch/jainit/cache/CNNDM/vocab
+2024-03-07 23:11:49,567 INFO    : [INFO] max_size of vocab was specified as 50000; we now have 50000 words. Stopping reading.
+2024-03-07 23:11:49,567 INFO    : [INFO] Finished constructing vocabulary of 50000 total words. Last word added: chaudhary
+2024-03-07 23:11:49,724 INFO    : [INFO] Loading external word embedding...
+2024-03-07 23:12:08,091 INFO    : [INFO] External Word Embedding iov count: 48427, oov count: 1573
+2024-03-07 23:12:08,910 INFO    : Namespace(data_dir='/scratch/jainit/cnndm', cache_dir='/scratch/jainit/cache/CNNDM', embedding_path='/scratch/jainit/glove.6B.300d.txt', model='HSG', restore_model='None', save_root='models', log_root='logs', seed=666, gpu='0', cuda=True, vocab_size=50000, n_epochs=20, batch_size=32, n_iter=1, word_embedding=True, word_emb_dim=300, embed_train=False, feat_embed_size=50, n_layers=1, lstm_hidden_state=128, lstm_layers=2, bidirectional=True, n_feature_size=128, hidden_size=64, ffn_inner_hidden_size=512, n_head=8, recurrent_dropout_prob=0.1, atten_dropout_prob=0.1, ffn_dropout_prob=0.1, use_orthnormal_init=True, sent_max_len=100, doc_max_timesteps=50, lr=0.0005, lr_descent=True, grad_clip=True, max_grad_norm=1.0, m=3)
+2024-03-07 23:12:09,064 INFO    : [MODEL] HeterSumGraph 
+2024-03-07 23:12:09,064 INFO    : [INFO] Start reading ExampleSet
+2024-03-07 23:12:17,707 INFO    : [INFO] Finish reading ExampleSet. Total time is 8.642636, Total size is 287084
+2024-03-07 23:12:17,707 INFO    : [INFO] Loading filter word File /scratch/jainit/cache/CNNDM/filter_word.txt
+2024-03-07 23:12:17,932 INFO    : [INFO] Loading word2sent TFIDF file from /scratch/jainit/cache/CNNDM/train.w2s.tfidf.jsonl!
+2024-03-07 23:14:02,867 INFO    : [INFO] Start reading ExampleSet
+2024-03-07 23:14:05,248 INFO    : [INFO] Finish reading ExampleSet. Total time is 2.381086, Total size is 13367
+2024-03-07 23:14:05,248 INFO    : [INFO] Loading filter word File /scratch/jainit/cache/CNNDM/filter_word.txt
+2024-03-07 23:14:05,481 INFO    : [INFO] Loading word2sent TFIDF file from /scratch/jainit/cache/CNNDM/val.w2s.tfidf.jsonl!
+2024-03-07 23:14:12,124 INFO    : [INFO] Use cuda
+2024-03-07 23:14:12,124 INFO    : [INFO] Create new model for training...
+2024-03-07 23:14:12,125 INFO    : [INFO] Starting run_training
diff --git a/logs/train_20240307_235940 b/logs/train_20240307_235940
new file mode 100644
index 0000000..18c6079
--- /dev/null
+++ b/logs/train_20240307_235940
@@ -0,0 +1,19 @@
+2024-03-07 23:59:40,422 INFO    : Pytorch 2.1.2+cu121
+2024-03-07 23:59:40,423 INFO    : [INFO] Create Vocab, vocab path is /scratch/jainit/cache/CNNDM/vocab
+2024-03-07 23:59:40,479 INFO    : [INFO] max_size of vocab was specified as 50000; we now have 50000 words. Stopping reading.
+2024-03-07 23:59:40,479 INFO    : [INFO] Finished constructing vocabulary of 50000 total words. Last word added: chaudhary
+2024-03-07 23:59:40,631 INFO    : [INFO] Loading external word embedding...
+2024-03-08 00:00:03,782 INFO    : [INFO] External Word Embedding iov count: 48427, oov count: 1573
+2024-03-08 00:00:04,691 INFO    : Namespace(data_dir='/scratch/jainit/cnndm', cache_dir='/scratch/jainit/cache/CNNDM', embedding_path='/scratch/jainit/glove.6B.300d.txt', model='HSG', restore_model='None', save_root='models', log_root='logs', seed=666, gpu='0', cuda=True, vocab_size=50000, n_epochs=20, batch_size=32, n_iter=1, word_embedding=True, word_emb_dim=300, embed_train=False, feat_embed_size=50, n_layers=1, lstm_hidden_state=128, lstm_layers=2, bidirectional=True, n_feature_size=128, hidden_size=64, ffn_inner_hidden_size=512, n_head=8, recurrent_dropout_prob=0.1, atten_dropout_prob=0.1, ffn_dropout_prob=0.1, use_orthnormal_init=True, sent_max_len=100, doc_max_timesteps=50, lr=0.0005, lr_descent=True, grad_clip=True, max_grad_norm=1.0, m=3)
+2024-03-08 00:00:04,796 INFO    : [MODEL] HeterSumGraph 
+2024-03-08 00:00:04,797 INFO    : [INFO] Start reading ExampleSet
+2024-03-08 00:00:13,417 INFO    : [INFO] Finish reading ExampleSet. Total time is 8.620221, Total size is 287084
+2024-03-08 00:00:13,417 INFO    : [INFO] Loading filter word File /scratch/jainit/cache/CNNDM/filter_word.txt
+2024-03-08 00:00:13,633 INFO    : [INFO] Loading word2sent TFIDF file from /scratch/jainit/cache/CNNDM/train.w2s.tfidf.jsonl!
+2024-03-08 00:01:58,838 INFO    : [INFO] Start reading ExampleSet
+2024-03-08 00:02:02,842 INFO    : [INFO] Finish reading ExampleSet. Total time is 4.002773, Total size is 13367
+2024-03-08 00:02:02,842 INFO    : [INFO] Loading filter word File /scratch/jainit/cache/CNNDM/filter_word.txt
+2024-03-08 00:02:03,065 INFO    : [INFO] Loading word2sent TFIDF file from /scratch/jainit/cache/CNNDM/val.w2s.tfidf.jsonl!
+2024-03-08 00:02:09,908 INFO    : [INFO] Use cuda
+2024-03-08 00:02:09,908 INFO    : [INFO] Create new model for training...
+2024-03-08 00:02:09,908 INFO    : [INFO] Starting run_training
diff --git a/logs/train_20240308_112151 b/logs/train_20240308_112151
new file mode 100644
index 0000000..4807462
--- /dev/null
+++ b/logs/train_20240308_112151
@@ -0,0 +1,19 @@
+2024-03-08 11:21:51,832 INFO    : Pytorch 2.1.2+cu121
+2024-03-08 11:21:51,833 INFO    : [INFO] Create Vocab, vocab path is /scratch/jainit/cache/CNNDM/vocab
+2024-03-08 11:21:51,906 INFO    : [INFO] max_size of vocab was specified as 50000; we now have 50000 words. Stopping reading.
+2024-03-08 11:21:51,907 INFO    : [INFO] Finished constructing vocabulary of 50000 total words. Last word added: chaudhary
+2024-03-08 11:21:52,062 INFO    : [INFO] Loading external word embedding...
+2024-03-08 11:22:15,261 INFO    : [INFO] External Word Embedding iov count: 48427, oov count: 1573
+2024-03-08 11:22:16,068 INFO    : Namespace(data_dir='/scratch/jainit/cnndm', cache_dir='/scratch/jainit/cache/CNNDM', embedding_path='/scratch/jainit/glove.6B.300d.txt', model='HSG', restore_model='None', save_root='models', log_root='logs', seed=666, gpu='0', cuda=True, vocab_size=50000, n_epochs=20, batch_size=32, n_iter=1, word_embedding=True, word_emb_dim=300, embed_train=False, feat_embed_size=50, n_layers=1, lstm_hidden_state=128, lstm_layers=2, bidirectional=True, n_feature_size=128, hidden_size=64, ffn_inner_hidden_size=512, n_head=8, recurrent_dropout_prob=0.1, atten_dropout_prob=0.1, ffn_dropout_prob=0.1, use_orthnormal_init=True, sent_max_len=100, doc_max_timesteps=50, lr=0.0005, lr_descent=True, grad_clip=True, max_grad_norm=1.0, m=3)
+2024-03-08 11:22:16,167 INFO    : [MODEL] HeterSumGraph 
+2024-03-08 11:22:16,167 INFO    : [INFO] Start reading ExampleSet
+2024-03-08 11:22:24,647 INFO    : [INFO] Finish reading ExampleSet. Total time is 8.479810, Total size is 287084
+2024-03-08 11:22:24,647 INFO    : [INFO] Loading filter word File /scratch/jainit/cache/CNNDM/filter_word.txt
+2024-03-08 11:22:24,864 INFO    : [INFO] Loading word2sent TFIDF file from /scratch/jainit/cache/CNNDM/train.w2s.tfidf.jsonl!
+2024-03-08 11:24:12,145 INFO    : [INFO] Start reading ExampleSet
+2024-03-08 11:24:19,772 INFO    : [INFO] Finish reading ExampleSet. Total time is 7.626825, Total size is 13367
+2024-03-08 11:24:19,772 INFO    : [INFO] Loading filter word File /scratch/jainit/cache/CNNDM/filter_word.txt
+2024-03-08 11:24:20,012 INFO    : [INFO] Loading word2sent TFIDF file from /scratch/jainit/cache/CNNDM/val.w2s.tfidf.jsonl!
+2024-03-08 11:24:31,287 INFO    : [INFO] Use cuda
+2024-03-08 11:24:31,288 INFO    : [INFO] Create new model for training...
+2024-03-08 11:24:31,288 INFO    : [INFO] Starting run_training
diff --git a/logs/train_20240308_140918 b/logs/train_20240308_140918
new file mode 100644
index 0000000..bda252c
--- /dev/null
+++ b/logs/train_20240308_140918
@@ -0,0 +1,19 @@
+2024-03-08 14:09:18,485 INFO    : Pytorch 2.1.2+cu121
+2024-03-08 14:09:18,485 INFO    : [INFO] Create Vocab, vocab path is /scratch/jainit/cache/CNNDM/vocab
+2024-03-08 14:09:18,538 INFO    : [INFO] max_size of vocab was specified as 50000; we now have 50000 words. Stopping reading.
+2024-03-08 14:09:18,538 INFO    : [INFO] Finished constructing vocabulary of 50000 total words. Last word added: chaudhary
+2024-03-08 14:09:18,735 INFO    : [INFO] Loading external word embedding...
+2024-03-08 14:09:41,286 INFO    : [INFO] External Word Embedding iov count: 48427, oov count: 1573
+2024-03-08 14:09:42,065 INFO    : Namespace(data_dir='/scratch/jainit/cnndm', cache_dir='/scratch/jainit/cache/CNNDM', embedding_path='/scratch/jainit/glove.6B.300d.txt', model='HSG', restore_model='None', save_root='models', log_root='logs', seed=666, gpu='0', cuda=True, vocab_size=50000, n_epochs=20, batch_size=32, n_iter=1, word_embedding=True, word_emb_dim=300, embed_train=False, feat_embed_size=50, n_layers=1, lstm_hidden_state=128, lstm_layers=2, bidirectional=True, n_feature_size=128, hidden_size=300, ffn_inner_hidden_size=512, n_head=8, recurrent_dropout_prob=0.1, atten_dropout_prob=0.1, ffn_dropout_prob=0.1, use_orthnormal_init=True, sent_max_len=100, doc_max_timesteps=50, lr=0.0005, lr_descent=True, grad_clip=True, max_grad_norm=1.0, m=3)
+2024-03-08 14:09:42,173 INFO    : [MODEL] HeterSumGraph 
+2024-03-08 14:09:42,173 INFO    : [INFO] Start reading ExampleSet
+2024-03-08 14:09:50,610 INFO    : [INFO] Finish reading ExampleSet. Total time is 8.436390, Total size is 287084
+2024-03-08 14:09:50,610 INFO    : [INFO] Loading filter word File /scratch/jainit/cache/CNNDM/filter_word.txt
+2024-03-08 14:09:50,831 INFO    : [INFO] Loading word2sent TFIDF file from /scratch/jainit/cache/CNNDM/train.w2s.tfidf.jsonl!
+2024-03-08 14:11:57,051 INFO    : [INFO] Start reading ExampleSet
+2024-03-08 14:12:01,182 INFO    : [INFO] Finish reading ExampleSet. Total time is 4.130599, Total size is 13367
+2024-03-08 14:12:01,182 INFO    : [INFO] Loading filter word File /scratch/jainit/cache/CNNDM/filter_word.txt
+2024-03-08 14:12:01,404 INFO    : [INFO] Loading word2sent TFIDF file from /scratch/jainit/cache/CNNDM/val.w2s.tfidf.jsonl!
+2024-03-08 14:12:13,354 INFO    : [INFO] Use cuda
+2024-03-08 14:12:13,354 INFO    : [INFO] Create new model for training...
+2024-03-08 14:12:13,354 INFO    : [INFO] Starting run_training
diff --git a/logs/train_20240310_140934 b/logs/train_20240310_140934
new file mode 100644
index 0000000..9183b0e
--- /dev/null
+++ b/logs/train_20240310_140934
@@ -0,0 +1,19 @@
+2024-03-10 14:09:34,340 INFO    : Pytorch 2.1.1+cu121
+2024-03-10 14:09:34,341 INFO    : [INFO] Create Vocab, vocab path is /scratch/hitesh.goel/cache/CNNDM/vocab
+2024-03-10 14:09:34,394 INFO    : [INFO] max_size of vocab was specified as 50000; we now have 50000 words. Stopping reading.
+2024-03-10 14:09:34,395 INFO    : [INFO] Finished constructing vocabulary of 50000 total words. Last word added: chaudhary
+2024-03-10 14:09:34,664 INFO    : [INFO] Loading external word embedding...
+2024-03-10 14:09:51,431 INFO    : [INFO] External Word Embedding iov count: 48427, oov count: 1573
+2024-03-10 14:09:52,190 INFO    : Namespace(data_dir='/scratch/hitesh.goel/cnndm', cache_dir='/scratch/hitesh.goel/cache/CNNDM', embedding_path='/scratch/hitesh.goel/glove.6B.300d.txt', model='HSG', restore_model='None', save_root='models', log_root='logs', seed=666, gpu='0', cuda=True, vocab_size=50000, n_epochs=20, batch_size=2, n_iter=1, word_embedding=True, word_emb_dim=300, embed_train=False, feat_embed_size=50, n_layers=1, lstm_hidden_state=128, lstm_layers=2, bidirectional=True, n_feature_size=128, hidden_size=300, ffn_inner_hidden_size=512, n_head=8, recurrent_dropout_prob=0.1, atten_dropout_prob=0.1, ffn_dropout_prob=0.1, use_orthnormal_init=True, sent_max_len=100, doc_max_timesteps=50, lr=0.0005, lr_descent=True, grad_clip=True, max_grad_norm=1.0, m=3)
+2024-03-10 14:09:52,299 INFO    : [MODEL] HeterSumGraph 
+2024-03-10 14:09:52,299 INFO    : [INFO] Start reading ExampleSet
+2024-03-10 14:09:58,440 INFO    : [INFO] Finish reading ExampleSet. Total time is 6.141213, Total size is 287084
+2024-03-10 14:09:58,440 INFO    : [INFO] Loading filter word File /scratch/hitesh.goel/cache/CNNDM/filter_word.txt
+2024-03-10 14:09:58,597 INFO    : [INFO] Loading word2sent TFIDF file from /scratch/hitesh.goel/cache/CNNDM/train.w2s.tfidf.jsonl!
+2024-03-10 14:11:37,296 INFO    : [INFO] Start reading ExampleSet
+2024-03-10 14:11:37,492 INFO    : [INFO] Finish reading ExampleSet. Total time is 0.195708, Total size is 13367
+2024-03-10 14:11:37,492 INFO    : [INFO] Loading filter word File /scratch/hitesh.goel/cache/CNNDM/filter_word.txt
+2024-03-10 14:11:37,653 INFO    : [INFO] Loading word2sent TFIDF file from /scratch/hitesh.goel/cache/CNNDM/val.w2s.tfidf.jsonl!
+2024-03-10 14:11:46,202 INFO    : [INFO] Use cuda
+2024-03-10 14:11:46,203 INFO    : [INFO] Create new model for training...
+2024-03-10 14:11:46,203 INFO    : [INFO] Starting run_training
diff --git a/logs/train_20240310_142149 b/logs/train_20240310_142149
new file mode 100644
index 0000000..6b91630
--- /dev/null
+++ b/logs/train_20240310_142149
@@ -0,0 +1,19 @@
+2024-03-10 14:21:49,429 INFO    : Pytorch 2.1.1+cu121
+2024-03-10 14:21:49,430 INFO    : [INFO] Create Vocab, vocab path is /scratch/hitesh.goel/cache/CNNDM/vocab
+2024-03-10 14:21:49,479 INFO    : [INFO] max_size of vocab was specified as 50000; we now have 50000 words. Stopping reading.
+2024-03-10 14:21:49,479 INFO    : [INFO] Finished constructing vocabulary of 50000 total words. Last word added: chaudhary
+2024-03-10 14:21:49,685 INFO    : [INFO] Loading external word embedding...
+2024-03-10 14:22:06,497 INFO    : [INFO] External Word Embedding iov count: 48427, oov count: 1573
+2024-03-10 14:22:07,264 INFO    : Namespace(data_dir='/scratch/hitesh.goel/cnndm', cache_dir='/scratch/hitesh.goel/cache/CNNDM', embedding_path='/scratch/hitesh.goel/glove.6B.300d.txt', model='HSG', restore_model='None', save_root='models', log_root='logs', seed=666, gpu='0', cuda=True, vocab_size=50000, n_epochs=20, batch_size=2, n_iter=1, word_embedding=True, word_emb_dim=300, embed_train=False, feat_embed_size=50, n_layers=1, lstm_hidden_state=128, lstm_layers=2, bidirectional=True, n_feature_size=600, hidden_size=300, ffn_inner_hidden_size=512, n_head=8, recurrent_dropout_prob=0.1, atten_dropout_prob=0.1, ffn_dropout_prob=0.1, use_orthnormal_init=True, sent_max_len=100, doc_max_timesteps=50, lr=0.0005, lr_descent=True, grad_clip=True, max_grad_norm=1.0, m=3)
+2024-03-10 14:22:07,370 INFO    : [MODEL] HeterSumGraph 
+2024-03-10 14:22:07,370 INFO    : [INFO] Start reading ExampleSet
+2024-03-10 14:22:13,646 INFO    : [INFO] Finish reading ExampleSet. Total time is 6.276629, Total size is 287084
+2024-03-10 14:22:13,647 INFO    : [INFO] Loading filter word File /scratch/hitesh.goel/cache/CNNDM/filter_word.txt
+2024-03-10 14:22:13,805 INFO    : [INFO] Loading word2sent TFIDF file from /scratch/hitesh.goel/cache/CNNDM/train.w2s.tfidf.jsonl!
+2024-03-10 14:23:53,305 INFO    : [INFO] Start reading ExampleSet
+2024-03-10 14:23:53,503 INFO    : [INFO] Finish reading ExampleSet. Total time is 0.196912, Total size is 13367
+2024-03-10 14:23:53,503 INFO    : [INFO] Loading filter word File /scratch/hitesh.goel/cache/CNNDM/filter_word.txt
+2024-03-10 14:23:53,661 INFO    : [INFO] Loading word2sent TFIDF file from /scratch/hitesh.goel/cache/CNNDM/val.w2s.tfidf.jsonl!
+2024-03-10 14:24:01,106 INFO    : [INFO] Use cuda
+2024-03-10 14:24:01,106 INFO    : [INFO] Create new model for training...
+2024-03-10 14:24:01,106 INFO    : [INFO] Starting run_training
diff --git a/logs/train_20240310_142553 b/logs/train_20240310_142553
new file mode 100644
index 0000000..73c2b02
--- /dev/null
+++ b/logs/train_20240310_142553
@@ -0,0 +1,19 @@
+2024-03-10 14:25:53,776 INFO    : Pytorch 2.1.1+cu121
+2024-03-10 14:25:53,776 INFO    : [INFO] Create Vocab, vocab path is /scratch/hitesh.goel/cache/CNNDM/vocab
+2024-03-10 14:25:53,825 INFO    : [INFO] max_size of vocab was specified as 50000; we now have 50000 words. Stopping reading.
+2024-03-10 14:25:53,825 INFO    : [INFO] Finished constructing vocabulary of 50000 total words. Last word added: chaudhary
+2024-03-10 14:25:54,019 INFO    : [INFO] Loading external word embedding...
+2024-03-10 14:26:10,977 INFO    : [INFO] External Word Embedding iov count: 48427, oov count: 1573
+2024-03-10 14:26:11,736 INFO    : Namespace(data_dir='/scratch/hitesh.goel/cnndm', cache_dir='/scratch/hitesh.goel/cache/CNNDM', embedding_path='/scratch/hitesh.goel/glove.6B.300d.txt', model='HSG', restore_model='None', save_root='models', log_root='logs', seed=666, gpu='0', cuda=True, vocab_size=50000, n_epochs=20, batch_size=2, n_iter=1, word_embedding=True, word_emb_dim=300, embed_train=False, feat_embed_size=50, n_layers=1, lstm_hidden_state=150, lstm_layers=2, bidirectional=True, n_feature_size=600, hidden_size=300, ffn_inner_hidden_size=512, n_head=8, recurrent_dropout_prob=0.1, atten_dropout_prob=0.1, ffn_dropout_prob=0.1, use_orthnormal_init=True, sent_max_len=100, doc_max_timesteps=50, lr=0.0005, lr_descent=True, grad_clip=True, max_grad_norm=1.0, m=3)
+2024-03-10 14:26:11,856 INFO    : [MODEL] HeterSumGraph 
+2024-03-10 14:26:11,856 INFO    : [INFO] Start reading ExampleSet
+2024-03-10 14:26:17,975 INFO    : [INFO] Finish reading ExampleSet. Total time is 6.118471, Total size is 287084
+2024-03-10 14:26:17,975 INFO    : [INFO] Loading filter word File /scratch/hitesh.goel/cache/CNNDM/filter_word.txt
+2024-03-10 14:26:18,129 INFO    : [INFO] Loading word2sent TFIDF file from /scratch/hitesh.goel/cache/CNNDM/train.w2s.tfidf.jsonl!
+2024-03-10 14:27:55,952 INFO    : [INFO] Start reading ExampleSet
+2024-03-10 14:27:56,149 INFO    : [INFO] Finish reading ExampleSet. Total time is 0.196935, Total size is 13367
+2024-03-10 14:27:56,150 INFO    : [INFO] Loading filter word File /scratch/hitesh.goel/cache/CNNDM/filter_word.txt
+2024-03-10 14:27:56,308 INFO    : [INFO] Loading word2sent TFIDF file from /scratch/hitesh.goel/cache/CNNDM/val.w2s.tfidf.jsonl!
+2024-03-10 14:28:03,807 INFO    : [INFO] Use cuda
+2024-03-10 14:28:03,810 INFO    : [INFO] Create new model for training...
+2024-03-10 14:28:03,810 INFO    : [INFO] Starting run_training
diff --git a/logs/train_20240310_161935 b/logs/train_20240310_161935
new file mode 100644
index 0000000..b54459e
--- /dev/null
+++ b/logs/train_20240310_161935
@@ -0,0 +1,23 @@
+2024-03-10 16:19:35,597 INFO    : Pytorch 2.1.1+cu121
+2024-03-10 16:19:35,598 INFO    : [INFO] Create Vocab, vocab path is /scratch/hitesh.goel/cache/CNNDM/vocab
+2024-03-10 16:19:35,645 INFO    : [INFO] max_size of vocab was specified as 50000; we now have 50000 words. Stopping reading.
+2024-03-10 16:19:35,645 INFO    : [INFO] Finished constructing vocabulary of 50000 total words. Last word added: chaudhary
+2024-03-10 16:19:35,795 INFO    : [INFO] Loading external word embedding...
+2024-03-10 16:19:56,039 INFO    : [INFO] External Word Embedding iov count: 48427, oov count: 1573
+2024-03-10 16:19:56,810 INFO    : Namespace(data_dir='/scratch/hitesh.goel/cnndm', cache_dir='/scratch/hitesh.goel/cache/CNNDM', embedding_path='/scratch/hitesh.goel/glove.6B.300d.txt', model='HSG', restore_model='None', save_root='models', log_root='logs', seed=666, gpu='0', cuda=True, vocab_size=50000, n_epochs=20, batch_size=2, n_iter=1, word_embedding=True, word_emb_dim=300, embed_train=False, feat_embed_size=50, n_layers=1, lstm_hidden_state=128, lstm_layers=2, bidirectional=True, n_feature_size=600, hidden_size=300, ffn_inner_hidden_size=512, n_head=6, recurrent_dropout_prob=0.1, atten_dropout_prob=0.1, ffn_dropout_prob=0.1, use_orthnormal_init=True, sent_max_len=100, doc_max_timesteps=50, lr=0.0005, lr_descent=True, grad_clip=True, max_grad_norm=1.0, m=3)
+2024-03-10 16:19:56,977 INFO    : [MODEL] HeterSumGraph 
+2024-03-10 16:19:56,978 INFO    : [INFO] Start reading ExampleSet
+2024-03-10 16:20:05,946 INFO    : [INFO] Finish reading ExampleSet. Total time is 8.968457, Total size is 287084
+2024-03-10 16:20:05,946 INFO    : [INFO] Loading filter word File /scratch/hitesh.goel/cache/CNNDM/filter_word.txt
+2024-03-10 16:20:06,148 INFO    : [INFO] Loading word2sent TFIDF file from /scratch/hitesh.goel/cache/CNNDM/train.w2s.tfidf.jsonl!
+2024-03-10 16:21:43,800 INFO    : [INFO] Start reading ExampleSet
+2024-03-10 16:21:44,000 INFO    : [INFO] Finish reading ExampleSet. Total time is 0.200338, Total size is 13367
+2024-03-10 16:21:44,000 INFO    : [INFO] Loading filter word File /scratch/hitesh.goel/cache/CNNDM/filter_word.txt
+2024-03-10 16:21:44,160 INFO    : [INFO] Loading word2sent TFIDF file from /scratch/hitesh.goel/cache/CNNDM/val.w2s.tfidf.jsonl!
+2024-03-10 16:21:51,842 INFO    : [INFO] Use cuda
+2024-03-10 16:21:51,842 INFO    : [INFO] Create new model for training...
+2024-03-10 16:21:51,842 INFO    : [INFO] Starting run_training
+2024-03-10 16:22:15,666 INFO    :        | end of iter   0 | time: 10.61s | train loss 0.1893 | 
+2024-03-10 16:23:26,119 INFO    :        | end of iter 100 | time:  0.62s | train loss 7.1916 | 
+2024-03-10 16:24:24,127 ERROR   : [Error] Caught keyboard interrupt on worker. Stopping supervisor...
+2024-03-10 16:24:24,465 INFO    : [INFO] Saving model to models/train/earlystop
diff --git a/logs/train_20240310_162550 b/logs/train_20240310_162550
new file mode 100644
index 0000000..18765e8
--- /dev/null
+++ b/logs/train_20240310_162550
@@ -0,0 +1,19 @@
+2024-03-10 16:25:50,971 INFO    : Pytorch 2.1.1+cu121
+2024-03-10 16:25:50,972 INFO    : [INFO] Create Vocab, vocab path is /scratch/hitesh.goel/cache/CNNDM/vocab
+2024-03-10 16:25:51,019 INFO    : [INFO] max_size of vocab was specified as 50000; we now have 50000 words. Stopping reading.
+2024-03-10 16:25:51,019 INFO    : [INFO] Finished constructing vocabulary of 50000 total words. Last word added: chaudhary
+2024-03-10 16:25:51,168 INFO    : [INFO] Loading external word embedding...
+2024-03-10 16:26:15,853 INFO    : [INFO] External Word Embedding iov count: 48427, oov count: 1573
+2024-03-10 16:26:16,747 INFO    : Namespace(data_dir='/scratch/hitesh.goel/cnndm', cache_dir='/scratch/hitesh.goel/cache/CNNDM', embedding_path='/scratch/hitesh.goel/glove.6B.300d.txt', model='HSG', restore_model='None', save_root='models', log_root='logs', seed=666, gpu='0', cuda=True, vocab_size=50000, n_epochs=20, batch_size=32, n_iter=1, word_embedding=True, word_emb_dim=300, embed_train=False, feat_embed_size=50, n_layers=1, lstm_hidden_state=128, lstm_layers=2, bidirectional=True, n_feature_size=600, hidden_size=300, ffn_inner_hidden_size=512, n_head=6, recurrent_dropout_prob=0.1, atten_dropout_prob=0.1, ffn_dropout_prob=0.1, use_orthnormal_init=True, sent_max_len=100, doc_max_timesteps=50, lr=0.0005, lr_descent=True, grad_clip=True, max_grad_norm=1.0, m=3)
+2024-03-10 16:26:16,863 INFO    : [MODEL] HeterSumGraph 
+2024-03-10 16:26:16,863 INFO    : [INFO] Start reading ExampleSet
+2024-03-10 16:26:26,793 INFO    : [INFO] Finish reading ExampleSet. Total time is 9.929747, Total size is 287084
+2024-03-10 16:26:26,793 INFO    : [INFO] Loading filter word File /scratch/hitesh.goel/cache/CNNDM/filter_word.txt
+2024-03-10 16:26:26,980 INFO    : [INFO] Loading word2sent TFIDF file from /scratch/hitesh.goel/cache/CNNDM/train.w2s.tfidf.jsonl!
+2024-03-10 16:28:14,406 INFO    : [INFO] Start reading ExampleSet
+2024-03-10 16:28:14,796 INFO    : [INFO] Finish reading ExampleSet. Total time is 0.390079, Total size is 13367
+2024-03-10 16:28:14,797 INFO    : [INFO] Loading filter word File /scratch/hitesh.goel/cache/CNNDM/filter_word.txt
+2024-03-10 16:28:14,989 INFO    : [INFO] Loading word2sent TFIDF file from /scratch/hitesh.goel/cache/CNNDM/val.w2s.tfidf.jsonl!
+2024-03-10 16:28:22,335 INFO    : [INFO] Use cuda
+2024-03-10 16:28:22,335 INFO    : [INFO] Create new model for training...
+2024-03-10 16:28:22,346 INFO    : [INFO] Starting run_training
diff --git a/logs/train_20240310_163630 b/logs/train_20240310_163630
new file mode 100644
index 0000000..d78fde0
--- /dev/null
+++ b/logs/train_20240310_163630
@@ -0,0 +1,20 @@
+2024-03-10 16:36:30,884 INFO    : Pytorch 2.1.1+cu121
+2024-03-10 16:36:30,884 INFO    : [INFO] Create Vocab, vocab path is /scratch/hitesh.goel/cache/CNNDM/vocab
+2024-03-10 16:36:30,939 INFO    : [INFO] max_size of vocab was specified as 50000; we now have 50000 words. Stopping reading.
+2024-03-10 16:36:30,939 INFO    : [INFO] Finished constructing vocabulary of 50000 total words. Last word added: chaudhary
+2024-03-10 16:36:31,136 INFO    : [INFO] Loading external word embedding...
+2024-03-10 16:36:54,839 INFO    : [INFO] External Word Embedding iov count: 48427, oov count: 1573
+2024-03-10 16:36:55,648 INFO    : Namespace(data_dir='/scratch/hitesh.goel/cnndm', cache_dir='/scratch/hitesh.goel/cache/CNNDM', embedding_path='/scratch/hitesh.goel/glove.6B.300d.txt', model='HSG', restore_model='None', save_root='models', log_root='logs', seed=666, gpu='0', cuda=True, vocab_size=50000, n_epochs=20, batch_size=16, n_iter=1, word_embedding=True, word_emb_dim=300, embed_train=False, feat_embed_size=50, n_layers=1, lstm_hidden_state=128, lstm_layers=2, bidirectional=True, n_feature_size=600, hidden_size=300, ffn_inner_hidden_size=512, n_head=6, recurrent_dropout_prob=0.1, atten_dropout_prob=0.1, ffn_dropout_prob=0.1, use_orthnormal_init=True, sent_max_len=100, doc_max_timesteps=50, lr=0.0005, lr_descent=True, grad_clip=True, max_grad_norm=1.0, m=3)
+2024-03-10 16:36:55,761 INFO    : [MODEL] HeterSumGraph 
+2024-03-10 16:36:55,761 INFO    : [INFO] Start reading ExampleSet
+2024-03-10 16:37:05,574 INFO    : [INFO] Finish reading ExampleSet. Total time is 9.812226, Total size is 287084
+2024-03-10 16:37:05,574 INFO    : [INFO] Loading filter word File /scratch/hitesh.goel/cache/CNNDM/filter_word.txt
+2024-03-10 16:37:05,757 INFO    : [INFO] Loading word2sent TFIDF file from /scratch/hitesh.goel/cache/CNNDM/train.w2s.tfidf.jsonl!
+2024-03-10 16:38:52,554 INFO    : [INFO] Start reading ExampleSet
+2024-03-10 16:38:52,944 INFO    : [INFO] Finish reading ExampleSet. Total time is 0.390236, Total size is 13367
+2024-03-10 16:38:52,944 INFO    : [INFO] Loading filter word File /scratch/hitesh.goel/cache/CNNDM/filter_word.txt
+2024-03-10 16:38:53,114 INFO    : [INFO] Loading word2sent TFIDF file from /scratch/hitesh.goel/cache/CNNDM/val.w2s.tfidf.jsonl!
+2024-03-10 16:38:58,984 INFO    : [INFO] Use cuda
+2024-03-10 16:38:58,985 INFO    : [INFO] Create new model for training...
+2024-03-10 16:38:58,985 INFO    : [INFO] Starting run_training
+2024-03-10 16:39:38,820 INFO    :        | end of iter   0 | time: 18.29s | train loss 0.1737 | 
diff --git a/logs/train_20240313_195952 b/logs/train_20240313_195952
new file mode 100644
index 0000000..59ea223
--- /dev/null
+++ b/logs/train_20240313_195952
@@ -0,0 +1,22 @@
+2024-03-13 19:59:52,129 INFO    : Pytorch 2.1.2+cu121
+2024-03-13 19:59:52,130 INFO    : [INFO] Create Vocab, vocab path is ../cache/CNNDM/vocab
+2024-03-13 19:59:52,176 INFO    : [INFO] max_size of vocab was specified as 50000; we now have 50000 words. Stopping reading.
+2024-03-13 19:59:52,176 INFO    : [INFO] Finished constructing vocabulary of 50000 total words. Last word added: chaudhary
+2024-03-13 19:59:52,375 INFO    : [INFO] Loading external word embedding...
+2024-03-13 20:00:12,817 INFO    : [INFO] External Word Embedding iov count: 48427, oov count: 1573
+2024-03-13 20:00:13,640 INFO    : Namespace(data_dir='../cnndm', cache_dir='../cache/CNNDM', embedding_path='../glove.6B.300d.txt', model='HSG', restore_model='None', save_root='models', log_root='logs', seed=666, gpu='0', cuda=True, vocab_size=50000, n_epochs=20, batch_size=8, n_iter=1, word_embedding=True, word_emb_dim=300, embed_train=False, feat_embed_size=50, n_layers=1, lstm_hidden_state=128, lstm_layers=2, bidirectional=True, n_feature_size=600, hidden_size=300, ffn_inner_hidden_size=512, n_head=6, recurrent_dropout_prob=0.1, atten_dropout_prob=0.1, ffn_dropout_prob=0.1, use_orthnormal_init=True, sent_max_len=100, doc_max_timesteps=50, lr=0.0005, lr_descent=True, grad_clip=True, max_grad_norm=1.0, m=3)
+2024-03-13 20:00:13,783 INFO    : [MODEL] HeterSumGraph 
+2024-03-13 20:00:13,783 INFO    : [INFO] Start reading ExampleSet
+2024-03-13 20:00:23,180 INFO    : [INFO] Finish reading ExampleSet. Total time is 9.397234, Total size is 287084
+2024-03-13 20:00:23,180 INFO    : [INFO] Loading filter word File ../cache/CNNDM/filter_word.txt
+2024-03-13 20:00:23,460 INFO    : [INFO] Loading word2sent TFIDF file from ../cache/CNNDM/train.w2s.tfidf.jsonl!
+2024-03-13 20:02:09,453 INFO    : [INFO] Start reading ExampleSet
+2024-03-13 20:02:14,373 INFO    : [INFO] Finish reading ExampleSet. Total time is 4.919685, Total size is 13367
+2024-03-13 20:02:14,374 INFO    : [INFO] Loading filter word File ../cache/CNNDM/filter_word.txt
+2024-03-13 20:02:14,599 INFO    : [INFO] Loading word2sent TFIDF file from ../cache/CNNDM/val.w2s.tfidf.jsonl!
+2024-03-13 20:02:26,767 INFO    : [INFO] Use cuda
+2024-03-13 20:02:26,767 INFO    : [INFO] Create new model for training...
+2024-03-13 20:02:26,768 INFO    : [INFO] Starting run_training
+2024-03-13 20:03:11,189 INFO    :        | end of iter   0 | time: 18.72s | train loss 0.1739 | 
+2024-03-13 20:05:32,756 INFO    :        | end of iter 100 | time:  1.15s | train loss 7.1578 | 
+2024-03-13 20:07:43,187 INFO    :        | end of iter 200 | time:  6.88s | train loss 6.4838 | 
diff --git a/logs/train_20240313_202313 b/logs/train_20240313_202313
new file mode 100644
index 0000000..c76aec1
--- /dev/null
+++ b/logs/train_20240313_202313
@@ -0,0 +1,25 @@
+2024-03-13 20:23:13,538 INFO    : Pytorch 2.1.2+cu121
+2024-03-13 20:23:13,538 INFO    : [INFO] Create Vocab, vocab path is ../cache/CNNDM/vocab
+2024-03-13 20:23:13,585 INFO    : [INFO] max_size of vocab was specified as 50000; we now have 50000 words. Stopping reading.
+2024-03-13 20:23:13,585 INFO    : [INFO] Finished constructing vocabulary of 50000 total words. Last word added: chaudhary
+2024-03-13 20:23:13,760 INFO    : [INFO] Loading external word embedding...
+2024-03-13 20:23:32,459 INFO    : [INFO] External Word Embedding iov count: 48427, oov count: 1573
+2024-03-13 20:23:33,272 INFO    : Namespace(data_dir='../cnndm', cache_dir='../cache/CNNDM', embedding_path='../glove.6B.300d.txt', model='HSG', restore_model='None', save_root='models', log_root='logs', seed=666, gpu='0', cuda=True, vocab_size=50000, n_epochs=20, batch_size=8, n_iter=1, word_embedding=True, word_emb_dim=300, embed_train=False, feat_embed_size=50, n_layers=1, lstm_hidden_state=128, lstm_layers=2, bidirectional=True, n_feature_size=600, hidden_size=300, ffn_inner_hidden_size=512, n_head=6, recurrent_dropout_prob=0.1, atten_dropout_prob=0.1, ffn_dropout_prob=0.1, use_orthnormal_init=True, sent_max_len=100, doc_max_timesteps=50, lr=0.0005, lr_descent=True, grad_clip=True, max_grad_norm=1.0, num_workers=20, m=3)
+2024-03-13 20:23:33,376 INFO    : [MODEL] HeterSumGraph 
+2024-03-13 20:23:33,376 INFO    : [INFO] Start reading ExampleSet
+2024-03-13 20:23:39,953 INFO    : [INFO] Finish reading ExampleSet. Total time is 6.577035, Total size is 287084
+2024-03-13 20:23:39,953 INFO    : [INFO] Loading filter word File ../cache/CNNDM/filter_word.txt
+2024-03-13 20:23:40,167 INFO    : [INFO] Loading word2sent TFIDF file from ../cache/CNNDM/train.w2s.tfidf.jsonl!
+2024-03-13 20:25:26,784 INFO    : [INFO] Start reading ExampleSet
+2024-03-13 20:25:31,111 INFO    : [INFO] Finish reading ExampleSet. Total time is 4.326437, Total size is 13367
+2024-03-13 20:25:31,111 INFO    : [INFO] Loading filter word File ../cache/CNNDM/filter_word.txt
+2024-03-13 20:25:31,343 INFO    : [INFO] Loading word2sent TFIDF file from ../cache/CNNDM/val.w2s.tfidf.jsonl!
+2024-03-13 20:25:41,100 INFO    : [INFO] Use cuda
+2024-03-13 20:25:41,101 INFO    : [INFO] Create new model for training...
+2024-03-13 20:25:41,101 INFO    : [INFO] Starting run_training
+2024-03-13 20:26:08,337 INFO    :        | end of iter   0 | time: 13.49s | train loss 0.1739 | 
+2024-03-13 20:28:31,819 INFO    :        | end of iter 100 | time:  1.19s | train loss 7.0417 | 
+2024-03-13 20:30:42,476 INFO    :        | end of iter 200 | time:  1.27s | train loss 6.4820 | 
+2024-03-13 20:32:56,626 INFO    :        | end of iter 300 | time:  1.42s | train loss 6.2474 | 
+2024-03-13 20:35:09,709 INFO    :        | end of iter 400 | time:  1.40s | train loss 6.1955 | 
+2024-03-13 20:37:23,852 INFO    :        | end of iter 500 | time:  1.13s | train loss 6.0585 | 
diff --git a/logs/train_20240313_204048 b/logs/train_20240313_204048
new file mode 100644
index 0000000..b215c26
--- /dev/null
+++ b/logs/train_20240313_204048
@@ -0,0 +1,12 @@
+2024-03-13 20:40:48,152 INFO    : Pytorch 2.1.2+cu121
+2024-03-13 20:40:48,152 INFO    : [INFO] Create Vocab, vocab path is ../cache/CNNDM/vocab
+2024-03-13 20:40:48,215 INFO    : [INFO] max_size of vocab was specified as 50000; we now have 50000 words. Stopping reading.
+2024-03-13 20:40:48,215 INFO    : [INFO] Finished constructing vocabulary of 50000 total words. Last word added: chaudhary
+2024-03-13 20:40:48,368 INFO    : [INFO] Loading external word embedding...
+2024-03-13 20:41:12,383 INFO    : [INFO] External Word Embedding iov count: 48427, oov count: 1573
+2024-03-13 20:41:13,164 INFO    : Namespace(data_dir='../cnndm', cache_dir='../cache/CNNDM', embedding_path='../glove.6B.300d.txt', model='HSG', restore_model='None', save_root='models', log_root='logs', seed=666, gpu='0', cuda=True, vocab_size=50000, n_epochs=20, batch_size=8, n_iter=1, word_embedding=True, word_emb_dim=300, embed_train=False, feat_embed_size=50, n_layers=1, lstm_hidden_state=128, lstm_layers=2, bidirectional=True, n_feature_size=600, hidden_size=300, ffn_inner_hidden_size=512, n_head=6, recurrent_dropout_prob=0.1, atten_dropout_prob=0.1, ffn_dropout_prob=0.1, use_orthnormal_init=True, sent_max_len=100, doc_max_timesteps=50, lr=0.0005, lr_descent=True, grad_clip=True, max_grad_norm=1.0, num_workers=0, m=3)
+2024-03-13 20:41:13,274 INFO    : [MODEL] HeterSumGraph 
+2024-03-13 20:41:13,274 INFO    : [INFO] Start reading ExampleSet
+2024-03-13 20:41:22,807 INFO    : [INFO] Finish reading ExampleSet. Total time is 9.532565, Total size is 287084
+2024-03-13 20:41:22,807 INFO    : [INFO] Loading filter word File ../cache/CNNDM/filter_word.txt
+2024-03-13 20:41:23,034 INFO    : [INFO] Loading word2sent TFIDF file from ../cache/CNNDM/train.w2s.tfidf.jsonl!
diff --git a/logs/train_20240313_204348 b/logs/train_20240313_204348
new file mode 100644
index 0000000..1c94686
--- /dev/null
+++ b/logs/train_20240313_204348
@@ -0,0 +1,12 @@
+2024-03-13 20:43:48,968 INFO    : Pytorch 2.1.2+cu121
+2024-03-13 20:43:48,968 INFO    : [INFO] Create Vocab, vocab path is ../cache/CNNDM/vocab
+2024-03-13 20:43:49,015 INFO    : [INFO] max_size of vocab was specified as 50000; we now have 50000 words. Stopping reading.
+2024-03-13 20:43:49,015 INFO    : [INFO] Finished constructing vocabulary of 50000 total words. Last word added: chaudhary
+2024-03-13 20:43:49,164 INFO    : [INFO] Loading external word embedding...
+2024-03-13 20:44:08,346 INFO    : [INFO] External Word Embedding iov count: 48427, oov count: 1573
+2024-03-13 20:44:09,152 INFO    : Namespace(data_dir='../cnndm', cache_dir='../cache/CNNDM', embedding_path='../glove.6B.300d.txt', model='HSG', restore_model='None', save_root='models', log_root='logs', seed=666, gpu='0', cuda=True, vocab_size=50000, n_epochs=20, batch_size=8, n_iter=1, word_embedding=True, word_emb_dim=300, embed_train=False, feat_embed_size=50, n_layers=1, lstm_hidden_state=128, lstm_layers=2, bidirectional=True, n_feature_size=600, hidden_size=300, ffn_inner_hidden_size=512, n_head=6, recurrent_dropout_prob=0.1, atten_dropout_prob=0.1, ffn_dropout_prob=0.1, use_orthnormal_init=True, sent_max_len=100, doc_max_timesteps=50, lr=0.0005, lr_descent=True, grad_clip=True, max_grad_norm=1.0, num_workers=0, m=3)
+2024-03-13 20:44:09,291 INFO    : [MODEL] HeterSumGraph 
+2024-03-13 20:44:09,291 INFO    : [INFO] Start reading ExampleSet
+2024-03-13 20:44:16,201 INFO    : [INFO] Finish reading ExampleSet. Total time is 6.909347, Total size is 287084
+2024-03-13 20:44:16,201 INFO    : [INFO] Loading filter word File ../cache/CNNDM/filter_word.txt
+2024-03-13 20:44:16,422 INFO    : [INFO] Loading word2sent TFIDF file from ../cache/CNNDM/train.w2s.tfidf.jsonl!
diff --git a/logs/train_20240313_204751 b/logs/train_20240313_204751
new file mode 100644
index 0000000..4a24b5b
--- /dev/null
+++ b/logs/train_20240313_204751
@@ -0,0 +1,22 @@
+2024-03-13 20:47:51,519 INFO    : Pytorch 2.1.2+cu121
+2024-03-13 20:47:51,519 INFO    : [INFO] Create Vocab, vocab path is ../cache/CNNDM/vocab
+2024-03-13 20:47:51,582 INFO    : [INFO] max_size of vocab was specified as 50000; we now have 50000 words. Stopping reading.
+2024-03-13 20:47:51,582 INFO    : [INFO] Finished constructing vocabulary of 50000 total words. Last word added: chaudhary
+2024-03-13 20:47:51,774 INFO    : [INFO] Loading external word embedding...
+2024-03-13 20:48:11,005 INFO    : [INFO] External Word Embedding iov count: 48427, oov count: 1573
+2024-03-13 20:48:11,809 INFO    : Namespace(data_dir='../cnndm', cache_dir='../cache/CNNDM', embedding_path='../glove.6B.300d.txt', model='HSG', restore_model='None', save_root='models', log_root='logs', seed=666, gpu='0', cuda=True, vocab_size=50000, n_epochs=20, batch_size=8, n_iter=1, word_embedding=True, word_emb_dim=300, embed_train=False, feat_embed_size=50, n_layers=1, lstm_hidden_state=128, lstm_layers=2, bidirectional=True, n_feature_size=600, hidden_size=300, ffn_inner_hidden_size=512, n_head=6, recurrent_dropout_prob=0.1, atten_dropout_prob=0.1, ffn_dropout_prob=0.1, use_orthnormal_init=True, sent_max_len=100, doc_max_timesteps=50, lr=0.0005, lr_descent=True, grad_clip=True, max_grad_norm=1.0, num_workers=0, m=3)
+2024-03-13 20:48:11,917 INFO    : [MODEL] HeterSumGraph 
+2024-03-13 20:48:11,917 INFO    : [INFO] Start reading ExampleSet
+2024-03-13 20:48:18,687 INFO    : [INFO] Finish reading ExampleSet. Total time is 6.770476, Total size is 287084
+2024-03-13 20:48:18,688 INFO    : [INFO] Loading filter word File ../cache/CNNDM/filter_word.txt
+2024-03-13 20:48:18,909 INFO    : [INFO] Loading word2sent TFIDF file from ../cache/CNNDM/train.w2s.tfidf.jsonl!
+2024-03-13 20:50:10,856 INFO    : [INFO] Start reading ExampleSet
+2024-03-13 20:50:15,300 INFO    : [INFO] Finish reading ExampleSet. Total time is 4.444042, Total size is 13367
+2024-03-13 20:50:15,300 INFO    : [INFO] Loading filter word File ../cache/CNNDM/filter_word.txt
+2024-03-13 20:50:15,537 INFO    : [INFO] Loading word2sent TFIDF file from ../cache/CNNDM/val.w2s.tfidf.jsonl!
+2024-03-13 20:50:24,413 INFO    : [INFO] Use cuda
+2024-03-13 20:50:24,414 INFO    : [INFO] Create new model for training...
+2024-03-13 20:50:24,414 INFO    : [INFO] Starting run_training
+2024-03-13 20:50:36,648 INFO    :        | end of iter   0 | time:  7.93s | train loss 0.1739 | 
+2024-03-13 20:54:57,146 ERROR   : [Error] Caught keyboard interrupt on worker. Stopping supervisor...
+2024-03-13 20:54:57,280 INFO    : [INFO] Saving model to models/train/earlystop
diff --git a/logs/train_20240313_210002 b/logs/train_20240313_210002
new file mode 100644
index 0000000..3887090
--- /dev/null
+++ b/logs/train_20240313_210002
@@ -0,0 +1,25 @@
+2024-03-13 21:00:02,215 INFO    : Pytorch 2.1.2+cu121
+2024-03-13 21:00:02,215 INFO    : [INFO] Create Vocab, vocab path is ../cache/CNNDM/vocab
+2024-03-13 21:00:02,276 INFO    : [INFO] max_size of vocab was specified as 50000; we now have 50000 words. Stopping reading.
+2024-03-13 21:00:02,276 INFO    : [INFO] Finished constructing vocabulary of 50000 total words. Last word added: chaudhary
+2024-03-13 21:00:02,467 INFO    : [INFO] Loading external word embedding...
+2024-03-13 21:00:22,191 INFO    : [INFO] External Word Embedding iov count: 48427, oov count: 1573
+2024-03-13 21:00:23,069 INFO    : Namespace(data_dir='../cnndm', cache_dir='../cache/CNNDM', embedding_path='../glove.6B.300d.txt', model='HSG', restore_model='None', save_root='models', log_root='logs', seed=666, gpu='0', cuda=True, vocab_size=50000, n_epochs=20, batch_size=8, n_iter=1, word_embedding=True, word_emb_dim=300, embed_train=False, feat_embed_size=50, n_layers=1, lstm_hidden_state=128, lstm_layers=2, bidirectional=True, n_feature_size=600, hidden_size=300, ffn_inner_hidden_size=512, n_head=6, recurrent_dropout_prob=0.1, atten_dropout_prob=0.1, ffn_dropout_prob=0.1, use_orthnormal_init=True, sent_max_len=100, doc_max_timesteps=50, lr=0.0005, lr_descent=True, grad_clip=True, max_grad_norm=1.0, num_workers=0, m=3)
+2024-03-13 21:00:23,214 INFO    : [MODEL] HeterSumGraph 
+2024-03-13 21:00:23,214 INFO    : [INFO] Start reading ExampleSet
+2024-03-13 21:00:29,787 INFO    : [INFO] Finish reading ExampleSet. Total time is 6.573366, Total size is 287084
+2024-03-13 21:00:29,788 INFO    : [INFO] Loading filter word File ../cache/CNNDM/filter_word.txt
+2024-03-13 21:00:30,010 INFO    : [INFO] Loading word2sent TFIDF file from ../cache/CNNDM/train.w2s.tfidf.jsonl!
+2024-03-13 21:02:17,934 INFO    : [INFO] Start reading ExampleSet
+2024-03-13 21:02:22,055 INFO    : [INFO] Finish reading ExampleSet. Total time is 4.120558, Total size is 13367
+2024-03-13 21:02:22,056 INFO    : [INFO] Loading filter word File ../cache/CNNDM/filter_word.txt
+2024-03-13 21:02:22,285 INFO    : [INFO] Loading word2sent TFIDF file from ../cache/CNNDM/val.w2s.tfidf.jsonl!
+2024-03-13 21:02:29,065 INFO    : [INFO] Use cuda
+2024-03-13 21:02:29,065 INFO    : [INFO] Create new model for training...
+2024-03-13 21:02:29,073 INFO    : [INFO] Starting run_training
+2024-03-13 21:02:35,444 INFO    :        | end of iter   0 | time:  2.85s | train loss 0.1739 | 
+2024-03-13 21:08:35,299 INFO    :        | end of iter 100 | time:  1.19s | train loss 7.1492 | 
+2024-03-13 21:14:35,217 INFO    :        | end of iter 200 | time:  1.25s | train loss 6.4774 | 
+2024-03-13 21:20:35,631 INFO    :        | end of iter 300 | time:  1.44s | train loss 6.3574 | 
+2024-03-13 21:25:29,217 ERROR   : [Error] Caught keyboard interrupt on worker. Stopping supervisor...
+2024-03-13 21:25:29,363 INFO    : [INFO] Saving model to models/train/earlystop
diff --git a/logs/train_20240314_111137 b/logs/train_20240314_111137
new file mode 100644
index 0000000..29c4ed0
--- /dev/null
+++ b/logs/train_20240314_111137
@@ -0,0 +1,22 @@
+2024-03-14 11:11:37,782 INFO    : Pytorch 2.1.2+cu121
+2024-03-14 11:11:37,782 INFO    : [INFO] Create Vocab, vocab path is ../cache/CNNDM/vocab
+2024-03-14 11:11:37,846 INFO    : [INFO] max_size of vocab was specified as 50000; we now have 50000 words. Stopping reading.
+2024-03-14 11:11:37,846 INFO    : [INFO] Finished constructing vocabulary of 50000 total words. Last word added: chaudhary
+2024-03-14 11:11:38,036 INFO    : [INFO] Loading external word embedding...
+2024-03-14 11:11:57,228 INFO    : [INFO] External Word Embedding iov count: 48427, oov count: 1573
+2024-03-14 11:11:57,996 INFO    : Namespace(data_dir='../cnndm', cache_dir='../cache/CNNDM', embedding_path='../glove.6B.300d.txt', model='HSG', restore_model='None', save_root='models', log_root='logs', seed=666, gpu='0', cuda=True, vocab_size=50000, n_epochs=20, batch_size=8, n_iter=1, word_embedding=True, word_emb_dim=300, embed_train=False, feat_embed_size=50, n_layers=1, lstm_hidden_state=128, lstm_layers=2, bidirectional=True, n_feature_size=600, hidden_size=300, ffn_inner_hidden_size=512, n_head=6, recurrent_dropout_prob=0.1, atten_dropout_prob=0.1, ffn_dropout_prob=0.1, use_orthnormal_init=True, sent_max_len=100, doc_max_timesteps=50, lr=0.0005, lr_descent=True, grad_clip=True, max_grad_norm=1.0, num_workers=0, m=3)
+2024-03-14 11:11:58,103 INFO    : [MODEL] HeterSumGraph 
+2024-03-14 11:11:58,104 INFO    : [INFO] Start reading ExampleSet
+2024-03-14 11:12:04,502 INFO    : [INFO] Finish reading ExampleSet. Total time is 6.397902, Total size is 287084
+2024-03-14 11:12:04,502 INFO    : [INFO] Loading filter word File ../cache/CNNDM/filter_word.txt
+2024-03-14 11:12:04,713 INFO    : [INFO] Loading word2sent TFIDF file from ../cache/CNNDM/train.w2s.tfidf.jsonl!
+2024-03-14 11:13:47,866 INFO    : [INFO] Start reading ExampleSet
+2024-03-14 11:13:52,082 INFO    : [INFO] Finish reading ExampleSet. Total time is 4.216200, Total size is 13367
+2024-03-14 11:13:52,082 INFO    : [INFO] Loading filter word File ../cache/CNNDM/filter_word.txt
+2024-03-14 11:13:52,305 INFO    : [INFO] Loading word2sent TFIDF file from ../cache/CNNDM/val.w2s.tfidf.jsonl!
+2024-03-14 11:14:03,225 INFO    : [INFO] Use cuda
+2024-03-14 11:14:03,225 INFO    : [INFO] Create new model for training...
+2024-03-14 11:14:03,232 INFO    : [INFO] Starting run_training
+2024-03-14 11:14:08,851 INFO    :        | end of iter   0 | time:  2.60s | train loss 0.1739 | 
+2024-03-14 11:15:04,051 ERROR   : [Error] Caught keyboard interrupt on worker. Stopping supervisor...
+2024-03-14 11:15:04,186 INFO    : [INFO] Saving model to models/train/earlystop
diff --git a/logs/train_20240314_112007 b/logs/train_20240314_112007
new file mode 100644
index 0000000..b7b9749
--- /dev/null
+++ b/logs/train_20240314_112007
@@ -0,0 +1,22 @@
+2024-03-14 11:20:07,558 INFO    : Pytorch 2.1.2+cu121
+2024-03-14 11:20:07,559 INFO    : [INFO] Create Vocab, vocab path is ../cache/CNNDM/vocab
+2024-03-14 11:20:07,623 INFO    : [INFO] max_size of vocab was specified as 50000; we now have 50000 words. Stopping reading.
+2024-03-14 11:20:07,624 INFO    : [INFO] Finished constructing vocabulary of 50000 total words. Last word added: chaudhary
+2024-03-14 11:20:07,809 INFO    : [INFO] Loading external word embedding...
+2024-03-14 11:20:26,041 INFO    : [INFO] External Word Embedding iov count: 48427, oov count: 1573
+2024-03-14 11:20:26,858 INFO    : Namespace(data_dir='../cnndm', cache_dir='../cache/CNNDM', embedding_path='../glove.6B.300d.txt', model='HSG', restore_model='None', save_root='models', log_root='logs', seed=666, gpu='0', cuda=True, vocab_size=50000, n_epochs=20, batch_size=32, n_iter=1, word_embedding=True, word_emb_dim=300, embed_train=False, feat_embed_size=50, n_layers=1, lstm_hidden_state=128, lstm_layers=2, bidirectional=True, n_feature_size=600, hidden_size=300, ffn_inner_hidden_size=512, n_head=6, recurrent_dropout_prob=0.1, atten_dropout_prob=0.1, ffn_dropout_prob=0.1, use_orthnormal_init=True, sent_max_len=100, doc_max_timesteps=50, lr=0.0005, lr_descent=True, grad_clip=True, max_grad_norm=1.0, num_workers=0, m=3)
+2024-03-14 11:20:27,003 INFO    : [MODEL] HeterSumGraph 
+2024-03-14 11:20:27,003 INFO    : [INFO] Start reading ExampleSet
+2024-03-14 11:20:33,495 INFO    : [INFO] Finish reading ExampleSet. Total time is 6.491271, Total size is 287084
+2024-03-14 11:20:33,495 INFO    : [INFO] Loading filter word File ../cache/CNNDM/filter_word.txt
+2024-03-14 11:20:33,706 INFO    : [INFO] Loading word2sent TFIDF file from ../cache/CNNDM/train.w2s.tfidf.jsonl!
+2024-03-14 11:22:18,447 INFO    : [INFO] Start reading ExampleSet
+2024-03-14 11:22:22,713 INFO    : [INFO] Finish reading ExampleSet. Total time is 4.266135, Total size is 13367
+2024-03-14 11:22:22,713 INFO    : [INFO] Loading filter word File ../cache/CNNDM/filter_word.txt
+2024-03-14 11:22:22,944 INFO    : [INFO] Loading word2sent TFIDF file from ../cache/CNNDM/val.w2s.tfidf.jsonl!
+2024-03-14 11:22:29,547 INFO    : [INFO] Use cuda
+2024-03-14 11:22:29,548 INFO    : [INFO] Create new model for training...
+2024-03-14 11:22:29,555 INFO    : [INFO] Starting run_training
+2024-03-14 11:22:42,343 INFO    :        | end of iter   0 | time:  3.20s | train loss 0.1813 | 
+2024-03-14 11:22:47,892 ERROR   : [Error] Caught keyboard interrupt on worker. Stopping supervisor...
+2024-03-14 11:22:48,030 INFO    : [INFO] Saving model to models/train/earlystop
diff --git a/logs/train_20240314_112542 b/logs/train_20240314_112542
new file mode 100644
index 0000000..416aff9
--- /dev/null
+++ b/logs/train_20240314_112542
@@ -0,0 +1,22 @@
+2024-03-14 11:25:42,802 INFO    : Pytorch 2.1.2+cu121
+2024-03-14 11:25:42,802 INFO    : [INFO] Create Vocab, vocab path is ../cache/CNNDM/vocab
+2024-03-14 11:25:42,863 INFO    : [INFO] max_size of vocab was specified as 50000; we now have 50000 words. Stopping reading.
+2024-03-14 11:25:42,863 INFO    : [INFO] Finished constructing vocabulary of 50000 total words. Last word added: chaudhary
+2024-03-14 11:25:43,055 INFO    : [INFO] Loading external word embedding...
+2024-03-14 11:26:02,864 INFO    : [INFO] External Word Embedding iov count: 48427, oov count: 1573
+2024-03-14 11:26:03,754 INFO    : Namespace(data_dir='../cnndm', cache_dir='../cache/CNNDM', embedding_path='../glove.6B.300d.txt', model='HSG', restore_model='None', save_root='models', log_root='logs', seed=666, gpu='0', cuda=True, vocab_size=50000, n_epochs=20, batch_size=128, n_iter=1, word_embedding=True, word_emb_dim=300, embed_train=False, feat_embed_size=50, n_layers=1, lstm_hidden_state=128, lstm_layers=2, bidirectional=True, n_feature_size=600, hidden_size=300, ffn_inner_hidden_size=512, n_head=6, recurrent_dropout_prob=0.1, atten_dropout_prob=0.1, ffn_dropout_prob=0.1, use_orthnormal_init=True, sent_max_len=100, doc_max_timesteps=50, lr=0.0005, lr_descent=True, grad_clip=True, max_grad_norm=1.0, num_workers=0, m=3)
+2024-03-14 11:26:03,898 INFO    : [MODEL] HeterSumGraph 
+2024-03-14 11:26:03,898 INFO    : [INFO] Start reading ExampleSet
+2024-03-14 11:26:10,748 INFO    : [INFO] Finish reading ExampleSet. Total time is 6.849214, Total size is 287084
+2024-03-14 11:26:10,748 INFO    : [INFO] Loading filter word File ../cache/CNNDM/filter_word.txt
+2024-03-14 11:26:10,970 INFO    : [INFO] Loading word2sent TFIDF file from ../cache/CNNDM/train.w2s.tfidf.jsonl!
+2024-03-14 11:28:08,251 INFO    : [INFO] Start reading ExampleSet
+2024-03-14 11:28:12,489 INFO    : [INFO] Finish reading ExampleSet. Total time is 4.237693, Total size is 13367
+2024-03-14 11:28:12,489 INFO    : [INFO] Loading filter word File ../cache/CNNDM/filter_word.txt
+2024-03-14 11:28:12,727 INFO    : [INFO] Loading word2sent TFIDF file from ../cache/CNNDM/val.w2s.tfidf.jsonl!
+2024-03-14 11:28:21,784 INFO    : [INFO] Use cuda
+2024-03-14 11:28:21,784 INFO    : [INFO] Create new model for training...
+2024-03-14 11:28:21,792 INFO    : [INFO] Starting run_training
+2024-03-14 11:29:02,183 INFO    :        | end of iter   0 | time:  4.66s | train loss 0.1832 | 
+2024-03-14 11:29:10,893 ERROR   : [Error] Caught keyboard interrupt on worker. Stopping supervisor...
+2024-03-14 11:29:11,031 INFO    : [INFO] Saving model to models/train/earlystop
diff --git a/logs/train_20240314_151316 b/logs/train_20240314_151316
new file mode 100644
index 0000000..48fd675
--- /dev/null
+++ b/logs/train_20240314_151316
@@ -0,0 +1,21 @@
+2024-03-14 15:13:16,254 INFO    : Pytorch 2.1.2+cu121
+2024-03-14 15:13:16,255 INFO    : [INFO] Create Vocab, vocab path is ../cache/CNNDM/vocab
+2024-03-14 15:13:16,318 INFO    : [INFO] max_size of vocab was specified as 50000; we now have 50000 words. Stopping reading.
+2024-03-14 15:13:16,318 INFO    : [INFO] Finished constructing vocabulary of 50000 total words. Last word added: chaudhary
+2024-03-14 15:13:16,467 INFO    : [INFO] Loading external word embedding...
+2024-03-14 15:13:34,650 INFO    : [INFO] External Word Embedding iov count: 48427, oov count: 1573
+2024-03-14 15:13:35,420 INFO    : Namespace(data_dir='../cnndm', cache_dir='../cache/CNNDM', embedding_path='../glove.6B.300d.txt', model='HSG', restore_model='None', save_root='models', log_root='logs', seed=666, gpu='0', cuda=True, vocab_size=50000, n_epochs=20, batch_size=128, n_iter=1, word_embedding=True, word_emb_dim=300, embed_train=False, feat_embed_size=50, n_layers=1, lstm_hidden_state=128, lstm_layers=2, bidirectional=True, n_feature_size=600, hidden_size=300, ffn_inner_hidden_size=512, n_head=6, recurrent_dropout_prob=0.1, atten_dropout_prob=0.1, ffn_dropout_prob=0.1, use_orthnormal_init=True, sent_max_len=100, doc_max_timesteps=50, lr=0.0005, lr_descent=True, grad_clip=True, max_grad_norm=1.0, num_workers=0, m=3)
+2024-03-14 15:13:35,522 INFO    : [MODEL] HeterSumGraph 
+2024-03-14 15:13:35,522 INFO    : [INFO] Start reading ExampleSet
+2024-03-14 15:13:41,978 INFO    : [INFO] Finish reading ExampleSet. Total time is 6.455553, Total size is 287084
+2024-03-14 15:13:41,978 INFO    : [INFO] Loading filter word File ../cache/CNNDM/filter_word.txt
+2024-03-14 15:13:42,216 INFO    : [INFO] Loading word2sent TFIDF file from ../cache/CNNDM/train.w2s.tfidf.jsonl!
+2024-03-14 15:15:26,866 INFO    : [INFO] Start reading ExampleSet
+2024-03-14 15:15:31,147 INFO    : [INFO] Finish reading ExampleSet. Total time is 4.280194, Total size is 13367
+2024-03-14 15:15:31,147 INFO    : [INFO] Loading filter word File ../cache/CNNDM/filter_word.txt
+2024-03-14 15:15:31,371 INFO    : [INFO] Loading word2sent TFIDF file from ../cache/CNNDM/val.w2s.tfidf.jsonl!
+2024-03-14 15:15:42,423 INFO    : [INFO] Use cuda
+2024-03-14 15:15:42,423 INFO    : [INFO] Create new model for training...
+2024-03-14 15:15:42,431 INFO    : [INFO] Starting run_training
+2024-03-14 15:15:49,460 ERROR   : [Error] Caught keyboard interrupt on worker. Stopping supervisor...
+2024-03-14 15:15:49,597 INFO    : [INFO] Saving model to models/train/earlystop
diff --git a/logs/train_20240314_155550 b/logs/train_20240314_155550
new file mode 100644
index 0000000..8300760
--- /dev/null
+++ b/logs/train_20240314_155550
@@ -0,0 +1,22 @@
+2024-03-14 15:55:50,705 INFO    : Pytorch 2.1.2+cu121
+2024-03-14 15:55:50,706 INFO    : [INFO] Create Vocab, vocab path is ../cache/CNNDM/vocab
+2024-03-14 15:55:50,769 INFO    : [INFO] max_size of vocab was specified as 50000; we now have 50000 words. Stopping reading.
+2024-03-14 15:55:50,769 INFO    : [INFO] Finished constructing vocabulary of 50000 total words. Last word added: chaudhary
+2024-03-14 15:55:50,963 INFO    : [INFO] Loading external word embedding...
+2024-03-14 15:56:09,002 INFO    : [INFO] External Word Embedding iov count: 48427, oov count: 1573
+2024-03-14 15:56:09,874 INFO    : Namespace(data_dir='../cnndm', cache_dir='../cache/CNNDM', embedding_path='../glove.6B.300d.txt', model='HSG', restore_model='None', save_root='models', log_root='logs', seed=666, gpu='0', cuda=True, vocab_size=50000, n_epochs=20, batch_size=128, n_iter=1, word_embedding=True, word_emb_dim=300, embed_train=False, feat_embed_size=50, n_layers=1, lstm_hidden_state=128, lstm_layers=2, bidirectional=True, n_feature_size=600, hidden_size=300, ffn_inner_hidden_size=512, n_head=6, recurrent_dropout_prob=0.1, atten_dropout_prob=0.1, ffn_dropout_prob=0.1, use_orthnormal_init=True, sent_max_len=100, doc_max_timesteps=50, lr=0.0005, lr_descent=True, grad_clip=True, max_grad_norm=1.0, num_workers=0, m=3)
+2024-03-14 15:56:10,021 INFO    : [MODEL] HeterSumGraph 
+2024-03-14 15:56:10,021 INFO    : [INFO] Start reading ExampleSet
+2024-03-14 15:56:16,430 INFO    : [INFO] Finish reading ExampleSet. Total time is 6.409096, Total size is 287084
+2024-03-14 15:56:16,430 INFO    : [INFO] Loading filter word File ../cache/CNNDM/filter_word.txt
+2024-03-14 15:56:16,646 INFO    : [INFO] Loading word2sent TFIDF file from ../cache/CNNDM/train.w2s.tfidf.jsonl!
+2024-03-14 16:01:11,881 INFO    : [INFO] Start reading ExampleSet
+2024-03-14 16:01:12,110 INFO    : [INFO] Finish reading ExampleSet. Total time is 0.228669, Total size is 13367
+2024-03-14 16:01:12,110 INFO    : [INFO] Loading filter word File ../cache/CNNDM/filter_word.txt
+2024-03-14 16:01:12,344 INFO    : [INFO] Loading word2sent TFIDF file from ../cache/CNNDM/val.w2s.tfidf.jsonl!
+2024-03-14 16:01:28,645 INFO    : [INFO] Use cuda
+2024-03-14 16:01:28,645 INFO    : [INFO] Create new model for training...
+2024-03-14 16:01:28,654 INFO    : [INFO] Starting run_training
+2024-03-14 16:02:13,094 INFO    :        | end of iter   0 | time:  7.62s | train loss 0.1832 | 
+2024-03-14 16:02:23,146 ERROR   : [Error] Caught keyboard interrupt on worker. Stopping supervisor...
+2024-03-14 16:02:23,285 INFO    : [INFO] Saving model to models/train/earlystop
diff --git a/logs/train_20240314_160735 b/logs/train_20240314_160735
new file mode 100644
index 0000000..049ac50
--- /dev/null
+++ b/logs/train_20240314_160735
@@ -0,0 +1,12 @@
+2024-03-14 16:07:35,172 INFO    : Pytorch 2.1.2+cu121
+2024-03-14 16:07:35,172 INFO    : [INFO] Create Vocab, vocab path is ../cache/CNNDM/vocab
+2024-03-14 16:07:35,228 INFO    : [INFO] max_size of vocab was specified as 50000; we now have 50000 words. Stopping reading.
+2024-03-14 16:07:35,228 INFO    : [INFO] Finished constructing vocabulary of 50000 total words. Last word added: chaudhary
+2024-03-14 16:07:35,403 INFO    : [INFO] Loading external word embedding...
+2024-03-14 16:07:53,687 INFO    : [INFO] External Word Embedding iov count: 48427, oov count: 1573
+2024-03-14 16:07:54,495 INFO    : Namespace(data_dir='../cnndm', cache_dir='../cache/CNNDM', embedding_path='../glove.6B.300d.txt', model='HSG', restore_model='None', save_root='models', log_root='logs', seed=666, gpu='0', cuda=True, vocab_size=50000, n_epochs=20, batch_size=128, n_iter=1, word_embedding=True, word_emb_dim=300, embed_train=False, feat_embed_size=50, n_layers=1, lstm_hidden_state=128, lstm_layers=2, bidirectional=True, n_feature_size=600, hidden_size=300, ffn_inner_hidden_size=512, n_head=6, recurrent_dropout_prob=0.1, atten_dropout_prob=0.1, ffn_dropout_prob=0.1, use_orthnormal_init=True, sent_max_len=100, doc_max_timesteps=50, lr=0.0005, lr_descent=True, grad_clip=True, max_grad_norm=1.0, num_workers=0, m=3)
+2024-03-14 16:07:54,600 INFO    : [MODEL] HeterSumGraph 
+2024-03-14 16:07:54,601 INFO    : [INFO] Start reading ExampleSet
+2024-03-14 16:08:01,360 INFO    : [INFO] Finish reading ExampleSet. Total time is 6.759209, Total size is 287084
+2024-03-14 16:08:01,360 INFO    : [INFO] Loading filter word File ../cache/CNNDM/filter_word.txt
+2024-03-14 16:08:01,592 INFO    : [INFO] Loading word2sent TFIDF file from ../cache/CNNDM/train.w2s.tfidf.jsonl!
diff --git a/logs/train_20240314_161103 b/logs/train_20240314_161103
new file mode 100644
index 0000000..a260d18
--- /dev/null
+++ b/logs/train_20240314_161103
@@ -0,0 +1,12 @@
+2024-03-14 16:11:03,388 INFO    : Pytorch 2.1.2+cu121
+2024-03-14 16:11:03,389 INFO    : [INFO] Create Vocab, vocab path is ../cache/CNNDM/vocab
+2024-03-14 16:11:03,454 INFO    : [INFO] max_size of vocab was specified as 50000; we now have 50000 words. Stopping reading.
+2024-03-14 16:11:03,454 INFO    : [INFO] Finished constructing vocabulary of 50000 total words. Last word added: chaudhary
+2024-03-14 16:11:03,645 INFO    : [INFO] Loading external word embedding...
+2024-03-14 16:11:22,047 INFO    : [INFO] External Word Embedding iov count: 48427, oov count: 1573
+2024-03-14 16:11:22,881 INFO    : Namespace(data_dir='../cnndm', cache_dir='../cache/CNNDM', embedding_path='../glove.6B.300d.txt', model='HSG', restore_model='None', save_root='models', log_root='logs', seed=666, gpu='0', cuda=True, vocab_size=50000, n_epochs=20, batch_size=128, n_iter=1, word_embedding=True, word_emb_dim=300, embed_train=False, feat_embed_size=50, n_layers=1, lstm_hidden_state=128, lstm_layers=2, bidirectional=True, n_feature_size=600, hidden_size=300, ffn_inner_hidden_size=512, n_head=6, recurrent_dropout_prob=0.1, atten_dropout_prob=0.1, ffn_dropout_prob=0.1, use_orthnormal_init=True, sent_max_len=100, doc_max_timesteps=50, lr=0.0005, lr_descent=True, grad_clip=True, max_grad_norm=1.0, num_workers=0, m=3)
+2024-03-14 16:11:22,986 INFO    : [MODEL] HeterSumGraph 
+2024-03-14 16:11:22,986 INFO    : [INFO] Start reading ExampleSet
+2024-03-14 16:11:29,582 INFO    : [INFO] Finish reading ExampleSet. Total time is 6.595914, Total size is 287084
+2024-03-14 16:11:29,582 INFO    : [INFO] Loading filter word File ../cache/CNNDM/filter_word.txt
+2024-03-14 16:11:29,795 INFO    : [INFO] Loading word2sent TFIDF file from ../cache/CNNDM/train.w2s.tfidf.jsonl!
diff --git a/logs/train_20240314_163303 b/logs/train_20240314_163303
new file mode 100644
index 0000000..b355ea6
--- /dev/null
+++ b/logs/train_20240314_163303
@@ -0,0 +1,12 @@
+2024-03-14 16:33:03,061 INFO    : Pytorch 2.1.2+cu121
+2024-03-14 16:33:03,062 INFO    : [INFO] Create Vocab, vocab path is ../cache/CNNDM/vocab
+2024-03-14 16:33:03,127 INFO    : [INFO] max_size of vocab was specified as 50000; we now have 50000 words. Stopping reading.
+2024-03-14 16:33:03,127 INFO    : [INFO] Finished constructing vocabulary of 50000 total words. Last word added: chaudhary
+2024-03-14 16:33:03,319 INFO    : [INFO] Loading external word embedding...
+2024-03-14 16:33:21,773 INFO    : [INFO] External Word Embedding iov count: 48427, oov count: 1573
+2024-03-14 16:33:22,590 INFO    : Namespace(data_dir='../cnndm', cache_dir='../cache/CNNDM', embedding_path='../glove.6B.300d.txt', model='HSG', restore_model='None', save_root='models', log_root='logs', seed=666, gpu='0', cuda=True, vocab_size=50000, n_epochs=20, batch_size=128, n_iter=1, word_embedding=True, word_emb_dim=300, embed_train=False, feat_embed_size=50, n_layers=1, lstm_hidden_state=128, lstm_layers=2, bidirectional=True, n_feature_size=600, hidden_size=300, ffn_inner_hidden_size=512, n_head=6, recurrent_dropout_prob=0.1, atten_dropout_prob=0.1, ffn_dropout_prob=0.1, use_orthnormal_init=True, sent_max_len=100, doc_max_timesteps=50, lr=0.0005, lr_descent=True, grad_clip=True, max_grad_norm=1.0, num_workers=0, m=3)
+2024-03-14 16:33:22,736 INFO    : [MODEL] HeterSumGraph 
+2024-03-14 16:33:22,737 INFO    : [INFO] Start reading ExampleSet
+2024-03-14 16:33:29,354 INFO    : [INFO] Finish reading ExampleSet. Total time is 6.617285, Total size is 287084
+2024-03-14 16:33:29,354 INFO    : [INFO] Loading filter word File ../cache/CNNDM/filter_word.txt
+2024-03-14 16:33:29,566 INFO    : [INFO] Loading word2sent TFIDF file from ../cache/CNNDM/train.w2s.tfidf.jsonl!
diff --git a/logs/train_20240316_230148 b/logs/train_20240316_230148
new file mode 100644
index 0000000..dcc8358
--- /dev/null
+++ b/logs/train_20240316_230148
@@ -0,0 +1,12 @@
+2024-03-16 23:01:48,952 INFO    : Pytorch 2.1.2+cu121
+2024-03-16 23:01:48,952 INFO    : [INFO] Create Vocab, vocab path is ../cache/CNNDM/vocab
+2024-03-16 23:01:49,018 INFO    : [INFO] max_size of vocab was specified as 50000; we now have 50000 words. Stopping reading.
+2024-03-16 23:01:49,018 INFO    : [INFO] Finished constructing vocabulary of 50000 total words. Last word added: chaudhary
+2024-03-16 23:01:49,210 INFO    : [INFO] Loading external word embedding...
+2024-03-16 23:02:08,518 INFO    : [INFO] External Word Embedding iov count: 48427, oov count: 1573
+2024-03-16 23:02:09,442 INFO    : Namespace(data_dir='../cnndm', cache_dir='../cache/CNNDM', embedding_path='../glove.6B.300d.txt', model='HSG', restore_model='None', save_root='models', log_root='logs', seed=666, gpu='0', cuda=True, vocab_size=50000, n_epochs=20, batch_size=128, n_iter=1, word_embedding=True, word_emb_dim=300, embed_train=False, feat_embed_size=50, n_layers=1, lstm_hidden_state=128, lstm_layers=2, bidirectional=True, n_feature_size=600, hidden_size=300, ffn_inner_hidden_size=512, n_head=6, recurrent_dropout_prob=0.1, atten_dropout_prob=0.1, ffn_dropout_prob=0.1, use_orthnormal_init=True, sent_max_len=100, doc_max_timesteps=50, lr=0.0005, lr_descent=True, grad_clip=True, max_grad_norm=1.0, num_workers=0, m=3)
+2024-03-16 23:02:09,565 INFO    : [MODEL] HeterSumGraph 
+2024-03-16 23:02:09,565 INFO    : [INFO] Start reading ExampleSet
+2024-03-16 23:02:16,269 INFO    : [INFO] Finish reading ExampleSet. Total time is 6.704105, Total size is 287084
+2024-03-16 23:02:16,269 INFO    : [INFO] Loading filter word File ../cache/CNNDM/filter_word.txt
+2024-03-16 23:02:16,493 INFO    : [INFO] Loading word2sent TFIDF file from ../cache/CNNDM/train.w2s.tfidf.jsonl!
diff --git a/logs/train_20240316_231022 b/logs/train_20240316_231022
new file mode 100644
index 0000000..9cb1b45
--- /dev/null
+++ b/logs/train_20240316_231022
@@ -0,0 +1,2 @@
+2024-03-16 23:10:22,494 INFO    : Pytorch 2.1.2+cu121
+2024-03-16 23:10:22,494 INFO    : [INFO] Create Vocab, vocab path is /scratch/hitesh.goel/cache/CNNDM/vocab
diff --git a/logs/train_20240316_231106 b/logs/train_20240316_231106
new file mode 100644
index 0000000..136b757
--- /dev/null
+++ b/logs/train_20240316_231106
@@ -0,0 +1,21 @@
+2024-03-16 23:11:06,642 INFO    : Pytorch 2.1.2+cu121
+2024-03-16 23:11:06,642 INFO    : [INFO] Create Vocab, vocab path is ../cache/CNNDM/vocab
+2024-03-16 23:11:06,712 INFO    : [INFO] max_size of vocab was specified as 50000; we now have 50000 words. Stopping reading.
+2024-03-16 23:11:06,713 INFO    : [INFO] Finished constructing vocabulary of 50000 total words. Last word added: chaudhary
+2024-03-16 23:11:06,908 INFO    : [INFO] Loading external word embedding...
+2024-03-16 23:11:26,092 INFO    : [INFO] External Word Embedding iov count: 48427, oov count: 1573
+2024-03-16 23:11:27,019 INFO    : Namespace(data_dir='../cnndm', cache_dir='../cache/CNNDM', embedding_path='../glove.6B.300d.txt', model='HSG', restore_model='None', save_root='models', log_root='logs', seed=666, gpu='0', cuda=True, vocab_size=50000, n_epochs=20, batch_size=32, n_iter=1, word_embedding=True, word_emb_dim=300, embed_train=False, feat_embed_size=50, n_layers=1, lstm_hidden_state=128, lstm_layers=2, bidirectional=True, n_feature_size=600, hidden_size=300, ffn_inner_hidden_size=512, n_head=6, recurrent_dropout_prob=0.1, atten_dropout_prob=0.1, ffn_dropout_prob=0.1, use_orthnormal_init=True, sent_max_len=100, doc_max_timesteps=50, lr=0.0005, lr_descent=True, grad_clip=True, max_grad_norm=1.0, m=3)
+2024-03-16 23:11:27,132 INFO    : [MODEL] HeterSumGraph 
+2024-03-16 23:11:27,133 INFO    : [INFO] Start reading ExampleSet
+2024-03-16 23:11:33,662 INFO    : [INFO] Finish reading ExampleSet. Total time is 6.529430, Total size is 287084
+2024-03-16 23:11:33,662 INFO    : [INFO] Loading filter word File ../cache/CNNDM/filter_word.txt
+2024-03-16 23:11:33,874 INFO    : [INFO] Loading word2sent TFIDF file from ../cache/CNNDM/train.w2s.tfidf.jsonl!
+2024-03-16 23:13:20,587 INFO    : [INFO] Start reading ExampleSet
+2024-03-16 23:13:24,599 INFO    : [INFO] Finish reading ExampleSet. Total time is 4.011803, Total size is 13367
+2024-03-16 23:13:24,599 INFO    : [INFO] Loading filter word File ../cache/CNNDM/filter_word.txt
+2024-03-16 23:13:24,831 INFO    : [INFO] Loading word2sent TFIDF file from ../cache/CNNDM/val.w2s.tfidf.jsonl!
+2024-03-16 23:13:31,435 INFO    : [INFO] Use cuda
+2024-03-16 23:13:31,435 INFO    : [INFO] Create new model for training...
+2024-03-16 23:13:31,444 INFO    : [INFO] Starting run_training
+2024-03-16 23:13:54,507 ERROR   : [Error] Caught keyboard interrupt on worker. Stopping supervisor...
+2024-03-16 23:13:54,928 INFO    : [INFO] Saving model to models/train/earlystop
diff --git a/logs/train_20240316_231440 b/logs/train_20240316_231440
new file mode 100644
index 0000000..009f66b
--- /dev/null
+++ b/logs/train_20240316_231440
@@ -0,0 +1,20 @@
+2024-03-16 23:14:40,515 INFO    : Pytorch 2.1.2+cu121
+2024-03-16 23:14:40,515 INFO    : [INFO] Create Vocab, vocab path is ../cache/CNNDM/vocab
+2024-03-16 23:14:40,577 INFO    : [INFO] max_size of vocab was specified as 50000; we now have 50000 words. Stopping reading.
+2024-03-16 23:14:40,578 INFO    : [INFO] Finished constructing vocabulary of 50000 total words. Last word added: chaudhary
+2024-03-16 23:14:40,768 INFO    : [INFO] Loading external word embedding...
+2024-03-16 23:15:00,034 INFO    : [INFO] External Word Embedding iov count: 48427, oov count: 1573
+2024-03-16 23:15:00,824 INFO    : Namespace(data_dir='../cnndm', cache_dir='../cache/CNNDM', embedding_path='../glove.6B.300d.txt', model='HSG', restore_model='None', save_root='models', log_root='logs', seed=666, gpu='0', cuda=True, vocab_size=50000, n_epochs=20, batch_size=32, n_iter=1, word_embedding=True, word_emb_dim=300, embed_train=False, feat_embed_size=50, n_layers=1, lstm_hidden_state=128, lstm_layers=2, bidirectional=True, n_feature_size=600, hidden_size=300, ffn_inner_hidden_size=512, n_head=6, recurrent_dropout_prob=0.1, atten_dropout_prob=0.1, ffn_dropout_prob=0.1, use_orthnormal_init=True, sent_max_len=100, doc_max_timesteps=50, lr=0.0005, lr_descent=True, grad_clip=True, max_grad_norm=1.0, num_workers=32, m=3)
+2024-03-16 23:15:00,966 INFO    : [MODEL] HeterSumGraph 
+2024-03-16 23:15:00,966 INFO    : [INFO] Start reading ExampleSet
+2024-03-16 23:15:07,875 INFO    : [INFO] Finish reading ExampleSet. Total time is 6.908815, Total size is 287084
+2024-03-16 23:15:07,875 INFO    : [INFO] Loading filter word File ../cache/CNNDM/filter_word.txt
+2024-03-16 23:15:08,094 INFO    : [INFO] Loading word2sent TFIDF file from ../cache/CNNDM/train.w2s.tfidf.jsonl!
+2024-03-16 23:16:54,936 INFO    : [INFO] Start reading ExampleSet
+2024-03-16 23:16:58,811 INFO    : [INFO] Finish reading ExampleSet. Total time is 3.874347, Total size is 13367
+2024-03-16 23:16:58,811 INFO    : [INFO] Loading filter word File ../cache/CNNDM/filter_word.txt
+2024-03-16 23:16:59,043 INFO    : [INFO] Loading word2sent TFIDF file from ../cache/CNNDM/val.w2s.tfidf.jsonl!
+2024-03-16 23:17:07,411 INFO    : [INFO] Use cuda
+2024-03-16 23:17:07,411 INFO    : [INFO] Create new model for training...
+2024-03-16 23:17:07,420 INFO    : [INFO] Starting run_training
+2024-03-16 23:17:41,298 INFO    :        | end of iter   0 | time:  7.46s | train loss 0.1813 | 
diff --git a/logs/train_20240318_120234 b/logs/train_20240318_120234
new file mode 100644
index 0000000..469c213
--- /dev/null
+++ b/logs/train_20240318_120234
@@ -0,0 +1,20 @@
+2024-03-18 12:02:34,617 INFO    : Pytorch 2.1.2+cu121
+2024-03-18 12:02:34,617 INFO    : [INFO] Create Vocab, vocab path is ../cache/CNNDM/vocab
+2024-03-18 12:02:34,666 INFO    : [INFO] max_size of vocab was specified as 50000; we now have 50000 words. Stopping reading.
+2024-03-18 12:02:34,666 INFO    : [INFO] Finished constructing vocabulary of 50000 total words. Last word added: chaudhary
+2024-03-18 12:02:34,860 INFO    : [INFO] Loading external word embedding...
+2024-03-18 12:02:55,096 INFO    : [INFO] External Word Embedding iov count: 48427, oov count: 1573
+2024-03-18 12:02:55,964 INFO    : Namespace(data_dir='../cnndm', cache_dir='../cache/CNNDM', embedding_path='../glove.6B.300d.txt', model='HSG', restore_model='None', save_root='models', log_root='logs', seed=666, gpu='0', cuda=True, vocab_size=50000, n_epochs=20, batch_size=32, n_iter=1, word_embedding=True, word_emb_dim=300, embed_train=False, feat_embed_size=50, n_layers=1, lstm_hidden_state=128, lstm_layers=2, bidirectional=True, n_feature_size=600, hidden_size=300, ffn_inner_hidden_size=512, n_head=6, recurrent_dropout_prob=0.1, atten_dropout_prob=0.1, ffn_dropout_prob=0.1, use_orthnormal_init=True, sent_max_len=100, doc_max_timesteps=50, lr=0.0005, lr_descent=True, grad_clip=True, max_grad_norm=1.0, num_workers=32, m=3)
+2024-03-18 12:02:56,082 INFO    : [MODEL] HeterSumGraph 
+2024-03-18 12:02:56,083 INFO    : [INFO] Start reading ExampleSet
+2024-03-18 12:03:04,467 INFO    : [INFO] Finish reading ExampleSet. Total time is 8.384569, Total size is 287084
+2024-03-18 12:03:04,467 INFO    : [INFO] Loading filter word File ../cache/CNNDM/filter_word.txt
+2024-03-18 12:03:04,690 INFO    : [INFO] Loading word2sent TFIDF file from ../cache/CNNDM/train.w2s.tfidf.jsonl!
+2024-03-18 12:04:51,574 INFO    : [INFO] Start reading ExampleSet
+2024-03-18 12:04:55,817 INFO    : [INFO] Finish reading ExampleSet. Total time is 4.242370, Total size is 13367
+2024-03-18 12:04:55,817 INFO    : [INFO] Loading filter word File ../cache/CNNDM/filter_word.txt
+2024-03-18 12:04:56,062 INFO    : [INFO] Loading word2sent TFIDF file from ../cache/CNNDM/val.w2s.tfidf.jsonl!
+2024-03-18 12:05:07,494 INFO    : [INFO] Use cuda
+2024-03-18 12:05:07,495 INFO    : [INFO] Create new model for training...
+2024-03-18 12:05:07,495 INFO    : [INFO] Starting run_training
+2024-03-18 12:05:47,371 INFO    :        | end of iter   0 | time: 10.69s | train loss 0.1813 | 
diff --git a/logs/train_20240318_123528 b/logs/train_20240318_123528
new file mode 100644
index 0000000..2204089
--- /dev/null
+++ b/logs/train_20240318_123528
@@ -0,0 +1,20 @@
+2024-03-18 12:35:28,802 INFO    : Pytorch 2.1.2+cu121
+2024-03-18 12:35:28,802 INFO    : [INFO] Create Vocab, vocab path is ../cache/CNNDM/vocab
+2024-03-18 12:35:28,869 INFO    : [INFO] max_size of vocab was specified as 50000; we now have 50000 words. Stopping reading.
+2024-03-18 12:35:28,869 INFO    : [INFO] Finished constructing vocabulary of 50000 total words. Last word added: chaudhary
+2024-03-18 12:35:29,023 INFO    : [INFO] Loading external word embedding...
+2024-03-18 12:35:52,400 INFO    : [INFO] External Word Embedding iov count: 48427, oov count: 1573
+2024-03-18 12:35:53,225 INFO    : Namespace(data_dir='../cnndm', cache_dir='../cache/CNNDM', embedding_path='../glove.6B.300d.txt', model='HSG', restore_model='None', save_root='models', log_root='logs', seed=666, gpu='0', cuda=True, vocab_size=50000, n_epochs=20, batch_size=32, n_iter=1, word_embedding=True, word_emb_dim=300, embed_train=False, feat_embed_size=50, n_layers=1, lstm_hidden_state=128, lstm_layers=2, bidirectional=True, n_feature_size=600, hidden_size=300, ffn_inner_hidden_size=512, n_head=6, recurrent_dropout_prob=0.1, atten_dropout_prob=0.1, ffn_dropout_prob=0.1, use_orthnormal_init=True, sent_max_len=100, doc_max_timesteps=50, lr=0.0005, lr_descent=True, grad_clip=True, max_grad_norm=1.0, num_workers=32, m=3)
+2024-03-18 12:35:53,374 INFO    : [MODEL] HeterSumGraph 
+2024-03-18 12:35:53,374 INFO    : [INFO] Start reading ExampleSet
+2024-03-18 12:36:02,736 INFO    : [INFO] Finish reading ExampleSet. Total time is 9.362411, Total size is 287084
+2024-03-18 12:36:02,737 INFO    : [INFO] Loading filter word File ../cache/CNNDM/filter_word.txt
+2024-03-18 12:36:02,962 INFO    : [INFO] Loading word2sent TFIDF file from ../cache/CNNDM/train.w2s.tfidf.jsonl!
+2024-03-18 12:37:48,226 INFO    : [INFO] Start reading ExampleSet
+2024-03-18 12:37:52,199 INFO    : [INFO] Finish reading ExampleSet. Total time is 3.973138, Total size is 13367
+2024-03-18 12:37:52,199 INFO    : [INFO] Loading filter word File ../cache/CNNDM/filter_word.txt
+2024-03-18 12:37:52,419 INFO    : [INFO] Loading word2sent TFIDF file from ../cache/CNNDM/val.w2s.tfidf.jsonl!
+2024-03-18 12:38:03,720 INFO    : [INFO] Use cuda
+2024-03-18 12:38:03,720 INFO    : [INFO] Create new model for training...
+2024-03-18 12:38:03,720 INFO    : [INFO] Starting run_training
+2024-03-18 12:38:40,814 INFO    :        | end of iter   0 | time:  8.82s | train loss 0.1813 | 
diff --git a/logs/train_20240318_124925 b/logs/train_20240318_124925
new file mode 100644
index 0000000..a365471
--- /dev/null
+++ b/logs/train_20240318_124925
@@ -0,0 +1,20 @@
+2024-03-18 12:49:25,248 INFO    : Pytorch 2.1.2+cu121
+2024-03-18 12:49:25,249 INFO    : [INFO] Create Vocab, vocab path is ../cache/CNNDM/vocab
+2024-03-18 12:49:25,297 INFO    : [INFO] max_size of vocab was specified as 50000; we now have 50000 words. Stopping reading.
+2024-03-18 12:49:25,297 INFO    : [INFO] Finished constructing vocabulary of 50000 total words. Last word added: chaudhary
+2024-03-18 12:49:25,445 INFO    : [INFO] Loading external word embedding...
+2024-03-18 12:49:49,111 INFO    : [INFO] External Word Embedding iov count: 48427, oov count: 1573
+2024-03-18 12:49:49,922 INFO    : Namespace(data_dir='../cnndm', cache_dir='../cache/CNNDM', embedding_path='../glove.6B.300d.txt', model='HSG', restore_model='None', save_root='models', log_root='logs', seed=666, gpu='0', cuda=True, vocab_size=50000, n_epochs=20, batch_size=32, n_iter=1, word_embedding=True, word_emb_dim=300, embed_train=False, feat_embed_size=50, n_layers=1, lstm_hidden_state=128, lstm_layers=2, bidirectional=True, n_feature_size=600, hidden_size=300, ffn_inner_hidden_size=512, n_head=6, recurrent_dropout_prob=0.1, atten_dropout_prob=0.1, ffn_dropout_prob=0.1, use_orthnormal_init=True, sent_max_len=100, doc_max_timesteps=50, lr=0.0005, lr_descent=True, grad_clip=True, max_grad_norm=1.0, num_workers=32, m=3)
+2024-03-18 12:49:50,055 INFO    : [MODEL] HeterSumGraph 
+2024-03-18 12:49:50,055 INFO    : [INFO] Start reading ExampleSet
+2024-03-18 12:49:59,393 INFO    : [INFO] Finish reading ExampleSet. Total time is 9.337595, Total size is 287084
+2024-03-18 12:49:59,393 INFO    : [INFO] Loading filter word File ../cache/CNNDM/filter_word.txt
+2024-03-18 12:49:59,623 INFO    : [INFO] Loading word2sent TFIDF file from ../cache/CNNDM/train.w2s.tfidf.jsonl!
+2024-03-18 12:51:45,027 INFO    : [INFO] Start reading ExampleSet
+2024-03-18 12:51:49,547 INFO    : [INFO] Finish reading ExampleSet. Total time is 4.519422, Total size is 13367
+2024-03-18 12:51:49,547 INFO    : [INFO] Loading filter word File ../cache/CNNDM/filter_word.txt
+2024-03-18 12:51:49,776 INFO    : [INFO] Loading word2sent TFIDF file from ../cache/CNNDM/val.w2s.tfidf.jsonl!
+2024-03-18 12:52:01,017 INFO    : [INFO] Use cuda
+2024-03-18 12:52:01,017 INFO    : [INFO] Create new model for training...
+2024-03-18 12:52:01,018 INFO    : [INFO] Starting run_training
+2024-03-18 12:52:36,853 INFO    :        | end of iter   0 | time:  8.16s | train loss 0.1813 | 
diff --git a/logs/train_20240318_140517 b/logs/train_20240318_140517
new file mode 100644
index 0000000..c3ef6d3
--- /dev/null
+++ b/logs/train_20240318_140517
@@ -0,0 +1,22 @@
+2024-03-18 14:05:17,845 INFO    : Pytorch 2.1.2+cu121
+2024-03-18 14:05:17,845 INFO    : [INFO] Create Vocab, vocab path is ../cache/CNNDM/vocab
+2024-03-18 14:05:17,910 INFO    : [INFO] max_size of vocab was specified as 50000; we now have 50000 words. Stopping reading.
+2024-03-18 14:05:17,910 INFO    : [INFO] Finished constructing vocabulary of 50000 total words. Last word added: chaudhary
+2024-03-18 14:05:18,100 INFO    : [INFO] Loading external word embedding...
+2024-03-18 14:05:41,684 INFO    : [INFO] External Word Embedding iov count: 48427, oov count: 1573
+2024-03-18 14:05:42,445 INFO    : Namespace(data_dir='../cnndm', cache_dir='../cache/CNNDM', embedding_path='../glove.6B.300d.txt', model='HSG', restore_model='None', save_root='models', log_root='logs', seed=666, gpu='0', cuda=True, vocab_size=50000, n_epochs=20, batch_size=32, n_iter=1, word_embedding=True, word_emb_dim=300, embed_train=False, feat_embed_size=50, n_layers=1, lstm_hidden_state=128, lstm_layers=2, bidirectional=True, n_feature_size=600, hidden_size=300, ffn_inner_hidden_size=512, n_head=6, recurrent_dropout_prob=0.1, atten_dropout_prob=0.1, ffn_dropout_prob=0.1, use_orthnormal_init=True, sent_max_len=100, doc_max_timesteps=50, lr=0.0005, lr_descent=True, grad_clip=True, max_grad_norm=1.0, num_workers=32, m=3)
+2024-03-18 14:05:42,548 INFO    : [MODEL] HeterSumGraph 
+2024-03-18 14:05:42,548 INFO    : [INFO] Start reading ExampleSet
+2024-03-18 14:05:52,005 INFO    : [INFO] Finish reading ExampleSet. Total time is 9.456998, Total size is 287084
+2024-03-18 14:05:52,005 INFO    : [INFO] Loading filter word File ../cache/CNNDM/filter_word.txt
+2024-03-18 14:05:52,248 INFO    : [INFO] Loading word2sent TFIDF file from ../cache/CNNDM/train.w2s.tfidf.jsonl!
+2024-03-18 14:07:37,704 INFO    : [INFO] Start reading ExampleSet
+2024-03-18 14:07:41,925 INFO    : [INFO] Finish reading ExampleSet. Total time is 4.220155, Total size is 13367
+2024-03-18 14:07:41,925 INFO    : [INFO] Loading filter word File ../cache/CNNDM/filter_word.txt
+2024-03-18 14:07:42,150 INFO    : [INFO] Loading word2sent TFIDF file from ../cache/CNNDM/val.w2s.tfidf.jsonl!
+2024-03-18 14:07:53,295 INFO    : [INFO] Use cuda
+2024-03-18 14:07:53,295 INFO    : [INFO] Create new model for training...
+2024-03-18 14:07:53,295 INFO    : [INFO] Starting run_training
+2024-03-18 14:08:07,919 INFO    :        | end of iter   0 | time:  4.48s | train loss 0.1813 | 
+2024-03-18 14:16:44,467 ERROR   : [Error] Caught keyboard interrupt on worker. Stopping supervisor...
+2024-03-18 14:16:44,604 INFO    : [INFO] Saving model to models/train/earlystop
diff --git a/logs/train_20240318_141726 b/logs/train_20240318_141726
new file mode 100644
index 0000000..f25aee1
--- /dev/null
+++ b/logs/train_20240318_141726
@@ -0,0 +1,25 @@
+2024-03-18 14:17:26,934 INFO    : Pytorch 2.1.2+cu121
+2024-03-18 14:17:26,934 INFO    : [INFO] Create Vocab, vocab path is ../cache/CNNDM/vocab
+2024-03-18 14:17:26,998 INFO    : [INFO] max_size of vocab was specified as 50000; we now have 50000 words. Stopping reading.
+2024-03-18 14:17:26,998 INFO    : [INFO] Finished constructing vocabulary of 50000 total words. Last word added: chaudhary
+2024-03-18 14:17:27,203 INFO    : [INFO] Loading external word embedding...
+2024-03-18 14:17:45,779 INFO    : [INFO] External Word Embedding iov count: 48427, oov count: 1573
+2024-03-18 14:17:46,587 INFO    : Namespace(data_dir='../cnndm', cache_dir='../cache/CNNDM', embedding_path='../glove.6B.300d.txt', model='HSG', restore_model='None', save_root='models', log_root='logs', seed=666, gpu='0', cuda=True, vocab_size=50000, n_epochs=20, batch_size=32, n_iter=1, word_embedding=True, word_emb_dim=300, embed_train=False, feat_embed_size=50, n_layers=1, lstm_hidden_state=128, lstm_layers=2, bidirectional=True, n_feature_size=600, hidden_size=300, ffn_inner_hidden_size=512, n_head=6, recurrent_dropout_prob=0.1, atten_dropout_prob=0.1, ffn_dropout_prob=0.1, use_orthnormal_init=True, sent_max_len=100, doc_max_timesteps=50, lr=0.0005, lr_descent=True, grad_clip=True, max_grad_norm=1.0, num_workers=32, m=3)
+2024-03-18 14:17:46,692 INFO    : [MODEL] HeterSumGraph 
+2024-03-18 14:17:46,692 INFO    : [INFO] Start reading ExampleSet
+2024-03-18 14:17:53,709 INFO    : [INFO] Finish reading ExampleSet. Total time is 7.016949, Total size is 1000
+2024-03-18 14:17:53,710 INFO    : [INFO] Loading filter word File ../cache/CNNDM/filter_word.txt
+2024-03-18 14:17:53,919 INFO    : [INFO] Loading word2sent TFIDF file from ../cache/CNNDM/train.w2s.tfidf.jsonl!
+2024-03-18 14:19:40,000 INFO    : [INFO] Start reading ExampleSet
+2024-03-18 14:19:40,252 INFO    : [INFO] Finish reading ExampleSet. Total time is 0.251016, Total size is 1000
+2024-03-18 14:19:40,252 INFO    : [INFO] Loading filter word File ../cache/CNNDM/filter_word.txt
+2024-03-18 14:19:40,462 INFO    : [INFO] Loading word2sent TFIDF file from ../cache/CNNDM/val.w2s.tfidf.jsonl!
+2024-03-18 14:19:51,486 INFO    : [INFO] Use cuda
+2024-03-18 14:19:51,486 INFO    : [INFO] Create new model for training...
+2024-03-18 14:19:51,494 INFO    : [INFO] Starting run_training
+2024-03-18 14:20:06,000 INFO    :        | end of iter   0 | time:  2.79s | train loss 0.2065 | 
+2024-03-18 14:25:22,459 INFO    : [INFO] The learning rate now is 0.000250
+2024-03-18 14:25:22,460 INFO    :    | end of epoch   1 | time: 328.11s | epoch train loss 8.7745 | 
+2024-03-18 14:25:22,460 INFO    : [INFO] Found new best model with 8.774 running_train_loss. Saving to models/train/bestmodel
+2024-03-18 14:25:22,600 INFO    : [INFO] Saving model to models/train/bestmodel
+2024-03-18 14:25:22,601 INFO    : [INFO] Starting eval for this model ...
diff --git a/logs/train_20240319_112415 b/logs/train_20240319_112415
new file mode 100644
index 0000000..60f197e
--- /dev/null
+++ b/logs/train_20240319_112415
@@ -0,0 +1,25 @@
+2024-03-19 11:24:15,983 INFO    : Pytorch 2.1.2+cu121
+2024-03-19 11:24:15,984 INFO    : [INFO] Create Vocab, vocab path is ../cache/CNNDM/vocab
+2024-03-19 11:24:16,049 INFO    : [INFO] max_size of vocab was specified as 50000; we now have 50000 words. Stopping reading.
+2024-03-19 11:24:16,049 INFO    : [INFO] Finished constructing vocabulary of 50000 total words. Last word added: chaudhary
+2024-03-19 11:24:16,246 INFO    : [INFO] Loading external word embedding...
+2024-03-19 11:24:34,768 INFO    : [INFO] External Word Embedding iov count: 48427, oov count: 1573
+2024-03-19 11:24:35,597 INFO    : Namespace(data_dir='../cnndm', cache_dir='../cache/CNNDM', embedding_path='../glove.6B.300d.txt', model='HSG', restore_model='None', save_root='models', log_root='logs', seed=666, gpu='0', cuda=True, vocab_size=50000, n_epochs=20, batch_size=32, n_iter=1, word_embedding=True, word_emb_dim=300, embed_train=False, feat_embed_size=50, n_layers=1, lstm_hidden_state=128, lstm_layers=2, bidirectional=True, n_feature_size=600, hidden_size=300, ffn_inner_hidden_size=512, n_head=6, recurrent_dropout_prob=0.1, atten_dropout_prob=0.1, ffn_dropout_prob=0.1, use_orthnormal_init=True, sent_max_len=100, doc_max_timesteps=50, lr=0.0005, lr_descent=True, grad_clip=True, max_grad_norm=1.0, num_workers=32, m=3)
+2024-03-19 11:24:35,728 INFO    : [MODEL] HeterSumGraph 
+2024-03-19 11:24:35,728 INFO    : [INFO] Start reading ExampleSet
+2024-03-19 11:24:42,857 INFO    : [INFO] Finish reading ExampleSet. Total time is 7.128790, Total size is 1000
+2024-03-19 11:24:42,857 INFO    : [INFO] Loading filter word File ../cache/CNNDM/filter_word.txt
+2024-03-19 11:24:43,071 INFO    : [INFO] Loading word2sent TFIDF file from ../cache/CNNDM/train.w2s.tfidf.jsonl!
+2024-03-19 11:26:27,389 INFO    : [INFO] Start reading ExampleSet
+2024-03-19 11:26:27,625 INFO    : [INFO] Finish reading ExampleSet. Total time is 0.236137, Total size is 1000
+2024-03-19 11:26:27,625 INFO    : [INFO] Loading filter word File ../cache/CNNDM/filter_word.txt
+2024-03-19 11:26:27,832 INFO    : [INFO] Loading word2sent TFIDF file from ../cache/CNNDM/val.w2s.tfidf.jsonl!
+2024-03-19 11:26:38,189 INFO    : [INFO] Use cuda
+2024-03-19 11:26:38,189 INFO    : [INFO] Create new model for training...
+2024-03-19 11:26:38,204 INFO    : [INFO] Starting run_training
+2024-03-19 11:26:52,687 INFO    :        | end of iter   0 | time:  2.71s | train loss 0.2065 | 
+2024-03-19 11:32:06,338 INFO    : [INFO] The learning rate now is 0.000250
+2024-03-19 11:32:06,339 INFO    :    | end of epoch   1 | time: 325.20s | epoch train loss 8.7744 | 
+2024-03-19 11:32:06,339 INFO    : [INFO] Found new best model with 8.774 running_train_loss. Saving to models/train/bestmodel
+2024-03-19 11:32:06,450 INFO    : [INFO] Saving model to models/train/bestmodel
+2024-03-19 11:32:06,450 INFO    : [INFO] Starting eval for this model ...
diff --git a/logs/train_20240319_122337 b/logs/train_20240319_122337
new file mode 100644
index 0000000..7313bdf
--- /dev/null
+++ b/logs/train_20240319_122337
@@ -0,0 +1,195 @@
+2024-03-19 12:23:37,048 INFO    : Pytorch 2.1.2+cu121
+2024-03-19 12:23:37,049 INFO    : [INFO] Create Vocab, vocab path is ../cache/CNNDM/vocab
+2024-03-19 12:23:37,117 INFO    : [INFO] max_size of vocab was specified as 50000; we now have 50000 words. Stopping reading.
+2024-03-19 12:23:37,117 INFO    : [INFO] Finished constructing vocabulary of 50000 total words. Last word added: chaudhary
+2024-03-19 12:23:37,311 INFO    : [INFO] Loading external word embedding...
+2024-03-19 12:23:56,354 INFO    : [INFO] External Word Embedding iov count: 48427, oov count: 1573
+2024-03-19 12:23:57,141 INFO    : Namespace(data_dir='../cnndm', cache_dir='../cache/CNNDM', embedding_path='../glove.6B.300d.txt', model='HSG', restore_model='None', save_root='models', log_root='logs', seed=666, gpu='0', cuda=True, vocab_size=50000, n_epochs=20, batch_size=32, n_iter=1, word_embedding=True, word_emb_dim=300, embed_train=False, feat_embed_size=50, n_layers=1, lstm_hidden_state=128, lstm_layers=2, bidirectional=True, n_feature_size=600, hidden_size=300, ffn_inner_hidden_size=512, n_head=6, recurrent_dropout_prob=0.1, atten_dropout_prob=0.1, ffn_dropout_prob=0.1, use_orthnormal_init=True, sent_max_len=100, doc_max_timesteps=50, lr=0.0005, lr_descent=True, grad_clip=True, max_grad_norm=1.0, num_workers=32, m=3)
+2024-03-19 12:23:57,246 INFO    : [MODEL] HeterSumGraph 
+2024-03-19 12:23:57,246 INFO    : [INFO] Start reading ExampleSet
+2024-03-19 12:24:04,374 INFO    : [INFO] Finish reading ExampleSet. Total time is 7.127745, Total size is 1000
+2024-03-19 12:24:04,374 INFO    : [INFO] Loading filter word File ../cache/CNNDM/filter_word.txt
+2024-03-19 12:24:04,588 INFO    : [INFO] Loading word2sent TFIDF file from ../cache/CNNDM/train.w2s.tfidf.jsonl!
+2024-03-19 12:25:49,499 INFO    : [INFO] Start reading ExampleSet
+2024-03-19 12:25:49,732 INFO    : [INFO] Finish reading ExampleSet. Total time is 0.232753, Total size is 1000
+2024-03-19 12:25:49,732 INFO    : [INFO] Loading filter word File ../cache/CNNDM/filter_word.txt
+2024-03-19 12:25:49,939 INFO    : [INFO] Loading word2sent TFIDF file from ../cache/CNNDM/val.w2s.tfidf.jsonl!
+2024-03-19 12:26:00,228 INFO    : [INFO] Use cuda
+2024-03-19 12:26:00,228 INFO    : [INFO] Create new model for training...
+2024-03-19 12:26:00,239 INFO    : [INFO] Starting run_training
+2024-03-19 12:26:14,354 INFO    :        | end of iter   0 | time:  2.95s | train loss 0.2065 | 
+2024-03-19 12:31:29,322 INFO    : [INFO] The learning rate now is 0.000250
+2024-03-19 12:31:29,322 INFO    :    | end of epoch   1 | time: 326.83s | epoch train loss 8.7745 | 
+2024-03-19 12:31:29,322 INFO    : [INFO] Found new best model with 8.774 running_train_loss. Saving to models/train/bestmodel
+2024-03-19 12:31:29,439 INFO    : [INFO] Saving model to models/train/bestmodel
+2024-03-19 12:31:29,439 INFO    : [INFO] Starting eval for this model ...
+2024-03-19 12:38:59,413 INFO    : [INFO] End of valid | time: 449.97s | valid loss 6.3562 | 
+2024-03-19 12:38:59,413 INFO    : Rouge1:
+	p:0.357692, r:0.514719, f:0.409876
+Rouge2:
+	p:0.148435, r:0.228899, f:0.172815
+Rougel:
+	p:0.323567, r:0.465179, f:0.370607
+
+2024-03-19 12:38:59,413 INFO    : [INFO] Validset match_true 822, pred 3000, true 2229, total 29835, match 26250
+2024-03-19 12:38:59,414 INFO    : [INFO] The size of totalset is 1000, sent_number is 29835, accu is 0.879839, precision is 0.274000, recall is 0.368775, F is 0.314400
+2024-03-19 12:38:59,414 INFO    : [INFO] Found new best model with 6.356172 running_avg_loss. The original loss is None, Saving to models/eval/bestmodel_0
+2024-03-19 12:38:59,525 INFO    : [INFO] Found new best model with 0.314400 F. The original F is None, Saving to models/eval/bestFmodel
+2024-03-19 12:39:09,557 INFO    :        | end of iter   0 | time:  1.86s | train loss 0.0682 | 
+2024-03-19 12:44:24,996 INFO    : [INFO] The learning rate now is 0.000167
+2024-03-19 12:44:24,997 INFO    :    | end of epoch   2 | time: 325.36s | epoch train loss 6.2774 | 
+2024-03-19 12:44:24,997 INFO    : [INFO] Found new best model with 6.277 running_train_loss. Saving to models/train/bestmodel
+2024-03-19 12:44:25,124 INFO    : [INFO] Saving model to models/train/bestmodel
+2024-03-19 12:44:25,124 INFO    : [INFO] Starting eval for this model ...
+2024-03-19 12:51:54,253 INFO    : [INFO] End of valid | time: 449.13s | valid loss 6.2358 | 
+2024-03-19 12:51:54,253 INFO    : Rouge1:
+	p:0.378497, r:0.497024, f:0.417855
+Rouge2:
+	p:0.160050, r:0.223750, f:0.179303
+Rougel:
+	p:0.342519, r:0.449423, f:0.377969
+
+2024-03-19 12:51:54,253 INFO    : [INFO] Validset match_true 891, pred 3000, true 2229, total 29835, match 26388
+2024-03-19 12:51:54,254 INFO    : [INFO] The size of totalset is 1000, sent_number is 29835, accu is 0.884465, precision is 0.297000, recall is 0.399731, F is 0.340792
+2024-03-19 12:51:54,254 INFO    : [INFO] Found new best model with 6.235840 running_avg_loss. The original loss is 6.356172, Saving to models/eval/bestmodel_1
+2024-03-19 12:51:54,366 INFO    : [INFO] Found new best model with 0.340792 F. The original F is 0.314400, Saving to models/eval/bestFmodel
+2024-03-19 12:52:04,947 INFO    :        | end of iter   0 | time:  1.84s | train loss 0.0673 | 
+2024-03-19 12:57:19,493 INFO    : [INFO] The learning rate now is 0.000125
+2024-03-19 12:57:19,493 INFO    :    | end of epoch   3 | time: 325.00s | epoch train loss 6.1596 | 
+2024-03-19 12:57:19,493 INFO    : [INFO] Found new best model with 6.160 running_train_loss. Saving to models/train/bestmodel
+2024-03-19 12:57:19,858 INFO    : [INFO] Saving model to models/train/bestmodel
+2024-03-19 12:57:19,858 INFO    : [INFO] Starting eval for this model ...
+2024-03-19 13:04:53,674 INFO    : [INFO] End of valid | time: 453.82s | valid loss 6.0526 | 
+2024-03-19 13:04:53,674 INFO    : Rouge1:
+	p:0.389004, r:0.482969, f:0.419111
+Rouge2:
+	p:0.165013, r:0.217756, f:0.180522
+Rougel:
+	p:0.352666, r:0.437124, f:0.379601
+
+2024-03-19 13:04:53,675 INFO    : [INFO] Validset match_true 921, pred 3000, true 2229, total 29835, match 26448
+2024-03-19 13:04:53,675 INFO    : [INFO] The size of totalset is 1000, sent_number is 29835, accu is 0.886476, precision is 0.307000, recall is 0.413190, F is 0.352266
+2024-03-19 13:04:53,675 INFO    : [INFO] Found new best model with 6.052581 running_avg_loss. The original loss is 6.235840, Saving to models/eval/bestmodel_2
+2024-03-19 13:04:53,786 INFO    : [INFO] Found new best model with 0.352266 F. The original F is 0.340792, Saving to models/eval/bestFmodel
+2024-03-19 13:05:03,620 INFO    :        | end of iter   0 | time:  1.72s | train loss 0.0608 | 
+2024-03-19 13:10:16,472 INFO    : [INFO] The learning rate now is 0.000100
+2024-03-19 13:10:16,473 INFO    :    | end of epoch   4 | time: 322.57s | epoch train loss 6.0296 | 
+2024-03-19 13:10:16,473 INFO    : [INFO] Found new best model with 6.030 running_train_loss. Saving to models/train/bestmodel
+2024-03-19 13:10:16,602 INFO    : [INFO] Saving model to models/train/bestmodel
+2024-03-19 13:10:16,602 INFO    : [INFO] Starting eval for this model ...
+2024-03-19 13:17:49,581 INFO    : [INFO] End of valid | time: 452.98s | valid loss 6.0084 | 
+2024-03-19 13:17:49,581 INFO    : Rouge1:
+	p:0.392684, r:0.475958, f:0.418339
+Rouge2:
+	p:0.167677, r:0.214560, f:0.180730
+Rougel:
+	p:0.355335, r:0.429879, f:0.378152
+
+2024-03-19 13:17:49,581 INFO    : [INFO] Validset match_true 923, pred 3000, true 2229, total 29835, match 26452
+2024-03-19 13:17:49,582 INFO    : [INFO] The size of totalset is 1000, sent_number is 29835, accu is 0.886610, precision is 0.307667, recall is 0.414087, F is 0.353031
+2024-03-19 13:17:49,582 INFO    : [INFO] Found new best model with 6.008358 running_avg_loss. The original loss is 6.052581, Saving to models/eval/bestmodel_0
+2024-03-19 13:17:49,706 INFO    : [INFO] Found new best model with 0.353031 F. The original F is 0.352266, Saving to models/eval/bestFmodel
+2024-03-19 13:18:00,790 INFO    :        | end of iter   0 | time:  1.73s | train loss 0.0650 | 
+2024-03-19 13:23:15,453 INFO    : [INFO] The learning rate now is 0.000083
+2024-03-19 13:23:15,454 INFO    :    | end of epoch   5 | time: 325.63s | epoch train loss 5.9968 | 
+2024-03-19 13:23:15,454 INFO    : [INFO] Found new best model with 5.997 running_train_loss. Saving to models/train/bestmodel
+2024-03-19 13:23:15,692 INFO    : [INFO] Saving model to models/train/bestmodel
+2024-03-19 13:23:15,693 INFO    : [INFO] Starting eval for this model ...
+2024-03-19 13:30:46,781 INFO    : [INFO] End of valid | time: 451.09s | valid loss 6.0079 | 
+2024-03-19 13:30:46,781 INFO    : Rouge1:
+	p:0.382643, r:0.501072, f:0.422175
+Rouge2:
+	p:0.162434, r:0.227737, f:0.182310
+Rougel:
+	p:0.346282, r:0.452796, f:0.381743
+
+2024-03-19 13:30:46,781 INFO    : [INFO] Validset match_true 935, pred 3000, true 2229, total 29835, match 26476
+2024-03-19 13:30:46,781 INFO    : [INFO] The size of totalset is 1000, sent_number is 29835, accu is 0.887414, precision is 0.311667, recall is 0.419471, F is 0.357621
+2024-03-19 13:30:46,781 INFO    : [INFO] Found new best model with 6.007932 running_avg_loss. The original loss is 6.008358, Saving to models/eval/bestmodel_1
+2024-03-19 13:30:46,906 INFO    : [INFO] Found new best model with 0.357621 F. The original F is 0.353031, Saving to models/eval/bestFmodel
+2024-03-19 13:30:57,764 INFO    :        | end of iter   0 | time:  1.86s | train loss 0.0588 | 
+2024-03-19 13:36:12,919 INFO    : [INFO] The learning rate now is 0.000071
+2024-03-19 13:36:12,920 INFO    :    | end of epoch   6 | time: 325.89s | epoch train loss 5.8848 | 
+2024-03-19 13:36:12,920 INFO    : [INFO] Found new best model with 5.885 running_train_loss. Saving to models/train/bestmodel
+2024-03-19 13:36:13,047 INFO    : [INFO] Saving model to models/train/bestmodel
+2024-03-19 13:36:13,048 INFO    : [INFO] Starting eval for this model ...
+2024-03-19 13:43:49,161 INFO    : [INFO] End of valid | time: 456.11s | valid loss 5.9862 | 
+2024-03-19 13:43:49,161 INFO    : Rouge1:
+	p:0.391962, r:0.490211, f:0.423493
+Rouge2:
+	p:0.167892, r:0.223725, f:0.184290
+Rougel:
+	p:0.354859, r:0.443051, f:0.383058
+
+2024-03-19 13:43:49,161 INFO    : [INFO] Validset match_true 943, pred 3000, true 2229, total 29835, match 26492
+2024-03-19 13:43:49,161 INFO    : [INFO] The size of totalset is 1000, sent_number is 29835, accu is 0.887950, precision is 0.314333, recall is 0.423060, F is 0.360681
+2024-03-19 13:43:49,161 INFO    : [INFO] Found new best model with 5.986159 running_avg_loss. The original loss is 6.007932, Saving to models/eval/bestmodel_2
+2024-03-19 13:43:49,290 INFO    : [INFO] Found new best model with 0.360681 F. The original F is 0.357621, Saving to models/eval/bestFmodel
+2024-03-19 13:44:00,172 INFO    :        | end of iter   0 | time:  1.91s | train loss 0.0577 | 
+2024-03-19 13:49:15,853 INFO    : [INFO] The learning rate now is 0.000063
+2024-03-19 13:49:15,853 INFO    :    | end of epoch   7 | time: 326.21s | epoch train loss 5.7508 | 
+2024-03-19 13:49:15,853 INFO    : [INFO] Found new best model with 5.751 running_train_loss. Saving to models/train/bestmodel
+2024-03-19 13:49:15,982 INFO    : [INFO] Saving model to models/train/bestmodel
+2024-03-19 13:49:15,982 INFO    : [INFO] Starting eval for this model ...
+2024-03-19 13:56:49,622 INFO    : [INFO] End of valid | time: 453.64s | valid loss 5.9731 | 
+2024-03-19 13:56:49,623 INFO    : Rouge1:
+	p:0.392544, r:0.493714, f:0.424677
+Rouge2:
+	p:0.168827, r:0.227061, f:0.185652
+Rougel:
+	p:0.355451, r:0.446351, f:0.384230
+
+2024-03-19 13:56:49,623 INFO    : [INFO] Validset match_true 964, pred 3000, true 2229, total 29835, match 26534
+2024-03-19 13:56:49,623 INFO    : [INFO] The size of totalset is 1000, sent_number is 29835, accu is 0.889358, precision is 0.321333, recall is 0.432481, F is 0.368713
+2024-03-19 13:56:49,623 INFO    : [INFO] Found new best model with 5.973098 running_avg_loss. The original loss is 5.986159, Saving to models/eval/bestmodel_0
+2024-03-19 13:56:49,749 INFO    : [INFO] Found new best model with 0.368713 F. The original F is 0.360681, Saving to models/eval/bestFmodel
+2024-03-19 13:57:00,936 INFO    :        | end of iter   0 | time:  1.77s | train loss 0.0561 | 
+2024-03-19 14:02:16,164 INFO    : [INFO] The learning rate now is 0.000056
+2024-03-19 14:02:16,165 INFO    :    | end of epoch   8 | time: 326.05s | epoch train loss 5.6386 | 
+2024-03-19 14:02:16,165 INFO    : [INFO] Found new best model with 5.639 running_train_loss. Saving to models/train/bestmodel
+2024-03-19 14:02:16,298 INFO    : [INFO] Saving model to models/train/bestmodel
+2024-03-19 14:02:16,299 INFO    : [INFO] Starting eval for this model ...
+2024-03-19 14:09:46,955 INFO    : [INFO] End of valid | time: 450.66s | valid loss 6.0122 | 
+2024-03-19 14:09:46,955 INFO    : Rouge1:
+	p:0.387541, r:0.486009, f:0.418450
+Rouge2:
+	p:0.165692, r:0.222457, f:0.181867
+Rougel:
+	p:0.350597, r:0.439051, f:0.378278
+
+2024-03-19 14:09:46,955 INFO    : [INFO] Validset match_true 948, pred 3000, true 2229, total 29835, match 26502
+2024-03-19 14:09:46,955 INFO    : [INFO] The size of totalset is 1000, sent_number is 29835, accu is 0.888286, precision is 0.316000, recall is 0.425303, F is 0.362593
+2024-03-19 14:09:57,955 INFO    :        | end of iter   0 | time:  1.83s | train loss 0.0514 | 
+2024-03-19 14:15:13,465 INFO    : [INFO] The learning rate now is 0.000050
+2024-03-19 14:15:13,466 INFO    :    | end of epoch   9 | time: 326.51s | epoch train loss 5.5134 | 
+2024-03-19 14:15:13,466 INFO    : [INFO] Found new best model with 5.513 running_train_loss. Saving to models/train/bestmodel
+2024-03-19 14:15:13,595 INFO    : [INFO] Saving model to models/train/bestmodel
+2024-03-19 14:15:13,595 INFO    : [INFO] Starting eval for this model ...
+2024-03-19 14:22:43,893 INFO    : [INFO] End of valid | time: 450.30s | valid loss 6.1033 | 
+2024-03-19 14:22:43,893 INFO    : Rouge1:
+	p:0.388316, r:0.478041, f:0.416073
+Rouge2:
+	p:0.165929, r:0.217783, f:0.180645
+Rougel:
+	p:0.351052, r:0.431854, f:0.375974
+
+2024-03-19 14:22:43,893 INFO    : [INFO] Validset match_true 931, pred 3000, true 2229, total 29835, match 26468
+2024-03-19 14:22:43,893 INFO    : [INFO] The size of totalset is 1000, sent_number is 29835, accu is 0.887146, precision is 0.310333, recall is 0.417676, F is 0.356091
+2024-03-19 14:22:54,277 INFO    :        | end of iter   0 | time:  1.82s | train loss 0.0543 | 
+2024-03-19 14:28:08,853 INFO    : [INFO] The learning rate now is 0.000045
+2024-03-19 14:28:08,853 INFO    :    | end of epoch  10 | time: 324.96s | epoch train loss 5.3175 | 
+2024-03-19 14:28:08,853 INFO    : [INFO] Found new best model with 5.317 running_train_loss. Saving to models/train/bestmodel
+2024-03-19 14:28:08,984 INFO    : [INFO] Saving model to models/train/bestmodel
+2024-03-19 14:28:08,984 INFO    : [INFO] Starting eval for this model ...
+2024-03-19 14:35:41,951 INFO    : [INFO] End of valid | time: 452.97s | valid loss 6.1974 | 
+2024-03-19 14:35:41,952 INFO    : Rouge1:
+	p:0.386940, r:0.456113, f:0.406152
+Rouge2:
+	p:0.164244, r:0.204770, f:0.174493
+Rougel:
+	p:0.349821, r:0.412322, f:0.367085
+
+2024-03-19 14:35:41,952 INFO    : [INFO] Validset match_true 894, pred 3000, true 2229, total 29835, match 26394
+2024-03-19 14:35:41,952 INFO    : [INFO] The size of totalset is 1000, sent_number is 29835, accu is 0.884666, precision is 0.298000, recall is 0.401077, F is 0.341939
+2024-03-19 14:35:41,953 ERROR   : [Error] val loss does not descent for three times. Stopping supervisor...
+2024-03-19 14:35:42,065 INFO    : [INFO] Saving model to models/train/earlystop
diff --git a/logs/train_20240320_222337 b/logs/train_20240320_222337
new file mode 100644
index 0000000..c59f6d0
--- /dev/null
+++ b/logs/train_20240320_222337
@@ -0,0 +1,19 @@
+2024-03-20 22:23:37,144 INFO    : Pytorch 2.1.2+cu121
+2024-03-20 22:23:37,145 INFO    : [INFO] Create Vocab, vocab path is ../cache/CNNDM/vocab
+2024-03-20 22:23:37,211 INFO    : [INFO] max_size of vocab was specified as 50000; we now have 50000 words. Stopping reading.
+2024-03-20 22:23:37,211 INFO    : [INFO] Finished constructing vocabulary of 50000 total words. Last word added: chaudhary
+2024-03-20 22:23:37,405 INFO    : [INFO] Loading external word embedding...
+2024-03-20 22:23:55,539 INFO    : [INFO] External Word Embedding iov count: 48427, oov count: 1573
+2024-03-20 22:23:56,713 INFO    : Namespace(data_dir='../cnndm', cache_dir='../cache/CNNDM', embedding_path='../glove.6B.300d.txt', model='HSG', restore_model='None', save_root='models', log_root='logs', seed=666, gpu='0', cuda=True, vocab_size=50000, n_epochs=20, batch_size=32, n_iter=1, word_embedding=True, word_emb_dim=300, embed_train=False, feat_embed_size=50, n_layers=1, lstm_hidden_state=128, lstm_layers=2, bidirectional=True, n_feature_size=600, hidden_size=300, ffn_inner_hidden_size=512, n_head=6, recurrent_dropout_prob=0.1, atten_dropout_prob=0.1, ffn_dropout_prob=0.1, use_orthnormal_init=True, sent_max_len=100, doc_max_timesteps=50, lr=0.0005, lr_descent=True, grad_clip=True, max_grad_norm=1.0, num_workers=32, m=3)
+2024-03-20 22:23:56,818 INFO    : [MODEL] HeterSumGraph 
+2024-03-20 22:23:56,818 INFO    : [INFO] Start reading ExampleSet
+2024-03-20 22:23:57,858 INFO    : [INFO] Finish reading ExampleSet. Total time is 1.039716, Total size is 1
+2024-03-20 22:23:57,858 INFO    : [INFO] Loading filter word File ../cache/CNNDM/filter_word.txt
+2024-03-20 22:23:58,069 INFO    : [INFO] Loading word2sent TFIDF file from ../cache/CNNDM/train.w2s.tfidf.jsonl!
+2024-03-20 22:25:41,192 INFO    : [INFO] Start reading ExampleSet
+2024-03-20 22:25:43,394 INFO    : [INFO] Finish reading ExampleSet. Total time is 2.202353, Total size is 13367
+2024-03-20 22:25:43,395 INFO    : [INFO] Loading filter word File ../cache/CNNDM/filter_word.txt
+2024-03-20 22:25:43,607 INFO    : [INFO] Loading word2sent TFIDF file from ../cache/CNNDM/val.w2s.tfidf.jsonl!
+2024-03-20 22:25:53,780 INFO    : [INFO] Use cuda
+2024-03-20 22:25:53,781 INFO    : [INFO] Create new model for training...
+2024-03-20 22:25:53,798 INFO    : [INFO] Starting run_training
diff --git a/logs/train_20240320_223411 b/logs/train_20240320_223411
new file mode 100644
index 0000000..e18e3f9
--- /dev/null
+++ b/logs/train_20240320_223411
@@ -0,0 +1,19 @@
+2024-03-20 22:34:11,643 INFO    : Pytorch 2.1.2+cu121
+2024-03-20 22:34:11,644 INFO    : [INFO] Create Vocab, vocab path is ../cache/CNNDM/vocab
+2024-03-20 22:34:11,706 INFO    : [INFO] max_size of vocab was specified as 50000; we now have 50000 words. Stopping reading.
+2024-03-20 22:34:11,706 INFO    : [INFO] Finished constructing vocabulary of 50000 total words. Last word added: chaudhary
+2024-03-20 22:34:11,894 INFO    : [INFO] Loading external word embedding...
+2024-03-20 22:34:30,632 INFO    : [INFO] External Word Embedding iov count: 48427, oov count: 1573
+2024-03-20 22:34:31,414 INFO    : Namespace(data_dir='../cnndm', cache_dir='../cache/CNNDM', embedding_path='../glove.6B.300d.txt', model='HSG', restore_model='None', save_root='models', log_root='logs', seed=666, gpu='0', cuda=True, vocab_size=50000, n_epochs=20, batch_size=32, n_iter=1, word_embedding=True, word_emb_dim=300, embed_train=False, feat_embed_size=50, n_layers=1, lstm_hidden_state=128, lstm_layers=2, bidirectional=True, n_feature_size=600, hidden_size=300, ffn_inner_hidden_size=512, n_head=6, recurrent_dropout_prob=0.1, atten_dropout_prob=0.1, ffn_dropout_prob=0.1, use_orthnormal_init=True, sent_max_len=100, doc_max_timesteps=50, lr=0.0005, lr_descent=True, grad_clip=True, max_grad_norm=1.0, num_workers=32, m=3)
+2024-03-20 22:34:31,524 INFO    : [MODEL] HeterSumGraph 
+2024-03-20 22:34:31,524 INFO    : [INFO] Start reading ExampleSet
+2024-03-20 22:34:32,616 INFO    : [INFO] Finish reading ExampleSet. Total time is 1.091735, Total size is 1
+2024-03-20 22:34:32,616 INFO    : [INFO] Loading filter word File ../cache/CNNDM/filter_word.txt
+2024-03-20 22:34:32,828 INFO    : [INFO] Loading word2sent TFIDF file from ../cache/CNNDM/train.w2s.tfidf.jsonl!
+2024-03-20 22:36:15,682 INFO    : [INFO] Start reading ExampleSet
+2024-03-20 22:36:15,689 INFO    : [INFO] Finish reading ExampleSet. Total time is 0.006582, Total size is 1
+2024-03-20 22:36:15,689 INFO    : [INFO] Loading filter word File ../cache/CNNDM/filter_word.txt
+2024-03-20 22:36:15,895 INFO    : [INFO] Loading word2sent TFIDF file from ../cache/CNNDM/val.w2s.tfidf.jsonl!
+2024-03-20 22:36:26,076 INFO    : [INFO] Use cuda
+2024-03-20 22:36:26,076 INFO    : [INFO] Create new model for training...
+2024-03-20 22:36:26,076 INFO    : [INFO] Starting run_training
diff --git a/logs/train_20240320_225725 b/logs/train_20240320_225725
new file mode 100644
index 0000000..8244285
--- /dev/null
+++ b/logs/train_20240320_225725
@@ -0,0 +1,19 @@
+2024-03-20 22:57:25,994 INFO    : Pytorch 2.1.2+cu121
+2024-03-20 22:57:25,994 INFO    : [INFO] Create Vocab, vocab path is ../cache/CNNDM/vocab
+2024-03-20 22:57:26,039 INFO    : [INFO] max_size of vocab was specified as 50000; we now have 50000 words. Stopping reading.
+2024-03-20 22:57:26,039 INFO    : [INFO] Finished constructing vocabulary of 50000 total words. Last word added: chaudhary
+2024-03-20 22:57:26,188 INFO    : [INFO] Loading external word embedding...
+2024-03-20 22:57:44,517 INFO    : [INFO] External Word Embedding iov count: 48427, oov count: 1573
+2024-03-20 22:57:45,286 INFO    : Namespace(data_dir='../cnndm', cache_dir='../cache/CNNDM', embedding_path='../glove.6B.300d.txt', model='HSG', restore_model='None', save_root='models', log_root='logs', seed=666, gpu='0', cuda=True, vocab_size=50000, n_epochs=20, batch_size=32, n_iter=1, word_embedding=True, word_emb_dim=300, embed_train=False, feat_embed_size=50, n_layers=1, lstm_hidden_state=128, lstm_layers=2, bidirectional=True, n_feature_size=600, hidden_size=300, ffn_inner_hidden_size=512, n_head=6, recurrent_dropout_prob=0.1, atten_dropout_prob=0.1, ffn_dropout_prob=0.1, use_orthnormal_init=True, sent_max_len=100, doc_max_timesteps=50, lr=0.0005, lr_descent=True, grad_clip=True, max_grad_norm=1.0, num_workers=32, m=3)
+2024-03-20 22:57:45,429 INFO    : [MODEL] HeterSumGraph 
+2024-03-20 22:57:45,429 INFO    : [INFO] Start reading ExampleSet
+2024-03-20 22:57:52,896 INFO    : [INFO] Finish reading ExampleSet. Total time is 7.465928, Total size is 287084
+2024-03-20 22:57:52,896 INFO    : [INFO] Loading filter word File ../cache/CNNDM/filter_word.txt
+2024-03-20 22:57:53,178 INFO    : [INFO] Loading word2sent TFIDF file from ../cache/CNNDM/train.w2s.tfidf.jsonl!
+2024-03-20 22:59:36,904 INFO    : [INFO] Start reading ExampleSet
+2024-03-20 22:59:36,926 INFO    : [INFO] Finish reading ExampleSet. Total time is 0.022161, Total size is 13367
+2024-03-20 22:59:36,926 INFO    : [INFO] Loading filter word File ../cache/CNNDM/filter_word.txt
+2024-03-20 22:59:37,135 INFO    : [INFO] Loading word2sent TFIDF file from ../cache/CNNDM/val.w2s.tfidf.jsonl!
+2024-03-20 22:59:47,316 INFO    : [INFO] Use cuda
+2024-03-20 22:59:47,317 INFO    : [INFO] Create new model for training...
+2024-03-20 22:59:47,317 INFO    : [INFO] Starting run_training
diff --git a/logs/train_20240320_230107 b/logs/train_20240320_230107
new file mode 100644
index 0000000..64ac66a
--- /dev/null
+++ b/logs/train_20240320_230107
@@ -0,0 +1,19 @@
+2024-03-20 23:01:07,831 INFO    : Pytorch 2.1.2+cu121
+2024-03-20 23:01:07,831 INFO    : [INFO] Create Vocab, vocab path is ../cache/CNNDM/vocab
+2024-03-20 23:01:07,892 INFO    : [INFO] max_size of vocab was specified as 50000; we now have 50000 words. Stopping reading.
+2024-03-20 23:01:07,893 INFO    : [INFO] Finished constructing vocabulary of 50000 total words. Last word added: chaudhary
+2024-03-20 23:01:08,082 INFO    : [INFO] Loading external word embedding...
+2024-03-20 23:01:26,307 INFO    : [INFO] External Word Embedding iov count: 48427, oov count: 1573
+2024-03-20 23:01:27,143 INFO    : Namespace(data_dir='../cnndm', cache_dir='../cache/CNNDM', embedding_path='../glove.6B.300d.txt', model='HSG', restore_model='None', save_root='models', log_root='logs', seed=666, gpu='0', cuda=True, vocab_size=50000, n_epochs=20, batch_size=32, n_iter=1, word_embedding=True, word_emb_dim=300, embed_train=False, feat_embed_size=50, n_layers=1, lstm_hidden_state=128, lstm_layers=2, bidirectional=True, n_feature_size=600, hidden_size=300, ffn_inner_hidden_size=512, n_head=6, recurrent_dropout_prob=0.1, atten_dropout_prob=0.1, ffn_dropout_prob=0.1, use_orthnormal_init=True, sent_max_len=100, doc_max_timesteps=50, lr=0.0005, lr_descent=True, grad_clip=True, max_grad_norm=1.0, num_workers=32, m=3)
+2024-03-20 23:01:27,289 INFO    : [MODEL] HeterSumGraph 
+2024-03-20 23:01:27,289 INFO    : [INFO] Start reading ExampleSet
+2024-03-20 23:01:28,372 INFO    : [INFO] Finish reading ExampleSet. Total time is 1.083049, Total size is 287084
+2024-03-20 23:01:28,373 INFO    : [INFO] Loading filter word File ../cache/CNNDM/filter_word.txt
+2024-03-20 23:01:28,585 INFO    : [INFO] Loading word2sent TFIDF file from ../cache/CNNDM/train.w2s.tfidf.jsonl!
+2024-03-20 23:03:11,499 INFO    : [INFO] Start reading ExampleSet
+2024-03-20 23:03:11,506 INFO    : [INFO] Finish reading ExampleSet. Total time is 0.006288, Total size is 13367
+2024-03-20 23:03:11,506 INFO    : [INFO] Loading filter word File ../cache/CNNDM/filter_word.txt
+2024-03-20 23:03:11,710 INFO    : [INFO] Loading word2sent TFIDF file from ../cache/CNNDM/val.w2s.tfidf.jsonl!
+2024-03-20 23:03:21,884 INFO    : [INFO] Use cuda
+2024-03-20 23:03:21,885 INFO    : [INFO] Create new model for training...
+2024-03-20 23:03:21,885 INFO    : [INFO] Starting run_training
diff --git a/logs/train_20240320_230414 b/logs/train_20240320_230414
new file mode 100644
index 0000000..50c0dbf
--- /dev/null
+++ b/logs/train_20240320_230414
@@ -0,0 +1,19 @@
+2024-03-20 23:04:14,822 INFO    : Pytorch 2.1.2+cu121
+2024-03-20 23:04:14,822 INFO    : [INFO] Create Vocab, vocab path is ../cache/CNNDM/vocab
+2024-03-20 23:04:14,868 INFO    : [INFO] max_size of vocab was specified as 50000; we now have 50000 words. Stopping reading.
+2024-03-20 23:04:14,868 INFO    : [INFO] Finished constructing vocabulary of 50000 total words. Last word added: chaudhary
+2024-03-20 23:04:15,013 INFO    : [INFO] Loading external word embedding...
+2024-03-20 23:04:33,583 INFO    : [INFO] External Word Embedding iov count: 48427, oov count: 1573
+2024-03-20 23:04:34,350 INFO    : Namespace(data_dir='../cnndm', cache_dir='../cache/CNNDM', embedding_path='../glove.6B.300d.txt', model='HSG', restore_model='None', save_root='models', log_root='logs', seed=666, gpu='0', cuda=True, vocab_size=50000, n_epochs=20, batch_size=32, n_iter=1, word_embedding=True, word_emb_dim=300, embed_train=False, feat_embed_size=50, n_layers=1, lstm_hidden_state=128, lstm_layers=2, bidirectional=True, n_feature_size=600, hidden_size=300, ffn_inner_hidden_size=512, n_head=6, recurrent_dropout_prob=0.1, atten_dropout_prob=0.1, ffn_dropout_prob=0.1, use_orthnormal_init=True, sent_max_len=100, doc_max_timesteps=50, lr=0.0005, lr_descent=True, grad_clip=True, max_grad_norm=1.0, num_workers=32, m=3)
+2024-03-20 23:04:34,484 INFO    : [MODEL] HeterSumGraph 
+2024-03-20 23:04:34,484 INFO    : [INFO] Start reading ExampleSet
+2024-03-20 23:04:35,518 INFO    : [INFO] Finish reading ExampleSet. Total time is 1.033573, Total size is 287084
+2024-03-20 23:04:35,518 INFO    : [INFO] Loading filter word File ../cache/CNNDM/filter_word.txt
+2024-03-20 23:04:35,728 INFO    : [INFO] Loading word2sent TFIDF file from ../cache/CNNDM/train.w2s.tfidf.jsonl!
+2024-03-20 23:06:18,407 INFO    : [INFO] Start reading ExampleSet
+2024-03-20 23:06:18,414 INFO    : [INFO] Finish reading ExampleSet. Total time is 0.006357, Total size is 13367
+2024-03-20 23:06:18,414 INFO    : [INFO] Loading filter word File ../cache/CNNDM/filter_word.txt
+2024-03-20 23:06:18,618 INFO    : [INFO] Loading word2sent TFIDF file from ../cache/CNNDM/val.w2s.tfidf.jsonl!
+2024-03-20 23:06:28,800 INFO    : [INFO] Use cuda
+2024-03-20 23:06:28,800 INFO    : [INFO] Create new model for training...
+2024-03-20 23:06:28,801 INFO    : [INFO] Starting run_training
diff --git a/logs/train_20240320_231306 b/logs/train_20240320_231306
new file mode 100644
index 0000000..0914adc
--- /dev/null
+++ b/logs/train_20240320_231306
@@ -0,0 +1,19 @@
+2024-03-20 23:13:06,871 INFO    : Pytorch 2.1.2+cu121
+2024-03-20 23:13:06,872 INFO    : [INFO] Create Vocab, vocab path is ../cache/CNNDM/vocab
+2024-03-20 23:13:06,933 INFO    : [INFO] max_size of vocab was specified as 50000; we now have 50000 words. Stopping reading.
+2024-03-20 23:13:06,933 INFO    : [INFO] Finished constructing vocabulary of 50000 total words. Last word added: chaudhary
+2024-03-20 23:13:07,124 INFO    : [INFO] Loading external word embedding...
+2024-03-20 23:13:25,364 INFO    : [INFO] External Word Embedding iov count: 48427, oov count: 1573
+2024-03-20 23:13:26,131 INFO    : Namespace(data_dir='../cnndm', cache_dir='../cache/CNNDM', embedding_path='../glove.6B.300d.txt', model='HSG', restore_model='None', save_root='models', log_root='logs', seed=666, gpu='0', cuda=True, vocab_size=50000, n_epochs=20, batch_size=32, n_iter=1, word_embedding=True, word_emb_dim=300, embed_train=False, feat_embed_size=50, n_layers=1, lstm_hidden_state=128, lstm_layers=2, bidirectional=True, n_feature_size=600, hidden_size=300, ffn_inner_hidden_size=512, n_head=6, recurrent_dropout_prob=0.1, atten_dropout_prob=0.1, ffn_dropout_prob=0.1, use_orthnormal_init=True, sent_max_len=100, doc_max_timesteps=50, lr=0.0005, lr_descent=True, grad_clip=True, max_grad_norm=1.0, num_workers=32, m=3)
+2024-03-20 23:13:26,234 INFO    : [MODEL] HeterSumGraph 
+2024-03-20 23:13:26,234 INFO    : [INFO] Start reading ExampleSet
+2024-03-20 23:13:27,259 INFO    : [INFO] Finish reading ExampleSet. Total time is 1.024637, Total size is 287084
+2024-03-20 23:13:27,259 INFO    : [INFO] Loading filter word File ../cache/CNNDM/filter_word.txt
+2024-03-20 23:13:27,473 INFO    : [INFO] Loading word2sent TFIDF file from ../cache/CNNDM/train.w2s.tfidf.jsonl!
+2024-03-20 23:15:09,706 INFO    : [INFO] Start reading ExampleSet
+2024-03-20 23:15:09,712 INFO    : [INFO] Finish reading ExampleSet. Total time is 0.006279, Total size is 13367
+2024-03-20 23:15:09,713 INFO    : [INFO] Loading filter word File ../cache/CNNDM/filter_word.txt
+2024-03-20 23:15:09,920 INFO    : [INFO] Loading word2sent TFIDF file from ../cache/CNNDM/val.w2s.tfidf.jsonl!
+2024-03-20 23:15:20,093 INFO    : [INFO] Use cuda
+2024-03-20 23:15:20,093 INFO    : [INFO] Create new model for training...
+2024-03-20 23:15:20,094 INFO    : [INFO] Starting run_training
diff --git a/logs/train_20240320_232304 b/logs/train_20240320_232304
new file mode 100644
index 0000000..276d30b
--- /dev/null
+++ b/logs/train_20240320_232304
@@ -0,0 +1,19 @@
+2024-03-20 23:23:04,258 INFO    : Pytorch 2.1.2+cu121
+2024-03-20 23:23:04,258 INFO    : [INFO] Create Vocab, vocab path is ../cache/CNNDM/vocab
+2024-03-20 23:23:04,321 INFO    : [INFO] max_size of vocab was specified as 50000; we now have 50000 words. Stopping reading.
+2024-03-20 23:23:04,321 INFO    : [INFO] Finished constructing vocabulary of 50000 total words. Last word added: chaudhary
+2024-03-20 23:23:04,511 INFO    : [INFO] Loading external word embedding...
+2024-03-20 23:23:23,757 INFO    : [INFO] External Word Embedding iov count: 48427, oov count: 1573
+2024-03-20 23:23:24,517 INFO    : Namespace(data_dir='../cnndm', cache_dir='../cache/CNNDM', embedding_path='../glove.6B.300d.txt', model='HSG', restore_model='None', save_root='models', log_root='logs', seed=666, gpu='0', cuda=True, vocab_size=50000, n_epochs=20, batch_size=32, n_iter=1, word_embedding=True, word_emb_dim=300, embed_train=False, feat_embed_size=50, n_layers=1, lstm_hidden_state=128, lstm_layers=2, bidirectional=True, n_feature_size=600, hidden_size=300, ffn_inner_hidden_size=512, n_head=6, recurrent_dropout_prob=0.1, atten_dropout_prob=0.1, ffn_dropout_prob=0.1, use_orthnormal_init=True, sent_max_len=100, doc_max_timesteps=50, lr=0.0005, lr_descent=True, grad_clip=True, max_grad_norm=1.0, num_workers=32, m=3)
+2024-03-20 23:23:24,620 INFO    : [MODEL] HeterSumGraph 
+2024-03-20 23:23:24,620 INFO    : [INFO] Start reading ExampleSet
+2024-03-20 23:23:25,656 INFO    : [INFO] Finish reading ExampleSet. Total time is 1.035856, Total size is 287084
+2024-03-20 23:23:25,657 INFO    : [INFO] Loading filter word File ../cache/CNNDM/filter_word.txt
+2024-03-20 23:23:25,874 INFO    : [INFO] Loading word2sent TFIDF file from ../cache/CNNDM/train.w2s.tfidf.jsonl!
+2024-03-20 23:25:12,684 INFO    : [INFO] Start reading ExampleSet
+2024-03-20 23:25:12,691 INFO    : [INFO] Finish reading ExampleSet. Total time is 0.006538, Total size is 13367
+2024-03-20 23:25:12,691 INFO    : [INFO] Loading filter word File ../cache/CNNDM/filter_word.txt
+2024-03-20 23:25:12,893 INFO    : [INFO] Loading word2sent TFIDF file from ../cache/CNNDM/val.w2s.tfidf.jsonl!
+2024-03-20 23:25:23,322 INFO    : [INFO] Use cuda
+2024-03-20 23:25:23,323 INFO    : [INFO] Create new model for training...
+2024-03-20 23:25:23,323 INFO    : [INFO] Starting run_training
diff --git a/logs/train_20240320_232607 b/logs/train_20240320_232607
new file mode 100644
index 0000000..79c146f
--- /dev/null
+++ b/logs/train_20240320_232607
@@ -0,0 +1,22 @@
+2024-03-20 23:26:07,808 INFO    : Pytorch 2.1.2+cu121
+2024-03-20 23:26:07,808 INFO    : [INFO] Create Vocab, vocab path is ../cache/CNNDM/vocab
+2024-03-20 23:26:07,853 INFO    : [INFO] max_size of vocab was specified as 50000; we now have 50000 words. Stopping reading.
+2024-03-20 23:26:07,854 INFO    : [INFO] Finished constructing vocabulary of 50000 total words. Last word added: chaudhary
+2024-03-20 23:26:08,002 INFO    : [INFO] Loading external word embedding...
+2024-03-20 23:26:26,600 INFO    : [INFO] External Word Embedding iov count: 48427, oov count: 1573
+2024-03-20 23:26:27,406 INFO    : Namespace(data_dir='../cnndm', cache_dir='../cache/CNNDM', embedding_path='../glove.6B.300d.txt', model='HSG', restore_model='None', save_root='models', log_root='logs', seed=666, gpu='0', cuda=True, vocab_size=50000, n_epochs=20, batch_size=32, n_iter=1, word_embedding=True, word_emb_dim=300, embed_train=False, feat_embed_size=50, n_layers=1, lstm_hidden_state=128, lstm_layers=2, bidirectional=True, n_feature_size=600, hidden_size=300, ffn_inner_hidden_size=512, n_head=6, recurrent_dropout_prob=0.1, atten_dropout_prob=0.1, ffn_dropout_prob=0.1, use_orthnormal_init=True, sent_max_len=100, doc_max_timesteps=50, lr=0.0005, lr_descent=True, grad_clip=True, max_grad_norm=1.0, num_workers=32, m=3)
+2024-03-20 23:26:27,539 INFO    : [MODEL] HeterSumGraph 
+2024-03-20 23:26:27,540 INFO    : [INFO] Start reading ExampleSet
+2024-03-20 23:26:28,612 INFO    : [INFO] Finish reading ExampleSet. Total time is 1.072462, Total size is 287084
+2024-03-20 23:26:28,612 INFO    : [INFO] Loading filter word File ../cache/CNNDM/filter_word.txt
+2024-03-20 23:26:28,831 INFO    : [INFO] Loading word2sent TFIDF file from ../cache/CNNDM/train.w2s.tfidf.jsonl!
+2024-03-20 23:28:12,031 INFO    : [INFO] Start reading ExampleSet
+2024-03-20 23:28:12,037 INFO    : [INFO] Finish reading ExampleSet. Total time is 0.006162, Total size is 13367
+2024-03-20 23:28:12,037 INFO    : [INFO] Loading filter word File ../cache/CNNDM/filter_word.txt
+2024-03-20 23:28:12,245 INFO    : [INFO] Loading word2sent TFIDF file from ../cache/CNNDM/val.w2s.tfidf.jsonl!
+2024-03-20 23:28:22,493 INFO    : [INFO] Use cuda
+2024-03-20 23:28:22,493 INFO    : [INFO] Create new model for training...
+2024-03-20 23:28:22,494 INFO    : [INFO] Starting run_training
+2024-03-20 23:28:37,774 INFO    :        | end of iter   0 | time:  2.88s | train loss 0.1813 | 
+2024-03-20 23:38:02,571 ERROR   : [Error] Caught keyboard interrupt on worker. Stopping supervisor...
+2024-03-20 23:38:02,710 INFO    : [INFO] Saving model to models/train/earlystop
diff --git a/logs/train_20240320_234834 b/logs/train_20240320_234834
new file mode 100644
index 0000000..1c0357a
--- /dev/null
+++ b/logs/train_20240320_234834
@@ -0,0 +1,19 @@
+2024-03-20 23:48:34,652 INFO    : Pytorch 2.1.2+cu121
+2024-03-20 23:48:34,652 INFO    : [INFO] Create Vocab, vocab path is ../cache/CNNDM/vocab
+2024-03-20 23:48:34,706 INFO    : [INFO] max_size of vocab was specified as 50000; we now have 50000 words. Stopping reading.
+2024-03-20 23:48:34,706 INFO    : [INFO] Finished constructing vocabulary of 50000 total words. Last word added: chaudhary
+2024-03-20 23:48:34,850 INFO    : [INFO] Loading external word embedding...
+2024-03-20 23:48:58,365 INFO    : [INFO] External Word Embedding iov count: 48427, oov count: 1573
+2024-03-20 23:48:59,214 INFO    : Namespace(data_dir='../cnndm', cache_dir='../cache/CNNDM', embedding_path='../glove.6B.300d.txt', model='HSG', restore_model='None', save_root='models', log_root='logs', seed=666, gpu='0', cuda=True, vocab_size=50000, n_epochs=20, batch_size=128, n_iter=1, word_embedding=True, word_emb_dim=300, embed_train=False, feat_embed_size=50, n_layers=1, lstm_hidden_state=128, lstm_layers=2, bidirectional=True, n_feature_size=600, hidden_size=300, ffn_inner_hidden_size=512, n_head=6, recurrent_dropout_prob=0.1, atten_dropout_prob=0.1, ffn_dropout_prob=0.1, use_orthnormal_init=True, sent_max_len=100, doc_max_timesteps=50, lr=0.0005, lr_descent=True, grad_clip=True, max_grad_norm=1.0, num_workers=10, m=3)
+2024-03-20 23:48:59,359 INFO    : [MODEL] HeterSumGraph 
+2024-03-20 23:48:59,359 INFO    : [INFO] Start reading ExampleSet
+2024-03-20 23:49:00,423 INFO    : [INFO] Finish reading ExampleSet. Total time is 1.064523, Total size is 287084
+2024-03-20 23:49:00,424 INFO    : [INFO] Loading filter word File ../cache/CNNDM/filter_word.txt
+2024-03-20 23:49:00,668 INFO    : [INFO] Loading word2sent TFIDF file from ../cache/CNNDM/index_to_file_mapping_train.json!
+2024-03-20 23:49:01,244 INFO    : [INFO] Start reading ExampleSet
+2024-03-20 23:49:01,251 INFO    : [INFO] Finish reading ExampleSet. Total time is 0.007632, Total size is 13367
+2024-03-20 23:49:01,252 INFO    : [INFO] Loading filter word File ../cache/CNNDM/filter_word.txt
+2024-03-20 23:49:01,463 INFO    : [INFO] Loading word2sent TFIDF file from ../cache/CNNDM/index_to_file_mapping_val.json!
+2024-03-20 23:49:06,881 INFO    : [INFO] Use cuda
+2024-03-20 23:49:06,882 INFO    : [INFO] Create new model for training...
+2024-03-20 23:49:06,890 INFO    : [INFO] Starting run_training
diff --git a/logs/train_20240320_234956 b/logs/train_20240320_234956
new file mode 100644
index 0000000..f84dfc9
--- /dev/null
+++ b/logs/train_20240320_234956
@@ -0,0 +1,22 @@
+2024-03-20 23:49:56,717 INFO    : Pytorch 2.1.2+cu121
+2024-03-20 23:49:56,718 INFO    : [INFO] Create Vocab, vocab path is ../cache/CNNDM/vocab
+2024-03-20 23:49:56,789 INFO    : [INFO] max_size of vocab was specified as 50000; we now have 50000 words. Stopping reading.
+2024-03-20 23:49:56,789 INFO    : [INFO] Finished constructing vocabulary of 50000 total words. Last word added: chaudhary
+2024-03-20 23:49:56,979 INFO    : [INFO] Loading external word embedding...
+2024-03-20 23:50:15,619 INFO    : [INFO] External Word Embedding iov count: 48427, oov count: 1573
+2024-03-20 23:50:16,439 INFO    : Namespace(data_dir='../cnndm', cache_dir='../cache/CNNDM', embedding_path='../glove.6B.300d.txt', model='HSG', restore_model='None', save_root='models', log_root='logs', seed=666, gpu='0', cuda=True, vocab_size=50000, n_epochs=20, batch_size=128, n_iter=1, word_embedding=True, word_emb_dim=300, embed_train=False, feat_embed_size=50, n_layers=1, lstm_hidden_state=128, lstm_layers=2, bidirectional=True, n_feature_size=600, hidden_size=300, ffn_inner_hidden_size=512, n_head=6, recurrent_dropout_prob=0.1, atten_dropout_prob=0.1, ffn_dropout_prob=0.1, use_orthnormal_init=True, sent_max_len=100, doc_max_timesteps=50, lr=0.0005, lr_descent=True, grad_clip=True, max_grad_norm=1.0, num_workers=10, m=3)
+2024-03-20 23:50:16,541 INFO    : [MODEL] HeterSumGraph 
+2024-03-20 23:50:16,542 INFO    : [INFO] Start reading ExampleSet
+2024-03-20 23:50:17,636 INFO    : [INFO] Finish reading ExampleSet. Total time is 1.094221, Total size is 287084
+2024-03-20 23:50:17,636 INFO    : [INFO] Loading filter word File ../cache/CNNDM/filter_word.txt
+2024-03-20 23:50:17,849 INFO    : [INFO] Loading word2sent TFIDF file from ../cache/CNNDM/index_to_file_mapping_train.json!
+2024-03-20 23:50:18,386 INFO    : [INFO] Start reading ExampleSet
+2024-03-20 23:50:18,393 INFO    : [INFO] Finish reading ExampleSet. Total time is 0.006829, Total size is 13367
+2024-03-20 23:50:18,393 INFO    : [INFO] Loading filter word File ../cache/CNNDM/filter_word.txt
+2024-03-20 23:50:18,598 INFO    : [INFO] Loading word2sent TFIDF file from ../cache/CNNDM/index_to_file_mapping_val.json!
+2024-03-20 23:50:23,938 INFO    : [INFO] Use cuda
+2024-03-20 23:50:23,938 INFO    : [INFO] Create new model for training...
+2024-03-20 23:50:23,939 INFO    : [INFO] Starting run_training
+2024-03-20 23:51:37,496 INFO    :        | end of iter   0 | time:  7.51s | train loss 0.1832 | 
+2024-03-20 23:53:39,334 ERROR   : [Error] Caught keyboard interrupt on worker. Stopping supervisor...
+2024-03-20 23:53:39,485 INFO    : [INFO] Saving model to models/train/earlystop
diff --git a/logs/train_20240320_235406 b/logs/train_20240320_235406
new file mode 100644
index 0000000..22d88f6
--- /dev/null
+++ b/logs/train_20240320_235406
@@ -0,0 +1,115 @@
+2024-03-20 23:54:06,210 INFO    : Pytorch 2.1.2+cu121
+2024-03-20 23:54:06,210 INFO    : [INFO] Create Vocab, vocab path is ../cache/CNNDM/vocab
+2024-03-20 23:54:06,262 INFO    : [INFO] max_size of vocab was specified as 50000; we now have 50000 words. Stopping reading.
+2024-03-20 23:54:06,262 INFO    : [INFO] Finished constructing vocabulary of 50000 total words. Last word added: chaudhary
+2024-03-20 23:54:06,415 INFO    : [INFO] Loading external word embedding...
+2024-03-20 23:54:24,836 INFO    : [INFO] External Word Embedding iov count: 48427, oov count: 1573
+2024-03-20 23:54:25,685 INFO    : Namespace(data_dir='../cnndm', cache_dir='../cache/CNNDM', embedding_path='../glove.6B.300d.txt', model='HSG', restore_model='None', save_root='models', log_root='logs', seed=666, gpu='0', cuda=True, vocab_size=50000, n_epochs=20, batch_size=128, n_iter=1, word_embedding=True, word_emb_dim=300, embed_train=False, feat_embed_size=50, n_layers=1, lstm_hidden_state=128, lstm_layers=2, bidirectional=True, n_feature_size=600, hidden_size=300, ffn_inner_hidden_size=512, n_head=6, recurrent_dropout_prob=0.1, atten_dropout_prob=0.1, ffn_dropout_prob=0.1, use_orthnormal_init=True, sent_max_len=100, doc_max_timesteps=50, lr=0.0005, lr_descent=True, grad_clip=True, max_grad_norm=1.0, num_workers=10, m=3)
+2024-03-20 23:54:25,789 INFO    : [MODEL] HeterSumGraph 
+2024-03-20 23:54:25,789 INFO    : [INFO] Start reading ExampleSet
+2024-03-20 23:54:26,863 INFO    : [INFO] Finish reading ExampleSet. Total time is 1.074061, Total size is 287084
+2024-03-20 23:54:26,864 INFO    : [INFO] Loading filter word File ../cache/CNNDM/filter_word.txt
+2024-03-20 23:54:27,072 INFO    : [INFO] Loading word2sent TFIDF file from ../cache/CNNDM/index_to_file_mapping_train.json!
+2024-03-20 23:54:27,616 INFO    : [INFO] Start reading ExampleSet
+2024-03-20 23:54:27,624 INFO    : [INFO] Finish reading ExampleSet. Total time is 0.006964, Total size is 13367
+2024-03-20 23:54:27,624 INFO    : [INFO] Loading filter word File ../cache/CNNDM/filter_word.txt
+2024-03-20 23:54:27,827 INFO    : [INFO] Loading word2sent TFIDF file from ../cache/CNNDM/index_to_file_mapping_val.json!
+2024-03-20 23:54:33,224 INFO    : [INFO] Use cuda
+2024-03-20 23:54:33,224 INFO    : [INFO] Create new model for training...
+2024-03-20 23:54:33,235 INFO    : [INFO] Starting run_training
+2024-03-20 23:55:47,038 INFO    :        | end of iter   0 | time:  7.01s | train loss 0.1832 | 
+2024-03-21 00:07:00,659 INFO    :        | end of iter 100 | time:  5.03s | train loss 6.8293 | 
+2024-03-21 00:18:17,971 INFO    :        | end of iter 200 | time:  4.83s | train loss 6.0123 | 
+2024-03-21 00:29:47,921 INFO    :        | end of iter 300 | time:  4.85s | train loss 5.8906 | 
+2024-03-21 00:41:15,446 INFO    :        | end of iter 400 | time:  4.87s | train loss 5.8731 | 
+2024-03-21 00:52:44,601 INFO    :        | end of iter 500 | time:  4.84s | train loss 5.8042 | 
+2024-03-21 01:04:21,949 INFO    :        | end of iter 600 | time:  4.98s | train loss 5.8022 | 
+2024-03-21 01:15:50,237 INFO    :        | end of iter 700 | time:  5.07s | train loss 5.7440 | 
+2024-03-21 01:27:10,142 INFO    :        | end of iter 800 | time:  4.90s | train loss 5.7187 | 
+2024-03-21 01:38:48,960 INFO    :        | end of iter 900 | time:  4.89s | train loss 5.7385 | 
+2024-03-21 01:50:09,796 INFO    :        | end of iter 1000 | time:  4.99s | train loss 5.7498 | 
+2024-03-21 02:01:40,929 INFO    :        | end of iter 1100 | time:  5.00s | train loss 5.6977 | 
+2024-03-21 02:12:42,642 INFO    :        | end of iter 1200 | time:  4.91s | train loss 5.7142 | 
+2024-03-21 02:24:13,824 INFO    :        | end of iter 1300 | time:  4.94s | train loss 5.7059 | 
+2024-03-21 02:35:31,313 INFO    :        | end of iter 1400 | time:  4.86s | train loss 5.6905 | 
+2024-03-21 02:46:51,744 INFO    :        | end of iter 1500 | time:  5.02s | train loss 5.6739 | 
+2024-03-21 02:58:33,938 INFO    :        | end of iter 1600 | time:  3.75s | train loss 5.6726 | 
+2024-03-21 03:09:57,553 INFO    :        | end of iter 1700 | time:  4.79s | train loss 5.6532 | 
+2024-03-21 03:21:10,192 INFO    :        | end of iter 1800 | time:  5.15s | train loss 5.6407 | 
+2024-03-21 03:32:52,210 INFO    :        | end of iter 1900 | time:  5.03s | train loss 5.6589 | 
+2024-03-21 03:44:23,887 INFO    :        | end of iter 2000 | time:  4.94s | train loss 5.6496 | 
+2024-03-21 03:55:41,090 INFO    :        | end of iter 2100 | time:  5.07s | train loss 5.6708 | 
+2024-03-21 04:07:09,601 INFO    :        | end of iter 2200 | time:  5.04s | train loss 5.6751 | 
+2024-03-21 04:11:30,519 INFO    : [INFO] The learning rate now is 0.000250
+2024-03-21 04:11:30,520 INFO    :    | end of epoch   1 | time: 15416.23s | epoch train loss 5.7879 | 
+2024-03-21 04:11:30,520 INFO    : [INFO] Found new best model with 5.788 running_train_loss. Saving to models/train/bestmodel
+2024-03-21 04:11:30,654 INFO    : [INFO] Saving model to models/train/bestmodel
+2024-03-21 04:11:30,654 INFO    : [INFO] Starting eval for this model ...
+2024-03-21 04:27:25,859 INFO    : [INFO] End of valid | time: 955.20s | valid loss 5.7359 | 
+2024-03-21 04:27:25,860 INFO    : Rouge1:
+	p:0.416027, r:0.479910, f:0.432834
+Rouge2:
+	p:0.180950, r:0.222410, f:0.191929
+Rougel:
+	p:0.374796, r:0.431777, f:0.389638
+
+2024-03-21 04:27:25,860 INFO    : [INFO] Validset match_true 13607, pred 40101, true 30173, total 402328, match 359268
+2024-03-21 04:27:25,860 INFO    : [INFO] The size of totalset is 13367, sent_number is 402328, accu is 0.892973, precision is 0.339318, recall is 0.450966, F is 0.387256
+2024-03-21 04:27:25,860 INFO    : [INFO] Found new best model with 5.735910 running_avg_loss. The original loss is None, Saving to models/eval/bestmodel_0
+2024-03-21 04:27:25,998 INFO    : [INFO] Found new best model with 0.387256 F. The original F is None, Saving to models/eval/bestFmodel
+2024-03-21 04:28:40,536 INFO    :        | end of iter   0 | time:  4.06s | train loss 0.0543 | 
+2024-03-21 04:40:41,975 INFO    :        | end of iter 100 | time:  4.93s | train loss 5.6226 | 
+2024-03-21 04:52:48,223 INFO    :        | end of iter 200 | time:  4.96s | train loss 5.6228 | 
+2024-03-21 05:04:42,136 INFO    :        | end of iter 300 | time:  5.20s | train loss 5.6179 | 
+2024-03-21 05:16:31,509 INFO    :        | end of iter 400 | time:  4.86s | train loss 5.6057 | 
+2024-03-21 05:28:10,120 INFO    :        | end of iter 500 | time:  5.02s | train loss 5.6008 | 
+2024-03-21 05:39:58,566 INFO    :        | end of iter 600 | time:  4.91s | train loss 5.6140 | 
+2024-03-21 05:52:03,194 INFO    :        | end of iter 700 | time:  4.92s | train loss 5.5911 | 
+2024-03-21 06:03:41,727 INFO    :        | end of iter 800 | time:  5.08s | train loss 5.5671 | 
+2024-03-21 06:15:44,626 INFO    :        | end of iter 900 | time:  4.80s | train loss 5.5743 | 
+2024-03-21 06:27:31,372 INFO    :        | end of iter 1000 | time:  4.94s | train loss 5.6030 | 
+2024-03-21 06:39:29,377 INFO    :        | end of iter 1100 | time:  4.97s | train loss 5.5848 | 
+2024-03-21 06:51:32,386 INFO    :        | end of iter 1200 | time:  5.52s | train loss 5.5690 | 
+2024-03-21 07:03:33,925 INFO    :        | end of iter 1300 | time:  4.84s | train loss 5.5867 | 
+2024-03-21 07:15:30,486 INFO    :        | end of iter 1400 | time:  5.09s | train loss 5.5787 | 
+2024-03-21 07:27:20,862 INFO    :        | end of iter 1500 | time:  5.53s | train loss 5.6136 | 
+2024-03-21 07:39:15,540 INFO    :        | end of iter 1600 | time:  4.88s | train loss 5.5748 | 
+2024-03-21 07:51:12,129 INFO    :        | end of iter 1700 | time:  5.02s | train loss 5.5891 | 
+2024-03-21 08:03:00,133 INFO    :        | end of iter 1800 | time:  4.85s | train loss 5.5929 | 
+2024-03-21 08:14:45,040 INFO    :        | end of iter 1900 | time:  4.97s | train loss 5.5998 | 
+2024-03-21 08:26:53,526 INFO    :        | end of iter 2000 | time:  4.86s | train loss 5.6104 | 
+2024-03-21 08:38:43,773 INFO    :        | end of iter 2100 | time:  5.10s | train loss 5.5694 | 
+2024-03-21 08:50:35,949 INFO    :        | end of iter 2200 | time:  5.14s | train loss 5.5668 | 
+2024-03-21 08:55:23,620 INFO    : [INFO] The learning rate now is 0.000167
+2024-03-21 08:55:23,620 INFO    :    | end of epoch   2 | time: 16077.32s | epoch train loss 5.5931 | 
+2024-03-21 08:55:23,620 INFO    : [INFO] Found new best model with 5.593 running_train_loss. Saving to models/train/bestmodel
+2024-03-21 08:55:23,755 INFO    : [INFO] Saving model to models/train/bestmodel
+2024-03-21 08:55:23,755 INFO    : [INFO] Starting eval for this model ...
+2024-03-21 09:11:08,796 INFO    : [INFO] End of valid | time: 945.04s | valid loss 5.5904 | 
+2024-03-21 09:11:08,796 INFO    : Rouge1:
+	p:0.417907, r:0.484292, f:0.435885
+Rouge2:
+	p:0.183534, r:0.225782, f:0.194802
+Rougel:
+	p:0.377786, r:0.437270, f:0.393752
+
+2024-03-21 09:11:08,796 INFO    : [INFO] Validset match_true 13993, pred 40101, true 30173, total 402328, match 360040
+2024-03-21 09:11:08,796 INFO    : [INFO] The size of totalset is 13367, sent_number is 402328, accu is 0.894892, precision is 0.348944, recall is 0.463759, F is 0.398241
+2024-03-21 09:11:08,796 INFO    : [INFO] Found new best model with 5.590423 running_avg_loss. The original loss is 5.735910, Saving to models/eval/bestmodel_1
+2024-03-21 09:11:08,933 INFO    : [INFO] Found new best model with 0.398241 F. The original F is 0.387256, Saving to models/eval/bestFmodel
+2024-03-21 09:12:23,702 INFO    :        | end of iter   0 | time:  5.15s | train loss 0.0589 | 
+2024-03-21 09:24:53,690 INFO    :        | end of iter 100 | time:  5.09s | train loss 5.5627 | 
+2024-03-21 09:36:51,370 INFO    :        | end of iter 200 | time:  4.89s | train loss 5.5666 | 
+2024-03-21 09:48:42,781 INFO    :        | end of iter 300 | time:  5.11s | train loss 5.5582 | 
+2024-03-21 10:00:53,105 INFO    :        | end of iter 400 | time:  4.92s | train loss 5.5636 | 
+2024-03-21 10:13:02,156 INFO    :        | end of iter 500 | time:  4.85s | train loss 5.5557 | 
+2024-03-21 10:25:00,739 INFO    :        | end of iter 600 | time:  5.02s | train loss 5.5537 | 
+2024-03-21 10:37:01,523 INFO    :        | end of iter 700 | time:  5.49s | train loss 5.5316 | 
+2024-03-21 10:48:55,432 INFO    :        | end of iter 800 | time:  5.15s | train loss 5.5658 | 
+2024-03-21 11:00:48,352 INFO    :        | end of iter 900 | time:  5.11s | train loss 5.5339 | 
+2024-03-21 11:12:57,786 INFO    :        | end of iter 1000 | time:  5.24s | train loss 5.5187 | 
+2024-03-21 11:24:55,311 INFO    :        | end of iter 1100 | time:  5.15s | train loss 5.5553 | 
+2024-03-21 11:36:42,145 INFO    :        | end of iter 1200 | time:  5.19s | train loss 5.5829 | 
+2024-03-21 11:48:41,371 INFO    :        | end of iter 1300 | time:  5.14s | train loss 5.5052 | 
+2024-03-21 12:00:52,944 INFO    :        | end of iter 1400 | time:  4.94s | train loss 5.5066 | 
+2024-03-21 12:12:39,953 INFO    :        | end of iter 1500 | time:  4.85s | train loss 5.5598 | 
diff --git a/logs/train_20240404_155936 b/logs/train_20240404_155936
new file mode 100644
index 0000000..14ef92a
--- /dev/null
+++ b/logs/train_20240404_155936
@@ -0,0 +1,20 @@
+2024-04-04 15:59:36,231 INFO    : Pytorch 2.1.2+cu121
+2024-04-04 15:59:36,231 INFO    : [INFO] Create Vocab, vocab path is ../cache/CNNDM/vocab
+2024-04-04 15:59:36,277 INFO    : [INFO] max_size of vocab was specified as 50000; we now have 50000 words. Stopping reading.
+2024-04-04 15:59:36,278 INFO    : [INFO] Finished constructing vocabulary of 50000 total words. Last word added: chaudhary
+2024-04-04 15:59:36,428 INFO    : [INFO] Loading external word embedding...
+2024-04-04 15:59:54,298 INFO    : [INFO] External Word Embedding iov count: 48427, oov count: 1573
+2024-04-04 15:59:55,103 INFO    : Namespace(data_dir='../cnndm', cache_dir='../cache/CNNDM', embedding_path='../glove.6B.300d.txt', model='HSG', restore_model='None', save_root='models', log_root='logs', seed=666, gpu='0', cuda=True, vocab_size=50000, n_epochs=20, batch_size=128, n_iter=1, word_embedding=True, word_emb_dim=300, embed_train=False, feat_embed_size=50, n_layers=1, lstm_hidden_state=128, lstm_layers=2, bidirectional=True, n_feature_size=600, hidden_size=300, ffn_inner_hidden_size=512, n_head=6, recurrent_dropout_prob=0.1, atten_dropout_prob=0.1, ffn_dropout_prob=0.1, use_orthnormal_init=True, sent_max_len=100, doc_max_timesteps=50, lr=0.0005, lr_descent=True, grad_clip=True, max_grad_norm=1.0, num_workers=10, m=3)
+2024-04-04 15:59:55,240 INFO    : [MODEL] HeterSumGraph 
+2024-04-04 15:59:55,240 INFO    : [INFO] Start reading ExampleSet
+2024-04-04 15:59:56,291 INFO    : [INFO] Finish reading ExampleSet. Total time is 1.050431, Total size is 287084
+2024-04-04 15:59:56,291 INFO    : [INFO] Loading filter word File ../cache/CNNDM/filter_word.txt
+2024-04-04 15:59:56,500 INFO    : [INFO] Loading word2sent TFIDF file from ../cache/CNNDM/index_to_file_mapping_train.json!
+2024-04-04 15:59:57,013 INFO    : [INFO] Start reading ExampleSet
+2024-04-04 15:59:57,019 INFO    : [INFO] Finish reading ExampleSet. Total time is 0.006243, Total size is 13367
+2024-04-04 15:59:57,019 INFO    : [INFO] Loading filter word File ../cache/CNNDM/filter_word.txt
+2024-04-04 15:59:57,222 INFO    : [INFO] Loading word2sent TFIDF file from ../cache/CNNDM/index_to_file_mapping_val.json!
+2024-04-04 16:00:02,579 INFO    : [INFO] Use cuda
+2024-04-04 16:00:02,580 INFO    : [INFO] Create new model for training...
+2024-04-04 16:00:02,580 INFO    : [INFO] Starting run_training
+2024-04-04 16:01:03,989 INFO    :        | end of iter   0 | time: 10.09s | train loss 0.1832 | 
diff --git a/module/GAT.py b/module/GAT.py
index e7e7d45..83cabe4 100644
--- a/module/GAT.py
+++ b/module/GAT.py
@@ -54,9 +54,11 @@ def forward(self, g, w, s):
             origin, neighbor = None, None
 
         if self.layerType == "S2S":
-            h = F.elu(self.layer(g, neighbor))
+            x = self.layer(g, neighbor)
+            h = F.elu(x)
         else:
-            h = F.elu(self.layer(g, origin, neighbor))
+            x = self.layer(g, origin, neighbor)
+            h = F.elu(x)
         h = h + origin
         h = self.ffn(h.unsqueeze(0)).squeeze(0)
         return h
diff --git a/module/GATLayer.py b/module/GATLayer.py
index 994ee0b..a36066f 100644
--- a/module/GATLayer.py
+++ b/module/GATLayer.py
@@ -108,6 +108,7 @@ def forward(self, g, dsth, srch):
         # print("id in WSGATLayer")
         # print(wnode_id, snode_id, wsedge_id)
         g.nodes[wnode_id].data['z'] = self.fc(srch)
+#        print(dsth.shape)
         g.nodes[snode_id].data['z'] = self.fc(dsth)
         g.apply_edges(self.edge_attention, edges=wsedge_id)
         g.pull(snode_id, self.message_func, self.reduce_func)
diff --git a/module/dataloader.py b/module/dataloader.py
index 28d5702..05eb8fb 100644
--- a/module/dataloader.py
+++ b/module/dataloader.py
@@ -159,7 +159,9 @@ def __init__(self, data_path, vocab, doc_max_timesteps, sent_max_len, filter_wor
 
         logger.info("[INFO] Start reading %s", self.__class__.__name__)
         start = time.time()
-        self.example_list = readJson(data_path)
+        with open(data_path, "r", encoding="utf-8") as f:
+            self.example_list = json.load(f)
+        # print( self.example_list)
         logger.info("[INFO] Finish reading %s. Total time is %f, Total size is %d", self.__class__.__name__,
                     time.time() - start, len(self.example_list))
         self.size = len(self.example_list)
@@ -182,12 +184,19 @@ def __init__(self, data_path, vocab, doc_max_timesteps, sent_max_len, filter_wor
                 break
 
         logger.info("[INFO] Loading word2sent TFIDF file from %s!" % w2s_path)
-        self.w2s_tfidf = readJson(w2s_path)
+        with open(w2s_path, "r") as f:
+            self.w2s_tfidf = json.load(f)
 
     def get_example(self, index):
-        e = self.example_list[index]
+        # print(self.example_list[str(index)])
+        file_name, new_index  = self.example_list[str(index)]
+        # Each example_list entry is (file_name, new_index): read that file
+        # with readJson and take the element stored at new_index.
+        e = readJson(file_name)
+        e = e[new_index]
         e["summary"] = e.setdefault("summary", [])
         example = Example(e["text"], e["summary"], self.vocab, self.sent_max_len, e["label"])
+        
         return example
 
     def pad_label_m(self, label_matrix):
@@ -232,7 +241,7 @@ def CreateGraph(self, input_pad, label, w2s_w):
             edge:
                 word2sent, sent2word:  tffrac=int, dtype=0
         """
-        G = dgl.DGLGraph()
+        G = dgl.graph(([], []))
         wid2nid, nid2wid = self.AddWordNode(G, input_pad)
         w_nodes = len(nid2wid)
 
@@ -266,6 +275,12 @@ def CreateGraph(self, input_pad, label, w2s_w):
         G.nodes[sentid2nid].data["label"] = torch.LongTensor(label)  # [N, doc_max]
 
         return G
+    def get_w2s(self, index):
+        """Return the word2sent TF-IDF record stored for example `index`."""
+        # w2s_tfidf is keyed by str(index); each value is [file to read, row in it].
+        shard_path, row = self.w2s_tfidf[str(index)]
+        return readJson(shard_path)[row]
+
 
     def __getitem__(self, index):
         """
@@ -277,7 +292,9 @@ def __getitem__(self, index):
         item = self.get_example(index)
         input_pad = item.enc_sent_input_pad[:self.doc_max_timesteps]
         label = self.pad_label_m(item.label_matrix)
-        w2s_w = self.w2s_tfidf[index]
+        # w2s_w = self.w2s_tfidf[index]
+        w2s_w = self.get_w2s(index)
+        
         G = self.CreateGraph(input_pad, label, w2s_w)
 
         return G, index
@@ -345,7 +362,7 @@ def CreateGraph(self, docLen, sent_pad, doc_pad, label, w2s_w, w2d_w):
                 sent2doc: dtype=2
         """
         # add word nodes
-        G = dgl.DGLGraph()
+        G = dgl.graph(([], []))
         wid2nid, nid2wid = self.AddWordNode(G, sent_pad)
         w_nodes = len(nid2wid)
 
diff --git a/run.sh b/run.sh
new file mode 100755
index 0000000..f692baa
--- /dev/null
+++ b/run.sh
@@ -0,0 +1,3 @@
+#!/bin/bash
+# Train the HSG model on the data in ../cnndm; checkpoints go to models/, logs to logs/.
+python train.py --cuda --gpu 0 --data_dir ../cnndm --cache_dir ../cache/CNNDM  --embedding_path ../glove.6B.300d.txt --model HSG --save_root models --log_root logs  --lr_descent --grad_clip -m 3 --batch_size 128 --num_workers 10
diff --git a/tools/datasplitter.py b/tools/datasplitter.py
new file mode 100644
index 0000000..2d2c117
--- /dev/null
+++ b/tools/datasplitter.py
@@ -0,0 +1,60 @@
+
+import json
+import time 
+import os 
+import argparse
+
+def parse_args():
+    """Parse the splitter's four required command-line options into a namespace."""
+    cli = argparse.ArgumentParser(description="Split a file into multiple files")
+    cli.add_argument("--input_file", required=True, type=str, help="Path to the file to be split")
+    cli.add_argument("--num_files", required=True, type=int, help="Number of files to split into")
+    cli.add_argument("--output_dir", required=True, type=str, help="Directory to save the split files")
+    cli.add_argument("--index_to_file_mapping", required=True, type=str, help="Path to save the index to file mapping")
+    return cli.parse_args()
+
+
+def split(path: str,     # Path to the file to be split
+          num_files: int, # Number of files to split into
+          output_dir: str # Directory to save the split files
+          ):
+    """
+    Split the lines of path into consecutive shards "{output_dir}/{i}.jsonl" and
+    return {line index: [shard path, row in shard]}. Every shard is rewritten from
+    scratch. Raises ValueError when num_files is not a positive integer.
+    """
+    if num_files <= 0:
+        raise ValueError(f"num_files must be a positive integer, got {num_files}")
+    now = time.time()
+    with open(path, encoding="utf-8") as f:
+        data = f.readlines()
+    print("Time taken to read the file is ", time.time()-now)
+    os.makedirs(output_dir, exist_ok=True)
+    len_in_each_file = len(data)//num_files + 1
+    print(len_in_each_file)
+    index_to_file_mapping = {}
+    for start in range(0, len(data), len_in_each_file):
+        shard_path = f"{output_dir}/{start // len_in_each_file}.jsonl"
+        chunk = data[start:start + len_in_each_file]
+        with open(shard_path, "w", encoding="utf-8") as f:  # "w": a re-run must not append
+            f.writelines(chunk)
+        for row in range(len(chunk)):
+            index_to_file_mapping[start + row] = [shard_path, row]
+    print(len(index_to_file_mapping))
+    return index_to_file_mapping
+
+if __name__ == "__main__":
+    # Entry point: shard the input, then save the index -> shard mapping as JSON.
+    args = parse_args()
+    try:
+        mapping = split(args.input_file, args.num_files, args.output_dir)
+    except (ValueError, FileNotFoundError) as e:
+        # A failed split leaves no mapping to save; exit non-zero with the reason
+        # instead of falling through to json.dump and dying on a NameError.
+        raise SystemExit(f"datasplitter: {e}")
+    # json.dump writes the integer line indices as string keys, which is how the
+    # dataset code looks them up (str(index)).
+    with open(args.index_to_file_mapping, "w") as f:
+        json.dump(mapping, f)
+
+
diff --git a/tools/split_data.sh b/tools/split_data.sh
new file mode 100644
index 0000000..22ccc2a
--- /dev/null
+++ b/tools/split_data.sh
@@ -0,0 +1,9 @@
+#!/bin/bash
+# Shard the CNN/DM label and word2sent TF-IDF JSONL files with datasplitter.py.
+set -euo pipefail
+cd "$(dirname "$0")"  # datasplitter.py is invoked by a path relative to this script
+
+python datasplitter.py --input_file /scratch/jainit/cache/CNNDM/train.w2s.tfidf.jsonl --num_files 2000 --output_dir /scratch/jainit/cache/CNNDM/train --index_to_file_mapping /scratch/jainit/cache/CNNDM/index_to_file_mapping_train.json
+python datasplitter.py --input_file /scratch/jainit/cache/CNNDM/val.w2s.tfidf.jsonl --num_files 2000 --output_dir /scratch/jainit/cache/CNNDM/val --index_to_file_mapping /scratch/jainit/cache/CNNDM/index_to_file_mapping_val.json
+python datasplitter.py --input_file /scratch/jainit/cnndm/val.label.jsonl --num_files 2000 --output_dir /scratch/jainit/cnndm/val --index_to_file_mapping /scratch/jainit/cnndm/index_to_file_mapping_val.json
+python datasplitter.py --input_file /scratch/jainit/cnndm/train.label.jsonl --num_files 2000 --output_dir /scratch/jainit/cnndm/train --index_to_file_mapping /scratch/jainit/cnndm/index_to_file_mapping_train.json
diff --git a/train.py b/train.py
index 72ceaeb..aba10cf 100644
--- a/train.py
+++ b/train.py
@@ -110,8 +110,8 @@ def run_training(model, train_loader, valid_loader, valset, hps, train_dir):
             model.train()
 
             if hps.cuda:
-                G.to(torch.device("cuda"))
-
+                G = G.to(torch.device("cuda"))
+            graph_device = G.device
             outputs = model.forward(G)  # [n_snodes, 2]
             snode_id = G.filter_nodes(lambda nodes: nodes.data["dtype"] == 1)
             label = G.ndata["label"][snode_id].sum(-1)  # [n_nodes]
@@ -202,7 +202,7 @@ def run_eval(model, loader, valset, hps, best_loss, best_F, non_descent_cnt, sav
         tester = SLTester(model, hps.m)
         for i, (G, index) in enumerate(loader):
             if hps.cuda:
-                G.to(torch.device("cuda"))
+               G = G.to(torch.device(0))
             tester.evaluation(G, index, valset)
 
     running_avg_loss = tester.running_avg_loss
@@ -291,10 +291,10 @@ def main():
     parser.add_argument('--lstm_hidden_state', type=int, default=128, help='size of lstm hidden state [default: 128]')
     parser.add_argument('--lstm_layers', type=int, default=2, help='Number of lstm layers [default: 2]')
     parser.add_argument('--bidirectional', action='store_true', default=True, help='whether to use bidirectional LSTM [default: True]')
-    parser.add_argument('--n_feature_size', type=int, default=128, help='size of node feature [default: 128]')
-    parser.add_argument('--hidden_size', type=int, default=64, help='hidden size [default: 64]')
+    parser.add_argument('--n_feature_size', type=int, default=600, help='size of node feature [default: 128]')
+    parser.add_argument('--hidden_size', type=int, default=300, help='hidden size [default: 64]')
     parser.add_argument('--ffn_inner_hidden_size', type=int, default=512,help='PositionwiseFeedForward inner hidden size [default: 512]')
-    parser.add_argument('--n_head', type=int, default=8, help='multihead attention number [default: 8]')
+    parser.add_argument('--n_head', type=int, default=6, help='multihead attention number [default: 8]')
     parser.add_argument('--recurrent_dropout_prob', type=float, default=0.1,help='recurrent dropout prob [default: 0.1]')
     parser.add_argument('--atten_dropout_prob', type=float, default=0.1, help='attention dropout prob [default: 0.1]')
     parser.add_argument('--ffn_dropout_prob', type=float, default=0.1,help='PositionwiseFeedForward dropout prob [default: 0.1]')
@@ -307,7 +307,7 @@ def main():
     parser.add_argument('--lr_descent', action='store_true', default=False, help='learning rate descent')
     parser.add_argument('--grad_clip', action='store_true', default=False, help='for gradient clipping')
     parser.add_argument('--max_grad_norm', type=float, default=1.0, help='for gradient clipping max gradient normalization')
-
+    parser.add_argument('--num_workers', type=int, default=32, help='Number of workers in data loader [default: 4]')
     parser.add_argument('-m', type=int, default=3, help='decode summary length')
 
     args = parser.parse_args()
@@ -321,8 +321,8 @@ def main():
     torch.set_printoptions(threshold=50000)
 
     # File paths
-    DATA_FILE = os.path.join(args.data_dir, "train.label.jsonl")
-    VALID_FILE = os.path.join(args.data_dir, "val.label.jsonl")
+    DATA_FILE = os.path.join(args.data_dir, "index_to_file_mapping_train.json")
+    VALID_FILE = os.path.join(args.data_dir, "index_to_file_mapping_val.json")
     VOCAL_FILE = os.path.join(args.cache_dir, "vocab")
     FILTER_WORD = os.path.join(args.cache_dir, "filter_word.txt")
     LOG_PATH = args.log_root
@@ -351,27 +351,27 @@ def main():
     hps = args
     logger.info(hps)
 
-    train_w2s_path = os.path.join(args.cache_dir, "train.w2s.tfidf.jsonl")
-    val_w2s_path = os.path.join(args.cache_dir, "val.w2s.tfidf.jsonl")
+    train_w2s_path = os.path.join(args.cache_dir, "index_to_file_mapping_train.json")
+    val_w2s_path = os.path.join(args.cache_dir, "index_to_file_mapping_val.json")
 
     if hps.model == "HSG":
         model = HSumGraph(hps, embed)
         logger.info("[MODEL] HeterSumGraph ")
         dataset = ExampleSet(DATA_FILE, vocab, hps.doc_max_timesteps, hps.sent_max_len, FILTER_WORD, train_w2s_path)
-        train_loader = torch.utils.data.DataLoader(dataset, batch_size=hps.batch_size, shuffle=True, num_workers=32,collate_fn=graph_collate_fn)
+        train_loader = torch.utils.data.DataLoader(dataset, batch_size=hps.batch_size, shuffle=True, num_workers=args.num_workers,collate_fn=graph_collate_fn,pin_memory=True)
         del dataset
         valid_dataset = ExampleSet(VALID_FILE, vocab, hps.doc_max_timesteps, hps.sent_max_len, FILTER_WORD, val_w2s_path)
-        valid_loader = torch.utils.data.DataLoader(valid_dataset, batch_size=hps.batch_size, shuffle=False, collate_fn=graph_collate_fn, num_workers=32)
+        valid_loader = torch.utils.data.DataLoader(valid_dataset, batch_size=hps.batch_size, shuffle=False, collate_fn=graph_collate_fn, num_workers=args.num_workers,pin_memory=True)
     elif hps.model == "HDSG":
         model = HSumDocGraph(hps, embed)
         logger.info("[MODEL] HeterDocSumGraph ")
         train_w2d_path = os.path.join(args.cache_dir, "train.w2d.tfidf.jsonl")
         dataset = MultiExampleSet(DATA_FILE, vocab, hps.doc_max_timesteps, hps.sent_max_len, FILTER_WORD, train_w2s_path, train_w2d_path)
-        train_loader = torch.utils.data.DataLoader(dataset, batch_size=hps.batch_size, shuffle=True, num_workers=32,collate_fn=graph_collate_fn)
+        train_loader = torch.utils.data.DataLoader(dataset, batch_size=hps.batch_size, shuffle=True, num_workers=args.num_workers,collate_fn=graph_collate_fn,pin_memory=True)
         del dataset
         val_w2d_path = os.path.join(args.cache_dir, "val.w2d.tfidf.jsonl")
         valid_dataset = MultiExampleSet(VALID_FILE, vocab, hps.doc_max_timesteps, hps.sent_max_len, FILTER_WORD, val_w2s_path, val_w2d_path)
-        valid_loader = torch.utils.data.DataLoader(valid_dataset, batch_size=hps.batch_size, shuffle=False,collate_fn=graph_collate_fn, num_workers=32)  # Shuffle Must be False for ROUGE evaluation
+        valid_loader = torch.utils.data.DataLoader(valid_dataset, batch_size=hps.batch_size, shuffle=False,collate_fn=graph_collate_fn, num_workers=args.num_workers,pin_memory=True)  # Shuffle Must be False for ROUGE evaluation
     else:
         logger.error("[ERROR] Invalid Model Type!")
         raise NotImplementedError("Model Type has not been implemented")