-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathconcatenate_files.py
More file actions
127 lines (99 loc) · 4.55 KB
/
concatenate_files.py
File metadata and controls
127 lines (99 loc) · 4.55 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
import fnmatch
import logging
import os
import sys
import yaml
# Byte budget used when splitting the concatenated output into parts.
CLIPBOARD_SIZE = 14500
# Notice appended to a part when further parts remain to be printed.
RESERVED_MESSAGE = "\n --- This is not the complete content. To be continued. ---"
# UTF-8 byte length of RESERVED_MESSAGE.
RESERVED_SIZE = len(RESERVED_MESSAGE.encode('utf-8'))
# File deleted by the `abort-cp` command-line action.
TEMP_FILE_NAME = 'temp_state.txt'
# Send log records of level DEBUG and above to concatenate_files.log.
# NOTE: this runs as a module-level side effect, i.e. also on import.
logging.basicConfig(filename='concatenate_files.log', level=logging.DEBUG)
def gather_files(fileset_name, filesets):
    """Return the files below the current directory that belong to a fileset.

    Args:
        fileset_name: Key of the fileset to resolve.
        filesets: Mapping of fileset name to a list of ``fnmatch`` patterns,
            or the path of a YAML file holding such a mapping. A pattern
            that starts with ``!`` excludes the files it matches.

    Returns:
        Paths relative to the current directory. The order is deterministic:
        the files of each directory are sorted by name and sub-directories
        are visited in sorted order. The list is empty when the fileset is
        unknown or defines no inclusion pattern.
    """
    # Load filesets if it's a path to a YAML file
    if isinstance(filesets, str):
        with open(filesets, 'r', encoding='utf-8') as f:
            filesets = yaml.safe_load(f)

    # An empty YAML document and a key without a value both load as None.
    patterns = (filesets or {}).get(fileset_name) or []
    if isinstance(patterns, str):
        # A lone string is one pattern, not a sequence of characters.
        patterns = [patterns]
    patterns = [str(p) for p in patterns]

    included_patterns = [p for p in patterns if not p.startswith('!')]
    excluded_patterns = [p[1:] for p in patterns if p.startswith('!')]
    if not included_patterns:
        # Nothing can match, so skip walking the tree altogether.
        return []

    matching_files = []
    # Walk through all files recursively starting from the current directory
    for dirpath, dirnames, filenames in os.walk('.'):
        # Sorting in place steers os.walk, making the traversal (and thus
        # the result) independent of the filesystem's listing order.
        dirnames.sort()
        for filename in sorted(filenames):
            relative_path = os.path.relpath(os.path.join(dirpath, filename))
            if not any(fnmatch.fnmatch(relative_path, pattern)
                       for pattern in included_patterns):
                continue
            if any(fnmatch.fnmatch(relative_path, pattern)
                   for pattern in excluded_patterns):
                continue
            matching_files.append(relative_path)
            logging.debug(f"Included file: {relative_path}")
    return matching_files
def _split_utf8_chunks(data, chunk_size):
    """Split UTF-8 encoded bytes into chunks of at most ``chunk_size`` bytes.

    Chunk boundaries are moved backwards so they never fall inside a
    multi-byte character; every chunk is therefore decodable on its own.
    """
    if chunk_size < 1:
        raise ValueError("chunk_size must be positive")
    chunks = []
    start = 0
    while start < len(data):
        end = min(start + chunk_size, len(data))
        # Continuation bytes look like 0b10xxxxxx: back up to a character start.
        while start < end < len(data) and (data[end] & 0xC0) == 0x80:
            end -= 1
        if end == start:
            # chunk_size is smaller than a single character; split inside it
            # rather than loop forever.
            end = min(start + chunk_size, len(data))
        chunks.append(data[start:end])
        start = end
    return chunks


def concatenate_files_for_set(fileset_name, filesets):
    """Print the next clipboard-sized part of a fileset's concatenated files.

    The first call for a fileset writes every matching file, each preceded
    by a ``--- <path>`` header line, into ``temp_<fileset_name>.txt``. Each
    call prints one part of that file to stdout and records the next part
    number in ``state.yml``. After the last part both files are removed.

    A part holds at most ``CLIPBOARD_SIZE - RESERVED_SIZE`` bytes of content
    so that, with ``RESERVED_MESSAGE`` appended, it stays within
    ``CLIPBOARD_SIZE`` bytes. Parts are contiguous and never split a UTF-8
    character.

    Args:
        fileset_name: Name of the fileset to output.
        filesets: Fileset definitions, passed through to ``gather_files``.

    Raises:
        UnicodeDecodeError: If a matching file is not valid UTF-8. No
            temporary file is left behind in that case.
    """
    temp_file_path = f"temp_{fileset_name}.txt"
    state_file_path = "state.yml"

    # Reserve bytes for the message at the end
    actual_buffer_size = CLIPBOARD_SIZE - RESERVED_SIZE

    # Load previous state
    current_part = 1
    if os.path.exists(state_file_path):
        with open(state_file_path, 'r') as state_file:
            state = yaml.safe_load(state_file)
        if isinstance(state, dict) and state.get('fileset_name') == fileset_name:
            current_part = state.get('current_part', 1)
        elif os.path.exists(temp_file_path):
            # The state belongs to another fileset (or is unreadable):
            # discard any stale concatenation and start over.
            os.remove(temp_file_path)

    # If the temporary file doesn't exist, it means this is the first call
    # for this fileset. Create the concatenated file from scratch.
    if not os.path.exists(temp_file_path):
        # Read everything before creating the temporary file: the file can
        # then neither match the fileset patterns itself nor be left
        # half-written if one of the sources cannot be read.
        sections = []
        for filepath in gather_files(fileset_name, filesets):
            logging.info(f"Concatenating file: {filepath}")
            with open(filepath, 'r', encoding='utf-8') as file_content:
                sections.append(f"--- {filepath}\n{file_content.read()}\n")
        with open(temp_file_path, 'w', encoding='utf-8') as f:
            f.write("".join(sections))

    # Split on the same size that is printed per part, so no bytes are
    # skipped between consecutive parts.
    with open(temp_file_path, 'rb') as f:
        chunks = _split_utf8_chunks(f.read(), actual_buffer_size)
    total_parts = max(len(chunks), 1)

    # A corrupt or stale part number would otherwise print nothing forever.
    if not isinstance(current_part, int) or not 1 <= current_part <= total_parts:
        current_part = 1

    # Log relevant information
    logging.info(f"Fileset: {fileset_name}, Current Part: {current_part}, Total Parts: {total_parts}")

    # Read the appropriate part
    output = chunks[current_part - 1].decode('utf-8', errors='replace') if chunks else ""
    if current_part < total_parts:
        output += RESERVED_MESSAGE

    if current_part < total_parts:
        # Update state
        with open(state_file_path, 'w') as state_file:
            state = {
                'fileset_name': fileset_name,
                'current_part': current_part + 1
            }
            yaml.safe_dump(state, state_file)
    else:
        # It's the last part: cleanup
        for leftover in (temp_file_path, state_file_path):
            if os.path.exists(leftover):
                os.remove(leftover)

    # Print the content to be copied to the clipboard
    print(output)
    if total_parts > 1:
        print(f"Part {current_part} of {total_parts}. Please run this command again to continue.", file=sys.stderr)
if __name__ == '__main__':
    # Usage:
    #   concatenate_files.py <fileset-name>   print the next part of a fileset
    #   concatenate_files.py abort-cp         discard an unfinished copy
    if len(sys.argv) < 2:
        print(f"Usage: {sys.argv[0]} <fileset-name> | abort-cp", file=sys.stderr)
        sys.exit(2)

    if sys.argv[1] == 'abort-cp':
        # An unfinished copy is tracked in "state.yml" and buffered in
        # "temp_<fileset>.txt" (the names concatenate_files_for_set uses);
        # both must go, otherwise the next run resumes the old copy.
        stale_files = [TEMP_FILE_NAME, 'state.yml']
        if os.path.exists('state.yml'):
            try:
                with open('state.yml', 'r') as state_file:
                    saved_state = yaml.safe_load(state_file)
            except yaml.YAMLError:
                # Unreadable state: still delete it below.
                saved_state = None
            if isinstance(saved_state, dict) and saved_state.get('fileset_name'):
                stale_files.append(f"temp_{saved_state['fileset_name']}.txt")
        for stale_path in stale_files:
            if os.path.exists(stale_path):
                os.remove(stale_path)
        print("Copy process aborted!")
    else:
        with open('fileset-definitions.yml', 'r') as f:
            filesets = yaml.safe_load(f)
        fileset_name = sys.argv[1]
        concatenate_files_for_set(fileset_name, filesets)