-
Notifications
You must be signed in to change notification settings - Fork 17
Expand file tree
/
Copy pathdirectory_.py
More file actions
61 lines (49 loc) · 1.82 KB
/
directory_.py
File metadata and controls
61 lines (49 loc) · 1.82 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
from __future__ import annotations
import warnings
from pathlib import Path
from ...core.catalog import TextDocument
from ...core.ports import DocumentLoaderPort
from .markdown_ import MarkdownLoader
from .plaintext_ import PlainTextLoader
class DirectoryLoader:
    """
    Recursively load a directory by dispatching each file to the loader
    registered for its extension.

    Default mapping::

        .md  → MarkdownLoader
        .txt → PlainTextLoader

    Passing ``loaders`` replaces the default mapping entirely, so repeat any
    default entries you still want::

        from lang2sql.integrations.loaders import PDFLoader

        docs = DirectoryLoader(
            "docs/",
            loaders={".md": MarkdownLoader(), ".pdf": PDFLoader()},
        ).load()

    Args:
        path: Directory path to load.
        loaders: Mapping of extension (with the leading dot) →
            DocumentLoaderPort. Keys are matched case-insensitively.
            ``None`` selects the default
            ``{".md": MarkdownLoader(), ".txt": PlainTextLoader()}``; an
            explicitly empty mapping registers no loaders at all.
    """

    def __init__(
        self,
        path: str,
        loaders: dict[str, DocumentLoaderPort] | None = None,
    ) -> None:
        self._path = Path(path)
        # Test against None rather than truthiness: an empty mapping is a
        # legitimate "load nothing" request, not a request for the defaults.
        if loaders is None:
            loaders = {".md": MarkdownLoader(), ".txt": PlainTextLoader()}
        # Build a private copy with lowercase keys. load() looks up the
        # lowercased file suffix, so a key like ".MD" would otherwise never
        # match; copying also shields us from later mutation by the caller.
        self._loaders: dict[str, DocumentLoaderPort] = {
            ext.lower(): loader for ext, loader in loaders.items()
        }

    def load(self) -> list[TextDocument]:
        """Recursively walk the directory and load all files with a registered extension.

        Entries are visited in sorted path order. Files whose extension has
        no registered loader are skipped. A loader failure is reported via
        ``warnings.warn`` and that file contributes no documents; loading
        then continues with the next file.

        Returns:
            The documents produced by every successfully loaded file,
            concatenated in visit order.
        """
        docs: list[TextDocument] = []
        for file in sorted(self._path.rglob("*")):
            # Cheap dictionary lookup first; only stat entries we could load.
            loader = self._loaders.get(file.suffix.lower())
            if loader is None or not file.is_file():
                continue
            try:
                # Materialise inside the try so a loader that fails midway
                # (e.g. a generator) does not leave partial results behind.
                loaded = list(loader.load(str(file)))
            except Exception as e:
                # Deliberate best-effort: one bad file must not abort the walk.
                warnings.warn(f"Failed to load {file}: {e}", stacklevel=2)
            else:
                docs.extend(loaded)
        return docs