diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..6769e21 --- /dev/null +++ b/.gitignore @@ -0,0 +1,160 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. 
import re
from urllib.parse import urlparse
from message_reader import MessageReader
from article_writer import ArticleWriter


def main():
    """Read the exported chat and print the URLs found in each message.

    Every message returned by ``MessageReader.get_messages()`` is scanned for
    ``http``/``https`` links and the resulting list (possibly empty) is
    printed, one list per message.
    """
    # read
    chat_messages = MessageReader(".", "android_en.txt").get_messages()

    # write
    url_pattern = re.compile(r'(https?://\S+)')
    for chat_message in chat_messages:
        print(url_pattern.findall(chat_message['text']))
        # todo
        # 1. validation
        #   1. urls empty check
        # 2. make directories by date
        # 3. get open graph data using MetadataParser
        # 4. write files in each directories using ArticleWriter

    # upload (git add, commit, push)


if __name__ == "__main__":
    main()
+ +방장문의 - 01030850969로 카톡 등록 하신뒤 갠톡 주세요 + +October 12, 2023 at 12:55 AM +October 12, 2023 at 12:55 AM, 윤경옥 Cody : https://www.inflearn.com/course/%EC%9D%B8%ED%94%84%EB%9F%B0-%ED%87%B4%EA%B7%BC%EA%B8%B8%EB%B0%8B%EC%97%85-03-flutter +October 12, 2023 at 8:57 AM, 나는야김군 joined this chatroom. +October 12, 2023 at 3:19 PM, 태환 : 안녕하세요~ +GDG Korea Android 10월 소소밋업 소식을 공유해드립니다. + +이번 소소밋업에서는 주니어 개발자의 커리어에 대해 소소하게 이야기를 나눕니다. (자세한 내용은 아래 링크를 참조해주세요) + +일시 : 10월 18일 오후 8시 +장소 : 온라인 (RSVP 후 링크 공유) + +https://gdg.community.dev/events/details/google-gdg-korea-android-presents-10weol-sosomiseob-keorieotog/ +October 12, 2023 at 3:47 PM, JetBrains joined this chatroom. +October 12, 2023 at 4:41 PM, 졸린무지 : This message was deleted. +October 12, 2023 at 6:13 PM, 초롱초롱 무지 left this chatroom. +October 12, 2023 at 10:13 PM, 엘리먼츠 joined this chatroom. + +October 13, 2023 at 12:54 AM +October 13, 2023 at 12:54 AM, . left this chatroom. +October 13, 2023 at 11:00 AM, 유광무 [𝙋𝙖𝙣𝙜𝙈𝙤𝙤] : https://blog.jetbrains.com/ko/writerside/2023/10/harnessing-the-power-of-the-kotlin-dsl-for-documentation/ +October 13, 2023 at 11:34 AM, 머리 빗는 네오 joined this chatroom. +October 13, 2023 at 1:09 PM, 퇴근하는 프로도 joined this chatroom. +October 13, 2023 at 2:37 PM, 눈빛 애교 어피치 : https://www.tiobe.com/tiobe-index/ +October 13, 2023 at 8:33 PM, 닭간장소스찜후배 left this chatroom. +October 13, 2023 at 10:05 PM, 라면먹는 제이지 joined this chatroom. + +October 14, 2023 at 9:16 AM +October 14, 2023 at 9:16 AM, Amazing : https://blog.stackademic.com/is-your-flutter-application-secured-41b0ce028769 +October 14, 2023 at 4:00 PM, 팩 : https://lp.jetbrains.com/javascriptday2023/ +October 14, 2023 at 4:41 PM, 먹보 네오 left this chatroom. +October 14, 2023 at 11:12 PM, 김명준 joined this chatroom. 
import os


class ArticleWriter:
    """Render a page-metadata dict as a Markdown article and save it to disk.

    The target location is fixed at construction time (``directory`` plus
    ``filename``); each call to :meth:`write` overwrites that file.
    """

    # Markdown template; every placeholder is filled from the dict given to write().
    __article_format = """## [{title}]({url})
![Course Image]({image})
**Description:** {description}
**Type:** {course_type}
**Site Name:** {site_name}
**Locale:** {locale}"""

    def __init__(self, directory: str, filename: str):
        """Remember where the article will be written.

        Args:
            directory: Directory that will contain the article. It is created
                (including parents) on write if it does not exist. An empty
                string means the current working directory.
            filename: Name of the article file inside ``directory``.
        """
        self.__directory = directory
        self.__filename = filename

    def write(self, data: dict) -> None:
        """Write ``data`` to the configured file as Markdown (UTF-8).

        Args:
            data: Metadata with the keys ``title``, ``url``, ``image``,
                ``description``, ``type``, ``site_name`` and ``locale``.

        Raises:
            KeyError: If one of the required keys is missing. Nothing is
                written (and no existing file is truncated) in that case.
            OSError: If the directory cannot be created or the file cannot
                be written.
        """
        # Render before touching the file system: opening in 'w' mode truncates
        # the target, so a missing key must fail before the file is opened.
        article_text = self.__article_format.format(
            title=data['title'],
            url=data['url'],
            image=data['image'],
            description=data['description'],
            course_type=data['type'],
            site_name=data['site_name'],
            locale=data['locale'],
        )

        # os.makedirs('') raises, and an empty directory simply means "here".
        # exist_ok avoids the check-then-create race; real failures propagate
        # instead of being printed and then failing again inside open().
        if self.__directory:
            os.makedirs(self.__directory, exist_ok=True)

        target_path = os.path.join(self.__directory, self.__filename)
        with open(target_path, 'w', encoding='utf-8') as file:
            file.write(article_text)


if __name__ == "__main__":
    sample_data = {'title': 'Test Title', 'url': 'https://www.udemy.com/course/learn-flutter-dart-to-build-ios-android-apps/', 'description': 'This is test description', 'image': 'https://img-c.udemycdn.com/course/480x270/1708340_7108_5.jpg', 'type': 'udemy_com:course', 'site_name': 'Udemy', 'locale': 'en_US'}
    writer = ArticleWriter("./test", "test_output.md")
    writer.write(sample_data)
import os
import re
from datetime import datetime


class MessageReader:
    """Parse a KakaoTalk chat export text file into a list of messages.

    Three export layouts are recognised, keyed by device and language:
    ``window_ko``, ``android_ko`` and ``android_en``. Each parsed message is
    a dict with the keys ``datetime`` (a ``datetime`` object), ``user_name``
    and ``text``. Lines that are not user messages (date headers,
    join/leave notices, ...) are skipped.
    """

    # Regular expressions describing the timestamp layout of each export type.
    # The "window" export uses two line kinds (a date separator line and a
    # "[user] [time] text" line); the Android exports use a single timestamp.
    __kakaotalk_datetime_pattern_dict = {
        'window_ko_date': r"-{15} [0-9]{4}년 [0-9]{1,2}월 [0-9]{1,2}일 \S요일 -{15}",
        'window_ko_time': r"((\[)([^\[])+(\])) ((\[오)\S [0-9]{1,2}:[0-9]{1,2}(\]))",
        'android_ko': r"([0-9]){4}년 ([0-9]){1,2}월 ([0-9]){1,2}일 (오전|오후) ([0-9]){1,2}:([0-9]){1,2}",
        'android_en': r"([A-Za-z])+ ([0-9]){1,2}, ([0-9]){4} at ([0-9]){1,2}:([0-9]){1,2}\s(AM|PM)",
    }

    def __init__(self, directory: str, filename: str):
        """Remember the location of the export file.

        Args:
            directory: Directory containing the export file.
            filename: Name of the export file inside ``directory``.
        """
        self.__directory = directory
        self.__filename = filename

    def get_messages(self) -> list:
        """Return every user message found in the export file.

        Returns:
            A list of dicts with the keys ``datetime``, ``user_name`` and
            ``text``, in file order.

        Raises:
            FileNotFoundError: The directory or the file does not exist.
            NotADirectoryError: ``directory`` exists but is not a directory.
            OSError: The file path exists but is not a regular file.
            ValueError: The export layout could not be recognised, or a
                timestamp could not be parsed.
        """
        self.__validate_file_existence()
        file_path = os.path.join(self.__directory, self.__filename)
        file_type = self.__check_export_file_type(file_path)
        return self.__parse(file_type, file_path)

    def __validate_file_existence(self):
        """Raise a descriptive error unless directory and file both exist."""
        if not os.path.exists(self.__directory):
            raise FileNotFoundError(
                "directory path({directory}) doesn't exist".format(directory=self.__directory))
        if not os.path.isdir(self.__directory):
            raise NotADirectoryError(
                "directory path({directory}) is not a directory".format(directory=self.__directory))
        file_path = os.path.join(self.__directory, self.__filename)
        if not os.path.exists(file_path):
            raise FileNotFoundError(
                "file path({file_path}) doesn't exist".format(file_path=file_path))
        if not os.path.isfile(file_path):
            raise OSError(
                "file path({file_path}) is not a file".format(file_path=file_path))

    def __check_export_file_type(self, file_path: str,
                                 datetime_pattern_dict=__kakaotalk_datetime_pattern_dict):
        """Detect the export layout from the first five lines of the file.

        Returns:
            The ``<device>_<language>`` prefix of the first pattern key that
            matches, e.g. ``'android_en'`` or ``'window_ko'``.

        Raises:
            ValueError: None of the known timestamp patterns was found.
        """
        with open(file_path, 'r', encoding='utf-8') as f:
            for _ in range(5):
                line = f.readline()
                if not line:  # end of file reached before five lines
                    break

                for file_type, pattern in datetime_pattern_dict.items():
                    if re.search(pattern, line):
                        # 'window_ko_date' / 'window_ko_time' both map to 'window_ko'.
                        return '_'.join(file_type.split('_')[:2])

        supported = sorted({'_'.join(key.split('_')[:2]) for key in datetime_pattern_dict})
        raise ValueError(
            "Cannot determine the device type and language of the file. "
            "Please check that the file is a KakaoTalk export file and that "
            f"the export environment is one of {supported}")

    def __parse(self, file_type: str, file_path, datetime_pattern_dict=__kakaotalk_datetime_pattern_dict):
        """Parse the file according to ``file_type`` and return the message list."""
        if file_type == 'window_ko':  # window
            return self.__parse_window(file_type, file_path,
                                       datetime_pattern_dict['window_ko_date'],
                                       datetime_pattern_dict['window_ko_time'])
        # android
        return self.__parse_android(file_type, file_path, datetime_pattern_dict[file_type])

    def __parse_window(self, file_type, file_path, date_pattern, time_pattern):
        """Parse a "window" export: date separator lines plus "[user] [time] text" lines."""
        date_re = re.compile(date_pattern)
        time_re = re.compile(time_pattern)
        msgs = []
        date = ''
        # A buffer is used so that messages spanning several lines stay together.
        buffer = ''

        with open(file_path, encoding='utf-8') as file:
            for line in file:
                is_date_line = date_re.match(line) is not None
                if is_date_line or time_re.match(line):
                    # A new record starts here, so the buffered one is complete.
                    self.__append_window_message(msgs, file_type, time_re, date, buffer)
                    if is_date_line:
                        # "--- 2020년 6월 28일 일요일 ---" -> "2020년 6월 28일"
                        date = line.replace('-', '').strip().rsplit(" ", 1)[0]
                        buffer = ''
                    else:
                        buffer = line
                else:
                    buffer += line

        # The last message has no following record to trigger the flush.
        self.__append_window_message(msgs, file_type, time_re, date, buffer)
        return msgs

    def __append_window_message(self, msgs, file_type, time_re, date, buffer):
        """Append the message held in ``buffer`` to ``msgs`` if it is a message."""
        matched = time_re.match(buffer)
        if matched is None:
            return
        # Group 1 is "[user]" and group 5 is "[time]"; using the match instead
        # of splitting on ']' keeps user names containing ']' intact.
        user_name = matched.group(1)[1:-1].strip()
        time = matched.group(5)[1:-1].strip()
        msgs.append({'datetime': self.__str_to_datetime(file_type, f"{date} {time}"),
                     'user_name': user_name,
                     'text': buffer[matched.end():].strip()
                     })

    def __parse_android(self, file_type, file_path, datetime_pattern):
        """Parse an Android export: "<timestamp>, <user> : <text>" records."""
        record_start_re = re.compile(datetime_pattern)
        # The user name is confined to the first line; the text may span lines.
        message_re = re.compile(
            r"(?P<datetime>" + datetime_pattern + r"),(?P<user>[^\n]*?) : (?P<text>.*)",
            re.DOTALL)
        msgs = []
        # A buffer is used so that messages spanning several lines stay together.
        buffer = ''

        with open(file_path, encoding='utf-8') as file:
            for line in file:
                if record_start_re.match(line):
                    # A new record starts here, so the buffered one is complete.
                    self.__append_android_message(msgs, file_type, message_re, buffer)
                    buffer = line
                else:
                    buffer += line

        # The last message has no following record to trigger the flush.
        self.__append_android_message(msgs, file_type, message_re, buffer)
        return msgs

    def __append_android_message(self, msgs, file_type, message_re, buffer):
        """Append the message held in ``buffer`` to ``msgs`` if it is a message."""
        matched = message_re.match(buffer)
        if matched is None:  # date header, join/leave notice, file header, ...
            return
        msgs.append({'datetime': self.__str_to_datetime(file_type, matched.group('datetime').strip()),
                     'user_name': matched.group('user').strip(),
                     'text': matched.group('text').strip()
                     })

    def __str_to_datetime(self, file_type, text):
        """Convert a timestamp string of the given export type to ``datetime``.

        Raises:
            ValueError: ``text`` does not match the expected layout.
        """
        kakaotalk_strptime_pattern_dict = {'ko': '%Y년 %m월 %d일 %p %I:%M',
                                           'en': '%B %d, %Y at %I:%M %p',
                                           }

        language = file_type.split('_')[1]
        if language == 'ko':
            # strptime's %p does not understand the Korean AM/PM markers.
            text = text.replace('오전', 'AM')
            text = text.replace('오후', 'PM')

        return datetime.strptime(text, kakaotalk_strptime_pattern_dict[language])


if __name__ == "__main__":
    reader = MessageReader(".", "android_en.txt")

    for msg in reader.get_messages():
        print(msg)