Skip to content

Commit d5164c3

Browse files
author
Tom Softreck
committed
Release version 0.1.47
### Changed
- Changes in src/pyfunc2/__init__.py
- Changes in src/pyfunc2/markdown/create_dir_structure.py
- Changes in src/pyfunc2/markdown/create_dir_structure_from_headers.py
- Changes in src/pyfunc2/markdown/create_folders_files.py
- Changes in src/pyfunc2/ocr/__init__.py
1 parent de7228a commit d5164c3

File tree

8 files changed

+54
-80
lines changed

8 files changed

+54
-80
lines changed

CHANGELOG.md

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,15 @@
22

33
All notable changes to this project will be documented in this file.
44

5+
## [0.1.47] - 2025-04-16
6+
7+
### Changed
8+
- Changes in src/pyfunc2/__init__.py
9+
- Changes in src/pyfunc2/markdown/create_dir_structure.py
10+
- Changes in src/pyfunc2/markdown/create_dir_structure_from_headers.py
11+
- Changes in src/pyfunc2/markdown/create_folders_files.py
12+
- Changes in src/pyfunc2/ocr/__init__.py
13+
514
## [0.1.46] - 2025-04-16
615

716
### Added

setup.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
# Configuration setup
1919
setup(
2020
name="pyfunc2",
21-
version="0.1.44",
21+
version="0.1.45",
2222
description="libs for cameramonit, ocr, fin-officer, cfo, and other projects",
2323
long_description=LONG_DESCRIPTION,
2424
long_description_content_type="text/markdown",
@@ -34,7 +34,7 @@
3434
"Wiki": "https://github.com/pyfunc/lib/wiki",
3535
"Issue Tracker": "https://github.com/pyfunc/lib/issues/new",
3636
},
37-
packages=["pyfunc2", "pyfunc2.config", "pyfunc2.email", "pyfunc2.file"],
37+
packages=find_packages(where="src"),
3838
package_dir={"": "src"},
3939
license="Apache-2.0", # Use simple string format
4040
license_files=("LICENSE"), # Empty tuple to explicitly prevent license files

setup.py.bak

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ except FileNotFoundError:
1818
# Configuration setup
1919
setup(
2020
name="pyfunc2",
21-
version="0.1.43",
21+
version="0.1.44",
2222
description="libs for cameramonit, ocr, fin-officer, cfo, and other projects",
2323
long_description=LONG_DESCRIPTION,
2424
long_description_content_type="text/markdown",
@@ -34,7 +34,7 @@ setup(
3434
"Wiki": "https://github.com/pyfunc/lib/wiki",
3535
"Issue Tracker": "https://github.com/pyfunc/lib/issues/new",
3636
},
37-
packages=["pyfunc2", "pyfunc2.config", "pyfunc2.email", "pyfunc2.file"],
37+
packages=find_packages(where="src"),
3838
package_dir={"": "src"},
3939
license="Apache-2.0", # Use simple string format
4040
license_files=("LICENSE"), # Empty tuple to explicitly prevent license files

src/pyfunc2/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
from .email_update import email_update
1111
from .img_to_base64 import img_to_base64
1212
from .lasts import lasts
13+
from .ocr import *
1314
from .to_lower_case import convert_char
1415
from .to_lower_case import to_lower_case
1516

src/pyfunc2/markdown/create_dir_structure.py

Lines changed: 6 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,22 +1,12 @@
11
import os
2-
import mistune
3-
from mistune import HTMLRenderer
4-
2+
import re
53

64
def create_dir_structure(md_text, base_dir):
7-
# renderer = HeaderRenderer()
8-
# renderer = mistune.HTMLRenderer()
9-
renderer = mistune.HTMLRenderer()
10-
# markdown = mistune.Markdown(renderer, plugins=[strikethrough])
11-
markdown = mistune.Markdown(renderer)
12-
# markdown = mistune.Markdown(renderer=renderer)
13-
headers = markdown(md_text).split('\n')
14-
print(headers)
15-
16-
for header in headers:
17-
if header: # Exclude empty directories
18-
pathf = os.path.join(base_dir, header)
19-
print(pathf)
5+
# Twórz katalog z każdej niepustej linii tekstu
6+
for line in md_text.splitlines():
7+
name = line.strip()
8+
if name:
9+
pathf = os.path.join(base_dir, name)
2010
os.makedirs(pathf, exist_ok=True)
2111

2212

src/pyfunc2/markdown/create_dir_structure_from_headers.py

Lines changed: 8 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -5,21 +5,16 @@
55
# markdown_file
66
# source - path to source folder
77
# pattern - regular expression pattern for Markdown headers
8-
def create_dir_structure_from_headers(markdown_file="",
9-
path="",
10-
pattern_list=[r'^#{1,6}\s+(.*)$', r'\[([^\]]+)\]\(([^)]+)\)']):
8+
def create_dir_structure_from_headers(markdown_file="", path="", pattern_list=[r'^#{1,6}\s+(.*)$']):
9+
import re
1110
with open(markdown_file, 'r') as file:
1211
markdown = file.read()
13-
14-
for header in get_header_list(markdown, pattern_list[0]):
15-
#print(header)
16-
#exit()
17-
for url in get_url_list(header, pattern_list[1]):
18-
path_folder = os.path.join(path, str(url))
19-
#print(path_folder)
20-
#exit()
21-
if not os.path.exists(path_folder):
22-
os.makedirs(path_folder)
12+
# Wyciągnij wszystkie nagłówki (tylko tekst, bez #)
13+
headers = re.findall(pattern_list[0], markdown, re.MULTILINE)
14+
for header in headers:
15+
if header:
16+
path_folder = os.path.join(path, header.strip())
17+
os.makedirs(path_folder, exist_ok=True)
2318

2419

2520

src/pyfunc2/markdown/create_folders_files.py

Lines changed: 24 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
# pattern - regular expression pattern for Markdown headers
1515
def create_folders_files(markdown_file="",
1616
path="",
17-
pattern_list=[r'^#{1,6}\s+(.*)$', r'\[([^\]]+)\]\(([^)]+)\)'],
17+
pattern_list=[r'^#{1,6}\s+(.*)$'],
1818
extension_list=['bash', 'php', 'js', 'javascript', 'shell', 'sh'],
1919
extension_head_list={
2020
'bash': '#!/bin/bash',
@@ -23,49 +23,27 @@ def create_folders_files(markdown_file="",
2323
'php': '<?php'
2424
}
2525
):
26-
markdown_data = get_dictionary_structure_from_headers_content(markdown_file)
27-
for section, content in markdown_data.items():
28-
# print(f"Section: {section}\nContent: {content}\n")
29-
# print(content.splitlines())
30-
# exit()
31-
# markdown_data = get_dictionary_structure_by_separator_list(content.splitlines())
32-
33-
# print(markdown_data)
34-
# continue
35-
36-
for url in get_url_list(section, pattern_list[1]):
37-
# print(path_folder)
38-
# exit()
39-
try:
40-
path_folder = os.path.join(path, str(url))
41-
42-
if not os.path.exists(path_folder):
43-
os.makedirs(path_folder)
44-
45-
filename = 'README.md'
26+
import re
27+
# Wczytaj plik markdown
28+
with open(markdown_file, 'r') as file:
29+
markdown = file.read()
30+
# Wyciągnij nagłówki
31+
headers = re.findall(pattern_list[0], markdown, re.MULTILINE)
32+
# Wyciągnij bloki kodu
33+
code_blocks = re.findall(r'```(\w+)?\n([\s\S]*?)```', markdown)
34+
for idx, header in enumerate(headers):
35+
if header:
36+
path_folder = os.path.join(path, header.strip())
37+
os.makedirs(path_folder, exist_ok=True)
38+
# Zapisz README.md z treścią sekcji (opcjonalnie)
39+
# path_file = os.path.join(path_folder, 'README.md')
40+
# with open(path_file, "w") as f:
41+
# f.write(header)
42+
# Zapisz pliki kodów (jeśli są)
43+
if idx < len(code_blocks):
44+
lang, code = code_blocks[idx]
45+
extension = lang if lang else 'txt'
46+
filename = f"{idx+1}.{extension}"
4647
path_file = os.path.join(path_folder, filename)
47-
# print(path_file)
48-
49-
f = open(path_file, "w")
50-
f.write(content)
51-
f.close()
52-
53-
result_list = get_code_extension_dict(content, extension_list, extension_head_list)
54-
# print(result_list)
55-
56-
for item in result_list:
57-
#print(item)
58-
extension = item['extension']
59-
filename = item['filename']
60-
code = item['code']
61-
print(extension, filename, code)
62-
# print(item['code'])
63-
path_file = os.path.join(path_folder, filename + '.' + extension)
64-
f = open(path_file, "w")
65-
f.write(code)
66-
f.close()
67-
68-
69-
except Exception as e:
70-
print(e)
71-
continue
48+
with open(path_file, "w") as f:
49+
f.write(code.strip())

src/pyfunc2/ocr/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
from .get_date_from_pdf import remove_only_single_spaces
2020
from .get_date_from_pdf import get_date_from_pdf
2121
from .get_date_from_pdf_pattern import get_date_from_pdf_pattern
22+
from .get_company_from_pdf import get_company_from_pdf
2223

2324
# Public API of the package
24-
__all__ = [CompanyList, convert_pdf_to_string, find_string_in_file_path, find_string_in_pdf, from_folder_to_year, from_month_to_company, convertPdf2String, remove_extra_spaces, remove_all_spaces, remove_only_single_spaces, get_company_from_pdf, find_company, convertPdf2String, remove_extra_spaces, remove_all_spaces, remove_only_single_spaces, get_date_from_pdf, get_date_from_pdf_pattern]
25+
__all__ = [CompanyList, convert_pdf_to_string, find_string_in_file_path, find_string_in_pdf, from_folder_to_year, from_month_to_company, convertPdf2String, remove_extra_spaces, remove_all_spaces, remove_only_single_spaces, get_company_from_pdf, find_company, convertPdf2String, remove_extra_spaces, remove_all_spaces, remove_only_single_spaces, get_date_from_pdf, get_date_from_pdf_pattern, get_company_from_pdf]

0 commit comments

Comments
 (0)