latest
User Documentation
Introduction
Installation
Using DODFMiner
Architecture’s Document
Code of Conduct
Contributing Guide
Downloader Module
Downloader Core
Extractor Module
Pure Core
Pure Utils
Polished Core
Polished Helper
Acts
Acts
Regex Backend
NER Backend
ETC
Acknowledgements
About the KneDLE Team
DODFMiner
»
Index
Edit on GitHub
Index
_
|
A
|
B
|
C
|
D
|
E
|
G
|
I
|
J
|
L
|
M
|
P
|
R
|
S
|
T
|
X
|
Y
_
_create_download_folder() (dodfminer.downloader.core.Downloader method)
_create_single_folder() (dodfminer.downloader.core.Downloader method)
(dodfminer.extract.pure.core.ContentExtractor class method)
_download_path (dodfminer.downloader.core.Downloader attribute)
_download_pdf() (dodfminer.downloader.core.Downloader method)
_extract_bold_upper_page() (in module dodfminer.extract.pure.utils.title_extractor)
_extract_bold_upper_pdf() (in module dodfminer.extract.pure.utils.title_extractor)
_extract_page_lines_content() (in module dodfminer.extract.pure.utils.box_extractor)
_extract_titles() (dodfminer.extract.pure.core.ContentExtractor class method)
_fail_request_message() (dodfminer.downloader.core.Downloader method)
_file_exist() (dodfminer.downloader.core.Downloader method)
_find_prop_value() (dodfminer.extract.polished.backend.regex.ActRegex method)
_flags (dodfminer.extract.polished.backend.regex.ActRegex attribute)
_get_doc_img() (in module dodfminer.extract.pure.utils.box_extractor)
_get_json_list() (dodfminer.extract.pure.core.ContentExtractor class method)
_get_pdfs_list() (dodfminer.extract.pure.core.ContentExtractor class method)
_get_titles_subtitles() (in module dodfminer.extract.pure.utils.title_extractor)
_get_titles_subtitles_smart() (in module dodfminer.extract.pure.utils.title_extractor)
_get_txt_list() (dodfminer.extract.pure.core.ContentExtractor class method)
_inst_rule (dodfminer.extract.polished.backend.regex.ActRegex attribute)
_log() (dodfminer.downloader.core.Downloader method)
(dodfminer.extract.pure.core.ContentExtractor class method)
_make_month_path() (dodfminer.downloader.core.Downloader method)
_normalize_text() (dodfminer.extract.pure.core.ContentExtractor class method)
_prog_bar (dodfminer.downloader.core.Downloader attribute)
_prop_rules() (dodfminer.extract.polished.backend.regex.ActRegex method)
_regex_flags() (dodfminer.extract.polished.backend.regex.ActRegex class method)
_regex_instances() (dodfminer.extract.polished.backend.regex.ActRegex method)
_regex_props() (dodfminer.extract.polished.backend.regex.ActRegex method)
_rule_for_inst() (dodfminer.extract.polished.backend.regex.ActRegex method)
_rules (dodfminer.extract.polished.backend.regex.ActRegex attribute)
_string_to_date() (dodfminer.downloader.core.Downloader class method)
_struct_subfolders() (dodfminer.extract.pure.core.ContentExtractor class method)
A
ActRegex (class in dodfminer.extract.polished.backend.regex)
B
BBox (class in dodfminer.extract.pure.utils.title_extractor)
bbox (dodfminer.extract.pure.utils.title_extractor.BBox property)
(dodfminer.extract.pure.utils.title_extractor.TextTypeBboxPageTuple property)
BoldUpperCase (class in dodfminer.extract.pure.utils.title_filter)
Box (class in dodfminer.extract.pure.utils.title_extractor)
C
compare_blocks() (in module dodfminer.extract.pure.utils.box_extractor)
ContentExtractor (class in dodfminer.extract.pure.core)
D
dict_bold() (dodfminer.extract.pure.utils.title_filter.BoldUpperCase class method)
dict_text() (dodfminer.extract.pure.utils.title_filter.BoldUpperCase class method)
dodfminer.downloader.core
module
dodfminer.extract.polished.backend.regex
module
dodfminer.extract.pure.core
module
dodfminer.extract.pure.utils.box_extractor
module
dodfminer.extract.pure.utils.title_extractor
module
dodfminer.extract.pure.utils.title_filter
module
Downloader (class in dodfminer.downloader.core)
draw_doc_text_boxes() (in module dodfminer.extract.pure.utils.box_extractor)
dump_json() (dodfminer.extract.pure.utils.title_extractor.ExtractorTitleSubtitle method)
,
[1]
E
extract_structure() (dodfminer.extract.pure.core.ContentExtractor class method)
extract_text() (dodfminer.extract.pure.core.ContentExtractor class method)
extract_titles_subtitles() (in module dodfminer.extract.pure.utils.title_extractor)
,
[1]
extract_to_json() (dodfminer.extract.pure.core.ContentExtractor class method)
extract_to_txt() (dodfminer.extract.pure.core.ContentExtractor class method)
ExtractorTitleSubtitle (class in dodfminer.extract.pure.utils.title_extractor)
,
[1]
G
gen_hierarchy_base() (in module dodfminer.extract.pure.utils.title_extractor)
,
[1]
gen_title_base() (in module dodfminer.extract.pure.utils.title_extractor)
,
[1]
get_doc_img_boxes() (in module dodfminer.extract.pure.utils.box_extractor)
,
[1]
get_doc_text_boxes() (in module dodfminer.extract.pure.utils.box_extractor)
,
[1]
get_doc_text_lines() (in module dodfminer.extract.pure.utils.box_extractor)
,
[1]
group_by_column() (in module dodfminer.extract.pure.utils.title_extractor)
,
[1]
group_by_page() (in module dodfminer.extract.pure.utils.title_extractor)
,
[1]
I
invert_text_type_bbox_page_tuple() (in module dodfminer.extract.pure.utils.title_extractor)
J
json (dodfminer.extract.pure.utils.title_extractor.ExtractorTitleSubtitle property)
,
[1]
L
load_blocks_list() (in module dodfminer.extract.pure.utils.title_extractor)
,
[1]
M
module
dodfminer.downloader.core
dodfminer.extract.polished.backend.regex
dodfminer.extract.pure.core
dodfminer.extract.pure.utils.box_extractor
dodfminer.extract.pure.utils.title_extractor
dodfminer.extract.pure.utils.title_filter
P
page (dodfminer.extract.pure.utils.title_extractor.TextTypeBboxPageTuple property)
pull() (dodfminer.downloader.core.Downloader method)
R
reset() (dodfminer.extract.pure.utils.title_extractor.ExtractorTitleSubtitle method)
,
[1]
S
sort_2column() (in module dodfminer.extract.pure.utils.title_extractor)
,
[1]
sort_blocks() (in module dodfminer.extract.pure.utils.box_extractor)
sort_by_column() (in module dodfminer.extract.pure.utils.title_extractor)
,
[1]
subtitles (dodfminer.extract.pure.utils.title_extractor.ExtractorTitleSubtitle property)
,
[1]
(dodfminer.extract.pure.utils.title_extractor.TitlesSubtitles property)
T
text (dodfminer.extract.pure.utils.title_extractor.TextTypeBboxPageTuple property)
TextTypeBboxPageTuple (class in dodfminer.extract.pure.utils.title_extractor)
titles (dodfminer.extract.pure.utils.title_extractor.ExtractorTitleSubtitle property)
,
[1]
(dodfminer.extract.pure.utils.title_extractor.TitlesSubtitles property)
titles_subtitles (dodfminer.extract.pure.utils.title_extractor.ExtractorTitleSubtitle property)
,
[1]
titles_subtitles_hierarchy (dodfminer.extract.pure.utils.title_extractor.ExtractorTitleSubtitle property)
,
[1]
TitlesSubtitles (class in dodfminer.extract.pure.utils.title_extractor)
type (dodfminer.extract.pure.utils.title_extractor.TextTypeBboxPageTuple property)
X
x0 (dodfminer.extract.pure.utils.title_extractor.Box property)
x1 (dodfminer.extract.pure.utils.title_extractor.Box property)
Y
y0 (dodfminer.extract.pure.utils.title_extractor.Box property)
y1 (dodfminer.extract.pure.utils.title_extractor.Box property)
Read the Docs
v: latest
Versions
latest
main
Downloads
pdf
html
epub
On Read the Docs
Project Home
Builds