main
User Documentation
Introduction
Installation
Using DODFMiner
Architecture’s Document
Downloader Module
Downloader Core
Extractor Module
Pure Core
Pure Utils
Polished Core
Polished Helper
Acts
Acts
Regex Backend
NER Backend
ETC
Acknowledgements
About the KneDLE Team
Tutorials
JSON Acts Extraction Tutorial
DODFMiner
Index
Edit on GitHub
Index
_
|
A
|
B
|
C
|
D
|
E
|
G
|
I
|
J
|
L
|
M
|
N
|
P
|
R
|
S
|
T
|
X
|
Y
_
_act_name() (dodfminer.extract.polished.acts.base.Atos method)
_acts (dodfminer.extract.polished.acts.base.Atos attribute)
_acts_str (dodfminer.extract.polished.acts.base.Atos attribute)
_add_base_feat() (dodfminer.extract.polished.backend.ner.ActNER method)
_build_dataframe() (dodfminer.extract.polished.acts.base.Atos method)
_columns (dodfminer.extract.polished.acts.base.Atos attribute)
_create_download_folder() (dodfminer.downloader.core.Downloader method)
_create_single_folder() (dodfminer.downloader.core.Downloader method)
_create_single_folder() (dodfminer.extract.pure.core.ContentExtractor class method)
_data_frame (dodfminer.extract.polished.acts.base.Atos attribute)
_download_path (dodfminer.downloader.core.Downloader attribute)
_download_pdf() (dodfminer.downloader.core.Downloader method)
_extract_bold_upper_page() (in module dodfminer.extract.pure.utils.title_extractor)
_extract_bold_upper_pdf() (in module dodfminer.extract.pure.utils.title_extractor)
_extract_page_lines_content() (in module dodfminer.extract.pure.utils.box_extractor)
_extract_props() (dodfminer.extract.polished.acts.base.Atos method)
_extract_titles() (dodfminer.extract.pure.core.ContentExtractor class method)
_fail_request_message() (dodfminer.downloader.core.Downloader method)
_file_exist() (dodfminer.downloader.core.Downloader method)
_file_name (dodfminer.extract.polished.acts.base.Atos attribute)
_find_prop_value() (dodfminer.extract.polished.backend.regex.ActRegex method)
_flags (dodfminer.extract.polished.backend.regex.ActRegex attribute)
_get_base_feat() (dodfminer.extract.polished.backend.ner.ActNER class method)
_get_doc_img() (in module dodfminer.extract.pure.utils.box_extractor)
_get_features() (dodfminer.extract.polished.backend.ner.ActNER method)
_get_json_list() (dodfminer.extract.pure.core.ContentExtractor class method)
_get_pdfs_list() (dodfminer.extract.pure.core.ContentExtractor class method)
_get_titles_subtitles() (in module dodfminer.extract.pure.utils.title_extractor)
_get_titles_subtitles_smart() (in module dodfminer.extract.pure.utils.title_extractor)
_get_txt_list() (dodfminer.extract.pure.core.ContentExtractor class method)
_inst_rule (dodfminer.extract.polished.backend.regex.ActRegex attribute)
_limits() (dodfminer.extract.polished.backend.ner.ActNER class method)
_load_model() (dodfminer.extract.polished.backend.ner.ActNER method)
_log() (dodfminer.downloader.core.Downloader method)
_log() (dodfminer.extract.pure.core.ContentExtractor class method)
_make_month_path() (dodfminer.downloader.core.Downloader method)
_model (dodfminer.extract.polished.backend.ner.ActNER attribute)
_name (dodfminer.extract.polished.acts.base.Atos attribute)
_normalize_text() (dodfminer.extract.pure.core.ContentExtractor class method)
_prediction() (dodfminer.extract.polished.backend.ner.ActNER method)
_predictions_dict() (dodfminer.extract.polished.backend.ner.ActNER method)
_preprocess() (dodfminer.extract.polished.backend.ner.ActNER class method)
_prog_bar (dodfminer.downloader.core.Downloader attribute)
_prop_rules() (dodfminer.extract.polished.backend.regex.ActRegex method)
_props_names() (dodfminer.extract.polished.acts.base.Atos method)
_raw_acts (dodfminer.extract.polished.acts.base.Atos attribute)
_regex_flags() (dodfminer.extract.polished.backend.regex.ActRegex class method)
_regex_props() (dodfminer.extract.polished.backend.regex.ActRegex method)
_rule_for_inst() (dodfminer.extract.polished.backend.regex.ActRegex method)
_rules (dodfminer.extract.polished.backend.regex.ActRegex attribute)
_split_sentence() (dodfminer.extract.polished.backend.ner.ActNER method)
_string_to_date() (dodfminer.downloader.core.Downloader class method)
_struct_subfolders() (dodfminer.extract.pure.core.ContentExtractor class method)
_text (dodfminer.extract.polished.acts.base.Atos attribute)
A
ActNER (class in dodfminer.extract.polished.backend.ner)
ActRegex (class in dodfminer.extract.polished.backend.regex)
acts_str (dodfminer.extract.polished.acts.base.Atos property)
ActsExtractor (class in dodfminer.extract.polished.core)
Atos (class in dodfminer.extract.polished.acts.base)
B
BBox (class in dodfminer.extract.pure.utils.title_extractor)
bbox (dodfminer.extract.pure.utils.title_extractor.BBox property)
bbox (dodfminer.extract.pure.utils.title_extractor.TextTypeBboxPageTuple property)
BoldUpperCase (class in dodfminer.extract.pure.utils.title_filter)
Box (class in dodfminer.extract.pure.utils.title_extractor)
build_act_txt() (in module dodfminer.extract.polished.helper)
,
[1]
C
committee_classification() (in module dodfminer.extract.polished.helper)
ContentExtractor (class in dodfminer.extract.pure.core)
D
data_frame (dodfminer.extract.polished.acts.base.Atos property)
dict_bold() (dodfminer.extract.pure.utils.title_filter.BoldUpperCase class method)
dict_text() (dodfminer.extract.pure.utils.title_filter.BoldUpperCase class method)
dodfminer.downloader.core
module
dodfminer.extract.polished.backend.ner
module
dodfminer.extract.polished.backend.regex
module
dodfminer.extract.polished.core
module
dodfminer.extract.polished.helper
module
dodfminer.extract.pure.core
module
dodfminer.extract.pure.utils.box_extractor
module
dodfminer.extract.pure.utils.title_extractor
module
dodfminer.extract.pure.utils.title_filter
module
Downloader (class in dodfminer.downloader.core)
draw_doc_text_boxes() (in module dodfminer.extract.pure.utils.box_extractor)
dump_json() (dodfminer.extract.pure.utils.title_extractor.ExtractorTitleSubtitle method)
,
[1]
E
extract_multiple() (in module dodfminer.extract.polished.helper)
,
[1]
extract_multiple_acts() (in module dodfminer.extract.polished.helper)
,
[1]
extract_multiple_acts_parallel() (in module dodfminer.extract.polished.helper)
extract_multiple_acts_with_committee() (in module dodfminer.extract.polished.helper)
extract_single() (in module dodfminer.extract.polished.helper)
,
[1]
extract_structure() (dodfminer.extract.pure.core.ContentExtractor class method)
extract_text() (dodfminer.extract.pure.core.ContentExtractor class method)
extract_titles_subtitles() (in module dodfminer.extract.pure.utils.title_extractor)
,
[1]
extract_to_json() (dodfminer.extract.pure.core.ContentExtractor class method)
extract_to_txt() (dodfminer.extract.pure.core.ContentExtractor class method)
ExtractorTitleSubtitle (class in dodfminer.extract.pure.utils.title_extractor)
,
[1]
G
gen_hierarchy_base() (in module dodfminer.extract.pure.utils.title_extractor)
,
[1]
gen_title_base() (in module dodfminer.extract.pure.utils.title_extractor)
,
[1]
get_act_df() (dodfminer.extract.polished.core.ActsExtractor static method)
,
[1]
get_act_obj() (dodfminer.extract.polished.core.ActsExtractor static method)
,
[1]
get_all_df() (dodfminer.extract.polished.core.ActsExtractor static method)
,
[1]
get_all_df_highlight() (dodfminer.extract.polished.core.ActsExtractor static method)
get_all_df_parallel() (dodfminer.extract.polished.core.ActsExtractor static method)
get_all_obj() (dodfminer.extract.polished.core.ActsExtractor static method)
,
[1]
get_all_obj_highlight() (dodfminer.extract.polished.core.ActsExtractor static method)
get_all_obj_parallel() (dodfminer.extract.polished.core.ActsExtractor static method)
get_doc_img_boxes() (in module dodfminer.extract.pure.utils.box_extractor)
,
[1]
get_doc_text_boxes() (in module dodfminer.extract.pure.utils.box_extractor)
,
[1]
get_doc_text_lines() (in module dodfminer.extract.pure.utils.box_extractor)
,
[1]
get_expected_colunms() (dodfminer.extract.polished.acts.base.Atos method)
get_files_path() (in module dodfminer.extract.polished.helper)
,
[1]
get_xml() (dodfminer.extract.polished.core.ActsExtractor static method)
,
[1]
group_by_column() (in module dodfminer.extract.pure.utils.title_extractor)
,
[1]
group_by_page() (in module dodfminer.extract.pure.utils.title_extractor)
,
[1]
I
invert_text_type_bbox_page_tuple() (in module dodfminer.extract.pure.utils.title_extractor)
J
json (dodfminer.extract.pure.utils.title_extractor.ExtractorTitleSubtitle property)
,
[1]
L
load_blocks_list() (in module dodfminer.extract.pure.utils.title_extractor)
,
[1]
M
module
dodfminer.downloader.core
dodfminer.extract.polished.backend.ner
dodfminer.extract.polished.backend.regex
dodfminer.extract.polished.core
dodfminer.extract.polished.helper
dodfminer.extract.pure.core
dodfminer.extract.pure.utils.box_extractor
dodfminer.extract.pure.utils.title_extractor
dodfminer.extract.pure.utils.title_filter
N
name (dodfminer.extract.polished.acts.base.Atos property)
P
page (dodfminer.extract.pure.utils.title_extractor.TextTypeBboxPageTuple property)
print_dataframe() (in module dodfminer.extract.polished.helper)
,
[1]
pull() (dodfminer.downloader.core.Downloader method)
pull_json() (dodfminer.downloader.core.Downloader method)
R
read_json() (dodfminer.extract.polished.acts.base.Atos method)
read_txt() (dodfminer.extract.polished.acts.base.Atos method)
reset() (dodfminer.extract.pure.utils.title_extractor.ExtractorTitleSubtitle method)
,
[1]
run_extract_simple_wrap() (in module dodfminer.extract.polished.helper)
run_thread_wrap() (in module dodfminer.extract.polished.helper)
run_thread_wrap_multiple() (in module dodfminer.extract.polished.helper)
S
sort_2column() (in module dodfminer.extract.pure.utils.title_extractor)
,
[1]
sort_by_column() (in module dodfminer.extract.pure.utils.title_extractor)
,
[1]
subtitles (dodfminer.extract.pure.utils.title_extractor.ExtractorTitleSubtitle property)
,
[1]
subtitles (dodfminer.extract.pure.utils.title_extractor.TitlesSubtitles property)
T
text (dodfminer.extract.pure.utils.title_extractor.TextTypeBboxPageTuple property)
TextTypeBboxPageTuple (class in dodfminer.extract.pure.utils.title_extractor)
titles (dodfminer.extract.pure.utils.title_extractor.ExtractorTitleSubtitle property)
,
[1]
titles (dodfminer.extract.pure.utils.title_extractor.TitlesSubtitles property)
titles_subtitles (dodfminer.extract.pure.utils.title_extractor.ExtractorTitleSubtitle property)
,
[1]
titles_subtitles_hierarchy (dodfminer.extract.pure.utils.title_extractor.ExtractorTitleSubtitle property)
,
[1]
TitlesSubtitles (class in dodfminer.extract.pure.utils.title_extractor)
type (dodfminer.extract.pure.utils.title_extractor.TextTypeBboxPageTuple property)
X
x0 (dodfminer.extract.pure.utils.title_extractor.Box property)
x1 (dodfminer.extract.pure.utils.title_extractor.Box property)
xml_multiple() (in module dodfminer.extract.polished.helper)
Y
y0 (dodfminer.extract.pure.utils.title_extractor.Box property)
y1 (dodfminer.extract.pure.utils.title_extractor.Box property)
Read the Docs
v: main
Versions
latest
main
json_merge
cli_contratos
Downloads
pdf
html
epub
On Read the Docs
Project Home
Builds