Impresso Essentials
Contents:
Input/Output
Data Versioning
Utilities
BBOX visualizer JSON extractor
Impresso Essentials
Index
Index
A | B | C | D | E | F | G | H | I | J | L | M | N | O | P | R | S | T | U | V | W
A
add_by_ci_id() (impresso_essentials.versioning.data_manifest.DataManifest method)
add_by_title_year() (impresso_essentials.versioning.data_manifest.DataManifest method)
add_count_list_by_title_year() (impresso_essentials.versioning.data_manifest.DataManifest method)
add_counts() (impresso_essentials.versioning.data_statistics.DataStatistics method)
add_stats_to_mft() (in module impresso_essentials.versioning.compute_manifest)
agg() (in module impresso_essentials.versioning.aggregators)
aggregate_stats_for_title() (impresso_essentials.versioning.data_manifest.DataManifest method)
alternative_read_text() (in module impresso_essentials.io.s3)
append_to_notes() (impresso_essentials.versioning.data_manifest.DataManifest method)
B
build_bbox_json() (in module impresso_essentials.bbox_visualizer.json_builder)
bytes_to() (in module impresso_essentials.utils)
C
CANONICAL (impresso_essentials.versioning.helpers.DataStage attribute)
canonical_path() (in module impresso_essentials.io.fs_utils)
check_filenaming() (in module impresso_essentials.io.fs_utils)
check_id() (in module impresso_essentials.io.fs_utils)
chunk() (in module impresso_essentials.utils)
chunk() (in module impresso_essentials.versioning.aggregators)
clone_git_repo() (in module impresso_essentials.versioning.helpers)
compute() (impresso_essentials.versioning.data_manifest.DataManifest method)
compute_stats_for_stage() (in module impresso_essentials.versioning.compute_manifest)
compute_stats_in_canonical_bag() (in module impresso_essentials.versioning.aggregators)
compute_stats_in_doc_emb_bag() (in module impresso_essentials.versioning.aggregators)
compute_stats_in_entities_bag() (in module impresso_essentials.versioning.aggregators)
compute_stats_in_img_emb_bag() (in module impresso_essentials.versioning.aggregators)
compute_stats_in_langident_bag() (in module impresso_essentials.versioning.aggregators)
compute_stats_in_lingproc_bag() (in module impresso_essentials.versioning.aggregators)
compute_stats_in_ocrqa_bag() (in module impresso_essentials.versioning.aggregators)
compute_stats_in_rebuilt_bag() (in module impresso_essentials.versioning.aggregators)
compute_stats_in_solr_text_ing_bag() (in module impresso_essentials.versioning.aggregators)
compute_stats_in_text_reuse_passage_bag() (in module impresso_essentials.versioning.aggregators)
compute_stats_in_topics_bag() (in module impresso_essentials.versioning.aggregators)
count_keys (impresso_essentials.versioning.data_statistics.DataStatistics attribute)
count_keys (impresso_essentials.versioning.data_statistics.NewspaperStatistics attribute)
counts (impresso_essentials.versioning.data_statistics.DataStatistics attribute)
counts (impresso_essentials.versioning.data_statistics.NewspaperStatistics attribute)
counts_for_canonical_issue() (in module impresso_essentials.versioning.aggregators)
counts_for_rebuilt() (in module impresso_essentials.versioning.aggregators)
create_image_url() (in module impresso_essentials.bbox_visualizer.get_bbox)
create_manifest() (in module impresso_essentials.versioning.compute_manifest)
create_s3_path() (in module impresso_essentials.bbox_visualizer.get_bbox)
D
DataManifest (class in impresso_essentials.versioning.data_manifest)
DataStage (class in impresso_essentials.versioning.helpers)
DataStatistics (class in impresso_essentials.versioning.data_statistics)
date (impresso_essentials.utils.IssueDir attribute)
define_update_info_for_title() (impresso_essentials.versioning.data_manifest.DataManifest method)
delete_versioned_keys() (in module impresso_essentials.io.s3_delete)
E
edition (impresso_essentials.utils.IssueDir attribute)
element (impresso_essentials.versioning.data_statistics.DataStatistics attribute)
element (impresso_essentials.versioning.data_statistics.NewspaperStatistics attribute)
EMB_DOCS (impresso_essentials.versioning.helpers.DataStage attribute)
EMB_ENTITIES (impresso_essentials.versioning.helpers.DataStage attribute)
EMB_IMAGES (impresso_essentials.versioning.helpers.DataStage attribute)
EMB_PARAGRAPHS (impresso_essentials.versioning.helpers.DataStage attribute)
EMB_SENTS (impresso_essentials.versioning.helpers.DataStage attribute)
EMB_WORDS (impresso_essentials.versioning.helpers.DataStage attribute)
ENTITIES (impresso_essentials.versioning.helpers.DataStage attribute)
extract_np_key() (in module impresso_essentials.versioning.compute_manifest)
extract_version() (in module impresso_essentials.versioning.helpers)
F
fetch_files() (in module impresso_essentials.io.s3)
filter_new_or_modified_media() (in module impresso_essentials.versioning.helpers)
finalize() (in module impresso_essentials.versioning.aggregators)
find_s3_data_manifest_path() (in module impresso_essentials.versioning.helpers)
fixed_s3fs_glob() (in module impresso_essentials.io.s3)
G
generate_media_dict() (impresso_essentials.versioning.data_manifest.DataManifest method)
get_base_url() (in module impresso_essentials.bbox_visualizer.get_bbox)
get_bucket() (in module impresso_essentials.io.s3)
get_ci_bounding_boxes() (in module impresso_essentials.bbox_visualizer.get_bbox)
get_ci_type() (in module impresso_essentials.bbox_visualizer.get_bbox)
get_count_keys() (impresso_essentials.versioning.data_manifest.DataManifest method)
get_files_to_consider() (in module impresso_essentials.versioning.compute_manifest)
get_head_commit_url() (in module impresso_essentials.versioning.helpers)
get_issue_bounding_boxes() (in module impresso_essentials.bbox_visualizer.get_bbox)
get_issueshortpath() (in module impresso_essentials.io.fs_utils)
get_list_intersection() (in module impresso_essentials.utils)
get_media_item_years() (in module impresso_essentials.versioning.helpers)
get_media_titles() (in module impresso_essentials.versioning.helpers)
get_or_create_bucket() (in module impresso_essentials.io.s3)
get_page_bounding_boxes() (in module impresso_essentials.bbox_visualizer.get_bbox)
get_pkg_resource() (in module impresso_essentials.utils)
get_s3_client() (in module impresso_essentials.io.s3)
get_s3_object_size() (in module impresso_essentials.io.s3)
get_s3_resource() (in module impresso_essentials.io.s3)
get_storage_options() (in module impresso_essentials.io.s3)
git_commit_push() (in module impresso_essentials.versioning.helpers)
glob_with_size() (in module impresso_essentials.io.fs_utils)
granularity (impresso_essentials.versioning.data_statistics.DataStatistics attribute)
granularity (impresso_essentials.versioning.data_statistics.NewspaperStatistics attribute)
H
has_title_year_key() (impresso_essentials.versioning.data_manifest.DataManifest method)
has_value() (impresso_essentials.utils.SourceType class method)
has_value() (impresso_essentials.versioning.helpers.DataStage class method)
I
id_to_issuedir() (in module impresso_essentials.utils)
impresso_essentials.bbox_visualizer.get_bbox (module)
impresso_essentials.bbox_visualizer.json_builder (module)
impresso_essentials.io.fs_utils (module)
impresso_essentials.io.s3 (module)
impresso_essentials.io.s3_delete (module)
impresso_essentials.text_utils (module)
impresso_essentials.utils (module)
impresso_essentials.versioning.aggregators (module)
impresso_essentials.versioning.compute_manifest (module)
impresso_essentials.versioning.data_manifest (module)
impresso_essentials.versioning.data_statistics (module)
impresso_essentials.versioning.helpers (module)
increment_version() (in module impresso_essentials.versioning.helpers)
init_counts() (impresso_essentials.versioning.data_statistics.DataStatistics method)
init_logger() (in module impresso_essentials.utils)
init_media_info() (in module impresso_essentials.versioning.helpers)
init_yearly_count_dict() (impresso_essentials.versioning.data_manifest.DataManifest method)
is_git_repo() (in module impresso_essentials.versioning.helpers)
is_stopword_or_all_stopwords() (in module impresso_essentials.text_utils)
IssueDir (class in impresso_essentials.utils)
IssueDir (in module impresso_essentials.io.s3)
J
journal (impresso_essentials.utils.IssueDir attribute)
L
LANGIDENT (impresso_essentials.versioning.helpers.DataStage attribute)
LINGPROC (impresso_essentials.versioning.helpers.DataStage attribute)
list_files() (in module impresso_essentials.io.s3)
list_local_directories() (in module impresso_essentials.io.fs_utils)
list_newspapers() (in module impresso_essentials.io.s3)
list_s3_directories() (in module impresso_essentials.io.s3)
M
main() (in module impresso_essentials.io.s3_delete)
main() (in module impresso_essentials.versioning.compute_manifest)
manifest_summary() (in module impresso_essentials.versioning.helpers)
media_list_from_mft_json() (in module impresso_essentials.versioning.helpers)
MG (impresso_essentials.utils.SourceType attribute)
module
impresso_essentials.bbox_visualizer.get_bbox
impresso_essentials.bbox_visualizer.json_builder
impresso_essentials.io.fs_utils
impresso_essentials.io.s3
impresso_essentials.io.s3_delete
impresso_essentials.text_utils
impresso_essentials.utils
impresso_essentials.versioning.aggregators
impresso_essentials.versioning.compute_manifest
impresso_essentials.versioning.data_manifest
impresso_essentials.versioning.data_statistics
impresso_essentials.versioning.helpers
MYSQL_CIS (impresso_essentials.versioning.helpers.DataStage attribute)
N
new_media() (impresso_essentials.versioning.data_manifest.DataManifest method)
NEWS_AGENCIES (impresso_essentials.versioning.helpers.DataStage attribute)
NewspaperStatistics (class in impresso_essentials.versioning.data_statistics)
normalize_text() (in module impresso_essentials.text_utils)
NP (impresso_essentials.utils.SourceType attribute)
O
OCRQA (impresso_essentials.versioning.helpers.DataStage attribute)
output_mft_s3_path (impresso_essentials.versioning.data_manifest.DataManifest property)
overall_stats() (impresso_essentials.versioning.data_manifest.DataManifest method)
P
parse_canonical_filename() (in module impresso_essentials.io.fs_utils)
parse_json() (in module impresso_essentials.io.fs_utils)
partitioner() (in module impresso_essentials.utils)
PASSIM (impresso_essentials.versioning.helpers.DataStage attribute)
path (impresso_essentials.utils.IssueDir attribute)
possible_count_keys (impresso_essentials.versioning.data_statistics.NewspaperStatistics attribute)
pretty_print() (impresso_essentials.versioning.data_statistics.DataStatistics method)
pretty_print() (impresso_essentials.versioning.data_statistics.NewspaperStatistics method)
process_altogether() (in module impresso_essentials.versioning.compute_manifest)
process_by_title() (in module impresso_essentials.versioning.compute_manifest)
R
RB (impresso_essentials.utils.SourceType attribute)
read_jsonlines() (in module impresso_essentials.io.s3)
read_manifest_from_s3() (in module impresso_essentials.versioning.helpers)
read_manifest_from_s3_path() (in module impresso_essentials.versioning.helpers)
read_s3_issues() (in module impresso_essentials.io.s3)
readtext_jsonlines() (in module impresso_essentials.io.s3)
REBUILT (impresso_essentials.versioning.helpers.DataStage attribute)
remove_corrupted_files() (in module impresso_essentials.versioning.compute_manifest)
remove_media_in_manifest() (in module impresso_essentials.versioning.helpers)
replace_by_ci_id() (impresso_essentials.versioning.data_manifest.DataManifest method)
replace_by_title_year() (impresso_essentials.versioning.data_manifest.DataManifest method)
RM (impresso_essentials.utils.SourceType attribute)
RS (impresso_essentials.utils.SourceType attribute)
S
s3_glob_with_size() (in module impresso_essentials.io.s3)
s3_iter_bucket() (in module impresso_essentials.io.s3)
same_counts() (impresso_essentials.versioning.data_statistics.DataStatistics method)
same_counts() (impresso_essentials.versioning.data_statistics.NewspaperStatistics method)
search_text() (in module impresso_essentials.text_utils)
segment_and_trim_sentences() (in module impresso_essentials.text_utils)
SOLR_TEXT (impresso_essentials.versioning.helpers.DataStage attribute)
SourceType (class in impresso_essentials.utils)
stage (impresso_essentials.versioning.data_statistics.DataStatistics attribute)
stage (impresso_essentials.versioning.data_statistics.NewspaperStatistics attribute)
stage_extra_keys (impresso_essentials.versioning.data_statistics.NewspaperStatistics attribute)
stop() (impresso_essentials.utils.Timer method)
T
TEXT_REUSE (impresso_essentials.versioning.helpers.DataStage attribute)
tick() (impresso_essentials.utils.Timer method)
Timer (class in impresso_essentials.utils)
timestamp() (in module impresso_essentials.utils)
title_level_stats() (impresso_essentials.versioning.data_manifest.DataManifest method)
tokenise() (in module impresso_essentials.text_utils)
TOPICS (impresso_essentials.versioning.helpers.DataStage attribute)
U
update_media_stats() (impresso_essentials.versioning.data_manifest.DataManifest method)
upload_to_s3() (in module impresso_essentials.io.s3)
user_confirmation() (in module impresso_essentials.utils)
user_question() (in module impresso_essentials.utils)
V
validate_against_schema() (in module impresso_essentials.utils)
validate_and_export_manifest() (impresso_essentials.versioning.data_manifest.DataManifest method)
validate_config() (in module impresso_essentials.versioning.compute_manifest)
validate_granularity() (in module impresso_essentials.versioning.helpers)
validate_stage() (in module impresso_essentials.versioning.helpers)
validate_version() (in module impresso_essentials.versioning.helpers)
version_as_list() (in module impresso_essentials.versioning.helpers)
W
write_and_push_to_git() (in module impresso_essentials.versioning.helpers)
write_dump_to_fs() (in module impresso_essentials.versioning.helpers)