# Snakefile: Snakemake workflow that builds the text corpora under data/.
# Runs create_corpus_before_lang.py to produce data/corpus_lang.csv.
# The script takes no arguments here, so the paths below only tell Snakemake
# what the step depends on and what it is expected to create.
# NOTE(review): the input/output/shell keywords were missing from this rule and
# have been restored from the layout used by the other rules; confirm.
rule lang:
    input:
        # Directory of extracted text files (the output of rule `parse`).
        "data/txts/",
    output:
        "data/corpus_lang.csv"
    shell:
        "python create_corpus_before_lang.py"

# Builds the corpus in "iramuteq" mode (-m iramuteq) from the preprocessed
# texts, writing the directory data/corpus_iramuteq/.
rule corpus_iramuteq:
    input:
        "data/preprocessed/",
        # themes.json is read by the command below, so it is declared here:
        # Snakemake then re-runs the rule when the file changes and reports a
        # missing-input error before launching the script if it is absent.
        themes="themes.json",
    output:
        directory("data/corpus_iramuteq/")
    shell:
        "python create_corpus.py -t {input.themes} -d data/preprocessed/ -m iramuteq"

# Builds the corpus in "cortext" mode (-m cortext) from the preprocessed texts,
# writing the directory data/corpus_cortex/.
rule corpus_cortex:
    input:
        "data/preprocessed/",
        # themes.json is read by the command below, so it is declared here:
        # Snakemake then re-runs the rule when the file changes and reports a
        # missing-input error before launching the script if it is absent.
        themes="themes.json",
    output:
        directory("data/corpus_cortex/")
    shell:
        "python create_corpus.py -t {input.themes} -d data/preprocessed/ -m cortext"

# Runs preprocess.py, turning data/txts/ into data/preprocessed/.
# NOTE(review): the header of this rule and its input/output keywords were
# missing, which left its lines fused onto the end of `corpus_cortex` (extra
# strings appended to that rule's command plus a second `shell:` directive).
# The name `preprocess` is inferred from the script and output directory;
# confirm against the original workflow.
rule preprocess:
    input:
        "data/txts/",
    output:
        directory("data/preprocessed/")
    shell:
        "python preprocess.py"

# Runs parse_docs.py, turning data/docs/ into the directory data/txts/.
# NOTE(review): the input/output/shell keywords were missing from this rule and
# have been restored from the layout used by the other rules; confirm.
rule parse:
    input:
        # Directory of documents (the output of rule `download`).
        "data/docs/",
    output:
        directory("data/txts/")
    shell:
        "python parse_docs.py"

# Runs dl_docs.py and declares the directory data/docs/ as its result.
# The rule has no inputs, so it is the starting point of the rules that
# consume data/docs/.
rule download:
    output:
        # directory(...) marks the whole directory, not a single file, as the output.
        directory("data/docs/")
    shell:
        "python dl_docs.py"

# Deletes the intermediate directories so the pipeline can be rebuilt from
# scratch. It declares no inputs or outputs, so it only runs when requested by
# name.
rule clean:
    shell:
        # The other rules create these directories under data/; the previous
        # command targeted docs/, txts/ and preprocessed/ in the working
        # directory, which none of the rules in this file write to.
        "rm -rf data/docs data/txts data/preprocessed"