Add indexing, fix minor typos

DARIAH-DE · Feb 12, 2014 · d942e4c · d942e4c
1 parent be98083
commit d942e4c
Show file tree

Hide file tree

Showing 16 changed files with 29 additions and 11 deletions.
diff --git a/source/case_study_racine.rst b/source/case_study_racine.rst
@@ -1,3 +1,4 @@
+.. index:: Racine, case-study, theatre
 .. _case-study-racine:
 
 ===============================================

diff --git a/source/classification_logistic_regression.rst b/source/classification_logistic_regression.rst
@@ -1,3 +1,4 @@
+.. index:: classification, machine learning, logistic regression
 .. _classification-machine-learning:
 
 ===========================================================

diff --git a/source/conf.py b/source/conf.py
@@ -1,8 +1,7 @@
 #!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 #
-# Text Analysis with Topic Models for the Geisteswissenschaften documentation build configuration file, created by
-# sphinx-quickstart on Wed Jun  5 20:52:53 2013.
+# Text Analysis with Topic Models configuration file
 #
 # This file is execfile()d with the current directory set to its
 # containing dir.
@@ -58,7 +57,7 @@
 master_doc = 'index'
 
 # General information about the project.
-project = 'Text Analysis with Topic Models for the Geisteswissenschaften'
+project = 'Text Analysis with Topic Models for the Humanities and Social Sciences'
 copyright = '2013, Allen B. Riddell'
 
 # The version info for the project you're documenting, acts as replacement for

diff --git a/source/datasets.rst b/source/datasets.rst
@@ -1,3 +1,7 @@
+.. index:: datasets, French plays, British novels, stop words
+   single: Austen, Jane
+   single: Brontë, Charlotte
+   single: Hugo, Victor
 .. _datasets:
 
 ==========

diff --git a/source/feature_selection.rst b/source/feature_selection.rst
@@ -1,3 +1,7 @@
+.. index:: feature selection, distinctive words, Bayesian t-test, keyness, chi-squared test, Dunning log-likelihood, G-test
+   single: Austen, Jane
+   single: Brontë, Charlotte
+
 .. _feature-selection:
 
 ==============================================

diff --git a/source/getting_started.rst b/source/getting_started.rst
@@ -1,3 +1,4 @@
+.. index:: Python, NumPy, matplotlib
 .. _getting-started:
 
 =================

diff --git a/source/index.rst b/source/index.rst
@@ -1,4 +1,4 @@
-.. Text Analysis with Topic Models for the Geisteswissenschaften documentation master file
+.. Text Analysis with Topic Models for the Humanities and Social Sciences
 
 TAToM: Text Analysis with Topic Models for the Humanities and Social Sciences
 =============================================================================
@@ -8,9 +8,6 @@ consists of a series of tutorials covering basic procedures in quantitative text
 analysis. The tutorials cover how to prepare a text corpus for analysis and how
 to explore a collection of texts using topic models and machine learning.
 
-These tutorials are addressed to an audience in the humanities and social
-sciences.
-
 .. toctree::
    :maxdepth: 2
 

diff --git a/source/predicting_frequencies.rst b/source/predicting_frequencies.rst
@@ -1,5 +1,7 @@
 .. _predicting:
 
+.. NOTE: THIS IS CURRENTLY UNUSED
+
 =============================
  Predicting Word Frequencies
 =============================

diff --git a/source/preliminaries.rst b/source/preliminaries.rst
@@ -1,3 +1,4 @@
+.. index:: Python, installing Python, Python modules
 .. _preliminaries:
 
 ================

diff --git a/source/preprocessing.rst b/source/preprocessing.rst
@@ -1,3 +1,4 @@
+.. index:: preprocessing, document-term matrix, tokenization, hyphenation, stemming
 .. _preprocessing:
 
 ===============

diff --git a/source/references.rst b/source/references.rst
@@ -1,4 +1,3 @@
-
 .. _references:
 
 ============

diff --git a/source/topic_model_mallet.rst b/source/topic_model_mallet.rst
@@ -1,3 +1,4 @@
+.. index:: topic model, MALLET
 .. _topic-model-mallet:
 
 ============================

diff --git a/source/topic_model_python.rst b/source/topic_model_python.rst
@@ -1,3 +1,4 @@
+.. index:: topic model, non-negative matrix factorization, NMF
 .. _topic-model-python:
 
 ==========================

diff --git a/source/topic_model_visualization.rst b/source/topic_model_visualization.rst
@@ -1,3 +1,4 @@
+.. index:: visualization, topic model
 .. _topic-model-visualization:
 
 ==========================

diff --git a/source/visualizing_trends.rst b/source/visualizing_trends.rst
@@ -1,3 +1,7 @@
+.. index:: Les Misérables
+   pair: visualization; trends
+   single: Hugo, Victor
+
 .. _visualizing-trends:
 
 ====================
@@ -20,7 +24,7 @@ standard.[#fn_les_mis]_ The novel comes in five volumes ("Fantine", "Cosette",
 "Marius", "The Idyll in the Rue Plumet and the Epic in the Rue St. Denis", and
 "Jean Valjean"). And within each volume we have a sequence of chapters. (And
 within each chapter we have a sequence of paragraphs, ...). In this section we
-will address how to visualize topic shares in sequence.  
+will address how to visualize topic shares in sequence.
 
 To whet your appetite, consider the rise and fall of a topic associated with
 revolutionary activity in *Les Misérables*:
@@ -224,7 +228,7 @@ novel where the topic appears:
     plt.plot(series, '.', alpha=0.3)
     plt.plot(series_smooth, '-', linewidth=2)
     plt.vlines(volume_indexes, ymin=0, ymax=np.max(series))
-    text_xs = np.array(volume_indexes) + np.diff(np.array(volume_indexes + [max(xs)]))/2 
+    text_xs = np.array(volume_indexes) + np.diff(np.array(volume_indexes + [max(xs)]))/2
     text_ys = np.repeat(max(series), len(volume_names)) - 0.05
     for x, y, s in zip(text_xs, text_ys, volume_names):
         plt.text(x, y, s, horizontalalignment='center')
@@ -233,7 +237,7 @@ novel where the topic appears:
     plt.ylabel("Topic share")
     plt.xlabel("Novel segment")
     plt.ylim(0, max(series))
-    
+
     @savefig plot_topics_over_time_series_les_misérables.png width=7in
     plt.tight_layout()
 

diff --git a/source/working_with_text.rst b/source/working_with_text.rst
@@ -1,3 +1,4 @@
+.. index:: document-term matrix, tokenizing, CountVectorizer, n-gram, word frequency
 .. _working-with-text:
 
 ===================