/ notebook.ipynb
notebook.ipynb
  1  {
  2   "cells": [
  3    {
  4     "cell_type": "markdown",
  5     "metadata": {},
  6     "source": [
  7      "# Common functions"
  8     ]
  9    },
 10    {
 11     "cell_type": "code",
 12     "execution_count": 16,
 13     "metadata": {},
 14     "outputs": [],
 15     "source": [
 16      "import json\n",
 17      "from tasklib import TaskWarrior\n",
 18      "import datetime\n",
 19      "import pandas as pd\n",
 20      "import matplotlib.pyplot as plt\n",
 21      "import re\n",
 22      "from sklearn.feature_extraction.text import CountVectorizer\n",
 23      "from sklearn.feature_extraction.text import TfidfVectorizer"
 24     ]
 25    },
 26    {
 27     "cell_type": "code",
 28     "execution_count": 2,
 29     "metadata": {},
 30     "outputs": [],
 31     "source": [
 32      "def only2019(df):\n",
 33      "    return df.filter(like='2019', axis=0)"
 34     ]
 35    },
 36    {
 37     "cell_type": "markdown",
 38     "metadata": {},
 39     "source": [
 40      "#  Journal"
 41     ]
 42    },
 43    {
 44     "cell_type": "markdown",
 45     "metadata": {},
 46     "source": [
 47      "### import journal into habits data\n",
 48      "\n",
 49      "### TF-IDF for jrnl"
 50     ]
 51    },
 52    {
 53     "cell_type": "markdown",
 54     "metadata": {},
 55     "source": [
 56      "https://kavita-ganesan.com/extracting-keywords-from-text-tfidf/"
 57     ]
 58    },
 59    {
 60     "cell_type": "code",
 61     "execution_count": 19,
 62     "metadata": {},
 63     "outputs": [],
 64     "source": [
 65      "def pre_process(text):\n",
 66      "    # lowercase\n",
 67      "    text=text.lower()\n",
 68      "    #remove tags\n",
 69      "    #text=re.sub(\"<!--?.*?-->\",\"\",text)\n",
 70      "    # remove special characters and digits\n",
 71      "    text=re.sub(\"(\\\\d|\\\\W)+\",\" \",text)\n",
 72      "    return text\n",
 73      "\n",
 74      "def sort_coo(coo_matrix):\n",
 75      "    tuples = zip(coo_matrix.col, coo_matrix.data)\n",
 76      "    return sorted(tuples, key=lambda x: (x[1], x[0]), reverse=True)\n",
 77      " \n",
 78      "def extract_topn_from_vector(feature_names, sorted_items, topn=10):\n",
 79      "    \"\"\"get the feature names and tf-idf score of top n items\"\"\"\n",
 80      "    \n",
 81      "    #use only topn items from vector\n",
 82      "    sorted_items = sorted_items[:topn]\n",
 83      " \n",
 84      "    score_vals = []\n",
 85      "    feature_vals = []\n",
 86      "    \n",
 87      "    # word index and corresponding tf-idf score\n",
 88      "    for idx, score in sorted_items:\n",
 89      "        \n",
 90      "        #keep track of feature name and its corresponding score\n",
 91      "        score_vals.append(round(score, 3))\n",
 92      "        feature_vals.append(feature_names[idx])\n",
 93      " \n",
 94      "    #build a dict mapping each feature name to its score\n",
 95      "    #results = zip(feature_vals,score_vals)\n",
 96      "    results= {}\n",
 97      "    for idx in range(len(feature_vals)):\n",
 98      "        results[feature_vals[idx]]=score_vals[idx]\n",
 99      "    \n",
100      "    return results\n",
101      " "
102     ]
103    },
104    {
105     "cell_type": "code",
106     "execution_count": 23,
107     "metadata": {},
108     "outputs": [
109      {
110       "name": "stdout",
111       "output_type": "stream",
112       "text": [
113        "\n",
114        "===Keywords===\n",
115        "rain 0.699\n",
116        "years 0.658\n",
117        "spokes 0.584\n",
118        "new 0.567\n",
119        "dont 0.545\n",
120        "quite 0.507\n",
121        "feeling 0.507\n",
122        "freedom 0.502\n",
123        "despair 0.502\n",
124        "awaits 0.502\n",
125        "voices 0.501\n",
126        "hearing 0.501\n",
127        "hard 0.495\n",
128        "better 0.494\n",
129        "news 0.488\n",
130        "uber 0.469\n",
131        "pay 0.468\n",
132        "graveyard 0.443\n",
133        "cult 0.443\n",
134        "repeatedly 0.437\n",
135        "tech 0.433\n",
136        "laurel 0.431\n",
137        "wedding 0.426\n",
138        "wont 0.422\n",
139        "bit 0.418\n",
140        "tour 0.404\n",
141        "able 0.394\n",
142        "strategy 0.392\n",
143        "like 0.392\n",
144        "clouds 0.383\n",
145        "travelling 0.383\n",
146        "watching 0.383\n",
147        "sources 0.377\n",
148        "flows 0.377\n",
149        "doodling 0.374\n",
150        "successful 0.373\n",
151        "imagine 0.373\n",
152        "ye 0.373\n",
153        "tell 0.371\n",
154        "im 0.37\n"
155       ]
156      }
157     ],
158     "source": [
159      "with open('data/tiddlers.json' , 'r') as file:\n",
160      "    tiddly = json.load(file)\n",
161      "    \n",
162      "jrnl = pd.DataFrame.from_dict(tiddly)\n",
163      "jrnl.set_axis(jrnl['title'], axis='index', inplace=True)\n",
164      "jrnl = only2019(jrnl.drop(['created', 'modified'], axis=1))\n",
165      "jrnl.drop(['title', 'tags'], axis=1, inplace=True)\n",
166      "jrnl['text'] = jrnl['text'].apply(lambda x: pre_process(x))\n",
167      " \n",
168      "#get the text column \n",
169      "docs=jrnl['text'].tolist()\n",
170      "\n",
171      "tfidf = TfidfVectorizer(stop_words='english')\n",
172      "X = tfidf.fit_transform(docs)\n",
173      "\n",
174      "#sort the tf-idf vectors by descending order of scores\n",
175      "sorted_items=sort_coo(X.tocoo())\n",
176      " \n",
177      "#extract only the top n; n here is 40\n",
178      "keywords=extract_topn_from_vector(tfidf.get_feature_names(),sorted_items,40)\n",
179      " \n",
180      "# now print the results\n",
181      "#print(\"\\n=====Doc=====\")\n",
182      "#print(doc)\n",
183      "print(\"\\n===Keywords===\")\n",
184      "for k in keywords:\n",
185      "    print(k,keywords[k])"
186     ]
187    },
188    {
189     "cell_type": "markdown",
190     "metadata": {},
191     "source": [
192      "According to habits I have 49 Journal entries\n",
193      "\n",
194      "\n",
195      "Let's compare that with this data"
196     ]
197    },
198    {
199     "cell_type": "code",
200     "execution_count": 59,
201     "metadata": {},
202     "outputs": [],
203     "source": [
204      "import json"
205     ]
206    },
207    {
208     "cell_type": "code",
209     "execution_count": 106,
210     "metadata": {},
211     "outputs": [
212      {
213       "data": {
214        "text/plain": [
215         "tags     87\n",
216         "text     88\n",
217         "title    88\n",
218         "dtype: int64"
219        ]
220       },
221       "execution_count": 106,
222       "metadata": {},
223       "output_type": "execute_result"
224      }
225     ],
226     "source": [
227      "with open('data/tiddlers.json' , 'r') as file:\n",
228      "    tiddly = json.load(file)\n",
229      "\n",
230      "jrnl = pd.DataFrame.from_dict(tiddly)\n",
231      "jrnl.set_axis(jrnl['title'], axis='index', inplace=True)\n",
232      "jrnl = only2019(jrnl.drop(['created', 'modified'], axis=1))\n",
233      "jrnl.count()"
234     ]
235    },
236    {
237     "cell_type": "code",
238     "execution_count": 107,
239     "metadata": {},
240     "outputs": [
241      {
242       "data": {
243        "text/plain": [
244         "tags     30\n",
245         "text     30\n",
246         "title    30\n",
247         "dtype: int64"
248        ]
249       },
250       "execution_count": 107,
251       "metadata": {},
252       "output_type": "execute_result"
253      }
254     ],
255     "source": [
256      "sameDay = jrnl\n",
257      "sameDay['title'] = sameDay.title.map(lambda x: x[:-8])\n",
258      "sameDay[sameDay['title'].duplicated(keep=False)].count()"
259     ]
260    },
261    {
262     "cell_type": "markdown",
263     "metadata": {},
264     "source": [
265      "Where are the [87-15=(72)] - 49 = 23 entries that exist but aren't in my loop Habits?\n",
266      "\n",
267      "23 Entries before March?"
268     ]
269    },
270    {
271     "cell_type": "code",
272     "execution_count": 119,
273     "metadata": {},
274     "outputs": [],
275     "source": [
276      "def beforeMarch(df):\n",
277      "    return df.filter(regex='(January|February|March)', axis=0)"
278     ]
279    },
280    {
281     "cell_type": "code",
282     "execution_count": 121,
283     "metadata": {},
284     "outputs": [
285      {
286       "data": {
287        "text/plain": [
288         "tags     23\n",
289         "text     23\n",
290         "title    23\n",
291         "dtype: int64"
292        ]
293       },
294       "execution_count": 121,
295       "metadata": {},
296       "output_type": "execute_result"
297      }
298     ],
299     "source": [
300      "beforeMarch(jrnl).count()"
301     ]
302    },
303    {
304     "cell_type": "markdown",
305     "metadata": {},
306     "source": [
307      "# Taskwarrior"
308     ]
309    },
310    {
311     "cell_type": "code",
312     "execution_count": 2,
313     "metadata": {},
314     "outputs": [],
315     "source": [
316      "import json\n",
317      "from tasklib import TaskWarrior\n",
318      "import datetime\n",
319      "import pandas as pd\n",
320      "import matplotlib.pyplot as plt"
321     ]
322    },
323    {
324     "cell_type": "raw",
325     "metadata": {},
326     "source": [
327      "Year Month     Added Completed Deleted Net\n",
328      "---- --------- ----- --------- ------- ---\n",
329      "2019 January      18        19       0  -1\n",
330      "     February      9         7       0   2\n",
331      "     March        27        23       0   4\n",
332      "     April        13        14       3  -4\n",
333      "     May          10         9       0   1\n",
334      "     June         17        13       0   4\n",
335      "     July         17        17       0   0\n",
336      "     August       35        37       0  -2\n",
337      "     September    22        24       1  -3\n",
338      "     October      19        15       2   2\n",
339      "     November     17        16       0   1\n",
340      "     December     37        42       1  -6\n",
341      "     \n",
342      "     Total        241       236      7   5 "
343     ]
344    },
345    {
346     "cell_type": "code",
347     "execution_count": 36,
348     "metadata": {},
349     "outputs": [
350      {
351       "data": {
352        "text/plain": [
353         "description    241\n",
354         "due            107\n",
355         "end            241\n",
356         "id             241\n",
357         "imask            0\n",
358         "mask             0\n",
359         "modified       241\n",
360         "priority         3\n",
361         "project        188\n",
362         "recur            0\n",
363         "status         241\n",
364         "tags           215\n",
365         "until            0\n",
366         "urgency        241\n",
367         "dtype: int64"
368        ]
369       },
370       "execution_count": 36,
371       "metadata": {},
372       "output_type": "execute_result"
373      }
374     ],
375     "source": [
376      "with open('data/task.json', 'r') as myfile:\n",
377      "    task = json.load(myfile)\n",
378      "\n",
379      "# Convert the data into a data frame\n",
380      "# Some preliminary analysis\n",
381      "tasks = pd.DataFrame.from_dict(task)\n",
382      "tasks.set_axis(tasks['entry'], axis='index', inplace=True)\n",
383      "tasks = tasks.drop(['annotations', 'depends', 'parent', 'uuid', 'entry'], axis=1)\n",
384      "only2019(tasks).count()"
385     ]
386    },
387    {
388     "cell_type": "code",
389     "execution_count": 35,
390     "metadata": {},
391     "outputs": [
392      {
393       "name": "stdout",
394       "output_type": "stream",
395       "text": [
396        "16 unique tags\n",
397        "215 tagged items\n",
398        "The tags are: {'rocks', 'friends', 'd.tech', 'chores', 'job', 'prpj', 'fam', 'artifex', 'contact', 'd.infra', 'fun', 'piracy', 'work', 'uni', 'life', 'travel'}\n"
399       ]
400      },
401      {
402       "data": {
403        "image/png": "iVBORw0KGgoAAAANSUhEUgAAAzkAAAHVCAYAAADSCzXJAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvOIA7rQAAIABJREFUeJzt3X28bHVdL/DPV46KaIjI0RTQQ16uZd2r6bnmY1FYqZToTVMzBePGNS3S8mV4rfSVeS9qN8tKDZ8gNdPwiTRNRQg1QQ/IMz7wQlKC4JgPpV5N7Hf/WGvDsJl99j57Zs7e/Hy/X6/9mjVr1pr1/c2sNWs+67fW7GqtBQAAoBe32OgCAAAA5knIAQAAuiLkAAAAXRFyAACArgg5AABAV4QcAACgK0IOAADQFSEHAADoipADAAB0ZctGF5AkBxxwQNu2bdtGlwEAAGxS55xzzhdba1vXMu2mCDnbtm3Ljh07NroMAABgk6qqf1zrtE5XAwAAuiLkAAAAXRFyAACArgg5AABAV4QcAACgK0IOAADQFSEHAADoipADAAB0RcgBAAC6IuQAAABdEXIAAICuCDkAAEBXhBwAAKArQg4AANAVIQcAAOiKkAMAAHRFyAEAALoi5AAAAF0RcgAAgK5s2egCNqNtx79n3fNeccIRc6wEAADYXXpyAACArgg5AABAV4QcAACgK0IOAADQFSEHAADoipADAAB0RcgBAAC6IuQAAABdEXIAAICuCDkAAEBXhBwAAKArQg4AANCVVUNOVb2uqq6tqoumPPbsqmpVdcB4v6rq5VV1WVVdUFX3XUTRAAAAK1lLT85JSR6+fGRVHZzkJ5N8fmL0I5IcOv4dm+SVs5cIAACwdquGnNbamUm+NOWhlyV5TpI2Me7IJH/RBmcl2a+q7jKXSgEAANZgXdfkVNWjkvxTa+38ZQ8dmOQLE/evHMcBAADsEVt2d4aq2ifJ85L81LSHp4xrU8alqo7NcEpb7na3u+1uGQAAAFOtpyfnHkkOSXJ+VV2R5KAk51bV92bouTl4YtqDklw17Ulaaye21ra31rZv3bp1HWUAAADc1G6HnNbaha21O7XWtrXWtmUINvdtrf1zklOTPGX8lbUHJPlqa+3q+ZYMAACwsrX8hPSbk3wsyT2r6sqqOmYXk/9tksuTXJbk1UmePpcqAQAA1mjVa3Jaa09c5fFtE8MtyTNmLwsAAGB91vXragAAAJuVkAMAAHRFyAEAALoi5AAAAF0RcgAAgK4IOQAAQFeEHAAAoCtCDgAA0BUhBwAA6IqQAwAAdEXIAQAAuiLkAAAAXRFyAACArgg5AABAV4QcAACgK0IOAADQFSEHAADoipADAAB0RcgBAAC6IuQAAABdEXIAAICuCDkAAEBXhBwAAKArQg4AANAVIQcAAOiKkAMAAHRFyAEAALoi5AAAAF0RcgAAgK4IOQAAQFeEHAAAoCtCDgAA0BUhBwAA6IqQAwAAdEXIAQAAuiLkAAAAXRFyAACArgg5AABAV4QcAACgK0IOAADQFSEHAADoipADAAB0RcgBAAC6IuQAAABdEXIAAICuCDkAAEBXhBwAAKArQg4AANCVVUNOVb2uqq6tqosmxr20qj5VVRdU1Tuqar+Jx55bVZdV1aer6qcXVTgAAMA0a+nJOSnJw5eN+0CSH2qt/dckn0ny3CSpqnsleUKSHxzneUVV7TW3agEAAFaxashprZ2Z5EvLxr2/tXbdePesJAeNw0cm+avW2rdaa59LclmS+8+xXgAAgF2axzU5v5TkvePwgUm+MPHYleO4m6iqY6tqR1Xt2Llz5xzKAAAAmDHkVNXzklyX5E1Lo6ZM1qbN21o7sbW2vbW2fevWrbOUAQAAcL0t652xqo5K8jNJDm+tLQWZK5McPDHZQUmuWn95AAAAu2ddPTlV9fAkv5XkUa21b0w8dGqSJ1TVravqkCS
HJvn47GUCAACszao9OVX15iSHJTmgqq5M8vwMv6Z26yQfqKokOau19rTW2sVV9dYkl2Q4je0ZrbXvLKp4AACA5VYNOa21J04Z/dpdTP+iJC+apSgAAID1msevqwEAAGwaQg4AANAVIQcAAOiKkAMAAHRFyAEAALoi5AAAAF0RcgAAgK4IOQAAQFeEHAAAoCtCDgAA0BUhBwAA6IqQAwAAdEXIAQAAuiLkAAAAXRFyAACArgg5AABAV4QcAACgK0IOAADQFSEHAADoipADAAB0RcgBAAC6IuQAAABdEXIAAICuCDkAAEBXhBwAAKArQg4AANAVIQcAAOiKkAMAAHRFyAEAALoi5AAAAF0RcgAAgK4IOQAAQFeEHAAAoCtCDgAA0BUhBwAA6IqQAwAAdEXIAQAAuiLkAAAAXRFyAACArgg5AABAV4QcAACgK0IOAADQFSEHAADoipADAAB0RcgBAAC6IuQAAABdEXIAAICuCDkAAEBXVg05VfW6qrq2qi6aGLd/VX2gqj473t5hHF9V9fKquqyqLqiq+y6yeAAAgOXW0pNzUpKHLxt3fJLTWmuHJjltvJ8kj0hy6Ph3bJJXzqdMAACAtVk15LTWzkzypWWjj0xy8jh8cpJHT4z/izY4K8l+VXWXeRULAACwmvVek3Pn1trVSTLe3mkcf2CSL0xMd+U47iaq6tiq2lFVO3bu3LnOMgAAAG5s3j88UFPGtWkTttZObK1tb61t37p165zLAAAAvlutN+Rcs3Qa2nh77Tj+yiQHT0x3UJKr1l8eAADA7llvyDk1yVHj8FFJ3jUx/injr6w9IMlXl05rAwAA2BO2rDZBVb05yWFJDqiqK5M8P8kJSd5aVcck+XySx42T/22SRya5LMk3kjx1ATUDAACsaNWQ01p74goPHT5l2pbkGbMWBQAAsF7z/uEBAACADSXkAAAAXRFyAACArgg5AABAV4QcAACgK0IOAADQFSEHAADoipADAAB0RcgBAAC6IuQAAABdEXIAAICuCDkAAEBXhBwAAKArQg4AANAVIQcAAOiKkAMAAHRFyAEAALoi5AAAAF0RcgAAgK4IOQAAQFeEHAAAoCtCDgAA0BUhBwAA6IqQAwAAdEXIAQAAuiLkAAAAXRFyAACArgg5AABAV4QcAACgK0IOAADQFSEHAADoipADAAB0RcgBAAC6IuQAAABdEXIAAICuCDkAAEBXhBwAAKArQg4AANAVIQcAAOiKkAMAAHRFyAEAALoi5AAAAF0RcgAAgK4IOQAAQFeEHAAAoCtCDgAA0BUhBwAA6IqQAwAAdGWmkFNVz6qqi6vqoqp6c1XtXVWHVNXZVfXZqnpLVd1qXsUCAACsZt0hp6oOTHJcku2ttR9KsleSJyR5cZKXtdYOTfLlJMfMo1AAAIC1mPV0tS1JblNVW5Lsk+TqJD+R5JTx8ZOTPHrGZQAAAKzZukNOa+2fkvxBks9nCDdfTXJOkq+01q4bJ7syyYHT5q+qY6tqR1Xt2Llz53rLAAAAuJFZTle7Q5IjkxyS5K5JbpvkEVMmbdPmb62d2Frb3lrbvnXr1vWWAQAAcCOznK72sCSfa63tbK19O8nbkzwoyX7j6WtJclCSq2asEQAAYM1mCTmfT/KAqtqnqirJ4UkuSXJ6kseO0xyV5F2zlQgAALB2s1yTc3aGHxg4N8mF43OdmOS3kvxGVV2W5I5JXjuHOgEAANZky+qTrKy19vwkz182+vIk95/leQEAANZr1p+QBgAA2FSEHAAAoCtCDgAA0BUhBwAA6IqQAwAAdEXIAQAAuiLkAAAAXRFyAACArgg5AABAV4QcAACgK0IOAADQFSEHAADoipADAAB0RcgBAAC6IuQAAABdEXIAAICuCDkAAEBXhBwAAKArQg4AANAVIQcAAOiKkAMAAHRFyAEAALoi5AAAAF0RcgAAgK4IOQAAQFeEHAAAoCtCDgAA0BUhBwAA6IqQAwAAdEXIAQAAuiLkAAAAXRF
yAACArgg5AABAV4QcAACgK0IOAADQFSEHAADoipADAAB0RcgBAAC6IuQAAABdEXIAAICuCDkAAEBXhBwAAKArQg4AANAVIQcAAOiKkAMAAHRFyAEAALoi5AAAAF2ZKeRU1X5VdUpVfaqqLq2qB1bV/lX1gar67Hh7h3kVCwAAsJpZe3L+OMn7Wmvfn+TeSS5NcnyS01prhyY5bbwPAACwR6w75FTVvkl+NMlrk6S19u+tta8kOTLJyeNkJyd59KxFAgAArNUsPTnfl2RnktdX1Ser6jVVddskd26tXZ0k4+2d5lAnAADAmswScrYkuW+SV7bWfjjJ17Mbp6ZV1bFVtaOqduzcuXOGMgAAAG4wS8i5MsmVrbWzx/unZAg911TVXZJkvL122syttRNba9tba9u3bt06QxkAAAA3WHfIaa39c5IvVNU9x1GHJ7kkyalJjhrHHZXkXTNVCAAAsBu2zDj/ryV5U1XdKsnlSZ6aITi9taqOSfL5JI+bcRkAAABrNlPIaa2dl2T7lIcOn+V5AQAA1mvW/5MDAACwqQg5AABAV4QcAACgK0IOAADQFSEHAADoipADAAB0RcgBAAC6IuQAAABdEXIAAICuCDkAAEBXhBwAAKArQg4AANAVIQcAAOiKkAMAAHRFyAEAALoi5AAAAF0RcgAAgK4IOQAAQFeEHAAAoCtCDgAA0BUhBwAA6IqQAwAAdEXIAQAAuiLkAAAAXRFyAACArgg5AABAV4QcAACgK0IOAADQFSEHAADoipADAAB0RcgBAAC6IuQAAABdEXIAAICuCDkAAEBXhBwAAKArQg4AANAVIQcAAOiKkAMAAHRFyAEAALoi5AAAAF0RcgAAgK4IOQAAQFeEHAAAoCtCDgAA0BUhBwAA6IqQAwAAdEXIAQAAuiLkAAAAXZk55FTVXlX1yap693j/kKo6u6o+W1VvqapbzV4mAADA2syjJ+fXk1w6cf/FSV7WWjs0yZeTHDOHZQAAAKzJTCGnqg5KckSS14z3K8lPJDllnOTkJI+eZRkAAAC7Y9aenD9K8pwk/zHev2OSr7TWrhvvX5nkwGkzVtWxVbWjqnbs3LlzxjIAAAAG6w45VfUzSa5trZ0zOXrKpG3a/K21E1tr21tr27du3breMgAAAG5kywzzPjjJo6rqkUn2TrJvhp6d/apqy9ibc1CSq2YvEwAAYG3W3ZPTWntua+2g1tq2JE9I8qHW2pOSnJ7kseNkRyV518xVAgAArNEi/k/ObyX5jaq6LMM1Oq9dwDIAAACmmuV0teu11s5IcsY4fHmS+8/jeQEAAHbXInpyAAAANoyQAwAAdEXIAQAAuiLkAAAAXRFyAACArgg5AABAV4QcAACgK0IOAADQFSEHAADoipADAAB0RcgBAAC6IuQAAABdEXIAAICuCDkAAEBXhBwAAKArQg4AANAVIQcAAOiKkAMAAHRFyAEAALoi5AAAAF0RcgAAgK4IOQAAQFeEHAAAoCtCDgAA0BUhBwAA6IqQAwAAdEXIAQAAuiLkAAAAXRFyAACArgg5AABAV4QcAACgK0IOAADQFSEHAADoipADAAB0ZctGFwDseduOf8+6573ihCPmWAkAwPzpyQEAALoi5AAAAF0RcgAAgK4IOQAAQFeEHAAAoCtCDgAA0BUhBwAA6IqQAwAAdEXIAQAAuiLkAAAAXRFyAACArgg5AABAV4QcAACgK+sOOVV1cFWdXlWXVtXFVfXr4/j9q+oDVfXZ8fYO8ysXAABg12bpybkuyW+21n4gyQOSPKOq7pXk+CSntdYOTXLaeB8AAGCPWHfIaa1d3Vo7dxz+tySXJjkwyZFJTh4nOznJo2ctEgAAYK3mck1OVW1L8sNJzk5y59ba1ckQhJLcaYV5jq2qHVW1Y+fOnfMoAwAAYPaQU1W3S/K2JM9srf3rWudrrZ3YWtveWtu+devWWcsAAABIMmPIqapbZgg4b2qtvX0cfU1V3WV8/C5Jrp2tRAAAgLW
b5dfVKslrk1zaWvvDiYdOTXLUOHxUknetvzwAAIDds2WGeR+c5MlJLqyq88Zx/yvJCUneWlXHJPl8ksfNViIAAMDarTvktNY+kqRWePjw9T4vAADALGbpyWGT2Xb8e9Y97xUnHDHHSgAAYOPM5SekAQAANgshBwAA6IqQAwAAdEXIAQAAuiLkAAAAXRFyAACArgg5AABAV4QcAACgK0IOAADQFSEHAADoipADAAB0RcgBAAC6IuQAAABdEXIAAICuCDkAAEBXhBwAAKArQg4AANAVIQcAAOiKkAMAAHRFyAEAALoi5AAAAF3ZstEFALB5bTv+Peue94oTjphjJQCwdnpyAACArgg5AABAV4QcAACgK0IOAADQFSEHAADoipADAAB0RcgBAAC6IuQAAABdEXIAAICuCDkAAEBXhBwAAKArQg4AANAVIQcAAOiKkAMAAHRFyAEAALoi5AAAAF0RcgAAgK4IOQAAQFeEHAAAoCtbNroA2Hb8e2aa/4oTjphTJQAA9EBPDgAA0BUhBwAA6IrT1YCbDac2AgBroScHAADoipADAAB0ZWGnq1XVw5P8cZK9krymtXbCopYFG2WW06ecOvXdxbqyZ92cT228ua4r3411Jzff2tW9+26u68rNte5ZLaQnp6r2SvJnSR6R5F5JnlhV91rEsgAAACYt6nS1+ye5rLV2eWvt35P8VZIjF7QsAACA61Vrbf5PWvXYJA9vrf2P8f6Tk/xIa+1XJ6Y5Nsmx4917Jvn03AtZjAOSfHGji5iDHtrRQxuSPtqhDZtHD+3ooQ1JH+3Qhs2jh3b00Iakj3bcXNtw99ba1rVMuKhrcmrKuBulqdbaiUlOXNDyF6aqdrTWtm90HbPqoR09tCHpox3asHn00I4e2pD00Q5t2Dx6aEcPbUj6aEcPbVjNok5XuzLJwRP3D0py1YKWBQAAcL1FhZxPJDm0qg6pqlsleUKSUxe0LAAAgOst5HS11tp1VfWrSf4uw09Iv661dvEilrUBbnan2K2gh3b00Iakj3Zow+bRQzt6aEPSRzu0YfPooR09tCHpox09tGGXFvLDAwAAABtlUaerAQAAbAghBwAA6IqQs4Kq+tp4e9eqOmVi/Jur6oKqetbGVbc4VXVFVR2wgcs/afw/S991quoFVfXsja5jXqpqe1W9fAOWu19VPX0PLGdDt5VpqmpbVf3CDPMfXVV3nWdNE899k/W7qg6rqgfN8Jxfm72y2VTVcVV1aVW9aaNr2dOq6lFVdfxG1zGpqv5hF48dVlXv3pP17MoK28SaXtOqemlVXVxVL11chbuvql5TVffa6DrmpaqeWVX7TNz/26rabxze0G1/peXPc9877hP+dB7PtREW9X9yutFauyrJY5Okqr43yYNaa3ff2KoWo6r22ugaZlFVe7XWvrPRdSypqspw3dt/bHQtG6G1tiPJjg1Y9H5Jnp7kFZMjN9v6sSDbkvxCkr9c5/xHJ7koe+4n/w9L8rUkK34xvRl4epJHtNY+t9GFLMKutpvW2qnZZL+c2lpbd2jeDHbjNf2fSba21r41ObKqtrTWrltIcWuw9E/gl7s5fv6O34memeSNSb6RJK21R05MstHb/k2WP77/G7Xv3XT05KxiPDJ60Xj3/UnuVFXnVdVDq+oeVfW+qjqnqj5cVd+/gXU+p6qOG4dfVlUfGocPr6o3VtUTq+rCqrqoql48Md/Xqur3qursJA+cGH+bsW2/vOC6nzL2jJ1fVW8YR/9oVf1DVV2+1KtTg5eO9V9YVY8fxx9WVadX1V8muXAc94tV9fHxffrzqtpr/DtpYv6F9MSN68ulVfWKJOcmefIKr/vDq+rcsd2nTXmeX66q947vw3FVdcn4Ov3VIupei2XbQqrq2eORyDOq6sXja/6Zqnro+PhGHTU9Ick9xvf/E1PWj3eO2+zFVXXsOO5XquolE207uqr+ZBy+yfq0qMKXbw9VdfeqOm0cd1pV3W2c7qSqevny7WRs+0P
HWp81vmcfHte1c2ui12T8zLhwXNYJ43NsT/Kmcf7bzKE9z6uqT1fVB5Pcc9lj25I8LcmzJj5Tt1bV28b37RNV9eBx2ttV1evHei+oqp+beJ4XjW04q6ruPGvNu6OqXpXk+5KcWlVfrYmj8uM2v23iM+HV4zr3/nm8tvMw1vapqjp5fF1Pqap9auil/N2q+kiSx43b+B+N69tFVXX/cf5Nd5S3hn1a1ZT9xWjfqnrH+Jn6qqrao9+DdrVNjI9f/5qutJ1X1alJbpvk7Kp6/DjdH1bV6UleXFX3H+f55Hh7k+XMoR0rrTtnVNX2cZobfb8Y16lPjO/LiVVV43T/qao+OG7H59bw3eoNVXXkxPLeVFWPmnMbpu0LJmt+XpK7Jjl9fG2v78Fftu0/q6puW1WvG9v3yaXaq+o3qup14/B/Gdu+z9SCdq/25Z89J1bV+5P8RU3se3dR19FV9fYavud9tm68/3tqDfvyv0/y4InxjxvrP7+qzpy1DXtEa83flL8kXxtvtyW5aPnweP+0JIeOwz+S5EMbWO8Dkvz1OPzhJB9Pcsskzx//Pp9ka4beuw8lefQ4bUvy8xPPc8XYzg8mecqCa/7BJJ9OcsB4f/8kJyX56wwB/F5JLhsf+7kkH8jwk+R3HttzlwxHgr+e5JBxuh9I8jdJbjnef0WSpyS5X5IPTCx7vwW1aVuS/xjfj7tOe93H+1+YqHn/8fYFSZ6d5FczHMm79Tj+qonhhdS9G22bXP+fPdZ8RpL/O457ZJIPjsOHJXn3Rta5fP1Y9nrfJkOvxR3H9+SyiWnem+QhK61PE9vKAQveHv4myVHj/V9K8s5xeKXt5EaveZJ9kuw9Dh+aZMc4/IgMvSf7LHtNzkiyfU7tuV+GYLlPkn2TXJbk2cumecHkuAw9UA8Zh++W5NJx+MVJ/mhiujuMty3Jz47DL0ny2xuwvl2R5IApbbloXBe3JbkuyX3G8W9N8ot7us4Vat82voYPHu+/LsN2fUWS50xMd0aSV4/DPzqxfR2d5E83uh3L2vS17Hp/8c0MXw73Gqd57B6sbS3bxPWv6Urb+VI7J4ZPSvLuJHuN9/dNsmUcfliSt+3Bdef6z5Dc9PvF/hPDb5jYds9O8phxeO/x9fmx3PB5d/skn1tq0xzbMG1fMO070QHT7i8b/t9L23WGswk+kyGI3iLJmUkek6F35cFzrP+K3PDZc06S24zjD8u4H9hFXUcnuXx8bfdO8o9JDh63k6XvLbdK8tGJ9fHCJAcuPdee2m5m+XO62jpV1e2SPCjJX48HI5Lk1htXUc5Jcr+q+p4k38rQi7A9yUMzfFE6o7W2MxmOiGTYUb0zyXeSvG3Zc70ryUtaa4s+z/QnkpzSWvtikrTWvjS+lu9swylel9QNR2YfkuTNbejuvmY8wvDfkvxrko+3G7prD8+wI/nE+Fy3SXJthtfg+2o4Ov+eDL1yi/KPrbWzxiMm01737yQ5c6nm1tqXJuZ9cpIrM4TQb4/jLshwdP2dGd6zzejt4+05GXZ+m8nk+pEkx1XVY8bhgzMcqDhrPFL6gCSfzXCE9aNJnpHp69MiTNseHpjkv4+PvyHDF/kl07aT5W6Z5E+r6j4Z1rv/PI5/WJLXt9aWTsH40grzz+KhSd6xtIzx6PNqHpbkXhOfqfuOn2kPy/BPpZMkrbUvj4P/nuHLXTKsez85h7oX4XOttfPG4c22jXyhtfbRcfiNSY4bh9+ybLo3J0lr7cyq2rfG6xI2qdX2F5cnwzW247SnrPhM87WebWIt23kyHORcOh3s9klOrqpDM3xpv+UsRe/CSuvOkuXfL368qp6TIcTsn+TiqjojwxfndyRJa+2b47R/X1V/VlV3yvAZ+LY2/9PwbrIvmFLzWv1UkkfVDb25eye5W2vt0qo6OsN+/M8nXq95O7W19v/WWtc4fFpr7atJUlWXJLl7htA0+b3lLblhv/HRJCdV1Vtzwz5/UxNy1u8WSb7SWrvPRheSJK2
1b1fVFUmemuEI7QVJfjzJPTKk8vutMOs3203Pk/1okkdU1V+2MbIvSGX4AF7uW8ummbyd5uvLpj+5tfbcmyys6t5JfjrDF9efz3BkfBGW6lmp5pXanQxHk+6T5KAMR66S5IgM4ehRSX6nqn5wAR/2a3FdbnyK694Tw0vv2Xey+T5Xrl8/quqwDF+YH9ha+8a4g11qx1syrBefyvBFpI2nU0xdnxZgV+vFksnHp20nyz0ryTVJ7p3hvfvmxPR74p+k7e4ybpHhvbnRznp8H6Y917cnPqM2et1by/aRDHVuitPVRstf16X7X1/jdJvRrvYXG92O3V3eWrbz5Mbv1wuTnN5ae0wNp4WesZvLXKvVXsvrv19U1d4ZesK3t9a+UFUvyLCN7KpNb0jypAwHOOa6v97FvmDad6I1PWWSn2utfXrKY4dm6GFcyI+6jJZvr0um1lVVP5Kbfi4tfX5OXUdba08b5zsiyXlVdZ/W2r/MVvZiuSZnnVpr/5rkc1X1uOT6a0buvcFlnZmhu/jMDKesPS3JeUnOSvJj43mkeyV5YpK/38Xz/G6Sf8myC7cX4LQkP19Vd0ySqtp/F9OemeTxNVxbszXDl/6Pr/Ccjx2P/qSq9q/huoYDktyitfa2JL+T5L7zbMgKzs701/1j4/hDlmqcmOeTGS4oPbWGX/a7RZKDW2unJ3lOhu7m2+2B2qe5JsM1aXesqlsn+ZkNqmM1/5bke1Z47PZJvjzu1L4/w2mFS96e4XTCJ+aGo9hT16fFlD11e/iH3NCD8aQkH1nlOZa3/fZJrh6PBD85wyk6ydCT+UtL54ZPrIO7eu1215lJHlPDdWXfk+Rn11Dv+zOcrpmxrvusMP4Oc6pxnq7I+LlSVfdNcsiGVrN2dxt7DJNh3V9pHVu6DvIhSb66dAR4k9rV/uL+VXXI+Nn6+Ky+Tc27rtW2iXm4fZJ/GoePXtAykrWvO8kNof+L45kwj02u/y51ZVU9Okmq6tZ1wzUrJ2W48D+ttYvnXPuu9gWT1vqZ+HdJfm08IJOq+uHx9vZJ/jjDOnjH2vO/Hju1rl04O8lh437+lkket/RAVd2jtXZ2a+13k3wxQ+/XpibkzOZJSY6pqvOTXJzkyFWmX7QPZzif8mOttWsyHLVI+kNAAAACcUlEQVT9cGvt6iTPTXJ6kvOTnNtae9cqz/XMJHvXxMVo8zZ+aL0oQ7f0+Un+cBeTvyND79T5Ga5teU5r7Z+nPOclSX47yfur6oIM51zfJcmBSc6oqvMyfHAu/Mj8Sq/72A18bJK3j+1+y7L5PpIhrL4nwznCb6yqCzMEoJe11r6y6NqnGU+f+70MH4LvztDjsemMR5Y+WsOPJCz/edX3JdkyrhsvzHAAYGm+Lye5JMndW2sfH8ettD4tou5p28NxSZ46LvvJSX59lae5IMl1NVwY+qwMByqOqqqzMpxy8PVxWe/LcN3XjnGbWDqV4aQkr6o5/PBAa+3cDOv2eRlO//hwklTV06rqaeNkf5PhS995NfxgxXFJttdwIfMlGQ7UJMnvJ7lDjRe9Zuil3mzelmT/8fX8lQznvt8cXJphHbkgwylEr1xhui/X8PPMr0pyzMT4zdaj07Lr/cXHMvxAx0UZesvfsccKW9s2MQ8vSfJ/quqjueHAxiKsdd3JuN96dYbrOt6Z5BMTDz85w6ljF2Q4sPO94zzXjMt4/QJqX3FfsMyJSd5b4w8P7MILM5wWeMG473nhOP5lSV7RWvtMhu3mhKWDZnvISnVNNX5veUGG7eSDGS59WPLSGn9IKUNgP38hFc9RLfZsJABgMxpPZXp3a+2HVpnujAwXyO9YNv43k+zbWnv+omrcHWMv6Lmt03/zsJmsdd2ZcRn7ZAhF993kPYdsUnpyAIDdMvY8HJ3hgvMNV8M/sf1Ykj/Y6FqYXVU9LMPZAn8i4LBeenIAAICu6MkBAAC6IuQAAABdEXIAAICuCDkAAEBXhBwAAKAr/x/4UhPI6+SXOgA
AAABJRU5ErkJggg==\n",
404        "text/plain": [
405         "<Figure size 1008x576 with 1 Axes>"
406        ]
407       },
408       "metadata": {
409        "needs_background": "light"
410       },
411       "output_type": "display_data"
412      }
413     ],
414     "source": [
415      "# How many of each tag did I do?\n",
416      "\n",
417      "l = only2019(tasks).tags.dropna().to_list()\n",
418      "flat_list = [item for sublist in l for item in sublist]\n",
419      "print(str(len(set(flat_list))) + \" unique tags\")\n",
420      "print(str(len(l)) + \" tagged items\")\n",
421      "print(\"The tags are: \" + str(set(flat_list)))\n",
422      "\n",
423      "fig = plt.figure(figsize=(14,8))\n",
424      "plt.hist(flat_list, rwidth=1/3, align='left', bins=16)\n",
425      "plt.show()"
426     ]
427    },
428    {
429     "cell_type": "code",
430     "execution_count": 57,
431     "metadata": {},
432     "outputs": [
433      {
434       "name": "stdout",
435       "output_type": "stream",
436       "text": [
437        "count              241\n",
438        "unique             216\n",
439        "top       fold clothes\n",
440        "freq                 7\n",
441        "Name: description, dtype: object\n",
442        "\n",
443        "Repeated descriptions: \n",
444        "  laundry\n",
445        "  haircut\n",
446        "  fold clothes\n",
447        "  recharge my way\n",
448        "  change sheets\n",
449        "  cut hair\n",
450        "  buy condoms\n",
451        "  schedule counsellor meeting\n",
452        "  cut nails\n",
453        "  recharge myway\n",
454        "  book counsellor meeting\n",
455        "  shave\n",
456        "  do laundry\n"
457       ]
458      }
459     ],
460     "source": [
461      "# Description mining\n",
462      "\n",
463      "l = only2019(tasks).description\n",
464      "print(l.describe())\n",
465      "print()\n",
466      "print(\"Repeated descriptions: \")\n",
467      "for e in l[l.duplicated(keep=False)].unique():\n",
468      "    print(\"  \" + e)"
469     ]
470    },
471    {
472     "cell_type": "code",
473     "execution_count": 58,
474     "metadata": {},
475     "outputs": [],
476     "source": [
477      "# Most described task?\n",
478      "## TODO"
479     ]
480    },
481    {
482     "cell_type": "markdown",
483     "metadata": {},
484     "source": [
485      "# Habits"
486     ]
487    },
488    {
489     "cell_type": "code",
490     "execution_count": 100,
491     "metadata": {},
492     "outputs": [],
493     "source": [
494      "import pandas as pd\n",
495      "import calendar\n",
496      "import matplotlib \n",
497      "import matplotlib.pyplot as plt"
498     ]
499    },
500    {
501     "cell_type": "code",
502     "execution_count": 105,
503     "metadata": {},
504     "outputs": [
505      {
506       "name": "stdout",
507       "output_type": "stream",
508       "text": [
509        "Days done: Meditate    64\n",
510        "Exercise    65\n",
511        "Read        69\n",
512        "Journal     49\n",
513        "devlog      32\n",
514        "Plants      38\n",
515        "Draw        12\n",
516        "dtype: int64\n",
517        "Non-streak days: Meditate    176\n",
518        "Exercise    179\n",
519        "Read        161\n",
520        "Journal      70\n",
521        "devlog      154\n",
522        "Plants      120\n",
523        "Draw        252\n",
524        "dtype: int64\n",
525        "Streak days: Meditate     35\n",
526        "Exercise     31\n",
527        "Read         45\n",
528        "Journal     156\n",
529        "devlog       89\n",
530        "Plants      117\n",
531        "Draw         11\n",
532        "dtype: int64\n",
533        "Longest streak: Meditate    17\n",
534        "Exercise    11\n",
535        "Read        12\n",
536        "Journal     55\n",
537        "devlog      29\n",
538        "Plants      87\n",
539        "Draw        17\n",
540        "dtype: int64\n",
541        "Longest zeros: Meditate     23\n",
542        "Exercise     26\n",
543        "Read         43\n",
544        "Journal      27\n",
545        "devlog      121\n",
546        "Plants       54\n",
547        "Draw        229\n",
548        "dtype: int64\n"
549       ]
550      },
551      {
552       "data": {
553        "text/html": [
554         "<div>\n",
555         "<style scoped>\n",
556         "    .dataframe tbody tr th:only-of-type {\n",
557         "        vertical-align: middle;\n",
558         "    }\n",
559         "\n",
560         "    .dataframe tbody tr th {\n",
561         "        vertical-align: top;\n",
562         "    }\n",
563         "\n",
564         "    .dataframe thead th {\n",
565         "        text-align: right;\n",
566         "    }\n",
567         "</style>\n",
568         "<table border=\"1\" class=\"dataframe\">\n",
569         "  <thead>\n",
570         "    <tr style=\"text-align: right;\">\n",
571         "      <th></th>\n",
572         "      <th>Meditate</th>\n",
573         "      <th>Exercise</th>\n",
574         "      <th>Read</th>\n",
575         "      <th>Journal</th>\n",
576         "      <th>devlog</th>\n",
577         "      <th>Plants</th>\n",
578         "      <th>Draw</th>\n",
579         "    </tr>\n",
580         "    <tr>\n",
581         "      <th>month</th>\n",
582         "      <th></th>\n",
583         "      <th></th>\n",
584         "      <th></th>\n",
585         "      <th></th>\n",
586         "      <th></th>\n",
587         "      <th></th>\n",
588         "      <th></th>\n",
589         "    </tr>\n",
590         "  </thead>\n",
591         "  <tbody>\n",
592         "    <tr>\n",
593         "      <th>4</th>\n",
594         "      <td>0.133333</td>\n",
595         "      <td>0.266667</td>\n",
596         "      <td>0.000000</td>\n",
597         "      <td>0.900000</td>\n",
598         "      <td>0.000000</td>\n",
599         "      <td>1.133333</td>\n",
600         "      <td>0.000000</td>\n",
601         "    </tr>\n",
602         "    <tr>\n",
603         "      <th>5</th>\n",
604         "      <td>0.387097</td>\n",
605         "      <td>0.193548</td>\n",
606         "      <td>0.193548</td>\n",
607         "      <td>0.645161</td>\n",
608         "      <td>0.000000</td>\n",
609         "      <td>0.516129</td>\n",
610         "      <td>0.000000</td>\n",
611         "    </tr>\n",
612         "    <tr>\n",
613         "      <th>6</th>\n",
614         "      <td>0.500000</td>\n",
615         "      <td>0.266667</td>\n",
616         "      <td>0.166667</td>\n",
617         "      <td>0.666667</td>\n",
618         "      <td>0.000000</td>\n",
619         "      <td>0.000000</td>\n",
620         "      <td>0.000000</td>\n",
621         "    </tr>\n",
622         "    <tr>\n",
623         "      <th>7</th>\n",
624         "      <td>0.516129</td>\n",
625         "      <td>0.838710</td>\n",
626         "      <td>0.967742</td>\n",
627         "      <td>0.774194</td>\n",
628         "      <td>0.064516</td>\n",
629         "      <td>0.612903</td>\n",
630         "      <td>0.000000</td>\n",
631         "    </tr>\n",
632         "    <tr>\n",
633         "      <th>8</th>\n",
634         "      <td>0.806452</td>\n",
635         "      <td>0.741935</td>\n",
636         "      <td>0.935484</td>\n",
637         "      <td>1.161290</td>\n",
638         "      <td>1.225806</td>\n",
639         "      <td>1.193548</td>\n",
640         "      <td>0.000000</td>\n",
641         "    </tr>\n",
642         "    <tr>\n",
643         "      <th>9</th>\n",
644         "      <td>1.133333</td>\n",
645         "      <td>0.600000</td>\n",
646         "      <td>1.233333</td>\n",
647         "      <td>1.300000</td>\n",
648         "      <td>1.066667</td>\n",
649         "      <td>1.333333</td>\n",
650         "      <td>0.000000</td>\n",
651         "    </tr>\n",
652         "    <tr>\n",
653         "      <th>10</th>\n",
654         "      <td>0.774194</td>\n",
655         "      <td>1.064516</td>\n",
656         "      <td>0.580645</td>\n",
657         "      <td>0.935484</td>\n",
658         "      <td>1.032258</td>\n",
659         "      <td>1.193548</td>\n",
660         "      <td>0.000000</td>\n",
661         "    </tr>\n",
662         "    <tr>\n",
663         "      <th>11</th>\n",
664         "      <td>0.700000</td>\n",
665         "      <td>0.733333</td>\n",
666         "      <td>0.800000</td>\n",
667         "      <td>1.233333</td>\n",
668         "      <td>1.000000</td>\n",
669         "      <td>0.333333</td>\n",
670         "      <td>0.533333</td>\n",
671         "    </tr>\n",
672         "    <tr>\n",
673         "      <th>12</th>\n",
674         "      <td>0.387097</td>\n",
675         "      <td>0.548387</td>\n",
676         "      <td>1.096774</td>\n",
677         "      <td>0.709677</td>\n",
678         "      <td>0.612903</td>\n",
679         "      <td>0.000000</td>\n",
680         "      <td>0.612903</td>\n",
681         "    </tr>\n",
682         "  </tbody>\n",
683         "</table>\n",
684         "</div>"
685        ],
686        "text/plain": [
687         "       Meditate  Exercise      Read   Journal    devlog    Plants      Draw\n",
688         "month                                                                      \n",
689         "4      0.133333  0.266667  0.000000  0.900000  0.000000  1.133333  0.000000\n",
690         "5      0.387097  0.193548  0.193548  0.645161  0.000000  0.516129  0.000000\n",
691         "6      0.500000  0.266667  0.166667  0.666667  0.000000  0.000000  0.000000\n",
692         "7      0.516129  0.838710  0.967742  0.774194  0.064516  0.612903  0.000000\n",
693         "8      0.806452  0.741935  0.935484  1.161290  1.225806  1.193548  0.000000\n",
694         "9      1.133333  0.600000  1.233333  1.300000  1.066667  1.333333  0.000000\n",
695         "10     0.774194  1.064516  0.580645  0.935484  1.032258  1.193548  0.000000\n",
696         "11     0.700000  0.733333  0.800000  1.233333  1.000000  0.333333  0.533333\n",
697         "12     0.387097  0.548387  1.096774  0.709677  0.612903  0.000000  0.612903"
698        ]
699       },
700       "execution_count": 105,
701       "metadata": {},
702       "output_type": "execute_result"
703      }
704     ],
705     "source": [
706      "#colNames = ['date', 'Godmode', 'Meditate', 'Exercise', 'Piano','Read', 'Journal', 'Gratitude', 'devlog','Plants', 'Job', 'Draw']\n",
707      "checks = pd.read_csv('data/LoopHabits/Checkmarks.csv', header=0)\n",
708      "checks.set_axis(checks['date'], axis='index', inplace=True)\n",
709      "checks = checks.drop(['Godmode', 'Piano', 'Gratitude', 'Job', 'date'], axis=1)\n",
710      "df = MarchOnwards(only2019(checks))\n",
711      "print(\"Days done: \" + str(countValue(2, df)))\n",
712      "print(\"Non-streak days: \" + str(countZeros(df)))\n",
713      "print(\"Streak days: \" + str(countValue(1, df)))\n",
714      "print(\"Longest streak: \" + str(df.apply(longestStreak, axis=0)))\n",
715      "print(\"Longest zeros: \" + str(df.apply(longestZero, axis=0)))\n",
716      "#print(\"Best month: \" + str(sumMonth(df)))\n",
717      "sumMonth(df)\n",
718      "meanMonth(df)\n",
719      "#print(\"Worst month: \" + calendar.month_name[int(sumMonth(df).idxmin())])\n",
720      "#plotScore(df)"
721     ]
722    },
723    {
724     "cell_type": "code",
725     "execution_count": 102,
726     "metadata": {},
727     "outputs": [],
728     "source": [
729      "def MarchOnwards(df):\n",
730      "    # For habits only, lost my phone in late Febuary, didn't have a recent backup\n",
731      "    return df.filter(regex='[0-9]{4}-(0?[4-9]|1?[0-2])-[0-9]{2}', axis=0)\n",
732      "\n",
733      "def longestZero(df):\n",
734      "    # reverse the series so dates are ascending (increasing?)\n",
735      "    # pad out the series with 0s, then diff it to track total, use that total to calculate highest streak\n",
736      "    # streak increases when sum is 0\n",
737      "    diffStreak = pd.concat([pd.Series([0]) , df[::-1], pd.Series([0])]).diff().tolist()\n",
738      "    runningSum = 0\n",
739      "    streak = 0\n",
740      "    streakList = []\n",
741      "    for e in diffStreak[1:-1]:\n",
742      "        runningSum += e\n",
743      "        if runningSum == 0:\n",
744      "            streak += 1\n",
745      "        else:\n",
746      "            streakList.append(streak)\n",
747      "            streak = 0\n",
748      "    return max(streakList)\n",
749      "\n",
750      "def longestStreak(df):\n",
751      "    # reverse the series so dates are ascending (increasing?)\n",
752      "    # pad out the series with 0s, then diff it to track total, use that total to calculate highest streak\n",
753      "    # streak increases when sum is non-zero\n",
754      "    diffStreak = pd.concat([pd.Series([0]) , df[::-1], pd.Series([0])]).diff().tolist()\n",
755      "    runningSum = 0\n",
756      "    streak = 0\n",
757      "    streakList = []\n",
758      "    for e in diffStreak[1:-1]:\n",
759      "        runningSum += e\n",
760      "        if runningSum != 0:\n",
761      "            streak += 1\n",
762      "        else:\n",
763      "            streakList.append(streak)\n",
764      "            streak = 0\n",
765      "    return max(streakList)\n",
766      "\n",
767      "def sumMonth(df):\n",
768      "    df2 = df.reset_index()\n",
769      "    df['month'] = pd.DatetimeIndex(df2['date']).month\n",
770      "    return df.groupby(['month']).sum()\n",
771      "\n",
772      "def meanMonth(df):\n",
773      "    df2 = df.reset_index()\n",
774      "    df['month'] = pd.DatetimeIndex(df2['date']).month\n",
775      "    return df.groupby(['month']).mean()\n",
776      "\n",
777      "def plotScore(df):\n",
778      "    df = meanMonth(df)\n",
779      "    plt.plot(df)\n",
780      "    plt.show()\n",
781      "\n",
782      "def countZeros(df):\n",
783      "    return countValue(0, df)\n",
784      "\n",
785      "def countValue(countValue, df):\n",
786      "    return (df == countValue).astype(int).sum()\n"
787     ]
788    },
789    {
790     "cell_type": "code",
791     "execution_count": 83,
792     "metadata": {},
793     "outputs": [],
794     "source": [
795      "# Load the per-habit export for a single habit (Meditate): checkmarks and scores, each read as headerless (date, value) columns.\n",
796      "# NOTE(review): these paths have no 'data/' prefix, unlike 'data/LoopHabits/Checkmarks.csv' read earlier - confirm the working directory.\n",
797      "colNames = ['date', 'value']\n",
798      "meditateChecks = pd.read_csv('LoopHabits/002 Meditate/Checkmarks.csv', names=colNames, header=None)\n",
799      "meditateScore = pd.read_csv('LoopHabits/002 Meditate/Scores.csv', names=colNames, header=None)"
800     ]
801    },
802    {
803     "cell_type": "code",
804     "execution_count": null,
805     "metadata": {},
806     "outputs": [],
807     "source": [
808      "# might be broken now\n",
809      "# need to deal with dates in columns\n",
810      "meditateChecks.set_axis(checks['date'], axis='index', inplace=True)\n",
811      "df = MarchOnwards(only2019(meditateChecks))\n",
812      "print(\"Days done: \" + str(countValue(2, df)))\n",
813      "print(\"Non-streak days: \" + str(countZeros(df)))\n",
814      "print(\"Streak days: \" + str(countValue(1, df)))\n",
815      "print(\"Longest streak: \" + str(longestStreak(df)))\n",
816      "print(\"Longest zeros: \" + str(longestZero(df)))\n",
817      "print(\"Best month: \" + calendar.month_name[int(sumMonth(df).idxmax())])\n",
818      "print(\"Worst month: \" + calendar.month_name[int(sumMonth(df).idxmin())])\n",
819      "plotScore(df)"
820     ]
821    },
822    {
823     "cell_type": "code",
824     "execution_count": 71,
825     "metadata": {},
826     "outputs": [
827      {
828       "name": "stdout",
829       "output_type": "stream",
830       "text": [
831        "306\n"
832       ]
833      },
834      {
835       "data": {
836        "text/plain": [
837         "299"
838        ]
839       },
840       "execution_count": 71,
841       "metadata": {},
842       "output_type": "execute_result"
843      }
844     ],
845     "source": [
846      "# Total days for habits: 306\n",
847      "# How come when counting streaks I only get 299? Missing 1 week??\n",
848      "# NOTE(review): the sum below adds only the single longest streak and the single longest zero run, not every day. The helper cell defines MarchOnwards but no onlyMarch - presumably a rename, confirm.\n",
849      "print(72 + 195 + 39)\n",
850      "longestStreak(onlyMarch(only2019(meditateChecks))) + longestZero(onlyMarch(only2019(meditateChecks)))"
851     ]
852    },
853    {
854     "cell_type": "markdown",
855     "metadata": {},
856     "source": [
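857      "Why don't the above two numbers match? `longestStreak` and `longestZero` each return only the single longest run of their kind, so their sum equals the total number of days only when the data consists of exactly one streak and one gap."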
858     ]
859    },
860    {
861     "cell_type": "code",
862     "execution_count": null,
863     "metadata": {},
864     "outputs": [],
865     "source": []
866    }
867   ],
868   "metadata": {
869    "kernelspec": {
870     "display_name": "Python 3",
871     "language": "python",
872     "name": "python3"
873    },
874    "language_info": {
875     "codemirror_mode": {
876      "name": "ipython",
877      "version": 3
878     },
879     "file_extension": ".py",
880     "mimetype": "text/x-python",
881     "name": "python",
882     "nbconvert_exporter": "python",
883     "pygments_lexer": "ipython3",
884     "version": "3.7.1"
885    }
886   },
887   "nbformat": 4,
888   "nbformat_minor": 4
889  }