diff --git a/Dockerfile b/Dockerfile
index fa53beac..61cdb7d6 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,23 +1,15 @@
-FROM python:3.9-slim
-
-WORKDIR /usr/src/app
-
-RUN apt-get update && apt-get install -y --no-install-recommends curl ca-certificates git gcc g++ python3-dev && \
-    curl -sL https://deb.nodesource.com/setup_16.x | bash - && \
-    apt-get install -y nodejs=16.* && \
-    apt-get remove --purge -y curl && \
-    apt-get -y autoremove && \
+FROM python:3.10
+RUN git config --global --add safe.directory /app
+WORKDIR /app
+
+# TODO start redis in here
+# see about docker loopback
+RUN apt-get update && \
+    apt-get install -y python3-dev && \
     apt-get clean && \
-    rm -rf /var/lib/apt/lists/*
-
-COPY . .
+    rm -rf /var/lib/apt/lists/*
 
-RUN pip install -U pip setuptools
-RUN pip install --no-cache-dir learning_observer/[wo,awe]
-# HACK we need to run the install a second time to properly install lo_dash_react_components
-RUN pip install learning_observer/
+COPY . /app
 
-# TODO we may want this to be a generic image that we can do a variety of things with
-# For example, we may want to just run tests or deploy via this dockerfile.
-# We should support both
-CMD ["pytest", "modules/wo_highlight_dashboard"]
+RUN make install
+CMD ["make", "run"]
diff --git a/Makefile b/Makefile
index 919f455b..689ba465 100644
--- a/Makefile
+++ b/Makefile
@@ -3,18 +3,25 @@ PACKAGES ?= wo,awe
 run:
 	# If you haven't done so yet, run: make install
 	# we need to make sure we are on the virtual env when we do this
-	cd learning_observer && python learning_observer
+	cd learning_observer && python learning_observer --watchdog=restart
 venv:
-	pip install --no-cache-dir -r requirements.txt
+	# This is unnecessary since LO installs requirements on install.
+	# pip install --no-cache-dir -r requirements.txt
 # install commands
 install: venv
	# The following only works with specified packages
	# we need to install learning_observer in dev mode to
	# more easily pass in specific files we need, such as creds
-	pip install --no-cache-dir -e learning_observer/[${PACKAGES}]
-	# TODO resolve the lodrc-current symlink and fetch that url instead
+	pip install --no-cache-dir -e learning_observer/
+
+	# Installing Learning Observer (LO) Dash React Components
+	# TODO properly fetch the current version of lodrc.
+	# We have a symbolic link between `lodrc-current` and the most
+	# recent version. We would like to directly fetch `lodrc-current`,
+	# however, the fetch only returns the name of the file it's
+	# linked to. We do an additional fetch for the linked file.
 	@LODRC_CURRENT=$$(curl -s https://raw.githubusercontent.com/ETS-Next-Gen/lo_assets/main/lo_dash_react_components/lo_dash_react_components-current.tar.gz); \
 	pip install https://raw.githubusercontent.com/ETS-Next-Gen/lo_assets/main/lo_dash_react_components/$${LODRC_CURRENT}
@@ -23,6 +30,20 @@ install-dev: venv
 	pip install --no-cache-dir -e learning_observer/[${PACKAGES}]
 	. ${HOME}/.nvm/nvm.sh && nvm use && pip install -v -e modules/lo_dash_react_components/
+install-packages: venv
+	pip install -e learning_observer/[${PACKAGES}]
+
+	# Just a little bit of dependency hell...
+	# The AWE Components are built using a specific version of
+	# `spacy`. This requires an out-of-date `typing-extensions`
+	# package. There are a few other dependencies that require a
+	# newer version. As far as I can tell, upgrading this package
+	# does not affect the functionality we receive from the AWE
+	# components.
+ # TODO remove this extra step after AWE Component's `spacy` + # is no longer version locked. + pip install -U typing-extensions + # testing commands test: # this is where we run doctests diff --git a/README.md b/README.md index 6ecbfe41..589843d6 100644 --- a/README.md +++ b/README.md @@ -54,33 +54,9 @@ that the core approach and APIs are correct. ## Getting Started -As an early prototype, getting started isn't seamless. Run: - -```~bash -make install -``` - -And follow the instructions. You'll probably run into bugs. Work around the bugs. Then fix up the makefile and make a PR to address those bugs :) - -Once that's done, run: - -```bash -make -``` - -Again, fix up the makefile, and make a PR. - -You can also go into the devops directory, which has scripts in -progress for spinning up a cloud instance and managing flocks of -_Learning Observer_ instances. - -### Installing Writing Observer - -To setup writing_observer on top of the learning observer platform you must go into modules/writing_observer and run: - -```bash -python setup.py develop -``` +We have a short guide to [getting started](docs/workshop.md). Getting +the base system working is pretty easy. The guide is pretty +comprehensive (including how to develop your first module). ### System requirements @@ -92,34 +68,28 @@ pilots. These instances have 512MB of RAM, and minimal CPU. It's important that this configuration remains usable. For deployment and more sophisticated uses (e.g. NLP) in larger -numbers of classrooms, we expect to need **heavy** metal. As we're -playing with algorithms, deep learning is turning out to work -surprisingly well, and at the same time, requires surprisingly large -amounts of computing power. A GPGPU with plenty of RAM is helpful if -you want to work with more sophisticated algorithms, and is likely to -be a requirement for many types of uses. +numbers of classrooms, we need **heavy** metal. As we're playing with +algorithms, deep learning is turning out to work surprisingly well, +and at the same time, requires surprisingly large amounts of computing +power. A GPGPU with plenty of RAM is helpful if you want to work with +more sophisticated algorithms, and is likely to be a requirement for +many types of uses. All _Learning Observer_ development has been on Linux-based platforms (including Ubuntu and RHEL). There are folks outside of the core team -who have tried to run it on Mac or on WSL, with mixed success. +who have tried to run it on Mac or on WSL, with some success. -Running on RHEL requires the following services: +Running on RHEL typically uses the following services: -* REDIS -* nginx. +* redis +* nginx #### bcrypt -A note on bcrypt. The code uses bcrypt for some internal password management. -We are not including it directly in the install because it acts oddly across -platforms so you may need to install some version manually. - -#### Additional Notes - -At present the system also uses static content that is served from a repo. -This allows us to actually select different sources for the static data. -This can also point to the current copy if necessary and can be configured -as part of the creds.yaml file to generate the repo or add it on startup. +A note on bcrypt. The code uses bcrypt for internal password +management. There is a mess of incompatible versions. Be careful if +installing any way other than the official install to get the right +one. 
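+
+As a quick sanity check that your bcrypt install works on your platform, a snippet along these lines (not part of the repo; just an illustration of the `bcrypt` API) should run without errors:
+
+```python
+import bcrypt
+
+password = b"super secret password"
+# gensalt() picks a random salt; hashpw() returns the salted hash,
+# which is what gets stored.
+hashed = bcrypt.hashpw(password, bcrypt.gensalt())
+# checkpw() re-hashes and compares; True means the password matches.
+assert bcrypt.checkpw(password, hashed)
+```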
## Contributing or learning more diff --git a/docker-compose.yaml b/docker-compose.yaml new file mode 100644 index 00000000..ca86761b --- /dev/null +++ b/docker-compose.yaml @@ -0,0 +1,16 @@ +version: '3.8' +services: + app: + build: . + volumes: + - ./:/app + image: learning-observer-image:latest + stdin_open: true # Keep standard input open + tty: true # Allocate a pseudo-TTY + restart: always + ports: + - 8888:8888 + depends_on: + - redis + redis: + image: redis:latest diff --git a/docs/dashboards.md b/docs/dashboards.md new file mode 100644 index 00000000..baa271f7 --- /dev/null +++ b/docs/dashboards.md @@ -0,0 +1,182 @@ +# Dashboards + +We can create custom dashboards for the system. + +## Dash + +Dash is a package for writing and serving web applications directly in Python. In Dash, there are 2 primary items, 1) page components such as headers, divs, spans, etc. and 2) callbacks. + +### Getting started + +Page components can be set up similar to other `html` layouts, like so + +```python +from dash import html + +layout = html.Div([ + html.H1(children='This is a header'), + html.Div(id='A'), + html.Div(id='B'), + html.Input(id='input') +]) +# html version +#
+# <div>
+#   <h1>This is a header</h1>
+#   <div id="A"></div>
+#   <div id="B"></div>
+#   <input id="input" />
+# </div>
+```
+
+Adding callbacks can introduce interactivity to the dashboard. Dash listens for the value of any `Input` item to change, then runs code and updates the value of the `Output` components. The updated `Output` components could be the `Input` trigger for other callbacks.
+
+```python
+from dash import callback, Output, Input
+
+@callback(
+    Output('A', 'children'),
+    Input('input', 'value')
+)
+def update_output_children(value):
+    '''This callback will trigger whenever the contents of `input`'s `value`
+    property changes. It will update the `children` property of `A`.
+    '''
+    return f'The callback value is: {value}'
+```
+
+Callbacks are handled on the server, since we are running Python code. This increases network traffic and server load. Instead, we can use `clientside_callback` to run JavaScript code in the client's browser.
+
+```python
+from dash import clientside_callback, ClientsideFunction, Output, Input
+
+# note this is no longer a decorator, Dash handles adding this code
+# to the pages it serves
+clientside_callback(
+    ClientsideFunction(namespace='my_module', function_name='updateOutputChildren'),
+    Output('B', 'children'),
+    Input('input', 'value')
+)
+```
+
+```javascript
+// `my_module/assets/scripts.js`
+
+// make sure `dash_clientside` is defined first
+if (!window.dash_clientside) {
+    window.dash_clientside = {};
+}
+
+// create a dictionary of functions
+window.dash_clientside.my_module = {
+    updateOutputChildren: function(value) {
+        return `The callback value is: ${value}`
+    }
+}
+```
+
+### Dash in the Learning Observer
+
+The `lo_dash_react_components` package offers a variety of components (written in React, ported to Python). This includes a handy websocket component for connecting directly to the communication protocol. We can build components based on the information we receive from the communication protocol. The protocol may eventually offer partial updates, so any time we get a new message, we should update a stored object. This stored object should be used to build the components.
+ +```python +from dash import html, dcc, callback, clientside_callback, ClientsideFunction, Output, Input +import lo_dash_react_components as lodrc + +layout = html.Div([ + lodrc.LOConnectionStatusAIO(aio_id=_websocket), + dcc.Store(id=_websocket_storage), + html.H2('Output from reducers'), + html.Div(id=_output) +]) + +clientside_callback( + ClientsideFunction(namespace=_namespace, function_name='sendToLOConnection'), + Output(lodrc.LOConnectionStatusAIO.ids.websocket(_websocket), 'send'), + Input(lodrc.LOConnectionStatusAIO.ids.websocket(_websocket), 'state'), # used for initial setup + Input('_pages_location', 'hash') +) + +clientside_callback( + ClientsideFunction(namespace=_namespace, function_name='receiveWSMessage'), + Output(_websocket_storage, 'data'), + Input(lodrc.LOConnectionStatusAIO.ids.websocket(_websocket), 'message'), + prevent_initial_call=True +) + +@callback( + Output(_output, 'children'), + Input(_websocket_storage, 'data'), +) +def populate_output(data): + if not data: + return 'No students' + output = [html.Div([ + lodrc.LONameTag( + profile=s['profile'], className='d-inline-block student-name-tag', + includeName=True, id=f'{s["user_id"]}-name-tag' + ), + html.Span(f' - {s["count"]} events') + ]) for s in data] + return output +``` + +And here are the relevant Javascript functions: + +```javascript +window.dash_clientside.learning_observer_template = { + sendToLOConnection: async function (wsReadyState, urlHash) { + if (wsReadyState === undefined) { + return window.dash_clientside.no_update + } + if (wsReadyState.readyState === 1) { + // decode url parameters from hash + if (urlHash.length === 0) { return window.dash_clientside.no_update } + const decodedParams = decode_string_dict(urlHash.slice(1)) + if (!decodedParams.course_id) { return window.dash_clientside.no_update } + // send our request to LO + const outgoingMessage = { + learning_observer_template_query: { + execution_dag: 'learning_observer_template', + target_exports: ['student_event_counter_export'], + kwargs: decodedParams + } + }; + return JSON.stringify(outgoingMessage); + } + return window.dash_clientside.no_update; + }, + + receiveWSMessage: async function (incomingMessage) { + // parse incoming message + const messageData = JSON.parse(incomingMessage.data).learning_observer_template_query.student_event_counter_join_roster || []; + if (messageData.error !== undefined) { + console.error('Error received from server', messageData.error); + return []; + } + return messageData; + } +} +``` + +To add a dashboard to a module, add the following to the module's `module.py` file + +```python +# module.py +# ...other definitions +DASH_PAGES = [ + { + 'MODULE': module.path.dash_dashboard, + 'LAYOUT': module.path.dash_dashboard.layout, + 'ASSETS': 'assets', # define where to find addtional js, css files are + 'TITLE': 'My dashboard title', + 'DESCRIPTION': 'My dashboard description.', + 'SUBPATH': 'my-dashboard-subpath', + # additional js, css files we want to included + 'CSS': [ + thirdparty_url("css/fontawesome_all.css") + ], + 'SCRIPTS': [ + static_url("liblo.js") + ] + } +] +``` diff --git a/docs/docker.md b/docs/docker.md new file mode 100644 index 00000000..34658fa0 --- /dev/null +++ b/docs/docker.md @@ -0,0 +1,44 @@ +# Docker setup + +## Docker + +We also support spinning up a Docker container. First build the Docker image, then run it + +```bash +docker build -t lo_workshop . 
# build the root directory and tag it lo_workshop +docker run -it -p 8888:8888 lo_workshop # -it attaches a terminal, -p attaches local port 8888 to dockers 8888 port +``` + +Note that building a docker image may take a few minutes. + +## Docker Compose + +Docker compose can manage both the normal Dockerfile and an instance of Redis. To both build and turn them on, run + +```bash +docker compose up --build + +# NOTE: older versions of docker use separate commands for +# building the images and turning them on +docker compose build +docker compose up +``` + +Watchdog will automatically re-run the command used to run application, `make run`. If we wish to develop while the Docker container is open, we need to modify the `run` command to re-install any packages when it restarts. Your local repository is being shared as a mount to the Docker container. Adding an install command makes sure that latest changes are used. + +```Makefile +run: + pip install -e learning_observer/ + cd learning_observer && python learning_observer --watchdog=restart +``` + +## Active development + +We can add commands to re-install our local instances of the packages in Docker. This will allow us to do active development while the docker is running. + +```Makefile +run: + pip install -e learning_observer/ + pip install -e modules/learning_observer_template/ + cd learning_observer && python learning_observer --watchdog=restart +``` diff --git a/docs/reducers.md b/docs/reducers.md index 72a0b8c2..36cb2f58 100644 --- a/docs/reducers.md +++ b/docs/reducers.md @@ -14,12 +14,12 @@ The reducer system is designed to be modular and flexible, allowing for the addi | | | | | +---------------+ | +-------------+ +---------------+ | +-----------+ <------|-- Internal | -| | | | | -------|-> State | +------------+ +------------+ -| Event Source --------|---->| Reducer | | | | | | | -| | | | | | --------|-> External -------->| Aggregator |----> | Dashboard | -+---------------+ | | +-----------+ | State | | | | | -+---------------+ | | | | +------------+ +------------+ -| | | | +-------------+ +| | | | | -------|-> State | +---------------+ +------------+ +| Event Source --------|---->| Reducer | | |------>| | | | +| | | | | | +-------------+ | Communication |----> | Dashboard | ++---------------+ | | +-----------+ | Protocol | | | ++---------------+ | | +---------------+ +------------+ +| | | | | Event Source ----| | | | | +---------------+ v @@ -39,9 +39,9 @@ The reducer system consists of the following components: 3. **Key-Value Store**: This component stores the internal and external state generated by the reducer. The internal state is used for the reducer's internal processing, while the external state is shared with other components, such as aggregators and dashboards. -4. **Aggregator**: The aggregator takes the external state from the key-value store and performs additional processing or aggregation to prepare the data for display in a dashboard. +4. **Communication Protocol**: The communication protocol handles fetching and transforming data from the key-value store using an SQL-like structure. -5. **Dashboard**: The dashboard is the user interface that displays the aggregated data, providing insights into user activities and learning outcomes. +5. **Dashboard**: The dashboard is the user interface that displays the data from the communication protocol, providing insights into user activities and learning outcomes. 6. 
**Archival Repository**: This component is responsible for archiving event data, ensuring that historical data is available for analysis and reporting purposes. @@ -50,3 +50,36 @@ The reducer system consists of the following components: To create a new reducer, use the `student_event_reducer` decorator. This allows you to define custom reduction functions that process events and transform them into meaningful insights. As the system evolves, it will be possible to plug in different aggregators, state types, and keys (e.g., per-student, per-resource) to the reducer system. In the long term, the goal is to have pluggable, independent modules that can be connected to create a versatile and extensible analytics system. The current reducer system serves as a foundation for building such a system. + +An example of a simple reducer to count events can be defined as + +```python +# import student scope reducer decorator +from learning_observer.stream_analytics.helpers import student_event_reducer + +@student_event_reducer(null_state={"count": 0}) +async def student_event_counter(event, internal_state): + # do something with the internal state, such as increment + state = {"count": internal_state.get('count', 0) + 1} + + # return internal state, external state (no longer used) + return state, state +``` + +To add a reducer to a module, we much define a `REDUCERS` section in a module's `module.py` file like so + +```python +# module.py +# ...other items + +REDUCERS = [ + { + 'context': 'org.mitros.writing_analytics', + 'scope': Scope([KeyField.STUDENT]), + 'function': module.path.to.reducers.student_event_counter, + 'default': {'count': 0} + } +] +``` + +NOTE: the `default` defined in the `module.py` file is for handling defaults when queries are made, while the `null_state` defined in the reducer decorator is used for initializing state of a new incoming event stream (e.g. a new student started sending events). diff --git a/docs/technologies.md b/docs/technologies.md index 5e5e7f0a..aac422a7 100644 --- a/docs/technologies.md +++ b/docs/technologies.md @@ -1,58 +1,46 @@ # Technologies in the _Learning Observer_ +### Technologies + + +You are welcome to use your own instance of redis; however, `docker compose` allows us to spin up an instance of Redis and connect to it. See the Docker Compose section for more information. + +The provided run commands all include watchdog turned on to ease development time on re-running the application. + + Several potential contributors have asked for a list of technologies needed to be productive helping developing the *Learning Observer* or modules for the *Learning Observer*. A short list: -* We use [Python](https://www.python.org/) on the server side, and - JavaScript on the client side. We do rely on current Python (dev - systems are 3.8 or 3.9 as of this writing). -* We use [D3](https://d3js.org/) for displaying data in real-time - on the client, and otherwise, as a front-end framework. D3 is a - relatively small and simple library with a fairly steep learning - curve (in much the same way as Go is a small and simple game). We - recommend going through any short tutorial _before_ doing any - front-end work to get a feel for it. We don't recommend a _long_ - tutorial; beyond that, it's best to learn in-context. -* Since we're managing large numbers of web socket connections, we - make heavy use of [asynchronous - Python](https://docs.python.org/3/library/asyncio.html), and our web - framework is [aiohttp](https://docs.aiohttp.org/en/stable/). 
If you - haven't done async programming before, there is deep theory behind - it. However, we again recommend any short tutorial for aiohttp, and - then learning in context. -* We make heavy use of `git`, as well as of data structures which are - `git`-like. I recommend reading [Git - Internals](https://git-scm.com/book/en/v2/Git-Internals-Plumbing-and-Porcelain) +* We use [Python](https://www.python.org/) on the server side, and JavaScript on the client side. We do rely on current Python (dev systems are mostly 3.10 as of this writing). +* Since we're managing large numbers of web socket connections, we make heavy use of [asynchronous Python](https://docs.python.org/3/library/asyncio.html). If you haven't done async programming before, there is deep theory behind it. However, we again recommend any short tutorial for aiohttp, and then learning in context. +* Our web framework is [aiohttp](https://docs.aiohttp.org/en/stable/). +* We are moving towards [react](https://react.dev/) and [redux](https://redux.js.org/). +* Simple dashboards can be built with [plot.ly](https://plotly.com/python/) +* Our main database is the original [redis](https://redis.io/), but we plan to switch to a different redis due to licensing and other nasty changes by a company which coopted this from the open source community. We have a simple key-value store abstraction, so this is easy to swap out. +* We make heavy use of `git`, as well as of data structures which are `git`-like. I recommend reading [Git Internals](https://git-scm.com/book/en/v2/Git-Internals-Plumbing-and-Porcelain) and following [Write Yourself a Git](https://wyag.thb.lt/) -* Our CSS framework is [Bulma](https://bulma.io/) +* Our CSS framework is currently [Bulma](https://bulma.io/), but that may change. * Our icon library is [Font Awesome](https://fontawesome.com/) -* For rapid prototyping, we use [P5.js](https://p5js.org/), although - we hope to avoid this beyond the prototype phase. This is super-easy - to learn (even for little kids), and super-fast to develop in. It - doesn't do to production-grade software, though (responsive, i18n, - a11y, testability, etc.). The best way to learn this is by helping a - child do the Khan Academy JavaScript courses :) +* For rapid prototyping, we use [P5.js](https://p5js.org/), although we hope to avoid this beyond the prototype phase. This is super-easy to learn (even for little kids), and super-fast to develop in. It doesn't do to production-grade software, though (responsive, i18n, a11y, testability, etc.). The best way to learn this is by helping a child do the Khan Academy JavaScript courses :) * Our web server is [nginx](https://nginx.org/en/), but that's easy to change. * Our dev-ops framework is home baked, but uses [boto](http://boto.cloudhackers.com/), [invoke](https://www.pyinvoke.org/), [Fabric](https://www.fabfile.org/), and a little bit of [ansible](https://docs.ansible.com/ansible/latest/dev_guide/developing_python_3.html). -* We recommend Debian/Ubuntu, but run on Fedora/Red Hat. We'd like to - run on Mac and Windows someday too. +* We recommend Debian/Ubuntu, but run on Fedora/Red Hat. People have successfully run this on MacOS and on Windows/WSL, but this is not well-tested. * At some point, we do plan do add [postgresql](https://www.postgresql.org/). -* For a while, when we thought we'd need queues, we used an XMPP - server. I don't think we need queues, but if we do, it will come - back. 
- -For grad students, interns, student volunteers, and other contributors -who are here primarily to learn: One of the fun things here is that -most of these are _deeply interesting tools_ with a strong theoretical -basis in their design. - -On the whole, our goal is to keep a *small set of dependencies*. To -add a new tool to the system, it will need to do something -_substantially_ different than what's in the system already. We do -plan on adding Postgresql once needed, but not too much beyond that. - -Note that some modules within the system (including the _Writing -Observer_) do have more extensive dependencies. \ No newline at end of file +* For a while, when we thought we'd need queues, we used an XMPP server. I don't think we need queues, but if we do, it will come back. + +For grad students, interns, student volunteers, and other contributors who are here primarily to learn: One of the fun things here is that most of these are _deeply interesting tools_ with a strong theoretical basis in their design. + +On the whole, our goal is to keep a *small set of dependencies*. To add a new tool to the system, it will need to do something _substantially_ different than what's in the system already. We do plan on adding Postgresql once needed, but not too much beyond that. + +Note that some modules within the system (including and especially the _Writing Observer_) do have more extensive dependencies. The _Writing Observer_ uses _a lot_ of different NLP libraries, and until we streamline that, can be quite annoying to install. + +# Deprecations + +* We are deprecating [D3](https://d3js.org/) for displaying data in + real-time on the client, and otherwise, as a front-end framework. D3 + is a relatively small and simple library with a fairly steep + learning curve (in much the same way as Go is a small and simple + game). Much of the use of this is obsoleted by our use of react. \ No newline at end of file diff --git a/docs/workshop.md b/docs/workshop.md new file mode 100644 index 00000000..49a1baf2 --- /dev/null +++ b/docs/workshop.md @@ -0,0 +1,404 @@ +# Learning Observer Workshop + +This document will step you through the Learning Observer workshop. Our goals for this workshop are: + +* Give an overview of the platform +* Collect feedback on how to make the platform useful for your own work +* Collect feedback on different major components of the platform +* Have fun hacking learning analytics together + +We recommend working in groups of three. This way: + +* You can help each other +* At least one person will (hopefully) have a working machine + +We suggest having at least **2 terminals** ready for this workshop. The first terminal will be for installing and running the system, while the second will be any additional scripts to need to run. + +Prerequisites: + +* Unix-style system + * Ubuntu is most tested + * MacOS should work as well, but is less tested + * Windows should work with WSL, but you'll need to [install it beforehand](workshop/wsl-install.md). +* `python 3`. We tested and recommend 3.10 and 3.11, but anything newer than 3.9 should work + +Recommendations: + +* `virtualenvwrapper`. If you prefer a different package management system, you can use that instead. + +Options: + +* `redis`. We need a key-value store. However, if you don't have this, we can use files on the file system or in-memory. If you use `docker compose`, it will spin this up for you. 
Beyond this workshop, we strongly recommend using `redis` (the recommended `redis` going forward is [Valkey](https://en.wikipedia.org/wiki/Valkey), as opposed to proprietary Redis)
+* `docker`. We're not big fans of `docker` for this type of work, so this pathway is less tested. However, by popularity, we do provide a `docker` option. We tested with docker 26.1. You should only use this if you're fluent in `docker`, since you'll probably need to tweak instructions slightly (especially if you're not on 26.1).
+
+If you'd like to use `docker`, we have a quick [tutorial](docker.md).
+
+If you can install the prerequisites before the workshop, it will save a lot of time, and not put us at risk of issues due to hotel bandwidth.
+
+We have a document with a more in-depth overview of the [technologies](technologies.md) we use.
+
+### Python environment
+
+We recommend working in a Python environment of some sort. Our preferred tool is [virtualenvwrapper](https://pypi.org/project/virtualenvwrapper/). You are welcome to use your own (`anaconda`, or as you prefer). `virtualenvwrapper` lets you manage packages and dependencies without making a mess on your computer.
+
+If you don't have a way of managing Python virtual environments, or would prefer to use `virtualenvwrapper`, we have a [short guide](workshop/workshop-virtualenv.md). *We strongly recommend working in some virtual environment, however*.
+
+## Download
+
+First make sure you clone the repository:
+
+```bash
+git clone https://github.com/ETS-Next-Gen/writing_observer.git lo_workshop
+```
+
+**or**, if you have a github account properly configured with ssh:
+
+```bash
+git clone git@github.com:ETS-Next-Gen/writing_observer.git lo_workshop
+```
+
+```bash
+cd lo_workshop/
+git checkout berickson/workshop # This is a branch we set up with some extra things for this workshop!
+```
+
+NOTE: All future commands should be run from the repository's root directory. The command will specify if changing directories is needed.
+
+## Local environment
+
+Make sure you are on a fresh virtual environment. In `virtualenvwrapper`:
+
+```bash
+mkvirtualenv lo_workshop
+workon lo_workshop
+```
+
+Then run the install command:
+
+```bash
+pip install --upgrade pip # Probably not needed, but good form
+make install
+```
+
+This will download the required packages. This might take a while, depending on hotel bandwidth.
+
+## Configuration
+
+Before starting the system, let's take care of any extra configuration steps. We are currently in the process of moving configuration formats from YAML to [PMSS](https://github.com/ETS-Next-Gen/pmss).
+
+We may discuss this in the workshop later, but for now, we will configure using YAML.
+
+We need a system configuration for this workshop. You can copy over this file with the command below, or you can make the changes yourself as per [these instructions](/docs/workshop_creds.md). In essence, the changes are:
+
+1. Disable teacher authentication. We have pluggable authentication schemes, and we disable Google oauth and other schemes.
+2. Disable learning event authentication. Ditto, but for incoming data.
+3. Give a key for session management. This should be unique for security.
+4. Switch from redis to on-disk storage. We have pluggable databases. On-disk storage means you don't need to install redis.
+
+Making these yourself is a good exercise. Note we are switching configuration formats, but the options will stay the same.
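+
+For reference, the key settings in the workshop configuration look like the following (values taken from the `creds.yaml.workshop` file added later in this diff, lightly annotated):
+
+```yaml
+auth:
+  test_case_insecure: true  # passwordless, insecure log-ins
+event_auth:
+  testcase_auth: {}  # accept all incoming events
+aio:
+  session_secret: superdupersecretkey
+  session_max_age: 3600
+kvs:
+  default:
+    type: filesystem  # on-disk storage instead of redis
+    path: .lo_kvs
+```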
+
+Copy the workshop `creds.yaml` file:
+
+```bash
+cp learning_observer/learning_observer/creds.yaml.workshop learning_observer/creds.yaml
+```
+
+If you have a file comparison tool like `meld`, it might be worth comparing our changes: `meld learning_observer/creds.yaml learning_observer/learning_observer/creds.yaml.example`
+
+## Test the system
+
+To run the system, use the run command:
+
+```bash
+make run
+```
+
+*This does a lot of sanity checks on startup, and won't work the first time.* Rather, it will download required files, and create a few files (like `admins.yaml` and `teachers.yaml`, which are one way to define roles for teachers and admins on the system, but which we won't need for this workshop since we are using an insecure login). Once it is done, it will give you an opportunity to check whether it fixed issues correctly (we're working on having nice warnings, but we're not 100% of the way there). If it did, just run it again (perhaps 1-3 more times if it has more things to configure):
+
+```bash
+make run
+```
+
+You should be able to navigate to either `http://localhost:8888/`, `http://0.0.0.0:8888/`, or `http://127.0.0.1:8888/`, depending on your operating system, and see a list of courses and analytics modules. None are installed. We'll build one next!
+
+## Build your own module
+
+### Create from template
+
+We provide a cookiecutter template for creating new modules for the Learning Observer. If you are using Docker, just create a local virtual environment to run this command. To create one, run:
+
+```bash
+cd modules/
+cookiecutter lo_template_module/
+```
+
+Cookiecutter will prompt you for naming information and create a new module in the `modules/` directory. By default, this is called `learning_observer_template`, but pick your own name and substitute it into the commands below.
+
+### Installing
+
+To install the newly created project, use `pip` like any other Python package.
+
+```bash
+pip install -e [name of your module]
+```
+
+Reload your web page, and you will see the new module. Click on it.
+
+## Streaming Data
+
+We can stream data into the system to simulate a classroom of students working. Once the system is up and running, open **a new terminal** and run
+
+```bash
+workon lo_workshop
+python learning_observer/util/stream_writing.py --streams=10
+```
+
+To avoid cache issues, we recommend this order:
+
+* Restart your server
+* Run the above command
+* Load the dashboard
+
+This will generate events for 10 students typing a set of lorem ipsum texts and send them to the server. These events mimic those from our Google Docs extension. You should see an event count in the template dashboard.
+
+## Event Format
+
+You can look at the format of these specific events in the `/learning_observer/learning_observer/logs/` directory. In the test system, we simply put events into log files, but we are gradually moving towards a more sophisticated, open-science, family-rights oriented data store (shown at the bottom of [this document](system_design.md)). This is theoretically interesting, since it gives a cryptographically-verifiable way to audit what data was created and what analyses ran.
+
+There are several good standards for [event formats](events.md), and to integrate learning data, we will need to support them all. Most of these have converged on a line of JSON per event, but the specifics are format-specific.
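+
+For instance, a single event line might look something like this (a made-up sketch for illustration, not the exact schema our Google Docs extension emits):
+
+```json
+{"client": {"timestamp": 1717000000000}, "source": "org.mitros.writing_analytics", "event": "keystroke"}
+```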
We are including [some code](https://github.com/ETS-Next-Gen/writing_observer/blob/master/modules/lo_event/lo_event/xapi.cjs) to help support the major ones. However, the events here are somewhat bound to how Google Docs thinks about documents.
+
+## module.py
+
+Have a quick look at the `module.py` file. This defines:
+
+1. A set of reducers to run over event streams. These process data as it comes in. The context tells the system which types of events the reducers handle.
+2. A set of queries for that data. These define calls dashboards can make into the system.
+3. A set of dashboards. `DASH_PAGES` are visible pages, and `COURSE_DASHBOARDS` will typically select a subset of those to show to teachers when they log in.
+
+## reducers.py
+
+Have a look at the `reducers.py` file. We define a simple reducer which simply counts events:
+
+```python
+@student_event_reducer(null_state={"count": 0})
+async def student_event_counter(event, internal_state):
+    '''
+    An example of a per-student event counter
+    '''
+    state = {"count": internal_state.get('count', 0) + 1}
+
+    return state, state
+```
+
+This function takes an event and updates a state. We will expand this in order to measure the median interval between the past 10 edits. This can be a poor man's estimate of typing speed. The function returns two parameters, one is an internal state (which might be a list of the timestamps of the past 10 events), and one is used for the dashboard (which might be the median value). We're planning to eliminate this in the future, though, and just have one state, so that's what we'll do here:
+
+```python
+import numpy
+
+from learning_observer.stream_analytics.helpers import student_event_reducer
+
+def median_interval(timestamps):
+    if len(timestamps) < 2:
+        return None
+
+    deltas = [timestamps[i+1] - timestamps[i] for i in range(len(timestamps)-1)]
+    deltas.sort()
+    return int(numpy.median(deltas))
+
+
+@student_event_reducer(null_state={"count": 0})
+async def student_event_counter(event, internal_state):
+    '''
+    An example of a per-student event counter
+    '''
+    timestamp = event['client'].get('timestamp', None)
+    count = internal_state.get('count', 0) + 1
+
+    if timestamp is not None:
+        ts = internal_state.get('timestamps', [])
+        ts = ts + [timestamp]
+        if len(ts) > 10:
+            ts = ts[1:]
+    else:
+        ts = internal_state.get('timestamps', [])
+
+    state = {
+        "count": count,
+        "timestamps": ts,  # We used to put this in internal_state
+        "median_interval": median_interval(ts)  # And this in external_state
+    }
+
+    return state, state
+```
+
+Now, we have a typing speed estimator! It does not yet show up in the dashboard.
+
+## Queries and communications protocol
+
+In our first version of this system, we would simply compile the state for all the students, and ship that to the dashboard. However, that didn't allow us to make interactive dashboards, so we created a query language. This is inspired by SQL (with JOIN and friends), but designed for streaming data. Queries can be written in Python or, soon, JavaScript, and compile down to a JSON object.
+
+In `module.py`, you see this line:
+
+```python
+EXECUTION_DAG = learning_observer.communication_protocol.util.generate_base_dag_for_student_reducer('student_event_counter', 'my_event_module')
+```
+
+This is shorthand for a common query which JOINs the class roster with the output of the reducers.
The Python code for the query itself is [here](https://github.com/ETS-Next-Gen/writing_observer/blob/berickson/workshop/learning_observer/learning_observer/communication_protocol/util.py#L58), but the gist of the code is:
+
+```python
+'roster': course_roster(runtime=q.parameter('runtime'), course_id=q.parameter("course_id", required=True)),
+keys_node: q.keys(f'{module}.{reducer}', STUDENTS=q.variable('roster'), STUDENTS_path='user_id'),
+select_node: q.select(q.variable(keys_node), fields=q.SelectFields.All),
+join_node: q.join(LEFT=q.variable(select_node), RIGHT=q.variable('roster'), LEFT_ON='provenance.provenance.value.user_id', RIGHT_ON='user_id')
+```
+
+You can add a `print(EXECUTION_DAG)` statement to see the JSON representation this compiles to.
+
+To see the data protocol, open up developer tools in your browser, click on Network, and look at the `communication_protocol` response.
+
+In the interests of time, we won't do a deep dive here, but this is our third iteration of a query language, and we would love feedback on how to make this better.
+
+## Dashboard framework
+
+For creating simple dashboards, we use [dash](https://dash.plotly.com/) and [plotly](https://plotly.com/python/).
+
+* These are rather simple Python frameworks for making plots and dashboards.
+* Unfortunately, the code in the template module is still a bit complex. We're working to simplify it, but we're not there yet.
+
+We'd suggest skimming a few example [visualizations](https://plotly.com/python/pie-charts/) to get a sense of what they do.
+
+For now, though, all we want to do is add the intercharacter interval to our dashboard. Modify `dash_dashboard.py` to add a span for it:
+
+```python
+    html.Span(f' - {s["count"]} events'),
+    html.Span(f' - {s.get("median_interval", 0)} ICI')
+```
+
+You should be able to see the intercharacter interval in a new span.
+
+## Commit your changes
+
+To avoid losing work, we recommend committing your changes now (and periodically thereafter):
+
+```bash
+git add [directory of your module]
+git commit -a -m "My changes"
+```
+
+## `react` dashboards
+
+Behind the scenes, `dash` uses `react`, and if we want to go beyond what we can do with `plotly` and `dash`, fortunately, it's easy enough to build components directly in `react`. To see how these are built:
+
+```bash
+cd modules/lo_dash_react_components/
+ls src/lib/components
+```
+
+And have a look at `LONameTag`. This component is used to show a student name with either a photo (if available in their profile) or initials (if not), and is used in the simple template dashboard. We have a broad array of components here, including:
+
+- Various ways of visualizing what students know and can do. My favorite is a Vygotskian-style display which places concepts as either mastered, in the zone of proximal development (students can understand with supports), and ones students can't do at all
+- Various tables and cards of student data
+- Various ways of visualizing course content
+
+We have many more not committed.
+
+Getting this up-and-running can be a little bit bandwidth-intensive (since these are developed with `node.js`), but if hotel bandwidth suffices, in most cases, it is sufficient to run:
+
+```bash
+npm install
+npm run build-css
+npm run-script react-start
+```
+
+And then navigate to `http://localhost:3000`. *NOTE: The default URL is different. Ignore it.* If there is a lint error, ignore it as well.
+
+Once set up, the development workflow here is rather fast, since the UX updates on code changes.
Most of these are either early prototypes or designed to be used in specific contexts, but `LOStudentTable` and `ZPDPlot` look nice. So does `DAProblemDisplay`, if you scroll way down. + +## Better data sources: `lo_event` and `lo_assess` + +### `lo_event` + +Our data streaming library is [lo_event](https://github.com/ETS-Next-Gen/writing_observer/tree/master/modules/lo_event). This library is designed to stream events (typically) from a JavaScript client, and handles all of the complexity of things like persistance, queuing, and retries for you. Cookiecutter cde, but it should save you a bunch of time. + +### `lo_assess` + +Much more interesting, in development (and probably in need of renaming) is [`lo_assess`](https://github.com/ETS-Next-Gen/writing_observer/tree/pmitros/loevent-v2/modules/lo_event/lo_event/lo_assess). + +There is an XML format (based on edX OLX, which is in turn based on LON-CAPA XML) for creating interactives. + +The very neat thing about this tool is that we *guarantee* that the state of the system at any point in time can be reconstructed from process data. The UX is controlled through React events, which are funneled into `lo_event`. You can see this using the time travel function of [Redux dev tools](https://github.com/reduxjs/redux-devtools). We've developed a handful of interactives in this format, including a GPT-powered graphic organizer, a Vygotskian-style dynamic assessment for middle school mathematics, but for this workshop, we have a little demo of a tool which can change text styles using ChatGPT for different audiences. + +To see the format, see the XML inside of `modules/toy-assess/src/app/changer/page.js`. Right now, this is inside of a .jsx file, but it will be stand-alone XML in the near future. + +Running this is a little bit involved, as you may need to configure Azure ChatGPT credentials (Azure provides better privacy compliance frameworks than using OpenAI directly): + +``` +export OPENAI_URL="https://[your-location].api.cognitive.microsoft.com" +export AZURE_OPENAI_ENDPOINT="https://[your-location].api.cognitive.microsoft.com" + +export OPENAI_DEPLOYMENT_ID="[your-azure-deployment-id]" +export AZURE_OPENAI_DEPLOYMENT_ID="[your-azure-deployment-id]" + +export OPENAI_API_KEY=`cat [your-azure-openai-key]` +export AZURE_OPENAI_API_KEY=`cat [your-azure-openai-key]` +``` +(We don't require both, but it's handy if you switch libraries) + +As an alternative, in `modules/toy-assess/src/app/lib/route.js` you can change the line + +``` +const listChatCompletions = openai.listChatCompletions; +``` + +To: + +``` +const listChatCompletions = stub.listChatCompletions; +``` + +Which will disable ChatGPT (and always give the same response). + +At this point, you can run: +``` +cd [base_dir]/modules/lo_event/ +npm pack +cd [basedir]/modules/toy-assess/ +# rm -Rf .next/cache/ # If necessary +npm install +npm install ../lo_event-0.0.1.tgz +npm run dev +``` + +And the server should be running on `localhost:3000`. + +## `pmss` + +We are creating a new settings format, based on css. This is called `pmss`. It works pretty well already. The basic idea is, like CSS, that we would like to be able to cascade settings. 
The core problem is that, like CSS, we want well-specified exceptions:
+
+* "Our key-value store is local redis, except for schools in Australia, where our key-value store is hosted in Australia, to comply with local law"
+* "We would like student rosters at JHS to come from Google Classroom, except for afterschool programs, which come from files on disk"
+
+CSS gives a well-understood syntax for expressing these sorts of configurations, and will hopefully help us avoid the mess of special cases which evolve in most learning systems.
+
+Note that this is a stand-alone library, and can be used in your own system too. That said, as with all code, it is still evolving, and we do not guarantee backwards-compatibility.
+
+## Jupyter
+
+We will not demo this, due to time constraints, but it is possible to run a Jupyter instance with access to our data store (see `ipython_integration.py`, `offline.py`, and `interactive_development.py`). We have means to monitor the communication between the python kernel and ipython/jupyter notebook. This should allow us to track all analyses which ran, either for family rights audits (how was my data used?) or open science audits (was there p-hunting?).
+
+This is in the prototype stage; we are not yet using this for data analysis.
+
+## Dev-ops
+
+You can browse the devops directory, which has scripts in progress for
+spinning up a cloud instance and managing flocks of _Learning
+Observer_ instances.
+
+## gitserve
+
+The system can serve static content directly from a git repo. This allows us to:
+
+* Have a git hash for which version of static data we're using (and we do include this in cookies / logs!)
+* Have branches (e.g. for AB tests or different uses)
+
+Some of this can be configured as part of the creds.yaml file.
diff --git a/docs/workshop/README.md b/docs/workshop/README.md
new file mode 100644
index 00000000..947ed854
--- /dev/null
+++ b/docs/workshop/README.md
@@ -0,0 +1 @@
+These files are instructions used when running workshops, and are not specific to the Learning Observer. It is not clear they belong in this repo long-term, but this seems an okay place for them for now.
\ No newline at end of file
diff --git a/docs/workshop/workshop-virtualenv.md b/docs/workshop/workshop-virtualenv.md
new file mode 100644
index 00000000..b4539886
--- /dev/null
+++ b/docs/workshop/workshop-virtualenv.md
@@ -0,0 +1,28 @@
+Setting up virtualenvwrapper
+============================
+
+[virtualenvwrapper](https://virtualenvwrapper.readthedocs.io/en/latest/) makes it quick and easy to manage Python virtual environments.
+
+1) Install `virtualenvwrapper`. This can be `apt-get install python3-virtualenvwrapper` on Ubuntu, or `pip install virtualenvwrapper` on most other systems.
+
+2) Run it using `source` or `.` (so environment changes stay for the current shell). For one installed with `apt-get`, it will most likely be in `/usr/share/virtualenvwrapper/`. For `pip`, it is often in `~/.local/bin` (and you might need to run `export PATH=~/.local/bin:$PATH`). From `brew` on Mac, it is likely somewhere under `/opt/homebrew/`.
+
+We normally add these lines to our `.bashrc` so it runs on startup, but you can run these manually:
+
+```bash
+# Optionally, pick the place you want your virtual environments
+export WORKON_HOME=$HOME/.virtualenvs
+# Optionally, pick the Python you want to use.
+which python3
+VIRTUALENVWRAPPER_PYTHON=/usr/bin/python3
+# Activate virtualenv wrapper
+. /usr/share/virtualenvwrapper/virtualenvwrapper.sh # Wherever you installed the script.
Note the dot at the beginning! It's important.
+```
+
+You now have three commands:
+
+* `mkvirtualenv lo_workshop` makes a virtual environment named `lo_workshop`.
+* `workon lo_workshop` switches you to this environment
+* `rmvirtualenv lo_workshop` destroys it
+
+There are other commands too, but those are the essentials.
diff --git a/docs/workshop/wsl-install.md b/docs/workshop/wsl-install.md
new file mode 100644
index 00000000..0a8d6142
--- /dev/null
+++ b/docs/workshop/wsl-install.md
@@ -0,0 +1,11 @@
+Windows Subsystem for Linux Install
+===================================
+
+Microsoft has instructions for installing [WSL](https://learn.microsoft.com/en-us/windows/wsl/install), but on most systems, this simply involves running `wsl --install` from *PowerShell* (not `cmd`).
+
+Once installed, run:
+
+```bash
+sudo apt-get update
+sudo apt-get install python3-pip python3-virtualenvwrapper git
+```
\ No newline at end of file
diff --git a/docs/workshop_creds.md b/docs/workshop_creds.md
new file mode 100644
index 00000000..9cf605d5
--- /dev/null
+++ b/docs/workshop_creds.md
@@ -0,0 +1,73 @@
+### creds.yaml
+
+The `creds.yaml` file is the primary configuration file on the system. The platform will not launch unless this file is present. Create a copy of the example in `learning_observer/learning_observer/creds.yaml.workshop`. We will copy this over, and then set up the pieces needed for the system to work.
+
+You're welcome to run the `learning observer` between changes. In most cases, it will tell you exactly what needs to be fixed.
+
+```bash
+cp learning_observer/learning_observer/creds.yaml.workshop learning_observer/creds.yaml
+```
+
+#### User Authentication
+
+As a research platform, the Learning Observer supports many authentication schemes, since it's designed for anything from small cognitive labs and user studies (with no log-in) to large-scale school deployments (e.g. integrating with Google Classroom). This is pluggable.
+
+For this workshop, we will disable Google authentication, and set the system up so we can use it with no authentication:
+
+```yaml
+auth:
+    # remove google_oauth from auth
+    # google_oauth: ...
+
+    # enable passwordless insecure log-ins
+    # useful for quickly seeing the system up and running
+    test_case_insecure: true
+```
+
+#### Event authentication
+
+Learning event authentication is separate from user authentication. We also have multiple schemes for this, but for testing and development, we will run without authentication.
+
+```yaml
+# Allow all incoming events
+event_auth:
+    # ...
+    testcase_auth: {}
+```
+
+#### Session management
+
+Session management requires a unique key for the system. Type in anything (just make it complex enough):
+
+```yaml
+# update session information
+aio:
+    session_secret: asupersecretsessionkeychosenbyyou
+    session_max_age: 3600
+```
+
+Pro tip: If you start the system missing a setting like this, it will usually tell you what's wrong and how to fix it (in the above case, generating a secure GUID to use as your session secret).
+
+#### KVS
+
+```yaml
+# If you are using Docker compose, you should change the redis host to 'redis'
+redis_connection:
+    redis_host: redis
+    redis_port: 6379
+```
+
+### admins.yaml & teachers.yaml
+
+The platform expects both of these files to exist under `learning_observer/learning_observer/static_data/`. If these are missing on start-up, the platform will create them for you and exit. Normally these are populated with the allowed Admins/Teachers for the system.
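+
+If you would like to save a restart cycle, you can create empty placeholder files yourself (an untested shortcut; the platform will otherwise create them for you on first run):
+
+```bash
+touch learning_observer/learning_observer/static_data/admins.yaml
+touch learning_observer/learning_observer/static_data/teachers.yaml
+```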
+ +### passwd.lo + +Each install of the system needs an admin password file associated with it. The `learning_observer/util/lo_passwd.py` file can be used to generate this password file. This does not have to be done in the same virtual environment as the main server. If you are using Docker, just create a local virtual environment to run this command. + +```bash +cd learning_observer/ +python util/lo_passwd.py --username admin --password supersecureadminpassword --filename passwd.lo +``` + +Depending on how the `creds.yaml` authorization settings are configured, you may be required to use the password you create. diff --git a/learning_observer/learning_observer/cache.py b/learning_observer/learning_observer/cache.py index 0271e448..180bb7aa 100644 --- a/learning_observer/learning_observer/cache.py +++ b/learning_observer/learning_observer/cache.py @@ -3,6 +3,7 @@ import learning_observer.kvs import learning_observer.prestartup +from learning_observer.log_event import debug_log cache_backend = None @@ -23,13 +24,19 @@ def connect_to_memoization_kvs(): 'key in `creds.yaml`.\n'\ '```\nmemoization:\n type: stub\n```\nOR\n'\ '```\nmemoization:\n type: redis_ephemeral\n expiry: 60\n```' - raise learning_observer.prestartup.StartupCheck("KVS: "+error_text) + debug_log(f'WARNING:: {error_text}') + # raise learning_observer.prestartup.StartupCheck("KVS: "+error_text) def async_memoization(): def decorator(func): @functools.wraps(func) async def wrapper(*args, **kwargs): + # if the memoization cache is absent, just run the function + if cache_backend is None: + return await func(*args, **kwargs) + + # process item if the cache is present key = create_key_from_args(args, kwargs) if key in await cache_backend.keys(): return await cache_backend[key] diff --git a/learning_observer/learning_observer/communication_protocol/executor.py b/learning_observer/learning_observer/communication_protocol/executor.py index 21bd45f9..6f235273 100644 --- a/learning_observer/learning_observer/communication_protocol/executor.py +++ b/learning_observer/learning_observer/communication_protocol/executor.py @@ -7,7 +7,6 @@ import asyncio import collections import concurrent.futures -import enum import functools import inspect @@ -380,13 +379,8 @@ async def handle_map(functions, function_name, values, value_path, func_kwargs=N return output -class SelectFields(enum.Enum): - Missing = 'Missing' - All = 'All' - - @handler(learning_observer.communication_protocol.query.DISPATCH_MODES.SELECT) -async def handle_select(keys, fields=SelectFields.Missing): +async def handle_select(keys, fields=learning_observer.communication_protocol.query.SelectFields.Missing): """ We dispatch this function whenever we process a DISPATCH_MODES.SELECT node. This function is used to select data from a kvs. 
The data being selected @@ -403,7 +397,7 @@ async def handle_select(keys, fields=SelectFields.Missing): TODO add in test cases once we pass kvs as a parameter """ fields_to_keep = fields - if fields is None or fields == SelectFields.Missing.value: + if fields is None or fields == learning_observer.communication_protocol.query.SelectFields.Missing: fields_to_keep = {} response = [] @@ -428,7 +422,7 @@ async def handle_select(keys, fields=SelectFields.Missing): resulting_value = k['default'] # keep all current fields except for provenance (already prepared) - if fields == SelectFields.All.value: + if fields == learning_observer.communication_protocol.query.SelectFields.All: fields_to_keep = {k: k for k in resulting_value.keys() if k != 'provenance'} for f in fields_to_keep: diff --git a/learning_observer/learning_observer/communication_protocol/query.py b/learning_observer/learning_observer/communication_protocol/query.py index 17798056..bd0216ab 100644 --- a/learning_observer/learning_observer/communication_protocol/query.py +++ b/learning_observer/learning_observer/communication_protocol/query.py @@ -17,6 +17,7 @@ where we don't want all of the machinery of the Learning Observer, so also note the lack of dependencies. ''' +import enum dispatch = "dispatch" @@ -66,6 +67,11 @@ def caller(*args, **kwargs): return caller +class SelectFields(str, enum.Enum): + Missing = 'Missing' + All = 'All' + + def select(keys, fields=None): """ Select is used to collect data from the KVS diff --git a/learning_observer/learning_observer/communication_protocol/util.py b/learning_observer/learning_observer/communication_protocol/util.py index fd50017d..ac53b8b5 100644 --- a/learning_observer/learning_observer/communication_protocol/util.py +++ b/learning_observer/learning_observer/communication_protocol/util.py @@ -4,11 +4,11 @@ ''' import inspect -import learning_observer.communication_protocol.query +import learning_observer.communication_protocol.query as q import learning_observer.communication_protocol.exception import learning_observer.util -dispatch = learning_observer.communication_protocol.query.dispatch +dispatch = q.dispatch def _flatten_helper(top_level, current_level, prefix=''): @@ -26,7 +26,7 @@ def _flatten_helper(top_level, current_level, prefix=''): """ for key, value in list(current_level.items()): new_key = f"{prefix}.{key}" if prefix else key - if isinstance(value, dict) and dispatch in value and value[dispatch] != learning_observer.communication_protocol.query.DISPATCH_MODES.VARIABLE: + if isinstance(value, dict) and dispatch in value and value[dispatch] != q.DISPATCH_MODES.VARIABLE: if isinstance(value, dict): top_level[new_key] = _flatten_helper(top_level, value, prefix=new_key) else: @@ -53,3 +53,39 @@ def flatten(endpoint): endpoint['execution_dag'][key] = _flatten_helper(endpoint['execution_dag'], value, prefix=f"impl.{key}") return endpoint + + +def generate_base_dag_for_student_reducer(reducer, module): + course_roster = q.call('learning_observer.courseroster') + keys_node = f'{reducer}_keys' + select_node = f'{reducer}_output' + join_node = f'{reducer}_join_roster' + export_name = f'{reducer}_export' + execution_dag = { + 'execution_dag': { + # If we include runtime as a parameter, then the runtime object, + # which contains the current request, will be passed to the function. + # course_roster expects a `course_id` which we define as q.parameter. + # `course_id` should be provided when querying a node that depends + # on this function. 
+ 'roster': course_roster(runtime=q.parameter('runtime'), course_id=q.parameter("course_id", required=True)), + # q.keys formats requested information into the appropriate keys + keys_node: q.keys(f'{module}.{reducer}', STUDENTS=q.variable('roster'), STUDENTS_path='user_id'), + # q.select handles fetching items from redis based on a list of keys + select_node: q.select(q.variable(keys_node), fields=q.SelectFields.All), + # q.join will combine two lists of dictionaries based on a key_path + join_node: q.join(LEFT=q.variable(select_node), RIGHT=q.variable('roster'), LEFT_ON='provenance.provenance.value.user_id', RIGHT_ON='user_id'), + }, + 'exports': { + export_name: { + 'returns': join_node, + # TODO we ought to automatically know the parameters based on + # the queried node. Including a list of parameters here is + # redundant. + 'parameters': ['course_id'], + # TODO include a description for each exported node + # TODO include sample output for the exported node + } + } + } + return execution_dag diff --git a/learning_observer/learning_observer/creds.yaml.example b/learning_observer/learning_observer/creds.yaml.example index c4e7078a..33d06c72 100644 --- a/learning_observer/learning_observer/creds.yaml.example +++ b/learning_observer/learning_observer/creds.yaml.example @@ -42,7 +42,7 @@ pubsub: type: stub # stub for in-memory debugging, redis for small-scale prod. xmpp will bring scale redis_connection: redis_host: localhost - redis_port: 6389 + redis_port: 6379 redis_password: yoursupersecurepassword kvs: # stub for in-memory debugging diff --git a/learning_observer/learning_observer/creds.yaml.workshop b/learning_observer/learning_observer/creds.yaml.workshop new file mode 100644 index 00000000..c580cf03 --- /dev/null +++ b/learning_observer/learning_observer/creds.yaml.workshop @@ -0,0 +1,21 @@ +config: + run_mode: dev +auth: + test_case_insecure: true +feature_flags: {} +roster_data: + source: all +aio: + session_secret: superdupersecretkey + session_max_age: 3600 +kvs: + default: + type: filesystem + path: .lo_kvs + memoization: + type: stub + +event_auth: + testcase_auth: {} +theme: + server_name: Learning Observer diff --git a/learning_observer/learning_observer/downloads.py b/learning_observer/learning_observer/downloads.py index 0757fcf9..d57afc2d 100644 --- a/learning_observer/learning_observer/downloads.py +++ b/learning_observer/learning_observer/downloads.py @@ -86,9 +86,12 @@ "5.1.3": "c03f5bfd8deb11ad6cec84a6201f4327f28a640e693e56466fd80d983ed54" "16deff1548a0f6bbad013ec278b9750d1d253bd9c5bd1f53c85fcd62adba5eedc59", "5.3.1": "d099dac0135309466dc6208aaa973584843a3efbb40b2c96eb7c179f5f20f" - "80def35bbc1a7a0b08c9d5bdbed6b8e780ba7d013d18e4019e04fd82a19c076a1f8" + "80def35bbc1a7a0b08c9d5bdbed6b8e780ba7d013d18e4019e04fd82a19c076a1f8", + "5.3.3": "54b69b378be9029cb841bce9f33e111148231ce38ae389601c10ee1fec93b" + "bfb84839e84911e9e32e9e026a182e7225fd8531dc8344ba94ef4b467852e7162d5" }, "tested_versions": [ + 'https://cdn.jsdelivr.net/npm/bootswatch@5.3.3/dist/minty/bootstrap.min.css', "https://cdn.jsdelivr.net/npm/bootswatch@5.3.1/dist/minty/bootstrap.min.css", 'https://cdn.jsdelivr.net/npm/bootswatch@5.1.3/dist/minty/bootstrap.min.css', ] diff --git a/learning_observer/learning_observer/ipython_integration.py b/learning_observer/learning_observer/ipython_integration.py index c481569d..ac2e1bef 100644 --- a/learning_observer/learning_observer/ipython_integration.py +++ b/learning_observer/learning_observer/ipython_integration.py @@ -21,7 +21,9 @@ KERNEL_ID = 
'learning_observer_kernel'

 # generic log file for seeing ipython output
-logging.basicConfig(filename='ZMQ.log', encoding='utf-8', level=logging.DEBUG)
+# TODO this is creating a massive file and we ought to make sure it's not
+# causing issues on any other systems.
+# logging.basicConfig(filename='ZMQ.log', encoding='utf-8', level=logging.DEBUG)


 async def start_learning_observer_application_server(runner):
diff --git a/learning_observer/learning_observer/kvs.py b/learning_observer/learning_observer/kvs.py
index 591befc9..469761b7 100644
--- a/learning_observer/learning_observer/kvs.py
+++ b/learning_observer/learning_observer/kvs.py
@@ -233,6 +233,8 @@ def __init__(self, path=None, subdirs=False):
         '''
         self.path = path or learning_observer.paths.data('kvs')
         self.subdirs = subdirs
+        if not os.path.exists(self.path):
+            os.makedirs(self.path, exist_ok=True)

     def key_to_safe_filename(self, key):
         '''
@@ -249,22 +251,22 @@ def key_to_safe_filename(self, key):
         return os.path.join(self.path, safename)

     def safe_filename_to_key(self, filename):
-        raise NotImplementedError("Code this up, please. Or for debugging, comment out the exception")
-        return filename
+        # raise NotImplementedError("Code this up, please. Or for debugging, comment out the exception")
+        return learning_observer.util.from_safe_filename(filename)

     async def __getitem__(self, key):
         path = self.key_to_safe_filename(key)
         if not os.path.exists(path):
             return None
         with open(path) as f:
-            return f.read()
+            return json.load(f)

     async def set(self, key, value):
         path = self.key_to_safe_filename(key)
         if self.subdirs:
             os.makedirs(os.path.dirname(path), exist_ok=True)
         with open(path, 'w') as f:
-            f.write(value)
+            json.dump(value, f, indent=4)

     async def __delitem__(self, key):
         path = self.key_to_safe_filename(key)
@@ -275,13 +277,15 @@ async def keys(self):
         This one is a little bit tricky, since if subdirs, we need to do
         a full walk
         '''
+        keys = []
         if self.subdirs:
             for root, dirs, files in os.walk(self.path):
                 for f in files:
-                    yield self.safe_filename_to_key(os.path.join(root, f).replace(os.sep, '/'))
+                    keys.append(self.safe_filename_to_key(os.path.join(root, f).replace(os.sep, '/')))
         else:
             for f in os.listdir(self.path):
-                yield self.safe_filename_to_key(f)
+                keys.append(self.safe_filename_to_key(f))
+        return keys


 # TODO change the keys to variables
diff --git a/learning_observer/learning_observer/main.py b/learning_observer/learning_observer/main.py
index 6696b567..77c3b25d 100644
--- a/learning_observer/learning_observer/main.py
+++ b/learning_observer/learning_observer/main.py
@@ -15,6 +15,7 @@
 import aiohttp
 import aiohttp.web
+import functools
 import pmss
 import uvloop

@@ -105,13 +106,12 @@ def create_app():
     return app


-def shutdown(app):
+async def shutdown(app):
     '''
     Shutdown the app.
'''
-    app.shutdown()
-    app.cleanup()
-    return app
+    await app.shutdown()
+    await app.cleanup()


 def start(app):
@@ -124,9 +124,10 @@ def start(app):

 print("Arguments:", args)

+app = create_app()
 if args.watchdog is not None:
-    print("Watchdog mode")
+    print("Watchdog mode", args.watchdog)
     # Parse argument to determine watchdog handler
     restart = {
         'restart': learning_observer.watchdog_observer.restart,
@@ -138,14 +139,12 @@ def start(app):
         )
         sys.exit(-1)
     fs_event_handler = learning_observer.watchdog_observer.RestartHandler(
-        shutdown=shutdown,
+        shutdown=functools.partial(shutdown, app),
         restart=restart[args.watchdog],
-        start=start
+        start=functools.partial(start, app)
     )
     learning_observer.watchdog_observer.watchdog(fs_event_handler)

-app = create_app()
-
 # This creates the file that tells jupyter how to run our custom
 # kernel. This command needs to be ran once (outside of Jupyter)
 # before users can get access to the LO Kernel.
diff --git a/learning_observer/learning_observer/module_loader.py b/learning_observer/learning_observer/module_loader.py
index 47bdbb7b..5b8117b3 100644
--- a/learning_observer/learning_observer/module_loader.py
+++ b/learning_observer/learning_observer/module_loader.py
@@ -12,6 +12,7 @@
 import collections
 import copy
 import os.path
+import pmss
 import sys

 import pkg_resources
@@ -27,6 +28,15 @@
 import learning_observer.stream_analytics.helpers as helpers


+pmss.parser('clone_module_git_repos', parent='string', choices=['prompt', 'y', 'n'], transform=None)
+pmss.register_field(
+    name='clone_module_git_repos',
+    type='clone_module_git_repos',
+    description='Determine if we should fetch git repos for installed '\
+        'modules. Defaults to prompt, which asks the user interactively.',
+    default='prompt'
+)
+
 # This is set to true after we've scanned and loaded modules
 LOADED = False

@@ -459,7 +469,8 @@ def register_git_repos(component_name, module):
             location=learning_observer.paths.repo(repo),
             url=module.STATIC_FILE_GIT_REPOS[repo]['url']
         ))
-        yesno = input("Yes/No> ")
+        yesno = learning_observer.settings.pmss_settings.clone_module_git_repos()
+        yesno = yesno if yesno != 'prompt' else input("Yes/No> ")
         if yesno.lower().strip() not in ["y", "tak", "yes", "yup", "好", "نعم"]:
             print("Fine. Get it yourself, and configure the location")
             print("in the setting file under repos. Run me again once it's")
diff --git a/learning_observer/learning_observer/redis_connection.py b/learning_observer/learning_observer/redis_connection.py
index e5152977..c2c55c57 100644
--- a/learning_observer/learning_observer/redis_connection.py
+++ b/learning_observer/learning_observer/redis_connection.py
@@ -16,19 +16,19 @@
 pmss.register_field(
     name='redis_host',
-    type=pmss.pmsstypes.TYPES.host,
+    type=pmss.pmsstypes.TYPES.hostname,
     description='Determine the host for the redis_connection. Defaults to localhost.',
     default='localhost'
 )
 pmss.register_field(
     name='redis_port',
     type=pmss.pmsstypes.TYPES.port,
-    description='Determine the port for the redis_connection. Defaults to 6389.',
-    default=6389
+    description='Determine the port for the redis_connection.
Defaults to 6379.', + default=6379 ) pmss.register_field( name='redis_password', - types=pmss.pmsstypes.TYPES.passwordtoken, + type=pmss.pmsstypes.TYPES.string, description='Password token for connectioning to redis_connection', default=None ) @@ -46,7 +46,8 @@ async def connect(): REDIS_CONNECTION = redis.asyncio.Redis( host=learning_observer.settings.pmss_settings.redis_host(types=['redis_connection']), port=learning_observer.settings.pmss_settings.redis_port(types=['redis_connection']), - password=learning_observer.settings.pmss_settings.redis_password(types=['redis_connection']) + # TODO figure out how to properly use None from pmss + # password=learning_observer.settings.pmss_settings.redis_password(types=['redis_connection']) ) await REDIS_CONNECTION.ping() diff --git a/learning_observer/learning_observer/routes.py b/learning_observer/learning_observer/routes.py index a2c6ed07..b9bdba96 100644 --- a/learning_observer/learning_observer/routes.py +++ b/learning_observer/learning_observer/routes.py @@ -256,7 +256,7 @@ def register_auth_webapp_views(app): print("Typically:") print("{python_src} learning_observer/util/lo_passwd.py " "--username {username} --password {password} " - "--filename learning_obsserver/{fn}".format( + "--filename learning_observer/{fn}".format( python_src=paths.PYTHON_EXECUTABLE, username=getpass.getuser(), password=secrets.token_urlsafe(16), diff --git a/learning_observer/learning_observer/watchdog_observer.py b/learning_observer/learning_observer/watchdog_observer.py index 9e760eb2..010b98b6 100644 --- a/learning_observer/learning_observer/watchdog_observer.py +++ b/learning_observer/learning_observer/watchdog_observer.py @@ -15,11 +15,7 @@ and disabled by ''' -import watchdog - -from watchdog.observers import Observer -from watchdog.events import FileSystemEventHandler - +import asyncio import importlib import os import os.path @@ -27,12 +23,13 @@ import time import logging import traceback +import watchdog from watchdog.observers import Observer -from watchdog.events import LoggingEventHandler +from watchdog.events import FileSystemEventHandler, LoggingEventHandler - -LOCAL_PATH = os.path.dirname(os.path.abspath(__file__)) +# TODO fix this +LOCAL_PATH = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) def reimport_child_modules(paths=[LOCAL_PATH]): @@ -110,6 +107,9 @@ def restart(): os.execl(sys.executable, sys.executable, *sys.argv) +FILETYPES_TO_WATCH = ['yaml', 'py', 'js'] + + class RestartHandler(FileSystemEventHandler): ''' Soft restart the server when a file changes. @@ -129,22 +129,23 @@ def on_any_event(self, event): and skipping cache files, but for now we'll restart on any change, since this is helpful for testing this module. ''' - if event.is_directory: + if (event.is_directory or + event.src_path.split('.')[-1] not in FILETYPES_TO_WATCH or + event.event_type != 'modified'): return None - print("Reloading server") - self.shutdown() - # observer.stop() - # observer.join() - self.restart() - # We only make it beyond this point for some of the softer restarts. - self.start() + print("Reloading server", event) + asyncio.run(self.handle_restart()) + + async def handle_restart(self): + await self.shutdown() + await self.restart() def watchdog(handler=LoggingEventHandler()): ''' Set up watchdog mode. This will (eventually) reimport on file changes. 
'''
-    event_handler = LoggingEventHandler()
+    event_handler = handler
     observer = Observer()
     print("Watching for changes in:", LOCAL_PATH)
     observer.schedule(event_handler, LOCAL_PATH, recursive=True)
@@ -152,9 +153,6 @@ def watchdog(handler=LoggingEventHandler()):
     return observer


-# observer = Observer()
-# observer.start()
-
 if __name__ == "__main__":
     logging.basicConfig(level=logging.INFO,
                         format='%(asctime)s - %(message)s',
diff --git a/learning_observer/util/stream_writing.py b/learning_observer/util/stream_writing.py
index 3a8c515d..8e08e7a2 100644
--- a/learning_observer/util/stream_writing.py
+++ b/learning_observer/util/stream_writing.py
@@ -27,15 +27,16 @@ extension log events.
 '''
-import asyncio
-import json
-import sys
-
 import aiohttp
+import asyncio
 import docopt
-
+import json
 import loremipsum
 import names
+import random
+import sys
+import time
+

 ARGS = docopt.docopt(__doc__)
 print(ARGS)
@@ -93,6 +94,11 @@ def argument_list(argument, default):
         sys.exit(-1)
     return list_string

+# TODO what is `source_files` supposed to be?
+# when running this script for the workshop, we should either
+# 1) move gpt3 texts out of writing observer (dependency hell) OR
+# 2) avoid using `--gpt3` parameter and use loremipsum instead
+source_files = None
 if ARGS["--gpt3"] is not None:
     import writing_observer.sample_essays

@@ -130,6 +136,9 @@ def argument_list(argument, default):
 assert len(USERS) == STREAMS, "len(users) != STREAMS."
 assert len(DOC_IDS) == STREAMS, "len(document IDs) != STREAMS."

+def current_millis():
+    return round(time.time() * 1000)
+

 def insert(index, text, doc_id):
     '''
@@ -142,7 +151,8 @@ def insert(index, text, doc_id):
         "event": "google_docs_save",
         "source": "org.mitros.writing_analytics",
         "doc_id": doc_id,
-        "origin": "stream_test_script"
+        "origin": "stream_test_script",
+        "timestamp": current_millis()
     }


@@ -185,7 +195,8 @@ async def stream_document(text, ici, user, doc_id):
         for char, index in zip(text, range(len(text))):
             command = insert(index + 1, char, doc_id)
             await web_socket.send_str(json.dumps(command))
-            await asyncio.sleep(float(ici))
+            # We probably want a distribution with a lighter tail that is also less concentrated near zero. Perhaps Weibull with k=1.5?
+            await asyncio.sleep(random.expovariate(lambd=1/float(ici)))
             done = True
     except aiohttp.client_exceptions.ClientConnectorError:
         print("Failed to connect on " + url)
diff --git a/modules/lo_template_module/README.md b/modules/lo_template_module/README.md
new file mode 100644
index 00000000..8fe47c05
--- /dev/null
+++ b/modules/lo_template_module/README.md
@@ -0,0 +1,27 @@
+# Learning Observer Template Module
+
+This cookiecutter module acts as a template for new Learning Observer modules.
+
+## Create a new module
+
+To create a new module, run
+
+```bash
+pip install cookiecutter # if not already installed
+cd modules/
+cookiecutter lo_template_module/
+```
+
+Cookiecutter will prompt you for the necessary information.
+
+## Install the new module
+
+To install your new module, run
+
+```bash
+pip install -e modules/learning_observer_template/
+```
+
+## Helper functions
+
+This is where I would describe the script that will look for changes, and rebuild/re-install automatically, if I had one. A rough sketch of what such a script could look like follows.
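+
+A minimal sketch of such a watcher, using the same `watchdog` package the
+core server already uses for its restart handler. This is not part of the
+repo; the module path and reinstall command are illustrative:
+
+```python
+import subprocess
+import time
+
+from watchdog.events import FileSystemEventHandler
+from watchdog.observers import Observer
+
+MODULE_PATH = 'modules/your_module/'  # hypothetical module location
+
+
+class ReinstallHandler(FileSystemEventHandler):
+    '''Re-install the module in development mode whenever a Python file changes.'''
+    def on_modified(self, event):
+        if not event.is_directory and event.src_path.endswith('.py'):
+            subprocess.run(['pip', 'install', '-e', MODULE_PATH])
+
+
+observer = Observer()
+observer.schedule(ReinstallHandler(), MODULE_PATH, recursive=True)
+observer.start()
+try:
+    while True:
+        time.sleep(1)
+except KeyboardInterrupt:
+    observer.stop()
+observer.join()
+```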
diff --git a/modules/lo_template_module/cookiecutter.json b/modules/lo_template_module/cookiecutter.json
new file mode 100644
index 00000000..f6cf702b
--- /dev/null
+++ b/modules/lo_template_module/cookiecutter.json
@@ -0,0 +1,7 @@
+{
+    "project_name": "Learning Observer Template",
+    "project_slug": "{{ cookiecutter.project_name.lower().replace(' ', '_') }}",
+    "project_hyphenated": "{{ cookiecutter.project_slug.replace('_', '-') }}",
+    "project_short_description": "My Learning Observer Module.",
+    "reducer": "student_event_counter"
+}
diff --git a/modules/lo_template_module/{{ cookiecutter.project_slug }}/MANIFEST.in b/modules/lo_template_module/{{ cookiecutter.project_slug }}/MANIFEST.in
new file mode 100644
index 00000000..b7ece30a
--- /dev/null
+++ b/modules/lo_template_module/{{ cookiecutter.project_slug }}/MANIFEST.in
@@ -0,0 +1 @@
+include {{ cookiecutter.project_slug }}/assets/*
diff --git a/modules/lo_template_module/{{ cookiecutter.project_slug }}/README.md b/modules/lo_template_module/{{ cookiecutter.project_slug }}/README.md
new file mode 100644
index 00000000..54fdda9c
--- /dev/null
+++ b/modules/lo_template_module/{{ cookiecutter.project_slug }}/README.md
@@ -0,0 +1,140 @@
+# Learning Observer Example Module
+
+Welcome to the Learning Observer (LO) example module. This document
+details everything needed to create a module for the LO.
+
+## Package structure
+
+```bash
+module/
+    {{ cookiecutter.project_slug }}/
+        assets/
+            ...
+        helpers/
+            additional_script.py
+        module.py
+        reducers.py
+        dash_dashboards.py
+    MANIFEST.in
+    setup.py
+    setup.cfg
+```
+
+### setup.py
+
+This is a standard `setup.py` file.
+
+### setup.cfg
+
+Notice we include the following items in our `setup.cfg` file.
+
+```cfg
+[options.entry_points]
+lo_modules =
+    {{ cookiecutter.project_slug }} = {{ cookiecutter.project_slug }}.module
+
+[options.package_data]
+{{ cookiecutter.project_slug }} = helpers/*
+```
+
+The `lo_modules` entry point tells Learning Observer to treat `{{ cookiecutter.project_slug }}.module` as a pluggable application.
+
+The package data section is where we include additional directories we want included in the build.
+
+### MANIFEST.in
+
+The manifest specifies which files to include during Python packaging, i.e. the additional non-Python files we want in the build. If you do not need additional files, this file is unnecessary.
+
+For modules with Dash-made dashboards, this will typically include a relative path to the assets folder.
+
+### module.py
+
+This file defines everything about the module. See the dedicated section below.
+
+## Defining a module (module.py)
+
+Modules can include a variety of items. This will cover each item and its purpose on the system.
+
+### NAME
+
+This one is pretty self-explanatory. Give the module a short name to refer to it by.
+
+### EXECUTION_DAG
+
+The execution directed acyclic graph (DAG) is how we interact with the communication protocol.
+
+See `{{ cookiecutter.project_slug }}/module.py:EXECUTION_DAG` for a detailed example.
+
+### REDUCERS
+
+Reducers to define on the system. These are functions that will run over incoming events from students.
+
+See `{{ cookiecutter.project_slug }}/module.py:REDUCERS` for a detailed example.
+
+### DASH_PAGES
+
+Dashboards built using the Dash framework should be defined here.
+
+See `{{ cookiecutter.project_slug }}/module.py:DASH_PAGES` for a detailed example.
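+
+Putting these pieces together, a condensed `module.py` might look like the
+sketch below. Module and reducer names are placeholders; the full, commented
+version lives in `{{ cookiecutter.project_slug }}/module.py`.
+
+```python
+import learning_observer.communication_protocol.util
+from learning_observer.stream_analytics.helpers import KeyField, Scope
+
+import my_module.reducers
+import my_module.dash_dashboard
+
+NAME = 'My Module'
+
+# Default query DAG for pulling the reducer's output, joined to the roster
+EXECUTION_DAG = learning_observer.communication_protocol.util.generate_base_dag_for_student_reducer(
+    'student_event_counter', 'my_module')
+
+REDUCERS = [{
+    'context': 'org.mitros.writing_analytics',
+    'scope': Scope([KeyField.STUDENT]),
+    'function': my_module.reducers.student_event_counter,
+    'default': {'count': 0}
+}]
+
+DASH_PAGES = [{
+    'MODULE': my_module.dash_dashboard,
+    'LAYOUT': my_module.dash_dashboard.layout,
+    'ASSETS': 'assets',
+    'TITLE': 'My Module',
+    'DESCRIPTION': 'An example Learning Observer dashboard.',
+    'SUBPATH': 'my-module'
+}]
+```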
+
+### COURSE_DASHBOARDS
+
+The registered course dashboards are listed for users when navigating between dashboards, such as on their Home screen.
+
+See `{{ cookiecutter.project_slug }}/module.py:COURSE_DASHBOARDS` for a detailed example.
+
+Note that the student counterpart, `STUDENT_DASHBOARDS`, exists.
+
+### THIRD_PARTY
+
+Third-party items are downloaded and included when serving items from the module. This is usually used for including extra JavaScript or CSS files.
+
+```python
+THIRD_PARTY = {
+    'name_of_item': {
+        'url': 'url_to_third_party_tool',
+        'hash': 'hash_of_download_OR_dict_of_versions_and_hashes'
+    }
+}
+```
+
+### STATIC_FILE_GIT_REPOS
+
+We're still figuring this out, but we'd like to support hosting static files from the git repo of the module.
+This allows us to have a Merkle-tree style record of which version is deployed in our log files.
+
+A common use case for this is serving static `.html` and `.js` files for your module.
+
+```python
+STATIC_FILE_GIT_REPOS = {
+    'repo_name': {
+        'url': 'url_to_repo',
+        'prefix': 'relative/path/to/directory',
+        # Branches we serve. This can either be a whitelist (e.g. which ones
+        # are available) or a blacklist (e.g. which ones are blocked)
+        'whitelist': ['master']
+    }
+}
+```
+
+### EXTRA_VIEWS
+
+These are extra views to publish to the user. Currently, we only support `.json` files.
+
+```python
+EXTRA_VIEWS = [{
+    'name': 'Name of view',
+    'suburl': 'view-suburl',
+    'static_json': python_dictionary_to_return
+}]
+```
+
+## Creating a reducer (reducers.py)
+
+Reducers are run over incoming student events. They can be defined using a decorator in the `learning_observer.stream_analytics` module.
+
+Each reducer should take the incoming `event` and the previous `internal_state` as parameters and return two new state objects.
+
+## Creating dashboards with Dash (dash_dashboard.py)
+
+Dash pages consist of a layout and callback functions. See `dash_dashboard.py` for a more detailed overview.
diff --git a/modules/lo_template_module/{{ cookiecutter.project_slug }}/setup.cfg b/modules/lo_template_module/{{ cookiecutter.project_slug }}/setup.cfg
new file mode 100644
index 00000000..f3017da7
--- /dev/null
+++ b/modules/lo_template_module/{{ cookiecutter.project_slug }}/setup.cfg
@@ -0,0 +1,10 @@
+[metadata]
+name = {{ cookiecutter.project_name }}
+description = Use this as a base template for creating new modules on the Learning Observer.
+
+[options]
+packages = {{ cookiecutter.project_slug }}
+
+[options.entry_points]
+lo_modules =
+    {{ cookiecutter.project_slug }} = {{ cookiecutter.project_slug }}.module
diff --git a/modules/lo_template_module/{{ cookiecutter.project_slug }}/setup.py b/modules/lo_template_module/{{ cookiecutter.project_slug }}/setup.py
new file mode 100644
index 00000000..3fcd10cf
--- /dev/null
+++ b/modules/lo_template_module/{{ cookiecutter.project_slug }}/setup.py
@@ -0,0 +1,14 @@
+'''
+Install script. Everything is handled in setup.cfg
+
+To set up locally for development, run `python setup.py develop`, in a
+virtualenv, preferably.
+'''
+from setuptools import setup
+
+setup(
+    name="{{ cookiecutter.project_slug }}",
+    package_data={
+        '{{ cookiecutter.project_slug }}': ['assets/*'],
+    }
+)
diff --git a/modules/lo_template_module/{{ cookiecutter.project_slug }}/{{ cookiecutter.project_slug }}/__init__.py b/modules/lo_template_module/{{ cookiecutter.project_slug }}/{{ cookiecutter.project_slug }}/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/modules/lo_template_module/{{ cookiecutter.project_slug }}/{{ cookiecutter.project_slug }}/assets/scripts.js b/modules/lo_template_module/{{ cookiecutter.project_slug }}/{{ cookiecutter.project_slug }}/assets/scripts.js
new file mode 100644
index 00000000..f0f91912
--- /dev/null
+++ b/modules/lo_template_module/{{ cookiecutter.project_slug }}/{{ cookiecutter.project_slug }}/assets/scripts.js
@@ -0,0 +1,102 @@
+/**
+ * JavaScript callbacks to be used with the LO Example dashboard
+ */
+
+// Initialize the `dash_clientside` object if it doesn't exist
+if (!window.dash_clientside) {
+  window.dash_clientside = {};
+}
+
+window.dash_clientside.{{ cookiecutter.project_slug }} = {
+  /**
+   * Send updated queries to the communication protocol.
+   * @param {object} wsReadyState LOConnection status object
+   * @param {string} urlHash query string from hash for determining course id
+   * @returns stringified JSON object that is sent to the communication protocol
+   */
+  sendToLOConnection: async function (wsReadyState, urlHash) {
+    if (wsReadyState === undefined) {
+      return window.dash_clientside.no_update
+    }
+    if (wsReadyState.readyState === 1) {
+      if (urlHash.length === 0) { return window.dash_clientside.no_update }
+      const decodedParams = decode_string_dict(urlHash.slice(1))
+      if (!decodedParams.course_id) { return window.dash_clientside.no_update }
+      const outgoingMessage = {
+        {{ cookiecutter.project_slug }}_query: {
+          execution_dag: '{{ cookiecutter.project_slug }}',
+          target_exports: ['{{ cookiecutter.reducer }}_export'],
+          kwargs: decodedParams
+        }
+      };
+      return JSON.stringify(outgoingMessage);
+    }
+    return window.dash_clientside.no_update;
+  },
+
+  /**
+   * Process a message from LOConnection
+   * @param {object} incomingMessage object received from LOConnection
+   * @returns parsed data to local storage
+   */
+  receiveWSMessage: async function (incomingMessage) {
+    // TODO the naming here is broken serverside. Notice above we
+    // called the target export `{{ cookiecutter.reducer }}_export`, i.e. the named
+    // export. Below, we need to call `{{ cookiecutter.reducer }}_join_roster`, i.e. the name
+    // of the node. This ought to be cleaned up in the communication protocol.
+    const messageData = JSON.parse(incomingMessage.data).{{ cookiecutter.project_slug }}_query.{{ cookiecutter.reducer }}_join_roster || [];
+    if (messageData.error !== undefined) {
+      console.error('Error received from server', messageData.error);
+      return [];
+    }
+    return messageData;
+  },
+
+  /**
+   * Build the student UI components based on the stored websocket data
+   * @param {*} wsStorageData information stored in the websocket store
+   * @returns Dash object to be displayed on page
+   */
+  populateOutput: function(wsStorageData) {
+    if (!wsStorageData) {
+      return 'No students';
+    }
+    let output = []
+    // Iterate over students and create UI items for each
+    for (const student of wsStorageData) {
+
+      // We define Dash components in JS via a dictionary
+      // of where the component lives, what it is, and any
+      // parameters we want to pass along to it.
+      // - `namespace`: the module the component is in
+      // - `type`: the component to use
+      // - `props`: any parameters the component expects
+      // The following produces a LONameTag and Span wrapped in a Div
+      const studentBadge = {
+        namespace: 'dash_html_components',
+        type: 'Div',
+        props: {
+          children: [{
+            namespace: 'lo_dash_react_components',
+            props: {
+              profile: student.profile,
+              className: 'student-name-tag d-inline-block',
+              includeName: true,
+              id: `${student.user_id}-activity-img`
+            },
+            type: 'LONameTag'
+          }, {
+            namespace: 'dash_html_components',
+            props: {
+              children: ` - ${student.count} events`,
+            },
+            type: 'Span'
+          }]
+        }
+      }
+      output = output.concat(studentBadge)
+    }
+    return output;
+  }
+}
diff --git a/modules/lo_template_module/{{ cookiecutter.project_slug }}/{{ cookiecutter.project_slug }}/dash_dashboard.py b/modules/lo_template_module/{{ cookiecutter.project_slug }}/{{ cookiecutter.project_slug }}/dash_dashboard.py
new file mode 100644
index 00000000..ee7131c8
--- /dev/null
+++ b/modules/lo_template_module/{{ cookiecutter.project_slug }}/{{ cookiecutter.project_slug }}/dash_dashboard.py
@@ -0,0 +1,90 @@
+'''
+This file details how to build a dashboard using
+the Dash framework.
+
+If you are unfamiliar with Dash, it compiles Python code
+to React and serves it via a Flask server. You can register
+callbacks to run when specific states change. Normal callbacks
+execute Python code server side, but clientside callbacks
+execute JavaScript code client side. Clientside callbacks are
+preferred, as they cut down on server and network load.
+'''
+from dash import html, dcc, callback, clientside_callback, ClientsideFunction, Output, Input
+import dash_bootstrap_components as dbc
+import lo_dash_react_components as lodrc
+
+
+_prefix = '{{ cookiecutter.project_hyphenated }}'
+_namespace = '{{ cookiecutter.project_slug }}'
+_websocket = f'{_prefix}-websocket'
+_websocket_storage = f'{_prefix}-websocket-store'
+_output = f'{_prefix}-output'
+
+def layout():
+    '''
+    Function to define the page's layout.
+    '''
+    page_layout = html.Div(children=[
+        html.H1(children='{{ cookiecutter.project_name }}'),
+        dbc.InputGroup([
+            dbc.InputGroupText(lodrc.LOConnectionStatusAIO(aio_id=_websocket)),
+            lodrc.ProfileSidebarAIO(class_name='rounded-0 rounded-end', color='secondary'),
+        ]),
+        dcc.Store(id=_websocket_storage),
+        html.H2('Output from reducers'),
+        html.Div(id=_output)
+    ])
+    return page_layout
+
+# Send the initial state based on the url hash to LO.
+# If this is not included, nothing will be returned from
+# the communication protocol.
+clientside_callback(
+    ClientsideFunction(namespace=_namespace, function_name='sendToLOConnection'),
+    Output(lodrc.LOConnectionStatusAIO.ids.websocket(_websocket), 'send'),
+    Input(lodrc.LOConnectionStatusAIO.ids.websocket(_websocket), 'state'),  # used for initial setup
+    Input('_pages_location', 'hash')
+)
+
+# Handle receiving a message from the websocket.
+# This step will parse the message and update the
+# local storage accordingly.
+clientside_callback(
+    ClientsideFunction(namespace=_namespace, function_name='receiveWSMessage'),
+    Output(_websocket_storage, 'data'),
+    Input(lodrc.LOConnectionStatusAIO.ids.websocket(_websocket), 'message'),
+    prevent_initial_call=True
+)
+
+# Build the UI based on what we've received from the
+# communication protocol.
+# This clientside callback and the serverside callback below
+# do the same thing.
+# clientside_callback(
+#     ClientsideFunction(namespace=_namespace, function_name='populateOutput'),
+#     Output(_output, 'children'),
+#     Input(_websocket_storage, 'data'),
+# )
+
+
+@callback(
+    Output(_output, 'children'),
+    Input(_websocket_storage, 'data'),
+)
+def populate_output(data):
+    '''This method creates UI components for each student found
+    in the websocket's storage.
+
+    This will use more network traffic and server resources
+    than the equivalent clientside callback, `populateOutput`.
+    '''
+    if not data:
+        return 'No students'
+    output = [html.Div([
+        lodrc.LONameTag(
+            profile=s['profile'], className='d-inline-block student-name-tag',
+            includeName=True, id=f'{s["user_id"]}-name-tag'
+        ),
+        html.Span(f' - {s["count"]} events')
+    ]) for s in data]
+    return output
diff --git a/modules/lo_template_module/{{ cookiecutter.project_slug }}/{{ cookiecutter.project_slug }}/module.py b/modules/lo_template_module/{{ cookiecutter.project_slug }}/{{ cookiecutter.project_slug }}/module.py
new file mode 100644
index 00000000..4ca427f6
--- /dev/null
+++ b/modules/lo_template_module/{{ cookiecutter.project_slug }}/{{ cookiecutter.project_slug }}/module.py
@@ -0,0 +1,97 @@
+'''
+{{ cookiecutter.project_name }}
+
+{{ cookiecutter.project_short_description }}
+'''
+import learning_observer.downloads as d
+import learning_observer.communication_protocol.util
+from learning_observer.dash_integration import thirdparty_url, static_url
+from learning_observer.stream_analytics.helpers import KeyField, Scope
+
+import {{ cookiecutter.project_slug }}.reducers
+import {{ cookiecutter.project_slug }}.dash_dashboard
+
+# Name for the module
+NAME = '{{ cookiecutter.project_name }}'
+
+'''
+Define execution DAGs for this module. We provide a default DAG
+for fetching information from the provided reducer. The internal
+structure looks like:
+
+`execution_dag`: defined directed acyclic graph (DAG) for querying data
+    <node_name>: q.select() # or some other communication protocol query
+`exports`: fetchable nodes from the execution dag
+    <export_name>: {
+        "returns": <node_name>,
+        "parameters": ["list", "of", "parameters", "needed"]
+    }
+
+NOTE interfacing with the communication protocol may change;
+the current flow is the first iteration. We will mark where things
+ought to be improved.
+'''
+EXECUTION_DAG = learning_observer.communication_protocol.util.generate_base_dag_for_student_reducer('{{ cookiecutter.reducer }}', '{{ cookiecutter.project_slug }}')
+
+'''
+Add reducers to the module.
+
+`context`: TODO
+`scope`: the granularity of events (by student, by student + document, etc.)
+`function`: the reducer function to run
+`default` (optional): initial value to start with
+'''
+REDUCERS = [
+    {
+        'context': 'org.mitros.writing_analytics',
+        # TODO scope is defined as a decorator on the function, why is
+        # it also defined here?
+        'scope': Scope([KeyField.STUDENT]),
+        'function': {{ cookiecutter.project_slug }}.reducers.{{ cookiecutter.reducer }},
+        'default': {'count': 0}
+    }
+]
+
+'''
+Define pages created with Dash.
+'''
+DASH_PAGES = [
+    {
+        'MODULE': {{ cookiecutter.project_slug }}.dash_dashboard,
+        'LAYOUT': {{ cookiecutter.project_slug }}.dash_dashboard.layout,
+        'ASSETS': 'assets',
+        'TITLE': '{{ cookiecutter.project_name }}',
+        'DESCRIPTION': '{{ cookiecutter.project_short_description }}',
+        'SUBPATH': '{{ cookiecutter.project_hyphenated }}',
+        'CSS': [
+            thirdparty_url("css/fontawesome_all.css")
+        ],
+        'SCRIPTS': [
+            static_url("liblo.js")
+        ]
+    }
+]
+
+'''
+Additional files we want included that come from a third party.
+'''
+THIRD_PARTY = {
+    "css/fontawesome_all.css": d.FONTAWESOME_CSS,
+    "webfonts/fa-solid-900.woff2": d.FONTAWESOME_WOFF2,
+    "webfonts/fa-solid-900.ttf": d.FONTAWESOME_TTF
+}
+
+'''
+The Course Dashboards are used to populate the modules
+on the home screen.
+
+Note the icon uses Font Awesome v5
+'''
+COURSE_DASHBOARDS = [{
+    'name': NAME,
+    'url': "/{{ cookiecutter.project_slug }}/dash/{{ cookiecutter.project_hyphenated }}",
+    "icon": {
+        "type": "fas",
+        "icon": "fa-play-circle"
+    }
+}]
diff --git a/modules/lo_template_module/{{ cookiecutter.project_slug }}/{{ cookiecutter.project_slug }}/reducers.py b/modules/lo_template_module/{{ cookiecutter.project_slug }}/{{ cookiecutter.project_slug }}/reducers.py
new file mode 100644
index 00000000..070567bd
--- /dev/null
+++ b/modules/lo_template_module/{{ cookiecutter.project_slug }}/{{ cookiecutter.project_slug }}/reducers.py
@@ -0,0 +1,16 @@
+'''
+This file defines reducers we wish to add to the incoming event
+pipeline. The `learning_observer.stream_analytics` package includes
+helper functions for scoping reducers and setting the null state.
+'''
+from learning_observer.stream_analytics.helpers import student_event_reducer
+
+
+@student_event_reducer(null_state={"count": 0})
+async def {{ cookiecutter.reducer }}(event, internal_state):
+    '''
+    An example of a per-student event counter
+    '''
+    state = {"count": internal_state.get('count', 0) + 1}
+
+    return state, state
diff --git a/modules/toy-assess/README.md b/modules/toy-assess/README.md
new file mode 100644
index 00000000..1b05a709
--- /dev/null
+++ b/modules/toy-assess/README.md
@@ -0,0 +1,63 @@
+This is a [Next.js](https://nextjs.org/) project bootstrapped with [`create-next-app`](https://github.com/vercel/next.js/tree/canary/packages/create-next-app).
+
+## Setup
+
+If you haven't yet, run:
+
+```bash
+npm install next@latest
+```
+
+This may also require running:
+
+```bash
+npm audit fix
+```
+
+You will also need to install writing_observer and package up lo_event to use inside the project.
+Clone it from https://github.com/ETS-Next-Gen/writing_observer, `cd` to the cloned directory, and run `npm install`.
+
+Then, inside `writing_observer/modules/lo_event`:
+
+```bash
+npm install redux
+npm install redux-thunk
+```
+
+Once you have writing_observer set up, go back to the `toy-assess` directory and do something like the following:
+
+```bash
+npm install ../writing_observer/modules/lo_event
+npm i @azure/openai@1.0.0-beta.7
+npm install formdata-node
+```
+
+Then set the following environment variables:
+
+* `OPENAI_API_RESOURCE`
+* `OPENAI_DEPLOYMENT_ID`
+* `OPENAI_API_KEY`
+
+This assumes that writing_observer is cloned in the same parent directory as toy-assess.
+
+## Getting Started
+First, run the development server:
+
+```bash
+npm run dev
+# or
+yarn dev
+# or
+pnpm dev
+# or
+bun dev
+```
+
+Open [http://localhost:3000](http://localhost:3000) with your browser to see the result.
+
+You can start editing the page by modifying `app/page.js`. The page auto-updates as you edit the file.
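+
+Once the dev server is up, you can exercise the LLM route (defined in
+`src/app/api/llm/route.js`) directly. A quick sanity check from Python,
+assuming you have `requests` installed; the prompt text is arbitrary:
+
+```python
+import requests
+
+resp = requests.get(
+    'http://localhost:3000/api/llm',
+    params={'prompt': 'Give me one tip for a stronger thesis statement.'}
+)
+print(resp.json()['response'])
+```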
+ +This project uses [`next/font`](https://nextjs.org/docs/basic-features/font-optimization) to automatically optimize and load Inter, a custom Google Font. + +## Learn More + +To learn more about Next.js, take a look at the following resources: + +- [Next.js Documentation](https://nextjs.org/docs) - learn about Next.js features and API. +- [Learn Next.js](https://nextjs.org/learn) - an interactive Next.js tutorial. + +You can check out [the Next.js GitHub repository](https://github.com/vercel/next.js/) - your feedback and contributions are welcome! + +## Deploy on Vercel + +The easiest way to deploy your Next.js app is to use the [Vercel Platform](https://vercel.com/new?utm_medium=default-template&filter=next.js&utm_source=create-next-app&utm_campaign=create-next-app-readme) from the creators of Next.js. + +Check out our [Next.js deployment documentation](https://nextjs.org/docs/deployment) for more details. diff --git a/modules/toy-assess/jsconfig.json b/modules/toy-assess/jsconfig.json new file mode 100644 index 00000000..b8d6842d --- /dev/null +++ b/modules/toy-assess/jsconfig.json @@ -0,0 +1,7 @@ +{ + "compilerOptions": { + "paths": { + "@/*": ["./src/*"] + } + } +} diff --git a/modules/toy-assess/next.config.js b/modules/toy-assess/next.config.js new file mode 100644 index 00000000..8f4add3c --- /dev/null +++ b/modules/toy-assess/next.config.js @@ -0,0 +1,19 @@ +/** @type {import('next').NextConfig} */ +const nextConfig = { + webpack: ( + config, + { buildId, dev, isServer, defaultLoaders, nextRuntime, webpack } + ) => { + // Important: return the modified config + config.externals.unshift( + { + sqlite3: 'sqlite3', + crypto: 'crypto', + ws: 'ws', + 'indexeddb-js': 'indexeddb-js' + }) + return config + }, +} + +module.exports = nextConfig diff --git a/modules/toy-assess/package.json b/modules/toy-assess/package.json new file mode 100644 index 00000000..b4589688 --- /dev/null +++ b/modules/toy-assess/package.json @@ -0,0 +1,42 @@ +{ + "name": "toy-assess", + "version": "0.1.0", + "private": true, + "scripts": { + "dev": "next dev", + "build": "next build", + "start": "next start", + "lint": "next lint" + }, + "dependencies": { + "@azure/openai": "^1.0.0-beta.7", + "@fortawesome/free-solid-svg-icons": "^6.5.0", + "@fortawesome/react-fontawesome": "^0.2.0", + "amdefine": "^1.0.1", + "bufferutil": "^4.0.8", + "indexeddb-js": "^0.0.14", + "jasmine": "^5.1.0", + "mapbox": "^1.0.0-beta10", + "mock-aws-s3": "^4.0.2", + "next": "13.5.5", + "react": "^18", + "react-dom": "^18", + "react-markdown": "^9.0.1", + "react-redux": "^8.1.3", + "redux": "^4.2.1", + "redux-thunk": "^2.4.2", + "utf-8-validate": "^6.0.3" + }, + "devDependencies": { + "@mapbox/node-pre-gyp": "^1.0.11", + "autoprefixer": "^10", + "better-react-mathjax": "^2.0.3", + "eslint": "^8", + "eslint-config-next": "13.5.5", + "mathjs": "^12.3.0", + "postcss": "^8", + "sqlite3": "^5.1.6", + "tailwindcss": "^3", + "ws": "^8.14.2" + } +} diff --git a/modules/toy-assess/postcss.config.js b/modules/toy-assess/postcss.config.js new file mode 100644 index 00000000..33ad091d --- /dev/null +++ b/modules/toy-assess/postcss.config.js @@ -0,0 +1,6 @@ +module.exports = { + plugins: { + tailwindcss: {}, + autoprefixer: {}, + }, +} diff --git a/modules/toy-assess/src/app/api/llm/route.js b/modules/toy-assess/src/app/api/llm/route.js new file mode 100644 index 00000000..48ac4217 --- /dev/null +++ b/modules/toy-assess/src/app/api/llm/route.js @@ -0,0 +1,64 @@ +/* + This is an API for calling LLMs. 
+*/ + +import { NextResponse, NextRequest } from 'next/server'; +import * as openai from '../../lib/azureInterface'; +import * as stub from '../../lib/stubInterface'; + +const listChatCompletions = openai.listChatCompletions; + +const default_messages = [ + { role: "system", content: "I am your writing coach. How can I help you?" }, + { role: "user", content: "Hi, how are you?"}, +]; + +async function processPrompt(prompt) { + return await listChatCompletions( + [ + { role: "system", content: "I am your writing coach. How can I help you?" }, + { role: "user", content: prompt }, + ], + {} + ); +} + +export async function GET(request) { + console.log('GET request called'); + const prompt = request.nextUrl.searchParams.get('prompt') || "How are you?"; + const jsonResponse = await processPrompt(prompt); + return NextResponse.json({'response': jsonResponse}); +} + +// Handles POST requests +export async function POST(request) { + console.log('POST request called'); + const req = await request.json(); + + const prompt = req?.prompt || "How are you?"; + const jsonResponse = await processPrompt(prompt); + return NextResponse.json({'response': jsonResponse}); +} + +/*export async function GET(request) { + const messages = request.nextUrl.searchParams.get('messages'); + const temperature = request.nextUrl.searchParams.get('temperature'); + console.log(messages, temperature); + const jsonResponse = await listChatCompletions(messages, {temperature}); + return NextResponse.json(jsonResponse); +} + +// Handles POST requests +export async function POST(request) { + const req = await request.json(); + const messages = req?.messages || default_messages; + const temperature = req?.temperature || default_temperature; + console.log(messages, temperature); + + const jsonResponse = await listChatCompletions( + messages, + {temperature} + ); + return NextResponse.json(jsonResponse); +} +*/ diff --git a/modules/toy-assess/src/app/base-style.css b/modules/toy-assess/src/app/base-style.css new file mode 100644 index 00000000..d86044e4 --- /dev/null +++ b/modules/toy-assess/src/app/base-style.css @@ -0,0 +1,6 @@ +body { + font-family: 'Arial', sans-serif; + background-color: #f8f9fa; + margin: 0; + padding: 0; +} diff --git a/modules/toy-assess/src/app/base_components.js b/modules/toy-assess/src/app/base_components.js new file mode 100644 index 00000000..e5dc4fae --- /dev/null +++ b/modules/toy-assess/src/app/base_components.js @@ -0,0 +1,130 @@ +// Basic components. This should depend on no other component files. + +import React from 'react'; + +import { useEffect } from 'react'; +import { useDispatch } from 'react-redux'; + +import { FontAwesomeIcon } from '@fortawesome/react-fontawesome'; +import { faEdit, faAngleDown, faAngleRight, faExclamationTriangle, faQuestionCircle, fasQuestionCircle, faTimes } from '@fortawesome/free-solid-svg-icons'; +import { faCheckCircle, faTimesCircle, faDotCircle } from '@fortawesome/free-solid-svg-icons'; + +import * as lo_event from 'lo_event'; +import * as reduxLogger from 'lo_event/lo_event/reduxLogger.js'; + +import { useComponentSelector, useSettingSelector } from './utils.js'; +import { library } from '@fortawesome/fontawesome-svg-core'; + +// Debug log function. This should perhaps go away / change / DRY eventually. 
+const DEBUG = false; +const dclog = (...args) => {if(DEBUG) {console.log.apply(console, Array.from(args));} }; + +library.add(faCheckCircle, faDotCircle, faTimesCircle, faQuestionCircle); + +// Pure CSS spinner taken from public domain spinners at loading.io/css +// https://github.com/loadingio/css-spinner/blob/master/README.md +export const Spinner = () => ( +
+  <div className="lds-ring">
+    <div></div><div></div><div></div><div></div>
+  </div>
+);
+
+
+export function Button( {...props} ) {
+  const className = props.className ?? "blue-button";
+  return (
+    <button {...props} className={className}>
+      {props.children}
+    </button>
+  );
+}
+
+
+export function List({ component: Component, count, id, componentProps }) {
+  return (
+    <div id={id}>
+      {Array.from(Array(count)).map((_, index) => (
+        <Component key={index} {...componentProps} />
+      ))}
+    </div>
+ ); +} + +// Rotating triangle for hiding / showing e.g. accordion regions +export function ShowHideToggle({visible}) { + return (