diff --git a/GODEL/server.py b/GODEL/server.py
index 111aaefd..14a2bca6 100644
--- a/GODEL/server.py
+++ b/GODEL/server.py
@@ -11,6 +11,7 @@
     AutoConfig,
     AutoModelForSeq2SeqLM,
     AutoTokenizer,
+    AutoModelForCausalLM
 )
 
 
@@ -47,7 +48,7 @@ def main():
     global model, tokenizer, args
 
     config = AutoConfig.from_pretrained(args.model_name_or_path)
-    model = AutoModelForSeq2SeqLM.from_pretrained(
+    model = AutoModelForCausalLM.from_pretrained(
         args.model_name_or_path,
         from_tf=bool(".ckpt" in args.model_name_or_path),
         config=config,
diff --git a/README.md b/README.md
index 68e803b7..c3168848 100644
--- a/README.md
+++ b/README.md
@@ -87,6 +87,10 @@ python generate.py --model_name_or_path ${MODEL_PATH} \
 
 We provide a demo interface to chat with finetuned models. The backend server is based on *flask* and the interface is based on *vue*, *bootstrap-vue*, and *BasicVueChat*.
 
+A simple Jupyter notebook is also available:
+
+[Notebook](https://github.com/AlexWortega/GODEL/blob/main/jupiters/godel_faststart.ipynb)
+
 Start the backend server:
 ```bash
 # Please create the backend server refering to e.g., dstc9_server.py
diff --git a/jupiters/godel_faststart.ipynb b/jupiters/godel_faststart.ipynb
new file mode 100644
index 00000000..f6ac0c15
--- /dev/null
+++ b/jupiters/godel_faststart.ipynb
@@ -0,0 +1,238 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "source": [
+    "# Download model"
+   ],
+   "metadata": {
+    "id": "Rzfk_BRio6AA"
+   }
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "source": [
+    "!wget https://bapengstorage.blob.core.windows.net/fileshare/GODEL-XLarge.tar.gz\n",
+    "!tar -zxvf GODEL-XLarge.tar.gz"
+   ],
+   "outputs": [
+    {
+     "output_type": "stream",
+     "name": "stdout",
+     "text": [
+      "--2022-06-28 06:44:41--  https://bapengstorage.blob.core.windows.net/fileshare/GODEL-XLarge.tar.gz\n",
+      "Resolving bapengstorage.blob.core.windows.net (bapengstorage.blob.core.windows.net)... 13.66.176.16\n",
+      "Connecting to bapengstorage.blob.core.windows.net (bapengstorage.blob.core.windows.net)|13.66.176.16|:443... connected.\n",
+      "HTTP request sent, awaiting response... 200 OK\n",
+      "Length: 19643503525 (18G) [application/gzip]\n",
+      "Saving to: ‘GODEL-XLarge.tar.gz’\n",
+      "\n",
+      "GODEL-XLarge.tar.gz 100%[===================>]  18.29G  23.6MB/s    in 11m 20s \n",
+      "\n",
+      "2022-06-28 06:56:01 (27.6 MB/s) - ‘GODEL-XLarge.tar.gz’ saved [19643503525/19643503525]\n",
+      "\n"
+     ]
+    }
+   ],
+   "metadata": {
+    "id": "7djvb_LH185s",
+    "outputId": "c56159f6-419a-4e56-948e-ad5127fa4174",
+    "colab": {
+     "base_uri": "https://localhost:8080/"
+    }
+   }
+  },
+  {
+   "cell_type": "markdown",
+   "source": [
+    "# Install libs"
+   ],
+   "metadata": {
+    "id": "lDf27O9uo7Lh"
+   }
+  },
+  {
+   "cell_type": "code",
+   "source": [
+    "# installs\n",
+    "!pip install transformers\n",
+    "!pip install dotmap"
+   ],
+   "metadata": {
+    "id": "9koyYG4wo1k7"
+   },
+   "execution_count": null,
+   "outputs": []
+  },
+  {
+   "cell_type": "markdown",
+   "source": [
+    "# Inference"
+   ],
+   "metadata": {
+    "id": "2T8Sz48_o9eJ"
+   }
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "source": [
+    "import torch\n",
+    "import numpy as np\n",
+    "import dotmap\n",
+    "\n",
+    "from transformers import (\n",
+    "    AutoConfig,\n",
+    "    AutoModelForSeq2SeqLM,\n",
+    "    AutoTokenizer,\n",
+    "    AutoModelForCausalLM\n",
+    ")\n",
+    "\n",
+    "\n",
+    "def set_seed(args):\n",
+    "    np.random.seed(args.seed)\n",
+    "    torch.manual_seed(args.seed)\n",
+    "    if args.n_gpu > 0:\n",
+    "        torch.cuda.manual_seed_all(args.seed)\n",
+    "\n",
+    "\n",
+    "model = None\n",
+    "tokenizer = None\n",
+    "args = dotmap.DotMap()\n",
+    "args.model_name_or_path = 'GODEL-XLarge'\n",
+    "args.prompt = ''\n",
+    "args.padding_text = ''\n",
+    "args.length = 128\n",
+    "args.num_samples = 1\n",
+    "args.temperature = 1\n",
+    "args.num_beams = 5\n",
+    "args.repetition_penalty = 1\n",
+    "args.top_k = 0\n",
+    "args.top_p = 0.5\n",
+    "args.no_cuda = False\n",
+    "args.use_slow_tokenizer = False\n",
+    "args.seed = 2022\n",
+    "args.stop_token = '<|endoftext|>'\n",
+    "args.n_gpu = 1\n",
+    "args.device = 'cuda'\n",
+    "\n",
+    "set_seed(args)\n",
+    "\n",
+    "# config = AutoConfig.from_pretrained(args.model_name_or_path)\n",
+    "model = AutoModelForCausalLM.from_pretrained(args.model_name_or_path)\n",
+    "\n",
+    "model = model.half().to(args.device)\n",
+    "tokenizer = AutoTokenizer.from_pretrained(args.model_name_or_path, use_fast=not args.use_slow_tokenizer)"
+   ],
+   "outputs": [],
+   "metadata": {
+    "id": "_VXW9Y3p185v"
+   }
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "source": [
+    "def generate(context, knowledge):\n",
+    "    # GODEL input format: dialogue turns joined by ' EOS ', grounding text\n",
+    "    # after ' <|knowledge|> ', and ' =>' marking where the response starts.\n",
+    "    context = ' EOS '.join(context)\n",
+    "\n",
+    "    input_ids = tokenizer(context + ' <|knowledge|> ' + knowledge +\n",
+    "                          ' =>', return_tensors=\"pt\").input_ids.to(args.device)\n",
+    "    gen_kwargs = {\n",
+    "        # 'num_beams': args.num_beams,\n",
+    "        'max_length': args.length,\n",
+    "        'min_length': 32,\n",
+    "        'top_k': 2000,\n",
+    "        'no_repeat_ngram_size': 4\n",
+    "    }\n",
+    "\n",
+    "    output_sequences = model.generate(input_ids, **gen_kwargs)\n",
+    "    output_sequences = tokenizer.batch_decode(\n",
+    "        output_sequences, skip_special_tokens=True)\n",
+    "\n",
+    "    return output_sequences[0].split('=>')[1]"
+   ],
+   "outputs": [],
+   "metadata": {
+    "id": "PDlug971185w"
+   }
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "source": [
+    "context = ['User: Hi']\n",
+    "knowledge = 'I am searching for new knowledge.'\n",
+    "generate(context, knowledge)"
+   ],
+   "outputs": [
+    {
+     "output_type": "stream",
+     "name": "stderr",
+     "text": [
+      "The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
+      "Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.\n"
+     ]
+    },
+    {
+     "output_type": "execute_result",
+     "data": {
+      "text/plain": [
+       "' Do you have any interesting things to tell me?'"
+      ]
+     },
+     "metadata": {},
+     "execution_count": 15
+    }
+   ],
+   "metadata": {
+    "id": "tGToZQh9185x",
+    "outputId": "0f247f66-f0f4-44de-f83e-717024a72005"
+   }
+  }
+ ],
+ "metadata": {
+  "orig_nbformat": 4,
+  "language_info": {
+   "name": "python",
+   "version": "3.8.10",
+   "mimetype": "text/x-python",
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "pygments_lexer": "ipython3",
+   "nbconvert_exporter": "python",
+   "file_extension": ".py"
+  },
+  "kernelspec": {
+   "name": "python3",
+   "display_name": "Python 3.8.10 64-bit"
+  },
+  "interpreter": {
+   "hash": "916dbcbb3f70747c44a77c7bcd40155683ae19c65e1c03b4aa3499c5328201f1"
+  },
+  "colab": {
+   "name": "GODEL",
+   "provenance": [],
+   "machine_shape": "hm",
+   "collapsed_sections": []
+  },
+  "accelerator": "GPU",
+  "gpuClass": "standard"
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
+}
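For readers who do not want to dig through the `.ipynb` JSON above, the inference flow added by this notebook condenses to the plain-Python sketch below. It is a restatement of the notebook cells, not new behavior: it assumes the `GODEL-XLarge` checkpoint from the notebook's wget/tar step has been extracted into the working directory and that a CUDA GPU is available.

```python
# Condensed sketch of the notebook's inference cells (same checkpoint, same prompt format).
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

MODEL_PATH = "GODEL-XLarge"  # directory produced by the notebook's wget + tar step
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)
model = AutoModelForCausalLM.from_pretrained(MODEL_PATH)
if DEVICE == "cuda":
    model = model.half()  # fp16 on GPU, as in the notebook
model = model.to(DEVICE).eval()


def generate(context, knowledge):
    # GODEL prompt: dialogue turns joined by ' EOS ', grounding text after
    # ' <|knowledge|> ', and ' =>' marking where the response should begin.
    prompt = " EOS ".join(context) + " <|knowledge|> " + knowledge + " =>"
    input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(DEVICE)
    output = model.generate(input_ids, max_length=128, min_length=32,
                            top_k=2000, no_repeat_ngram_size=4)
    text = tokenizer.batch_decode(output, skip_special_tokens=True)[0]
    # The causal model echoes the prompt, so keep only what follows '=>'.
    return text.split("=>")[1]


print(generate(["User: Hi"], "I am searching for new knowledge."))
```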