Commit 31782c08 authored by remivantrijp's avatar remivantrijp

git subrepo clone https://github.com/SonyCSLParis/fcg-hybrids.git systems/fcg-hybrids

subrepo:
  subdir:   "systems/fcg-hybrids"
  merged:   "be0aa7e"
upstream:
  origin:   "https://github.com/SonyCSLParis/fcg-hybrids.git"
  branch:   "master"
  commit:   "be0aa7e"
git-subrepo:
  version:  "0.4.0"
  origin:   "https://github.com/ingydotnet/git-subrepo.git"
  commit:   "5d6aba9"
parent a15e85ca
; DO NOT EDIT (unless you know what you are doing)
;
; This subdirectory is a git "subrepo", and this file is maintained by the
; git-subrepo command. See https://github.com/git-commands/git-subrepo#readme
;
[subrepo]
remote = https://github.com/SonyCSLParis/fcg-hybrids.git
branch = master
commit = be0aa7e9284606b328da2a4f32788df7489e770c
parent = a15e85ca715b279d53125dd938d0fdae9c30f6c6
method = merge
cmdver = 0.4.0
This diff is collapsed.
# fcg-hybrids
Library for Fluid Construction Grammar that allows the implementation of FCG grammars that interface with other NLP tools.
Requires the Babel software suite (Open Source access: https://emergent-languages.org/), including its module "nlp-tools".
;; Copyright 2019 Sony Computer Science Laboratories Paris
;; Remi van Trijp (http://www.remivantrijp.eu)
;; Licensed under the Apache License, Version 2.0 (the "License");
;; you may not use this file except in compliance with the License.
;; You may obtain a copy of the License at
;; http://www.apache.org/licenses/LICENSE-2.0
;; Unless required by applicable law or agreed to in writing, software
;; distributed under the License is distributed on an "AS IS" BASIS,
;; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
;; See the License for the specific language governing permissions and
;; limitations under the License.
;;=========================================================================
(in-package :fcg)
;;; Data structures.
;;; -------------------------------------------------------------------------------------
;; Export the structure name and one reader per slot. Note that the
;; constructor MAKE-WORD-DEPENDENCY-SPEC is not part of this export list.
(export '(word-dependency-spec
word-dependency-spec-string word-dependency-spec-syn-role word-dependency-spec-unit-name
word-dependency-spec-pos-tag word-dependency-spec-node-id word-dependency-spec-head-id))
;; Plain record with six untyped slots and no default values (each slot is
;; NIL unless supplied to the constructor). Presumably one instance describes
;; one word of a dependency parse -- see the callers for how slots are filled.
(defstruct word-dependency-spec string syn-role unit-name pos-tag node-id head-id)
;;; Helper functions.
;;; -------------------------------------------------------------------------------------
;; Export the getter and setter for the :dependency-conversion-table configuration.
(export '(fcg-get-dependency-conversion-table fcg-set-dependency-conversion-table))
(defgeneric retrieve-category-from-conversion-table (category-name conversion-table)
  (:documentation
   "Look up CATEGORY-NAME in CONVERSION-TABLE and return the category it maps to."))

(defmethod retrieve-category-from-conversion-table ((category-name t)
                                                    (conversion-table list))
  "CONVERSION-TABLE is a list of entries of the form (name category ...).
Returns the second element of the first entry whose name is EQUAL to
CATEGORY-NAME, or NIL when there is no such entry."
  ;; :test belongs to ASSOC (so that string keys match); it used to be passed
  ;; to SECOND, which accepts exactly one argument and therefore signalled an
  ;; error on every call.
  (second (assoc category-name conversion-table :test #'equal)))
(defun fcg-get-dependency-conversion-table (&optional cxn-inventory)
  "Return the :dependency-conversion-table configuration of CXN-INVENTORY.
Falls back to T when no inventory is given or when the configuration value
is NIL, so the result is never NIL."
  (let ((table (and cxn-inventory
                    (get-configuration cxn-inventory :dependency-conversion-table))))
    (if table
        table
        t)))
(defun fcg-set-dependency-conversion-table (cxn-inventory data)
  "Store DATA as the :dependency-conversion-table configuration of CXN-INVENTORY.
Returns whatever SET-CONFIGURATION returns."
  (set-configuration cxn-inventory
                     :dependency-conversion-table
                     data))
(defun calculate-boundaries-and-form-constraints (base-transient-structure
                                                  unit-tree
                                                  &optional (cxn-inventory *fcg-constructions*))
  "Compute the replacement root unit after the dependency tree was translated.
UNIT-TREE is the list of new units; it is appended to the left pole of
BASE-TRANSIENT-STRUCTURE inside a throw-away node that is only used to
recompute the boundaries. Returns a list of the form
  (root (boundaries <new boundaries>) (form <string + inferred constraints>))."
  (let* ((string-constraints
           (fcg-extract-selected-form-constraints base-transient-structure '(string)))
         ;; Scratch feature structure: the old left pole followed by the new units.
         (scratch-cfs
           (make-instance 'coupled-feature-structure
                          :left-pole (append (left-pole-structure base-transient-structure)
                                             unit-tree)))
         ;; Scratch node wrapping that structure; the boundary helpers take a node.
         (scratch-node
           (make-instance 'cip-node
                          :construction-inventory cxn-inventory
                          :car (make-cxn-application-result :resulting-cfs scratch-cfs)))
         (boundaries-before (fcg::fcg-get-boundaries base-transient-structure))
         (boundaries-after (fcg::update-list-of-boundaries boundaries-before scratch-node))
         (inferred-constraints
           (infer-all-constraints-from-boundaries
            boundaries-after
            (get-updating-references cxn-inventory)
            (fcg-get-transient-unit-structure scratch-node))))
    (list 'root
          (list 'boundaries boundaries-after)
          (list 'form (append string-constraints inferred-constraints)))))
This diff is collapsed.
This diff is collapsed.
;; Copyright 2019 Sony Computer Science Laboratories Paris
;; Remi van Trijp (http://www.remivantrijp.eu)
;; Licensed under the Apache License, Version 2.0 (the "License");
;; you may not use this file except in compliance with the License.
;; You may obtain a copy of the License at
;; http://www.apache.org/licenses/LICENSE-2.0
;; Unless required by applicable law or agreed to in writing, software
;; distributed under the License is distributed on an "AS IS" BASIS,
;; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
;; See the License for the specific language governing permissions and
;; limitations under the License.
;;=========================================================================
(in-package :fcg)
;; Basic de-render that builds on :de-render-with-scope
;; Does not assume a hybrid.
;; ------------------------------------------------------------------------------------------------------------
(defmethod de-render ((utterance list) (mode (eql :english-de-render)) &key cxn-inventory &allow-other-keys)
  "De-render a token list via :de-render-with-scope, then annotate the result.
The returned transient structure additionally carries :unknown-strings (the
tokens for which WORD-IN-DICTIONARY-P is false, without EQUALP duplicates)
and :utterance (the token list itself)."
  (let ((ts (de-render utterance :de-render-with-scope :cxn-inventory cxn-inventory)))
    ;; Start from an empty list of unknown strings.
    (set-data ts :unknown-strings nil)
    ;; Walk the tokens back to front and push every token that is neither in
    ;; the dictionary nor recorded already.
    (dolist (token (reverse utterance))
      (when (and (not (word-in-dictionary-p token cxn-inventory))
                 (not (find token (get-data ts :unknown-strings) :test #'equalp)))
        (set-data ts :unknown-strings
                  (list* token (get-data ts :unknown-strings)))))
    ;; Remember the original token list.
    (set-data ts :utterance utterance)
    ts))
(defmethod de-render ((utterance string) (mode (eql :english-de-render)) &key cxn-inventory &allow-other-keys)
  "Tokenize UTTERANCE with TOKENIZE-ENGLISH-SENTENCE and de-render the resulting
token list in :english-de-render mode."
  (declare (ignorable mode))
  (let ((tokens (tokenize-english-sentence utterance)))
    (de-render tokens :english-de-render :cxn-inventory cxn-inventory)))
;;; (defmethod de-render ((utterance list) (mode (eql :english-de-render)) &key cxn-inventory &allow-other-keys)
;;; "Takes a list of one string and de-renders the string."
;;; (assert (single-solution-p utterance))
;;; (de-render (first utterance) mode :cxn-inventory cxn-inventory))
;;; +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
;;; DE-RENDER methods for the FCG hybrid.
;;; Uses the translation functions in /hybrids/semantic-dependency-parser.lisp
;;; -----------------------------------------------------------------------------------
;;; 1. Get a syntactic dependency tree from Penelope.
;;; 2. Make the following modifications to the analysis:
;;; (a) Reconstruct adjectival compounds
;;; e.g. Re-compose ("home" "-" "brewn") into the string "home-brewn"
;;; (b) Treat named entities as single strings
;;; e.g. ("Barack" "Obama") -> "Barack Obama"
;;; (c) Treat compound nouns as single strings
;;; e.g. ("climate" "change") -> "climate change"
;;;
;;; Each time, we take the information from the last string as the information to
;;; keep in the analysis, and we throw away what was found for the preceding strings.
;;;
;;; 3. We take the list of strings that we extract from the modified dependency analysis.
;;; It is important to pass the LIST of strings instead of passing the sentence as a
;;; string, because this prevents the next method from tokenizing the sentence again and
;;; thereby creating inconsistencies in the analysis and translation.
;;; The list of strings is passed to the de-render method that specializes on
;;; :english-de-render. This de-render method will give us a basic transient structure that
;;; has all the form constraints and that has a root-unit filled in. It also adds several
;;; things to the data of the transient structure, such as the :utterance and :unknown-strings.
;;; Next, we call the translation function that populates the basic transient structure with
;;; additional units based on the modified dependency analysis.
;;; +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
(defparameter *punctuation-list*
  (map 'list #'string ".?! ,:; ()[]{}'`<>$^&*-\/#$\"")
  "One-character strings, one per character of the literal above, that are
treated as punctuation. The space character is part of the set.")
(defun punct-p (string)
  "Return the matching entry of *PUNCTUATION-LIST* when STRING is STRING= to one
of its elements, otherwise NIL."
  (let ((hit (member string *punctuation-list* :test #'string=)))
    (car hit)))
;; (punct-p "?")
;; (punct-p "a")
(defun remove-punctuation-from-list-of-strings (list-of-strings)
  "Return LIST-OF-STRINGS without the elements for which PUNCT-P is true."
  (remove-if-not (lambda (token)
                   (not (punct-p token)))
                 list-of-strings))
;; (remove-punctuation-from-list-of-strings '("John" "," "my" "friend" "," "where" "are" "you" "?"))
(defmethod de-render ((utterance string) (mode (eql :english-with-dependency-parser))
                      &key cxn-inventory &allow-other-keys)
  "De-render UTTERANCE with the help of a dependency analysis.
The sentence is preprocessed (compounds, named entities, adverb promotion,
punctuation removal), the resulting tokens are de-rendered in
:english-de-render mode, and the dependency analysis is translated into units.
CXN-INVENTORY defaults to *FCG-CONSTRUCTIONS* when not supplied."
  (declare (ignorable mode))
  (let ((inventory (or cxn-inventory *fcg-constructions*))
        ;; The order of the steps is kept exactly as listed.
        (steps (list #'dependency-string-append-compounds-in-np
                     #'dependency-string-append-named-entities
                     #'dependency-string-append-compounds
                     #'dependency-string-promote-adverbs-in-np
                     #'dependency-remove-punct)))
    (multiple-value-bind (analysis tokens)
        (preprocess-using-dependency-tree utterance
                                          :preprocessing-steps steps
                                          :cxn-inventory inventory)
      ;; Punctuation tokens are dropped for the time being.
      (let ((ts (de-render (remove-punctuation-from-list-of-strings tokens)
                           :english-de-render
                           :cxn-inventory inventory)))
        ;; Record (token tag) pairs and the analysis itself on the structure.
        (set-data ts :pos-tags
                  (mapcar (lambda (constituent)
                            (list (nlp-tools::dp-get-token constituent)
                                  (nlp-tools::dp-get-tag constituent)))
                          analysis))
        (set-data ts :dependency-analysis analysis)
        (translate-dependency-tree ts analysis *dependency-specs*)))))
(export '(translate-and-show))

(defun translate-and-show (utterance &key (mode :english-with-dependency-parser))
  "Call SHOW-TRANSLATED-SENTENCE on UTTERANCE with de-render MODE, pretty-print
the :dependency-analysis stored on the result, and return the result."
  (let ((shown (show-translated-sentence utterance mode)))
    (prog1 shown
      (pprint (get-data shown :dependency-analysis)))))
;; Copyright 2019 Sony Computer Science Laboratories Paris
;; Remi van Trijp (http://www.remivantrijp.eu)
;; Licensed under the Apache License, Version 2.0 (the "License");
;; you may not use this file except in compliance with the License.
;; You may obtain a copy of the License at
;; http://www.apache.org/licenses/LICENSE-2.0
;; Unless required by applicable law or agreed to in writing, software
;; distributed under the License is distributed on an "AS IS" BASIS,
;; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
;; See the License for the specific language governing permissions and
;; limitations under the License.
;;=========================================================================
(in-package :fcg)
;;; +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
;;; DE-RENDER methods for the FCG hybrid.
;;; Uses the translation functions in /hybrids/semantic-dependency-parser.lisp
;;; -----------------------------------------------------------------------------------
;;; 1. Get a syntactic dependency tree from Penelope.
;;; 2. Make the following modifications to the analysis:
;;; (a) Reconstruct adjectival compounds
;;; e.g. Re-compose ("home" "-" "brewn") into the string "home-brewn"
;;; (b) Treat named entities as single strings
;;; e.g. ("Barack" "Obama") -> "Barack Obama"
;;; (c) Treat compound nouns as single strings
;;; e.g. ("climate" "change") -> "climate change"
;;;
;;; Each time, we take the information from the last string as the information to
;;; keep in the analysis, and we throw away what was found for the preceding strings.
;;;
;;; 3. We take the list of strings that we extract from the modified dependency analysis.
;;; It is important to pass the LIST of strings instead of passing the sentence as a
;;; string, because this prevents the next method from tokenizing the sentence again and
;;; thereby creating inconsistencies in the analysis and translation.
;;; The list of strings is passed to the de-render method that specializes on
;;; :english-de-render. This de-render method will give us a basic transient structure that
;;; has all the form constraints and that has a root-unit filled in. It also adds several
;;; things to the data of the transient structure, such as the :utterance and :unknown-strings.
;;; Next, we call the translation function that populates the basic transient structure with
;;; additional units based on the modified dependency analysis.
;;; +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
(defmethod de-render ((utterance string) (mode (eql :english-with-dependency-parser))
                      &key cxn-inventory &allow-other-keys)
  "De-render UTTERANCE with the help of a dependency analysis.
The sentence is preprocessed (compounds, named entities, punctuation removal),
the resulting token list is de-rendered in :english-de-render mode, and the
dependency analysis is translated into units.
CXN-INVENTORY defaults to *FCG-CONSTRUCTIONS* when not supplied."
  (declare (ignorable mode))
  (let ((inventory (or cxn-inventory *fcg-constructions*))
        ;; The order of the steps is kept exactly as listed.
        (steps (list #'dependency-string-append-compounds-in-np
                     #'dependency-string-append-named-entities
                     #'dependency-string-append-compounds
                     #'dependency-remove-punct)))
    (multiple-value-bind (analysis tokens)
        (preprocess-using-dependency-tree utterance
                                          :preprocessing-steps steps
                                          :cxn-inventory inventory)
      (let ((ts (de-render tokens :english-de-render :cxn-inventory inventory)))
        ;; Record (token tag) pairs on the transient structure.
        (set-data ts :pos-tags
                  (mapcar (lambda (constituent)
                            (list (nlp-tools:dp-get-token constituent)
                                  (nlp-tools:dp-get-tag constituent)))
                          analysis))
        (translate-dependency-tree ts analysis *dependency-specs*)))))
;; (show-translated-sentence "Sales of home-brewn beer have fallen 2% in 50 years, affecting beer fanatics.")
;; Copyright 2019 Sony Computer Science Laboratories Paris
;; Remi van Trijp (http://www.remivantrijp.eu)
;; Licensed under the Apache License, Version 2.0 (the "License");
;; you may not use this file except in compliance with the License.
;; You may obtain a copy of the License at
;; http://www.apache.org/licenses/LICENSE-2.0
;; Unless required by applicable law or agreed to in writing, software
;; distributed under the License is distributed on an "AS IS" BASIS,
;; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
;; See the License for the specific language governing permissions and
;; limitations under the License.
;;=========================================================================
(in-package :fcg)
(defmethod render ((cfs coupled-feature-structure)
                   (mode (eql :english-render))
                   &key node &allow-other-keys)
  "Render the left pole of CFS in the same MODE, forwarding NODE."
  (let ((left-pole (left-pole-structure cfs)))
    (render left-pole mode :node node)))
;; Very simplistic render, should be updated for including punctuation, etc.
(defmethod render ((unit-structure list)
                   (mode (eql :english-render))
                   &key node &allow-other-keys)
  "Render UNIT-STRUCTURE with :render-with-scope and upcase the first character
of the first token. Returns the list of tokens, or NIL when the underlying
render produced nothing. All other characters are left untouched."
  ;; First we get the utterance as we would normally get.
  (let ((utterance (render unit-structure :render-with-scope :node node)))
    (when utterance
      (let ((first-word (first utterance)))
        (if (or
             ;; An empty first token has no character to upcase; the previous
             ;; code passed NIL to CHAR-UPCASE here.
             (zerop (length first-word))
             ;; Already upper case, or a character without case (digit, ...).
             (char= (char-upcase (char first-word 0)) (char first-word 0)))
            utterance
            ;; Only touch the first character. STRING-CAPITALIZE would also
            ;; downcase the remainder and capitalize every word of a
            ;; multi-word token (\"climate change\" -> \"Climate Change\").
            (cons (concatenate 'string
                               (string (char-upcase (char first-word 0)))
                               (subseq first-word 1))
                  (rest utterance)))))))
;; Copyright 2019 Sony Computer Science Laboratories Paris
;; Remi van Trijp (http://www.remivantrijp.eu)
;; Licensed under the Apache License, Version 2.0 (the "License");
;; you may not use this file except in compliance with the License.
;; You may obtain a copy of the License at
;; http://www.apache.org/licenses/LICENSE-2.0
;; Unless required by applicable law or agreed to in writing, software
;; distributed under the License is distributed on an "AS IS" BASIS,
;; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
;; See the License for the specific language governing permissions and
;; limitations under the License.
;;=========================================================================
(in-package :fcg)
;;;;; Example of the basic syntactic dependency parser.
;;;;; -------------------------------------------------
(defmethod de-render ((utterance string) (mode (eql :example-dependency-translation))
                      &key cxn-inventory &allow-other-keys)
  "Example de-render: run PREPROCESS-USING-DEPENDENCY-TREE without extra
preprocessing steps and translate the resulting dependency tree using the
conversion table obtained from CXN-INVENTORY."
  (declare (ignorable mode))
  (multiple-value-bind (tree tokens initial-structure)
      (preprocess-using-dependency-tree utterance :cxn-inventory cxn-inventory)
    ;; The token list is returned by the preprocessor but not needed here.
    (declare (ignorable tokens))
    (let ((conversion-table (fcg-get-dependency-conversion-table cxn-inventory)))
      (translate-dependency-tree initial-structure tree conversion-table))))
;; (show-translated-sentence "Oxygen levels in oceans have fallen 2%, affecting marine habitat and large fish." :example-dependency-translation)
(defmethod de-render ((utterance string) (mode (eql :example-dependency-translation-with-preprocessing))
                      &key cxn-inventory &allow-other-keys)
  "Example de-render with preprocessing: compounds inside NPs, named entities
and compounds are merged (in that order) before the dependency tree is
translated using the conversion table obtained from CXN-INVENTORY."
  (declare (ignorable mode))
  ;; These steps are executed in order!!
  (let ((steps (list #'dependency-string-append-compounds-in-np
                     #'dependency-string-append-named-entities
                     #'dependency-string-append-compounds)))
    (multiple-value-bind (tree tokens initial-structure)
        (preprocess-using-dependency-tree utterance
                                          :preprocessing-steps steps
                                          :cxn-inventory cxn-inventory)
      ;; The token list is returned by the preprocessor but not needed here.
      (declare (ignorable tokens))
      (let ((conversion-table (fcg-get-dependency-conversion-table cxn-inventory)))
        (translate-dependency-tree initial-structure tree conversion-table)))))
;; (show-translated-sentence "The ever-popular Barack Obama warned about the falling oxygen levels in oceans." :example-dependency-translation-with-preprocessing)
\ No newline at end of file
;; Copyright 2019 Sony Computer Science Laboratories Paris
;; Remi van Trijp (http://www.remivantrijp.eu)
;; Licensed under the Apache License, Version 2.0 (the "License");
;; you may not use this file except in compliance with the License.
;; You may obtain a copy of the License at
;; http://www.apache.org/licenses/LICENSE-2.0
;; Unless required by applicable law or agreed to in writing, software
;; distributed under the License is distributed on an "AS IS" BASIS,
;; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
;; See the License for the specific language governing permissions and
;; limitations under the License.
;;=========================================================================
(in-package #:asdf)
;; ASDF system definition for the fcg-hybrids library.
;; Both the top level and the "english-hybrids" module are declared :serial,
;; i.e. each component depends on all components listed before it, so the
;; files are compiled and loaded in the order written below.
(defsystem :fcg-hybrids
;; Systems that must be loaded before this one.
:depends-on (:fcg
:xmls
:nlp-tools
:plot-raw-data
:category-hierarchies)
:serial t
:components ((:file "data-structures-and-utils")
(:file "preprocessing")
(:file "syn-dependency")
(:file "examples")
;; Files of this module live in the "english-hybrids" subdirectory.
(:module "english-hybrids"
:serial t
:components ((:file "clear-dependency-tags")
(:file "categories")
(:file "translate-dependency-tree")
(:file "render")
(:file "de-render")))))
\ No newline at end of file
This diff is collapsed.
;; Copyright 2019 Sony Computer Science Laboratories Paris
;; Remi van Trijp (http://www.remivantrijp.eu)
;; Licensed under the Apache License, Version 2.0 (the "License");
;; you may not use this file except in compliance with the License.
;; You may obtain a copy of the License at
;; http://www.apache.org/licenses/LICENSE-2.0
;; Unless required by applicable law or agreed to in writing, software
;; distributed under the License is distributed on an "AS IS" BASIS,
;; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
;; See the License for the specific language governing permissions and
;; limitations under the License.
;;=========================================================================
; (ql:quickload :fcg-hybrids)
(in-package :fcg)
;; Symbols made available to users of the :fcg package.
(export '(fcg-get-dependency-conversion-table
fcg-set-dependency-conversion-table
translate-dependency-tree
make-word-specs-for-boundaries
show-translated-sentence))
;;; Translate dependency tree.
;;; -------------------------------------------------------------------------------------
;; Generic entry point taking three required arguments plus an optional
;; :cxn-inventory keyword; &allow-other-keys lets methods and callers pass
;; additional keywords without an error.
(defgeneric translate-dependency-tree (base-transient-structure
dependency-tree
conversion-table
&key cxn-inventory &allow-other-keys))
(defun make-word-specs-for-boundaries (boundaries dependency-tree)
  "Pair BOUNDARIES and DEPENDENCY-TREE element by element and return one
WORD-DEPENDENCY-SPEC per pair; pairing stops at the end of the shorter list.
The unit name is the first element of the boundary; the node id is converted
with PARSE-INTEGER, while the head id is stored as returned by the parser.
Handy when customizing the TRANSLATE-DEPENDENCY-TREE method."
  (flet ((spec-for (boundary dependency)
           (make-word-dependency-spec
            :string (nlp-tools::dp-get-token dependency)
            :unit-name (first boundary)
            :syn-role (nlp-tools::dp-get-dependency dependency)
            :pos-tag (nlp-tools::dp-get-tag dependency)
            :node-id (parse-integer (nlp-tools::dp-get-node-id dependency))
            :head-id (nlp-tools::dp-get-head-id dependency))))
    (mapcar #'spec-for boundaries dependency-tree)))
;; Default translation: builds one unit per word of the dependency tree,
;; replaces the left pole of BASE-TRANSIENT-STRUCTURE by a freshly computed
;; root unit followed by those units, and returns the (destructively modified)
;; BASE-TRANSIENT-STRUCTURE. CONVERSION-TABLE is accepted but not consulted here.
(defmethod translate-dependency-tree ((base-transient-structure coupled-feature-structure)
(dependency-tree list)
(conversion-table t)
&key cxn-inventory &allow-other-keys)
;; The inventory is not used: calculate-boundaries-and-form-constraints below
;; is called without it, so that function's own default applies.
(declare (ignore cxn-inventory))
(let* ((boundaries (fcg-get-boundaries base-transient-structure))
(word-specs (make-word-specs-for-boundaries boundaries dependency-tree))
;; Lookup list of (node-id unit-name) pairs, used to find a head's unit name.
(units-id-and-name (loop for spec in word-specs
collect (list (word-dependency-spec-node-id spec)
(word-dependency-spec-unit-name spec))))
(structure-to-append (loop for word-spec in word-specs
;; PARENT is NIL when a word is its own head (presumably the root of the
;; tree -- confirm against the parser output); otherwise the head's unit name.
;; NOTE(review): = signals an error unless both ids are numbers.
for parent = (unless (= (word-dependency-spec-node-id word-spec)
(word-dependency-spec-head-id word-spec))
(second (assoc (word-dependency-spec-head-id word-spec)
units-id-and-name)))
;; SUBUNITS: unit names of all other words whose head is this word.
for subunits = (loop for other-word-spec in word-specs
when (and
(not (equal word-spec other-word-spec))
(= (word-dependency-spec-head-id other-word-spec)
(word-dependency-spec-node-id word-spec)))
collect (word-dependency-spec-unit-name other-word-spec))
collect (make-unit :name (word-dependency-spec-unit-name word-spec)
:features `((parent ,parent)
(subunits ,subunits)
(form ((string ,(word-dependency-spec-unit-name word-spec)
,(word-dependency-spec-string word-spec))))
;; POS tag and dependency label are passed through UPCASE and
;; interned as symbols in the :fcg package.
(dependency
((pos-tag
,(intern (upcase (word-dependency-spec-pos-tag word-spec)) :fcg))
(edge
,(intern (upcase (word-dependency-spec-syn-role word-spec)) :fcg))))))))
(new-root (calculate-boundaries-and-form-constraints base-transient-structure structure-to-append)))
;; Replace the whole left pole: new root first, then the word units.
(setf (left-pole-structure base-transient-structure)
(cons new-root structure-to-append))
base-transient-structure))
;;; Showing the tree.
;;; -------------------------------------------------------------------------------------
(defun show-translated-sentence (utterance de-render-mode &key (cxn-inventory *fcg-constructions*))
  "De-render UTTERANCE in DE-RENDER-MODE, add an HTML rendering of the resulting
transient structure via ADD-ELEMENT (for display in the web interface), and
return the transient structure."
  (let* ((translated (de-render utterance de-render-mode
                                :cxn-inventory cxn-inventory))
         (html (make-html-fcg-light translated
                                    :cxn-inventory cxn-inventory
                                    :configuration (visualization-configuration cxn-inventory)
                                    :feature-types (feature-types cxn-inventory))))
    (add-element html)
    translated))
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment