Commit 1d951154 authored by remivantrijp's avatar remivantrijp
Browse files

Obsolete (removed)

parent 387402ec
;; Copyright 2020 Sony Computer Science Laboratories Paris
;; Remi van Trijp (http://www.remivantrijp.eu)
;; Licensed under the Apache License, Version 2.0 (the "License");
;; you may not use this file except in compliance with the License.
;; You may obtain a copy of the License at
;; http://www.apache.org/licenses/LICENSE-2.0
;; Unless required by applicable law or agreed to in writing, software
;; distributed under the License is distributed on an "AS IS" BASIS,
;; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
;; See the License for the specific language governing permissions and
;; limitations under the License.
;;=========================================================================
(in-package :fcg)
(defparameter *benepar-conversion-table* nil)
(setf *benepar-conversion-table* '((np np)))
(defun fetch-benepar-category (category)
"Find a conversion category, if not return itself."
(or (second (assoc category *benepar-conversion-table* :test #'string= :key #'symbol-name))
category))
;; (fetch-benepar-category 'np)
;; Copyright 2019 Sony Computer Science Laboratories Paris
;; Remi van Trijp (http://www.remivantrijp.eu)
;; Licensed under the Apache License, Version 2.0 (the "License");
;; you may not use this file except in compliance with the License.
;; You may obtain a copy of the License at
;; http://www.apache.org/licenses/LICENSE-2.0
;; Unless required by applicable law or agreed to in writing, software
;; distributed under the License is distributed on an "AS IS" BASIS,
;; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
;; See the License for the specific language governing permissions and
;; limitations under the License.
;;=========================================================================
;;; ----------------------------------------------------------------------------
;;; *IMPORTANT*: Any changes in the *english-grammar-categories* must remain
;;; compatible with the "type hierarchy" at the end of the file, which is based
;;; on Paul Van Eecke's type hierarchies (see Van Eecke, 2018).
;;; ----------------------------------------------------------------------------
(in-package :fcg)
(export '(clause clausal-subject matrix-clause subclause intransitive-clause transitive-clause ditransitive-clause
phrase NP VP dativeNP advP PrepNP by-phrase
verb aux auxpass
noun compound proper-noun common-noun adjective participle
determiner article quantifier pronoun personal-pronoun possessive-pronoun demonstrative
demonstrative-pronoun args referent
discourse-structure object-complement
functional-structure head *english-grammar-categories*))
;;; 1. Definition of the English grammar categories.
;;; ------------------------------------------------------------------------------
(defparameter *english-grammar-categories* nil "Contains a type hierarchy of the categories of the grammar.")
(setf *english-grammar-categories*
'(;; CLAUSES
;; ---------------------------------------------------------------
(clause ()
(referent ?ref)
(syn-cat ((is-matrix-clause ?plus-or-minus)
(clause-type ?clause-type))))
(matrix-clause (clause)
(syn-cat ((is-matrix-clause +))))
(subclause (clause)
(syn-cat ((is-matrix-clause -))))
;; Phrases
;; ---------------------------------------------------------------
(phrase ()
(referent ?ref)
(parent ?parent)
(head ?head)
(syn-cat ((phrase-type ?phrase-type)
(agreement ?agreement))))
(NP (phrase)
(syn-cat ((phrase-type NP))))
(advp (phrase)
(syn-cat ((phrase-type AdvP))))
(PrepNP (NP)
(syn-cat ((phrase-type PrepNP))))
(by-phrase (PrepNP)
(syn-cat ((case by-phrase))))
(AdjP (phrase)
(syn-cat ((phrase-type AdjP))))
(object-complement (phrase)
(syn-cat ((phrase-type Object-Complement))))
(VP (phrase)
(referent ?ev-ref)
(tam ((tense ?tense)
(aspect ((perfect ?perfect)
(progressive ?prog)))
(modality ?modality)))
(syn-cat ((phrase-type VP))))
;; Mixtures
;; ---------------------------------------------------------------
(clausal-subject (subclause NP))
;; Lex-classes
;; ---------------------------------------------------------------
(verb ()
(syn-cat ((lex-class verb)
(agreement ?agr)
(finite ?finite)
(verb-form ?verb-form))))
(aux (verb)
(syn-cat ((lex-class aux))))
(auxpass (aux)
(syn-cat ((is-passive-marker +))))
(noun ()
(referent ?ref)
(args (?output ?input))
(syn-cat ((agreement ?agr)
(lex-class noun))))
(common-noun (noun)
(syn-cat ((lex-class common-noun))))
(compound (noun))
(proper-noun (noun)
(syn-cat ((lex-class proper-noun))))
(adjective ()
(referent ?ref)
(args (?output ?input))
(syn-cat ((lex-class adjective))))
(determiner ()
(referent ?ref)
(args (?ref ?input))
(syn-cat ((lex-class determiner))))
(article (determiner)
(syn-cat ((lex-class article))))))
;;; 2. Definition of a CATEGORY-SPEC.
;;; ------------------------------------------------------------------------------
;;; A category-spec is a triplet of the form (category-name (parents) (fv-pairs)).
;;; The feature-value pairs of the category-spec are meant to be the VALUE of a
;;; feature in the construction, such as SYN. Overwritting a value only goes
;;; one level deep.
(defun category-name (category-spec)
(first category-spec))
(defun category-parents (category-spec)
(second category-spec))
(defun category-features (category-spec)
(when category-spec
(subseq category-spec 2)))
(defun find-feature-for-category (feature-name category category-tree &key (features-so-far))
"Return only a specific feature-value pair for a category."
(let* ((category-spec (assoc category category-tree))
(parents (category-parents category-spec)))
(tagbody
point-a
(setf features-so-far (union features-so-far
(second (assoc feature-name (category-features category-spec)))
:key #'first))
(when parents
(setf category-spec (assoc (first parents) category-tree)
parents (append (rest parents) (category-parents category-spec)))
(go point-a)))
`(,feature-name ,features-so-far)))
(defun find-feature-value-for-category (feature-name category category-tree &key (features-so-far))
(feature-value (find-feature-for-category feature-name category category-tree :features-so-far features-so-far)))
(defun first-or-var (list-or-var)
"Helper function that returns either the argument if it's a variable, and its first element if it is a list."
(if (variable-p list-or-var)
list-or-var
(first list-or-var)))
(defun find-all-features-for-category (category category-tree &key (features-so-far nil))
"Given a category-name, obtain all the features or a particular feature associated with it."
(let* ((category-spec (assoc (if (consp category) (second category) category) category-tree))
(features (fcg::rename-variables (category-features category-spec))))
(loop for fv-pair in features
for old-feature = (assoc (feature-name fv-pair) features-so-far)
for new-feature = `(,(feature-name fv-pair)
,(let ((old-value (feature-value old-feature))
(new-value (feature-value fv-pair)))
(cond ((null old-value) new-value)
((null new-value) old-value)
((variable-p old-value) new-value)
((variable-p new-value) old-value)
(t
(union old-value new-value :key #'first)))))
do (setf features-so-far (if old-feature
(substitute new-feature old-feature features-so-far :test #'equal)
(cons new-feature features-so-far))))
(loop for parent in (category-parents category-spec)
do (setf features-so-far (find-all-features-for-category parent category-tree
:features-so-far features-so-far)))
features-so-far))
;;; 3. Expansion operator to be used in FCG constructions
;;; ------------------------------------------------------------------------------
;;; (defmethod fcg-expand ((type (eql :category-tree)) &key value source bindings merge?))
;;;
(defparameter *spacy-pos-tag-conversion-table* nil "List of POS Tags and their corresponding categories in the grammar.")
;; Note: this conversion table is far from complete, but contains only relevant conversions.
(setf *spacy-pos-tag-conversion-table*
'(;;("AFX" affix)
;;("JJ" adjective)
;;("JJR" adjective)
;;("JJS" adjective)
("NN" noun)
("NNS" noun)
("NNP" proper-noun)
("NNPS" proper-noun)
("VB" verb)
("VBD" verb)
("VBG" verb)
("VBN" verb)
("VBP" verb)
("VBZ" verb)
("DT" determiner)
("JJ" adjective)
("PRP$" possessive-pronoun)))
\ No newline at end of file
;; Copyright 2019 Sony Computer Science Laboratories Paris
;; Remi van Trijp (http://www.remivantrijp.eu)
;; Licensed under the Apache License, Version 2.0 (the "License");
;; you may not use this file except in compliance with the License.
;; You may obtain a copy of the License at
;; http://www.apache.org/licenses/LICENSE-2.0
;; Unless required by applicable law or agreed to in writing, software
;; distributed under the License is distributed on an "AS IS" BASIS,
;; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
;; See the License for the specific language governing permissions and
;; limitations under the License.
;;=========================================================================
(in-package :fcg)
;; Basic de-render that builds on :de-render-with-scope
;; Does not assume a hybrid.
;; ------------------------------------------------------------------------------------------------------------
(defmethod de-render ((utterance list) (mode (eql :english-de-render)) &key cxn-inventory &allow-other-keys)
"De-render based on :de-render-with-scope but does some post-processing."
(let ((transient-structure
(de-render utterance :de-render-with-scope :cxn-inventory cxn-inventory)))
;; Check for unknown strings.
(set-data transient-structure :unknown-strings nil)
(loop for string in (reverse utterance)
unless (or (word-in-dictionary-p string cxn-inventory)
(find string (get-data transient-structure :unknown-strings) :test #'equalp))
;;no lex cxn existing
do (set-data transient-structure :unknown-strings
(cons string (get-data transient-structure :unknown-strings))))
;; Keep the utterance.
(set-data transient-structure :utterance utterance)
;; We return the transient structure:
transient-structure))
(defmethod de-render ((utterance string) (mode (eql :english-de-render)) &key cxn-inventory &allow-other-keys)
"De-renders just like de-render-with-scope, but uses English tokenizer to split the utterance into tokens."
(declare (ignorable mode))
(de-render (tokenize-english-sentence utterance) :english-de-render :cxn-inventory cxn-inventory))
;;; (defmethod de-render ((utterance list) (mode (eql :english-de-render)) &key cxn-inventory &allow-other-keys)
;;; "Takes a list of one string and de-renders the string."
;;; (assert (single-solution-p utterance))
;;; (de-render (first utterance) mode :cxn-inventory cxn-inventory))
;;; +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
;;; DE-RENDER methods for the FCG hybrid.
;;; Uses the translation functions in /hybrids/semantic-dependency-parser.lisp
;;; -----------------------------------------------------------------------------------
;;; 1. Get a syntactic dependency tree from Penelope.
;;; 2. Make the following modifications to the analysis:
;;; (a) Reconstruct adjectival compounds
;;; e.g. Re-compose ("home" "-" "brewn") into the string "home-brewn"
;;; (b) Treat named entities as single strings
;;; e.g. ("Barack" "Obama") -> "Barack Obama"
;;; (c) Treat compound nouns as single strings
;;; e.g. ("climate" "change") -> "climate change"
;;;
;;; Each time, we take the information from the last string as the information to
;;; keep in the analysis, and we throw away what was found for the preceding strings.
;;;
;;; 3. We take the list of strings that we extract from the modified dependency analysis.
;;; It is important to pass the LIST of strings instead of passing the sentence as a
;;; string, because this avoids the next method to perform tokenization again and thereby
;;; create inconsistencies in the analysis and translation.
;;; The list of strings is passed to the de-render method that specializes on
;;; :english-de-render. This de-render method will give us a basic transient structure that
;;; has all the form constraints and that has a root-unit filled in. It also adds several
;;; things to the data of the transient structure, such as the :utterance and :unknown-strings.
;;; Next, we call the translation function that populates the basic transient structure with
;;; additional units based on the modified dependency analysis.
;;; +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
(defparameter *punctuation-list* (loop for char in (coerce ".?! ,:; ()[]{}'`<>$^&*-\/#$\"" 'list)
collect (format nil "~a" char))
"List of what counts as a punctuation character.")
(defun punct-p (string)
(find string *punctuation-list* :test #'string=))
;; (punct-p "?")
;; (punct-p "a")
(defun remove-punctuation-from-list-of-strings (list-of-strings)
(remove-if #'punct-p list-of-strings))
;; (remove-punctuation-from-list-of-strings '("John" "," "my" "friend" "," "where" "are" "you" "?"))
(defmethod de-render ((utterance string) (mode (eql :english-with-dependency-parser))
&key cxn-inventory &allow-other-keys)
(declare (ignorable mode))
(unless cxn-inventory (setf cxn-inventory *fcg-constructions*))
(multiple-value-bind (syntactic-analysis utterance-as-list)
(preprocess-using-dependency-tree utterance
:preprocessing-steps (list #'dependency-string-append-compounds-in-np
#'dependency-string-append-named-entities
#'dependency-string-append-compounds
#'dependency-string-promote-adverbs-in-np
#'dependency-remove-punct)
:cxn-inventory cxn-inventory)
(let ((basic-transient-structure (de-render
;; We remove punctuation for the time being.
(remove-punctuation-from-list-of-strings utterance-as-list)
:english-de-render :cxn-inventory cxn-inventory)))
(set-data basic-transient-structure :pos-tags
(loop for constituent in syntactic-analysis
collect (list (nlp-tools::dp-get-token constituent)
(nlp-tools::dp-get-tag constituent))))
(set-data basic-transient-structure :dependency-analysis syntactic-analysis)
(translate-dependency-tree basic-transient-structure syntactic-analysis *dependency-specs*))))
(export '(translate-and-show))
(defun translate-and-show (utterance &key (mode :english-with-dependency-parser))
(let ((transient-structure (show-translated-sentence utterance mode)))
(pprint (get-data transient-structure :dependency-analysis))
transient-structure))
;; Copyright 2019 Sony Computer Science Laboratories Paris
;; Remi van Trijp (http://www.remivantrijp.eu)
;; Licensed under the Apache License, Version 2.0 (the "License");
;; you may not use this file except in compliance with the License.
;; You may obtain a copy of the License at
;; http://www.apache.org/licenses/LICENSE-2.0
;; Unless required by applicable law or agreed to in writing, software
;; distributed under the License is distributed on an "AS IS" BASIS,
;; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
;; See the License for the specific language governing permissions and
;; limitations under the License.
;;=========================================================================
(in-package :fcg)
;;; +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
;;; DE-RENDER methods for the FCG hybrid.
;;; Uses the translation functions in /hybrids/semantic-dependency-parser.lisp
;;; -----------------------------------------------------------------------------------
;;; 1. Get a syntactic dependency tree from Penelope.
;;; 2. Make the following modifications to the analysis:
;;; (a) Reconstruct adjectival compounds
;;; e.g. Re-compose ("home" "-" "brewn") into the string "home-brewn"
;;; (b) Treat named entities as single strings
;;; e.g. ("Barack" "Obama") -> "Barack Obama"
;;; (c) Treat compound nouns as single strings
;;; e.g. ("climate" "change") -> "climate change"
;;;
;;; Each time, we take the information from the last string as the information to
;;; keep in the analysis, and we throw away what was found for the preceding strings.
;;;
;;; 3. We take the list of strings that we extract from the modified dependency analysis.
;;; It is important to pass the LIST of strings instead of passing the sentence as a
;;; string, because this avoids the next method to perform tokenization again and thereby
;;; create inconsistencies in the analysis and translation.
;;; The list of strings is passed to the de-render method that specializes on
;;; :english-de-render. This de-render method will give us a basic transient structure that
;;; has all the form constraints and that has a root-unit filled in. It also adds several
;;; things to the data of the transient structure, such as the :utterance and :unknown-strings.
;;; Next, we call the translation function that populates the basic transient structure with
;;; additional units based on the modified dependency analysis.
;;; +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
(defmethod de-render ((utterance string) (mode (eql :english-with-dependency-parser))
&key cxn-inventory &allow-other-keys)
(declare (ignorable mode))
(unless cxn-inventory (setf cxn-inventory *fcg-constructions*))
(multiple-value-bind (syntactic-analysis utterance-as-list)
(preprocess-using-dependency-tree utterance
:preprocessing-steps (list #'dependency-string-append-compounds-in-np
#'dependency-string-append-named-entities
#'dependency-string-append-compounds
#'dependency-remove-punct)
:cxn-inventory cxn-inventory)
(let ((basic-transient-structure (de-render utterance-as-list :english-de-render :cxn-inventory cxn-inventory)))
(set-data basic-transient-structure :pos-tags
(loop for constituent in syntactic-analysis
collect (list (nlp-tools:dp-get-token constituent)
(nlp-tools:dp-get-tag constituent))))
(translate-dependency-tree basic-transient-structure syntactic-analysis *dependency-specs*))))
;; (show-translated-sentence "Sales of home-brewn beer have fallen 2% in 50 years, affecting beer fanatics." :english-with-dependency-parser)
;; Copyright 2019 Sony Computer Science Laboratories Paris
;; Remi van Trijp (http://www.remivantrijp.eu)
;; Licensed under the Apache License, Version 2.0 (the "License");
;; you may not use this file except in compliance with the License.
;; You may obtain a copy of the License at
;; http://www.apache.org/licenses/LICENSE-2.0
;; Unless required by applicable law or agreed to in writing, software
;; distributed under the License is distributed on an "AS IS" BASIS,
;; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
;; See the License for the specific language governing permissions and
;; limitations under the License.
;;=========================================================================
(in-package :fcg)
(defmethod render ((cfs coupled-feature-structure)
(mode (eql :english-render))
&key node &allow-other-keys)
(render (left-pole-structure cfs) mode :node node))
;; Very simplistic render, should be updated for including punctuation, etc.
(defmethod render ((unit-structure list)
(mode (eql :english-render))
&key node &allow-other-keys)
;; First we get the utterance as we would normally get.
(let ((utterance (render unit-structure :render-with-scope :node node)))
(when utterance
(let ((first-char (first (coerce (first utterance) 'list))))
(if (char= (char-upcase first-char) first-char)
utterance
(cons (string-capitalize (first utterance))
(rest utterance)))))))
This diff is collapsed.
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment