/**
 * Nodes are a part of the Captions.
 *
 * They take the elements of captions (which, although we encourage them to be words, or at least intonable sounds,
 * have no strict semantic meaning except "a string with a time attached"), and combine them into "words". Word nodes
 * can then further be combined to "phrases", "sentences" and so on.
 *
 * That is to say, in their most complex form, nodes model a dependency tree. In the simplest form (if most of the
 * node types are simply missing), they just help us define word boundaries.
 *
 * In their role as word nodes, they can also serve to link a particular word in the text to a semantic meaning
 * of the word. If the word is "seat", are we talking about a chair or a seat in parliament? The word id will
 * be able to make this clear.
 *
 * Annotations can be attached to nodes. It is important that these are two different concepts. In theory, the
 * information contained in nodes could instead be added as annotations to the constituent text parts (elements),
 * for example, an annotation saying "these two vocables make up a compound word". But once we want to annotate
 * a sentence, we'd then have to annotate those individual-word annotations together, which gets confusing.
 *
 * Captions + Nodes together give us the sentence structure, identify clickable ideas which span more than one
 * word, etc. You could imagine a completely different format for the combination, for example an XML tree, or
 * a less flexible structure where, from the beginning, you can only add timings to a single unit, and that unit
 * can be either a word or a phrase, but not both (that is, maybe we are overdesigning).
 */

import {Annotation, DatabaseWordId} from "../annotations";
import {VocableId} from "./formats/CaptionTrack";


/**
 * Identifier of a structure node; always a plain string.
 *
 * We used to allow either a string or a number here, but since that does not make sense for
 * VocableIds, we don't allow it here either.
 */
export type StructureNodeId = string;


/**
 * Fields shared by every node type.
 */
interface BaseNode {
  /** Identifier of this node, so that other nodes can refer to it. */
  id: StructureNodeId;

  /** The vocables this node is applied to. */
  vocables: VocableId[];
}


/**
 * A single part of a word's structure.
 *
 * `D` is the type of the optional `data` payload. With the default of `never`, `data` can only be
 * left out (or be `undefined`).
 */
export type WordStructurePart<D = never> = {
  // NOTE(review): presumably the written form of this part — confirm.
  orth: string;
  type: string;
  value: string;
  // Optional extra payload attached to the part; its type is supplied via `D`.
  data?: D;
};


/**
 * A word described as a list of parts.
 *
 * `D` is passed through to every part as the type of its `data` payload.
 */
export type WordStructure<D = never> = {
  parts: WordStructurePart<D>[];
};


/**
 * Associates a word with a single vocable, or with a set of vocables.
 *
 * `D` is the type used to describe the word's inflection.
 *
 * NOTE(review): the previous comment here broke off mid-sentence ("If the word id given is just a text
 * string, then this just tell"); what a plain-text word id means still needs to be documented.
 */
export interface WordNode<D = object> extends BaseNode {
  type: 'word';

  /** Which word this is. This will almost always be a link. */
  wordId: DatabaseWordId;

  /** Which grammar form this is. This would depend on the language. */
  inflection?: D;

  /** The word in parts, as split up by the grammar. */
  structure?: WordStructure;

  /**
   * It is possible to set up the node manually. We use this in particular in cases such as
   * "rize rize rize", which should simply link to the entry for "rize rize".
   */
  isManual?: {
    diacritization: string;
  };
}


/**
 * Marks the vocables it is applied to as a numeral.
 */
export interface NumeralNode extends BaseNode {
  type: 'numeral';

  /** The actual number represented. */
  quantity: number;

  /** True for an ordinal number; otherwise the numeral is a cardinal. */
  isOrdinal: boolean;

  // NOTE(review): undocumented in the original; what counts as "literal" should be confirmed.
  isLiteral: boolean;

  wordId?: DatabaseWordId;

  /** The word in parts, as split up by the grammar. */
  structure?: WordStructure;
}


/**
 * We generally prefer English words to be spelled in Farsi, for example `jeypeg` for JPEG. In particular,
 * someone might say `jeypeg-ha`, so clearly this is real Farsi grammar.
 *
 * However, in some cases this does not make sense; moreover, the foreign word used is really a foreign
 * entity that does not inflect in any way, and is spelled *in Latin letters* in the original Farsi text.
 * In those cases, you attach a `ForeignWordNode`.
 *
 * NOTE(review): the original comment announced "two effects" of attaching this node but never listed
 * them; they still need to be documented.
 *
 * Instead of adding the node explicitly, this could be resolved on the consumer side, by recognizing that
 * a word without a node, but spelled fully in Latin letters, should be treated a certain way. A consumer
 * may want to implement such logic anyway when dealing with texts which have not benefited from grammar
 * assignment.
 *
 * However, for those which have, an explicit node makes sense.
 *
 * The type parameter `D` is not used by any member of this interface.
 */
export interface ForeignWordNode<D = any> extends BaseNode {
  type: 'foreign';
}


/**
 * A free-form node that, besides the base fields, only carries a list of annotations.
 *
 * Rarely used, since there is no way to make it language-specific.
 */
export interface CustomNode extends BaseNode {
  type: 'custom';
  annotations: Annotation[];
}


/**
 * Type guard for word nodes.
 *
 * @param node The node to test; may be omitted.
 * @returns `true` if `node` is given and its `type` is `'word'`, `false` otherwise.
 */
export function isWordNode(node?: StructureNode): node is WordNode {
  if (!node) {
    return false;
  }
  return node.type === 'word';
}
/**
 * Type guard for numeral nodes.
 *
 * @param node The node to test; may be omitted.
 * @returns `true` if `node` is given and its `type` is `'numeral'`, `false` otherwise.
 */
export function isNumeralNode(node?: StructureNode): node is NumeralNode {
  if (!node) {
    return false;
  }
  return node.type === 'numeral';
}
/**
 * Type guard for custom nodes.
 *
 * @param node The node to test; may be omitted.
 * @returns `true` if `node` is given and its `type` is `'custom'`, `false` otherwise.
 */
export function isCustomNode(node?: StructureNode): node is CustomNode {
  if (!node) {
    return false;
  }
  return node.type === 'custom';
}
/**
 * Type guard for foreign-word nodes.
 *
 * @param node The node to test; may be omitted.
 * @returns `true` if `node` is given and its `type` is `'foreign'`, `false` otherwise.
 */
export function isForeignWordNode(node?: StructureNode): node is ForeignWordNode {
  if (!node) {
    return false;
  }
  return node.type === 'foreign';
}


/**
 * The union of the word, custom, numeral and foreign-word node types.
 *
 * `WD` is forwarded to `WordNode` as its inflection type parameter.
 */
export type StructureNode<WD = any> =
  | WordNode<WD>
  | CustomNode
  | NumeralNode
  | ForeignWordNode;
/**
 * Type guard that tells whether a node is present and is one of the known structure node types.
 *
 * Every member of the `StructureNode` union is accepted, including custom nodes. Previously custom
 * nodes were rejected, although the predicate claims `node is StructureNode`; callers would then see
 * a valid custom node narrowed away in the `false` branch.
 *
 * The discriminant is checked directly in a `switch`, so the full list of accepted node types sits
 * in one place.
 *
 * @param node The node to test; may be omitted.
 * @returns `true` if `node` is given and its `type` is `'word'`, `'numeral'`, `'foreign'` or
 *   `'custom'`; `false` otherwise.
 */
export function isStructureNode(node?: StructureNode): node is StructureNode {
  if (!node) {
    return false;
  }
  switch (node.type) {
    case 'word':
    case 'numeral':
    case 'foreign':
    case 'custom':
      return true;
    default:
      // Unknown discriminant, e.g. coming from untyped data at runtime.
      return false;
  }
}

// NodeType=sentence
// NodeType=chunk type=ProperNoun children=[node1, node1]
// NodeType=word tag=V vocabs=[1]
// NodeType=generic vocabs=[1,2,3]