Elasticsearch: object mapping and getting data from a database view (JDBC river)

2019-08-10 20:59发布

问题:

I use Elasticsearch for search and Oracle as the database.

  • Database: I created a view with all the data I need to index. I have 1-N relations between my "occurrences" table and "determinations" table, and between my "occurrences" table and "multimedias" table, so one occurrence has multiple determinations and multiple multimedias.

  • Elasticsearch: I created a mapping and a river to get the data from the database view.

The problem is that I need an array of objects for multimedias and determinations, instead of a separate array for each field, in the Elasticsearch result (example below).


mapping

curl -XPUT 'localhost:9200/botanic/' -d '{
  "settings": {
    "index": {
      "analysis": {
        "analyzer": {
          "keylower": {
            "tokenizer": "keyword",
            "filter": "lowercase"
          }
        }
      }
    }
  },
  "mappings": {
    "specimens": {
      "_all": {
        "enabled": true
      },
      "_index": {
        "enabled": true
      },
      "_id": {
        "index": "not_analyzed",
        "store": false
      },
      "properties": {
        "_id": {
          "type": "string",
          "store": "no",
          "index": "not_analyzed"
        },
        ...
        "MULTIMEDIA": {
          "_id": {
            "path": "M_MULTIMEDIAID"
          },
          "type": "object",
          "properties": {
            "M_MULTIMEDIAID": {
              "type": "string",
              "store": "yes",
              "index": "not_analyzed"
            },
            "M_CREATOR": {
              "type": "string",
              "store": "yes",
              "index": "not_analyzed"
            },
            "M_DESCRIPTION": {
              "type": "string",
              "store": "yes",
              "index": "analyzed"
            }
            ...
          }
        },
        "DETERMINATIONS": {
          "_id": {
            "path": "D_OCCURRENCEID"
          },
          "type": "object",
          "properties": {
            "D_OCCURRENCEID": {
              "type": "string",
              "store": "yes",
              "index": "not_analyzed"
            },
            "D_DETERMINATIONID": {
              "type": "string",
              "store": "yes",
              "index": "not_analyzed"
            },
            "D_DATEIDENTIFIED": {
              "type": "string",
              "store": "yes",
              "index": "analyzed"
            },
            "D_TYPESTATUS": {
              "type": "string",
              "store": "yes",
              "index": "analyzed"
            },
            "D_CREATED": {
              "type": "date",
              "store": "yes",
              "index": "analyzed"
            }
          }
        },
        ...
        "I_INSTITUTIONID": {
          "type": "string",
          "store": "yes",
          "index": "not_analyzed"
        },
        "I_INSTITUTIONCODE": {
          "type": "string",
          "store": "yes",
          "index": "analyzed"
        }
      }
    }
  }
}'

the river

curl -XPUT 'localhost:9200/_river/botanic_river/_meta' -d '{
  "type": "jdbc",
  "jdbc": {
    "index": "botanic",
    "type": "specimens",
    "url": "jdbc:oracle:thin:@localhost:1523:database",
    "user": "user",
    "password": "password",
    "sql": "select * from elasticsearchview"
  }
}'

The result I get (multiple separate fields, each holding an array):

"hits": [
  {
    "_index": "botanic",
    "_type": "specimens",
    "_id": "345F5BEA7FDB4B17A7831514E25CD29B",
    "_score": 0.4430604,
    "_source": {
      ...
      "M_MULTIMEDIAID": [
        "0E91818D48DE40C785733F9F3A7932F1",
        "833C6E79D7844D568B828DF2D8BA8AC7",
        "F76F6766398042D38902DA9165D41514"
      ],
      "M_CREATOR": [
        "creator1",
        "creator2",
        "creator3"
      ],
      "M_DESCRIPTION": [
        "descr1",
        "descr3",
        "descr2"
      ],
      ...
    }
  }
]

But I need something like this (an array of objects):

"hits": [
  {
    "_index": "botanic",
    "_type": "specimens",
    "_id": "345F5BEA7FDB4B17A7831514E25CD29B",
    "_score": 0.4430604,
    "_source": {
      ...
      "MULTIMEDIA": [
        {
          "M_MULTIMEDIAID": "0E91818D48DE40C785733F9F3A7932F1",
          "M_CREATOR": "creator1",
          "M_DESCRIPTION": "descr1"
        },
        {
          "M_MULTIMEDIAID": "833C6E79D7844D568B828DF2D8BA8AC7",
          "M_CREATOR": "creator2",
          "M_DESCRIPTION": "descr2"
        },
        {
          "M_MULTIMEDIAID": "F76F6766398042D38902DA9165D41514",
          "M_CREATOR": "creator3",
          "M_DESCRIPTION": "descr3"
        }
      ]
      ...
    }
  }
]

I tried both "type" : "object" and "type" : "nested" in the mapping, but got the same result.

How can I do this?