Commit 8549692b authored by Timo Hund's avatar Timo Hund Committed by Markus Kalkbrenner

[BUGFIX] Extract request rejected because of missing multipart boundary (#614)

When an ExtractQuery is executed, Apache Solr throws the following error:

* SEVERE: null:org.apache.commons.fileupload.FileUploadException: the request was rejected because no multipart boundary was found

The patch:

* Adds the boundary to the header and to the body

Fixes: #301
parent 010caf20
<?php
namespace Solarium\Core\Client\Adapter;
use Solarium\Core\Client\Request;
/**
 * Helper class for shared adapter functionality.
 */
class AdapterHelper
{
    /**
     * Builds the multipart/form-data body for the file upload of a request.
     *
     * The file returned by the request's getFileUpload() is wrapped in a single
     * form-data part named "file"; the part is delimited by the boundary string
     * returned by the request's getHash().
     *
     * @param Request $request
     *
     * @throws \RuntimeException when the upload file cannot be read
     *
     * @return string
     */
    public function buildUploadBodyFromRequest(Request $request)
    {
        $file = $request->getFileUpload();
        $boundary = $request->getHash();

        // The second argument of file_get_contents() is the boolean
        // $use_include_path flag, not an fopen() mode, so no mode string is passed.
        $contents = file_get_contents($file);
        if (false === $contents) {
            // Fail loudly instead of silently sending an empty file part.
            throw new \RuntimeException('Unable to read upload file: '.$file);
        }

        $body = "--{$boundary}\r\n";
        $body .= 'Content-Disposition: form-data; name="file"; filename="'.basename($file).'"';
        $body .= "\r\nContent-Type: application/octet-stream\r\n\r\n";
        $body .= $contents;
        // The closing delimiter carries the trailing "--" marker.
        $body .= "\r\n--{$boundary}--\r\n";

        return $body;
    }
}
......@@ -120,12 +120,9 @@ class Curl extends Configurable implements AdapterInterface
curl_setopt($handler, CURLOPT_POST, true);
if ($request->getFileUpload()) {
if (version_compare(PHP_VERSION, '5.5.0') >= 0) {
$curlFile = curl_file_create($request->getFileUpload());
curl_setopt($handler, CURLOPT_POSTFIELDS, ['content' => $curlFile]);
} else {
curl_setopt($handler, CURLOPT_POSTFIELDS, ['content' => '@'.$request->getFileUpload()]);
}
$helper = new AdapterHelper();
$data = $helper->buildUploadBodyFromRequest($request);
curl_setopt($handler, CURLOPT_POSTFIELDS, $data);
} else {
curl_setopt($handler, CURLOPT_POSTFIELDS, $request->getRawData());
}
......
......@@ -106,7 +106,8 @@ class Guzzle extends Configurable implements AdapterInterface
}
if ($request->getFileUpload()) {
return fopen($request->getFileUpload(), 'r');
$helper = new AdapterHelper();
return $helper->buildUploadBodyFromRequest($request);
}
return $request->getRawData();
......
......@@ -101,7 +101,9 @@ class Guzzle3 extends Configurable implements AdapterInterface
}
if ($request->getFileUpload()) {
return fopen($request->getFileUpload(), 'r');
$helper = new AdapterHelper();
$body = $helper->buildUploadBodyFromRequest($request);
return $body;
}
return $request->getRawData();
......
......@@ -87,22 +87,9 @@ class Http extends Configurable implements AdapterInterface
if (Request::METHOD_POST == $method) {
if ($request->getFileUpload()) {
$boundary = '----------'.md5(time());
$CRLF = "\r\n";
$file = $request->getFileUpload();
// Add the proper boundary to the Content-Type header
$headers = $request->getHeaders();
// Remove the Content-Type header, because we will replace it with something else.
if (false !== ($key = array_search('Content-Type: multipart/form-data', $headers, true))) {
unset($headers[$key]);
}
$request->setHeaders($headers);
$request->addHeader("Content-Type: multipart/form-data; boundary={$boundary}");
$data = "--{$boundary}".$CRLF;
$data .= 'Content-Disposition: form-data; name="upload"; filename='.$file.$CRLF;
$data .= 'Content-Type: application/octet-stream'.$CRLF.$CRLF;
$data .= file_get_contents($file).$CRLF;
$data .= '--'.$boundary.'--';
$helper = new AdapterHelper();
$data = $helper->buildUploadBodyFromRequest($request);
$content_length = strlen($data);
$request->addHeader("Content-Length: $content_length\r\n");
stream_context_set_option(
......
......@@ -306,4 +306,12 @@ class Request extends Configurable implements RequestParamsInterface
}
}
}
/**
 * Returns the hash of this request object as produced by spl_object_hash().
 *
 * @return string
 */
public function getHash()
{
    $hash = spl_object_hash($this);

    return $hash;
}
}
......@@ -68,7 +68,7 @@ class RequestBuilder extends BaseRequestBuilder
} elseif (is_readable($file)) {
$request->setFileUpload($file);
$request->addParam('resource.name', basename($query->getFile()));
$request->addHeader('Content-Type: multipart/form-data');
$request->addHeader('Content-Type: multipart/form-data; boundary='.$request->getHash());
} else {
throw new RuntimeException('Extract query file path/url invalid or not available');
}
......
......@@ -115,7 +115,7 @@ final class Guzzle3Test extends TestCase
$this->assertCount(1, $receivedRequests);
$this->assertSame('POST', $receivedRequests[0]->getMethod());
$this->assertStringEqualsFile(__FILE__, (string) $receivedRequests[0]->getBody());
$this->assertContains(file_get_contents(__FILE__), (string) $receivedRequests[0]->getBody());
$this->assertSame(
'request value',
(string) $receivedRequests[0]->getHeader('X-PHPUnit')
......
......@@ -115,7 +115,7 @@ final class GuzzleTest extends TestCase
$this->assertCount(1, $container);
$this->assertSame('POST', $container[0]['request']->getMethod());
$this->assertSame('request value', $container[0]['request']->getHeaderline('X-PHPUnit'));
$this->assertStringEqualsFile(__FILE__, (string) $container[0]['request']->getBody());
$this->assertContains(file_get_contents(__FILE__), (string) $container[0]['request']->getBody());
}
/**
......
......@@ -8,6 +8,7 @@ use Solarium\Component\QueryTraits\TermsTrait;
use Solarium\Component\Result\Terms\Result;
use Solarium\Core\Client\ClientInterface;
use Solarium\QueryType\Select\Query\Query as SelectQuery;
use Solarium\QueryType\Select\Result\Document;
abstract class AbstractTechproductsTest extends TestCase
{
......@@ -443,6 +444,54 @@ abstract class AbstractTechproductsTest extends TestCase
$this->assertSame($without, $with);
}
/**
 * Extracts the PDF fixture into a document with a fixed id, selects that
 * document to verify the extracted content, and deletes it again afterwards.
 */
public function testExtractIntoDocument()
{
    $extract = $this->client->createExtract();
    $extract->setUprefix('attr_');
    $extract->setFile(__DIR__.DIRECTORY_SEPARATOR.'Fixtures'.DIRECTORY_SEPARATOR.'testpdf.pdf');
    $extract->setCommit(true);
    $extract->setCommitWithin(0);
    $extract->setOmitHeader(false);

    // add document
    $doc = $extract->createDocument();
    $doc->id = 'extract-test';
    $extract->setDocument($doc);
    $this->client->extract($extract);

    // now get the document and check the content
    $select = $this->client->createSelect();
    $select->setQuery('id:extract-test');
    $selectResult = $this->client->select($select);
    $iterator = $selectResult->getIterator();

    /** @var Document $document */
    $document = $iterator->current();
    $this->assertSame('PDF Test', trim($document['content'][0]), 'Written document does not contain extracted result');

    // now clean up the document to restore the initial index state;
    // the delete is an update query, so it is dispatched through update()
    $update = $this->client->createUpdate();
    $update->addDeleteById('extract-test');
    $update->addCommit(true, true);
    $this->client->update($update);
}
/**
 * Runs an extract-only query in text format against the PDF fixture and
 * checks the plain text found under the file name in the decoded response.
 */
public function testExtractTextOnly()
{
    $pdfName = 'testpdf.pdf';
    $pdfPath = __DIR__.DIRECTORY_SEPARATOR.'Fixtures'.DIRECTORY_SEPARATOR.$pdfName;

    $extract = $this->client->createExtract();
    $extract->setFile($pdfPath);
    $extract->setExtractOnly(true);
    $extract->addParam('extractFormat', 'text');

    $result = $this->client->extract($extract);
    $decoded = json_decode($result->getResponse()->getBody());

    // the response body is keyed by the uploaded file's name
    $this->assertSame(
        'PDF Test',
        trim($decoded->{$pdfName}),
        'Can not extract the plain content from the file'
    );
}
}
class TestQuery extends SelectQuery
......
......@@ -106,10 +106,11 @@ class RequestBuilderTest extends TestCase
/**
 * Builds a request from the test query and asserts that its headers are
 * exactly the expected Content-Type header list.
 */
// NOTE(review): this span looks like a diff hunk with removed and added lines
// interleaved (build() is called twice and two Content-Type entries are listed);
// confirm the intended final state against the repository.
public function testContentTypeHeader()
{
// the request is built first so its hash can be used in the expected header
$request = $this->builder->build($this->query);
$headers = [
'Content-Type: multipart/form-data',
'Content-Type: multipart/form-data; boundary='.$request->getHash(),
];
$request = $this->builder->build($this->query);
$this->assertSame($headers, $request->getHeaders());
}
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment